page_api: tweaks

pageserver: rename data_api to page_api
include lots of changes that went missing by accident
2026-05-17 21:20:37 +00:00 · 2025-04-29 17:23:51 +02:00 · 2025-04-29 15:58:52 +02:00 · 2025-04-29 15:32:27 +03:00 · 2025-04-29 11:52:44 +03:00 · 2025-04-28 13:45:45 +00:00
189 changed files with 12748 additions and 2080 deletions
--- a/.github/actions/allure-report-generate/action.yml
+++ b/.github/actions/allure-report-generate/action.yml
@@ -7,7 +7,7 @@ inputs:
    type: boolean
    required: false
    default: false
-  aws-oicd-role-arn:
+  aws-oidc-role-arn:
    description: 'OIDC role arn to interract with S3'
    required: true

@@ -88,7 +88,7 @@ runs:
      if: ${{ !cancelled() }}
      with:
        aws-region: eu-central-1
-        role-to-assume: ${{ inputs.aws-oicd-role-arn }}
+        role-to-assume: ${{ inputs.aws-oidc-role-arn }}
        role-duration-seconds: 3600 # 1 hour should be more than enough to upload report

    # Potentially we could have several running build for the same key (for example, for the main branch), so we use improvised lock for this
--- a/.github/actions/allure-report-store/action.yml
+++ b/.github/actions/allure-report-store/action.yml
@@ -8,7 +8,7 @@ inputs:
  unique-key:
    description: 'string to distinguish different results in the same run'
    required: true
-  aws-oicd-role-arn:
+  aws-oidc-role-arn:
    description: 'OIDC role arn to interract with S3'
    required: true

@@ -39,7 +39,7 @@ runs:
      if: ${{ !cancelled() }}
      with:
        aws-region: eu-central-1
-        role-to-assume: ${{ inputs.aws-oicd-role-arn }}
+        role-to-assume: ${{ inputs.aws-oidc-role-arn }}
        role-duration-seconds: 3600 # 1 hour should be more than enough to upload report

    - name: Upload test results
--- a/.github/actions/download/action.yml
+++ b/.github/actions/download/action.yml
@@ -15,7 +15,7 @@ inputs:
  prefix:
    description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
    required: false
-  aws-oicd-role-arn:
+  aws-oidc-role-arn:
    description: 'OIDC role arn to interract with S3'
    required: true

@@ -25,7 +25,7 @@ runs:
    - uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-region: eu-central-1
-        role-to-assume: ${{ inputs.aws-oicd-role-arn }}
+        role-to-assume: ${{ inputs.aws-oidc-role-arn }}
        role-duration-seconds: 3600

    - name: Download artifact
--- a/.github/actions/neon-project-create/action.yml
+++ b/.github/actions/neon-project-create/action.yml
@@ -49,6 +49,10 @@ inputs:
    description: 'A JSON object with project settings'
    required: false
    default: '{}'
+  default_endpoint_settings:
+    description: 'A JSON object with the default endpoint settings'
+    required: false
+    default: '{}'

 outputs:
  dsn:
@@ -66,9 +70,9 @@ runs:
      # A shell without `set -x` to not to expose password/dsn in logs
      shell: bash -euo pipefail {0}
      run: |
-        project=$(curl \
+        res=$(curl \
          "https://${API_HOST}/api/v2/projects" \
-          --fail \
+          -w "%{http_code}" \
          --header "Accept: application/json" \
          --header "Content-Type: application/json" \
          --header "Authorization: Bearer ${API_KEY}" \
@@ -83,6 +87,15 @@ runs:
              \"settings\": ${PROJECT_SETTINGS}
            }
          }")
+        
+        code=${res: -3}  # -w "%{http_code}" appended the 3-digit HTTP status after the response body
+        if [[ ${code} -ge 400 ]]; then
+          echo "Request failed with error code ${code}" >&2
+          echo "${res::-3}" >&2  # quoted so the error body is printed verbatim (no word splitting/globbing)
+          exit 1
+        else
+          project=${res::-3}
+        fi

        # Mask password
        echo "::add-mask::$(echo $project | jq --raw-output '.roles[] | select(.name != "web_access") | .password')"
@@ -126,6 +139,22 @@ runs:
            -H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
            -d "{\"scheduling\": \"Essential\"}"
        fi
+        # XXX
+        # This is a workaround for the default endpoint settings, which currently do not allow some settings in the public API.
+        # https://github.com/neondatabase/cloud/issues/27108
+        if [[ -n ${DEFAULT_ENDPOINT_SETTINGS} && ${DEFAULT_ENDPOINT_SETTINGS} != "{}" ]] ; then
+          # Read the current settings; --fail aborts the step on an HTTP error instead of piping an error body into jq.
+          PROJECT_DATA=$(curl -X GET --fail \
+              "https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/projects/${project_id}" \
+              -H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}"
+          )
+          # Overlay the requested settings on the existing ones (right-hand side wins); --argjson keeps the input out of the jq program text.
+          NEW_DEFAULT_ENDPOINT_SETTINGS=$(echo "${PROJECT_DATA}" | jq -rc --argjson overrides "${DEFAULT_ENDPOINT_SETTINGS}" '.project.default_endpoint_settings + $overrides')
+          curl -X POST --fail \
+                "https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/projects/${project_id}/default_endpoint_settings" \
+                -H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
+                --data "${NEW_DEFAULT_ENDPOINT_SETTINGS}"
+        fi

      env:
        API_HOST: ${{ inputs.api_host }}
@@ -142,3 +171,4 @@ runs:
        PSQL: ${{ inputs.psql_path }}
        LD_LIBRARY_PATH: ${{ inputs.libpq_lib_path }}
        PROJECT_SETTINGS: ${{ inputs.project_settings }}
+        DEFAULT_ENDPOINT_SETTINGS: ${{ inputs.default_endpoint_settings }}
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -53,7 +53,7 @@ inputs:
    description: 'benchmark durations JSON'
    required: false
    default: '{}'
-  aws-oicd-role-arn:
+  aws-oidc-role-arn:
    description: 'OIDC role arn to interract with S3'
    required: true

@@ -66,7 +66,7 @@ runs:
      with:
        name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}${{ inputs.sanitizers == 'enabled' && '-sanitized' || '' }}-artifact
        path: /tmp/neon
-        aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
+        aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}

    - name: Download Neon binaries for the previous release
      if: inputs.build_type != 'remote'
@@ -75,7 +75,7 @@ runs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact
        path: /tmp/neon-previous
        prefix: latest
-        aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
+        aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}

    - name: Download compatibility snapshot
      if: inputs.build_type != 'remote'
@@ -87,7 +87,7 @@ runs:
        # The lack of compatibility snapshot (for example, for the new Postgres version)
        # shouldn't fail the whole job. Only relevant test should fail.
        skip-if-does-not-exist: true
-        aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
+        aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}

    - name: Checkout
      if: inputs.needs_postgres_source == 'true'
@@ -228,13 +228,13 @@ runs:
        # The lack of compatibility snapshot shouldn't fail the job
        # (for example if we didn't run the test for non build-and-test workflow)
        skip-if-does-not-exist: true
-        aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
+        aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}

    - uses: aws-actions/configure-aws-credentials@v4
      if: ${{ !cancelled() }}
      with:
        aws-region: eu-central-1
-        role-to-assume: ${{ inputs.aws-oicd-role-arn }}
+        role-to-assume: ${{ inputs.aws-oidc-role-arn }}
        role-duration-seconds: 3600 # 1 hour should be more than enough to upload report

    - name: Upload test results
@@ -243,4 +243,4 @@ runs:
      with:
        report-dir: /tmp/test_output/allure/results
        unique-key: ${{ inputs.build_type }}-${{ inputs.pg_version }}-${{ runner.arch }}
-        aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
+        aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}
--- a/.github/actions/save-coverage-data/action.yml
+++ b/.github/actions/save-coverage-data/action.yml
@@ -14,11 +14,11 @@ runs:
        name: coverage-data-artifact
        path: /tmp/coverage
        skip-if-does-not-exist: true # skip if there's no previous coverage to download
-        aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
+        aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}

    - name: Upload coverage data
      uses: ./.github/actions/upload
      with:
        name: coverage-data-artifact
        path: /tmp/coverage
-        aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
+        aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}
--- a/.github/actions/upload/action.yml
+++ b/.github/actions/upload/action.yml
@@ -14,7 +14,7 @@ inputs:
  prefix:
    description: "S3 prefix. Default is '${GITHUB_SHA}/${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
    required: false
-  aws-oicd-role-arn:
+  aws-oidc-role-arn:
    description: "the OIDC role arn for aws auth"
    required: false
    default: ""
@@ -61,7 +61,7 @@ runs:
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-region: eu-central-1
-        role-to-assume: ${{ inputs.aws-oicd-role-arn }}
+        role-to-assume: ${{ inputs.aws-oidc-role-arn }}
        role-duration-seconds: 3600

    - name: Upload artifact
--- a/.github/workflows/_benchmarking_preparation.yml
+++ b/.github/workflows/_benchmarking_preparation.yml
@@ -81,7 +81,7 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    # we create a table that has one row for each database that we want to restore with the status whether the restore is done
    - name: Create benchmark_restore_status table if it does not exist
--- a/.github/workflows/_build-and-test-locally.yml
+++ b/.github/workflows/_build-and-test-locally.yml
@@ -28,6 +28,16 @@ on:
        required: false
        default: 'disabled'
        type: string
+      test-selection:
+        description: 'specification of selected test(s) to run'
+        required: false
+        default: ''
+        type: string
+      test-run-count:
+        description: 'number of runs to perform for selected tests'
+        required: false
+        default: 1
+        type: number

 defaults:
  run:
@@ -275,7 +285,7 @@ jobs:
                for io_mode in buffered direct direct-rw ; do
                  NEON_PAGESERVER_UNIT_TEST_GET_VECTORED_CONCURRENT_IO=$get_vectored_concurrent_io \
                  NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine \
-                  NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOMODE=$io_mode \
+                  NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IO_MODE=$io_mode \
                  ${cov_prefix} \
                  cargo nextest run $CARGO_FLAGS $CARGO_FEATURES  -E 'package(pageserver)'
              done
@@ -313,7 +323,7 @@ jobs:
        with:
          name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}${{ inputs.sanitizers == 'enabled' && '-sanitized' || '' }}-artifact
          path: /tmp/neon
-          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

      - name: Check diesel schema
        if: inputs.build-type == 'release' && inputs.arch == 'x64'
@@ -381,21 +391,22 @@ jobs:
          run_with_real_s3: true
          real_s3_bucket: neon-github-ci-tests
          real_s3_region: eu-central-1
-          rerun_failed: true
+          rerun_failed: ${{ inputs.test-run-count == 1 }}
          pg_version: ${{ matrix.pg_version }}
          sanitizers: ${{ inputs.sanitizers }}
-          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
          # `--session-timeout` is equal to (timeout-minutes - 10 minutes) * 60 seconds.
          # Attempt to stop tests gracefully to generate test reports
          # until they are forcibly stopped by the stricter `timeout-minutes` limit.
-          extra_params: --session-timeout=${{ inputs.sanitizers != 'enabled' && 3000 || 10200 }}
+          extra_params: --session-timeout=${{ inputs.sanitizers != 'enabled' && 3000 || 10200 }} --count=${{ inputs.test-run-count }}
+                        ${{ inputs.test-selection != '' && format('-k "{0}"', inputs.test-selection) || '' }}
        env:
          TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
          CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
          BUILD_TAG: ${{ inputs.build-tag }}
          PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
          PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
-          PAGESERVER_VIRTUAL_FILE_IO_MODE: direct
+          PAGESERVER_VIRTUAL_FILE_IO_MODE: direct-rw
          USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}

      # Temporary disable this step until we figure out why it's so flaky
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -114,7 +114,7 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Create Neon Project
      id: create-neon-project
@@ -132,7 +132,7 @@ jobs:
        run_in_parallel: false
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        pg_version: ${{ env.PG_VERSION }}
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        # Set --sparse-ordering option of pytest-order plugin
        # to ensure tests are running in order of appears in the file.
        # It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
@@ -165,7 +165,7 @@ jobs:
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
      with:
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
@@ -222,8 +222,8 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
-    
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+
    - name: Verify that cumulative statistics are preserved
      uses: ./.github/actions/run-python-test-set
      with:
@@ -233,7 +233,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 3600
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -282,7 +282,7 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Run Logical Replication benchmarks
      uses: ./.github/actions/run-python-test-set
@@ -293,7 +293,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 5400
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -310,7 +310,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 5400
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -322,7 +322,7 @@ jobs:
      uses: ./.github/actions/allure-report-generate
      with:
        store-test-results-into-db: true
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}

@@ -505,7 +505,7 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Create Neon Project
      if: contains(fromJSON('["neonvm-captest-new", "neonvm-captest-new-many-tables", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform)
@@ -557,7 +557,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_perf_many_relations
        pg_version: ${{ env.PG_VERSION }}
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -573,7 +573,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
        pg_version: ${{ env.PG_VERSION }}
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -588,7 +588,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
        pg_version: ${{ env.PG_VERSION }}
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -603,7 +603,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
        pg_version: ${{ env.PG_VERSION }}
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -621,7 +621,7 @@ jobs:
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
      with:
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
@@ -694,7 +694,7 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Set up Connection String
      id: set-up-connstr
@@ -726,7 +726,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing
        pg_version: ${{ env.PG_VERSION }}
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -741,7 +741,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600
        pg_version: ${{ env.PG_VERSION }}
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -752,7 +752,7 @@ jobs:
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
      with:
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
@@ -828,7 +828,7 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Set up Connection String
      id: set-up-connstr
@@ -871,7 +871,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 43200 -k test_clickbench
        pg_version: ${{ env.PG_VERSION }}
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -885,7 +885,7 @@ jobs:
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
      with:
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
@@ -954,7 +954,7 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Get Connstring Secret Name
      run: |
@@ -1003,7 +1003,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_tpch
        pg_version: ${{ env.PG_VERSION }}
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -1015,7 +1015,7 @@ jobs:
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
      with:
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
@@ -1078,7 +1078,7 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Set up Connection String
      id: set-up-connstr
@@ -1121,7 +1121,7 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_user_examples
        pg_version: ${{ env.PG_VERSION }}
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -1132,7 +1132,7 @@ jobs:
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
      with:
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
--- a/.github/workflows/build-macos.yml
+++ b/.github/workflows/build-macos.yml
@@ -34,11 +34,10 @@ permissions:
 jobs:
  build-pgxn:
    if: |
-      (inputs.pg_versions != '[]' || inputs.rebuild_everything) && (
-        contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
-        contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
-        github.ref_name == 'main'
-      )
+      inputs.pg_versions != '[]' || inputs.rebuild_everything ||
+      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
+      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
+      github.ref_name == 'main'
    timeout-minutes: 30
    runs-on: macos-15
    strategy:
@@ -63,13 +62,8 @@ jobs:

      - name: Cache postgres ${{ matrix.postgres-version }} build
        id: cache_pg
-        uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1  # v1.8.0
+        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
        with:
-          endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
-          bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
-          accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
-          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
-          use-fallback: false
          path: pg_install/${{ matrix.postgres-version }}
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ matrix.postgres-version }}-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

@@ -105,13 +99,21 @@ jobs:
        run: |
          make postgres-headers-${{ matrix.postgres-version }} -j$(sysctl -n hw.ncpu)

+      - name: Upload "pg_install/${{ matrix.postgres-version }}" artifact
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        with:
+          name: pg_install--${{ matrix.postgres-version }}
+          path: pg_install/${{ matrix.postgres-version }}
+          # The artifact is supposed to be used by the next job in the same workflow,
+          # so there’s no need to store it for too long.
+          retention-days: 1
+
  build-walproposer-lib:
    if: |
-      (inputs.pg_versions != '[]' || inputs.rebuild_everything) && (
-        contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
-        contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
-        github.ref_name == 'main'
-      )
+      inputs.pg_versions != '[]' || inputs.rebuild_everything ||
+      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
+      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
+      github.ref_name == 'main'
    timeout-minutes: 30
    runs-on: macos-15
    needs: [build-pgxn]
@@ -132,27 +134,16 @@ jobs:
        id: pg_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}"

-      - name: Cache postgres v17 build
-        id: cache_pg
-        uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1  # v1.8.0
+      - name: Download "pg_install/v17" artifact
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
        with:
-          endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
-          bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
-          accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
-          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
-          use-fallback: false
+          name: pg_install--v17
          path: pg_install/v17
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

      - name: Cache walproposer-lib
        id: cache_walproposer_lib
-        uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1  # v1.8.0
+        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
        with:
-          endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
-          bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
-          accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
-          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
-          use-fallback: false
          path: pg_install/build/walproposer-lib
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

@@ -178,13 +169,21 @@ jobs:
        run:
          make walproposer-lib -j$(sysctl -n hw.ncpu)

+      - name: Upload "pg_install/build/walproposer-lib" artifact
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        with:
+          name: pg_install--build--walproposer-lib
+          path: pg_install/build/walproposer-lib
+          # The artifact is supposed to be used by the next job in the same workflow,
+          # so there’s no need to store it for too long.
+          retention-days: 1
+
  cargo-build:
    if: |
-      (inputs.pg_versions != '[]' || inputs.rebuild_rust_code || inputs.rebuild_everything) && (
-        contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
-        contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
-        github.ref_name == 'main'
-      )
+      inputs.pg_versions != '[]' || inputs.rebuild_rust_code || inputs.rebuild_everything ||
+      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
+      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
+      github.ref_name == 'main'
    timeout-minutes: 30
    runs-on: macos-15
    needs: [build-pgxn, build-walproposer-lib]
@@ -203,72 +202,45 @@ jobs:
        with:
          submodules: true

-      - name: Set pg v14 for caching
-        id: pg_rev_v14
-        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) | tee -a "${GITHUB_OUTPUT}"
-      - name: Set pg v15 for caching
-        id: pg_rev_v15
-        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) | tee -a "${GITHUB_OUTPUT}"
-      - name: Set pg v16 for caching
-        id: pg_rev_v16
-        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) | tee -a "${GITHUB_OUTPUT}"
-      - name: Set pg v17 for caching
-        id: pg_rev_v17
-        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}"
-
-      - name: Cache postgres v14 build
-        id: cache_pg
-        uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1  # v1.8.0
+      - name: Download "pg_install/v14" artifact
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
        with:
-          endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
-          bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
-          accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
-          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
-          use-fallback: false
+          name: pg_install--v14
          path: pg_install/v14
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v14-${{ steps.pg_rev_v14.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
-      - name: Cache postgres v15 build
-        id: cache_pg_v15
-        uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1  # v1.8.0
-        with:
-          endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
-          bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
-          accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
-          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
-          use-fallback: false
-          path: pg_install/v15
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v15-${{ steps.pg_rev_v15.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
-      - name: Cache postgres v16 build
-        id: cache_pg_v16
-        uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1  # v1.8.0
-        with:
-          endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
-          bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
-          accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
-          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
-          use-fallback: false
-          path: pg_install/v16
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v16-${{ steps.pg_rev_v16.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
-      - name: Cache postgres v17 build
-        id: cache_pg_v17
-        uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1  # v1.8.0
-        with:
-          endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
-          bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
-          accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
-          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
-          use-fallback: false
-          path: pg_install/v17
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

-      - name: Cache cargo deps (only for v17)
-        uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1  # v1.8.0
+      - name: Download "pg_install/v15" artifact
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        with:
+          name: pg_install--v15
+          path: pg_install/v15
+
+      - name: Download "pg_install/v16" artifact
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        with:
+          name: pg_install--v16
+          path: pg_install/v16
+
+      - name: Download "pg_install/v17" artifact
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        with:
+          name: pg_install--v17
+          path: pg_install/v17
+
+      - name: Download "pg_install/build/walproposer-lib" artifact
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        with:
+          name: pg_install--build--walproposer-lib
+          path: pg_install/build/walproposer-lib
+
+      # `actions/download-artifact` doesn't preserve permissions:
+      # https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss
+      - name: Make pg_install/v*/bin/* executable
+        run: |
+          chmod +x pg_install/v*/bin/*
+
+      - name: Cache cargo deps
+        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
        with:
-          endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
-          bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
-          accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
-          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
-          use-fallback: false
          path: |
            ~/.cargo/registry
            !~/.cargo/registry/src
@@ -276,18 +248,6 @@ jobs:
            target
          key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust

-      - name: Cache walproposer-lib
-        id: cache_walproposer_lib
-        uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1  # v1.8.0
-        with:
-          endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
-          bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
-          accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
-          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
-          use-fallback: false
-          path: pg_install/build/walproposer-lib
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
-
      - name: Install build dependencies
        run: |
          brew install flex bison openssl protobuf icu4c
@@ -297,8 +257,8 @@ jobs:
          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV

-      - name: Run cargo build (only for v17)
+      - name: Run cargo build
        run: cargo build --all --release -j$(sysctl -n hw.ncpu)

-      - name: Check that no warnings are produced (only for v17)
+      - name: Check that no warnings are produced
        run: ./run_clippy.sh
--- a/.github/workflows/build_and_run_selected_test.yml
+++ b/.github/workflows/build_and_run_selected_test.yml
@@ -0,0 +1,120 @@
+name: Build and Run Selected Test
+
+on:
+  workflow_dispatch:
+    inputs:
+      test-selection:
+        description: 'Specification of selected test(s), as accepted by pytest -k'
+        required: true
+        type: string
+      run-count:
+        description: 'Number of test runs to perform'
+        required: true
+        type: number
+      archs:
+        description: 'Architectures to run tests on, e.g.: ["x64", "arm64"]'
+        default: '["x64"]'
+        required: true
+        type: string # JSON array; parsed with fromJson() into the job matrix
+      build-types:
+        description: 'Build types to run tests on, e.g.: ["debug", "release"]'
+        default: '["release"]'
+        required: true
+        type: string # JSON array; parsed with fromJson() into the job matrix
+      pg-versions:
+        description: 'Postgres versions to use for testing, e.g.: [{"pg_version":"v16"}, {"pg_version":"v17"}]'
+        default: '[{"pg_version":"v17"}]'
+        required: true
+        type: string # JSON array of objects; forwarded unparsed as `test-cfg`
+
+defaults:
+  run:
+    shell: bash -euxo pipefail {0}
+
+env:
+  RUST_BACKTRACE: 1
+  COPT: '-Werror'
+
+jobs:
+  meta:
+    uses: ./.github/workflows/_meta.yml
+    with:
+      github-event-name: ${{ github.event_name }}
+      github-event-json: ${{ toJSON(github.event) }}
+
+  build-and-test-locally:
+    needs: [ meta ]
+    strategy:
+      fail-fast: false
+      matrix:
+        arch: ${{ fromJson(inputs.archs) }}
+        build-type: ${{ fromJson(inputs.build-types) }}
+    uses: ./.github/workflows/_build-and-test-locally.yml
+    with:
+      arch: ${{ matrix.arch }}
+      build-tools-image: ghcr.io/neondatabase/build-tools:pinned-bookworm
+      build-tag: ${{ needs.meta.outputs.build-tag }}
+      build-type: ${{ matrix.build-type }}
+      test-cfg: ${{ inputs.pg-versions }}
+      test-selection: ${{ inputs.test-selection }}
+      test-run-count: ${{ fromJson(inputs.run-count) }}
+    secrets: inherit
+
+  create-test-report:
+    needs: [ build-and-test-locally ]
+    if: ${{ !cancelled() }}
+    permissions:
+      id-token: write # aws-actions/configure-aws-credentials
+      statuses: write
+      contents: write
+      pull-requests: write
+    outputs:
+      report-url: ${{ steps.create-allure-report.outputs.report-url }}
+
+    runs-on: [ self-hosted, small ]
+    container:
+      image: ghcr.io/neondatabase/build-tools:pinned-bookworm
+      credentials:
+        username: ${{ github.actor }}
+        password: ${{ secrets.GITHUB_TOKEN }}
+      options: --init
+
+    steps:
+      - name: Harden the runner (Audit all outbound calls)
+        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
+        with:
+          egress-policy: audit
+
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: Create Allure report
+        if: ${{ !cancelled() }}
+        id: create-allure-report
+        uses: ./.github/actions/allure-report-generate
+        with:
+          store-test-results-into-db: true
+          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        env:
+          REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_DEV }}
+
+      - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        if: ${{ !cancelled() }}
+        with:
+          # Retry script for 5XX server errors: https://github.com/actions/github-script#retries
+          retries: 5
+          script: |
+            const report = {
+              reportUrl:     "${{ steps.create-allure-report.outputs.report-url }}",
+              reportJsonUrl: "${{ steps.create-allure-report.outputs.report-json-url }}",
+            }
+
+            const coverage = {} // this job has no coverage data to report
+
+            const script = require("./scripts/comment-test-report.js")
+            await script({
+              github,
+              context,
+              fetch,
+              report,
+              coverage,
+            })
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -317,14 +317,14 @@ jobs:
          extra_params: --splits 5 --group ${{ matrix.pytest_split_group }}
          benchmark_durations: ${{ needs.get-benchmarks-durations.outputs.json }}
          pg_version: v16
-          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        env:
          VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
          PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
          TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
          PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
          PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
-          PAGESERVER_VIRTUAL_FILE_IO_MODE: direct
+          PAGESERVER_VIRTUAL_FILE_IO_MODE: direct-rw
          SYNC_BETWEEN_TESTS: true
      # XXX: no coverage data handling here, since benchmarks are run on release builds,
      # while coverage is currently collected for the debug ones
@@ -384,7 +384,7 @@ jobs:
        uses: ./.github/actions/allure-report-generate
        with:
          store-test-results-into-db: true
-          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        env:
          REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}

@@ -451,14 +451,14 @@ jobs:
        with:
          name: neon-${{ runner.os }}-${{ runner.arch }}-${{ matrix.build_type }}-artifact
          path: /tmp/neon
-          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

      - name: Get coverage artifact
        uses: ./.github/actions/download
        with:
          name: coverage-data-artifact
          path: /tmp/coverage
-          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

      - name: Merge coverage data
        run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
--- a/.github/workflows/build_and_test_with_sanitizers.yml
+++ b/.github/workflows/build_and_test_with_sanitizers.yml
@@ -117,7 +117,7 @@ jobs:
        uses: ./.github/actions/allure-report-generate
        with:
          store-test-results-into-db: true
-          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        env:
          REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}

--- a/.github/workflows/check-permissions.yml
+++ b/.github/workflows/check-permissions.yml
@@ -19,7 +19,7 @@ jobs:
    runs-on: ubuntu-22.04
    steps:
    - name: Harden the runner (Audit all outbound calls)
-      uses: step-security/harden-runner@v2
+      uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
      with:
        egress-policy: audit

--- a/.github/workflows/cleanup-caches-by-a-branch.yml
+++ b/.github/workflows/cleanup-caches-by-a-branch.yml
@@ -12,7 +12,7 @@ jobs:
    runs-on: ubuntu-22.04
    steps:
      - name: Harden the runner (Audit all outbound calls)
-        uses: step-security/harden-runner@v2
+        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
        with:
          egress-policy: audit

--- a/.github/workflows/cloud-extensions.yml
+++ b/.github/workflows/cloud-extensions.yml
@@ -0,0 +1,112 @@
+name: Cloud Extensions Test
+on:
+  schedule:
+    # * is a special character in YAML so you have to quote this string
+    #          ┌───────────── minute (0 - 59)
+    #          │ ┌───────────── hour (0 - 23)
+    #          │ │ ┌───────────── day of the month (1 - 31)
+    #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
+    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
+    - cron:  '45 1 * * *' # run once a day, timezone is utc
+  workflow_dispatch: # adds ability to run this manually
+    inputs:
+      region_id:
+        description: 'Project region id. If not set, the default region will be used'
+        required: false
+        default: 'aws-us-east-2'
+
+defaults:
+  run:
+    shell: bash -euxo pipefail {0}
+
+permissions:
+  id-token: write # aws-actions/configure-aws-credentials
+  statuses: write
+  contents: write
+
+jobs:
+  regress:
+    env:
+      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
+      TEST_OUTPUT: /tmp/test_output
+      BUILD_TYPE: remote
+    strategy:
+      fail-fast: false
+      matrix:
+        pg-version: [16, 17]
+
+    runs-on: [ self-hosted, small ]
+    container:
+      # We use the neon-test-extensions image here as it contains the source code for the extensions.
+      image: ghcr.io/neondatabase/neon-test-extensions-v${{ matrix.pg-version }}:latest
+      credentials:
+        username: ${{ github.actor }}
+        password: ${{ secrets.GITHUB_TOKEN }}
+      options: --init
+
+    steps:
+      - name: Harden the runner (Audit all outbound calls)
+        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
+        with:
+          egress-policy: audit
+
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      # Builds the `settings` output (project settings JSON) consumed by "Create Neon Project".
+      - name: Evaluate the settings
+        id: project-settings
+        run: |
+          # Library name to preload: `ulid` for Postgres < 17, `pgx_ulid` otherwise.
+          if [[ ${{ matrix.pg-version }} -lt 17 ]]; then ULID=ulid; else ULID=pgx_ulid; fi
+          # NOTE(review): only ":" is split on below, so the ","-joined pair of rag_* names
+          # ends up as a single list entry -- confirm that this is intentional.
+          LIBS="timescaledb:rag_bge_small_en_v15,rag_jina_reranker_v1_tiny_en:${ULID}"
+          settings=$(jq -c -n --arg libs "${LIBS}" '{preload_libraries:{use_defaults:false,enabled_libraries:($libs| split(":"))}}')
+          echo "settings=${settings}" >> "${GITHUB_OUTPUT}"
+
+      - name: Create Neon Project
+        id: create-neon-project
+        uses: ./.github/actions/neon-project-create
+        with:
+          region_id: ${{ inputs.region_id }}
+          postgres_version: ${{ matrix.pg-version }}
+          project_settings: ${{ steps.project-settings.outputs.settings }}
+          # We need these settings to get the expected output results.
+          # We cannot use the environment variables e.g. PGTZ due to
+          # https://github.com/neondatabase/neon/issues/1287
+          default_endpoint_settings: >
+            {
+              "pg_settings": {
+                "DateStyle": "Postgres,MDY",
+                "TimeZone": "America/Los_Angeles",
+                "compute_query_id": "off",
+                "neon.allow_unstable_extensions": "on"
+              }
+            }
+          api_key: ${{ secrets.NEON_STAGING_API_KEY }}
+          admin_api_key: ${{ secrets.NEON_STAGING_ADMIN_API_KEY }}
+
+      - name: Run the regression tests
+        run: /run-tests.sh -r /ext-src
+        env:
+          BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
+          SKIP: "pg_hint_plan-src,pg_repack-src,pg_cron-src,plpgsql_check-src"
+
+      - name: Delete Neon Project
+        if: ${{ always() }}
+        uses: ./.github/actions/neon-project-delete
+        with:
+          project_id: ${{ steps.create-neon-project.outputs.project_id }}
+          api_key: ${{ secrets.NEON_STAGING_API_KEY }}
+
+      - name: Post to a Slack channel
+        if: ${{ github.event.schedule && failure() }}
+        uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1
+        with:
+          channel-id: ${{ vars.SLACK_ON_CALL_QA_STAGING_STREAM }}
+          slack-message: |
+            Periodic extensions test on staging: ${{ job.status }}
+            <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
+        env:
+          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
+
--- a/.github/workflows/cloud-regress.yml
+++ b/.github/workflows/cloud-regress.yml
@@ -89,7 +89,7 @@ jobs:
          name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
          path: /tmp/neon/
          prefix: latest
-          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

      - name: Create a new branch
        id: create-branch
@@ -105,7 +105,7 @@ jobs:
          test_selection: cloud_regress
          pg_version: ${{matrix.pg-version}}
          extra_params: -m remote_cluster
-          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        env:
          BENCHMARK_CONNSTR: ${{steps.create-branch.outputs.dsn}}

@@ -122,7 +122,7 @@ jobs:
        if: ${{ !cancelled() }}
        uses: ./.github/actions/allure-report-generate
        with:
-          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

      - name: Post to a Slack channel
        if: ${{ github.event.schedule && failure() }}
--- a/.github/workflows/fast-forward.yml
+++ b/.github/workflows/fast-forward.yml
@@ -14,7 +14,7 @@ jobs:

    steps:
      - name: Harden the runner (Audit all outbound calls)
-        uses: step-security/harden-runner@v2
+        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
        with:
          egress-policy: audit

--- a/.github/workflows/ingest_benchmark.yml
+++ b/.github/workflows/ingest_benchmark.yml
@@ -32,7 +32,7 @@ jobs:
      fail-fast: false # allow other variants to continue even if one fails
      matrix:
        include:
-          - target_project: new_empty_project_stripe_size_2048 
+          - target_project: new_empty_project_stripe_size_2048
            stripe_size: 2048 # 16 MiB
            postgres_version: 16
            disable_sharding: false
@@ -98,7 +98,7 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Create Neon Project
      if: ${{ startsWith(matrix.target_project, 'new_empty_project') }}
@@ -110,10 +110,10 @@ jobs:
        compute_units: '[7, 7]' # we want to test large compute here to avoid compute-side bottleneck
        api_key: ${{ secrets.NEON_STAGING_API_KEY }}
        shard_split_project: ${{ matrix.stripe_size != null && 'true' || 'false' }}
-        admin_api_key: ${{ secrets.NEON_STAGING_ADMIN_API_KEY }} 
+        admin_api_key: ${{ secrets.NEON_STAGING_ADMIN_API_KEY }}
        shard_count: 8
        stripe_size: ${{ matrix.stripe_size }}
-        disable_sharding: ${{ matrix.disable_sharding }} 
+        disable_sharding: ${{ matrix.disable_sharding }}

    - name: Initialize Neon project
      if: ${{ startsWith(matrix.target_project, 'new_empty_project') }}
@@ -171,7 +171,7 @@ jobs:
        extra_params: -s -m remote_cluster --timeout 86400 -k test_ingest_performance_using_pgcopydb
        pg_version: v${{ matrix.postgres_version }}
        save_perf_report: true
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }}
        TARGET_PROJECT_TYPE: ${{ matrix.target_project }}
--- a/.github/workflows/label-for-external-users.yml
+++ b/.github/workflows/label-for-external-users.yml
@@ -28,7 +28,7 @@ jobs:

    steps:
    - name: Harden the runner (Audit all outbound calls)
-      uses: step-security/harden-runner@v2
+      uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
      with:
        egress-policy: audit

@@ -75,7 +75,7 @@ jobs:

    steps:
    - name: Harden the runner (Audit all outbound calls)
-      uses: step-security/harden-runner@v2
+      uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
      with:
        egress-policy: audit

--- a/.github/workflows/large_oltp_benchmark.yml
+++ b/.github/workflows/large_oltp_benchmark.yml
@@ -33,9 +33,9 @@ jobs:
      fail-fast: false # allow other variants to continue even if one fails
      matrix:
        include:
-          - target: new_branch 
+          - target: new_branch
            custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
-          - target: reuse_branch 
+          - target: reuse_branch
            custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
      max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
    permissions:
@@ -43,7 +43,7 @@ jobs:
      statuses: write
      id-token: write # aws-actions/configure-aws-credentials
    env:
-      TEST_PG_BENCH_DURATIONS_MATRIX: "1h" # todo update to > 1 h 
+      TEST_PG_BENCH_DURATIONS_MATRIX: "1h" # todo update to > 1 h
      TEST_PGBENCH_CUSTOM_SCRIPTS: ${{ matrix.custom_scripts }}
      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
      PG_VERSION: 16 # pre-determined by pre-determined project
@@ -85,7 +85,7 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Create Neon Branch for large tenant
      if: ${{ matrix.target == 'new_branch' }}
@@ -129,7 +129,7 @@ jobs:
        ${PSQL} "${BENCHMARK_CONNSTR}" -c "SET statement_timeout = 0; DELETE FROM webhook.incoming_webhooks WHERE created_at > '2025-02-27 23:59:59+00';"
        echo "$(date '+%Y-%m-%d %H:%M:%S') - Finished deleting rows in table webhook.incoming_webhooks from prior runs"

-    - name: Benchmark pgbench with custom-scripts 
+    - name: Benchmark pgbench with custom-scripts
      uses: ./.github/actions/run-python-test-set
      with:
        build_type: ${{ env.BUILD_TYPE }}
@@ -138,7 +138,7 @@ jobs:
        save_perf_report: true
        extra_params: -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_pgbench
        pg_version: ${{ env.PG_VERSION }}
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -153,7 +153,7 @@ jobs:
        save_perf_report: true
        extra_params: -m remote_cluster --timeout 172800 -k test_perf_oltp_large_tenant_maintenance
        pg_version: ${{ env.PG_VERSION }}
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -179,8 +179,8 @@ jobs:
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
      with:
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
-  
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+
    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
      uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1
--- a/.github/workflows/neon_extra_builds.yml
+++ b/.github/workflows/neon_extra_builds.yml
@@ -69,10 +69,6 @@ jobs:

  check-macos-build:
    needs: [ check-permissions, files-changed ]
-    if: |
-      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
-      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
-      github.ref_name == 'main'
    uses: ./.github/workflows/build-macos.yml
    with:
      pg_versions: ${{ needs.files-changed.outputs.postgres_changes }}
--- a/.github/workflows/periodic_pagebench.yml
+++ b/.github/workflows/periodic_pagebench.yml
@@ -147,7 +147,7 @@ jobs:
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
      with:
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
--- a/.github/workflows/pg-clients.yml
+++ b/.github/workflows/pg-clients.yml
@@ -103,7 +103,7 @@ jobs:
          name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
          path: /tmp/neon/
          prefix: latest
-          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

      - name: Create Neon Project
        id: create-neon-project
@@ -122,7 +122,7 @@ jobs:
          run_in_parallel: false
          extra_params: -m remote_cluster
          pg_version: ${{ env.DEFAULT_PG_VERSION }}
-          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        env:
          BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}

@@ -139,7 +139,7 @@ jobs:
        uses: ./.github/actions/allure-report-generate
        with:
          store-test-results-into-db: true
-          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        env:
          REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}

@@ -178,7 +178,7 @@ jobs:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Create Neon Project
      id: create-neon-project
@@ -195,7 +195,7 @@ jobs:
        run_in_parallel: false
        extra_params: -m remote_cluster
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}

@@ -212,7 +212,7 @@ jobs:
      uses: ./.github/actions/allure-report-generate
      with:
        store-test-results-into-db: true
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
      env:
        REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}

--- a/.github/workflows/pin-build-tools-image.yml
+++ b/.github/workflows/pin-build-tools-image.yml
@@ -41,7 +41,7 @@ jobs:

    steps:
      - name: Harden the runner (Audit all outbound calls)
-        uses: step-security/harden-runner@v2
+        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
        with:
          egress-policy: audit

--- a/.github/workflows/random-ops-test.yml
+++ b/.github/workflows/random-ops-test.yml
@@ -66,7 +66,7 @@ jobs:
          name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
          path: /tmp/neon/
          prefix: latest
-          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

      - name: Run tests
        uses: ./.github/actions/run-python-test-set
@@ -76,7 +76,7 @@ jobs:
          run_in_parallel: false
          extra_params: -m remote_cluster
          pg_version: ${{ matrix.pg-version }}
-          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        env:
          NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
          RANDOM_SEED: ${{ inputs.random_seed }}
@@ -88,6 +88,6 @@ jobs:
        uses: ./.github/actions/allure-report-generate
        with:
          store-test-results-into-db: true
-          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        env:
          REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
--- a/.github/workflows/trigger-e2e-tests.yml
+++ b/.github/workflows/trigger-e2e-tests.yml
@@ -35,7 +35,7 @@ jobs:

    steps:
      - name: Harden the runner (Audit all outbound calls)
-        uses: step-security/harden-runner@v2
+        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
        with:
          egress-policy: audit

@@ -73,7 +73,7 @@ jobs:
        }}
    steps:
      - name: Harden the runner (Audit all outbound calls)
-        uses: step-security/harden-runner@v2
+        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
        with:
          egress-policy: audit

--- a/Cargo.lock
+++ b/Cargo.lock
@@ -253,6 +253,17 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a8ab6b55fe97976e46f91ddbed8d147d966475dc29b2032757ba47e02376fbc3"

+[[package]]
+name = "atomic_enum"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "99e1aca718ea7b89985790c94aad72d77533063fe00bc497bb79a7c2dae6a661"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.100",
+]
+
 [[package]]
 name = "autocfg"
 version = "1.1.0"
@@ -687,13 +698,40 @@ dependencies = [
 "tracing",
 ]

+[[package]]
+name = "axum"
+version = "0.7.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
+dependencies = [
+ "async-trait",
+ "axum-core 0.4.5",
+ "bytes",
+ "futures-util",
+ "http 1.1.0",
+ "http-body 1.0.0",
+ "http-body-util",
+ "itoa",
+ "matchit 0.7.3",
+ "memchr",
+ "mime",
+ "percent-encoding",
+ "pin-project-lite",
+ "rustversion",
+ "serde",
+ "sync_wrapper 1.0.1",
+ "tower 0.5.2",
+ "tower-layer",
+ "tower-service",
+]
+
 [[package]]
 name = "axum"
 version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6d6fd624c75e18b3b4c6b9caf42b1afe24437daaee904069137d8bab077be8b8"
 dependencies = [
- "axum-core",
+ "axum-core 0.5.0",
 "base64 0.22.1",
 "bytes",
 "form_urlencoded",
@@ -704,7 +742,7 @@ dependencies = [
 "hyper 1.4.1",
 "hyper-util",
 "itoa",
- "matchit",
+ "matchit 0.8.4",
 "memchr",
 "mime",
 "percent-encoding",
@@ -724,6 +762,26 @@ dependencies = [
 "tracing",
 ]

+[[package]]
+name = "axum-core"
+version = "0.4.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
+dependencies = [
+ "async-trait",
+ "bytes",
+ "futures-util",
+ "http 1.1.0",
+ "http-body 1.0.0",
+ "http-body-util",
+ "mime",
+ "pin-project-lite",
+ "rustversion",
+ "sync_wrapper 1.0.1",
+ "tower-layer",
+ "tower-service",
+]
+
 [[package]]
 name = "axum-core"
 version = "0.5.0"
@@ -750,8 +808,8 @@ version = "0.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "460fc6f625a1f7705c6cf62d0d070794e94668988b1c38111baeec177c715f7b"
 dependencies = [
- "axum",
- "axum-core",
+ "axum 0.8.1",
+ "axum-core 0.5.0",
 "bytes",
 "futures-util",
 "headers",
@@ -1086,6 +1144,25 @@ version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"

+[[package]]
+name = "cbindgen"
+version = "0.28.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eadd868a2ce9ca38de7eeafdcec9c7065ef89b42b32f0839278d55f35c54d1ff"
+dependencies = [
+ "clap",
+ "heck 0.4.1",
+ "indexmap 2.9.0",
+ "log",
+ "proc-macro2",
+ "quote",
+ "serde",
+ "serde_json",
+ "syn 2.0.100",
+ "tempfile",
+ "toml",
+]
+
 [[package]]
 name = "cc"
 version = "1.2.16"
@@ -1206,7 +1283,7 @@ version = "4.5.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab"
 dependencies = [
- "heck",
+ "heck 0.5.0",
 "proc-macro2",
 "quote",
 "syn 2.0.100",
@@ -1264,13 +1341,40 @@ dependencies = [
 "unicode-width",
 ]

+[[package]]
+name = "communicator"
+version = "0.1.0"
+dependencies = [
+ "atomic_enum",
+ "bytes",
+ "cbindgen",
+ "http 1.1.0",
+ "libc",
+ "neonart",
+ "nix 0.27.1",
+ "pageserver_client_grpc",
+ "pageserver_page_api",
+ "prost 0.13.3",
+ "thiserror 1.0.69",
+ "tokio",
+ "tokio-epoll-uring",
+ "tokio-pipe",
+ "tonic",
+ "tracing",
+ "tracing-subscriber",
+ "uring-common",
+ "utils",
+ "zerocopy 0.8.24",
+ "zerocopy-derive 0.8.24",
+]
+
 [[package]]
 name = "compute_api"
 version = "0.1.0"
 dependencies = [
 "anyhow",
 "chrono",
- "indexmap 2.0.1",
+ "indexmap 2.9.0",
 "jsonwebtoken",
 "regex",
 "remote_storage",
@@ -1288,7 +1392,7 @@ dependencies = [
 "aws-sdk-kms",
 "aws-sdk-s3",
 "aws-smithy-types",
- "axum",
+ "axum 0.8.1",
 "axum-extra",
 "base64 0.13.1",
 "bytes",
@@ -1301,7 +1405,7 @@ dependencies = [
 "flate2",
 "futures",
 "http 1.1.0",
- "indexmap 2.0.1",
+ "indexmap 2.9.0",
 "jsonwebtoken",
 "metrics",
 "nix 0.27.1",
@@ -1323,7 +1427,6 @@ dependencies = [
 "serde_json",
 "serde_with",
 "signal-hook",
- "spki 0.7.3",
 "tar",
 "thiserror 1.0.69",
 "tokio",
@@ -1928,7 +2031,7 @@ checksum = "0892a17df262a24294c382f0d5997571006e7a4348b4327557c4ff1cd4a8bccc"
 dependencies = [
 "darling",
 "either",
- "heck",
+ "heck 0.5.0",
 "proc-macro2",
 "quote",
 "syn 2.0.100",
@@ -2042,7 +2145,7 @@ name = "endpoint_storage"
 version = "0.0.1"
 dependencies = [
 "anyhow",
- "axum",
+ "axum 0.8.1",
 "axum-extra",
 "camino",
 "camino-tempfile",
@@ -2589,7 +2692,7 @@ dependencies = [
 "futures-sink",
 "futures-util",
 "http 0.2.9",
- "indexmap 2.0.1",
+ "indexmap 2.9.0",
 "slab",
 "tokio",
 "tokio-util",
@@ -2608,7 +2711,7 @@ dependencies = [
 "futures-sink",
 "futures-util",
 "http 1.1.0",
- "indexmap 2.0.1",
+ "indexmap 2.9.0",
 "slab",
 "tokio",
 "tokio-util",
@@ -2704,6 +2807,12 @@ dependencies = [
 "http 1.1.0",
 ]

+[[package]]
+name = "heck"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
+
 [[package]]
 name = "heck"
 version = "0.5.0"
@@ -3192,12 +3301,12 @@ dependencies = [

 [[package]]
 name = "indexmap"
-version = "2.0.1"
+version = "2.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ad227c3af19d4914570ad36d30409928b75967c298feb9ea1969db3a610bb14e"
+checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e"
 dependencies = [
 "equivalent",
- "hashbrown 0.14.5",
+ "hashbrown 0.15.2",
 "serde",
 ]

@@ -3220,7 +3329,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88"
 dependencies = [
 "ahash",
- "indexmap 2.0.1",
+ "indexmap 2.9.0",
 "is-terminal",
 "itoa",
 "log",
@@ -3243,7 +3352,7 @@ dependencies = [
 "crossbeam-utils",
 "dashmap 6.1.0",
 "env_logger",
- "indexmap 2.0.1",
+ "indexmap 2.9.0",
 "itoa",
 "log",
 "num-format",
@@ -3595,6 +3704,12 @@ dependencies = [
 "regex-automata 0.1.10",
 ]

+[[package]]
+name = "matchit"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
+
 [[package]]
 name = "matchit"
 version = "0.8.4"
@@ -3640,7 +3755,7 @@ version = "0.0.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b9e6777fc80a575f9503d908c8b498782a6c3ee88a06cb416dc3941401e43b94"
 dependencies = [
- "heck",
+ "heck 0.5.0",
 "proc-macro2",
 "quote",
 "syn 2.0.100",
@@ -3786,6 +3901,15 @@ version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"

+[[package]]
+name = "neonart"
+version = "0.1.0"
+dependencies = [
+ "rand 0.8.5",
+ "tracing",
+ "zerocopy 0.8.24",
+]
+
 [[package]]
 name = "never-say-never"
 version = "6.6.666"
@@ -4209,11 +4333,14 @@ dependencies = [
 "humantime-serde",
 "pageserver_api",
 "pageserver_client",
+ "pageserver_client_grpc",
+ "pageserver_page_api",
 "rand 0.8.5",
 "reqwest",
 "serde",
 "serde_json",
 "tokio",
+ "tokio-stream",
 "tokio-util",
 "tracing",
 "utils",
@@ -4285,6 +4412,8 @@ dependencies = [
 "pageserver_api",
 "pageserver_client",
 "pageserver_compaction",
+ "pageserver_page_api",
+ "peekable",
 "pem",
 "pin-project-lite",
 "postgres-protocol",
@@ -4296,12 +4425,14 @@ dependencies = [
 "pprof",
 "pq_proto",
 "procfs",
+ "prost 0.13.3",
 "rand 0.8.5",
 "range-set-blaze",
 "regex",
 "remote_storage",
 "reqwest",
 "rpds",
+ "rstest",
 "rustls 0.23.18",
 "scopeguard",
 "send-future",
@@ -4326,6 +4457,7 @@ dependencies = [
 "tokio-tar",
 "tokio-util",
 "toml_edit",
+ "tonic",
 "tracing",
 "tracing-utils",
 "url",
@@ -4390,6 +4522,19 @@ dependencies = [
 "workspace_hack",
 ]

+[[package]]
+name = "pageserver_client_grpc"
+version = "0.1.0"
+dependencies = [
+ "bytes",
+ "futures",
+ "http 1.1.0",
+ "pageserver_page_api",
+ "thiserror 1.0.69",
+ "tonic",
+ "tracing",
+]
+
 [[package]]
 name = "pageserver_compaction"
 version = "0.1.0"
@@ -4413,6 +4558,17 @@ dependencies = [
 "workspace_hack",
 ]

+[[package]]
+name = "pageserver_page_api"
+version = "0.1.0"
+dependencies = [
+ "prost 0.13.3",
+ "thiserror 1.0.69",
+ "tonic",
+ "tonic-build",
+ "utils",
+]
+
 [[package]]
 name = "papaya"
 version = "0.2.1"
@@ -4539,6 +4695,15 @@ dependencies = [
 "sha2",
 ]

+[[package]]
+name = "peekable"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "225f9651e475709164f871dc2f5724956be59cb9edb055372ffeeab01ec2d20b"
+dependencies = [
+ "smallvec",
+]
+
 [[package]]
 name = "pem"
 version = "3.0.3"
@@ -5010,7 +5175,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4"
 dependencies = [
 "bytes",
- "heck",
+ "heck 0.5.0",
 "itertools 0.12.1",
 "log",
 "multimap",
@@ -5031,7 +5196,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15"
 dependencies = [
 "bytes",
- "heck",
+ "heck 0.5.0",
 "itertools 0.12.1",
 "log",
 "multimap",
@@ -5134,7 +5299,7 @@ dependencies = [
 "hyper 0.14.30",
 "hyper 1.4.1",
 "hyper-util",
- "indexmap 2.0.1",
+ "indexmap 2.9.0",
 "ipnet",
 "itertools 0.10.5",
 "itoa",
@@ -5645,7 +5810,7 @@ dependencies = [
 "async-trait",
 "getrandom 0.2.11",
 "http 1.1.0",
- "matchit",
+ "matchit 0.8.4",
 "opentelemetry",
 "reqwest",
 "reqwest-middleware",
@@ -6616,12 +6781,14 @@ dependencies = [
 "anyhow",
 "async-stream",
 "bytes",
+ "camino",
 "clap",
 "const_format",
 "futures",
 "futures-core",
 "futures-util",
 "http-body-util",
+ "http-utils",
 "humantime",
 "hyper 1.4.1",
 "hyper-util",
@@ -6631,6 +6798,7 @@ dependencies = [
 "prost 0.13.3",
 "rustls 0.23.18",
 "tokio",
+ "tokio-rustls 0.26.0",
 "tonic",
 "tonic-build",
 "tracing",
@@ -6803,7 +6971,7 @@ version = "0.26.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
 dependencies = [
- "heck",
+ "heck 0.5.0",
 "proc-macro2",
 "quote",
 "rustversion",
@@ -7228,6 +7396,16 @@ dependencies = [
 "syn 2.0.100",
 ]

+[[package]]
+name = "tokio-pipe"
+version = "0.2.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f213a84bffbd61b8fa0ba8a044b4bbe35d471d0b518867181e82bd5c15542784"
+dependencies = [
+ "libc",
+ "tokio",
+]
+
 [[package]]
 name = "tokio-postgres"
 version = "0.7.10"
@@ -7410,7 +7588,7 @@ version = "0.22.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f21c7aaf97f1bd9ca9d4f9e73b0a6c74bd5afef56f2bc931943a6e1c37e04e38"
 dependencies = [
- "indexmap 2.0.1",
+ "indexmap 2.9.0",
 "serde",
 "serde_spanned",
 "toml_datetime",
@@ -7423,9 +7601,13 @@ version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
 dependencies = [
+ "async-stream",
 "async-trait",
+ "axum 0.7.9",
 "base64 0.22.1",
 "bytes",
+ "flate2",
+ "h2 0.4.4",
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
@@ -7437,6 +7619,7 @@ dependencies = [
 "prost 0.13.3",
 "rustls-native-certs 0.8.0",
 "rustls-pemfile 2.1.1",
+ "socket2",
 "tokio",
 "tokio-rustls 0.26.0",
 "tokio-stream",
@@ -7936,7 +8119,7 @@ name = "vm_monitor"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "axum",
+ "axum 0.8.1",
 "cgroups-rs",
 "clap",
 "futures",
@@ -8446,7 +8629,7 @@ dependencies = [
 "hyper 1.4.1",
 "hyper-util",
 "indexmap 1.9.3",
- "indexmap 2.0.1",
+ "indexmap 2.9.0",
 "itertools 0.12.1",
 "lazy_static",
 "libc",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,6 +8,7 @@ members = [
    "pageserver/compaction",
    "pageserver/ctl",
    "pageserver/client",
+    "pageserver/client_grpc",
    "pageserver/pagebench",
    "proxy",
    "safekeeper",
@@ -29,6 +30,7 @@ members = [
    "libs/pq_proto",
    "libs/tenant_size_model",
    "libs/metrics",
+    "libs/neonart",
    "libs/postgres_connection",
    "libs/remote_storage",
    "libs/tracing-utils",
@@ -41,6 +43,7 @@ members = [
    "libs/proxy/postgres-types2",
    "libs/proxy/tokio-postgres2",
    "endpoint_storage",
+    "pgxn/neon/communicator",
 ]

 [workspace.package]
@@ -142,6 +145,7 @@ parquet = { version = "53", default-features = false, features = ["zstd"] }
 parquet_derive = "53"
 pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
 pem = "3.0.3"
+peekable = "0.3.0"
 pin-project-lite = "0.2"
 pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "prost-codec"] }
 procfs = "0.16"
@@ -187,7 +191,6 @@ thiserror = "1.0"
 tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
 tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
 tokio = { version = "1.43.1", features = ["macros"] }
-tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
 tokio-io-timeout = "1.2.0"
 tokio-postgres-rustls = "0.12.0"
 tokio-rustls = { version = "0.26.0", default-features = false, features = ["tls12", "ring"]}
@@ -196,7 +199,7 @@ tokio-tar = "0.3"
 tokio-util = { version = "0.7.10", features = ["io", "rt"] }
 toml = "0.8"
 toml_edit = "0.22"
-tonic = {version = "0.12.3", default-features = false, features = ["channel", "tls", "tls-roots"]}
+tonic = {version = "0.12.3", default-features = false, features = ["channel", "server", "tls", "tls-roots", "gzip"]}
 tower = { version = "0.5.2", default-features = false }
 tower-http = { version = "0.6.2", features = ["auth", "request-id", "trace"] }

@@ -228,6 +231,9 @@ x509-cert = { version = "0.2.5" }
 env_logger = "0.11"
 log = "0.4"

+tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
+uring-common = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
+
 ## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
 postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
 postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
@@ -245,9 +251,12 @@ compute_api = { version = "0.1", path = "./libs/compute_api/" }
 consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
 http-utils = { version = "0.1", path = "./libs/http-utils/" }
 metrics = { version = "0.1", path = "./libs/metrics/" }
+neonart = { version = "0.1", path = "./libs/neonart/" }
 pageserver = { path = "./pageserver" }
 pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
 pageserver_client = { path = "./pageserver/client" }
+pageserver_client_grpc = { path = "./pageserver/client_grpc" }
+pageserver_page_api = { path = "./pageserver/page_api" }
 pageserver_compaction = { version = "0.1", path = "./pageserver/compaction/" }
 postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
 postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
@@ -271,6 +280,7 @@ wal_decoder = { version = "0.1", path = "./libs/wal_decoder" }
 workspace_hack = { version = "0.1", path = "./workspace_hack/" }

 ## Build dependencies
+cbindgen = "0.28.0"
 criterion = "0.5.1"
 rcgen = "0.13"
 rstest = "0.18"
--- a/Makefile
+++ b/Makefile
@@ -18,10 +18,12 @@ ifeq ($(BUILD_TYPE),release)
 	PG_LDFLAGS = $(LDFLAGS)
 	# Unfortunately, `--profile=...` is a nightly feature
 	CARGO_BUILD_FLAGS += --release
+	NEON_CARGO_ARTIFACT_TARGET_DIR = $(ROOT_PROJECT_DIR)/target/release
 else ifeq ($(BUILD_TYPE),debug)
 	PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
 	PG_CFLAGS += -O0 -g3 $(CFLAGS)
 	PG_LDFLAGS = $(LDFLAGS)
+	NEON_CARGO_ARTIFACT_TARGET_DIR = $(ROOT_PROJECT_DIR)/target/debug
 else
 	$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
 endif
@@ -180,11 +182,16 @@ postgres-check-%: postgres-%

 .PHONY: neon-pg-ext-%
 neon-pg-ext-%: postgres-%
+	+@echo "Compiling communicator $*"
+	$(CARGO_CMD_PREFIX) cargo build -p communicator $(CARGO_BUILD_FLAGS)
+
 	+@echo "Compiling neon $*"
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-$*
 	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
+		LIBCOMMUNICATOR_PATH=$(NEON_CARGO_ARTIFACT_TARGET_DIR) \
 		-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install
+
 	+@echo "Compiling neon_walredo $*"
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$*
 	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
--- a/build-tools.Dockerfile
+++ b/build-tools.Dockerfile
@@ -173,7 +173,7 @@ RUN curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v$
    && rm -rf protoc.zip protoc

 # s5cmd
-ENV S5CMD_VERSION=2.2.2
+ENV S5CMD_VERSION=2.3.0
 RUN curl -sL "https://github.com/peak/s5cmd/releases/download/v${S5CMD_VERSION}/s5cmd_${S5CMD_VERSION}_Linux-$(uname -m | sed 's/x86_64/64bit/g' | sed 's/aarch64/arm64/g').tar.gz" | tar zxvf - s5cmd \
    && chmod +x s5cmd \
    && mv s5cmd /usr/local/bin/s5cmd
@@ -206,7 +206,7 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "aws
    && rm awscliv2.zip

 # Mold: A Modern Linker
-ENV MOLD_VERSION=v2.34.1
+ENV MOLD_VERSION=v2.37.1
 RUN set -e \
    && git clone https://github.com/rui314/mold.git \
    && mkdir mold/build \
@@ -268,7 +268,7 @@ WORKDIR /home/nonroot
 RUN echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /home/nonroot/.curlrc

 # Python
-ENV PYTHON_VERSION=3.11.10 \
+ENV PYTHON_VERSION=3.11.12 \
    PYENV_ROOT=/home/nonroot/.pyenv \
    PATH=/home/nonroot/.pyenv/shims:/home/nonroot/.pyenv/bin:/home/nonroot/.poetry/bin:$PATH
 RUN set -e \
@@ -296,12 +296,12 @@ ENV RUSTC_VERSION=1.86.0
 ENV RUSTUP_HOME="/home/nonroot/.rustup"
 ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
 ARG RUSTFILT_VERSION=0.2.1
-ARG CARGO_HAKARI_VERSION=0.9.33
-ARG CARGO_DENY_VERSION=0.16.2
-ARG CARGO_HACK_VERSION=0.6.33
-ARG CARGO_NEXTEST_VERSION=0.9.85
+ARG CARGO_HAKARI_VERSION=0.9.36
+ARG CARGO_DENY_VERSION=0.18.2
+ARG CARGO_HACK_VERSION=0.6.36
+ARG CARGO_NEXTEST_VERSION=0.9.94
 ARG CARGO_CHEF_VERSION=0.1.71
-ARG CARGO_DIESEL_CLI_VERSION=2.2.6
+ARG CARGO_DIESEL_CLI_VERSION=2.2.9
 RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
 	chmod +x rustup-init && \
 	./rustup-init -y --default-toolchain ${RUSTC_VERSION} && \
--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -1800,8 +1800,8 @@ COPY compute/patches/pg_repack.patch /ext-src
 RUN cd /ext-src/pg_repack-src && patch -p1 </ext-src/pg_repack.patch && rm -f /ext-src/pg_repack.patch

 COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
-RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl\
-   && apt clean && rm -rf /ext-src/*.tar.gz /var/lib/apt/lists/*
+RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl jq \
+   && apt clean && rm -rf /ext-src/*.tar.gz /ext-src/*.patch /var/lib/apt/lists/*
 ENV PATH=/usr/local/pgsql/bin:$PATH
 ENV PGHOST=compute
 ENV PGPORT=55433
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -44,7 +44,6 @@ serde.workspace = true
 serde_with.workspace = true
 serde_json.workspace = true
 signal-hook.workspace = true
-spki = { version = "0.7.3", features = ["std"] }
 tar.workspace = true
 tower.workspace = true
 tower-http.workspace = true
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -57,13 +57,24 @@ use tracing::{error, info};
 use url::Url;
 use utils::failpoint_support;

+// Compatibility hack: when the control plane passes a remote-ext-config value
+// that does not start with `http`, substitute the default extension storage
+// proxy gateway. A URL is kept as is, minus any trailing slashes.
+// Remove this once the control plane is updated to pass the gateway URL.
+fn parse_remote_ext_config(arg: &str) -> Result<String> {
+    if !arg.starts_with("http") {
+        return Ok("http://pg-ext-s3-gateway".to_string());
+    }
+    Ok(arg.trim_end_matches('/').to_string())
+}
+
 #[derive(Parser)]
 #[command(rename_all = "kebab-case")]
 struct Cli {
    #[arg(short = 'b', long, default_value = "postgres", env = "POSTGRES_PATH")]
    pub pgbin: String,

-    #[arg(short = 'r', long)]
+    #[arg(short = 'r', long, value_parser = parse_remote_ext_config)]
    pub remote_ext_config: Option<String>,

    /// The port to bind the external listening HTTP server to. Clients running
--- a/compute_tools/src/metrics.rs
+++ b/compute_tools/src/metrics.rs
@@ -1,8 +1,8 @@
-use metrics::core::{AtomicF64, Collector, GenericGauge};
+use metrics::core::{AtomicF64, AtomicU64, Collector, GenericCounter, GenericGauge};
 use metrics::proto::MetricFamily;
 use metrics::{
-    IntCounterVec, IntGaugeVec, UIntGaugeVec, register_gauge, register_int_counter_vec,
-    register_int_gauge_vec, register_uint_gauge_vec,
+    IntCounterVec, IntGaugeVec, UIntGaugeVec, register_gauge, register_int_counter,
+    register_int_counter_vec, register_int_gauge_vec, register_uint_gauge_vec,
 };
 use once_cell::sync::Lazy;

@@ -81,6 +81,22 @@ pub(crate) static COMPUTE_CTL_UP: Lazy<IntGaugeVec> = Lazy::new(|| {
    .expect("failed to define a metric")
 });

+pub(crate) static PG_CURR_DOWNTIME_MS: Lazy<GenericGauge<AtomicF64>> = Lazy::new(|| {
+    register_gauge!(
+        "compute_pg_current_downtime_ms",
+        "Non-cumulative duration of Postgres downtime in ms; resets after successful check",
+    )
+    .expect("failed to define a metric")
+});
+
+pub(crate) static PG_TOTAL_DOWNTIME_MS: Lazy<GenericCounter<AtomicU64>> = Lazy::new(|| {
+    register_int_counter!(
+        "compute_pg_downtime_ms_total",
+        "Cumulative duration of Postgres downtime in ms",
+    )
+    .expect("failed to define a metric")
+});
+
 pub fn collect() -> Vec<MetricFamily> {
    let mut metrics = COMPUTE_CTL_UP.collect();
    metrics.extend(INSTALLED_EXTENSIONS.collect());
@@ -88,5 +104,7 @@ pub fn collect() -> Vec<MetricFamily> {
    metrics.extend(REMOTE_EXT_REQUESTS_TOTAL.collect());
    metrics.extend(DB_MIGRATION_FAILED.collect());
    metrics.extend(AUDIT_LOG_DIR_SIZE.collect());
+    metrics.extend(PG_CURR_DOWNTIME_MS.collect());
+    metrics.extend(PG_TOTAL_DOWNTIME_MS.collect());
    metrics
 }
--- a/compute_tools/src/monitor.rs
+++ b/compute_tools/src/monitor.rs
@@ -6,197 +6,294 @@ use chrono::{DateTime, Utc};
 use compute_api::responses::ComputeStatus;
 use compute_api::spec::ComputeFeature;
 use postgres::{Client, NoTls};
-use tracing::{debug, error, info, warn};
+use tracing::{Level, error, info, instrument, span};

 use crate::compute::ComputeNode;
+use crate::metrics::{PG_CURR_DOWNTIME_MS, PG_TOTAL_DOWNTIME_MS};

 const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500);

-// Spin in a loop and figure out the last activity time in the Postgres.
-// Then update it in the shared state. This function never errors out.
-// NB: the only expected panic is at `Mutex` unwrap(), all other errors
-// should be handled gracefully.
-fn watch_compute_activity(compute: &ComputeNode) {
-    // Suppose that `connstr` doesn't change
-    let connstr = compute.params.connstr.clone();
-    let conf = compute.get_conn_conf(Some("compute_ctl:activity_monitor"));
+struct ComputeMonitor {
+    compute: Arc<ComputeNode>,

-    // During startup and configuration we connect to every Postgres database,
-    // but we don't want to count this as some user activity. So wait until
-    // the compute fully started before monitoring activity.
-    wait_for_postgres_start(compute);
+    /// The moment when Postgres had some activity,
+    /// that should prevent compute from being suspended.
+    last_active: Option<DateTime<Utc>>,

-    // Define `client` outside of the loop to reuse existing connection if it's active.
-    let mut client = conf.connect(NoTls);
+    /// The moment when we last tried to check Postgres.
+    last_checked: DateTime<Utc>,
+    /// The last moment we did a successful Postgres check.
+    last_up: DateTime<Utc>,

-    let mut sleep = false;
-    let mut prev_active_time: Option<f64> = None;
-    let mut prev_sessions: Option<i64> = None;
+    /// Only used for internal statistics change tracking
+    /// between monitor runs and can be outdated.
+    active_time: Option<f64>,
+    /// Only used for internal statistics change tracking
+    /// between monitor runs and can be outdated.
+    sessions: Option<i64>,

-    if compute.has_feature(ComputeFeature::ActivityMonitorExperimental) {
-        info!("starting experimental activity monitor for {}", connstr);
-    } else {
-        info!("starting activity monitor for {}", connstr);
+    /// Use experimental statistics-based activity monitor. It's no longer
+    /// 'experimental' per se, as it's enabled for everyone, but we still
+    /// keep the flag as an option to turn it off in some cases if it will
+    /// misbehave.
+    experimental: bool,
+}
+
+impl ComputeMonitor {
+    /// Publish downtime metrics as of now: current gauge and cumulative counter.
+    fn report_down(&self) {
+        let now = Utc::now();
+
+        // Current downtime: time elapsed since Postgres was last seen up.
+        let downtime = now.signed_duration_since(self.last_up);
+        PG_CURR_DOWNTIME_MS.set(downtime.num_milliseconds() as f64);
+
+        // Total downtime grows by the time since the previous check. Clamp at
+        // zero: a backwards clock step must not wrap into a huge `u64`.
+        let inc = now
+            .signed_duration_since(self.last_checked)
+            .num_milliseconds();
+        PG_TOTAL_DOWNTIME_MS.inc_by(inc.max(0) as u64);
    }

-    loop {
-        // We use `continue` a lot, so it's more convenient to sleep at the top of the loop.
-        // But skip the first sleep, so we can connect to Postgres immediately.
-        if sleep {
-            // Should be outside of the mutex lock to allow others to read while we sleep.
-            thread::sleep(MONITOR_CHECK_INTERVAL);
-        } else {
-            sleep = true;
-        }
+    fn report_up(&mut self) {
+        PG_CURR_DOWNTIME_MS.set(0.0); // Postgres is up: no ongoing downtime
+        self.last_up = Utc::now(); // start point for the next downtime span
+    }

-        match &mut client {
-            Ok(cli) => {
-                if cli.is_closed() {
-                    info!("connection to Postgres is closed, trying to reconnect");
+    /// Render the downtime metrics and `last_up` as one line for log records.
+    fn downtime_info(&self) -> String {
+        let (total_ms, current_ms) = (PG_TOTAL_DOWNTIME_MS.get(), PG_CURR_DOWNTIME_MS.get());
+        format!(
+            "total_ms: {}, current_ms: {}, last_up: {}",
+            total_ms, current_ms, self.last_up
+        )
+    }

-                    // Connection is closed, reconnect and try again.
-                    client = conf.connect(NoTls);
-                    continue;
-                }
+    /// Spin in a loop and figure out the last activity time in the Postgres.
+    /// Then update it in the shared state. This function never errors out.
+    /// NB: the only expected panic is at `Mutex` unwrap(), all other errors
+    /// should be handled gracefully.
+    #[instrument(skip_all)]
+    pub fn run(&mut self) {
+        // Suppose that `connstr` doesn't change
+        let connstr = self.compute.params.connstr.clone();
+        let conf = self
+            .compute
+            .get_conn_conf(Some("compute_ctl:compute_monitor"));

-                // This is a new logic, only enable if the feature flag is set.
-                // TODO: remove this once we are sure that it works OR drop it altogether.
-                if compute.has_feature(ComputeFeature::ActivityMonitorExperimental) {
-                    // First, check if the total active time or sessions across all databases has changed.
-                    // If it did, it means that user executed some queries. In theory, it can even go down if
-                    // some databases were dropped, but it's still a user activity.
-                    match get_database_stats(cli) {
-                        Ok((active_time, sessions)) => {
-                            let mut detected_activity = false;
+        // During startup and configuration we connect to every Postgres database,
+        // but we don't want to count this as some user activity. So wait until
+        // the compute fully started before monitoring activity.
+        wait_for_postgres_start(&self.compute);

-                            prev_active_time = match prev_active_time {
-                                Some(prev_active_time) => {
-                                    if active_time != prev_active_time {
-                                        detected_activity = true;
-                                    }
-                                    Some(active_time)
-                                }
-                                None => Some(active_time),
-                            };
-                            prev_sessions = match prev_sessions {
-                                Some(prev_sessions) => {
-                                    if sessions != prev_sessions {
-                                        detected_activity = true;
-                                    }
-                                    Some(sessions)
-                                }
-                                None => Some(sessions),
-                            };
+        // Define `client` outside of the loop to reuse existing connection if it's active.
+        let mut client = conf.connect(NoTls);

-                            if detected_activity {
-                                // Update the last active time and continue, we don't need to
-                                // check backends state change.
-                                compute.update_last_active(Some(Utc::now()));
-                                continue;
-                            }
-                        }
-                        Err(e) => {
-                            error!("could not get database statistics: {}", e);
-                            continue;
-                        }
-                    }
-                }
+        info!("starting compute monitor for {}", connstr);

-                // Second, if database statistics is the same, check all backends state change,
-                // maybe there is some with more recent activity. `get_backends_state_change()`
-                // can return None or stale timestamp, so it's `compute.update_last_active()`
-                // responsibility to check if the new timestamp is more recent than the current one.
-                // This helps us to discover new sessions, that did nothing yet.
-                match get_backends_state_change(cli) {
-                    Ok(last_active) => {
-                        compute.update_last_active(last_active);
-                    }
-                    Err(e) => {
-                        error!("could not get backends state change: {}", e);
-                    }
-                }
-
-                // Finally, if there are existing (logical) walsenders, do not suspend.
-                //
-                // walproposer doesn't currently show up in pg_stat_replication,
-                // but protect if it will be
-                let ws_count_query = "select count(*) from pg_stat_replication where application_name != 'walproposer';";
-                match cli.query_one(ws_count_query, &[]) {
-                    Ok(r) => match r.try_get::<&str, i64>("count") {
-                        Ok(num_ws) => {
-                            if num_ws > 0 {
-                                compute.update_last_active(Some(Utc::now()));
-                                continue;
-                            }
-                        }
-                        Err(e) => {
-                            warn!("failed to parse walsenders count: {:?}", e);
-                            continue;
-                        }
-                    },
-                    Err(e) => {
-                        warn!("failed to get list of walsenders: {:?}", e);
-                        continue;
-                    }
-                }
-                //
-                // Don't suspend compute if there is an active logical replication subscription
-                //
-                // `where pid is not null` – to filter out read only computes and subscription on branches
-                //
-                let logical_subscriptions_query =
-                    "select count(*) from pg_stat_subscription where pid is not null;";
-                match cli.query_one(logical_subscriptions_query, &[]) {
-                    Ok(row) => match row.try_get::<&str, i64>("count") {
-                        Ok(num_subscribers) => {
-                            if num_subscribers > 0 {
-                                compute.update_last_active(Some(Utc::now()));
-                                continue;
-                            }
-                        }
-                        Err(e) => {
-                            warn!("failed to parse `pg_stat_subscription` count: {:?}", e);
-                            continue;
-                        }
-                    },
-                    Err(e) => {
-                        warn!(
-                            "failed to get list of active logical replication subscriptions: {:?}",
-                            e
+        loop {
+            match &mut client {
+                Ok(cli) => {
+                    if cli.is_closed() {
+                        info!(
+                            downtime_info = self.downtime_info(),
+                            "connection to Postgres is closed, trying to reconnect"
                        );
-                        continue;
-                    }
-                }
-                //
-                // Do not suspend compute if autovacuum is running
-                //
-                let autovacuum_count_query = "select count(*) from pg_stat_activity where backend_type = 'autovacuum worker'";
-                match cli.query_one(autovacuum_count_query, &[]) {
-                    Ok(r) => match r.try_get::<&str, i64>("count") {
-                        Ok(num_workers) => {
-                            if num_workers > 0 {
-                                compute.update_last_active(Some(Utc::now()));
-                                continue;
+                        self.report_down();
+
+                        // Connection is closed, reconnect and try again.
+                        client = conf.connect(NoTls);
+                    } else {
+                        match self.check(cli) {
+                            Ok(_) => {
+                                self.report_up();
+                                self.compute.update_last_active(self.last_active);
+                            }
+                            Err(e) => {
+                                // Although we have many places where we can return errors in `check()`,
+                                // normally it shouldn't happen. I.e., we will likely return error if
+                                // connection got broken, query timed out, Postgres returned invalid data, etc.
+                                // In all such cases it's suspicious, so let's report this as downtime.
+                                self.report_down();
+                                error!(
+                                    downtime_info = self.downtime_info(),
+                                    "could not check Postgres: {}", e
+                                );
+
+                                // Reconnect to Postgres just in case. During tests, I noticed
+                                // that queries in `check()` can fail with `connection closed`,
+                                // but `cli.is_closed()` above doesn't detect it. Even if old
+                                // connection is still alive, it will be dropped when we reassign
+                                // `client` to a new connection.
+                                client = conf.connect(NoTls);
                            }
                        }
-                        Err(e) => {
-                            warn!("failed to parse autovacuum workers count: {:?}", e);
-                            continue;
-                        }
-                    },
-                    Err(e) => {
-                        warn!("failed to get list of autovacuum workers: {:?}", e);
-                        continue;
                    }
                }
-            }
-            Err(e) => {
-                debug!("could not connect to Postgres: {}, retrying", e);
+                Err(e) => {
+                    info!(
+                        downtime_info = self.downtime_info(),
+                        "could not connect to Postgres: {}, retrying", e
+                    );
+                    self.report_down();

-                // Establish a new connection and try again.
-                client = conf.connect(NoTls);
+                    // Establish a new connection and try again.
+                    client = conf.connect(NoTls);
+                }
+            }
+
+            // Reset the `last_checked` timestamp and sleep before the next iteration.
+            self.last_checked = Utc::now();
+            thread::sleep(MONITOR_CHECK_INTERVAL);
+        }
+    }
+
+    /// Run one round of activity checks against Postgres and record the result
+    /// in `self.last_active`.
+    ///
+    /// The checks run in order and the function returns as soon as one of them
+    /// detects activity:
+    /// 1. (only when `self.experimental` is set) change in the database statistics
+    ///    returned by `get_database_stats()`;
+    /// 2. a newer timestamp from `get_backends_state_change()`;
+    /// 3. walsenders other than `walproposer` in `pg_stat_replication`;
+    /// 4. active logical replication subscriptions in `pg_stat_subscription`;
+    /// 5. running autovacuum workers in `pg_stat_activity`.
+    ///
+    /// Returns `Ok(())` both when activity was detected and when it was not;
+    /// the outcome is only visible through `self.last_active`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if any query fails or its result cannot be parsed.
+    /// The underlying cause is always embedded in the error message itself,
+    /// so it is not lost when the error is logged with plain `{}` formatting.
+    #[instrument(skip_all)]
+    fn check(&mut self, cli: &mut Client) -> anyhow::Result<()> {
+        // This is new logic, only enable if the feature flag is set.
+        // TODO: remove this once we are sure that it works OR drop it altogether.
+        if self.experimental {
+            // Check if the total active time or sessions across all databases has changed.
+            // If it did, it means that user executed some queries. In theory, it can even go down if
+            // some databases were dropped, but it's still user activity.
+            match get_database_stats(cli) {
+                Ok((active_time, sessions)) => {
+                    let mut detected_activity = false;
+
+                    if let Some(prev_active_time) = self.active_time {
+                        if active_time != prev_active_time {
+                            detected_activity = true;
+                        }
+                    }
+                    self.active_time = Some(active_time);
+
+                    if let Some(prev_sessions) = self.sessions {
+                        if sessions != prev_sessions {
+                            detected_activity = true;
+                        }
+                    }
+                    self.sessions = Some(sessions);
+
+                    if detected_activity {
+                        // Update the last active time and return early, we don't need to
+                        // check backends state change.
+                        self.last_active = Some(Utc::now());
+                        return Ok(());
+                    }
+                }
+                Err(e) => {
+                    return Err(anyhow::anyhow!("could not get database statistics: {}", e));
+                }
            }
        }
+
+        // If database statistics are the same, check all backends for state changes.
+        // Maybe there are some with more recent activity. `get_backends_state_change()`
+        // can return None or stale timestamp, so we only adopt the new timestamp when
+        // it is more recent than the one we already track (or when we track none yet).
+        // This helps us to discover new sessions that have not done anything yet.
+        match get_backends_state_change(cli) {
+            Ok(last_active) => match (last_active, self.last_active) {
+                (Some(last_active), Some(prev_last_active)) => {
+                    if last_active > prev_last_active {
+                        self.last_active = Some(last_active);
+                        return Ok(());
+                    }
+                }
+                (Some(last_active), None) => {
+                    self.last_active = Some(last_active);
+                    return Ok(());
+                }
+                _ => {}
+            },
+            Err(e) => {
+                return Err(anyhow::anyhow!(
+                    "could not get backends state change: {}",
+                    e
+                ));
+            }
+        }
+
+        // If there are existing (logical) walsenders, do not suspend.
+        //
+        // N.B. walproposer doesn't currently show up in pg_stat_replication,
+        // but guard against it in case it ever does.
+        const WS_COUNT_QUERY: &str =
+            "select count(*) from pg_stat_replication where application_name != 'walproposer';";
+        match cli.query_one(WS_COUNT_QUERY, &[]) {
+            Ok(r) => match r.try_get::<&str, i64>("count") {
+                Ok(num_ws) => {
+                    if num_ws > 0 {
+                        self.last_active = Some(Utc::now());
+                        return Ok(());
+                    }
+                }
+                Err(e) => {
+                    // Embed the cause in the message like every other branch does:
+                    // a `.context()` wrapper would hide it under plain `{}` formatting.
+                    return Err(anyhow::anyhow!("failed to parse walsenders count: {}", e));
+                }
+            },
+            Err(e) => {
+                return Err(anyhow::anyhow!("failed to get list of walsenders: {}", e));
+            }
+        }
+
+        // Don't suspend compute if there is an active logical replication subscription
+        //
+        // `where pid is not null` – to filter out read only computes and subscription on branches
+        const LOGICAL_SUBSCRIPTIONS_QUERY: &str =
+            "select count(*) from pg_stat_subscription where pid is not null;";
+        match cli.query_one(LOGICAL_SUBSCRIPTIONS_QUERY, &[]) {
+            Ok(row) => match row.try_get::<&str, i64>("count") {
+                Ok(num_subscribers) => {
+                    if num_subscribers > 0 {
+                        self.last_active = Some(Utc::now());
+                        return Ok(());
+                    }
+                }
+                Err(e) => {
+                    return Err(anyhow::anyhow!(
+                        "failed to parse 'pg_stat_subscription' count: {}",
+                        e
+                    ));
+                }
+            },
+            Err(e) => {
+                return Err(anyhow::anyhow!(
+                    "failed to get list of active logical replication subscriptions: {}",
+                    e
+                ));
+            }
+        }
+
+        // Do not suspend compute if autovacuum is running
+        const AUTOVACUUM_COUNT_QUERY: &str =
+            "select count(*) from pg_stat_activity where backend_type = 'autovacuum worker'";
+        match cli.query_one(AUTOVACUUM_COUNT_QUERY, &[]) {
+            Ok(r) => match r.try_get::<&str, i64>("count") {
+                Ok(num_workers) => {
+                    if num_workers > 0 {
+                        self.last_active = Some(Utc::now());
+                        return Ok(());
+                    }
+                }
+                Err(e) => {
+                    return Err(anyhow::anyhow!(
+                        "failed to parse autovacuum workers count: {}",
+                        e
+                    ));
+                }
+            },
+            Err(e) => {
+                return Err(anyhow::anyhow!(
+                    "failed to get list of autovacuum workers: {}",
+                    e
+                ));
+            }
+        }
+
+        Ok(())
    }
 }

@@ -315,9 +412,24 @@ fn get_backends_state_change(cli: &mut Client) -> anyhow::Result<Option<DateTime
 /// Launch a separate compute monitor thread and return its `JoinHandle`.
+///
+/// The monitor starts with no known activity (`last_active: None`) and with
+/// `last_checked` and `last_up` both set to the current time. The
+/// `ActivityMonitorExperimental` compute feature is read once, here, and the
+/// result is stored in the monitor for the lifetime of the thread.
+///
+/// # Panics
+///
+/// Panics if the `compute-monitor` thread cannot be spawned.
 pub fn launch_monitor(compute: &Arc<ComputeNode>) -> thread::JoinHandle<()> {
    let compute = Arc::clone(compute);
+    let experimental = compute.has_feature(ComputeFeature::ActivityMonitorExperimental);
+    let now = Utc::now();
+    let mut monitor = ComputeMonitor {
+        compute,
+        last_active: None,
+        last_checked: now,
+        last_up: now,
+        active_time: None,
+        sessions: None,
+        experimental,
+    };

+    let span = span!(Level::INFO, "compute_monitor");
    thread::Builder::new()
        .name("compute-monitor".into())
-        .spawn(move || watch_compute_activity(&compute))
+        .spawn(move || {
+            // The span is created on the launching thread but entered inside the
+            // monitor thread; the guard is held for as long as `run()` executes.
+            let _enter = span.enter();
+            monitor.run();
+        })
        .expect("cannot launch compute monitor thread")
 }
--- a/compute_tools/src/tls.rs
+++ b/compute_tools/src/tls.rs
@@ -3,7 +3,6 @@ use std::{io::Write, os::unix::fs::OpenOptionsExt, path::Path, time::Duration};
 use anyhow::{Context, Result, bail};
 use compute_api::responses::TlsConfig;
 use ring::digest;
-use spki::der::{Decode, PemReader};
 use x509_cert::Certificate;

 #[derive(Clone, Copy)]
@@ -52,7 +51,7 @@ pub fn update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) {
        match try_update_key_path_blocking(pg_data, tls_config) {
            Ok(()) => break,
            Err(e) => {
-                tracing::error!("could not create key file {e:?}");
+                tracing::error!(error = ?e, "could not create key file");
                std::thread::sleep(Duration::from_secs(1))
            }
        }
@@ -92,8 +91,14 @@ fn try_update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) -> Resul
 fn verify_key_cert(key: &str, cert: &str) -> Result<()> {
    use x509_cert::der::oid::db::rfc5912::ECDSA_WITH_SHA_256;

-    let cert = Certificate::decode(&mut PemReader::new(cert.as_bytes()).context("pem reader")?)
-        .context("decode cert")?;
+    let certs = Certificate::load_pem_chain(cert.as_bytes())
+        .context("decoding PEM encoded certificates")?;
+
+    // First certificate is our server-cert,
+    // all the rest of the certs are the CA cert chain.
+    let Some(cert) = certs.first() else {
+        bail!("no certificates found");
+    };

    match cert.signature_algorithm.oid {
        ECDSA_WITH_SHA_256 => {
@@ -115,3 +120,82 @@ fn verify_key_cert(key: &str, cert: &str) -> Result<()> {

    Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::verify_key_cert;
+
+    /// Real certificate chain file, generated by cert-manager in dev.
+    /// The server auth certificate expired on 2025-04-24T15:41:35Z.
+    const CERT: &str = "
+-----BEGIN CERTIFICATE-----
+MIICCDCCAa+gAwIBAgIQKhLomFcNULbZA/bPdGzaSzAKBggqhkjOPQQDAjBEMQsw
+CQYDVQQGEwJVUzESMBAGA1UEChMJTmVvbiBJbmMuMSEwHwYDVQQDExhOZW9uIEs4
+cyBJbnRlcm1lZGlhdGUgQ0EwHhcNMjUwNDIzMTU0MTM1WhcNMjUwNDI0MTU0MTM1
+WjBBMT8wPQYDVQQDEzZjb21wdXRlLXdpc3B5LWdyYXNzLXcwY21laWp3LmRlZmF1
+bHQuc3ZjLmNsdXN0ZXIubG9jYWwwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAATF
+QCcG2m/EVHAiZtSsYgVnHgoTjUL/Jtwfdrpvz2t0bVRZmBmSKhlo53uPV9Y5eKFG
+AmR54p9/gT2eO3xU7vAgo4GFMIGCMA4GA1UdDwEB/wQEAwIFoDAMBgNVHRMBAf8E
+AjAAMB8GA1UdIwQYMBaAFFR2JAhXkeiNQNEixTvAYIwxUu3QMEEGA1UdEQQ6MDiC
+NmNvbXB1dGUtd2lzcHktZ3Jhc3MtdzBjbWVpancuZGVmYXVsdC5zdmMuY2x1c3Rl
+ci5sb2NhbDAKBggqhkjOPQQDAgNHADBEAiBLG22wKG8XS9e9RxBT+kmUx/kIThcP
+DIpp7jx0PrFcdQIgEMTdnXpx5Cv/Z0NIEDxtMHUD7G0vuRPfztki36JuakM=
+-----END CERTIFICATE-----
+-----BEGIN CERTIFICATE-----
+MIICFzCCAb6gAwIBAgIUbbX98N2Ip6lWAONRk8dU9hSz+YIwCgYIKoZIzj0EAwIw
+RDELMAkGA1UEBhMCVVMxEjAQBgNVBAoTCU5lb24gSW5jLjEhMB8GA1UEAxMYTmVv
+biBBV1MgSW50ZXJtZWRpYXRlIENBMB4XDTI1MDQyMjE1MTAxMFoXDTI1MDcyMTE1
+MTAxMFowRDELMAkGA1UEBhMCVVMxEjAQBgNVBAoTCU5lb24gSW5jLjEhMB8GA1UE
+AxMYTmVvbiBLOHMgSW50ZXJtZWRpYXRlIENBMFkwEwYHKoZIzj0CAQYIKoZIzj0D
+AQcDQgAE5++m5owqNI4BPMTVNIUQH0qvU7pYhdpHGVGhdj/Lgars6ROvE6uSNQV4
+SAmJN5HBzj5/6kLQaTPWpXW7EHXjK6OBjTCBijAOBgNVHQ8BAf8EBAMCAQYwEgYD
+VR0TAQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQUVHYkCFeR6I1A0SLFO8BgjDFS7dAw
+HwYDVR0jBBgwFoAUgHfNXfyKtHO0V9qoLOWCjkNiaI8wJAYDVR0eAQH/BBowGKAW
+MBSCEi5zdmMuY2x1c3Rlci5sb2NhbDAKBggqhkjOPQQDAgNHADBEAiBObVFFdXaL
+QpOXmN60dYUNnQRwjKreFduEkQgOdOlssgIgVAdJJQFgvlrvEOBhY8j5WyeKRwUN
+k/ALs6KpgaFBCGY=
+-----END CERTIFICATE-----
+-----BEGIN CERTIFICATE-----
+MIIB4jCCAYegAwIBAgIUFlxWFn/11yoGdmD+6gf+yQMToS0wCgYIKoZIzj0EAwIw
+ODELMAkGA1UEBhMCVVMxEjAQBgNVBAoTCU5lb24gSW5jLjEVMBMGA1UEAxMMTmVv
+biBSb290IENBMB4XDTI1MDQwMzA3MTUyMloXDTI2MDQwMzA3MTUyMlowRDELMAkG
+A1UEBhMCVVMxEjAQBgNVBAoTCU5lb24gSW5jLjEhMB8GA1UEAxMYTmVvbiBBV1Mg
+SW50ZXJtZWRpYXRlIENBMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEqonG/IQ6
+ZxtEtOUTkkoNopPieXDO5CBKUkNFTGeJEB7OxRlSpYJgsBpaYIaD6Vc4sVk3thIF
+p+pLw52idQOIN6NjMGEwDgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8w
+HQYDVR0OBBYEFIB3zV38irRztFfaqCzlgo5DYmiPMB8GA1UdIwQYMBaAFKh7M4/G
+FHvr/ORDQZt4bMLlJvHCMAoGCCqGSM49BAMCA0kAMEYCIQCbS4x7QPslONzBYbjC
+UQaQ0QLDW4CJHvQ4u4gbWFG87wIhAJMsHQHjP9qTT27Q65zQCR7O8QeLAfha1jrH
+Ag/LsxSr
+-----END CERTIFICATE-----
+";
+
+    /// The key corresponding to [`CERT`]
+    const KEY: &str = "
+-----BEGIN EC PRIVATE KEY-----
+MHcCAQEEIDnAnrqmIJjndCLWP1iIO5X3X63Aia48TGpGuMXwvm6IoAoGCCqGSM49
+AwEHoUQDQgAExUAnBtpvxFRwImbUrGIFZx4KE41C/ybcH3a6b89rdG1UWZgZkioZ
+aOd7j1fWOXihRgJkeeKff4E9njt8VO7wIA==
+-----END EC PRIVATE KEY-----
+";
+
+    /// An incorrect key.
+    const INCORRECT_KEY: &str = "
+-----BEGIN EC PRIVATE KEY-----
+MHcCAQEEIL6WqqBDyvM0HWz7Ir5M5+jhFWB7IzOClGn26OPrzHCXoAoGCCqGSM49
+AwEHoUQDQgAE7XVvdOy5lfwtNKb+gJEUtnG+DrnnXLY5LsHDeGQKV9PTRcEMeCrG
+YZzHyML4P6Sr4yi2ts+4B9i47uvAG8+XwQ==
+-----END EC PRIVATE KEY-----
+";
+
+    /// The key matching the first certificate in `CERT` must pass verification.
+    #[test]
+    fn certificate_verification() {
+        let outcome = verify_key_cert(KEY, CERT);
+        outcome.unwrap();
+    }
+
+    /// A key that does not belong to `CERT` must be rejected; unwrapping the
+    /// returned error panics with the message checked by `should_panic`.
+    #[test]
+    #[should_panic(expected = "private key file does not match certificate")]
+    fn certificate_verification_fail() {
+        let outcome = verify_key_cert(INCORRECT_KEY, CERT);
+        outcome.unwrap();
+    }
+}
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -17,8 +17,10 @@ use std::time::Duration;
 use anyhow::{Context, Result, anyhow, bail};
 use clap::Parser;
 use compute_api::spec::ComputeMode;
+use control_plane::broker::StorageBroker;
 use control_plane::endpoint::ComputeControlPlane;
 use control_plane::endpoint_storage::{ENDPOINT_STORAGE_DEFAULT_PORT, EndpointStorage};
+use control_plane::local_env;
 use control_plane::local_env::{
    EndpointStorageConf, InitForceMode, LocalEnv, NeonBroker, NeonLocalInitConf,
    NeonLocalInitPageserverConf, SafekeeperConf,
@@ -28,7 +30,6 @@ use control_plane::safekeeper::SafekeeperNode;
 use control_plane::storage_controller::{
    NeonStorageControllerStartArgs, NeonStorageControllerStopArgs, StorageController,
 };
-use control_plane::{broker, local_env};
 use nix::fcntl::{FlockArg, flock};
 use pageserver_api::config::{
    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
@@ -988,7 +989,8 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
        NeonLocalInitConf {
            control_plane_api: Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap()),
            broker: NeonBroker {
-                listen_addr: DEFAULT_BROKER_ADDR.parse().unwrap(),
+                listen_addr: Some(DEFAULT_BROKER_ADDR.parse().unwrap()),
+                listen_https_addr: None,
            },
            safekeepers: vec![SafekeeperConf {
                id: DEFAULT_SAFEKEEPER_ID,
@@ -1777,7 +1779,8 @@ async fn handle_endpoint_storage(
 async fn handle_storage_broker(subcmd: &StorageBrokerCmd, env: &local_env::LocalEnv) -> Result<()> {
    match subcmd {
        StorageBrokerCmd::Start(args) => {
-            if let Err(e) = broker::start_broker_process(env, &args.start_timeout).await {
+            let storage_broker = StorageBroker::from_env(env);
+            if let Err(e) = storage_broker.start(&args.start_timeout).await {
                eprintln!("broker start failed: {e}");
                exit(1);
            }
@@ -1785,7 +1788,8 @@ async fn handle_storage_broker(subcmd: &StorageBrokerCmd, env: &local_env::Local

        StorageBrokerCmd::Stop(_args) => {
            // FIXME: stop_mode unused
-            if let Err(e) = broker::stop_broker_process(env) {
+            let storage_broker = StorageBroker::from_env(env);
+            if let Err(e) = storage_broker.stop() {
                eprintln!("broker stop failed: {e}");
                exit(1);
            }
@@ -1835,8 +1839,11 @@ async fn handle_start_all_impl(
    #[allow(clippy::redundant_closure_call)]
    (|| {
        js.spawn(async move {
-            let retry_timeout = retry_timeout;
-            broker::start_broker_process(env, &retry_timeout).await
+            let storage_broker = StorageBroker::from_env(env);
+            storage_broker
+                .start(&retry_timeout)
+                .await
+                .map_err(|e| e.context("start storage_broker"))
        });

        js.spawn(async move {
@@ -1991,7 +1998,8 @@ async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
        }
    }

-    if let Err(e) = broker::stop_broker_process(env) {
+    let storage_broker = StorageBroker::from_env(env);
+    if let Err(e) = storage_broker.stop() {
        eprintln!("neon broker stop failed: {e:#}");
    }

--- a/control_plane/src/broker.rs
+++ b/control_plane/src/broker.rs
@@ -3,60 +3,86 @@
 //! In the local test environment, the storage broker stores its data directly in
 //!
 //! ```text
-//!   .neon
+//!   .neon/storage_broker
 //! ```
 use std::time::Duration;

 use anyhow::Context;
 use camino::Utf8PathBuf;

-use crate::{background_process, local_env};
+use crate::{background_process, local_env::LocalEnv};

-pub async fn start_broker_process(
-    env: &local_env::LocalEnv,
-    retry_timeout: &Duration,
-) -> anyhow::Result<()> {
-    let broker = &env.broker;
-    let listen_addr = &broker.listen_addr;
-
-    print!("Starting neon broker at {}", listen_addr);
-
-    let args = [format!("--listen-addr={listen_addr}")];
-
-    let client = reqwest::Client::new();
-    background_process::start_process(
-        "storage_broker",
-        &env.base_data_dir,
-        &env.storage_broker_bin(),
-        args,
-        [],
-        background_process::InitialPidFile::Create(storage_broker_pid_file_path(env)),
-        retry_timeout,
-        || async {
-            let url = broker.client_url();
-            let status_url = url.join("status").with_context(|| {
-                format!("Failed to append /status path to broker endpoint {url}")
-            })?;
-            let request = client
-                .get(status_url)
-                .build()
-                .with_context(|| format!("Failed to construct request to broker endpoint {url}"))?;
-            match client.execute(request).await {
-                Ok(resp) => Ok(resp.status().is_success()),
-                Err(_) => Ok(false),
-            }
-        },
-    )
-    .await
-    .context("Failed to spawn storage_broker subprocess")?;
-    Ok(())
+/// Handle used to initialize, start and stop the local `storage_broker` process.
+///
+/// Holds its own copy of the `LocalEnv` it was created from.
+pub struct StorageBroker {
+    env: LocalEnv,
 }

-pub fn stop_broker_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
-    background_process::stop_process(true, "storage_broker", &storage_broker_pid_file_path(env))
-}
+impl StorageBroker {
+    /// Create a new `StorageBroker` instance from the environment.
+    ///
+    /// The environment is cloned, so the returned value does not borrow from `env`.
+    pub fn from_env(env: &LocalEnv) -> Self {
+        Self { env: env.clone() }
    }

-fn storage_broker_pid_file_path(env: &local_env::LocalEnv) -> Utf8PathBuf {
-    Utf8PathBuf::from_path_buf(env.base_data_dir.join("storage_broker.pid"))
-        .expect("non-Unicode path")
+    /// Prepare the storage broker's data directory.
+    ///
+    /// When `generate_local_ssl_certs` is enabled in the environment, generates
+    /// `server.crt` and `server.key` inside the storage broker data directory;
+    /// otherwise does nothing.
+    pub fn initialize(&self) -> anyhow::Result<()> {
+        if !self.env.generate_local_ssl_certs {
+            return Ok(());
+        }
+
+        let cert_path = self.env.storage_broker_data_dir().join("server.crt");
+        let key_path = self.env.storage_broker_data_dir().join("server.key");
+        self.env.generate_ssl_cert(&cert_path, &key_path)?;
+        Ok(())
+    }
+
+    /// Start the storage broker process.
+    ///
+    /// Passes `--listen-addr` and/or `--listen-https-addr` to the binary for
+    /// whichever addresses are configured, runs it with the storage broker data
+    /// directory, and polls the broker's `status` endpoint (via `client_url()`)
+    /// until it answers successfully or `retry_timeout` is exhausted.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if neither listen address is configured, or if the
+    /// process could not be started.
+    pub async fn start(&self, retry_timeout: &Duration) -> anyhow::Result<()> {
+        let broker = &self.env.broker;
+
+        // `client_url()` panics when no address is configured; surface that
+        // misconfiguration as a regular error instead.
+        anyhow::ensure!(
+            broker.listen_addr.is_some() || broker.listen_https_addr.is_some(),
+            "storage broker config must set at least one of listen_addr or listen_https_addr"
+        );
+
+        print!("Starting neon broker at {}", broker.client_url());
+
+        let mut args = Vec::new();
+
+        if let Some(addr) = &broker.listen_addr {
+            args.push(format!("--listen-addr={addr}"));
+        }
+        if let Some(addr) = &broker.listen_https_addr {
+            args.push(format!("--listen-https-addr={addr}"));
+        }
+
+        let client = self.env.create_http_client();
+        background_process::start_process(
+            "storage_broker",
+            &self.env.storage_broker_data_dir(),
+            &self.env.storage_broker_bin(),
+            args,
+            [],
+            background_process::InitialPidFile::Create(self.pid_file_path()),
+            retry_timeout,
+            || async {
+                // Readiness probe: the broker counts as started once `status`
+                // answers with a success code; request failures mean "not yet".
+                let url = broker.client_url();
+                let status_url = url.join("status").with_context(|| {
+                    format!("Failed to append /status path to broker endpoint {url}")
+                })?;
+                let request = client.get(status_url).build().with_context(|| {
+                    format!("Failed to construct request to broker endpoint {url}")
+                })?;
+                match client.execute(request).await {
+                    Ok(resp) => Ok(resp.status().is_success()),
+                    Err(_) => Ok(false),
+                }
+            },
+        )
+        .await
+        .context("Failed to spawn storage_broker subprocess")?;
+        Ok(())
+    }
+
+    /// Stop the storage broker process identified by its PID file.
+    pub fn stop(&self) -> anyhow::Result<()> {
+        let pid_file = self.pid_file_path();
+        background_process::stop_process(true, "storage_broker", &pid_file)
+    }
+
+    /// Get the path to the PID file for the storage broker.
+    ///
+    /// The file is `storage_broker.pid` directly under `base_data_dir`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the resulting path is not valid Unicode.
+    fn pid_file_path(&self) -> Utf8PathBuf {
+        let pid_file = self.env.base_data_dir.join("storage_broker.pid");
+        Utf8PathBuf::from_path_buf(pid_file).expect("non-Unicode path")
+    }
 }
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -4,7 +4,7 @@
 //! script which will use local paths.

 use std::collections::HashMap;
-use std::net::{IpAddr, Ipv4Addr, SocketAddr};
+use std::net::SocketAddr;
 use std::path::{Path, PathBuf};
 use std::process::{Command, Stdio};
 use std::time::Duration;
@@ -14,11 +14,12 @@ use anyhow::{Context, bail};
 use clap::ValueEnum;
 use pem::Pem;
 use postgres_backend::AuthType;
-use reqwest::Url;
+use reqwest::{Certificate, Url};
 use serde::{Deserialize, Serialize};
 use utils::auth::encode_from_key_file;
 use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};

+use crate::broker::StorageBroker;
 use crate::endpoint_storage::{ENDPOINT_STORAGE_REMOTE_STORAGE_DIR, EndpointStorage};
 use crate::pageserver::{PAGESERVER_REMOTE_STORAGE_DIR, PageServerNode};
 use crate::safekeeper::SafekeeperNode;
@@ -157,11 +158,16 @@ pub struct EndpointStorageConf {
 }

 /// Broker config for cluster internal communication.
+///
+/// Both addresses are optional and the derived `Default` leaves both unset, so
+/// the "at least one must be set" rule below is not enforced by the type itself.
-#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
+#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Default)]
 #[serde(default)]
 pub struct NeonBroker {
-    /// Broker listen address for storage nodes coordination, e.g. '127.0.0.1:50051'.
-    pub listen_addr: SocketAddr,
+    /// Broker listen HTTP address for storage nodes coordination, e.g. '127.0.0.1:50051'.
+    /// At least one of listen_addr or listen_https_addr must be set.
+    pub listen_addr: Option<SocketAddr>,
+    /// Broker listen HTTPS address for storage nodes coordination, e.g. '127.0.0.1:50051'.
+    /// At least one of listen_addr or listen_https_addr must be set.
+    /// listen_https_addr is preferred over listen_addr in neon_local.
+    pub listen_https_addr: Option<SocketAddr>,
 }

 /// A part of storage controller's config the neon_local knows about.
@@ -235,18 +241,19 @@ impl Default for NeonStorageControllerConf {
    }
 }

-// Dummy Default impl to satisfy Deserialize derive.
-impl Default for NeonBroker {
-    fn default() -> Self {
-        NeonBroker {
-            listen_addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 0),
-        }
-    }
-}
-
 impl NeonBroker {
+    /// Returns the URL clients should use to reach the broker.
+    ///
+    /// Uses `https://` with `listen_https_addr` when it is set, otherwise
+    /// `http://` with `listen_addr`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if neither address is set, or if the resulting string is not a
+    /// valid URL.
    pub fn client_url(&self) -> Url {
-        Url::parse(&format!("http://{}", self.listen_addr)).expect("failed to construct url")
+        let url = if let Some(addr) = self.listen_https_addr {
+            format!("https://{}", addr)
+        } else {
+            format!(
+                "http://{}",
+                self.listen_addr
+                    .expect("at least one address should be set")
+            )
+        };
+
+        Url::parse(&url).expect("failed to construct url")
    }
 }

@@ -441,6 +448,10 @@ impl LocalEnv {
        self.base_data_dir.join("endpoints")
    }

+    /// Directory for the storage broker's files: `storage_broker` under `base_data_dir`.
+    pub fn storage_broker_data_dir(&self) -> PathBuf {
+        self.base_data_dir.join("storage_broker")
+    }
+
    pub fn pageserver_data_dir(&self, pageserver_id: NodeId) -> PathBuf {
        self.base_data_dir
            .join(format!("pageserver_{pageserver_id}"))
@@ -503,6 +514,23 @@ impl LocalEnv {
        )
    }

+    /// Creates HTTP client with local SSL CA certificates.
+    ///
+    /// When `ssl_ca_cert_path()` yields a file, every certificate in that PEM
+    /// bundle is added as a trusted root; otherwise the client is built with no
+    /// additional roots.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the CA file cannot be read or parsed, or if the client fails
+    /// to build.
+    pub fn create_http_client(&self) -> reqwest::Client {
+        let mut builder = reqwest::Client::builder();
+
+        if let Some(ca_file) = self.ssl_ca_cert_path() {
+            let pem = std::fs::read(ca_file).expect("SSL CA file should exist");
+            let ca_certs =
+                Certificate::from_pem_bundle(&pem).expect("SSL CA file should be valid");
+            for ca_cert in ca_certs {
+                builder = builder.add_root_certificate(ca_cert);
+            }
+        }
+
+        builder
+            .build()
+            .expect("HTTP client should construct with no error")
+    }
+
    /// Inspect the base data directory and extract the instance id and instance directory path
    /// for all storage controller instances
    pub async fn storage_controller_instances(&self) -> std::io::Result<Vec<(u8, PathBuf)>> {
@@ -911,6 +939,12 @@ impl LocalEnv {
        // create endpoints dir
        fs::create_dir_all(env.endpoints_path())?;

+        // create storage broker dir
+        fs::create_dir_all(env.storage_broker_data_dir())?;
+        StorageBroker::from_env(&env)
+            .initialize()
+            .context("storage broker init failed")?;
+
        // create safekeeper dirs
        for safekeeper in &env.safekeepers {
            fs::create_dir_all(SafekeeperNode::datadir_path_by_id(&env, safekeeper.id))?;
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -21,7 +21,6 @@ use pageserver_api::shard::TenantShardId;
 use pageserver_client::mgmt_api;
 use postgres_backend::AuthType;
 use postgres_connection::{PgConnectionConfig, parse_host_port};
-use reqwest::Certificate;
 use utils::auth::{Claims, Scope};
 use utils::id::{NodeId, TenantId, TimelineId};
 use utils::lsn::Lsn;
@@ -51,19 +50,6 @@ impl PageServerNode {
            parse_host_port(&conf.listen_pg_addr).expect("Unable to parse listen_pg_addr");
        let port = port.unwrap_or(5432);

-        let ssl_ca_certs = env.ssl_ca_cert_path().map(|ssl_ca_file| {
-            let buf = std::fs::read(ssl_ca_file).expect("SSL root CA file should exist");
-            Certificate::from_pem_bundle(&buf).expect("SSL CA file should be valid")
-        });
-
-        let mut http_client = reqwest::Client::builder();
-        for ssl_ca_cert in ssl_ca_certs.unwrap_or_default() {
-            http_client = http_client.add_root_certificate(ssl_ca_cert);
-        }
-        let http_client = http_client
-            .build()
-            .expect("Client constructs with no errors");
-
        let endpoint = if env.storage_controller.use_https_pageserver_api {
            format!(
                "https://{}",
@@ -80,7 +66,7 @@ impl PageServerNode {
            conf: conf.clone(),
            env: env.clone(),
            http_client: mgmt_api::Client::new(
-                http_client,
+                env.create_http_client(),
                endpoint,
                {
                    match conf.http_auth_type {
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -87,7 +87,7 @@ impl SafekeeperNode {
            conf: conf.clone(),
            pg_connection_config: Self::safekeeper_connection_config(&listen_addr, conf.pg_port),
            env: env.clone(),
-            http_client: reqwest::Client::new(),
+            http_client: env.create_http_client(),
            http_base_url: format!("http://{}:{}/v1", listen_addr, conf.http_port),
            listen_addr,
        }
--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -20,7 +20,7 @@ use pageserver_api::shard::TenantShardId;
 use pageserver_client::mgmt_api::ResponseErrorMessageExt;
 use pem::Pem;
 use postgres_backend::AuthType;
-use reqwest::{Certificate, Method};
+use reqwest::Method;
 use serde::de::DeserializeOwned;
 use serde::{Deserialize, Serialize};
 use tokio::process::Command;
@@ -153,24 +153,11 @@ impl StorageController {
            }
        };

-        let ssl_ca_certs = env.ssl_ca_cert_path().map(|ssl_ca_file| {
-            let buf = std::fs::read(ssl_ca_file).expect("SSL CA file should exist");
-            Certificate::from_pem_bundle(&buf).expect("SSL CA file should be valid")
-        });
-
-        let mut http_client = reqwest::Client::builder();
-        for ssl_ca_cert in ssl_ca_certs.unwrap_or_default() {
-            http_client = http_client.add_root_certificate(ssl_ca_cert);
-        }
-        let http_client = http_client
-            .build()
-            .expect("HTTP client should construct with no error");
-
        Self {
            env: env.clone(),
            private_key,
            public_key,
-            client: http_client,
+            client: env.create_http_client(),
            config: env.storage_controller.clone(),
            listen_port: OnceLock::default(),
        }
--- a/docker-compose/docker_compose_test.sh
+++ b/docker-compose/docker_compose_test.sh
@@ -9,21 +9,20 @@
 # to verify custom image builds (e.g pre-published ones).
 #
 # A test script for postgres extensions
-# Currently supports only v16
+# Currently supports only v16+
 #
 set -eux -o pipefail

-COMPOSE_FILE='docker-compose.yml'
-cd $(dirname $0)
-COMPUTE_CONTAINER_NAME=docker-compose-compute-1
-TEST_CONTAINER_NAME=docker-compose-neon-test-extensions-1
+export COMPOSE_FILE='docker-compose.yml'
+export COMPOSE_PROFILES=test-extensions
+cd "$(dirname "${0}")"
 PSQL_OPTION="-h localhost -U cloud_admin -p 55433 -d postgres"

-cleanup() {
+function cleanup() {
    echo "show container information"
    docker ps
    echo "stop containers..."
-    docker compose --profile test-extensions -f $COMPOSE_FILE down
+    docker compose down
 }

 for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
@@ -31,55 +30,55 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
    echo "clean up containers if exists"
    cleanup
    PG_TEST_VERSION=$((pg_version < 16 ? 16 : pg_version))
-    PG_VERSION=$pg_version PG_TEST_VERSION=$PG_TEST_VERSION docker compose --profile test-extensions -f $COMPOSE_FILE up --quiet-pull --build -d
+    PG_VERSION=${pg_version} PG_TEST_VERSION=${PG_TEST_VERSION} docker compose up --quiet-pull --build -d

    echo "wait until the compute is ready. timeout after 60s. "
    cnt=0
    while sleep 3; do
        # check timeout
-        cnt=`expr $cnt + 3`
-        if [ $cnt -gt 60 ]; then
+        (( cnt += 3 ))
+        if [[ ${cnt} -gt 60 ]]; then
            echo "timeout before the compute is ready."
            exit 1
        fi
-        if docker compose --profile test-extensions -f $COMPOSE_FILE logs "compute_is_ready" | grep -q "accepting connections"; then
+        if docker compose logs "compute_is_ready" | grep -q "accepting connections"; then
            echo "OK. The compute is ready to connect."
            echo "execute simple queries."
-            docker exec $COMPUTE_CONTAINER_NAME /bin/bash -c "psql $PSQL_OPTION"
+            docker compose exec compute /bin/bash -c "psql ${PSQL_OPTION} -c 'SELECT 1'"
            break
        fi
    done

-    if [ $pg_version -ge 16 ]; then
+    if [[ ${pg_version} -ge 16 ]]; then
        # This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail
        # It cannot be moved to Dockerfile now because the database directory is created after the start of the container
        echo Adding dummy config
-        docker exec $COMPUTE_CONTAINER_NAME touch /var/db/postgres/compute/compute_ctl_temp_override.conf
+        docker compose exec compute touch /var/db/postgres/compute/compute_ctl_temp_override.conf
        # The following block copies the files for the pg_hintplan test to the compute node for the extension test in an isolated docker-compose environment
        TMPDIR=$(mktemp -d)
-        docker cp $TEST_CONTAINER_NAME:/ext-src/pg_hint_plan-src/data $TMPDIR/data
-        docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/ext-src/pg_hint_plan-src/
-        rm -rf $TMPDIR
+        docker compose cp neon-test-extensions:/ext-src/pg_hint_plan-src/data "${TMPDIR}/data"
+        docker compose cp "${TMPDIR}/data" compute:/ext-src/pg_hint_plan-src/
+        rm -rf "${TMPDIR}"
        # The following block does the same for the contrib/file_fdw test
        TMPDIR=$(mktemp -d)
-        docker cp $TEST_CONTAINER_NAME:/postgres/contrib/file_fdw/data $TMPDIR/data
-        docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/postgres/contrib/file_fdw/data
-        rm -rf $TMPDIR
+        docker compose cp neon-test-extensions:/postgres/contrib/file_fdw/data "${TMPDIR}/data"
+        docker compose cp "${TMPDIR}/data" compute:/postgres/contrib/file_fdw/data
+        rm -rf "${TMPDIR}"
        # Apply patches
-        cat ../compute/patches/contrib_pg${pg_version}.patch | docker exec -i $TEST_CONTAINER_NAME bash -c "(cd /postgres && patch -p1)"
+        docker compose exec -T neon-test-extensions bash -c "(cd /postgres && patch -p1)" <"../compute/patches/contrib_pg${pg_version}.patch"
        # We are running tests now
        rm -f testout.txt testout_contrib.txt
-        docker exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \
-        $TEST_CONTAINER_NAME /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0
-        docker exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \
-        $TEST_CONTAINER_NAME /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0
-        if [ $EXT_SUCCESS -eq 0 ] || [ $CONTRIB_SUCCESS -eq 0 ]; then
+        docker compose exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \
+        neon-test-extensions /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0
+        docker compose exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \
+        neon-test-extensions /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0
+        if [[ ${EXT_SUCCESS} -eq 0 || ${CONTRIB_SUCCESS} -eq 0 ]]; then
            CONTRIB_FAILED=
            FAILED=
-            [ $EXT_SUCCESS -eq 0 ] && FAILED=$(tail -1 testout.txt | awk '{for(i=1;i<=NF;i++){print "/ext-src/"$i;}}')
-            [ $CONTRIB_SUCCESS -eq 0 ] && CONTRIB_FAILED=$(tail -1 testout_contrib.txt | awk '{for(i=0;i<=NF;i++){print "/postgres/contrib/"$i;}}')
-            for d in $FAILED $CONTRIB_FAILED; do
-                docker exec $TEST_CONTAINER_NAME bash -c 'for file in $(find '"$d"' -name regression.diffs -o -name regression.out); do cat $file; done' || [ $? -eq 1 ]
+            [[ ${EXT_SUCCESS} -eq 0 ]] && FAILED=$(tail -1 testout.txt | awk '{for(i=1;i<=NF;i++){print "/ext-src/"$i;}}')
+            [[ ${CONTRIB_SUCCESS} -eq 0 ]] && CONTRIB_FAILED=$(tail -1 testout_contrib.txt | awk '{for(i=1;i<=NF;i++){print "/postgres/contrib/"$i;}}')  # awk fields start at 1; $0 is the whole line
+            for d in ${FAILED} ${CONTRIB_FAILED}; do
+                docker compose exec neon-test-extensions bash -c 'for file in $(find '"${d}"' -name regression.diffs -o -name regression.out); do cat ${file}; done' || [[ ${?} -eq 1 ]]
            done
        exit 1
        fi
--- a/docker-compose/ext-src/README.md
+++ b/docker-compose/ext-src/README.md
@@ -0,0 +1,99 @@
+# PostgreSQL Extensions for Testing
+
+This directory contains PostgreSQL extensions used primarily for:
+1. Testing extension upgrades between different Compute versions
+2. Running regression tests with regular users (mostly for cloud instances)
+
+## Directory Structure
+
+Each extension directory follows a standard structure:
+
+- `extension-name-src/` - Directory containing test files for the extension
+  - `test-upgrade.sh` - Script for testing upgrade scenarios
+  - `regular-test.sh` - Script for testing with regular users
+  - Additional test files depending on the extension
+
+## Available Extensions
+
+This directory includes the following extensions:
+
+- `hll-src` - HyperLogLog, a fixed-size data structure for approximating cardinality
+- `hypopg-src` - Extension to create hypothetical indexes
+- `ip4r-src` - IPv4/v6 and subnet data types
+- `pg_cron-src` - Run periodic jobs in PostgreSQL
+- `pg_graphql-src` - GraphQL support for PostgreSQL
+- `pg_hint_plan-src` - Execution plan hints
+- `pg_ivm-src` - Incremental view maintenance
+- `pg_jsonschema-src` - JSON Schema validation
+- `pg_repack-src` - Reorganize tables with minimal locks
+- `pg_roaringbitmap-src` - Roaring bitmap implementation
+- `pg_semver-src` - Semantic version data type
+- `pg_session_jwt-src` - JWT authentication for PostgreSQL
+- `pg_tiktoken-src` - OpenAI Tiktoken tokenizer
+- `pg_uuidv7-src` - UUIDv7 implementation for PostgreSQL
+- `pgjwt-src` - JWT tokens for PostgreSQL
+- `pgrag-src` - Retrieval Augmented Generation for PostgreSQL
+- `pgtap-src` - Unit testing framework for PostgreSQL
+- `pgvector-src` - Vector similarity search
+- `pgx_ulid-src` - ULID data type
+- `plv8-src` - JavaScript language for PostgreSQL stored procedures
+- `postgresql-unit-src` - SI units for PostgreSQL
+- `prefix-src` - Prefix matching for strings
+- `rag_bge_small_en_v15-src` - BGE embedding model for RAG
+- `rag_jina_reranker_v1_tiny_en-src` - Jina reranker model for RAG
+- `rum-src` - RUM access method for text search
+
+## Usage
+
+### Extension Upgrade Testing
+
+The extensions in this directory are used by the `test_extensions_upgrade.sh` script to test upgrading extensions between different versions of Neon Compute nodes. The script:
+
+1. Creates a database with extensions installed on an old Compute version
+2. Creates timelines for each extension
+3. Switches to a new Compute version and tests the upgrade process
+4. Verifies extension functionality after upgrade
+
+### Regular User Testing
+
+For testing with regular users (particularly for cloud instances), each extension directory typically contains a `regular-test.sh` script that:
+
+1. Drops the database if it exists
+2. Creates a fresh test database
+3. Installs the extension
+4. Runs regression tests
+
+A note about pg_regress: Since pg_regress attempts to set `lc_messages` for the database by default, which is forbidden for regular users, we create databases manually and use the `--use-existing` option to bypass this limitation.
+
+### CI Workflows
+
+Two main workflows use these extensions:
+
+1. **Cloud Extensions Test** - Tests extensions on Neon cloud projects
+2. **Force Test Upgrading of Extension** - Tests upgrading extensions between different Compute versions
+
+These workflows are integrated into the build-and-test pipeline through shell scripts:
+
+- `docker_compose_test.sh` - Tests extensions in a Docker Compose environment
+
+- `test_extensions_upgrade.sh` - Tests extension upgrades between different Compute versions
+
+## Adding New Extensions
+
+To add a new extension for testing:
+
+1. Create a directory named `extension-name-src` in this directory
+2. Add at minimum:
+   - `regular-test.sh` for testing with regular users
+   - If `regular-test.sh` doesn't exist, the system will look for `neon-test.sh`
+   - If neither exists, it will try to run `make installcheck`
+   - `test-upgrade.sh` is only needed if you want to test upgrade scenarios
+3. Update the list of extensions in the `test_extensions_upgrade.sh` script if needed for upgrade testing
+
+### Patching Extension Sources
+
+If you need to patch the extension sources:
+
+1. Place the patch file in the extension's directory
+2. Apply the patch in the appropriate script (`test-upgrade.sh`, `neon-test.sh`, `regular-test.sh`, or `Makefile`)
+3. The patch will be applied during the testing process
--- a/docker-compose/ext-src/hll-src/regular-test.sh
+++ b/docker-compose/ext-src/hll-src/regular-test.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+set -ex
+cd "$(dirname "${0}")" # run from the script's directory; quoted so paths with spaces work
+PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
+dropdb --if-exists contrib_regression
+createdb contrib_regression
+${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --dbname=contrib_regression setup add_agg agg_oob auto_sparse card_op cast_shape copy_binary cumulative_add_cardinality_correction cumulative_add_comprehensive_promotion cumulative_add_sparse_edge cumulative_add_sparse_random cumulative_add_sparse_step cumulative_union_comprehensive cumulative_union_explicit_explicit cumulative_union_explicit_promotion cumulative_union_probabilistic_probabilistic cumulative_union_sparse_full_representation cumulative_union_sparse_promotion cumulative_union_sparse_sparse disable_hashagg equal explicit_thresh hash hash_any meta_func murmur_bigint murmur_bytea nosparse notequal scalar_oob storedproc transaction typmod typmod_insert union_op
--- a/docker-compose/ext-src/hypopg-src/regular-test.sh
+++ b/docker-compose/ext-src/hypopg-src/regular-test.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+set -ex
+cd "$(dirname "${0}")" # run from the script's directory; quoted so paths with spaces work
+dropdb --if-exists contrib_regression
+createdb contrib_regression
+PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
+${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --inputdir=test --dbname=contrib_regression hypopg hypo_brin hypo_index_part hypo_include hypo_hash hypo_hide_index
--- a/docker-compose/ext-src/ip4r-src/regular-test.sh
+++ b/docker-compose/ext-src/ip4r-src/regular-test.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+set -ex
+cd "$(dirname "${0}")" # run from the script's directory; quoted so paths with spaces work
+dropdb --if-exists contrib_regression # full option name; do not rely on getopt abbreviation
+createdb contrib_regression
+PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
+${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --dbname=contrib_regression ip4r ip4r-softerr ip4r-v11
--- a/docker-compose/ext-src/pg_cron-src/regular-test.sh
+++ b/docker-compose/ext-src/pg_cron-src/regular-test.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+set -ex
+cd "$(dirname "${0}")" # run from the script's directory; quoted so paths with spaces work
+dropdb --if-exists contrib_regression # full option name; do not rely on getopt abbreviation
+createdb contrib_regression
+PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
+${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --dbname=contrib_regression pg_cron-test
--- a/docker-compose/ext-src/pg_graphql-src/regular-test.sh
+++ b/docker-compose/ext-src/pg_graphql-src/regular-test.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+set -ex
+cd "$(dirname "${0}")"
+PGXS="$(dirname "$(pg_config --pgxs)" )"
+REGRESS="${PGXS}/../test/regress/pg_regress"
+TESTDIR="test"
+TESTS=$(ls "${TESTDIR}/sql" | sort )
+TESTS=${TESTS//\.sql/}
+TESTS=${TESTS/empty_mutations/}
+TESTS=${TESTS/function_return_row_is_selectable/}
+TESTS=${TESTS/issue_300/}
+TESTS=${TESTS/permissions_connection_column/}
+TESTS=${TESTS/permissions_functions/}
+TESTS=${TESTS/permissions_node_column/}
+TESTS=${TESTS/permissions_table_level/}
+TESTS=${TESTS/permissions_types/}
+TESTS=${TESTS/row_level_security/}
+TESTS=${TESTS/sqli_connection/}
+dropdb --if-exists contrib_regression # full option name; do not rely on getopt abbreviation
+createdb contrib_regression
+psql -v ON_ERROR_STOP=1 -f test/fixtures.sql -d contrib_regression
+${REGRESS} --use-existing --dbname=contrib_regression --inputdir="${TESTDIR}" ${TESTS} # TESTS is intentionally unquoted: one argument per test name
+
--- a/docker-compose/ext-src/pg_hint_plan-src/regular-test.sh
+++ b/docker-compose/ext-src/pg_hint_plan-src/regular-test.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+set -ex
+cd "$(dirname "${0}")" # run from the script's directory; quoted so paths with spaces work
+dropdb --if-exists contrib_regression # full option name; do not rely on getopt abbreviation
+createdb contrib_regression
+PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
+${PG_REGRESS} --use-existing  --inputdir=./ --bindir='/usr/local/pgsql/bin'    --encoding=UTF8 --dbname=contrib_regression init base_plan pg_hint_plan ut-init ut-A ut-S ut-J ut-L ut-G ut-R ut-fdw ut-W ut-T ut-fini hints_anywhere plpgsql oldextversions
--- a/docker-compose/ext-src/pg_ivm-src/regular-test.sh
+++ b/docker-compose/ext-src/pg_ivm-src/regular-test.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+set -ex
+cd "$(dirname "${0}")" # run from the script's directory; quoted so paths with spaces work
+dropdb --if-exists contrib_regression # full option name; do not rely on getopt abbreviation
+createdb contrib_regression
+PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
+patch -p1 <regular.patch
+trap 'patch -R -p1 <regular.patch' EXIT # always revert the patch, even when pg_regress fails under `set -e`
+${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression pg_ivm create_immv refresh_immv
--- a/docker-compose/ext-src/pg_ivm-src/regular.patch
+++ b/docker-compose/ext-src/pg_ivm-src/regular.patch
@@ -0,0 +1,309 @@
+diff --git a/expected/pg_ivm.out b/expected/pg_ivm.out
+index e8798ee..4081680 100644
+--- a/expected/pg_ivm.out
+++ b/expected/pg_ivm.out
+@@ -1363,61 +1363,6 @@ SELECT * FROM mv ORDER BY i;
+    |   2 |   4 |                 2 |                 2 |             2
+ (1 row)
+ 
+-ROLLBACK;
+--- IMMV containing user defined type
+-BEGIN;
+-CREATE TYPE mytype;
+-CREATE FUNCTION mytype_in(cstring)
+- RETURNS mytype AS 'int4in'
+- LANGUAGE INTERNAL STRICT IMMUTABLE;
+-NOTICE:  return type mytype is only a shell
+-CREATE FUNCTION mytype_out(mytype)
+- RETURNS cstring AS 'int4out'
+- LANGUAGE INTERNAL STRICT IMMUTABLE;
+-NOTICE:  argument type mytype is only a shell
+-CREATE TYPE mytype (
+- LIKE = int4,
+- INPUT = mytype_in,
+- OUTPUT = mytype_out
+-);
+-CREATE FUNCTION mytype_eq(mytype, mytype)
+- RETURNS bool AS 'int4eq'
+- LANGUAGE INTERNAL STRICT IMMUTABLE;
+-CREATE FUNCTION mytype_lt(mytype, mytype)
+- RETURNS bool AS 'int4lt'
+- LANGUAGE INTERNAL STRICT IMMUTABLE;
+-CREATE FUNCTION mytype_cmp(mytype, mytype)
+- RETURNS integer AS 'btint4cmp'
+- LANGUAGE INTERNAL STRICT IMMUTABLE;
+-CREATE OPERATOR = (
+- leftarg = mytype, rightarg = mytype,
+- procedure = mytype_eq);
+-CREATE OPERATOR < (
+- leftarg = mytype, rightarg = mytype,
+- procedure = mytype_lt);
+-CREATE OPERATOR CLASS mytype_ops
+- DEFAULT FOR TYPE mytype USING btree AS
+- OPERATOR        1       <,
+- OPERATOR        3       = ,
+- FUNCTION		1		mytype_cmp(mytype,mytype);
+-CREATE TABLE t_mytype (x mytype);
+-SELECT create_immv('mv_mytype',
+- 'SELECT * FROM t_mytype');
+-NOTICE:  could not create an index on immv "mv_mytype" automatically
+-DETAIL:  This target list does not have all the primary key columns, or this view does not contain GROUP BY or DISTINCT clause.
+-HINT:  Create an index on the immv for efficient incremental maintenance.
+- create_immv 
+--------------
+-           0
+-(1 row)
+-
+-INSERT INTO t_mytype VALUES ('1'::mytype);
+-SELECT * FROM mv_mytype;
+- x 
+----
+- 1
+-(1 row)
+-
+ ROLLBACK;
+ -- outer join is not supported
+ SELECT create_immv('mv(a,b)',
+@@ -1510,112 +1455,6 @@ SELECT create_immv('mv_ivm_only_values1', 'values(1)');
+ ERROR:  VALUES is not supported on incrementally maintainable materialized view
+ SELECT create_immv('mv_ivm_only_values2',  'SELECT * FROM (values(1)) AS tmp');
+ ERROR:  VALUES is not supported on incrementally maintainable materialized view
+--- views containing base tables with Row Level Security
+-DROP USER IF EXISTS ivm_admin;
+-NOTICE:  role "ivm_admin" does not exist, skipping
+-DROP USER IF EXISTS ivm_user;
+-NOTICE:  role "ivm_user" does not exist, skipping
+-CREATE USER ivm_admin;
+-CREATE USER ivm_user;
+---- create a table with RLS
+-SET SESSION AUTHORIZATION ivm_admin;
+-CREATE TABLE rls_tbl(id int, data text, owner name);
+-INSERT INTO rls_tbl VALUES
+-  (1,'foo','ivm_user'),
+-  (2,'bar','postgres');
+-CREATE TABLE num_tbl(id int, num text);
+-INSERT INTO num_tbl VALUES
+-  (1,'one'),
+-  (2,'two'),
+-  (3,'three'),
+-  (4,'four'),
+-  (5,'five'),
+-  (6,'six');
+---- Users can access only their own rows
+-CREATE POLICY rls_tbl_policy ON rls_tbl FOR SELECT TO PUBLIC USING(owner = current_user);
+-ALTER TABLE rls_tbl ENABLE ROW LEVEL SECURITY;
+-GRANT ALL on rls_tbl TO PUBLIC;
+-GRANT ALL on num_tbl TO PUBLIC;
+---- create a view owned by ivm_user
+-SET SESSION AUTHORIZATION ivm_user;
+-SELECT create_immv('ivm_rls', 'SELECT * FROM rls_tbl');
+-NOTICE:  could not create an index on immv "ivm_rls" automatically
+-DETAIL:  This target list does not have all the primary key columns, or this view does not contain GROUP BY or DISTINCT clause.
+-HINT:  Create an index on the immv for efficient incremental maintenance.
+- create_immv 
+--------------
+-           1
+-(1 row)
+-
+-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;
+- id | data |  owner   
+-----+------+----------
+-  1 | foo  | ivm_user
+-(1 row)
+-
+-RESET SESSION AUTHORIZATION;
+---- inserts rows owned by different users
+-INSERT INTO rls_tbl VALUES
+-  (3,'baz','ivm_user'),
+-  (4,'qux','postgres');
+-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;
+- id | data |  owner   
+-----+------+----------
+-  1 | foo  | ivm_user
+-  3 | baz  | ivm_user
+-(2 rows)
+-
+---- combination of diffent kinds of commands
+-WITH
+- i AS (INSERT INTO rls_tbl VALUES(5,'quux','postgres'), (6,'corge','ivm_user')),
+- u AS (UPDATE rls_tbl SET owner = 'postgres' WHERE id = 1),
+- u2 AS (UPDATE rls_tbl SET owner = 'ivm_user' WHERE id = 2)
+-SELECT;
+---
+-(1 row)
+-
+-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;
+- id | data  |  owner   
+-----+-------+----------
+-  2 | bar   | ivm_user
+-  3 | baz   | ivm_user
+-  6 | corge | ivm_user
+-(3 rows)
+-
+----
+-SET SESSION AUTHORIZATION ivm_user;
+-SELECT create_immv('ivm_rls2', 'SELECT * FROM rls_tbl JOIN num_tbl USING(id)');
+-NOTICE:  could not create an index on immv "ivm_rls2" automatically
+-DETAIL:  This target list does not have all the primary key columns, or this view does not contain GROUP BY or DISTINCT clause.
+-HINT:  Create an index on the immv for efficient incremental maintenance.
+- create_immv 
+--------------
+-           3
+-(1 row)
+-
+-RESET SESSION AUTHORIZATION;
+-WITH
+- x AS (UPDATE rls_tbl SET data = data || '_2' where id in (3,4)),
+- y AS (UPDATE num_tbl SET num = num || '_2' where id in (3,4))
+-SELECT;
+---
+-(1 row)
+-
+-SELECT * FROM ivm_rls2 ORDER BY 1,2,3;
+- id | data  |  owner   |   num   
+-----+-------+----------+---------
+-  2 | bar   | ivm_user | two
+-  3 | baz_2 | ivm_user | three_2
+-  6 | corge | ivm_user | six
+-(3 rows)
+-
+-DROP TABLE rls_tbl CASCADE;
+-NOTICE:  drop cascades to 2 other objects
+-DETAIL:  drop cascades to table ivm_rls
+-drop cascades to table ivm_rls2
+-DROP TABLE num_tbl CASCADE;
+-DROP USER ivm_user;
+-DROP USER ivm_admin;
+ -- automatic index creation
+ BEGIN;
+ CREATE TABLE base_a (i int primary key, j int);
+diff --git a/sql/pg_ivm.sql b/sql/pg_ivm.sql
+index d3c1a01..203213d 100644
+--- a/sql/pg_ivm.sql
+++ b/sql/pg_ivm.sql
+@@ -454,53 +454,6 @@ DELETE FROM base_t WHERE v = 5;
+ SELECT * FROM mv ORDER BY i;
+ ROLLBACK;
+ 
+--- IMMV containing user defined type
+-BEGIN;
+-
+-CREATE TYPE mytype;
+-CREATE FUNCTION mytype_in(cstring)
+- RETURNS mytype AS 'int4in'
+- LANGUAGE INTERNAL STRICT IMMUTABLE;
+-CREATE FUNCTION mytype_out(mytype)
+- RETURNS cstring AS 'int4out'
+- LANGUAGE INTERNAL STRICT IMMUTABLE;
+-CREATE TYPE mytype (
+- LIKE = int4,
+- INPUT = mytype_in,
+- OUTPUT = mytype_out
+-);
+-
+-CREATE FUNCTION mytype_eq(mytype, mytype)
+- RETURNS bool AS 'int4eq'
+- LANGUAGE INTERNAL STRICT IMMUTABLE;
+-CREATE FUNCTION mytype_lt(mytype, mytype)
+- RETURNS bool AS 'int4lt'
+- LANGUAGE INTERNAL STRICT IMMUTABLE;
+-CREATE FUNCTION mytype_cmp(mytype, mytype)
+- RETURNS integer AS 'btint4cmp'
+- LANGUAGE INTERNAL STRICT IMMUTABLE;
+-
+-CREATE OPERATOR = (
+- leftarg = mytype, rightarg = mytype,
+- procedure = mytype_eq);
+-CREATE OPERATOR < (
+- leftarg = mytype, rightarg = mytype,
+- procedure = mytype_lt);
+-
+-CREATE OPERATOR CLASS mytype_ops
+- DEFAULT FOR TYPE mytype USING btree AS
+- OPERATOR        1       <,
+- OPERATOR        3       = ,
+- FUNCTION		1		mytype_cmp(mytype,mytype);
+-
+-CREATE TABLE t_mytype (x mytype);
+-SELECT create_immv('mv_mytype',
+- 'SELECT * FROM t_mytype');
+-INSERT INTO t_mytype VALUES ('1'::mytype);
+-SELECT * FROM mv_mytype;
+-
+-ROLLBACK;
+-
+ -- outer join is not supported
+ SELECT create_immv('mv(a,b)',
+     'SELECT a.i, b.i FROM mv_base_a a LEFT JOIN mv_base_b b ON a.i=b.i');
+@@ -579,71 +532,6 @@ SELECT create_immv('mv_ivm31', 'SELECT sum(i)/sum(j) FROM mv_base_a');
+ SELECT create_immv('mv_ivm_only_values1', 'values(1)');
+ SELECT create_immv('mv_ivm_only_values2',  'SELECT * FROM (values(1)) AS tmp');
+ 
+-
+--- views containing base tables with Row Level Security
+-DROP USER IF EXISTS ivm_admin;
+-DROP USER IF EXISTS ivm_user;
+-CREATE USER ivm_admin;
+-CREATE USER ivm_user;
+-
+---- create a table with RLS
+-SET SESSION AUTHORIZATION ivm_admin;
+-CREATE TABLE rls_tbl(id int, data text, owner name);
+-INSERT INTO rls_tbl VALUES
+-  (1,'foo','ivm_user'),
+-  (2,'bar','postgres');
+-CREATE TABLE num_tbl(id int, num text);
+-INSERT INTO num_tbl VALUES
+-  (1,'one'),
+-  (2,'two'),
+-  (3,'three'),
+-  (4,'four'),
+-  (5,'five'),
+-  (6,'six');
+-
+---- Users can access only their own rows
+-CREATE POLICY rls_tbl_policy ON rls_tbl FOR SELECT TO PUBLIC USING(owner = current_user);
+-ALTER TABLE rls_tbl ENABLE ROW LEVEL SECURITY;
+-GRANT ALL on rls_tbl TO PUBLIC;
+-GRANT ALL on num_tbl TO PUBLIC;
+-
+---- create a view owned by ivm_user
+-SET SESSION AUTHORIZATION ivm_user;
+-SELECT create_immv('ivm_rls', 'SELECT * FROM rls_tbl');
+-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;
+-RESET SESSION AUTHORIZATION;
+-
+---- inserts rows owned by different users
+-INSERT INTO rls_tbl VALUES
+-  (3,'baz','ivm_user'),
+-  (4,'qux','postgres');
+-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;
+-
+---- combination of diffent kinds of commands
+-WITH
+- i AS (INSERT INTO rls_tbl VALUES(5,'quux','postgres'), (6,'corge','ivm_user')),
+- u AS (UPDATE rls_tbl SET owner = 'postgres' WHERE id = 1),
+- u2 AS (UPDATE rls_tbl SET owner = 'ivm_user' WHERE id = 2)
+-SELECT;
+-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;
+-
+----
+-SET SESSION AUTHORIZATION ivm_user;
+-SELECT create_immv('ivm_rls2', 'SELECT * FROM rls_tbl JOIN num_tbl USING(id)');
+-RESET SESSION AUTHORIZATION;
+-
+-WITH
+- x AS (UPDATE rls_tbl SET data = data || '_2' where id in (3,4)),
+- y AS (UPDATE num_tbl SET num = num || '_2' where id in (3,4))
+-SELECT;
+-SELECT * FROM ivm_rls2 ORDER BY 1,2,3;
+-
+-DROP TABLE rls_tbl CASCADE;
+-DROP TABLE num_tbl CASCADE;
+-
+-DROP USER ivm_user;
+-DROP USER ivm_admin;
+-
+ -- automatic index creation
+ BEGIN;
+ CREATE TABLE base_a (i int primary key, j int);
--- a/docker-compose/ext-src/pg_jsonschema-src/Makefile
+++ b/docker-compose/ext-src/pg_jsonschema-src/Makefile
@@ -1,8 +1,13 @@
 EXTENSION = pg_jsonschema
 DATA = pg_jsonschema--1.0.sql
 REGRESS = jsonschema_valid_api  jsonschema_edge_cases
-REGRESS_OPTS = --load-extension=pg_jsonschema

 PG_CONFIG ?= pg_config
 PGXS := $(shell $(PG_CONFIG) --pgxs)
-include $(PGXS)
+PG_REGRESS := $(dir $(PGXS))../../src/test/regress/pg_regress
+.PHONY: installcheck
+installcheck:
+	dropdb --if-exists contrib_regression
+	createdb contrib_regression
+	psql -d contrib_regression -c "CREATE EXTENSION $(EXTENSION)"
+	$(PG_REGRESS) --use-existing --dbname=contrib_regression $(REGRESS)
--- a/docker-compose/ext-src/pg_roaringbitmap-src/regular-test.sh
+++ b/docker-compose/ext-src/pg_roaringbitmap-src/regular-test.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+set -ex
+cd "$(dirname "${0}")" # run from the script's directory; quoted so paths with spaces work
+dropdb --if-exists contrib_regression # full option name; do not rely on getopt abbreviation
+createdb contrib_regression
+PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
+${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --dbname=contrib_regression roaringbitmap
--- a/docker-compose/ext-src/pg_semver-src/regular-test.sh
+++ b/docker-compose/ext-src/pg_semver-src/regular-test.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+set -ex
+# For v16 it's required to create a type which is impossible without superuser access
+# do not run this test so far
+if [[ "${PG_VERSION}" = v16 ]]; then
+  exit 0
+fi
+cd "$(dirname ${0})"
+dropdb --if-exist contrib_regression
+createdb contrib_regression
+PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
+${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --inputdir=test --dbname=contrib_regression base corpus
--- a/docker-compose/ext-src/pg_session_jwt-src/Makefile
+++ b/docker-compose/ext-src/pg_session_jwt-src/Makefile
@@ -6,4 +6,10 @@ export PGOPTIONS = -c pg_session_jwt.jwk={"crv":"Ed25519","kty":"OKP","x":"R_Abz

 PG_CONFIG ?= pg_config
 PGXS := $(shell $(PG_CONFIG) --pgxs)
-include $(PGXS)
+PG_REGRESS := $(dir $(PGXS))../../src/test/regress/pg_regress
+.PHONY: installcheck
+installcheck:
+	dropdb --if-exists contrib_regression
+	createdb contrib_regression
+	psql -d contrib_regression -c "CREATE EXTENSION $(EXTENSION)"
+	$(PG_REGRESS) --use-existing --dbname=contrib_regression $(REGRESS)
--- a/docker-compose/ext-src/pg_tiktoken-src/Makefile
+++ b/docker-compose/ext-src/pg_tiktoken-src/Makefile
@@ -5,4 +5,6 @@ REGRESS = pg_tiktoken
 installcheck: regression-test

 regression-test:
-	$(PG_REGRESS) --inputdir=. --outputdir=. --dbname=contrib_regression $(REGRESS)
+	dropdb --if-exists contrib_regression
+	createdb contrib_regression
+	$(PG_REGRESS) --inputdir=. --outputdir=. --use-existing --dbname=contrib_regression $(REGRESS)
--- a/docker-compose/ext-src/pg_uuidv7-src/regular-test.sh
+++ b/docker-compose/ext-src/pg_uuidv7-src/regular-test.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+set -ex
+cd "$(dirname "${0}")"
+dropdb --if-exists contrib_regression # full option name; do not rely on getopt abbreviation
+createdb contrib_regression
+PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
+${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --inputdir=test --dbname=contrib_regression 001_setup 002_uuid_generate_v7 003_uuid_v7_to_timestamptz 004_uuid_timestamptz_to_v7 005_uuid_v7_to_timestamp 006_uuid_timestamp_to_v7
--- a/docker-compose/ext-src/pgjwt-src/neon-test.sh
+++ b/docker-compose/ext-src/pgjwt-src/neon-test.sh
@@ -1,4 +1,6 @@
 #!/bin/bash
 set -ex
 cd "$(dirname "${0}")"
-pg_prove test.sql
+dropdb --if-exists contrib_regression
+createdb contrib_regression
+pg_prove -d contrib_regression test.sql
--- a/docker-compose/ext-src/pgrag-src/regular-test.sh
+++ b/docker-compose/ext-src/pgrag-src/regular-test.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+set -ex
+cd "$(dirname "${0}")"
+dropdb --if-exists contrib_regression # full option name; do not rely on getopt abbreviation
+createdb contrib_regression
+psql -d contrib_regression -c "CREATE EXTENSION vector" -c "CREATE EXTENSION rag"
+PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
+${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin'    --use-existing --load-extension=vector --load-extension=rag --dbname=contrib_regression basic_functions text_processing api_keys chunking_functions document_processing embedding_api_functions voyageai_functions
--- a/docker-compose/ext-src/pgtap-src/regular-test.sh
+++ b/docker-compose/ext-src/pgtap-src/regular-test.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+set -ex
+cd "$(dirname "${0}")"
+make installcheck || true  # failure is tolerated; NOTE(review): presumably run to generate test/build/run.sch - confirm
+dropdb --if-exists contrib_regression  # full option name: abbreviations depend on the getopt_long in use
+createdb contrib_regression
+PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
+sed -i '/hastap/d' test/build/run.sch  # drop the hastap test from the schedule
+sed -Ei 's/\b(aretap|enumtap|ownership|privs|usergroup)\b//g' test/build/run.sch  # and remove these test names as well
+${PG_REGRESS} --use-existing --dbname=contrib_regression --bindir='/usr/local/pgsql/bin' --inputdir=test --max-connections=879 --schedule test/schedule/main.sch   --schedule test/build/run.sch
--- a/docker-compose/ext-src/pgvector-src/regular-test.sh
+++ b/docker-compose/ext-src/pgvector-src/regular-test.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+set -ex
+cd "$(dirname "${0}")"
+dropdb --if-exists contrib_regression  # full option name: abbreviations depend on the getopt_long in use
+createdb contrib_regression
+psql -d contrib_regression -c "CREATE EXTENSION vector"  # the extension is created up front, before pg_regress runs
+PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
+${PG_REGRESS} --bindir='/usr/local/pgsql/bin' --inputdir=test --use-existing --dbname=contrib_regression bit btree cast copy halfvec hnsw_bit hnsw_halfvec hnsw_sparsevec hnsw_vector ivfflat_bit ivfflat_halfvec ivfflat_vector sparsevec vector_type
--- a/docker-compose/ext-src/pgx_ulid-src/Makefile
+++ b/docker-compose/ext-src/pgx_ulid-src/Makefile
@@ -4,13 +4,21 @@ PGFILEDESC = "pgx_ulid - ULID type for PostgreSQL"

 PG_CONFIG ?= pg_config
 PGXS := $(shell $(PG_CONFIG) --pgxs)
+PG_REGRESS = $(dir $(PGXS))/../../src/test/regress/pg_regress
 PG_MAJOR_VERSION := $(word 2, $(subst ., , $(shell $(PG_CONFIG) --version)))
 ifeq ($(shell test $(PG_MAJOR_VERSION) -lt 17; echo $$?),0)
-  REGRESS_OPTS = --load-extension=ulid
  REGRESS = 00_ulid_generation 01_ulid_conversions 03_ulid_errors
+  EXTNAME = ulid
 else
-  REGRESS_OPTS = --load-extension=pgx_ulid
  REGRESS = 00_ulid_generation 01_ulid_conversions 02_ulid_conversions 03_ulid_errors
+  EXTNAME = pgx_ulid
 endif

-include $(PGXS)
+.PHONY: installcheck
+installcheck: regression-test
+
+regression-test:
+	dropdb --if-exists contrib_regression
+	createdb contrib_regression
+	psql -d contrib_regression -c "CREATE EXTENSION $(EXTNAME)"
+	$(PG_REGRESS) --inputdir=. --outputdir=. --use-existing --dbname=contrib_regression $(REGRESS)
--- a/docker-compose/ext-src/plv8-src/regular-test.sh
+++ b/docker-compose/ext-src/plv8-src/regular-test.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+set -ex
+cd "$(dirname "${0}")"
+dropdb --if-exists contrib_regression  # full option name: abbreviations depend on the getopt_long in use
+createdb contrib_regression
+PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
+REGRESS="$(make -n installcheck | awk '{print substr($0,index($0,"init-extension"));}')"  # test list = dry-run output from "init-extension" onwards
+REGRESS="${REGRESS/startup_perms/}"  # each substitution below removes one test name from the list
+REGRESS="${REGRESS/startup /}"
+REGRESS="${REGRESS/find_function_perms/}"
+REGRESS="${REGRESS/guc/}"
+${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin'  --use-existing --dbname=contrib_regression ${REGRESS}
--- a/docker-compose/ext-src/postgresql-unit-src/regular-test.sh
+++ b/docker-compose/ext-src/postgresql-unit-src/regular-test.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+set -ex
+cd "$(dirname "${0}")"
+dropdb --if-exists contrib_regression  # full option name: abbreviations depend on the getopt_long in use
+createdb contrib_regression
+PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress  # pg_regress shipped with the installed PGXS tree
+${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression extension tables unit binary unicode prefix units time temperature functions language_functions round derived compare aggregate iec custom crosstab convert
--- a/docker-compose/ext-src/prefix-src/regular-test.sh
+++ b/docker-compose/ext-src/prefix-src/regular-test.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+set -ex
+cd "$(dirname "${0}")"
+dropdb --if-exists contrib_regression  # full option name: abbreviations depend on the getopt_long in use
+createdb contrib_regression
+PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress  # pg_regress shipped with the installed PGXS tree
+${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --dbname=contrib_regression create_extension prefix falcon explain queries
--- a/docker-compose/ext-src/rag_bge_small_en_v15-src/Makefile
+++ b/docker-compose/ext-src/rag_bge_small_en_v15-src/Makefile
@@ -3,8 +3,13 @@ MODULE_big = rag_bge_small_en_v15
 OBJS = $(patsubst %.rs,%.o,$(wildcard src/*.rs))

 REGRESS = basic_functions embedding_functions basic_functions_enhanced embedding_functions_enhanced
-REGRESS_OPTS = --load-extension=vector --load-extension=rag_bge_small_en_v15

 PG_CONFIG = pg_config
 PGXS := $(shell $(PG_CONFIG) --pgxs)
-include $(PGXS)
+PG_REGRESS := $(dir $(PGXS))../../src/test/regress/pg_regress
+# installcheck: recreate contrib_regression, create the extensions, then run pg_regress against that database.
+.PHONY: installcheck
+installcheck:
+	dropdb --if-exists contrib_regression && createdb contrib_regression
+	psql -d contrib_regression -c "CREATE EXTENSION vector" -c "CREATE EXTENSION rag_bge_small_en_v15"
+	$(PG_REGRESS) --use-existing --dbname=contrib_regression $(REGRESS)
--- a/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/Makefile
+++ b/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/Makefile
@@ -3,8 +3,13 @@ MODULE_big = rag_jina_reranker_v1_tiny_en
 OBJS = $(patsubst %.rs,%.o,$(wildcard src/*.rs))

 REGRESS = reranking_functions reranking_functions_enhanced
-REGRESS_OPTS = --load-extension=vector --load-extension=rag_jina_reranker_v1_tiny_en

 PG_CONFIG = pg_config
 PGXS := $(shell $(PG_CONFIG) --pgxs)
-include $(PGXS)
+PG_REGRESS := $(dir $(PGXS))../../src/test/regress/pg_regress
+# installcheck: recreate contrib_regression, create the extensions, then run pg_regress against that database.
+.PHONY: installcheck
+installcheck:
+	dropdb --if-exists contrib_regression && createdb contrib_regression
+	psql -d contrib_regression -c "CREATE EXTENSION vector" -c "CREATE EXTENSION rag_jina_reranker_v1_tiny_en"
+	$(PG_REGRESS) --use-existing --dbname=contrib_regression $(REGRESS)
--- a/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/expected/reranking_functions.out
+++ b/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/expected/reranking_functions.out
@@ -1,25 +1,27 @@
 -- Reranking function tests
-SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon');
- rerank_distance 
+SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);
+ round  
+--------
+ 0.8989
+(1 row)
+
+SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',
+    ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);
+      array      
 -----------------
-       0.8989152
+ {0.8989,1.3018}
 (1 row)

-SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
-    rerank_distance    
-----------------------
- {0.8989152,1.3018152}
+SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);
+  round  
+---------
+ -0.8989
 (1 row)

-SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon');
- rerank_score 
--------------
-   -0.8989152
-(1 row)
-
-SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
-      rerank_score       
-------------------------
- {-0.8989152,-1.3018152}
+SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',
+    ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) as x);
+       array       
+-------------------
+ {-0.8989,-1.3018}
 (1 row)

--- a/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/expected/reranking_functions_enhanced.out
+++ b/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/expected/reranking_functions_enhanced.out
@@ -1,41 +1,41 @@
 -- Reranking function tests - single passage
-SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon');
- rerank_distance 
-----------------
-       0.8989152
+SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);
+ round  
+--------
+ 0.8989
 (1 row)

-SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the tanks fired at the buildings');
- rerank_distance 
-----------------
-       1.3018152
+SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the tanks fired at the buildings')::NUMERIC,4);
+ round  
+--------
+ 1.3018
 (1 row)

-SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('query about cats', 'information about felines');
- rerank_distance 
-----------------
-       1.3133051
+SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('query about cats', 'information about felines')::NUMERIC,4);
+ round  
+--------
+ 1.3133
 (1 row)

-SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('', 'empty query test');
- rerank_distance 
-----------------
-       0.7075559
+SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('', 'empty query test')::NUMERIC,4);
+ round  
+--------
+ 0.7076
 (1 row)

 -- Reranking function tests - array of passages
-SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',
-    ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
-    rerank_distance    
-----------------------
- {0.8989152,1.3018152}
+SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',
+    ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);
+      array      
+-----------------
+ {0.8989,1.3018}
 (1 row)

-SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('query about programming',
-    ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases']);
-          rerank_distance           
------------------------------------
- {0.16591403,0.33475375,0.10132827}
+SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('query about programming',
+    ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases'])) AS x);
+         array          
+------------------------
+ {0.1659,0.3348,0.1013}
 (1 row)

 SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('empty array test', ARRAY[]::text[]);
@@ -45,43 +45,43 @@ SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('empty array test', ARRAY[]:
 (1 row)

 -- Reranking score function tests - single passage
-SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon');
- rerank_score 
--------------
-   -0.8989152
+SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);
+  round  
+---------
+ -0.8989
 (1 row)

-SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the tanks fired at the buildings');
- rerank_score 
--------------
-   -1.3018152
+SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the tanks fired at the buildings')::NUMERIC,4);
+  round  
+---------
+ -1.3018
 (1 row)

-SELECT rag_jina_reranker_v1_tiny_en.rerank_score('query about cats', 'information about felines');
- rerank_score 
--------------
-   -1.3133051
+SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('query about cats', 'information about felines')::NUMERIC,4);
+  round  
+---------
+ -1.3133
 (1 row)

-SELECT rag_jina_reranker_v1_tiny_en.rerank_score('', 'empty query test');
- rerank_score 
--------------
-   -0.7075559
+SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('', 'empty query test')::NUMERIC,4);
+  round  
+---------
+ -0.7076
 (1 row)

 -- Reranking score function tests - array of passages
-SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',
-    ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
-      rerank_score       
-------------------------
- {-0.8989152,-1.3018152}
+SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',
+    ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);
+       array       
+-------------------
+ {-0.8989,-1.3018}
 (1 row)

-SELECT rag_jina_reranker_v1_tiny_en.rerank_score('query about programming',
-    ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases']);
-             rerank_score              
---------------------------------------
- {-0.16591403,-0.33475375,-0.10132827}
+SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('query about programming',
+    ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases'])) AS x);
+           array           
+---------------------------
+ {-0.1659,-0.3348,-0.1013}
 (1 row)

 SELECT rag_jina_reranker_v1_tiny_en.rerank_score('empty array test', ARRAY[]::text[]);
--- a/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/sql/reranking_functions.sql
+++ b/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/sql/reranking_functions.sql
@@ -1,8 +1,10 @@
 -- Reranking function tests
-SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon');
+SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);

-SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
+SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',
+    ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);

-SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon');
+SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);

-SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
+SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',
+    ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) as x);
--- a/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/sql/reranking_functions_enhanced.sql
+++ b/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/sql/reranking_functions_enhanced.sql
@@ -1,35 +1,35 @@
 -- Reranking function tests - single passage
-SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon');
+SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);

-SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the tanks fired at the buildings');
+SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the tanks fired at the buildings')::NUMERIC,4);

-SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('query about cats', 'information about felines');
+SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('query about cats', 'information about felines')::NUMERIC,4);

-SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('', 'empty query test');
+SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('', 'empty query test')::NUMERIC,4);

 -- Reranking function tests - array of passages
-SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',
-    ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
+SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',
+    ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);

-SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('query about programming',
-    ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases']);
+SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('query about programming',
+    ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases'])) AS x);

 SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('empty array test', ARRAY[]::text[]);

 -- Reranking score function tests - single passage
-SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon');
+SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);

-SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the tanks fired at the buildings');
+SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the tanks fired at the buildings')::NUMERIC,4);

-SELECT rag_jina_reranker_v1_tiny_en.rerank_score('query about cats', 'information about felines');
+SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('query about cats', 'information about felines')::NUMERIC,4);

-SELECT rag_jina_reranker_v1_tiny_en.rerank_score('', 'empty query test');
+SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('', 'empty query test')::NUMERIC,4);

 -- Reranking score function tests - array of passages
-SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',
-    ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
+SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',
+    ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);

-SELECT rag_jina_reranker_v1_tiny_en.rerank_score('query about programming',
-    ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases']);
+SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('query about programming',
+    ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases'])) AS x);

 SELECT rag_jina_reranker_v1_tiny_en.rerank_score('empty array test', ARRAY[]::text[]);
--- a/docker-compose/ext-src/rum-src/regular-test.sh
+++ b/docker-compose/ext-src/rum-src/regular-test.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+set -ex
+cd "$(dirname "${0}")"
+dropdb --if-exists contrib_regression  # full option name: abbreviations depend on the getopt_long in use
+createdb contrib_regression
+PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress  # pg_regress shipped with the installed PGXS tree
+${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression rum rum_hash ruminv timestamp orderby orderby_hash altorder altorder_hash limits int2 int4 int8 float4 float8 money oid time timetz date interval macaddr inet cidr text varchar char bytea bit varbit numeric rum_weight expr array
--- a/docker-compose/run-tests.sh
+++ b/docker-compose/run-tests.sh
@@ -1,6 +1,42 @@
 #!/bin/bash
 set -x

+if [[ -v BENCHMARK_CONNSTR ]]; then
+  # Split postgres[ql]://user:password@host[:port]/dbname[?params] into PG*
+  # environment variables. xtrace is paused so the password is not logged.
+  { set +x; } 2>/dev/null
+  uri_no_proto="${BENCHMARK_CONNSTR#postgres://}"
+  uri_no_proto="${uri_no_proto#postgresql://}"
+  base="${uri_no_proto%%\?*}"       # before '?' (the whole string if there is none)
+  if [[ $base =~ ^([^:]+):([^@]+)@([^:/]+):?([0-9]*)/(.+)$ ]]; then
+    export PGUSER="${BASH_REMATCH[1]}"
+    export PGPASSWORD="${BASH_REMATCH[2]}"
+    export PGHOST="${BASH_REMATCH[3]}"
+    export PGPORT="${BASH_REMATCH[4]:-5432}"
+    export PGDATABASE="${BASH_REMATCH[5]}"
+    echo export PGUSER="${PGUSER}"
+    echo export PGPASSWORD="********"
+    echo export PGHOST="${PGHOST}"
+    echo export PGPORT="${PGPORT}"
+    echo export PGDATABASE="${PGDATABASE}"
+  else
+    echo "Invalid PostgreSQL base URI"
+    exit 1
+  fi
+  set -x
+fi
+# -r sets REGULAR_USER=true; the test loop then prefers <ext>/regular-test.sh when it exists.
+REGULAR_USER=false
+while getopts r arg; do
+  case $arg in
+  r) REGULAR_USER=true ;;
+  *) : ;;
+  esac
+done
+# Drop the parsed options only once getopts has finished: shifting inside the
+# loop leaves OPTIND out of step with the positional parameters.
+shift "$((OPTIND - 1))"
+
 extdir=${1}

 cd "${extdir}" || exit 2
@@ -12,6 +48,11 @@ for d in ${LIST}; do
      FAILED="${d} ${FAILED}"
      break
    fi
+    if [[ ${REGULAR_USER} = true ]] && [ -f "${d}"/regular-test.sh ]; then
+       "${d}/regular-test.sh" || FAILED="${d} ${FAILED}"
+       continue
+    fi
+
    if [ -f "${d}/neon-test.sh" ]; then
       "${d}/neon-test.sh" || FAILED="${d} ${FAILED}"
    else
@@ -19,5 +60,8 @@ for d in ${LIST}; do
    fi
 done
 [ -z "${FAILED}" ] && exit 0
+for d in ${FAILED}; do  # FAILED is a space-separated list of extension directories
+  find "${d}" -name regression.diffs -exec cat {} +  # copes with zero or several diff files per directory
+done
 echo "${FAILED}"
 exit 1
--- a/docs/consumption_metrics.md
+++ b/docs/consumption_metrics.md
@@ -13,7 +13,7 @@ For design details see [the RFC](./rfcs/021-metering.md) and [the discussion on
 batch format is
 ```json

-{ "events" : [metric1, metric2, ...]]}
+{ "events" : [metric1, metric2, ...] }

 ```
 See metric format examples below.
@@ -49,11 +49,13 @@ Size of the remote storage (S3) directory.
 This is an absolute, per-tenant metric.

 - `timeline_logical_size`
-Logical size of the data in the timeline
+
+Logical size of the data in the timeline.
 This is an absolute, per-timeline metric.

 - `synthetic_storage_size`
-Size of all tenant's branches including WAL
+
+Size of all tenant's branches including WAL.
 This is the same metric that `tenant/{tenant_id}/size` endpoint returns.
 This is an absolute, per-tenant metric.

@@ -106,10 +108,10 @@ This is an incremental, per-endpoint metric.
 ```

 The metric is incremental, so the value is the difference between the current and the previous value.
-If there is no previous value, the value, the value is the current value and the `start_time` equals `stop_time`.
+If there is no previous value, the value is the current value and the `start_time` equals `stop_time`.

 ### TODO

 - [ ] Handle errors better: currently if one tenant fails to gather metrics, the whole iteration fails and metrics are not sent for any tenant.
 - [ ] Add retries
- [ ] Tune the interval
+- [ ] Tune the interval
--- a/libs/neonart/Cargo.toml
+++ b/libs/neonart/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "neonart"
+version = "0.1.0"
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+tracing.workspace = true
+
+rand.workspace = true # for tests
+zerocopy = "0.8"
--- a/libs/neonart/src/algorithm.rs
+++ b/libs/neonart/src/algorithm.rs
@@ -0,0 +1,377 @@
+mod lock_and_version;
+mod node_ptr;
+mod node_ref;
+
+use std::vec::Vec;
+
+use crate::algorithm::lock_and_version::ResultOrRestart;
+use crate::algorithm::node_ptr::{MAX_PREFIX_LEN, NodePtr};
+use crate::algorithm::node_ref::ChildOrValue;
+use crate::algorithm::node_ref::{NodeRef, ReadLockedNodeRef, WriteLockedNodeRef};
+
+use crate::epoch::EpochPin;
+use crate::{Allocator, Key, Value};
+
+pub(crate) type RootPtr<V> = node_ptr::NodePtr<V>;
+
+pub fn new_root<V: Value>(allocator: &Allocator) -> RootPtr<V> {
+    node_ptr::new_root(allocator)
+}
+
+// Look up 'key' starting from 'root'; an Err from lookup_recurse means "restart from the root".
+pub(crate) fn search<'e, K: Key, V: Value>(
+    key: &K,
+    root: RootPtr<V>,
+    epoch_pin: &'e EpochPin,
+) -> Option<V> {
+    loop {
+        match lookup_recurse(key.as_bytes(), NodeRef::from_root_ptr(root), None, epoch_pin) {
+            Ok(found) => return found,
+            Err(()) => continue, // retry
+        }
+    }
+}
+
+pub(crate) fn update_fn<'e, K: Key, V: Value, F>(
+    key: &K,
+    value_fn: F,
+    root: RootPtr<V>,
+    allocator: &Allocator,
+    epoch_pin: &'e EpochPin,
+) where
+    F: FnOnce(Option<&V>) -> Option<V>,
+{
+    let value_fn_cell = std::cell::Cell::new(Some(value_fn));
+    loop {
+        let root_ref = NodeRef::from_root_ptr(root);
+        let this_value_fn = |arg: Option<&V>| value_fn_cell.take().unwrap()(arg);
+        let key_bytes = key.as_bytes();
+        if let Ok(()) = update_recurse(
+            key_bytes,
+            this_value_fn,
+            root_ref,
+            None,
+            allocator,
+            epoch_pin,
+            0,
+            key_bytes,
+        ) {
+            break;
+        }
+        // retry
+    }
+}
+
+pub(crate) fn dump_tree<'e, V: Value + std::fmt::Debug>(root: RootPtr<V>, epoch_pin: &'e EpochPin) {
+    let root_ref = NodeRef::from_root_ptr(root);
+
+    let _ = dump_recurse(&[], root_ref, &epoch_pin, 0);
+}
+
+// Error means you must retry.
+//
+// This corresponds to the 'lookupOpt' function in the paper
+fn lookup_recurse<'e, V: Value>(
+    key: &[u8],
+    node: NodeRef<'e, V>,
+    parent: Option<ReadLockedNodeRef<V>>,
+    epoch_pin: &'e EpochPin,
+) -> ResultOrRestart<Option<V>> {
+    let rnode = node.read_lock_or_restart()?;
+    if let Some(parent) = parent {
+        parent.read_unlock_or_restart()?;
+    }
+
+    // check if prefix matches, may increment level
+    let prefix_len = if let Some(prefix_len) = rnode.prefix_matches(key) {
+        prefix_len
+    } else {
+        rnode.read_unlock_or_restart()?;
+        return Ok(None);
+    };
+    let key = &key[prefix_len..];
+
+    // find child (or leaf value)
+    let next_node = rnode.find_child_or_value_or_restart(key[0])?;
+
+    match next_node {
+        None => Ok(None), // key not found
+        Some(ChildOrValue::Value(vptr)) => {
+            // safety: It's OK to follow the pointer because we checked the version.
+            let v = unsafe { (*vptr).clone() };
+            Ok(Some(v))
+        }
+        Some(ChildOrValue::Child(v)) => lookup_recurse(&key[1..], v, Some(rnode), epoch_pin),
+    }
+}
+
+// This corresponds to the 'insertOpt' function in the paper
+pub(crate) fn update_recurse<'e, V: Value, F>(
+    key: &[u8],
+    value_fn: F,
+    node: NodeRef<'e, V>,
+    rparent: Option<(ReadLockedNodeRef<V>, u8)>,
+    allocator: &Allocator,
+    epoch_pin: &'e EpochPin,
+    level: usize,
+    orig_key: &[u8],
+) -> ResultOrRestart<()>
+where
+    F: FnOnce(Option<&V>) -> Option<V>,
+{
+    let rnode = node.read_lock_or_restart()?;
+
+    let prefix_match_len = rnode.prefix_matches(key);
+    if prefix_match_len.is_none() {
+        let (rparent, parent_key) = rparent.expect("direct children of the root have no prefix");
+        let mut wparent = rparent.upgrade_to_write_lock_or_restart()?;
+        let mut wnode = rnode.upgrade_to_write_lock_or_restart()?;
+
+        if let Some(new_value) = value_fn(None) {
+            insert_split_prefix(
+                key,
+                new_value,
+                &mut wnode,
+                &mut wparent,
+                parent_key,
+                allocator,
+            );
+        }
+        wnode.write_unlock();
+        wparent.write_unlock();
+        return Ok(());
+    }
+    let prefix_match_len = prefix_match_len.unwrap();
+    let key = &key[prefix_match_len as usize..];
+    let level = level + prefix_match_len as usize;
+
+    let next_node = rnode.find_child_or_value_or_restart(key[0])?;
+
+    if next_node.is_none() {
+        if rnode.is_full() {
+            let (rparent, parent_key) = rparent.expect("root node cannot become full");
+            let mut wparent = rparent.upgrade_to_write_lock_or_restart()?;
+            let wnode = rnode.upgrade_to_write_lock_or_restart()?;
+
+            if let Some(new_value) = value_fn(None) {
+                insert_and_grow(key, new_value, &wnode, &mut wparent, parent_key, allocator);
+                wnode.write_unlock_obsolete();
+                wparent.write_unlock();
+            } else {
+                wnode.write_unlock();
+                wparent.write_unlock();
+            }
+        } else {
+            let mut wnode = rnode.upgrade_to_write_lock_or_restart()?;
+            if let Some((rparent, _)) = rparent {
+                rparent.read_unlock_or_restart()?;
+            }
+            if let Some(new_value) = value_fn(None) {
+                insert_to_node(&mut wnode, key, new_value, allocator);
+            }
+            wnode.write_unlock();
+        }
+        return Ok(());
+    } else {
+        let next_node = next_node.unwrap(); // checked above it's not None
+        if let Some((rparent, _)) = rparent {
+            rparent.read_unlock_or_restart()?;
+        }
+
+        match next_node {
+            ChildOrValue::Value(existing_value_ptr) => {
+                assert!(key.len() == 1);
+                let wnode = rnode.upgrade_to_write_lock_or_restart()?;
+
+                // safety: Now that we have acquired the write lock, we have exclusive access to the
+                // value
+                let vmut = unsafe { existing_value_ptr.cast_mut().as_mut() }.unwrap();
+                if let Some(new_value) = value_fn(Some(vmut)) {
+                    *vmut = new_value;
+                } else {
+                    // TODO: Treat this as deletion?
+                }
+                wnode.write_unlock();
+
+                Ok(())
+            }
+            ChildOrValue::Child(next_child) => {
+                // recurse to next level
+                update_recurse(
+                    &key[1..],
+                    value_fn,
+                    next_child,
+                    Some((rnode, key[0])),
+                    allocator,
+                    epoch_pin,
+                    level + 1,
+                    orig_key,
+                )
+            }
+        }
+    }
+}
+
+#[derive(Clone)]
+enum PathElement {
+    Prefix(Vec<u8>),
+    KeyByte(u8),
+}
+
+impl std::fmt::Debug for PathElement {
+    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
+        match self {
+            PathElement::Prefix(prefix) => write!(fmt, "{:?}", prefix),
+            PathElement::KeyByte(key_byte) => write!(fmt, "{}", key_byte),
+        }
+    }
+}
+
+// Print the subtree below 'node' to stderr: one line per child or value found.
+// 'path' lists the prefixes and key bytes that lead to 'node'; 'level' is the
+// nesting depth, used for indentation. Err is the usual restart signal.
+fn dump_recurse<'e, V: Value + std::fmt::Debug>(
+    path: &[PathElement],
+    node: NodeRef<'e, V>,
+    epoch_pin: &'e EpochPin,
+    level: usize,
+) -> ResultOrRestart<()> {
+    let indent = str::repeat(" ", level);
+
+    let rnode = node.read_lock_or_restart()?;
+    let mut path = Vec::from(path);
+    let prefix = rnode.get_prefix();
+    if prefix.len() != 0 {
+        path.push(PathElement::Prefix(Vec::from(prefix)));
+    }
+
+    // Probe every possible key byte. The range must be inclusive: '0..u8::MAX'
+    // would stop at 254 and never show an entry stored under key byte 255.
+    for key_byte in 0..=u8::MAX {
+        match rnode.find_child_or_value_or_restart(key_byte)? {
+            None => continue,
+            Some(ChildOrValue::Child(child_ref)) => {
+                let rchild = child_ref.read_lock_or_restart()?;
+                let child_prefix = rchild.get_prefix();
+                eprintln!("{} {:?}, {}: prefix {:?}", indent, &path, key_byte, child_prefix);
+
+                let mut child_path = path.clone();
+                child_path.push(PathElement::KeyByte(key_byte));
+
+                dump_recurse(&child_path, child_ref, epoch_pin, level + 1)?;
+            }
+            Some(ChildOrValue::Value(val)) => {
+                eprintln!("{} {:?}, {}: {:?}", indent, path, key_byte, unsafe {
+                    val.as_ref().unwrap()
+                });
+            }
+        }
+    }
+
+    Ok(())
+}
+
+///```text
+///        [fooba]r -> value
+///
+/// [foo]b -> [a]r  -> value
+///      e -> [ls]e -> value
+///```
+fn insert_split_prefix<'a, V: Value>(
+    key: &[u8],
+    value: V,
+    node: &mut WriteLockedNodeRef<V>,
+    parent: &mut WriteLockedNodeRef<V>,
+    parent_key: u8,
+    allocator: &Allocator,
+) {
+    let old_node = node;
+    let old_prefix = old_node.get_prefix();
+    let common_prefix_len = common_prefix(key, old_prefix);
+
+    // Allocate a node for the new value.
+    let new_value_node = allocate_node_for_value(&key[common_prefix_len + 1..], value, allocator);
+
+    // Allocate a new internal node with the common prefix
+    let mut prefix_node = node_ref::new_internal(&key[..common_prefix_len], allocator);
+
+    // Add the old node and the new nodes to the new internal node
+    prefix_node.insert_child(old_prefix[common_prefix_len], old_node.as_ptr());
+    prefix_node.insert_child(key[common_prefix_len], new_value_node);
+
+    // Modify the prefix of the old child in place
+    old_node.truncate_prefix(old_prefix.len() - common_prefix_len - 1);
+
+    // replace the pointer in the parent
+    parent.replace_child(parent_key, prefix_node.into_ptr());
+}
+
+fn insert_to_node<V: Value>(
+    wnode: &mut WriteLockedNodeRef<V>,
+    key: &[u8],
+    value: V,
+    allocator: &Allocator,
+) {
+    if wnode.is_leaf() {
+        wnode.insert_value(key[0], value);
+    } else {
+        let value_child = allocate_node_for_value(&key[1..], value, allocator);
+        wnode.insert_child(key[0], value_child);
+    }
+}
+
+// On entry: 'parent' and 'node' are locked
+fn insert_and_grow<V: Value>(
+    key: &[u8],
+    value: V,
+    wnode: &WriteLockedNodeRef<V>,
+    parent: &mut WriteLockedNodeRef<V>,
+    parent_key_byte: u8,
+    allocator: &Allocator,
+) {
+    let mut bigger_node = wnode.grow(allocator);
+
+    if wnode.is_leaf() {
+        bigger_node.insert_value(key[0], value);
+    } else {
+        let value_child = allocate_node_for_value(&key[1..], value, allocator);
+        bigger_node.insert_child(key[0], value_child);
+    }
+
+    // Replace the pointer in the parent
+    parent.replace_child(parent_key_byte, bigger_node.into_ptr());
+}
+
+// Allocate a new leaf node to hold 'value'. If key is long, we may need to allocate
+// new internal nodes to hold it too
+fn allocate_node_for_value<V: Value>(key: &[u8], value: V, allocator: &Allocator) -> NodePtr<V> {
+    let mut prefix_off = key.len().saturating_sub(MAX_PREFIX_LEN + 1);
+
+    let mut leaf_node = node_ref::new_leaf(&key[prefix_off..key.len() - 1], allocator);
+    leaf_node.insert_value(*key.last().unwrap(), value);
+
+    let mut node = leaf_node;
+    while prefix_off > 0 {
+        // Need another internal node
+        let remain_prefix = &key[0..prefix_off];
+
+        prefix_off = remain_prefix.len().saturating_sub(MAX_PREFIX_LEN + 1);
+        let mut internal_node = node_ref::new_internal(
+            &remain_prefix[prefix_off..remain_prefix.len() - 1],
+            allocator,
+        );
+        internal_node.insert_child(*remain_prefix.last().unwrap(), node.into_ptr());
+        node = internal_node;
+    }
+
+    node.into_ptr()
+}
+
+// Number of leading bytes shared by 'a' and 'b', looking at no more than
+// MAX_PREFIX_LEN bytes. The scan stops at the shorter slice, so it cannot index
+// out of bounds. Panics if no differing byte is found; the caller in this file
+// only asks after a prefix mismatch, so that would be a bug.
+fn common_prefix(a: &[u8], b: &[u8]) -> usize {
+    let first_diff = a.iter().zip(b).take(MAX_PREFIX_LEN).position(|(x, y)| x != y);
+    first_diff.expect("prefixes are equal")
+}
--- a/libs/neonart/src/algorithm/lock_and_version.rs
+++ b/libs/neonart/src/algorithm/lock_and_version.rs
@@ -0,0 +1,85 @@
+use std::sync::atomic::{AtomicU64, Ordering};
+
+/// A lock flag, an "obsolete" flag and a version counter packed into one atomic word.
+///
+/// Bit 0 is the obsolete flag, bit 1 is the locked flag, and the bits above them
+/// act as a version counter: releasing the write lock adds to the word so that the
+/// locked bit clears and the carry bumps the version.
+pub(crate) struct AtomicLockAndVersion {
+    inner: AtomicU64,
+}
+
+impl AtomicLockAndVersion {
+    /// Creates a lock word that is unlocked, not obsolete, and at version zero.
+    pub(crate) fn new() -> Self {
+        let inner = AtomicU64::new(0);
+        Self { inner }
+    }
+}
+
+/// Result of an optimistic operation: `Ok` carries the outcome, `Err(())` signals
+/// that the operation must be restarted.
+pub(crate) type ResultOrRestart<T> = Result<T, ()>;
+
+/// Shorthand for the "restart needed" outcome, i.e. `Err(())`.
+const fn restart<T>() -> ResultOrRestart<T> {
+    Err(())
+}
+
+impl AtomicLockAndVersion {
+    /// Waits until the word is not write-locked and returns the version seen.
+    ///
+    /// Returns a restart if the obsolete bit is set in that version.
+    pub(crate) fn read_lock_or_restart(&self) -> ResultOrRestart<u64> {
+        let observed = self.await_node_unlocked();
+        if is_obsolete(observed) {
+            restart()
+        } else {
+            Ok(observed)
+        }
+    }
+
+    /// Same check as [`Self::read_unlock_or_restart`]: succeeds only if the word
+    /// still equals `version`.
+    pub(crate) fn check_or_restart(&self, version: u64) -> ResultOrRestart<()> {
+        self.read_unlock_or_restart(version)
+    }
+
+    /// Succeeds if the word still equals `version`, otherwise asks for a restart.
+    pub(crate) fn read_unlock_or_restart(&self, version: u64) -> ResultOrRestart<()> {
+        let current = self.inner.load(Ordering::Acquire);
+        if current == version {
+            Ok(())
+        } else {
+            restart()
+        }
+    }
+
+    /// Tries to take the write lock, provided the word still equals `version`.
+    ///
+    /// A single compare-and-exchange swaps `version` for the same value with the
+    /// locked bit set; if the word has changed in the meantime, a restart is
+    /// returned and nothing is modified.
+    pub(crate) fn upgrade_to_write_lock_or_restart(&self, version: u64) -> ResultOrRestart<()> {
+        let locked = set_locked_bit(version);
+        match self
+            .inner
+            .compare_exchange(version, locked, Ordering::Acquire, Ordering::Relaxed)
+        {
+            Ok(_) => Ok(()),
+            Err(_) => restart(),
+        }
+    }
+
+    /// Releases the write lock.
+    pub(crate) fn write_unlock(&self) {
+        // reset locked bit and overflow into version
+        self.inner.fetch_add(2, Ordering::Release);
+    }
+
+    /// Releases the write lock and marks the word obsolete at the same time.
+    pub(crate) fn write_unlock_obsolete(&self) {
+        // set obsolete, reset locked, overflow into version
+        self.inner.fetch_add(3, Ordering::Release);
+    }
+
+    // Helper functions
+
+    /// Spins, yielding the thread between attempts, until the locked bit is
+    /// clear; returns the value that was read at that point.
+    fn await_node_unlocked(&self) -> u64 {
+        loop {
+            let version = self.inner.load(Ordering::Acquire);
+            if (version & 2) != 2 {
+                return version;
+            }
+            // spinlock
+            std::thread::yield_now();
+        }
+    }
+}
+
+/// Returns `version` with 2 (the locked bit) added. The caller is expected to
+/// pass a value whose locked bit is clear.
+fn set_locked_bit(version: u64) -> u64 {
+    version + 2
+}
+
+/// Tells whether the obsolete flag (the lowest bit) is set in `version`.
+fn is_obsolete(version: u64) -> bool {
+    (version & 1) != 0
+}
--- a/libs/neonart/src/algorithm/node_ptr.rs
+++ b/libs/neonart/src/algorithm/node_ptr.rs
@@ -0,0 +1,983 @@
+use std::marker::PhantomData;
+use std::ptr::NonNull;
+
+use super::lock_and_version::AtomicLockAndVersion;
+
+use crate::Allocator;
+use crate::Value;
+
+/// Size in bytes of the `prefix` array embedded in every node, i.e. the longest
+/// key prefix a single node can hold.
+pub(crate) const MAX_PREFIX_LEN: usize = 8;
+
+/// Discriminator stored at the start of every node. It says which concrete node
+/// struct a type-erased node pointer refers to: internal or leaf, and with room
+/// for 4, 16, 48 or 256 entries.
+enum NodeTag {
+    Internal4,
+    Internal16,
+    Internal48,
+    Internal256,
+    Leaf4,
+    Leaf16,
+    Leaf48,
+    Leaf256,
+}
+
+/// The two fields that every node struct starts with. All node structs are
+/// `#[repr(C)]` and begin with these same fields, which is what allows a node of
+/// unknown type to be inspected through a `*mut NodeBase`.
+#[repr(C)]
+struct NodeBase {
+    tag: NodeTag,
+    lock_and_version: AtomicLockAndVersion,
+}
+
+/// Type-erased raw pointer to a node of any kind; may be null. The concrete node
+/// type is recovered from the node's `tag`.
+pub(crate) struct NodePtr<V> {
+    ptr: *mut NodeBase,
+
+    // Ties the pointer to the value type stored in the tree without owning a V
+    phantom_value: PhantomData<V>,
+}
+
+impl<V> std::fmt::Debug for NodePtr<V> {
+    /// Prints the address the pointer holds, in hexadecimal with a `0x` prefix.
+    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
+        // The "0x" prefix announces hexadecimal, so the address has to be
+        // rendered with {:x}; plain {} would print decimal digits after "0x".
+        write!(fmt, "0x{:x}", self.ptr.addr())
+    }
+}
+
+// Implemented by hand rather than derived so that no `V: Copy` / `V: Clone`
+// bound is required: only the raw pointer is duplicated.
+impl<V> Copy for NodePtr<V> {}
+impl<V> Clone for NodePtr<V> {
+    fn clone(&self) -> NodePtr<V> {
+        *self
+    }
+}
+
+/// Shared reference to a node, typed according to the node's tag.
+enum NodeVariant<'a, V> {
+    Internal4(&'a NodeInternal4<V>),
+    Internal16(&'a NodeInternal16<V>),
+    Internal48(&'a NodeInternal48<V>),
+    Internal256(&'a NodeInternal256<V>),
+    Leaf4(&'a NodeLeaf4<V>),
+    Leaf16(&'a NodeLeaf16<V>),
+    Leaf48(&'a NodeLeaf48<V>),
+    Leaf256(&'a NodeLeaf256<V>),
+}
+
+/// Mutable reference to a node, typed according to the node's tag.
+enum NodeVariantMut<'a, V> {
+    Internal4(&'a mut NodeInternal4<V>),
+    Internal16(&'a mut NodeInternal16<V>),
+    Internal48(&'a mut NodeInternal48<V>),
+    Internal256(&'a mut NodeInternal256<V>),
+    Leaf4(&'a mut NodeLeaf4<V>),
+    Leaf16(&'a mut NodeLeaf16<V>),
+    Leaf48(&'a mut NodeLeaf48<V>),
+    Leaf256(&'a mut NodeLeaf256<V>),
+}
+
+/// What a lookup of one key byte in a node yields: a child node pointer when the
+/// node is internal, or a raw pointer to the stored value when it is a leaf.
+pub(crate) enum ChildOrValuePtr<V> {
+    Child(NodePtr<V>),
+    Value(*const V),
+}
+
+/// Internal node with room for 4 children. The first `num_children` entries of
+/// `child_keys` and `child_ptrs` are in use and correspond by index; lookups scan
+/// them linearly.
+#[repr(C)]
+struct NodeInternal4<V> {
+    tag: NodeTag,
+    lock_and_version: AtomicLockAndVersion,
+
+    // Only the first `prefix_len` bytes of `prefix` are meaningful
+    prefix: [u8; MAX_PREFIX_LEN],
+    prefix_len: u8,
+    num_children: u8,
+
+    child_keys: [u8; 4],
+    child_ptrs: [NodePtr<V>; 4],
+}
+
+/// Internal node with room for 16 children. The first `num_children` entries of
+/// `child_keys` and `child_ptrs` are in use and correspond by index; lookups scan
+/// them linearly.
+#[repr(C)]
+struct NodeInternal16<V> {
+    tag: NodeTag,
+    lock_and_version: AtomicLockAndVersion,
+
+    // Only the first `prefix_len` bytes of `prefix` are meaningful
+    prefix: [u8; MAX_PREFIX_LEN],
+    prefix_len: u8,
+
+    num_children: u8,
+    child_keys: [u8; 16],
+    child_ptrs: [NodePtr<V>; 16],
+}
+
+/// Marker stored in the `child_indexes` table of the 48-entry nodes for key bytes
+/// that have no entry.
+const INVALID_CHILD_INDEX: u8 = u8::MAX;
+
+/// Internal node with room for 48 children. `child_indexes` is indexed by key
+/// byte and holds either the position of that key's child in `child_ptrs` or
+/// `INVALID_CHILD_INDEX`.
+#[repr(C)]
+struct NodeInternal48<V> {
+    tag: NodeTag,
+    lock_and_version: AtomicLockAndVersion,
+
+    // Only the first `prefix_len` bytes of `prefix` are meaningful
+    prefix: [u8; MAX_PREFIX_LEN],
+    prefix_len: u8,
+
+    num_children: u8,
+    child_indexes: [u8; 256],
+    child_ptrs: [NodePtr<V>; 48],
+}
+
+/// Internal node with one child slot per possible key byte. `child_ptrs` is
+/// indexed directly by key byte; a null pointer means there is no such child.
+#[repr(C)]
+pub(crate) struct NodeInternal256<V> {
+    tag: NodeTag,
+    lock_and_version: AtomicLockAndVersion,
+
+    // Only the first `prefix_len` bytes of `prefix` are meaningful
+    prefix: [u8; MAX_PREFIX_LEN],
+    prefix_len: u8,
+
+    // u16 because a full node has 256 children, which does not fit in a u8
+    num_children: u16,
+    child_ptrs: [NodePtr<V>; 256],
+}
+
+/// Leaf node with room for 4 values. The first `num_values` entries of
+/// `child_keys` and `child_values` are in use and correspond by index; lookups
+/// scan them linearly.
+#[repr(C)]
+struct NodeLeaf4<V> {
+    tag: NodeTag,
+    lock_and_version: AtomicLockAndVersion,
+
+    // Only the first `prefix_len` bytes of `prefix` are meaningful
+    prefix: [u8; MAX_PREFIX_LEN],
+    prefix_len: u8,
+
+    num_values: u8,
+    child_keys: [u8; 4],
+    child_values: [Option<V>; 4],
+}
+
+/// Leaf node with room for 16 values. The first `num_values` entries of
+/// `child_keys` and `child_values` are in use and correspond by index; lookups
+/// scan them linearly.
+#[repr(C)]
+struct NodeLeaf16<V> {
+    tag: NodeTag,
+    lock_and_version: AtomicLockAndVersion,
+
+    // Only the first `prefix_len` bytes of `prefix` are meaningful
+    prefix: [u8; MAX_PREFIX_LEN],
+    prefix_len: u8,
+
+    num_values: u8,
+    child_keys: [u8; 16],
+    child_values: [Option<V>; 16],
+}
+
+/// Leaf node with room for 48 values. `child_indexes` is indexed by key byte and
+/// holds either the position of that key's value in `child_values` or
+/// `INVALID_CHILD_INDEX`.
+#[repr(C)]
+struct NodeLeaf48<V> {
+    tag: NodeTag,
+    lock_and_version: AtomicLockAndVersion,
+
+    // Only the first `prefix_len` bytes of `prefix` are meaningful
+    prefix: [u8; MAX_PREFIX_LEN],
+    prefix_len: u8,
+
+    num_values: u8,
+    child_indexes: [u8; 256],
+    child_values: [Option<V>; 48],
+}
+
+/// Leaf node with one value slot per possible key byte. `child_values` is indexed
+/// directly by key byte; `None` means no value is stored for that byte.
+#[repr(C)]
+struct NodeLeaf256<V> {
+    tag: NodeTag,
+    lock_and_version: AtomicLockAndVersion,
+
+    // Only the first `prefix_len` bytes of `prefix` are meaningful
+    prefix: [u8; MAX_PREFIX_LEN],
+    prefix_len: u8,
+
+    // u16 because a full node has 256 values, which does not fit in a u8
+    num_values: u16,
+    child_values: [Option<V>; 256],
+}
+
+// Operations that need no bound on V: type queries, access to the lock word,
+// null handling, and recovery of the concrete node type from the tag.
+impl<V> NodePtr<V> {
+    /// Returns true if the node is one of the leaf node types.
+    ///
+    /// The pointer must not be null, because the node's tag is read.
+    pub(crate) fn is_leaf(&self) -> bool {
+        match self.variant() {
+            NodeVariant::Internal4(_) => false,
+            NodeVariant::Internal16(_) => false,
+            NodeVariant::Internal48(_) => false,
+            NodeVariant::Internal256(_) => false,
+            NodeVariant::Leaf4(_) => true,
+            NodeVariant::Leaf16(_) => true,
+            NodeVariant::Leaf48(_) => true,
+            NodeVariant::Leaf256(_) => true,
+        }
+    }
+
+    /// Returns the node's lock-and-version word.
+    ///
+    /// The pointer must not be null, because the node's tag is read.
+    pub(crate) fn lockword(&self) -> &AtomicLockAndVersion {
+        match self.variant() {
+            NodeVariant::Internal4(n) => &n.lock_and_version,
+            NodeVariant::Internal16(n) => &n.lock_and_version,
+            NodeVariant::Internal48(n) => &n.lock_and_version,
+            NodeVariant::Internal256(n) => &n.lock_and_version,
+            NodeVariant::Leaf4(n) => &n.lock_and_version,
+            NodeVariant::Leaf16(n) => &n.lock_and_version,
+            NodeVariant::Leaf48(n) => &n.lock_and_version,
+            NodeVariant::Leaf256(n) => &n.lock_and_version,
+        }
+    }
+
+    /// Returns true if this is the null pointer.
+    pub(crate) fn is_null(&self) -> bool {
+        self.ptr.is_null()
+    }
+
+    /// The null pointer; used to fill empty child slots.
+    pub(crate) const fn null() -> NodePtr<V> {
+        NodePtr {
+            ptr: std::ptr::null_mut(),
+            phantom_value: PhantomData,
+        }
+    }
+
+    /// Reads the node's tag and returns a shared reference of the matching
+    /// concrete node type.
+    ///
+    /// The unsafe block dereferences `self.ptr` and reinterprets it according to
+    /// the tag, so the pointer must be non-null and must point to a live node
+    /// whose tag matches its real type. Nothing here checks that.
+    fn variant(&self) -> NodeVariant<V> {
+        unsafe {
+            match (*self.ptr).tag {
+                NodeTag::Internal4 => NodeVariant::Internal4(
+                    NonNull::new_unchecked(self.ptr.cast::<NodeInternal4<V>>()).as_ref(),
+                ),
+                NodeTag::Internal16 => NodeVariant::Internal16(
+                    NonNull::new_unchecked(self.ptr.cast::<NodeInternal16<V>>()).as_ref(),
+                ),
+                NodeTag::Internal48 => NodeVariant::Internal48(
+                    NonNull::new_unchecked(self.ptr.cast::<NodeInternal48<V>>()).as_ref(),
+                ),
+                NodeTag::Internal256 => NodeVariant::Internal256(
+                    NonNull::new_unchecked(self.ptr.cast::<NodeInternal256<V>>()).as_ref(),
+                ),
+                NodeTag::Leaf4 => NodeVariant::Leaf4(
+                    NonNull::new_unchecked(self.ptr.cast::<NodeLeaf4<V>>()).as_ref(),
+                ),
+                NodeTag::Leaf16 => NodeVariant::Leaf16(
+                    NonNull::new_unchecked(self.ptr.cast::<NodeLeaf16<V>>()).as_ref(),
+                ),
+                NodeTag::Leaf48 => NodeVariant::Leaf48(
+                    NonNull::new_unchecked(self.ptr.cast::<NodeLeaf48<V>>()).as_ref(),
+                ),
+                NodeTag::Leaf256 => NodeVariant::Leaf256(
+                    NonNull::new_unchecked(self.ptr.cast::<NodeLeaf256<V>>()).as_ref(),
+                ),
+            }
+        }
+    }
+
+    /// Mutable counterpart of `variant`: reads the tag and returns a mutable
+    /// reference of the matching concrete node type.
+    ///
+    /// The same pointer validity requirements as for `variant` apply.
+    /// NOTE(review): exclusive access to the node is presumably guaranteed by
+    /// the caller holding the node's write lock - confirm.
+    fn variant_mut(&mut self) -> NodeVariantMut<V> {
+        unsafe {
+            match (*self.ptr).tag {
+                NodeTag::Internal4 => NodeVariantMut::Internal4(
+                    NonNull::new_unchecked(self.ptr.cast::<NodeInternal4<V>>()).as_mut(),
+                ),
+                NodeTag::Internal16 => NodeVariantMut::Internal16(
+                    NonNull::new_unchecked(self.ptr.cast::<NodeInternal16<V>>()).as_mut(),
+                ),
+                NodeTag::Internal48 => NodeVariantMut::Internal48(
+                    NonNull::new_unchecked(self.ptr.cast::<NodeInternal48<V>>()).as_mut(),
+                ),
+                NodeTag::Internal256 => NodeVariantMut::Internal256(
+                    NonNull::new_unchecked(self.ptr.cast::<NodeInternal256<V>>()).as_mut(),
+                ),
+                NodeTag::Leaf4 => NodeVariantMut::Leaf4(
+                    NonNull::new_unchecked(self.ptr.cast::<NodeLeaf4<V>>()).as_mut(),
+                ),
+                NodeTag::Leaf16 => NodeVariantMut::Leaf16(
+                    NonNull::new_unchecked(self.ptr.cast::<NodeLeaf16<V>>()).as_mut(),
+                ),
+                NodeTag::Leaf48 => NodeVariantMut::Leaf48(
+                    NonNull::new_unchecked(self.ptr.cast::<NodeLeaf48<V>>()).as_mut(),
+                ),
+                NodeTag::Leaf256 => NodeVariantMut::Leaf256(
+                    NonNull::new_unchecked(self.ptr.cast::<NodeLeaf256<V>>()).as_mut(),
+                ),
+            }
+        }
+    }
+}
+
+// Node operations, dispatched to the concrete node type behind the pointer.
+impl<V: Value> NodePtr<V> {
+    /// Checks whether `key` starts with this node's prefix.
+    ///
+    /// Returns the prefix length on a match and `None` otherwise. Panics if the
+    /// key is shorter than the prefix.
+    pub(crate) fn prefix_matches(&self, key: &[u8]) -> Option<usize> {
+        let node_prefix = self.get_prefix();
+        assert!(node_prefix.len() <= key.len()); // because we only use fixed-size keys
+        key.starts_with(node_prefix).then_some(node_prefix.len())
+    }
+
+    /// Returns the valid part of the node's prefix.
+    pub(crate) fn get_prefix(&self) -> &[u8] {
+        match self.variant() {
+            NodeVariant::Leaf4(leaf) => leaf.get_prefix(),
+            NodeVariant::Leaf16(leaf) => leaf.get_prefix(),
+            NodeVariant::Leaf48(leaf) => leaf.get_prefix(),
+            NodeVariant::Leaf256(leaf) => leaf.get_prefix(),
+            NodeVariant::Internal4(inner) => inner.get_prefix(),
+            NodeVariant::Internal16(inner) => inner.get_prefix(),
+            NodeVariant::Internal48(inner) => inner.get_prefix(),
+            NodeVariant::Internal256(inner) => inner.get_prefix(),
+        }
+    }
+
+    /// Returns true if the node has no room for another entry.
+    pub(crate) fn is_full(&self) -> bool {
+        match self.variant() {
+            NodeVariant::Leaf4(leaf) => leaf.is_full(),
+            NodeVariant::Leaf16(leaf) => leaf.is_full(),
+            NodeVariant::Leaf48(leaf) => leaf.is_full(),
+            NodeVariant::Leaf256(leaf) => leaf.is_full(),
+            NodeVariant::Internal4(inner) => inner.is_full(),
+            NodeVariant::Internal16(inner) => inner.is_full(),
+            NodeVariant::Internal48(inner) => inner.is_full(),
+            NodeVariant::Internal256(inner) => inner.is_full(),
+        }
+    }
+
+    /// Looks up `key_byte` in the node. Internal nodes yield the child pointer,
+    /// leaf nodes a raw pointer to the stored value; `None` if there is no entry.
+    pub(crate) fn find_child_or_value(&self, key_byte: u8) -> Option<ChildOrValuePtr<V>> {
+        match self.variant() {
+            NodeVariant::Leaf4(leaf) => leaf
+                .get_leaf_value(key_byte)
+                .map(|value| ChildOrValuePtr::Value(value as *const V)),
+            NodeVariant::Leaf16(leaf) => leaf
+                .get_leaf_value(key_byte)
+                .map(|value| ChildOrValuePtr::Value(value as *const V)),
+            NodeVariant::Leaf48(leaf) => leaf
+                .get_leaf_value(key_byte)
+                .map(|value| ChildOrValuePtr::Value(value as *const V)),
+            NodeVariant::Leaf256(leaf) => leaf
+                .get_leaf_value(key_byte)
+                .map(|value| ChildOrValuePtr::Value(value as *const V)),
+            NodeVariant::Internal4(inner) => {
+                inner.find_child(key_byte).map(ChildOrValuePtr::Child)
+            }
+            NodeVariant::Internal16(inner) => {
+                inner.find_child(key_byte).map(ChildOrValuePtr::Child)
+            }
+            NodeVariant::Internal48(inner) => {
+                inner.find_child(key_byte).map(ChildOrValuePtr::Child)
+            }
+            NodeVariant::Internal256(inner) => {
+                inner.find_child(key_byte).map(ChildOrValuePtr::Child)
+            }
+        }
+    }
+
+    /// Shortens the node's prefix to `new_prefix_len` bytes, keeping the bytes
+    /// at the end of the current prefix.
+    pub(crate) fn truncate_prefix(&mut self, new_prefix_len: usize) {
+        match self.variant_mut() {
+            NodeVariantMut::Leaf4(leaf) => leaf.truncate_prefix(new_prefix_len),
+            NodeVariantMut::Leaf16(leaf) => leaf.truncate_prefix(new_prefix_len),
+            NodeVariantMut::Leaf48(leaf) => leaf.truncate_prefix(new_prefix_len),
+            NodeVariantMut::Leaf256(leaf) => leaf.truncate_prefix(new_prefix_len),
+            NodeVariantMut::Internal4(inner) => inner.truncate_prefix(new_prefix_len),
+            NodeVariantMut::Internal16(inner) => inner.truncate_prefix(new_prefix_len),
+            NodeVariantMut::Internal48(inner) => inner.truncate_prefix(new_prefix_len),
+            NodeVariantMut::Internal256(inner) => inner.truncate_prefix(new_prefix_len),
+        }
+    }
+
+    /// Allocates a node of the next larger size holding a copy of this node's
+    /// contents. Panics for the 256-entry node types, which cannot grow.
+    pub(crate) fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
+        match self.variant() {
+            NodeVariant::Internal256(_) => panic!("cannot grow Internal256 node"),
+            NodeVariant::Leaf256(_) => panic!("cannot grow Leaf256 node"),
+            NodeVariant::Internal4(inner) => inner.grow(allocator),
+            NodeVariant::Internal16(inner) => inner.grow(allocator),
+            NodeVariant::Internal48(inner) => inner.grow(allocator),
+            NodeVariant::Leaf4(leaf) => leaf.grow(allocator),
+            NodeVariant::Leaf16(leaf) => leaf.grow(allocator),
+            NodeVariant::Leaf48(leaf) => leaf.grow(allocator),
+        }
+    }
+
+    /// Adds `child` under `key_byte`. Panics if called on a leaf node.
+    pub(crate) fn insert_child(&mut self, key_byte: u8, child: NodePtr<V>) {
+        match self.variant_mut() {
+            NodeVariantMut::Leaf4(_)
+            | NodeVariantMut::Leaf16(_)
+            | NodeVariantMut::Leaf48(_)
+            | NodeVariantMut::Leaf256(_) => panic!("insert_child called on leaf node"),
+            NodeVariantMut::Internal4(inner) => inner.insert_child(key_byte, child),
+            NodeVariantMut::Internal16(inner) => inner.insert_child(key_byte, child),
+            NodeVariantMut::Internal48(inner) => inner.insert_child(key_byte, child),
+            NodeVariantMut::Internal256(inner) => inner.insert_child(key_byte, child),
+        }
+    }
+
+    /// Overwrites the existing child under `key_byte` with `replacement`.
+    /// Panics if called on a leaf node.
+    pub(crate) fn replace_child(&mut self, key_byte: u8, replacement: NodePtr<V>) {
+        match self.variant_mut() {
+            NodeVariantMut::Leaf4(_)
+            | NodeVariantMut::Leaf16(_)
+            | NodeVariantMut::Leaf48(_)
+            | NodeVariantMut::Leaf256(_) => panic!("replace_child called on leaf node"),
+            NodeVariantMut::Internal4(inner) => inner.replace_child(key_byte, replacement),
+            NodeVariantMut::Internal16(inner) => inner.replace_child(key_byte, replacement),
+            NodeVariantMut::Internal48(inner) => inner.replace_child(key_byte, replacement),
+            NodeVariantMut::Internal256(inner) => inner.replace_child(key_byte, replacement),
+        }
+    }
+
+    /// Stores `value` under `key_byte`. Panics if called on an internal node.
+    pub(crate) fn insert_value(&mut self, key_byte: u8, value: V) {
+        match self.variant_mut() {
+            NodeVariantMut::Leaf4(leaf) => leaf.insert_value(key_byte, value),
+            NodeVariantMut::Leaf16(leaf) => leaf.insert_value(key_byte, value),
+            NodeVariantMut::Leaf48(leaf) => leaf.insert_value(key_byte, value),
+            NodeVariantMut::Leaf256(leaf) => leaf.insert_value(key_byte, value),
+            NodeVariantMut::Internal4(_)
+            | NodeVariantMut::Internal16(_)
+            | NodeVariantMut::Internal48(_)
+            | NodeVariantMut::Internal256(_) => panic!("insert_value called on internal node"),
+        }
+    }
+}
+
+/// Allocates an empty 256-way internal node, with an empty prefix, to serve as
+/// the root of a tree.
+pub fn new_root<V: Value>(allocator: &Allocator) -> NodePtr<V> {
+    let root = allocator.alloc(NodeInternal256::<V>::new());
+    root.as_ptr().into()
+}
+
+/// Allocates an empty 4-way internal node with the given prefix.
+///
+/// # Panics
+///
+/// Panics if `prefix` is longer than `MAX_PREFIX_LEN`.
+pub fn new_internal<V: Value>(prefix: &[u8], allocator: &Allocator) -> NodePtr<V> {
+    // Fail up front, so that nothing is allocated for a request that is going
+    // to panic anyway
+    assert!(
+        prefix.len() <= MAX_PREFIX_LEN,
+        "prefix of {} bytes does not fit in a node",
+        prefix.len()
+    );
+
+    let mut node = allocator.alloc(NodeInternal4 {
+        tag: NodeTag::Internal4,
+        lock_and_version: AtomicLockAndVersion::new(),
+
+        // Unused prefix bytes are zeroed, as in NodeInternal256::new
+        prefix: [0; MAX_PREFIX_LEN],
+        prefix_len: prefix.len() as u8,
+        num_children: 0,
+
+        child_keys: [0; 4],
+        child_ptrs: [const { NodePtr::null() }; 4],
+    });
+    node.prefix[0..prefix.len()].copy_from_slice(prefix);
+
+    node.as_ptr().into()
+}
+
+/// Allocates an empty 4-value leaf node with the given prefix.
+///
+/// # Panics
+///
+/// Panics if `prefix` is longer than `MAX_PREFIX_LEN`.
+pub fn new_leaf<V: Value>(prefix: &[u8], allocator: &Allocator) -> NodePtr<V> {
+    // Fail up front, so that nothing is allocated for a request that is going
+    // to panic anyway
+    assert!(
+        prefix.len() <= MAX_PREFIX_LEN,
+        "prefix of {} bytes does not fit in a node",
+        prefix.len()
+    );
+
+    let mut node = allocator.alloc(NodeLeaf4 {
+        tag: NodeTag::Leaf4,
+        lock_and_version: AtomicLockAndVersion::new(),
+
+        // Unused prefix bytes are zeroed, as in NodeInternal256::new
+        prefix: [0; MAX_PREFIX_LEN],
+        prefix_len: prefix.len() as u8,
+        num_values: 0,
+
+        child_keys: [0; 4],
+        child_values: [const { None }; 4],
+    });
+    node.prefix[0..prefix.len()].copy_from_slice(prefix);
+
+    node.as_ptr().into()
+}
+
+impl<V: Value> NodeInternal4<V> {
+    /// The valid part of the stored prefix.
+    fn get_prefix(&self) -> &[u8] {
+        let len = usize::from(self.prefix_len);
+        &self.prefix[..len]
+    }
+
+    /// Shortens the prefix to its last `new_prefix_len` bytes. The new length
+    /// must be strictly smaller than the current one.
+    fn truncate_prefix(&mut self, new_prefix_len: usize) {
+        assert!(new_prefix_len < self.prefix_len as usize);
+        let old_len = usize::from(self.prefix_len);
+        // Slide the surviving tail down to the start of the array
+        self.prefix
+            .copy_within(old_len - new_prefix_len..old_len, 0);
+        self.prefix_len = new_prefix_len as u8;
+    }
+
+    /// Linear search for the child stored under `key`.
+    fn find_child(&self, key: u8) -> Option<NodePtr<V>> {
+        let used = usize::from(self.num_children);
+        self.child_keys[..used]
+            .iter()
+            .position(|&k| k == key)
+            .map(|slot| self.child_ptrs[slot])
+    }
+
+    /// Overwrites the child stored under `key_byte`; panics if there is none.
+    fn replace_child(&mut self, key_byte: u8, replacement: NodePtr<V>) {
+        let used = usize::from(self.num_children);
+        match self.child_keys[..used].iter().position(|&k| k == key_byte) {
+            Some(slot) => self.child_ptrs[slot] = replacement,
+            None => panic!("could not re-find parent with key {}", key_byte),
+        }
+    }
+
+    fn is_full(&self) -> bool {
+        self.num_children == 4
+    }
+
+    /// Appends a child in the next free slot. The node must not be full; no
+    /// check is made for an existing entry with the same key byte.
+    fn insert_child(&mut self, key_byte: u8, child: NodePtr<V>) {
+        assert!(self.num_children < 4);
+
+        let slot = usize::from(self.num_children);
+        self.child_keys[slot] = key_byte;
+        self.child_ptrs[slot] = child;
+        self.num_children += 1;
+    }
+
+    /// Allocates a 16-way node holding the same prefix and children.
+    fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
+        let used = usize::from(self.num_children);
+        let mut node16 = allocator.alloc(NodeInternal16 {
+            tag: NodeTag::Internal16,
+            lock_and_version: AtomicLockAndVersion::new(),
+
+            prefix: self.prefix,
+            prefix_len: self.prefix_len,
+            num_children: self.num_children,
+
+            child_keys: [0; 16],
+            child_ptrs: [const { NodePtr::null() }; 16],
+        });
+        node16.child_keys[..used].copy_from_slice(&self.child_keys[..used]);
+        node16.child_ptrs[..used].copy_from_slice(&self.child_ptrs[..used]);
+
+        node16.as_ptr().into()
+    }
+}
+
+impl<V: Value> NodeInternal16<V> {
+    /// The valid part of the stored prefix.
+    fn get_prefix(&self) -> &[u8] {
+        let len = usize::from(self.prefix_len);
+        &self.prefix[..len]
+    }
+
+    /// Shortens the prefix to its last `new_prefix_len` bytes. The new length
+    /// must be strictly smaller than the current one.
+    fn truncate_prefix(&mut self, new_prefix_len: usize) {
+        assert!(new_prefix_len < self.prefix_len as usize);
+        let old_len = usize::from(self.prefix_len);
+        // Slide the surviving tail down to the start of the array
+        self.prefix
+            .copy_within(old_len - new_prefix_len..old_len, 0);
+        self.prefix_len = new_prefix_len as u8;
+    }
+
+    /// Linear search for the child stored under `key_byte`.
+    fn find_child(&self, key_byte: u8) -> Option<NodePtr<V>> {
+        let used = usize::from(self.num_children);
+        self.child_keys[..used]
+            .iter()
+            .position(|&k| k == key_byte)
+            .map(|slot| self.child_ptrs[slot])
+    }
+
+    /// Overwrites the child stored under `key_byte`; panics if there is none.
+    fn replace_child(&mut self, key_byte: u8, replacement: NodePtr<V>) {
+        let used = usize::from(self.num_children);
+        match self.child_keys[..used].iter().position(|&k| k == key_byte) {
+            Some(slot) => self.child_ptrs[slot] = replacement,
+            None => panic!("could not re-find parent with key {}", key_byte),
+        }
+    }
+
+    fn is_full(&self) -> bool {
+        self.num_children == 16
+    }
+
+    /// Appends a child in the next free slot. The node must not be full; no
+    /// check is made for an existing entry with the same key byte.
+    fn insert_child(&mut self, key_byte: u8, child: NodePtr<V>) {
+        assert!(self.num_children < 16);
+
+        let slot = usize::from(self.num_children);
+        self.child_keys[slot] = key_byte;
+        self.child_ptrs[slot] = child;
+        self.num_children += 1;
+    }
+
+    /// Allocates a 48-way node holding the same prefix and children. Children
+    /// keep their slot numbers; the index table is filled in from the key bytes.
+    fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
+        let used = usize::from(self.num_children);
+        let mut node48 = allocator.alloc(NodeInternal48 {
+            tag: NodeTag::Internal48,
+            lock_and_version: AtomicLockAndVersion::new(),
+
+            prefix: self.prefix,
+            prefix_len: self.prefix_len,
+            num_children: self.num_children,
+
+            child_indexes: [INVALID_CHILD_INDEX; 256],
+            child_ptrs: [const { NodePtr::null() }; 48],
+        });
+        let entries = self.child_keys.iter().zip(&self.child_ptrs).take(used);
+        for (slot, (&key_byte, &child)) in entries.enumerate() {
+            node48.child_indexes[usize::from(key_byte)] = slot as u8;
+            node48.child_ptrs[slot] = child;
+        }
+
+        node48.as_ptr().into()
+    }
+}
+
+impl<V: Value> NodeInternal48<V> {
+    /// The valid part of the stored prefix.
+    fn get_prefix(&self) -> &[u8] {
+        let len = usize::from(self.prefix_len);
+        &self.prefix[..len]
+    }
+
+    /// Shortens the prefix to its last `new_prefix_len` bytes. The new length
+    /// must be strictly smaller than the current one.
+    fn truncate_prefix(&mut self, new_prefix_len: usize) {
+        assert!(new_prefix_len < self.prefix_len as usize);
+        let old_len = usize::from(self.prefix_len);
+        // Slide the surviving tail down to the start of the array
+        self.prefix
+            .copy_within(old_len - new_prefix_len..old_len, 0);
+        self.prefix_len = new_prefix_len as u8;
+    }
+
+    /// Looks the key byte up in the index table and follows it to the child.
+    fn find_child(&self, key_byte: u8) -> Option<NodePtr<V>> {
+        match self.child_indexes[usize::from(key_byte)] {
+            INVALID_CHILD_INDEX => None,
+            slot => Some(self.child_ptrs[usize::from(slot)]),
+        }
+    }
+
+    /// Overwrites the child stored under `key_byte`; panics if there is none.
+    fn replace_child(&mut self, key_byte: u8, replacement: NodePtr<V>) {
+        match self.child_indexes[usize::from(key_byte)] {
+            INVALID_CHILD_INDEX => panic!("could not re-find parent with key {}", key_byte),
+            slot => self.child_ptrs[usize::from(slot)] = replacement,
+        }
+    }
+
+    fn is_full(&self) -> bool {
+        self.num_children == 48
+    }
+
+    /// Stores a child in the next free slot and records that slot in the index
+    /// table. The node must not be full and must not have an entry for
+    /// `key_byte` yet.
+    fn insert_child(&mut self, key_byte: u8, child: NodePtr<V>) {
+        assert!(self.num_children < 48);
+        assert!(self.child_indexes[key_byte as usize] == INVALID_CHILD_INDEX);
+        let slot = self.num_children;
+        self.child_indexes[usize::from(key_byte)] = slot;
+        self.child_ptrs[usize::from(slot)] = child;
+        self.num_children += 1;
+    }
+
+    /// Allocates a 256-way node holding the same prefix and children, with each
+    /// child placed directly at the position of its key byte.
+    fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
+        let mut node256 = allocator.alloc(NodeInternal256 {
+            tag: NodeTag::Internal256,
+            lock_and_version: AtomicLockAndVersion::new(),
+
+            prefix: self.prefix,
+            prefix_len: self.prefix_len,
+            num_children: u16::from(self.num_children),
+
+            child_ptrs: [const { NodePtr::null() }; 256],
+        });
+        for (key_byte, &slot) in self.child_indexes.iter().enumerate() {
+            if slot != INVALID_CHILD_INDEX {
+                node256.child_ptrs[key_byte] = self.child_ptrs[usize::from(slot)];
+            }
+        }
+        node256.as_ptr().into()
+    }
+}
+
+impl<V: Value> NodeInternal256<V> {
+    /// The valid part of the stored prefix.
+    fn get_prefix(&self) -> &[u8] {
+        let len = usize::from(self.prefix_len);
+        &self.prefix[..len]
+    }
+
+    /// Shortens the prefix to its last `new_prefix_len` bytes. The new length
+    /// must be strictly smaller than the current one.
+    fn truncate_prefix(&mut self, new_prefix_len: usize) {
+        assert!(new_prefix_len < self.prefix_len as usize);
+        let old_len = usize::from(self.prefix_len);
+        // Slide the surviving tail down to the start of the array
+        self.prefix
+            .copy_within(old_len - new_prefix_len..old_len, 0);
+        self.prefix_len = new_prefix_len as u8;
+    }
+
+    /// Returns the child at the position of `key_byte`, unless that slot is null.
+    fn find_child(&self, key_byte: u8) -> Option<NodePtr<V>> {
+        let child = self.child_ptrs[usize::from(key_byte)];
+        if child.is_null() { None } else { Some(child) }
+    }
+
+    /// Overwrites the child stored under `key_byte`; panics if the slot is empty.
+    fn replace_child(&mut self, key_byte: u8, replacement: NodePtr<V>) {
+        let slot = &mut self.child_ptrs[usize::from(key_byte)];
+        if slot.is_null() {
+            panic!("could not re-find parent with key {}", key_byte);
+        }
+        *slot = replacement;
+    }
+
+    fn is_full(&self) -> bool {
+        self.num_children == 256
+    }
+
+    /// Stores a child at the position of `key_byte`. The slot must be empty.
+    fn insert_child(&mut self, key_byte: u8, child: NodePtr<V>) {
+        assert!(self.num_children < 256);
+        assert!(self.child_ptrs[key_byte as usize].is_null());
+        let slot = usize::from(key_byte);
+        self.child_ptrs[slot] = child;
+        self.num_children += 1;
+    }
+}
+
+impl<V: Value> NodeLeaf4<V> {
+    /// The valid part of the stored prefix.
+    fn get_prefix(&self) -> &[u8] {
+        &self.prefix[0..self.prefix_len as usize]
+    }
+
+    /// Shortens the prefix to its last `new_prefix_len` bytes. The new length
+    /// must be strictly smaller than the current one.
+    fn truncate_prefix(&mut self, new_prefix_len: usize) {
+        assert!(new_prefix_len < self.prefix_len as usize);
+        let prefix = &mut self.prefix;
+        let offset = self.prefix_len as usize - new_prefix_len;
+        for i in 0..new_prefix_len {
+            prefix[i] = prefix[i + offset];
+        }
+        self.prefix_len = new_prefix_len as u8;
+    }
+
+    /// Linear search for the value stored under `key`.
+    fn get_leaf_value(&self, key: u8) -> Option<&V> {
+        for i in 0..self.num_values {
+            if self.child_keys[i as usize] == key {
+                assert!(self.child_values[i as usize].is_some());
+                return self.child_values[i as usize].as_ref();
+            }
+        }
+        None
+    }
+
+    fn is_full(&self) -> bool {
+        self.num_values == 4
+    }
+
+    /// Appends a value in the next free slot. The node must not be full; no
+    /// check is made for an existing entry with the same key byte.
+    fn insert_value(&mut self, key_byte: u8, value: V) {
+        // This node holds 4 values, so the bound must agree with is_full()
+        assert!(self.num_values < 4);
+
+        let idx = self.num_values as usize;
+        self.child_keys[idx] = key_byte;
+        self.child_values[idx] = Some(value);
+        self.num_values += 1;
+    }
+
+    /// Allocates a 16-value leaf holding the same prefix and clones of the values.
+    fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
+        let mut node16 = allocator.alloc(NodeLeaf16 {
+            tag: NodeTag::Leaf16,
+            lock_and_version: AtomicLockAndVersion::new(),
+
+            prefix: self.prefix.clone(),
+            prefix_len: self.prefix_len,
+            num_values: self.num_values,
+
+            child_keys: [0; 16],
+            child_values: [const { None }; 16],
+        });
+        for i in 0..self.num_values as usize {
+            node16.child_keys[i] = self.child_keys[i];
+            node16.child_values[i] = self.child_values[i].clone();
+        }
+        node16.as_ptr().into()
+    }
+}
+
+impl<V: Value> NodeLeaf16<V> {
+    /// The valid part of the stored prefix.
+    fn get_prefix(&self) -> &[u8] {
+        let len = usize::from(self.prefix_len);
+        &self.prefix[..len]
+    }
+
+    /// Shortens the prefix to its last `new_prefix_len` bytes. The new length
+    /// must be strictly smaller than the current one.
+    fn truncate_prefix(&mut self, new_prefix_len: usize) {
+        assert!(new_prefix_len < self.prefix_len as usize);
+        let old_len = usize::from(self.prefix_len);
+        // Slide the surviving tail down to the start of the array
+        self.prefix
+            .copy_within(old_len - new_prefix_len..old_len, 0);
+        self.prefix_len = new_prefix_len as u8;
+    }
+
+    /// Linear search for the value stored under `key`.
+    fn get_leaf_value(&self, key: u8) -> Option<&V> {
+        let i = (0..self.num_values).find(|&i| self.child_keys[i as usize] == key)?;
+        assert!(self.child_values[i as usize].is_some());
+        self.child_values[i as usize].as_ref()
+    }
+
+    fn is_full(&self) -> bool {
+        self.num_values == 16
+    }
+
+    /// Appends a value in the next free slot. The node must not be full; no
+    /// check is made for an existing entry with the same key byte.
+    fn insert_value(&mut self, key_byte: u8, value: V) {
+        assert!(self.num_values < 16);
+
+        let slot = usize::from(self.num_values);
+        self.child_keys[slot] = key_byte;
+        self.child_values[slot] = Some(value);
+        self.num_values += 1;
+    }
+
+    /// Allocates a 48-value leaf holding the same prefix and clones of the
+    /// values. Values keep their slot numbers; the index table is filled in
+    /// from the key bytes.
+    fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
+        let used = usize::from(self.num_values);
+        let mut node48 = allocator.alloc(NodeLeaf48 {
+            tag: NodeTag::Leaf48,
+            lock_and_version: AtomicLockAndVersion::new(),
+
+            prefix: self.prefix,
+            prefix_len: self.prefix_len,
+            num_values: self.num_values,
+
+            child_indexes: [INVALID_CHILD_INDEX; 256],
+            child_values: [const { None }; 48],
+        });
+        let entries = self.child_keys.iter().zip(&self.child_values).take(used);
+        for (slot, (&key_byte, value)) in entries.enumerate() {
+            node48.child_indexes[usize::from(key_byte)] = slot as u8;
+            node48.child_values[slot] = value.clone();
+        }
+        node48.as_ptr().into()
+    }
+}
+
+impl<V: Value> NodeLeaf48<V> {
+    /// The valid part of the stored prefix.
+    fn get_prefix(&self) -> &[u8] {
+        let len = usize::from(self.prefix_len);
+        &self.prefix[..len]
+    }
+
+    /// Shortens the prefix to its last `new_prefix_len` bytes. The new length
+    /// must be strictly smaller than the current one.
+    fn truncate_prefix(&mut self, new_prefix_len: usize) {
+        assert!(new_prefix_len < self.prefix_len as usize);
+        let old_len = usize::from(self.prefix_len);
+        // Slide the surviving tail down to the start of the array
+        self.prefix
+            .copy_within(old_len - new_prefix_len..old_len, 0);
+        self.prefix_len = new_prefix_len as u8;
+    }
+
+    /// Looks the key byte up in the index table and follows it to the value.
+    fn get_leaf_value(&self, key: u8) -> Option<&V> {
+        let idx = self.child_indexes[usize::from(key)];
+        if idx == INVALID_CHILD_INDEX {
+            return None;
+        }
+        assert!(self.child_values[idx as usize].is_some());
+        self.child_values[idx as usize].as_ref()
+    }
+
+    fn is_full(&self) -> bool {
+        self.num_values == 48
+    }
+
+    /// Stores a value in the next free slot and records that slot in the index
+    /// table. The node must not be full and must not have an entry for
+    /// `key_byte` yet.
+    fn insert_value(&mut self, key_byte: u8, value: V) {
+        assert!(self.num_values < 48);
+        assert!(self.child_indexes[key_byte as usize] == INVALID_CHILD_INDEX);
+        let slot = self.num_values;
+        self.child_indexes[usize::from(key_byte)] = slot;
+        self.child_values[usize::from(slot)] = Some(value);
+        self.num_values += 1;
+    }
+
+    /// Allocates a 256-value leaf holding the same prefix and clones of the
+    /// values, with each value placed directly at the position of its key byte.
+    fn grow(&self, allocator: &Allocator) -> NodePtr<V> {
+        let mut node256 = allocator.alloc(NodeLeaf256 {
+            tag: NodeTag::Leaf256,
+            lock_and_version: AtomicLockAndVersion::new(),
+
+            prefix: self.prefix,
+            prefix_len: self.prefix_len,
+            num_values: u16::from(self.num_values),
+
+            child_values: [const { None }; 256],
+        });
+        for (key_byte, &slot) in self.child_indexes.iter().enumerate() {
+            if slot != INVALID_CHILD_INDEX {
+                node256.child_values[key_byte] = self.child_values[usize::from(slot)].clone();
+            }
+        }
+        node256.as_ptr().into()
+    }
+}
+
+impl<V: Value> NodeLeaf256<V> {
+    /// The valid part of the stored prefix.
+    fn get_prefix(&self) -> &[u8] {
+        let len = usize::from(self.prefix_len);
+        &self.prefix[..len]
+    }
+
+    /// Shortens the prefix to its last `new_prefix_len` bytes. The new length
+    /// must be strictly smaller than the current one.
+    fn truncate_prefix(&mut self, new_prefix_len: usize) {
+        assert!(new_prefix_len < self.prefix_len as usize);
+        let old_len = usize::from(self.prefix_len);
+        // Slide the surviving tail down to the start of the array
+        self.prefix
+            .copy_within(old_len - new_prefix_len..old_len, 0);
+        self.prefix_len = new_prefix_len as u8;
+    }
+
+    /// Returns the value at the position of `key`, if one is stored there.
+    fn get_leaf_value(&self, key: u8) -> Option<&V> {
+        self.child_values[usize::from(key)].as_ref()
+    }
+
+    fn is_full(&self) -> bool {
+        self.num_values == 256
+    }
+
+    /// Stores a value at the position of `key_byte`. The slot must be empty.
+    fn insert_value(&mut self, key_byte: u8, value: V) {
+        assert!(self.num_values < 256);
+        assert!(self.child_values[key_byte as usize].is_none());
+        let slot = usize::from(key_byte);
+        self.child_values[slot] = Some(value);
+        self.num_values += 1;
+    }
+}
+
+impl<V: Value> NodeInternal256<V> {
+    /// Builds an empty 256-way internal node: no prefix, no children, and a
+    /// fresh lock word.
+    pub(crate) fn new() -> Self {
+        Self {
+            tag: NodeTag::Internal256,
+            lock_and_version: AtomicLockAndVersion::new(),
+
+            prefix: [0; MAX_PREFIX_LEN],
+            prefix_len: 0,
+            num_children: 0,
+
+            child_ptrs: [const { NodePtr::null() }; 256],
+        }
+    }
+}
+
+impl<V: Value> From<*mut NodeInternal4<V>> for NodePtr<V> {
+    /// Erases the concrete node type; the node's tag still identifies it.
+    fn from(val: *mut NodeInternal4<V>) -> Self {
+        let ptr = val.cast::<NodeBase>();
+        Self {
+            ptr,
+            phantom_value: PhantomData,
+        }
+    }
+}
+impl<V: Value> From<*mut NodeInternal16<V>> for NodePtr<V> {
+    /// Erases the concrete node type; the node's tag still identifies it.
+    fn from(val: *mut NodeInternal16<V>) -> Self {
+        let ptr = val.cast::<NodeBase>();
+        Self {
+            ptr,
+            phantom_value: PhantomData,
+        }
+    }
+}
+
+impl<V: Value> From<*mut NodeInternal48<V>> for NodePtr<V> {
+    /// Erases the concrete node type; the node's tag still identifies it.
+    fn from(val: *mut NodeInternal48<V>) -> Self {
+        let ptr = val.cast::<NodeBase>();
+        Self {
+            ptr,
+            phantom_value: PhantomData,
+        }
+    }
+}
+
+impl<V: Value> From<*mut NodeInternal256<V>> for NodePtr<V> {
+    /// Erases the concrete node type; the node's tag still identifies it.
+    fn from(val: *mut NodeInternal256<V>) -> Self {
+        let ptr = val.cast::<NodeBase>();
+        Self {
+            ptr,
+            phantom_value: PhantomData,
+        }
+    }
+}
+
+impl<V: Value> From<*mut NodeLeaf4<V>> for NodePtr<V> {
+    /// Erases the concrete node type; the node's tag still identifies it.
+    fn from(val: *mut NodeLeaf4<V>) -> Self {
+        let ptr = val.cast::<NodeBase>();
+        Self {
+            ptr,
+            phantom_value: PhantomData,
+        }
+    }
+}
+impl<V: Value> From<*mut NodeLeaf16<V>> for NodePtr<V> {
+    /// Erases the concrete node type; the node's tag still identifies it.
+    fn from(val: *mut NodeLeaf16<V>) -> Self {
+        let ptr = val.cast::<NodeBase>();
+        Self {
+            ptr,
+            phantom_value: PhantomData,
+        }
+    }
+}
+
+impl<V: Value> From<*mut NodeLeaf48<V>> for NodePtr<V> {
+    /// Erases the concrete node type; the node's tag still identifies it.
+    fn from(val: *mut NodeLeaf48<V>) -> Self {
+        let ptr = val.cast::<NodeBase>();
+        Self {
+            ptr,
+            phantom_value: PhantomData,
+        }
+    }
+}
+
+impl<V: Value> From<*mut NodeLeaf256<V>> for NodePtr<V> {
+    /// Erases the concrete node type; the node's tag still identifies it.
+    fn from(val: *mut NodeLeaf256<V>) -> Self {
+        let ptr = val.cast::<NodeBase>();
+        Self {
+            ptr,
+            phantom_value: PhantomData,
+        }
+    }
+}
--- a/libs/neonart/src/algorithm/node_ref.rs
+++ b/libs/neonart/src/algorithm/node_ref.rs
@@ -0,0 +1,202 @@
+use std::fmt::Debug;
+use std::marker::PhantomData;
+
+use super::lock_and_version::ResultOrRestart;
+use super::node_ptr;
+use super::node_ptr::ChildOrValuePtr;
+use super::node_ptr::NodePtr;
+use crate::EpochPin;
+use crate::algorithm::lock_and_version::AtomicLockAndVersion;
+use crate::{Allocator, Value};
+
+pub struct NodeRef<'e, V> {
+    ptr: NodePtr<V>,
+
+    phantom: PhantomData<&'e EpochPin>,
+}
+
+impl<'e, V> Debug for NodeRef<'e, V> {
+    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
+        write!(fmt, "{:?}", self.ptr)
+    }
+}
+
+impl<'e, V: Value> NodeRef<'e, V> {
+    pub(crate) fn from_root_ptr(root_ptr: NodePtr<V>) -> NodeRef<'e, V> {
+        NodeRef {
+            ptr: root_ptr,
+            phantom: PhantomData,
+        }
+    }
+
+    pub(crate) fn read_lock_or_restart(&self) -> ResultOrRestart<ReadLockedNodeRef<'e, V>> {
+        let version = self.lockword().read_lock_or_restart()?;
+        Ok(ReadLockedNodeRef {
+            ptr: self.ptr,
+            version,
+            phantom: self.phantom,
+        })
+    }
+
+    fn lockword(&self) -> &AtomicLockAndVersion {
+        self.ptr.lockword()
+    }
+}
+
+/// A reference to a node that has been optimistically read-locked. The functions re-check
+/// the version after each read.
+pub struct ReadLockedNodeRef<'e, V> {
+    ptr: NodePtr<V>,
+    version: u64,
+
+    phantom: PhantomData<&'e EpochPin>,
+}
+
+pub(crate) enum ChildOrValue<'e, V> {
+    Child(NodeRef<'e, V>),
+    Value(*const V),
+}
+
+impl<'e, V: Value> ReadLockedNodeRef<'e, V> {
+    pub(crate) fn is_full(&self) -> bool {
+        self.ptr.is_full()
+    }
+
+    pub(crate) fn get_prefix(&self) -> &[u8] {
+        self.ptr.get_prefix()
+    }
+
+    /// Note: because we're only holding a read lock, the prefix can change concurrently.
+    /// You must be prepared to restart, if read_unlock() returns error later.
+    ///
+    /// Returns the length of the prefix, or None if it's not a match
+    pub(crate) fn prefix_matches(&self, key: &[u8]) -> Option<usize> {
+        self.ptr.prefix_matches(key)
+    }
+
+    pub(crate) fn find_child_or_value_or_restart(
+        &self,
+        key_byte: u8,
+    ) -> ResultOrRestart<Option<ChildOrValue<'e, V>>> {
+        let child_or_value = self.ptr.find_child_or_value(key_byte);
+        self.ptr.lockword().check_or_restart(self.version)?;
+
+        match child_or_value {
+            None => Ok(None),
+            Some(ChildOrValuePtr::Value(vptr)) => Ok(Some(ChildOrValue::Value(vptr))),
+            Some(ChildOrValuePtr::Child(child_ptr)) => Ok(Some(ChildOrValue::Child(NodeRef {
+                ptr: child_ptr,
+                phantom: self.phantom,
+            }))),
+        }
+    }
+
+    pub(crate) fn upgrade_to_write_lock_or_restart(
+        self,
+    ) -> ResultOrRestart<WriteLockedNodeRef<'e, V>> {
+        self.ptr
+            .lockword()
+            .upgrade_to_write_lock_or_restart(self.version)?;
+
+        Ok(WriteLockedNodeRef {
+            ptr: self.ptr,
+            phantom: self.phantom,
+        })
+    }
+
+    pub(crate) fn read_unlock_or_restart(self) -> ResultOrRestart<()> {
+        self.ptr.lockword().check_or_restart(self.version)?;
+        Ok(())
+    }
+}
+
+/// A reference to a node that has been write-locked. The lock is held until the reference
+/// is unlocked explicitly or dropped.
+pub struct WriteLockedNodeRef<'e, V> {
+    ptr: NodePtr<V>,
+    phantom: PhantomData<&'e EpochPin>,
+}
+
+impl<'e, V: Value> WriteLockedNodeRef<'e, V> {
+    pub(crate) fn is_leaf(&self) -> bool {
+        self.ptr.is_leaf()
+    }
+
+    pub(crate) fn write_unlock(mut self) {
+        self.ptr.lockword().write_unlock();
+        self.ptr = NodePtr::null();
+    }
+
+    pub(crate) fn write_unlock_obsolete(mut self) {
+        self.ptr.lockword().write_unlock_obsolete();
+        self.ptr = NodePtr::null();
+    }
+
+    pub(crate) fn get_prefix(&self) -> &[u8] {
+        self.ptr.get_prefix()
+    }
+
+    pub(crate) fn truncate_prefix(&mut self, new_prefix_len: usize) {
+        self.ptr.truncate_prefix(new_prefix_len)
+    }
+
+    pub(crate) fn insert_child(&mut self, key_byte: u8, child: NodePtr<V>) {
+        self.ptr.insert_child(key_byte, child)
+    }
+
+    pub(crate) fn insert_value(&mut self, key_byte: u8, value: V) {
+        self.ptr.insert_value(key_byte, value)
+    }
+
+    pub(crate) fn grow(&self, allocator: &Allocator) -> NewNodeRef<V> {
+        let new_node = self.ptr.grow(allocator);
+        NewNodeRef { ptr: new_node }
+    }
+
+    pub(crate) fn as_ptr(&self) -> NodePtr<V> {
+        self.ptr
+    }
+
+    pub(crate) fn replace_child(&mut self, key_byte: u8, replacement: NodePtr<V>) {
+        self.ptr.replace_child(key_byte, replacement);
+    }
+}
+
+impl<'e, V> Drop for WriteLockedNodeRef<'e, V> {
+    fn drop(&mut self) {
+        if !self.ptr.is_null() {
+            self.ptr.lockword().write_unlock();
+        }
+    }
+}
+
+pub(crate) struct NewNodeRef<V> {
+    ptr: NodePtr<V>,
+}
+
+impl<V: Value> NewNodeRef<V> {
+    pub(crate) fn insert_child(&mut self, key_byte: u8, child: NodePtr<V>) {
+        self.ptr.insert_child(key_byte, child)
+    }
+
+    pub(crate) fn insert_value(&mut self, key_byte: u8, value: V) {
+        self.ptr.insert_value(key_byte, value)
+    }
+
+    /// Consumes this reference and returns the pointer to the new node.
+    pub(crate) fn into_ptr(self) -> NodePtr<V> {
+        self.ptr
+    }
+}
+
+pub(crate) fn new_internal<V: Value>(prefix: &[u8], allocator: &Allocator) -> NewNodeRef<V> {
+    NewNodeRef {
+        ptr: node_ptr::new_internal(prefix, allocator),
+    }
+}
+
+pub(crate) fn new_leaf<V: Value>(prefix: &[u8], allocator: &Allocator) -> NewNodeRef<V> {
+    NewNodeRef {
+        ptr: node_ptr::new_leaf(prefix, allocator),
+    }
+}
--- a/libs/neonart/src/allocator.rs
+++ b/libs/neonart/src/allocator.rs
@@ -0,0 +1,107 @@
+use std::marker::PhantomData;
+use std::mem::MaybeUninit;
+use std::ops::{Deref, DerefMut};
+use std::ptr::NonNull;
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+pub struct Allocator {
+    area: *mut MaybeUninit<u8>,
+    allocated: AtomicUsize,
+    size: usize,
+}
+
+// FIXME: I don't know if these are really safe...
+unsafe impl Send for Allocator {}
+unsafe impl Sync for Allocator {}
+
+#[repr(transparent)]
+pub struct AllocatedBox<'a, T> {
+    inner: NonNull<T>,
+
+    _phantom: PhantomData<&'a Allocator>,
+}
+
+// FIXME: I don't know if these are really safe...
+unsafe impl<'a, T> Send for AllocatedBox<'a, T> {}
+unsafe impl<'a, T> Sync for AllocatedBox<'a, T> {}
+
+impl<T> Deref for AllocatedBox<'_, T> {
+    type Target = T;
+
+    fn deref(&self) -> &T {
+        unsafe { self.inner.as_ref() }
+    }
+}
+
+impl<T> DerefMut for AllocatedBox<'_, T> {
+    fn deref_mut(&mut self) -> &mut T {
+        unsafe { self.inner.as_mut() }
+    }
+}
+
+impl<T> AsMut<T> for AllocatedBox<'_, T> {
+    fn as_mut(&mut self) -> &mut T {
+        unsafe { self.inner.as_mut() }
+    }
+}
+
+impl<T> AllocatedBox<'_, T> {
+    pub fn as_ptr(&self) -> *mut T {
+        self.inner.as_ptr()
+    }
+}
+
+const MAXALIGN: usize = std::mem::align_of::<usize>();
+
+impl Allocator {
+    pub fn new_uninit(area: &'static mut [MaybeUninit<u8>]) -> Allocator {
+        let ptr = area.as_mut_ptr();
+        let size = area.len();
+        Self::new_from_ptr(ptr, size)
+    }
+
+    pub fn new(area: &'static mut [u8]) -> Allocator {
+        let ptr: *mut MaybeUninit<u8> = area.as_mut_ptr().cast();
+        let size = area.len();
+        Self::new_from_ptr(ptr, size)
+    }
+
+    pub fn new_from_ptr(ptr: *mut MaybeUninit<u8>, size: usize) -> Allocator {
+        let padding = ptr.align_offset(MAXALIGN);
+
+        Allocator {
+            area: ptr,
+            allocated: AtomicUsize::new(padding),
+            size,
+        }
+    }
+
+    /// Bump-allocates MAXALIGN-padded space for a `T` and moves `value` into it.
+    /// Panics if `T` needs stricter alignment than MAXALIGN or the area is exhausted.
+    pub fn alloc<'a, T: Sized>(&'a self, value: T) -> AllocatedBox<'a, T> {
+        // pad all allocations to MAXALIGN boundaries
+        assert!(std::mem::align_of::<T>() <= MAXALIGN);
+        let sz = std::mem::size_of::<T>().next_multiple_of(MAXALIGN);
+
+        let offset = self.allocated.fetch_add(sz, Ordering::Relaxed);
+        if offset + sz > self.size {
+            panic!("out of memory");
+        }
+
+        let inner = unsafe {
+            let inner = self.area.add(offset).cast::<T>();
+            // `write`, unlike `*inner = value`, does not drop the uninitialized old contents.
+            inner.write(value);
+            NonNull::new_unchecked(inner)
+        };
+
+        AllocatedBox {
+            inner,
+            _phantom: PhantomData,
+        }
+    }
+
+    pub fn _dealloc_node<T>(&self, _node: AllocatedBox<T>) {
+        // doesn't free it immediately.
+    }
+}
--- a/libs/neonart/src/epoch.rs
+++ b/libs/neonart/src/epoch.rs
@@ -0,0 +1,23 @@
+//! This is similar to crossbeam_epoch crate, but works in shared memory
+//!
+//! FIXME: not implemented yet. (We haven't implemented removing any nodes from the ART
+//! tree, which is why we get away without this now)
+
+pub(crate) struct EpochPin {}
+
+pub(crate) fn pin_epoch() -> EpochPin {
+    EpochPin {}
+}
+
+/*
+struct CollectorGlobal {
+    epoch: AtomicU64,
+
+    participants: CachePadded<AtomicU64>, // make it an array
+}
+
+
+struct CollectorQueue {
+
+}
+*/
--- a/libs/neonart/src/lib.rs
+++ b/libs/neonart/src/lib.rs
@@ -0,0 +1,301 @@
+//! Adaptive Radix Tree (ART) implementation, with Optimistic Lock Coupling.
+//!
+//! The data structure is described in these two papers:
+//!
+//! [1] Leis, V. & Kemper, Alfons & Neumann, Thomas. (2013).
+//!     The adaptive radix tree: ARTful indexing for main-memory databases.
+//!     Proceedings - International Conference on Data Engineering. 38-49. 10.1109/ICDE.2013.6544812.
+//!     https://db.in.tum.de/~leis/papers/ART.pdf
+//!
+//! [2] Leis, Viktor & Scheibner, Florian & Kemper, Alfons & Neumann, Thomas. (2016).
+//!     The ART of practical synchronization.
+//!     1-8. 10.1145/2933349.2933352.
+//!     https://db.in.tum.de/~leis/papers/artsync.pdf
+//!
+//! [1] describes the base data structure, and [2] describes the Optimistic Lock Coupling that we
+//! use.
+//!
+//! The papers mention a few different variants. We have made the following choices in this
+//! implementation:
+//!
+//! - All keys have the same length
+//!
+//! - Multi-value leaves. The values are stored directly in one of the four different leaf node
+//!   types.
+//!
+//! - For collapsing inner nodes, we use the Pessimistic approach, where each inner node stores a
+//!   variable length "prefix", which stores the keys of all the one-way nodes which have been
+//!   removed. However, similar to the "hybrid" approach described in the paper, each node only has
+//!   space for a constant-size prefix of 8 bytes. If a node would have a longer prefix, then we
+//!   create one-way nodes to store them. (There was no particular reason for this choice,
+//!   the "hybrid" approach described in the paper might be better.)
+//!
+//! - For concurrency, we use Optimistic Lock Coupling. The paper [2] also describes another method,
+//!   ROWEX, which generally performs better when there is contention, but that is not important
+//!   for us, and Optimistic Lock Coupling is simpler to implement.
+//!
+//! ## Requirements
+//!
+//! This data structure is currently used for the integrated LFC, relsize and last-written LSN cache
+//! in the compute communicator, part of the 'neon' Postgres extension. We have some unique
+//! requirements, which is why we had to write our own. Namely:
+//!
+//! - The data structure has to live in fixed-sized shared memory segment. That rules out any
+//!   built-in Rust collections and most crates. (Except possibly with the 'allocator_api' rust
+//!   feature, which is still nightly-only and experimental as of this writing).
+//!
+//! - The data structure is accessed from multiple processes. Only one process updates the data
+//!   structure, but other processes perform reads. That rules out using built-in Rust locking
+//!   primitives like Mutex and RwLock, and most crates too.
+//!
+//! - Within the one process with write-access, multiple threads can perform updates concurrently.
+//!   That rules out using PostgreSQL LWLocks for the locking.
+//!
+//! The implementation is generic, and doesn't depend on any PostgreSQL specifics, but it has been
+//! written with that usage and the above constraints in mind. Some noteworthy assumptions:
+//!
+//! - Contention is assumed to be rare. In the integrated cache in PostgreSQL, there's higher level
+//!   locking in the PostgreSQL buffer manager, which ensures that two backends should not try to
+//!   read / write the same page at the same time. (Prefetching can conflict with actual reads,
+//!   however.)
+//!
+//!  - The keys in the integrated cache are 17 bytes long.
+//!
+//! ## Usage
+//!
+//! Because this is designed to be used as a Postgres shared memory data structure, initialization
+//! happens in the following stages:
+//!
+//! 0. A fixed area of shared memory is allocated at postmaster startup.
+//!
+//! 1. TreeInitStruct::new() is called to initialize it, still in Postmaster process, before any
+//!    other process or thread is running. It returns a TreeInitStruct, which is inherited by all
+//!    the processes through fork().
+//!
+//! 2. One process may have write-access to the struct, by calling
+//!    [TreeInitStruct::attach_writer]. (That process is the communicator process.)
+//!
+//! 3. Other processes get read-access to the struct, by calling [TreeInitStruct::attach_reader]
+//!
+//! "Write access" means that you can insert / update / delete values in the tree.
+//!
+//! NOTE: The Values stored in the tree are sometimes moved, when a leaf node fills up and a new
+//! larger node needs to be allocated. The versioning and epoch-based allocator ensure that the data
+//! structure stays consistent, but if the Value has interior mutability, like atomic fields,
+//! updates to such fields might be lost if the leaf node is concurrently moved! If that becomes a
+//! problem, the version check could be passed up to the caller, so that the caller could detect the
+//! lost updates and retry the operation.
+//!
+//! ## Implementation
+//!
+//! node_ptr.rs: Provides low-level implementations of the four different node types (eight actually,
+//! since there is an Internal and Leaf variant of each)
+//!
+//! lock_and_version.rs: Provides an abstraction for the combined lock and version counter on each
+//! node.
+//!
+//! node_ref.rs: The code in node_ptr.rs deals with raw pointers. node_ref.rs provides more type-safe
+//!   abstractions on top.
+//!
+//! algorithm.rs: Contains the functions to implement lookups and updates in the tree
+//!
+//! allocator.rs: Provides a facility to allocate memory for the tree nodes. (We must provide our
+//!   own abstraction for that because we need the data structure to live in a pre-allocated shared
+//!   memory segment).
+//!
+//! epoch.rs: The data structure requires that when a node is removed from the tree, it is not
+//!   immediately deallocated, but stays around for as long as concurrent readers might still have
+//!   pointers to them. This is enforced by an epoch system. This is similar to
+//!   e.g. crossbeam_epoch, but we couldn't use that either because it has to work across processes
+//!   communicating over the shared memory segment.
+//!
+//! ## See also
+//!
+//! There are some existing Rust ART implementations out there, but none of them filled all
+//! the requirements:
+//!
+//! - https://github.com/XiangpengHao/congee
+//! - https://github.com/declanvk/blart
+//!
+//! ## TODO
+//!
+//! - Removing values has not been implemented
+
+mod algorithm;
+mod allocator;
+mod epoch;
+
+use algorithm::RootPtr;
+
+use allocator::AllocatedBox;
+
+use std::fmt::Debug;
+use std::marker::PhantomData;
+use std::sync::atomic::{AtomicBool, Ordering};
+
+use crate::epoch::EpochPin;
+
+#[cfg(test)]
+mod tests;
+
+pub use allocator::Allocator;
+
+/// Fixed-length key type.
+///
+pub trait Key: Clone + Debug {
+    const KEY_LEN: usize;
+
+    fn as_bytes(&self) -> &[u8];
+}
+
+/// Values stored in the tree
+///
+/// Values need to be Cloneable, because when a node "grows", the value is copied to a new node and
+/// the old sticks around until all readers that might see the old value are gone.
+pub trait Value: Clone {}
+
+struct Tree<K: Key, V: Value> {
+    root: RootPtr<V>,
+
+    writer_attached: AtomicBool,
+
+    phantom_key: PhantomData<K>,
+}
+
+/// Struct created at postmaster startup
+pub struct TreeInitStruct<'t, K: Key, V: Value> {
+    tree: AllocatedBox<'t, Tree<K, V>>,
+
+    allocator: &'t Allocator,
+}
+
+/// The worker process has a reference to this. The write operations are only safe
+/// from the worker process
+pub struct TreeWriteAccess<'t, K: Key, V: Value>
+where
+    K: Key,
+    V: Value,
+{
+    tree: AllocatedBox<'t, Tree<K, V>>,
+
+    allocator: &'t Allocator,
+}
+
+/// The backends have a reference to this. It cannot be used to modify the tree
+pub struct TreeReadAccess<'t, K: Key, V: Value>
+where
+    K: Key,
+    V: Value,
+{
+    tree: AllocatedBox<'t, Tree<K, V>>,
+}
+
+impl<'a, 't: 'a, K: Key, V: Value> TreeInitStruct<'t, K, V> {
+    pub fn new(allocator: &'t Allocator) -> TreeInitStruct<'t, K, V> {
+        let tree = allocator.alloc(Tree {
+            root: algorithm::new_root(allocator),
+            writer_attached: AtomicBool::new(false),
+            phantom_key: PhantomData,
+        });
+
+        TreeInitStruct { tree, allocator }
+    }
+
+    pub fn attach_writer(self) -> TreeWriteAccess<'t, K, V> {
+        let previously_attached = self.tree.writer_attached.swap(true, Ordering::Relaxed);
+        if previously_attached {
+            panic!("writer already attached");
+        }
+        TreeWriteAccess {
+            tree: self.tree,
+            allocator: self.allocator,
+        }
+    }
+
+    pub fn attach_reader(self) -> TreeReadAccess<'t, K, V> {
+        TreeReadAccess { tree: self.tree }
+    }
+}
+
+impl<'t, K: Key + Clone, V: Value> TreeWriteAccess<'t, K, V> {
+    pub fn start_write(&'t self) -> TreeWriteGuard<'t, K, V> {
+        // TODO: grab epoch guard
+        TreeWriteGuard {
+            allocator: self.allocator,
+            tree: &self.tree,
+            epoch_pin: epoch::pin_epoch(),
+        }
+    }
+
+    pub fn start_read(&'t self) -> TreeReadGuard<'t, K, V> {
+        TreeReadGuard {
+            tree: &self.tree,
+            epoch_pin: epoch::pin_epoch(),
+        }
+    }
+}
+
+impl<'t, K: Key + Clone, V: Value> TreeReadAccess<'t, K, V> {
+    pub fn start_read(&'t self) -> TreeReadGuard<'t, K, V> {
+        TreeReadGuard {
+            tree: &self.tree,
+            epoch_pin: epoch::pin_epoch(),
+        }
+    }
+}
+
+pub struct TreeReadGuard<'t, K, V>
+where
+    K: Key,
+    V: Value,
+{
+    tree: &'t AllocatedBox<'t, Tree<K, V>>,
+
+    epoch_pin: EpochPin,
+}
+
+impl<'t, K: Key, V: Value> TreeReadGuard<'t, K, V> {
+    pub fn get(&self, key: &K) -> Option<V> {
+        algorithm::search(key, self.tree.root, &self.epoch_pin)
+    }
+}
+
+pub struct TreeWriteGuard<'t, K, V>
+where
+    K: Key,
+    V: Value,
+{
+    tree: &'t AllocatedBox<'t, Tree<K, V>>,
+    allocator: &'t Allocator,
+
+    epoch_pin: EpochPin,
+}
+
+impl<'t, K: Key, V: Value> TreeWriteGuard<'t, K, V> {
+    pub fn insert(&mut self, key: &K, value: V) {
+        self.update_with_fn(key, |_| Some(value))
+    }
+
+    pub fn update_with_fn<F>(&mut self, key: &K, value_fn: F)
+    where
+        F: FnOnce(Option<&V>) -> Option<V>,
+    {
+        algorithm::update_fn(
+            key,
+            value_fn,
+            self.tree.root,
+            self.allocator,
+            &self.epoch_pin,
+        )
+    }
+
+    pub fn get(&mut self, key: &K) -> Option<V> {
+        algorithm::search(key, self.tree.root, &self.epoch_pin)
+    }
+}
+
+impl<'t, K: Key, V: Value + Debug> TreeWriteGuard<'t, K, V> {
+    pub fn dump(&mut self) {
+        algorithm::dump_tree(self.tree.root, &self.epoch_pin)
+    }
+}
--- a/libs/neonart/src/tests.rs
+++ b/libs/neonart/src/tests.rs
@@ -0,0 +1,90 @@
+use std::collections::HashSet;
+
+use crate::Allocator;
+use crate::TreeInitStruct;
+
+use crate::{Key, Value};
+
+use rand::seq::SliceRandom;
+use rand::thread_rng;
+
+const TEST_KEY_LEN: usize = 16;
+
+#[derive(Clone, Copy, Debug)]
+struct TestKey([u8; TEST_KEY_LEN]);
+
+impl Key for TestKey {
+    const KEY_LEN: usize = TEST_KEY_LEN;
+
+    fn as_bytes(&self) -> &[u8] {
+        &self.0
+    }
+}
+
+impl From<u128> for TestKey {
+    fn from(val: u128) -> TestKey {
+        TestKey(val.to_be_bytes())
+    }
+}
+
+impl Value for usize {}
+
+fn test_inserts<K: Into<TestKey> + Copy>(keys: &[K]) {
+    const MEM_SIZE: usize = 10000000;
+    let area = Box::leak(Box::new_uninit_slice(MEM_SIZE));
+
+    let allocator = Box::leak(Box::new(Allocator::new_uninit(area)));
+
+    let init_struct = TreeInitStruct::<TestKey, usize>::new(allocator);
+    let tree_writer = init_struct.attach_writer();
+
+    for (idx, k) in keys.iter().enumerate() {
+        let mut w = tree_writer.start_write();
+        w.insert(&(*k).into(), idx);
+        eprintln!("INSERTED {:?}", Into::<TestKey>::into(*k));
+    }
+
+    //tree_writer.start_read().dump();
+
+    for (idx, k) in keys.iter().enumerate() {
+        let r = tree_writer.start_read();
+        let value = r.get(&(*k).into());
+        assert_eq!(value, Some(idx));
+    }
+}
+
+#[test]
+fn dense() {
+    // This exercises splitting a node with prefix
+    let keys: &[u128] = &[0, 1, 2, 3, 256];
+    test_inserts(keys);
+
+    // Dense keys
+    let mut keys: Vec<u128> = (0..10000).collect();
+    test_inserts(&keys);
+
+    // Do the same in random orders
+    for _ in 1..10 {
+        keys.shuffle(&mut thread_rng());
+        test_inserts(&keys);
+    }
+}
+
+#[test]
+fn sparse() {
+    // Sparse keys: keep drawing random u128 values until we have collected
+    // 10000 distinct ones.
+    let mut seen = HashSet::new();
+    let mut keys: Vec<TestKey> = Vec::new();
+    while keys.len() < 10000 {
+        let candidate = rand::random::<u128>();
+
+        // `insert` returns false if the value was already present, in which
+        // case the duplicate draw is simply discarded.
+        if seen.insert(candidate) {
+            keys.push(candidate.into());
+        }
+    }
+
+    test_inserts(&keys);
+}
--- a/libs/pageserver_api/src/controller_api.rs
+++ b/libs/pageserver_api/src/controller_api.rs
@@ -169,6 +169,8 @@ pub struct TenantDescribeResponseShard {
    pub is_pending_compute_notification: bool,
    /// A shard split is currently underway
    pub is_splitting: bool,
+    /// A timeline is being imported into this tenant
+    pub is_importing: bool,

    pub scheduling_policy: ShardSchedulingPolicy,

--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -1803,6 +1803,8 @@ pub struct TopTenantShardsResponse {
 }

 pub mod virtual_file {
+    use std::sync::LazyLock;
+
    #[derive(
        Copy,
        Clone,
@@ -1840,35 +1842,33 @@ pub mod virtual_file {
    pub enum IoMode {
        /// Uses buffered IO.
        Buffered,
-        /// Uses direct IO, error out if the operation fails.
+        /// Uses direct IO for reads only.
        #[cfg(target_os = "linux")]
        Direct,
+        /// Use direct IO for reads and writes.
+        #[cfg(target_os = "linux")]
+        DirectRw,
    }

    impl IoMode {
        pub fn preferred() -> Self {
            // The default behavior when running Rust unit tests without any further
-            // flags is to use the newest behavior if available on the platform (Direct).
+            // flags is to use the newest behavior (DirectRw).
            // The CI uses the following environment variable to unit tests for all
            // different modes.
            // NB: the Python regression & perf tests have their own defaults management
            // that writes pageserver.toml; they do not use this variable.
            if cfg!(test) {
-                use once_cell::sync::Lazy;
-                static CACHED: Lazy<IoMode> = Lazy::new(|| {
+                static CACHED: LazyLock<IoMode> = LazyLock::new(|| {
                    utils::env::var_serde_json_string(
                        "NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IO_MODE",
                    )
-                    .unwrap_or({
+                    .unwrap_or(
                        #[cfg(target_os = "linux")]
-                        {
-                            IoMode::Direct
-                        }
+                        IoMode::DirectRw,
                        #[cfg(not(target_os = "linux"))]
-                        {
-                            IoMode::Buffered
-                        }
-                    })
+                        IoMode::Buffered,
+                    )
                });
                *CACHED
            } else {
@@ -1885,6 +1885,8 @@ pub mod virtual_file {
                v if v == (IoMode::Buffered as u8) => IoMode::Buffered,
                #[cfg(target_os = "linux")]
                v if v == (IoMode::Direct as u8) => IoMode::Direct,
+                #[cfg(target_os = "linux")]
+                v if v == (IoMode::DirectRw as u8) => IoMode::DirectRw,
                x => return Err(x),
            })
        }
--- a/libs/tenant_size_model/src/calculation.rs
+++ b/libs/tenant_size_model/src/calculation.rs
@@ -77,7 +77,9 @@ impl StorageModel {
        }

        SizeResult {
-            total_size,
+            // If total_size is 0, it means that the tenant has all timelines offloaded; we need to report 1
+            // here so that the data point shows up in the s3 files.
+            total_size: total_size.max(1),
            segments: segment_results,
        }
    }
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -42,12 +42,14 @@ nix.workspace = true
 num_cpus.workspace = true
 num-traits.workspace = true
 once_cell.workspace = true
+peekable.workspace = true
 pin-project-lite.workspace = true
 postgres_backend.workspace = true
 postgres-protocol.workspace = true
 postgres-types.workspace = true
 postgres_initdb.workspace = true
 pprof.workspace = true
+prost.workspace = true
 rand.workspace = true
 range-set-blaze = { version = "0.1.16", features = ["alloc"] }
 regex.workspace = true
@@ -60,6 +62,7 @@ serde_path_to_error.workspace = true
 serde_with.workspace = true
 sysinfo.workspace = true
 tokio-tar.workspace = true
+tonic.workspace = true
 thiserror.workspace = true
 tikv-jemallocator.workspace = true
 tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] }
@@ -76,6 +79,7 @@ url.workspace = true
 walkdir.workspace = true
 metrics.workspace = true
 pageserver_api.workspace = true
+pageserver_page_api.workspace = true
 pageserver_client.workspace = true # for ResponseErrorMessageExt TOOD refactor that
 pageserver_compaction.workspace = true
 pem.workspace = true
@@ -106,6 +110,7 @@ hex-literal.workspace = true
 tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time", "test-util"] }
 indoc.workspace = true
 uuid.workspace = true
+rstest.workspace = true

 [[bench]]
 name = "bench_layer_map"
--- a/pageserver/benches/bench_ingest.rs
+++ b/pageserver/benches/bench_ingest.rs
@@ -248,6 +248,8 @@ fn criterion_benchmark(c: &mut Criterion) {
            IoMode::Buffered,
            #[cfg(target_os = "linux")]
            IoMode::Direct,
+            #[cfg(target_os = "linux")]
+            IoMode::DirectRw,
        ] {
            for param in expect.clone() {
                let HandPickedParameters {
@@ -309,78 +311,114 @@ cargo bench --bench bench_ingest
 im4gn.2xlarge:

 ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes
-                        time:   [1.8491 s 1.8540 s 1.8592 s]
-                        thrpt:  [68.847 MiB/s 69.039 MiB/s 69.222 MiB/s]
+                        time:   [1.2901 s 1.2943 s 1.2991 s]
+                        thrpt:  [98.533 MiB/s 98.892 MiB/s 99.220 MiB/s]
 ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes
-                        time:   [2.6976 s 2.7123 s 2.7286 s]
-                        thrpt:  [46.911 MiB/s 47.193 MiB/s 47.450 MiB/s]
+                        time:   [2.1387 s 2.1623 s 2.1845 s]
+                        thrpt:  [58.595 MiB/s 59.197 MiB/s 59.851 MiB/s]
 ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Y...
-                        time:   [1.7433 s 1.7510 s 1.7600 s]
-                        thrpt:  [72.729 MiB/s 73.099 MiB/s 73.423 MiB/s]
+                        time:   [1.2036 s 1.2074 s 1.2122 s]
+                        thrpt:  [105.60 MiB/s 106.01 MiB/s 106.35 MiB/s]
 ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No
-                        time:   [499.63 ms 500.07 ms 500.46 ms]
-                        thrpt:  [255.77 MiB/s 255.96 MiB/s 256.19 MiB/s]
+                        time:   [520.55 ms 521.46 ms 522.57 ms]
+                        thrpt:  [244.94 MiB/s 245.47 MiB/s 245.89 MiB/s]
 ingest/io_mode=Buffered volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes
-                        time:   [456.97 ms 459.61 ms 461.92 ms]
-                        thrpt:  [277.11 MiB/s 278.50 MiB/s 280.11 MiB/s]
+                        time:   [440.33 ms 442.24 ms 444.10 ms]
+                        thrpt:  [288.22 MiB/s 289.43 MiB/s 290.69 MiB/s]
 ingest/io_mode=Buffered volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No
-                        time:   [158.82 ms 159.16 ms 159.56 ms]
-                        thrpt:  [802.22 MiB/s 804.24 MiB/s 805.93 MiB/s]
+                        time:   [168.78 ms 169.42 ms 170.18 ms]
+                        thrpt:  [752.16 MiB/s 755.52 MiB/s 758.40 MiB/s]
 ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes
-                        time:   [1.8856 s 1.8997 s 1.9179 s]
-                        thrpt:  [66.740 MiB/s 67.380 MiB/s 67.882 MiB/s]
+                        time:   [1.2978 s 1.3094 s 1.3227 s]
+                        thrpt:  [96.775 MiB/s 97.758 MiB/s 98.632 MiB/s]
 ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes
-                        time:   [2.7468 s 2.7625 s 2.7785 s]
-                        thrpt:  [46.068 MiB/s 46.335 MiB/s 46.600 MiB/s]
+                        time:   [2.1976 s 2.2067 s 2.2154 s]
+                        thrpt:  [57.777 MiB/s 58.006 MiB/s 58.245 MiB/s]
 ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Yes
-                        time:   [1.7689 s 1.7726 s 1.7767 s]
-                        thrpt:  [72.045 MiB/s 72.208 MiB/s 72.363 MiB/s]
+                        time:   [1.2103 s 1.2160 s 1.2233 s]
+                        thrpt:  [104.64 MiB/s 105.26 MiB/s 105.76 MiB/s]
 ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No
-                        time:   [497.64 ms 498.60 ms 499.67 ms]
-                        thrpt:  [256.17 MiB/s 256.72 MiB/s 257.21 MiB/s]
+                        time:   [525.05 ms 526.37 ms 527.79 ms]
+                        thrpt:  [242.52 MiB/s 243.17 MiB/s 243.79 MiB/s]
 ingest/io_mode=Direct volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes
-                        time:   [493.72 ms 505.07 ms 518.03 ms]
-                        thrpt:  [247.09 MiB/s 253.43 MiB/s 259.26 MiB/s]
+                        time:   [443.06 ms 444.88 ms 447.15 ms]
+                        thrpt:  [286.26 MiB/s 287.72 MiB/s 288.90 MiB/s]
 ingest/io_mode=Direct volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No
-                        time:   [267.76 ms 267.85 ms 267.96 ms]
-                        thrpt:  [477.69 MiB/s 477.88 MiB/s 478.03 MiB/s]
+                        time:   [169.40 ms 169.80 ms 170.17 ms]
+                        thrpt:  [752.21 MiB/s 753.81 MiB/s 755.60 MiB/s]
+ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes
+                        time:   [1.2844 s 1.2915 s 1.2990 s]
+                        thrpt:  [98.536 MiB/s 99.112 MiB/s 99.657 MiB/s]
+ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes
+                        time:   [2.1431 s 2.1663 s 2.1900 s]
+                        thrpt:  [58.446 MiB/s 59.087 MiB/s 59.726 MiB/s]
+ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Y...
+                        time:   [1.1906 s 1.1926 s 1.1947 s]
+                        thrpt:  [107.14 MiB/s 107.33 MiB/s 107.51 MiB/s]
+ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No
+                        time:   [516.86 ms 518.25 ms 519.47 ms]
+                        thrpt:  [246.40 MiB/s 246.98 MiB/s 247.65 MiB/s]
+ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes
+                        time:   [536.50 ms 536.53 ms 536.60 ms]
+                        thrpt:  [238.54 MiB/s 238.57 MiB/s 238.59 MiB/s]
+ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No
+                        time:   [267.77 ms 267.90 ms 268.04 ms]
+                        thrpt:  [477.53 MiB/s 477.79 MiB/s 478.02 MiB/s]

 Hetzner AX102:

 ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes
-                        time:   [1.0683 s 1.1006 s 1.1386 s]
-                        thrpt:  [112.42 MiB/s 116.30 MiB/s 119.82 MiB/s]
+                        time:   [836.58 ms 861.93 ms 886.57 ms]
+                        thrpt:  [144.38 MiB/s 148.50 MiB/s 153.00 MiB/s]
 ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes
-                        time:   [1.5719 s 1.6012 s 1.6228 s]
-                        thrpt:  [78.877 MiB/s 79.938 MiB/s 81.430 MiB/s]
+                        time:   [1.2782 s 1.3191 s 1.3665 s]
+                        thrpt:  [93.668 MiB/s 97.037 MiB/s 100.14 MiB/s]
 ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Y...
-                        time:   [1.1095 s 1.1331 s 1.1580 s]
-                        thrpt:  [110.53 MiB/s 112.97 MiB/s 115.37 MiB/s]
+                        time:   [791.27 ms 807.08 ms 822.95 ms]
+                        thrpt:  [155.54 MiB/s 158.60 MiB/s 161.77 MiB/s]
 ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No
-                        time:   [303.20 ms 307.83 ms 311.90 ms]
-                        thrpt:  [410.39 MiB/s 415.81 MiB/s 422.16 MiB/s]
+                        time:   [310.78 ms 314.66 ms 318.47 ms]
+                        thrpt:  [401.92 MiB/s 406.79 MiB/s 411.87 MiB/s]
 ingest/io_mode=Buffered volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes
-                        time:   [406.34 ms 429.37 ms 451.63 ms]
-                        thrpt:  [283.42 MiB/s 298.11 MiB/s 315.00 MiB/s]
+                        time:   [377.11 ms 387.77 ms 399.21 ms]
+                        thrpt:  [320.63 MiB/s 330.10 MiB/s 339.42 MiB/s]
 ingest/io_mode=Buffered volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No
-                        time:   [134.01 ms 135.78 ms 137.48 ms]
-                        thrpt:  [931.03 MiB/s 942.68 MiB/s 955.12 MiB/s]
+                        time:   [128.37 ms 132.96 ms 138.55 ms]
+                        thrpt:  [923.83 MiB/s 962.69 MiB/s 997.11 MiB/s]
 ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes
-                        time:   [1.0406 s 1.0580 s 1.0772 s]
-                        thrpt:  [118.83 MiB/s 120.98 MiB/s 123.00 MiB/s]
+                        time:   [900.38 ms 914.88 ms 928.86 ms]
+                        thrpt:  [137.80 MiB/s 139.91 MiB/s 142.16 MiB/s]
 ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes
-                        time:   [1.5059 s 1.5339 s 1.5625 s]
-                        thrpt:  [81.920 MiB/s 83.448 MiB/s 84.999 MiB/s]
+                        time:   [1.2538 s 1.2936 s 1.3313 s]
+                        thrpt:  [96.149 MiB/s 98.946 MiB/s 102.09 MiB/s]
 ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Yes
-                        time:   [1.0714 s 1.0934 s 1.1161 s]
-                        thrpt:  [114.69 MiB/s 117.06 MiB/s 119.47 MiB/s]
+                        time:   [787.17 ms 803.89 ms 820.63 ms]
+                        thrpt:  [155.98 MiB/s 159.23 MiB/s 162.61 MiB/s]
 ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No
-                        time:   [262.68 ms 265.14 ms 267.71 ms]
-                        thrpt:  [478.13 MiB/s 482.76 MiB/s 487.29 MiB/s]
+                        time:   [318.78 ms 321.89 ms 324.74 ms]
+                        thrpt:  [394.16 MiB/s 397.65 MiB/s 401.53 MiB/s]
 ingest/io_mode=Direct volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes
-                        time:   [375.19 ms 393.80 ms 411.40 ms]
-                        thrpt:  [311.14 MiB/s 325.04 MiB/s 341.16 MiB/s]
+                        time:   [374.01 ms 383.45 ms 393.20 ms]
+                        thrpt:  [325.53 MiB/s 333.81 MiB/s 342.24 MiB/s]
 ingest/io_mode=Direct volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No
-                        time:   [123.02 ms 123.85 ms 124.66 ms]
-                        thrpt:  [1.0027 GiB/s 1.0093 GiB/s 1.0161 GiB/s]
+                        time:   [137.98 ms 141.31 ms 143.57 ms]
+                        thrpt:  [891.58 MiB/s 905.79 MiB/s 927.66 MiB/s]
+ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes
+                        time:   [613.69 ms 622.48 ms 630.97 ms]
+                        thrpt:  [202.86 MiB/s 205.63 MiB/s 208.57 MiB/s]
+ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes
+                        time:   [1.0299 s 1.0766 s 1.1273 s]
+                        thrpt:  [113.55 MiB/s 118.90 MiB/s 124.29 MiB/s]
+ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Y...
+                        time:   [637.80 ms 647.78 ms 658.01 ms]
+                        thrpt:  [194.53 MiB/s 197.60 MiB/s 200.69 MiB/s]
+ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No
+                        time:   [266.09 ms 267.20 ms 268.31 ms]
+                        thrpt:  [477.06 MiB/s 479.04 MiB/s 481.04 MiB/s]
+ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes
+                        time:   [269.34 ms 273.27 ms 277.69 ms]
+                        thrpt:  [460.95 MiB/s 468.40 MiB/s 475.24 MiB/s]
+ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No
+                        time:   [123.18 ms 124.24 ms 125.15 ms]
+                        thrpt:  [1022.8 MiB/s 1.0061 GiB/s 1.0148 GiB/s]
 */
--- a/pageserver/client_grpc/Cargo.toml
+++ b/pageserver/client_grpc/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "pageserver_client_grpc"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+bytes.workspace = true
+futures.workspace = true
+http.workspace = true
+thiserror.workspace = true
+tonic.workspace = true
+tracing.workspace = true
+
+pageserver_page_api.workspace = true
--- a/pageserver/client_grpc/src/lib.rs
+++ b/pageserver/client_grpc/src/lib.rs
@@ -0,0 +1,235 @@
+//! Pageserver Page API client (gRPC)
+//!
+//! - Manage connections to pageserver
+//! - Send requests to correct shards
+//!
+use std::collections::HashMap;
+use std::sync::RwLock;
+
+use bytes::Bytes;
+use futures::Stream;
+use thiserror::Error;
+use tonic::metadata::AsciiMetadataValue;
+use tonic::transport::Channel;
+
+use pageserver_page_api::model::*;
+use pageserver_page_api::proto;
+
+type Shardno = u16;
+
+use pageserver_page_api::proto::PageServiceClient;
+
+type MyPageServiceClient = pageserver_page_api::proto::PageServiceClient<
+    tonic::service::interceptor::InterceptedService<tonic::transport::Channel, AuthInterceptor>,
+>;
+
+#[derive(Error, Debug)]
+pub enum PageserverClientError {
+    #[error("could not connect to service: {0}")]
+    ConnectError(#[from] tonic::transport::Error),
+    #[error("could not perform request: {0}")]
+    RequestError(#[from] tonic::Status),
+
+    #[error("invalid URI: {0}")]
+    InvalidUri(#[from] http::uri::InvalidUri),
+}
+
+pub struct PageserverClient {
+    _tenant_id: String,
+    _timeline_id: String,
+
+    _auth_token: Option<String>,
+
+    shard_map: HashMap<Shardno, String>,
+
+    channels: RwLock<HashMap<Shardno, Channel>>,
+
+    auth_interceptor: AuthInterceptor,
+}
+
+impl PageserverClient {
+    /// TODO: this doesn't currently react to changes in the shard map.
+    pub fn new(
+        tenant_id: &str,
+        timeline_id: &str,
+        auth_token: &Option<String>,
+        shard_map: HashMap<Shardno, String>,
+    ) -> Self {
+        Self {
+            _tenant_id: tenant_id.to_string(),
+            _timeline_id: timeline_id.to_string(),
+            _auth_token: auth_token.clone(),
+            shard_map,
+            channels: RwLock::new(HashMap::new()),
+            auth_interceptor: AuthInterceptor::new(tenant_id, timeline_id, auth_token.as_ref()),
+        }
+    }
+
+    pub async fn process_rel_exists_request(
+        &self,
+        request: &RelExistsRequest,
+    ) -> Result<bool, PageserverClientError> {
+        // Current sharding model assumes that all metadata is present only at shard 0.
+        let shard_no = 0;
+
+        let mut client = self.get_client(shard_no).await?;
+
+        let request = proto::RelExistsRequest::from(request);
+        let response = client.rel_exists(tonic::Request::new(request)).await?;
+
+        Ok(response.get_ref().exists)
+    }
+
+    pub async fn process_rel_size_request(
+        &self,
+        request: &RelSizeRequest,
+    ) -> Result<u32, PageserverClientError> {
+        // Current sharding model assumes that all metadata is present only at shard 0.
+        let shard_no = 0;
+
+        let mut client = self.get_client(shard_no).await?;
+
+        let request = proto::RelSizeRequest::from(request);
+        let response = client.rel_size(tonic::Request::new(request)).await?;
+
+        Ok(response.get_ref().num_blocks)
+    }
+
+    pub async fn get_page(&self, request: &GetPageRequest) -> Result<Bytes, PageserverClientError> {
+        // FIXME: calculate the shard number correctly
+        let shard_no = 0;
+
+        let mut client = self.get_client(shard_no).await?;
+
+        let request = proto::GetPageRequest::from(request);
+        let response = client.get_page(tonic::Request::new(request)).await?;
+
+        Ok(response.into_inner().page_image)
+    }
+
+    pub async fn get_pages(
+        &self,
+        requests: impl Stream<Item = proto::GetPageRequest> + Send + 'static,
+    ) -> std::result::Result<
+        tonic::Response<tonic::codec::Streaming<proto::GetPageResponse>>,
+        PageserverClientError,
+    > {
+        // FIXME: calculate the shard number correctly
+        let shard_no = 0;
+
+        let mut client = self.get_client(shard_no).await?;
+
+        Ok(client.get_pages(tonic::Request::new(requests)).await?)
+    }
+
+    /// Process a request to get the size of a database.
+    pub async fn process_dbsize_request(
+        &self,
+        request: &DbSizeRequest,
+    ) -> Result<u64, PageserverClientError> {
+        // Current sharding model assumes that all metadata is present only at shard 0.
+        let shard_no = 0;
+
+        let mut client = self.get_client(shard_no).await?;
+
+        let request = proto::DbSizeRequest::from(request);
+        let response = client.db_size(tonic::Request::new(request)).await?;
+
+        Ok(response.get_ref().num_bytes)
+    }
+
+    /// Start a base backup stream; if `gzip` is set, accept gzip-compressed responses.
+    pub async fn get_base_backup(
+        &self,
+        request: &GetBaseBackupRequest,
+        gzip: bool,
+    ) -> std::result::Result<
+        tonic::Response<tonic::codec::Streaming<proto::GetBaseBackupResponseChunk>>,
+        PageserverClientError,
+    > {
+        // Current sharding model assumes that all metadata is present only at shard 0.
+        let shard_no = 0;
+
+        let mut client = self.get_client(shard_no).await?;
+        if gzip {
+            client = client.accept_compressed(tonic::codec::CompressionEncoding::Gzip);
+        }
+
+        let request = proto::GetBaseBackupRequest::from(request);
+        let response = client.get_base_backup(tonic::Request::new(request)).await?;
+
+        Ok(response)
+    }
+
+    /// Get a client for the given shard.
+    ///
+    /// Channels are cached per shard: an existing channel is reused, otherwise a new one is
+    /// connected and cached. Panics if the shard map has no URL for `shard_no`.
+    async fn get_client(
+        &self,
+        shard_no: Shardno,
+    ) -> Result<MyPageServiceClient, PageserverClientError> {
+        let reused_channel: Option<Channel> = {
+            let channels = self.channels.read().unwrap();
+
+            channels.get(&shard_no).cloned()
+        };
+
+        let channel = if let Some(reused_channel) = reused_channel {
+            reused_channel
+        } else {
+            let endpoint: tonic::transport::Endpoint = self
+                .shard_map
+                .get(&shard_no)
+                .unwrap_or_else(|| panic!("no url for shard {shard_no}"))
+                .parse()?;
+            let channel = endpoint.connect().await?;
+
+            // Insert it to the cache so that it can be reused on subsequent calls. It's possible
+            // that another thread did the same concurrently, in which case we will overwrite the
+            // channel in the cache.
+            {
+                let mut channels = self.channels.write().unwrap();
+                channels.insert(shard_no, channel.clone());
+            }
+            channel
+        };
+
+        let client = PageServiceClient::with_interceptor(channel, self.auth_interceptor.clone());
+        Ok(client)
+    }
+}
+
+/// Inject tenant_id, timeline_id and authentication token to all pageserver requests.
+#[derive(Clone)]
+struct AuthInterceptor {
+    tenant_id: AsciiMetadataValue,
+    timeline_id: AsciiMetadataValue,
+
+    auth_token: Option<AsciiMetadataValue>,
+}
+
+impl AuthInterceptor {
+    fn new(tenant_id: &str, timeline_id: &str, auth_token: Option<&String>) -> Self {
+        Self {
+            tenant_id: tenant_id.parse().expect("could not parse tenant id"),
+            timeline_id: timeline_id.parse().expect("could not parse timeline id"),
+            auth_token: auth_token.map(|x| x.parse().expect("could not parse auth token")),
+        }
+    }
+}
+
+impl tonic::service::Interceptor for AuthInterceptor {
+    fn call(&mut self, mut req: tonic::Request<()>) -> Result<tonic::Request<()>, tonic::Status> {
+        req.metadata_mut()
+            .insert("neon-tenant-id", self.tenant_id.clone());
+        req.metadata_mut()
+            .insert("neon-timeline-id", self.timeline_id.clone());
+        if let Some(auth_token) = &self.auth_token {
+            req.metadata_mut()
+                .insert("neon-auth-token", auth_token.clone());
+        }
+
+        Ok(req)
+    }
+}
--- a/pageserver/page_api/Cargo.toml
+++ b/pageserver/page_api/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "pageserver_page_api"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+
+# For Lsn.
+#
+# TODO: move Lsn to separate crate? This draws in a lot more dependencies
+utils.workspace = true
+
+prost.workspace = true
+thiserror.workspace = true
+tonic.workspace = true
+
+[build-dependencies]
+tonic-build.workspace = true
--- a/pageserver/page_api/build.rs
+++ b/pageserver/page_api/build.rs
@@ -0,0 +1,7 @@
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Generate rust code from .proto protobuf.
+    tonic_build::configure()
+        .bytes(["."])
+        .compile_protos(&["proto/page_service.proto"], &["proto"])
+        .map_err(|err| err.into())
+}
--- a/pageserver/page_api/proto/page_service.proto
+++ b/pageserver/page_api/proto/page_service.proto
@@ -0,0 +1,88 @@
+// Page service presented by pageservers, for computes
+//
+// Each request must come with the following metadata:
+// - neon-tenant-id
+// - neon-timeline-id
+// - neon-auth-token (if auth is enabled)
+//
+// TODO: what else? Priority? OpenTelemetry tracing?
+//
+
+syntax = "proto3";
+package page_service;
+
+service PageService {
+  rpc RelExists(RelExistsRequest) returns (RelExistsResponse);
+
+  // Returns size of a relation, as # of blocks
+  rpc RelSize (RelSizeRequest) returns (RelSizeResponse);
+
+  // Fetches a page.
+  rpc GetPage (GetPageRequest) returns (GetPageResponse);
+
+  // Streaming GetPage protocol.
+  rpc GetPages (stream GetPageRequest) returns (stream GetPageResponse);
+
+  // Returns total size of a database, as # of bytes
+  rpc DbSize (DbSizeRequest) returns (DbSizeResponse);
+
+  rpc GetBaseBackup (GetBaseBackupRequest) returns (stream GetBaseBackupResponseChunk);
+}
+
+message RequestCommon {
+  uint64 request_lsn = 1;
+  uint64 not_modified_since_lsn = 2;
+}
+
+message RelTag {
+  uint32 spc_oid = 1;
+  uint32 db_oid = 2;
+  uint32 rel_number = 3;
+  uint32 fork_number = 4;
+}
+
+message RelExistsRequest {
+  RequestCommon common = 1;
+  RelTag rel = 2;
+}
+
+message RelExistsResponse {
+  bool exists = 1;
+}
+
+message RelSizeRequest {
+  RequestCommon common = 1;
+  RelTag rel = 2;
+}
+
+message RelSizeResponse {
+  uint32 num_blocks = 1;
+}
+
+message GetPageRequest {
+  RequestCommon common = 1;
+  RelTag rel = 2;
+  uint32 block_number = 3;
+}
+
+message GetPageResponse {
+  bytes page_image = 1;
+}
+
+message DbSizeRequest {
+  RequestCommon common = 1;
+  uint32 db_oid = 2;
+}
+
+message DbSizeResponse {
+  uint64 num_bytes = 1;
+}
+
+message GetBaseBackupRequest {
+  RequestCommon common = 1;
+  bool replica = 2;
+}
+
+message GetBaseBackupResponseChunk {
+  bytes chunk = 1;
+}
--- a/pageserver/page_api/src/lib.rs
+++ b/pageserver/page_api/src/lib.rs
@@ -0,0 +1,19 @@
+//! This crate provides the Pageserver's page API. It contains:
+//!
+//! * proto: auto-generated Protobuf types for gRPC.
+//! * model: canonical domain types. Protobuf types are converted into these.
+//!
+//! See `proto/page_service.proto` for the protocol spec.
+//!
+//! This crate is used by both the client and the server. Try to keep it slim.
+//!
+
+pub mod model;
+
+// Code generated by protobuf.
+pub mod proto {
+    tonic::include_proto!("page_service");
+
+    pub use page_service_client::PageServiceClient;
+    pub use page_service_server::PageServiceServer;
+}
--- a/pageserver/page_api/src/model.rs
+++ b/pageserver/page_api/src/model.rs
@@ -0,0 +1,242 @@
+//! Structs representing the canonical page service API.
+//!
+//! These mirror the pageserver APIs and the structs automatically generated
+//! from the protobuf specification. The differences are:
+//!
+//! - Types that are in fact required by the API are not Options. The protobuf "required"
+//!   attribute is deprecated and 'prost' marks a lot of members as optional because of that.
+//!   (See https://github.com/tokio-rs/prost/issues/800 for a gripe on this)
+//!
+//! - Use more precise datatypes, e.g. Lsn and uints shorter than 32 bits.
+//!
+//! TODO: these types should be used in the Pageserver for actual processing,
+//! instead of being cast into internal mirror types.
+
+use utils::lsn::Lsn;
+
+use crate::proto;
+
+#[derive(Clone, Debug)]
+pub struct RequestCommon {
+    pub request_lsn: Lsn,
+    pub not_modified_since_lsn: Lsn,
+}
+
+#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd, Ord)]
+pub struct RelTag {
+    pub spc_oid: u32,
+    pub db_oid: u32,
+    pub rel_number: u32,
+    pub fork_number: u8,
+}
+
+#[derive(Clone, Debug)]
+pub struct RelExistsRequest {
+    pub common: RequestCommon,
+    pub rel: RelTag,
+}
+
+#[derive(Clone, Debug)]
+pub struct RelSizeRequest {
+    pub common: RequestCommon,
+    pub rel: RelTag,
+}
+
+#[derive(Clone, Debug)]
+pub struct RelSizeResponse {
+    pub num_blocks: u32,
+}
+
+#[derive(Clone, Debug)]
+pub struct GetPageRequest {
+    pub common: RequestCommon,
+    pub rel: RelTag,
+    pub block_number: u32,
+}
+
+#[derive(Clone, Debug)]
+pub struct GetPageResponse {
+    pub page_image: std::vec::Vec<u8>,
+}
+
+#[derive(Clone, Debug)]
+pub struct DbSizeRequest {
+    pub common: RequestCommon,
+    pub db_oid: u32,
+}
+
+#[derive(Clone, Debug)]
+pub struct DbSizeResponse {
+    pub num_bytes: u64,
+}
+
+#[derive(Clone, Debug)]
+pub struct GetBaseBackupRequest {
+    pub common: RequestCommon,
+    pub replica: bool,
+}
+
+//--- Conversions to/from the generated proto types
+
+use thiserror::Error;
+
+#[derive(Error, Debug)]
+pub enum ProtocolError {
+    #[error("the value for field `{0}` is invalid")]
+    InvalidValue(&'static str),
+    #[error("the required field `{0}` is missing")]
+    Missing(&'static str),
+}
+
+impl From<ProtocolError> for tonic::Status {
+    fn from(e: ProtocolError) -> Self {
+        match e {
+            ProtocolError::InvalidValue(_field) => tonic::Status::invalid_argument(e.to_string()),
+            ProtocolError::Missing(_field) => tonic::Status::invalid_argument(e.to_string()),
+        }
+    }
+}
+
+impl From<&RelTag> for proto::RelTag {
+    fn from(value: &RelTag) -> proto::RelTag {
+        proto::RelTag {
+            spc_oid: value.spc_oid,
+            db_oid: value.db_oid,
+            rel_number: value.rel_number,
+            fork_number: value.fork_number as u32,
+        }
+    }
+}
+impl TryFrom<&proto::RelTag> for RelTag {
+    type Error = ProtocolError;
+
+    fn try_from(value: &proto::RelTag) -> Result<RelTag, ProtocolError> {
+        Ok(RelTag {
+            spc_oid: value.spc_oid,
+            db_oid: value.db_oid,
+            rel_number: value.rel_number,
+            fork_number: value
+                .fork_number
+                .try_into()
+                .or(Err(ProtocolError::InvalidValue("fork_number")))?,
+        })
+    }
+}
+
+impl From<&RequestCommon> for proto::RequestCommon {
+    fn from(value: &RequestCommon) -> proto::RequestCommon {
+        proto::RequestCommon {
+            request_lsn: value.request_lsn.into(),
+            not_modified_since_lsn: value.not_modified_since_lsn.into(),
+        }
+    }
+}
+impl From<&proto::RequestCommon> for RequestCommon {
+    fn from(value: &proto::RequestCommon) -> RequestCommon {
+        RequestCommon {
+            request_lsn: value.request_lsn.into(),
+            not_modified_since_lsn: value.not_modified_since_lsn.into(),
+        }
+    }
+}
+
+impl From<&RelExistsRequest> for proto::RelExistsRequest {
+    fn from(value: &RelExistsRequest) -> proto::RelExistsRequest {
+        proto::RelExistsRequest {
+            common: Some((&value.common).into()),
+            rel: Some((&value.rel).into()),
+        }
+    }
+}
+impl TryFrom<&proto::RelExistsRequest> for RelExistsRequest {
+    type Error = ProtocolError;
+
+    fn try_from(value: &proto::RelExistsRequest) -> Result<RelExistsRequest, ProtocolError> {
+        Ok(RelExistsRequest {
+            common: (&value.common.ok_or(ProtocolError::Missing("common"))?).into(),
+            rel: (&value.rel.ok_or(ProtocolError::Missing("rel"))?).try_into()?,
+        })
+    }
+}
+
+impl From<&RelSizeRequest> for proto::RelSizeRequest {
+    fn from(value: &RelSizeRequest) -> proto::RelSizeRequest {
+        proto::RelSizeRequest {
+            common: Some((&value.common).into()),
+            rel: Some((&value.rel).into()),
+        }
+    }
+}
+impl TryFrom<&proto::RelSizeRequest> for RelSizeRequest {
+    type Error = ProtocolError;
+
+    fn try_from(value: &proto::RelSizeRequest) -> Result<RelSizeRequest, ProtocolError> {
+        Ok(RelSizeRequest {
+            common: (&value.common.ok_or(ProtocolError::Missing("common"))?).into(),
+            rel: (&value.rel.ok_or(ProtocolError::Missing("rel"))?).try_into()?,
+        })
+    }
+}
+
+impl From<&GetPageRequest> for proto::GetPageRequest {
+    fn from(value: &GetPageRequest) -> proto::GetPageRequest {
+        proto::GetPageRequest {
+            common: Some((&value.common).into()),
+            rel: Some((&value.rel).into()),
+            block_number: value.block_number,
+        }
+    }
+}
+impl TryFrom<&proto::GetPageRequest> for GetPageRequest {
+    type Error = ProtocolError;
+
+    fn try_from(value: &proto::GetPageRequest) -> Result<GetPageRequest, ProtocolError> {
+        Ok(GetPageRequest {
+            common: (&value.common.ok_or(ProtocolError::Missing("common"))?).into(),
+            rel: (&value.rel.ok_or(ProtocolError::Missing("rel"))?).try_into()?,
+            block_number: value.block_number,
+        })
+    }
+}
+
+impl From<&DbSizeRequest> for proto::DbSizeRequest {
+    fn from(value: &DbSizeRequest) -> proto::DbSizeRequest {
+        proto::DbSizeRequest {
+            common: Some((&value.common).into()),
+            db_oid: value.db_oid,
+        }
+    }
+}
+
+impl TryFrom<&proto::DbSizeRequest> for DbSizeRequest {
+    type Error = ProtocolError;
+
+    fn try_from(value: &proto::DbSizeRequest) -> Result<DbSizeRequest, ProtocolError> {
+        Ok(DbSizeRequest {
+            common: (&value.common.ok_or(ProtocolError::Missing("common"))?).into(),
+            db_oid: value.db_oid,
+        })
+    }
+}
+
+impl From<&GetBaseBackupRequest> for proto::GetBaseBackupRequest {
+    fn from(value: &GetBaseBackupRequest) -> proto::GetBaseBackupRequest {
+        proto::GetBaseBackupRequest {
+            common: Some((&value.common).into()),
+            replica: value.replica,
+        }
+    }
+}
+
+impl TryFrom<&proto::GetBaseBackupRequest> for GetBaseBackupRequest {
+    type Error = ProtocolError;
+
+    fn try_from(
+        value: &proto::GetBaseBackupRequest,
+    ) -> Result<GetBaseBackupRequest, ProtocolError> {
+        Ok(GetBaseBackupRequest {
+            common: (&value.common.ok_or(ProtocolError::Missing("common"))?).into(),
+            replica: value.replica,
+        })
+    }
+}
--- a/pageserver/pagebench/Cargo.toml
+++ b/pageserver/pagebench/Cargo.toml
@@ -15,14 +15,17 @@ hdrhistogram.workspace = true
 humantime.workspace = true
 humantime-serde.workspace = true
 rand.workspace = true
-reqwest.workspace=true
+reqwest.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 tracing.workspace = true
 tokio.workspace = true
+tokio-stream.workspace = true
 tokio-util.workspace = true

 pageserver_client.workspace = true
+pageserver_client_grpc.workspace = true
+pageserver_page_api.workspace = true
 pageserver_api.workspace = true
 utils = { path = "../../libs/utils/" }
 workspace_hack = { version = "0.1", path = "../../workspace_hack" }
--- a/pageserver/pagebench/src/cmd/basebackup.rs
+++ b/pageserver/pagebench/src/cmd/basebackup.rs
@@ -9,6 +9,8 @@ use anyhow::Context;
 use pageserver_api::shard::TenantShardId;
 use pageserver_client::mgmt_api::ForceAwaitLogicalSize;
 use pageserver_client::page_service::BasebackupRequest;
+use pageserver_page_api::model::{GetBaseBackupRequest, RequestCommon};
+
 use rand::prelude::*;
 use tokio::sync::Barrier;
 use tokio::task::JoinSet;
@@ -22,6 +24,8 @@ use crate::util::{request_stats, tokio_thread_local_stats};
 /// basebackup@LatestLSN
 #[derive(clap::Parser)]
 pub(crate) struct Args {
+    #[clap(long, default_value = "false")]
+    grpc: bool,
    #[clap(long, default_value = "http://localhost:9898")]
    mgmt_api_endpoint: String,
    #[clap(long, default_value = "postgres://postgres@localhost:64000")]
@@ -52,7 +56,7 @@ impl LiveStats {

 struct Target {
    timeline: TenantTimelineId,
-    lsn_range: Option<Range<Lsn>>,
+    lsn_range: Range<Lsn>,
 }

 #[derive(serde::Serialize)]
@@ -105,7 +109,7 @@ async fn main_impl(
                anyhow::Ok(Target {
                    timeline,
                    // TODO: support lsn_range != latest LSN
-                    lsn_range: Some(info.last_record_lsn..(info.last_record_lsn + 1)),
+                    lsn_range: info.last_record_lsn..(info.last_record_lsn + 1),
                })
            }
        });
@@ -149,14 +153,27 @@ async fn main_impl(
    for tl in &timelines {
        let (sender, receiver) = tokio::sync::mpsc::channel(1); // TODO: not sure what the implications of this are
        work_senders.insert(tl, sender);
-        tasks.push(tokio::spawn(client(
-            args,
-            *tl,
-            Arc::clone(&start_work_barrier),
-            receiver,
-            Arc::clone(&all_work_done_barrier),
-            Arc::clone(&live_stats),
-        )));
+
+        let client_task = if args.grpc {
+            tokio::spawn(client_grpc(
+                args,
+                *tl,
+                Arc::clone(&start_work_barrier),
+                receiver,
+                Arc::clone(&all_work_done_barrier),
+                Arc::clone(&live_stats),
+            ))
+        } else {
+            tokio::spawn(client(
+                args,
+                *tl,
+                Arc::clone(&start_work_barrier),
+                receiver,
+                Arc::clone(&all_work_done_barrier),
+                Arc::clone(&live_stats),
+            ))
+        };
+        tasks.push(client_task);
    }

    let work_sender = async move {
@@ -165,7 +182,7 @@ async fn main_impl(
            let (timeline, work) = {
                let mut rng = rand::thread_rng();
                let target = all_targets.choose(&mut rng).unwrap();
-                let lsn = target.lsn_range.clone().map(|r| rng.gen_range(r));
+                let lsn = rng.gen_range(target.lsn_range.clone());
                (
                    target.timeline,
                    Work {
@@ -215,7 +232,7 @@ async fn main_impl(

 #[derive(Copy, Clone)]
 struct Work {
-    lsn: Option<Lsn>,
+    lsn: Lsn,
    gzip: bool,
 }

@@ -240,7 +257,7 @@ async fn client(
            .basebackup(&BasebackupRequest {
                tenant_id: timeline.tenant_id,
                timeline_id: timeline.timeline_id,
-                lsn,
+                lsn: Some(lsn),
                gzip,
            })
            .await
@@ -270,3 +287,71 @@ async fn client(

    all_work_done_barrier.wait().await;
 }
+
+/// Benchmark worker that fetches basebackups for `timeline` over gRPC.
+///
+/// Waits on `start_work_barrier`, then for every [`Work`] item received on
+/// `work` issues a `get_base_backup` request at the item's LSN, drains the
+/// response stream while counting bytes and chunks, and records the elapsed
+/// time in `STATS` and `live_stats`. Once the channel is closed it waits on
+/// `all_work_done_barrier`.
+///
+/// Panics if the request cannot be started or the stream yields an error.
+#[instrument(skip_all)]
+async fn client_grpc(
+    args: &'static Args,
+    timeline: TenantTimelineId,
+    start_work_barrier: Arc<Barrier>,
+    mut work: tokio::sync::mpsc::Receiver<Work>,
+    all_work_done_barrier: Arc<Barrier>,
+    live_stats: Arc<LiveStats>,
+) {
+    use futures::StreamExt;
+
+    // Single-entry shard map: shard 0 points at the configured connstring.
+    let shard_map = HashMap::from([(0, args.page_service_connstring.clone())]);
+    let client = pageserver_client_grpc::PageserverClient::new(
+        &timeline.tenant_id.to_string(),
+        &timeline.timeline_id.to_string(),
+        &None,
+        shard_map,
+    );
+
+    start_work_barrier.wait().await;
+
+    while let Some(Work { lsn, gzip }) = work.recv().await {
+        let start = Instant::now();
+
+        info!("starting get_base_backup");
+        let mut basebackup_stream = client
+            .get_base_backup(
+                &GetBaseBackupRequest {
+                    common: RequestCommon {
+                        request_lsn: lsn,
+                        not_modified_since_lsn: lsn,
+                    },
+                    replica: false,
+                },
+                gzip,
+            )
+            .await
+            .with_context(|| format!("start basebackup for {timeline}"))
+            .unwrap()
+            .into_inner();
+
+        info!("starting receive");
+        let mut size = 0;
+        let mut nchunks = 0;
+        while let Some(chunk) = basebackup_stream.next().await {
+            // The message is static, so `context` suffices (no `format!` needed).
+            let chunk = chunk.context("error during basebackup").unwrap();
+            size += chunk.chunk.len();
+            nchunks += 1;
+        }
+
+        // An empty stream would otherwise compute 0.0 / 0.0 and log `NaN`.
+        let avg_chunk_size = if nchunks == 0 {
+            0.0
+        } else {
+            size as f32 / nchunks as f32
+        };
+        info!(
+            "basebackup size is {} bytes, avg chunk size {} bytes",
+            size, avg_chunk_size
+        );
+        let elapsed = start.elapsed();
+        live_stats.inc();
+        STATS.with(|stats| {
+            stats.borrow().lock().unwrap().observe(elapsed).unwrap();
+        });
+    }
+
+    all_work_done_barrier.wait().await;
+}
--- a/Show More
+++ b/Show More