Adjust parameters in test_duplicate_layers.py test

Add test for duplicate layers
ci: use eu-central-1 bucket (#4315)
2026-05-03 22:30:37 +00:00 · 2023-05-25 17:01:07 +03:00 · 2023-05-25 10:01:00 +03:00 · 2023-05-25 00:00:21 +03:00 · 2023-05-24 19:36:07 +03:00 · 2023-05-24 16:54:11 +02:00
82 changed files with 3634 additions and 2348 deletions
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -14,3 +14,4 @@ opt-level = 1

 [alias]
 build_testing = ["build", "--features", "testing"]
+neon = ["run", "--bin", "neon_local"]
--- a/.github/actions/allure-report-generate/action.yml
+++ b/.github/actions/allure-report-generate/action.yml
@@ -57,14 +57,14 @@ runs:
        if ! which allure; then
          ALLURE_ZIP=allure-${ALLURE_VERSION}.zip
          wget -q https://github.com/allure-framework/allure2/releases/download/${ALLURE_VERSION}/${ALLURE_ZIP}
-          echo "${ALLURE_ZIP_MD5}  ${ALLURE_ZIP}" | md5sum -c
+          echo "${ALLURE_ZIP_SHA256} ${ALLURE_ZIP}" | sha256sum --check
          unzip -q ${ALLURE_ZIP}
          echo "$(pwd)/allure-${ALLURE_VERSION}/bin" >> $GITHUB_PATH
          rm -f ${ALLURE_ZIP}
        fi
      env:
-        ALLURE_VERSION: 2.22.0
-        ALLURE_ZIP_MD5: d5c9f0989b896482536956340a7d5ec9
+        ALLURE_VERSION: 2.22.1
+        ALLURE_ZIP_SHA256: fdc7a62d94b14c5e0bf25198ae1feded6b005fdbed864b4d3cb4e5e901720b0b

    # Potentially we could have several running build for the same key (for example, for the main branch), so we use improvised lock for this
    - name: Acquire lock
@@ -147,6 +147,8 @@ runs:
        echo "report-url=${REPORT_URL}"                                   >> $GITHUB_OUTPUT
        echo "report-json-url=${REPORT_URL%/index.html}/data/suites.json" >> $GITHUB_OUTPUT

+        echo "[Allure Report](${REPORT_URL})" >> ${GITHUB_STEP_SUMMARY}
+
    - name: Release lock
      if: always()
      shell: bash -euxo pipefail {0}
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -36,18 +36,14 @@ inputs:
    description: 'Region name for real s3 tests'
    required: false
    default: ''
-  real_s3_access_key_id:
-    description: 'Access key id'
-    required: false
-    default: ''
-  real_s3_secret_access_key:
-    description: 'Secret access key'
-    required: false
-    default: ''
  rerun_flaky:
    description: 'Whether to rerun flaky tests'
    required: false
    default: 'false'
+  pg_version:
+    description: 'Postgres version to use for tests'
+    required: false
+    default: 'v14'

 runs:
  using: "composite"
@@ -67,12 +63,12 @@ runs:
        path: /tmp/neon-previous
        prefix: latest

-    - name: Download compatibility snapshot for Postgres 14
+    - name: Download compatibility snapshot
      if: inputs.build_type != 'remote'
      uses: ./.github/actions/download
      with:
-        name: compatibility-snapshot-${{ inputs.build_type }}-pg14
-        path: /tmp/compatibility_snapshot_pg14
+        name: compatibility-snapshot-${{ inputs.build_type }}-pg${{ inputs.pg_version }}
+        path: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
        prefix: latest

    - name: Checkout
@@ -100,19 +96,18 @@ runs:
        COMPATIBILITY_POSTGRES_DISTRIB_DIR: /tmp/neon-previous/pg_install
        TEST_OUTPUT: /tmp/test_output
        BUILD_TYPE: ${{ inputs.build_type }}
-        AWS_ACCESS_KEY_ID: ${{ inputs.real_s3_access_key_id }}
-        AWS_SECRET_ACCESS_KEY: ${{ inputs.real_s3_secret_access_key }}
-        COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg14
+        COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
        ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage')
        ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
        RERUN_FLAKY: ${{ inputs.rerun_flaky }}
+        PG_VERSION: ${{ inputs.pg_version }}
      shell: bash -euxo pipefail {0}
      run: |
        # PLATFORM will be embedded in the perf test report
        # and it is needed to distinguish different environments
        export PLATFORM=${PLATFORM:-github-actions-selfhosted}
        export POSTGRES_DISTRIB_DIR=${POSTGRES_DISTRIB_DIR:-/tmp/neon/pg_install}
-        export DEFAULT_PG_VERSION=${DEFAULT_PG_VERSION:-14}
+        export DEFAULT_PG_VERSION=${PG_VERSION#v}

        if [ "${BUILD_TYPE}" = "remote" ]; then
          export REMOTE_ENV=1
@@ -192,13 +187,13 @@ runs:
          scripts/generate_and_push_perf_report.sh
        fi

-    - name: Upload compatibility snapshot for Postgres 14
+    - name: Upload compatibility snapshot
      if: github.ref_name == 'release'
      uses: ./.github/actions/upload
      with:
-        name: compatibility-snapshot-${{ inputs.build_type }}-pg14-${{ github.run_id }}
+        name: compatibility-snapshot-${{ inputs.build_type }}-pg${{ inputs.pg_version }}-${{ github.run_id }}
        # Directory is created by test_compatibility.py::test_create_snapshot, keep the path in sync with the test
-        path: /tmp/test_output/compatibility_snapshot_pg14/
+        path: /tmp/test_output/compatibility_snapshot_pg${{ inputs.pg_version }}/
        prefix: latest

    - name: Upload test results
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -324,7 +324,8 @@ jobs:
    runs-on: [ self-hosted, gen3, large ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
-      options: --init
+      # Docker's default shared memory (/dev/shm) size is only 64mb
+      options: --init --shm-size=512mb
    needs: [ build-neon ]
    strategy:
      fail-fast: false
@@ -345,13 +346,11 @@ jobs:
          test_selection: regress
          needs_postgres_source: true
          run_with_real_s3: true
-          real_s3_bucket: ci-tests-s3
-          real_s3_region: us-west-2
-          real_s3_access_key_id: "${{ secrets.AWS_ACCESS_KEY_ID_CI_TESTS_S3 }}"
-          real_s3_secret_access_key: "${{ secrets.AWS_SECRET_ACCESS_KEY_CI_TESTS_S3 }}"
+          real_s3_bucket: neon-github-ci-tests
+          real_s3_region: eu-central-1
          rerun_flaky: true
+          pg_version: ${{ matrix.pg_version }}
        env:
-          DEFAULT_PG_VERSION: ${{ matrix.pg_version }}
          TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR }}
          CHECK_ONDISK_DATA_COMPATIBILITY: nonempty

@@ -363,7 +362,8 @@ jobs:
    runs-on: [ self-hosted, gen3, small ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
-      options: --init
+      # Docker's default shared memory (/dev/shm) size is only 64mb
+      options: --init --shm-size=512mb
    needs: [ build-neon ]
    if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
    strategy:
@@ -490,37 +490,43 @@ jobs:
      - name: Merge coverage data
        run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge

-      - name: Build and upload coverage report
+      - name: Build coverage report
+        env:
+          COMMIT_URL: ${{ github.server_url }}/${{ github.repository }}/commit/${{ github.event.pull_request.head.sha || github.sha }}
        run: |
-          COMMIT_SHA=${{ github.event.pull_request.head.sha }}
-          COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
-          COMMIT_URL=https://github.com/${{ github.repository }}/commit/$COMMIT_SHA
-
          scripts/coverage \
            --dir=/tmp/coverage report \
            --input-objects=/tmp/coverage/binaries.list \
-            --commit-url=$COMMIT_URL \
+            --commit-url=${COMMIT_URL} \
            --format=github

-          REPORT_URL=https://${{ github.repository_owner }}.github.io/zenith-coverage-data/$COMMIT_SHA
+      - name: Upload coverage report
+        id: upload-coverage-report
+        env:
+          BUCKET: neon-github-public-dev  # S3 bucket that receives the report; also used to build REPORT_URL
+          COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
+        run: |
+          aws s3 cp --only-show-errors --recursive /tmp/coverage/report s3://${BUCKET}/code-coverage/${COMMIT_SHA}

-          scripts/git-upload \
-            --repo=https://${{ secrets.VIP_VAP_ACCESS_TOKEN }}@github.com/${{ github.repository_owner }}/zenith-coverage-data.git \
-            --message="Add code coverage for $COMMIT_URL" \
-            copy /tmp/coverage/report $COMMIT_SHA # COPY FROM TO_RELATIVE
+          REPORT_URL=https://${BUCKET}.s3.amazonaws.com/code-coverage/${COMMIT_SHA}/index.html
+          echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT

-          # Add link to the coverage report to the commit
-          curl -f -X POST \
-          https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
-          -H "Accept: application/vnd.github.v3+json" \
-          --user "${{ secrets.CI_ACCESS_TOKEN }}" \
-          --data \
-            "{
-              \"state\": \"success\",
-              \"context\": \"neon-coverage\",
-              \"description\": \"Coverage report is ready\",
-              \"target_url\": \"$REPORT_URL\"
-            }"
+      - uses: actions/github-script@v6
+        env:
+          REPORT_URL: ${{ steps.upload-coverage-report.outputs.report-url }}
+          COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
+        with:
+          script: |
+            const { REPORT_URL, COMMIT_SHA } = process.env
+
+            await github.rest.repos.createCommitStatus({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              sha: `${COMMIT_SHA}`,
+              state: 'success',
+              target_url: `${REPORT_URL}`,
+              context: 'Code coverage report',
+            })

  trigger-e2e-tests:
    runs-on: [ self-hosted, gen3, small ]
@@ -703,7 +709,11 @@ jobs:

  compute-node-image:
    runs-on: [ self-hosted, gen3, large ]
-    container: gcr.io/kaniko-project/executor:v1.9.2-debug
+    container:
+      image: gcr.io/kaniko-project/executor:v1.9.2-debug
+      # Workaround for "Resolving download.osgeo.org (download.osgeo.org)... failed: Temporary failure in name resolution."
+      # Should be prevented by https://github.com/neondatabase/neon/issues/4281
+      options: --add-host=download.osgeo.org:140.211.15.30
    needs: [ tag ]
    strategy:
      fail-fast: false
@@ -765,7 +775,7 @@ jobs:
      run:
        shell: sh -eu {0}
    env:
-      VM_BUILDER_VERSION: v0.4.6
+      VM_BUILDER_VERSION: v0.7.3-alpha3

    steps:
      - name: Checkout
@@ -775,21 +785,18 @@ jobs:

      - name: Downloading vm-builder
        run: |
-          curl -L https://github.com/neondatabase/neonvm/releases/download/$VM_BUILDER_VERSION/vm-builder -o vm-builder
+          curl -fL https://github.com/neondatabase/autoscaling/releases/download/$VM_BUILDER_VERSION/vm-builder -o vm-builder
          chmod +x vm-builder

+      # Note: we need a separate pull step here because otherwise vm-builder will try to pull, and
+      # it won't have the proper authentication (written at v0.6.0)
      - name: Pulling compute-node image
        run: |
          docker pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}

-      - name: Building VM compute-node rootfs
-        run: |
-          docker build -t temp-vm-compute-node --build-arg SRC_IMAGE=369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} -f Dockerfile.vm-compute-node .
-
      - name: Build vm image
        run: |
-          # note: as of 2023-01-12, vm-builder requires a trailing ":latest" for local images
-          ./vm-builder -use-inittab -src=temp-vm-compute-node:latest -dst=369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
+          ./vm-builder -src=369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} -dst=369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}

      - name: Pushing vm-compute-node image
        run: |
@@ -949,7 +956,7 @@ jobs:
  promote-compatibility-data:
    runs-on: [ self-hosted, gen3, small ]
    container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
+      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
      options: --init
    needs: [ promote-images, tag, regress-tests ]
    if: github.ref_name == 'release' && github.event_name != 'workflow_dispatch'
@@ -960,11 +967,13 @@ jobs:
          PREFIX: artifacts/latest
        run: |
          # Update compatibility snapshot for the release
-          for build_type in debug release; do
-            OLD_FILENAME=compatibility-snapshot-${build_type}-pg14-${GITHUB_RUN_ID}.tar.zst
-            NEW_FILENAME=compatibility-snapshot-${build_type}-pg14.tar.zst
+          for pg_version in v14 v15; do
+            for build_type in debug release; do
+              OLD_FILENAME=compatibility-snapshot-${build_type}-pg${pg_version}-${GITHUB_RUN_ID}.tar.zst
+              NEW_FILENAME=compatibility-snapshot-${build_type}-pg${pg_version}.tar.zst

-            time aws s3 mv --only-show-errors s3://${BUCKET}/${PREFIX}/${OLD_FILENAME} s3://${BUCKET}/${PREFIX}/${NEW_FILENAME}
+              time aws s3 mv --only-show-errors s3://${BUCKET}/${PREFIX}/${OLD_FILENAME} s3://${BUCKET}/${PREFIX}/${NEW_FILENAME}
+            done
          done

          # Update Neon artifact for the release (reuse already uploaded artifact)
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -2,7 +2,7 @@

 Howdy! Usual good software engineering practices apply. Write
 tests. Write comments. Follow standard Rust coding practices where
-possible. Use 'cargo fmt' and 'clippy' to tidy up formatting.
+possible. Use `cargo fmt` and `cargo clippy` to tidy up formatting.

 There are soft spots in the code, which could use cleanup,
 refactoring, additional comments, and so forth. Let's try to raise the
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -230,40 +230,38 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"

 [[package]]
 name = "aws-config"
-version = "0.51.0"
+version = "0.55.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56a636c44c77fa18bdba56126a34d30cfe5538fe88f7d34988fa731fee143ddd"
+checksum = "fc00553f5f3c06ffd4510a9d576f92143618706c45ea6ff81e84ad9be9588abd"
 dependencies = [
+ "aws-credential-types",
 "aws-http",
- "aws-sdk-sso",
 "aws-sdk-sts",
- "aws-smithy-async 0.51.0",
- "aws-smithy-client 0.51.0",
- "aws-smithy-http 0.51.0",
- "aws-smithy-http-tower 0.51.0",
+ "aws-smithy-async",
+ "aws-smithy-client",
+ "aws-smithy-http",
+ "aws-smithy-http-tower",
 "aws-smithy-json",
- "aws-smithy-types 0.51.0",
- "aws-types 0.51.0",
+ "aws-smithy-types",
+ "aws-types",
 "bytes",
- "hex",
+ "fastrand",
 "http",
 "hyper",
- "ring",
 "time",
 "tokio",
 "tower",
 "tracing",
- "zeroize",
 ]

 [[package]]
 name = "aws-credential-types"
-version = "0.55.1"
+version = "0.55.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f4232d3729eefc287adc0d5a8adc97b7d94eefffe6bbe94312cc86c7ab6b06ce"
+checksum = "4cb57ac6088805821f78d282c0ba8aec809f11cbee10dda19a97b03ab040ccc2"
 dependencies = [
- "aws-smithy-async 0.55.1",
- "aws-smithy-types 0.55.1",
+ "aws-smithy-async",
+ "aws-smithy-types",
 "fastrand",
 "tokio",
 "tracing",
@@ -272,13 +270,13 @@ dependencies = [

 [[package]]
 name = "aws-endpoint"
-version = "0.51.0"
+version = "0.55.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ca8f374874f6459aaa88dc861d7f5d834ca1ff97668eae190e97266b5f6c3fb"
+checksum = "9c5f6f84a4f46f95a9bb71d9300b73cd67eb868bc43ae84f66ad34752299f4ac"
 dependencies = [
- "aws-smithy-http 0.51.0",
- "aws-smithy-types 0.51.0",
- "aws-types 0.51.0",
+ "aws-smithy-http",
+ "aws-smithy-types",
+ "aws-types",
 "http",
 "regex",
 "tracing",
@@ -286,13 +284,14 @@ dependencies = [

 [[package]]
 name = "aws-http"
-version = "0.51.0"
+version = "0.55.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "78d41e19e779b73463f5f0c21b3aacc995f4ba783ab13a7ae9f5dfb159a551b4"
+checksum = "a754683c322f7dc5167484266489fdebdcd04d26e53c162cad1f3f949f2c5671"
 dependencies = [
- "aws-smithy-http 0.51.0",
- "aws-smithy-types 0.51.0",
- "aws-types 0.51.0",
+ "aws-credential-types",
+ "aws-smithy-http",
+ "aws-smithy-types",
+ "aws-types",
 "bytes",
 "http",
 "http-body",
@@ -304,127 +303,104 @@ dependencies = [

 [[package]]
 name = "aws-sdk-s3"
-version = "0.21.0"
+version = "0.25.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a9f08665c8e03aca8cb092ef01e617436ebfa977fddc1240e1b062488ab5d48a"
+checksum = "392b9811ca489747ac84349790e49deaa1f16631949e7dd4156000251c260eae"
 dependencies = [
+ "aws-credential-types",
 "aws-endpoint",
 "aws-http",
 "aws-sig-auth",
 "aws-sigv4",
- "aws-smithy-async 0.51.0",
+ "aws-smithy-async",
 "aws-smithy-checksums",
- "aws-smithy-client 0.51.0",
+ "aws-smithy-client",
 "aws-smithy-eventstream",
- "aws-smithy-http 0.51.0",
- "aws-smithy-http-tower 0.51.0",
- "aws-smithy-types 0.51.0",
+ "aws-smithy-http",
+ "aws-smithy-http-tower",
+ "aws-smithy-json",
+ "aws-smithy-types",
 "aws-smithy-xml",
- "aws-types 0.51.0",
+ "aws-types",
 "bytes",
- "bytes-utils",
 "http",
 "http-body",
+ "once_cell",
+ "percent-encoding",
+ "regex",
 "tokio-stream",
 "tower",
 "tracing",
+ "url",
+]
+
+[[package]]
+name = "aws-sdk-sts"
+version = "0.27.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2d0fbe3c2c342bc8dfea4bb43937405a8ec06f99140a0dcb9c7b59e54dfa93a1"
+dependencies = [
+ "aws-credential-types",
+ "aws-endpoint",
+ "aws-http",
+ "aws-sig-auth",
+ "aws-smithy-async",
+ "aws-smithy-client",
+ "aws-smithy-http",
+ "aws-smithy-http-tower",
+ "aws-smithy-json",
+ "aws-smithy-query",
+ "aws-smithy-types",
+ "aws-smithy-xml",
+ "aws-types",
+ "bytes",
+ "http",
+ "regex",
+ "tower",
+ "tracing",
 ]

-[[package]]
-name = "aws-sdk-sso"
-version = "0.21.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86dcb1cb71aa8763b327542ead410424515cff0cde5b753eedd2917e09c63734"
-dependencies = [
- "aws-endpoint",
- "aws-http",
- "aws-sig-auth",
- "aws-smithy-async 0.51.0",
- "aws-smithy-client 0.51.0",
- "aws-smithy-http 0.51.0",
- "aws-smithy-http-tower 0.51.0",
- "aws-smithy-json",
- "aws-smithy-types 0.51.0",
- "aws-types 0.51.0",
- "bytes",
- "http",
- "tokio-stream",
- "tower",
-]
-
-[[package]]
-name = "aws-sdk-sts"
-version = "0.21.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fdfcf584297c666f6b472d5368a78de3bc714b6e0a53d7fbf76c3e347c292ab1"
-dependencies = [
- "aws-endpoint",
- "aws-http",
- "aws-sig-auth",
- "aws-smithy-async 0.51.0",
- "aws-smithy-client 0.51.0",
- "aws-smithy-http 0.51.0",
- "aws-smithy-http-tower 0.51.0",
- "aws-smithy-query",
- "aws-smithy-types 0.51.0",
- "aws-smithy-xml",
- "aws-types 0.51.0",
- "bytes",
- "http",
- "tower",
-]
-
 [[package]]
 name = "aws-sig-auth"
-version = "0.51.0"
+version = "0.55.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "12cbe7b2be9e185c1fbce27fc9c41c66b195b32d89aa099f98768d9544221308"
+checksum = "84dc92a63ede3c2cbe43529cb87ffa58763520c96c6a46ca1ced80417afba845"
 dependencies = [
+ "aws-credential-types",
 "aws-sigv4",
 "aws-smithy-eventstream",
- "aws-smithy-http 0.51.0",
- "aws-types 0.51.0",
+ "aws-smithy-http",
+ "aws-types",
 "http",
 "tracing",
 ]

 [[package]]
 name = "aws-sigv4"
-version = "0.51.1"
+version = "0.55.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c0b2658d2cb66dbf02f0e8dee80810ef1e0ca3530ede463e0ef994c301087d1"
+checksum = "392fefab9d6fcbd76d518eb3b1c040b84728ab50f58df0c3c53ada4bea9d327e"
 dependencies = [
 "aws-smithy-eventstream",
- "aws-smithy-http 0.51.0",
+ "aws-smithy-http",
 "bytes",
 "form_urlencoded",
 "hex",
+ "hmac",
 "http",
 "once_cell",
 "percent-encoding",
 "regex",
- "ring",
+ "sha2",
 "time",
 "tracing",
 ]

 [[package]]
 name = "aws-smithy-async"
-version = "0.51.0"
+version = "0.55.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7b3442b4c5d3fc39891a2e5e625735fba6b24694887d49c6518460fde98247a9"
-dependencies = [
- "futures-util",
- "pin-project-lite",
- "tokio",
- "tokio-stream",
-]
-
-[[package]]
-name = "aws-smithy-async"
-version = "0.55.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "88573bcfbe1dcfd54d4912846df028b42d6255cbf9ce07be216b1bbfd11fc4b9"
+checksum = "ae23b9fe7a07d0919000116c4c5c0578303fbce6fc8d32efca1f7759d4c20faf"
 dependencies = [
 "futures-util",
 "pin-project-lite",
@@ -434,12 +410,12 @@ dependencies = [

 [[package]]
 name = "aws-smithy-checksums"
-version = "0.51.0"
+version = "0.55.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cc227e36e346f45298288359f37123e1a92628d1cec6b11b5eb335553278bd9e"
+checksum = "a6367acbd6849b8c7c659e166955531274ae147bf83ab4312885991f6b6706cb"
 dependencies = [
- "aws-smithy-http 0.51.0",
- "aws-smithy-types 0.51.0",
+ "aws-smithy-http",
+ "aws-smithy-types",
 "bytes",
 "crc32c",
 "crc32fast",
@@ -455,14 +431,14 @@ dependencies = [

 [[package]]
 name = "aws-smithy-client"
-version = "0.51.0"
+version = "0.55.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ff28d553714f8f54cd921227934fc13a536a1c03f106e56b362fd57e16d450ad"
+checksum = "5230d25d244a51339273b8870f0f77874cd4449fb4f8f629b21188ae10cfc0ba"
 dependencies = [
- "aws-smithy-async 0.51.0",
- "aws-smithy-http 0.51.0",
- "aws-smithy-http-tower 0.51.0",
- "aws-smithy-types 0.51.0",
+ "aws-smithy-async",
+ "aws-smithy-http",
+ "aws-smithy-http-tower",
+ "aws-smithy-types",
 "bytes",
 "fastrand",
 "http",
@@ -471,26 +447,7 @@ dependencies = [
 "hyper-rustls",
 "lazy_static",
 "pin-project-lite",
- "tokio",
- "tower",
- "tracing",
-]
-
-[[package]]
-name = "aws-smithy-client"
-version = "0.55.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b2f52352bae50d3337d5d6151b695d31a8c10ebea113eca5bead531f8301b067"
-dependencies = [
- "aws-smithy-async 0.55.1",
- "aws-smithy-http 0.55.1",
- "aws-smithy-http-tower 0.55.1",
- "aws-smithy-types 0.55.1",
- "bytes",
- "fastrand",
- "http",
- "http-body",
- "pin-project-lite",
+ "rustls 0.20.8",
 "tokio",
 "tower",
 "tracing",
@@ -498,23 +455,23 @@ dependencies = [

 [[package]]
 name = "aws-smithy-eventstream"
-version = "0.51.0"
+version = "0.55.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d7ea0df7161ce65b5c8ca6eb709a1a907376fa18226976e41c748ce02ccccf24"
+checksum = "22d2a2bcc16e5c4d949ffd2b851da852b9bbed4bb364ed4ae371b42137ca06d9"
 dependencies = [
- "aws-smithy-types 0.51.0",
+ "aws-smithy-types",
 "bytes",
 "crc32fast",
 ]

 [[package]]
 name = "aws-smithy-http"
-version = "0.51.0"
+version = "0.55.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bf58ed4fefa61dbf038e5421a521cbc2c448ef69deff0ab1d915d8a10eda5664"
+checksum = "b60e2133beb9fe6ffe0b70deca57aaeff0a35ad24a9c6fab2fd3b4f45b99fdb5"
 dependencies = [
 "aws-smithy-eventstream",
- "aws-smithy-types 0.51.0",
+ "aws-smithy-types",
 "bytes",
 "bytes-utils",
 "futures-core",
@@ -530,49 +487,14 @@ dependencies = [
 "tracing",
 ]

-[[package]]
-name = "aws-smithy-http"
-version = "0.55.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "03bcc02d7ed9649d855c8ce4a735e9848d7b8f7568aad0504c158e3baa955df8"
-dependencies = [
- "aws-smithy-types 0.55.1",
- "bytes",
- "bytes-utils",
- "futures-core",
- "http",
- "http-body",
- "hyper",
- "once_cell",
- "percent-encoding",
- "pin-project-lite",
- "pin-utils",
- "tracing",
-]
-
 [[package]]
 name = "aws-smithy-http-tower"
-version = "0.51.0"
+version = "0.55.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "20c96d7bd35e7cf96aca1134b2f81b1b59ffe493f7c6539c051791cbbf7a42d3"
+checksum = "3a4d94f556c86a0dd916a5d7c39747157ea8cb909ca469703e20fee33e448b67"
 dependencies = [
- "aws-smithy-http 0.51.0",
- "bytes",
- "http",
- "http-body",
- "pin-project-lite",
- "tower",
- "tracing",
-]
-
-[[package]]
-name = "aws-smithy-http-tower"
-version = "0.55.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da88b3a860f65505996c29192d800f1aeb9480440f56d63aad33a3c12045017a"
-dependencies = [
- "aws-smithy-http 0.55.1",
- "aws-smithy-types 0.55.1",
+ "aws-smithy-http",
+ "aws-smithy-types",
 "bytes",
 "http",
 "http-body",
@@ -583,40 +505,28 @@ dependencies = [

 [[package]]
 name = "aws-smithy-json"
-version = "0.51.0"
+version = "0.55.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d8324ba98c8a94187723cc16c37aefa09504646ee65c3d2c3af495bab5ea701b"
+checksum = "5ce3d6e6ebb00b2cce379f079ad5ec508f9bcc3a9510d9b9c1840ed1d6f8af39"
 dependencies = [
- "aws-smithy-types 0.51.0",
+ "aws-smithy-types",
 ]

 [[package]]
 name = "aws-smithy-query"
-version = "0.51.0"
+version = "0.55.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "83834ed2ff69ea6f6657baf205267dc2c0abe940703503a3e5d60ce23be3d306"
+checksum = "d58edfca32ef9bfbc1ca394599e17ea329cb52d6a07359827be74235b64b3298"
 dependencies = [
- "aws-smithy-types 0.51.0",
+ "aws-smithy-types",
 "urlencoding",
 ]

 [[package]]
 name = "aws-smithy-types"
-version = "0.51.0"
+version = "0.55.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b02e06ea63498c43bc0217ea4d16605d4e58d85c12fc23f6572ff6d0a840c61"
-dependencies = [
- "itoa",
- "num-integer",
- "ryu",
- "time",
-]
-
-[[package]]
-name = "aws-smithy-types"
-version = "0.55.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd0afc731fd1417d791f9145a1e0c30e23ae0beaab9b4814017708ead2fc20f1"
+checksum = "58db46fc1f4f26be01ebdb821751b4e2482cd43aa2b64a0348fb89762defaffa"
 dependencies = [
 "base64-simd",
 "itoa",
@@ -627,40 +537,24 @@ dependencies = [

 [[package]]
 name = "aws-smithy-xml"
-version = "0.51.0"
+version = "0.55.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "246e9f83dd1fdf5d347fa30ae4ad30a9d1d42ce4cd74a93d94afa874646f94cd"
+checksum = "fb557fe4995bd9ec87fb244bbb254666a971dc902a783e9da8b7711610e9664c"
 dependencies = [
 "xmlparser",
 ]

 [[package]]
 name = "aws-types"
-version = "0.51.0"
+version = "0.55.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05701d32da168b44f7ee63147781aed8723e792cc131cb9b18363b5393f17f70"
-dependencies = [
- "aws-smithy-async 0.51.0",
- "aws-smithy-client 0.51.0",
- "aws-smithy-http 0.51.0",
- "aws-smithy-types 0.51.0",
- "http",
- "rustc_version",
- "tracing",
- "zeroize",
-]
-
-[[package]]
-name = "aws-types"
-version = "0.55.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b9b082e329d9a304d39e193ad5c7ab363a0d6507aca6965e0673a746686fb0cc"
+checksum = "de0869598bfe46ec44ffe17e063ed33336e59df90356ca8ff0e8da6f7c1d994b"
 dependencies = [
 "aws-credential-types",
- "aws-smithy-async 0.55.1",
- "aws-smithy-client 0.55.1",
- "aws-smithy-http 0.55.1",
- "aws-smithy-types 0.55.1",
+ "aws-smithy-async",
+ "aws-smithy-client",
+ "aws-smithy-http",
+ "aws-smithy-types",
 "http",
 "rustc_version",
 "tracing",
@@ -2693,6 +2587,21 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"

+[[package]]
+name = "pagectl"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "bytes",
+ "clap 4.2.2",
+ "git-version",
+ "pageserver",
+ "postgres_ffi",
+ "svg_fmt",
+ "utils",
+ "workspace_hack",
+]
+
 [[package]]
 name = "pageserver"
 version = "0.1.0"
@@ -2926,7 +2835,7 @@ dependencies = [
 [[package]]
 name = "postgres"
 version = "0.19.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=0bc41d8503c092b040142214aac3cf7d11d0c19f#0bc41d8503c092b040142214aac3cf7d11d0c19f"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9#2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9"
 dependencies = [
 "bytes",
 "fallible-iterator",
@@ -2939,7 +2848,7 @@ dependencies = [
 [[package]]
 name = "postgres-native-tls"
 version = "0.5.0"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=0bc41d8503c092b040142214aac3cf7d11d0c19f#0bc41d8503c092b040142214aac3cf7d11d0c19f"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9#2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9"
 dependencies = [
 "native-tls",
 "tokio",
@@ -2950,7 +2859,7 @@ dependencies = [
 [[package]]
 name = "postgres-protocol"
 version = "0.6.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=0bc41d8503c092b040142214aac3cf7d11d0c19f#0bc41d8503c092b040142214aac3cf7d11d0c19f"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9#2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9"
 dependencies = [
 "base64 0.20.0",
 "byteorder",
@@ -2968,7 +2877,7 @@ dependencies = [
 [[package]]
 name = "postgres-types"
 version = "0.2.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=0bc41d8503c092b040142214aac3cf7d11d0c19f#0bc41d8503c092b040142214aac3cf7d11d0c19f"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9#2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9"
 dependencies = [
 "bytes",
 "fallible-iterator",
@@ -3367,9 +3276,10 @@ dependencies = [
 "anyhow",
 "async-trait",
 "aws-config",
+ "aws-credential-types",
 "aws-sdk-s3",
- "aws-smithy-http 0.51.0",
- "aws-types 0.55.1",
+ "aws-smithy-http",
+ "aws-types",
 "hyper",
 "metrics",
 "once_cell",
@@ -4376,9 +4286,9 @@ dependencies = [

 [[package]]
 name = "tokio"
-version = "1.27.0"
+version = "1.28.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0de47a4eecbe11f498978a9b29d792f0d2692d1dd003650c24c76510e3bc001"
+checksum = "0aa32867d44e6f2ce3385e89dceb990188b8bb0fb25b0cf576647a6f98ac5105"
 dependencies = [
 "autocfg",
 "bytes",
@@ -4389,7 +4299,7 @@ dependencies = [
 "signal-hook-registry",
 "socket2 0.4.9",
 "tokio-macros",
- "windows-sys 0.45.0",
+ "windows-sys 0.48.0",
 ]

 [[package]]
@@ -4404,9 +4314,9 @@ dependencies = [

 [[package]]
 name = "tokio-macros"
-version = "2.0.0"
+version = "2.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "61a573bdc87985e9d6ddeed1b3d864e8a302c847e40d647746df2f1de209d1ce"
+checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -4426,7 +4336,7 @@ dependencies = [
 [[package]]
 name = "tokio-postgres"
 version = "0.7.7"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=0bc41d8503c092b040142214aac3cf7d11d0c19f#0bc41d8503c092b040142214aac3cf7d11d0c19f"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9#2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9"
 dependencies = [
 "async-trait",
 "byteorder",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,6 +3,7 @@ members = [
    "compute_tools",
    "control_plane",
    "pageserver",
+    "pageserver/ctl",
    "proxy",
    "safekeeper",
    "storage_broker",
@@ -21,9 +22,10 @@ anyhow = { version = "1.0", features = ["backtrace"] }
 async-stream = "0.3"
 async-trait = "0.1"
 atty = "0.2.14"
-aws-config = { version = "0.51.0", default-features = false, features=["rustls"] }
-aws-sdk-s3 = "0.21.0"
-aws-smithy-http = "0.51.0"
+aws-config = { version = "0.55", default-features = false, features=["rustls"] }
+aws-sdk-s3 = "0.25"
+aws-smithy-http = "0.55"
+aws-credential-types = "0.55"
 aws-types = "0.55"
 base64 = "0.13.0"
 bincode = "1.3"
@@ -125,11 +127,11 @@ env_logger = "0.10"
 log = "0.4"

 ## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
-postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
-postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
-postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
+postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9" }
+postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9" }
+postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9" }
+postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9" }
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9" }
 tokio-tar = { git = "https://github.com/neondatabase/tokio-tar.git", rev="404df61437de0feef49ba2ccdbdd94eb8ad6e142" }

 ## Other git libraries
@@ -165,7 +167,7 @@ tonic-build = "0.9"

 # This is only needed for proxy's tests.
 # TODO: we should probably fork `tokio-postgres-rustls` instead.
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9" }

 # Changes the MAX_THREADS limit from 4096 to 32768.
 # This is a temporary workaround for using tracing from many threads in safekeepers code,
--- a/Dockerfile
+++ b/Dockerfile
@@ -47,8 +47,7 @@ RUN set -e \
    && mold -run cargo build  \
      --bin pg_sni_router  \
      --bin pageserver  \
-      --bin pageserver_binutils  \
-      --bin draw_timeline_dir \
+      --bin pagectl  \
      --bin safekeeper  \
      --bin storage_broker  \
      --bin proxy  \
@@ -73,8 +72,7 @@ RUN set -e \

 COPY --from=build --chown=neon:neon /home/nonroot/target/release/pg_sni_router       /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver          /usr/local/bin
-COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver_binutils /usr/local/bin
-COPY --from=build --chown=neon:neon /home/nonroot/target/release/draw_timeline_dir   /usr/local/bin
+COPY --from=build --chown=neon:neon /home/nonroot/target/release/pagectl             /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper          /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_broker         /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy               /usr/local/bin
--- a/Dockerfile.compute-node
+++ b/Dockerfile.compute-node
@@ -415,6 +415,23 @@ RUN apt-get update && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/kq_imcx.control

+#########################################################################################
+#
+# Layer "pg-cron-pg-build"
+# compile pg_cron extension
+#
+#########################################################################################
+FROM build-deps AS pg-cron-pg-build
+COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
+
+ENV PATH "/usr/local/pgsql/bin/:$PATH"
+RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.5.2.tar.gz -O pg_cron.tar.gz && \
+    echo "6f7f0980c03f1e2a6a747060e67bf4a303ca2a50e941e2c19daeed2b44dec744 pg_cron.tar.gz" | sha256sum --check && \
+    mkdir pg_cron-src && cd pg_cron-src && tar xvzf ../pg_cron.tar.gz --strip-components=1 -C . && \
+    make -j $(getconf _NPROCESSORS_ONLN) && \
+    make -j $(getconf _NPROCESSORS_ONLN) install && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_cron.control
+
 #########################################################################################
 #
 # Layer "rust extensions"
@@ -529,6 +546,7 @@ COPY --from=plpgsql-check-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=timescaledb-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-hint-plan-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=kq-imcx-pg-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=pg-cron-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY pgxn/ pgxn/

 RUN make -j $(getconf _NPROCESSORS_ONLN) \
--- a/Dockerfile.vm-compute-node
+++ b/Dockerfile.vm-compute-node
@@ -1,70 +0,0 @@
-# Note: this file *mostly* just builds on Dockerfile.compute-node
-
-ARG SRC_IMAGE
-ARG VM_INFORMANT_VERSION=v0.1.14
-# on libcgroup update, make sure to check bootstrap.sh for changes
-ARG LIBCGROUP_VERSION=v2.0.3
-
-# Pull VM informant, to copy from later
-FROM neondatabase/vm-informant:$VM_INFORMANT_VERSION as informant
-
-# Build cgroup-tools
-#
-# At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically
-# libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-informant
-# requires cgroup v2, so we'll build cgroup-tools ourselves.
-FROM debian:bullseye-slim as libcgroup-builder
-ARG LIBCGROUP_VERSION
-
-RUN set -exu \
-	&& apt update \
-	&& apt install --no-install-recommends -y \
-		git \
-		ca-certificates \
-		automake \
-		cmake \
-		make \
-		gcc \
-		byacc \
-		flex \
-		libtool \
-		libpam0g-dev \
-	&& git clone --depth 1 -b $LIBCGROUP_VERSION https://github.com/libcgroup/libcgroup \
-	&& INSTALL_DIR="/libcgroup-install" \
-	&& mkdir -p "$INSTALL_DIR/bin" "$INSTALL_DIR/include" \
-	&& cd libcgroup \
-	# extracted from bootstrap.sh, with modified flags:
-	&& (test -d m4 || mkdir m4) \
-	&& autoreconf -fi \
-	&& rm -rf autom4te.cache \
-	&& CFLAGS="-O3" ./configure --prefix="$INSTALL_DIR" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy="name=systemd" \
-	# actually build the thing...
-	&& make install
-
-# Combine, starting from non-VM compute node image.
-FROM $SRC_IMAGE as base
-
-# Temporarily set user back to root so we can run adduser, set inittab
-USER root
-RUN adduser vm-informant --disabled-password --no-create-home
-
-RUN set -e \
-	&& rm -f /etc/inittab \
-	&& touch /etc/inittab
-
-RUN set -e \
-	&& echo "::sysinit:cgconfigparser -l /etc/cgconfig.conf -s 1664" >> /etc/inittab \
-	&& CONNSTR="dbname=postgres user=cloud_admin sslmode=disable" \
-	&& ARGS="--auto-restart --cgroup=neon-postgres --pgconnstr=\"$CONNSTR\"" \
-	&& echo "::respawn:su vm-informant -c '/usr/local/bin/vm-informant $ARGS'" >> /etc/inittab
-
-USER postgres
-
-ADD vm-cgconfig.conf /etc/cgconfig.conf
-COPY --from=informant /usr/bin/vm-informant /usr/local/bin/vm-informant
-
-COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/
-COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/
-COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/
-
-ENTRYPOINT ["/usr/sbin/cgexec", "-g", "*:neon-postgres", "/usr/local/bin/compute_ctl"]
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@ The Neon storage engine consists of two major components:
 - Pageserver. Scalable storage backend for the compute nodes.
 - Safekeepers. The safekeepers form a redundant WAL service that received WAL from the compute node, and stores it durably until it has been processed by the pageserver and uploaded to cloud storage.

-See developer documentation in [/docs/SUMMARY.md](/docs/SUMMARY.md) for more information.
+See developer documentation in [SUMMARY.md](/docs/SUMMARY.md) for more information.

 ## Running local installation

@@ -130,11 +130,11 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
 ```sh
 # Create repository in .neon with proper paths to binaries and data
 # Later that would be responsibility of a package install script
-> ./target/debug/neon_local init
+> cargo neon init
 Starting pageserver at '127.0.0.1:64000' in '.neon'.

 # start pageserver, safekeeper, and broker for their intercommunication
-> ./target/debug/neon_local start
+> cargo neon start
 Starting neon broker at 127.0.0.1:50051
 storage_broker started, pid: 2918372
 Starting pageserver at '127.0.0.1:64000' in '.neon'.
@@ -143,19 +143,19 @@ Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'.
 safekeeper 1 started, pid: 2918437

 # create initial tenant and use it as a default for every future neon_local invocation
-> ./target/debug/neon_local tenant create --set-default
+> cargo neon tenant create --set-default
 tenant 9ef87a5bf0d92544f6fafeeb3239695c successfully created on the pageserver
 Created an initial timeline 'de200bd42b49cc1814412c7e592dd6e9' at Lsn 0/16B5A50 for tenant: 9ef87a5bf0d92544f6fafeeb3239695c
 Setting tenant 9ef87a5bf0d92544f6fafeeb3239695c as a default one

 # start postgres compute node
-> ./target/debug/neon_local endpoint start main
+> cargo neon endpoint start main
 Starting new endpoint main (PostgreSQL v14) on timeline de200bd42b49cc1814412c7e592dd6e9 ...
 Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
 Starting postgres at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'

 # check list of running postgres instances
-> ./target/debug/neon_local endpoint list
+> cargo neon endpoint list
 ENDPOINT  ADDRESS          TIMELINE                          BRANCH NAME  LSN        STATUS
 main      127.0.0.1:55432  de200bd42b49cc1814412c7e592dd6e9  main         0/16B5BA8  running
 ```
@@ -177,22 +177,22 @@ postgres=# select * from t;
 3. And create branches and run postgres on them:
 ```sh
 # create branch named migration_check
-> ./target/debug/neon_local timeline branch --branch-name migration_check
+> cargo neon timeline branch --branch-name migration_check
 Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant: 9ef87a5bf0d92544f6fafeeb3239695c. Ancestor timeline: 'main'

 # check branches tree
-> ./target/debug/neon_local timeline list
+> cargo neon timeline list
 (L) main [de200bd42b49cc1814412c7e592dd6e9]
 (L) ┗━ @0/16F9A00: migration_check [b3b863fa45fa9e57e615f9f2d944e601]

 # start postgres on that branch
-> ./target/debug/neon_local endpoint start migration_check --branch-name migration_check
+> cargo neon endpoint start migration_check --branch-name migration_check
 Starting new endpoint migration_check (PostgreSQL v14) on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
 Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
 Starting postgres at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'

 # check the new list of running postgres instances
-> ./target/debug/neon_local endpoint list
+> cargo neon endpoint list
 ENDPOINT         ADDRESS          TIMELINE                          BRANCH NAME      LSN        STATUS
 main             127.0.0.1:55432  de200bd42b49cc1814412c7e592dd6e9  main             0/16F9A38  running
 migration_check  127.0.0.1:55433  b3b863fa45fa9e57e615f9f2d944e601  migration_check  0/16F9A70  running
@@ -221,7 +221,7 @@ postgres=# select * from t;
 4. If you want to run tests afterward (see below), you must stop all the running of the pageserver, safekeeper, and postgres instances
   you have just started. You can terminate them all with one command:
 ```sh
-> ./target/debug/neon_local stop
+> cargo neon stop
 ```

 ## Running tests
@@ -238,9 +238,9 @@ CARGO_BUILD_FLAGS="--features=testing" make

 ## Documentation

-[/docs/](/docs/) Contains a top-level overview of all available markdown documentation.
+[docs](/docs) Contains a top-level overview of all available markdown documentation.

- [/docs/sourcetree.md](/docs/sourcetree.md) contains overview of source tree layout.
+- [sourcetree.md](/docs/sourcetree.md) contains overview of source tree layout.

 To view your `rustdoc` documentation in a browser, try running `cargo doc --no-deps --open`

@@ -265,6 +265,6 @@ To get more familiar with this aspect, refer to:

 ## Join the development

- Read `CONTRIBUTING.md` to learn about project code style and practices.
- To get familiar with a source tree layout, use [/docs/sourcetree.md](/docs/sourcetree.md).
+- Read [CONTRIBUTING.md](/CONTRIBUTING.md) to learn about project code style and practices.
+- To get familiar with a source tree layout, use [sourcetree.md](/docs/sourcetree.md).
 - To learn more about PostgreSQL internals, check http://www.interdb.jp/pg/index.html
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -134,6 +134,7 @@ pub struct Endpoint {

    // port and address of the Postgres server
    pub address: SocketAddr,
+    // postgres major version in the format: 14, 15, etc.
    pg_version: u32,

    // These are not part of the endpoint as such, but the environment
@@ -381,6 +382,11 @@ impl Endpoint {
                conf.append("primary_conninfo", connstr.as_str());
                conf.append("primary_slot_name", slot_name.as_str());
                conf.append("hot_standby", "on");
+                // prefetching of blocks referenced in WAL doesn't make sense for us
+                // Neon hot standby ignores pages that are not in the shared_buffers
+                if self.pg_version >= 15 {
+                    conf.append("recovery_prefetch", "off");
+                }
            }
        }

--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -8,9 +8,7 @@ use std::process::{Child, Command};
 use std::{io, result};

 use anyhow::{bail, Context};
-use pageserver_api::models::{
-    TenantConfigRequest, TenantCreateRequest, TenantInfo, TimelineCreateRequest, TimelineInfo,
-};
+use pageserver_api::models::{self, TenantInfo, TimelineInfo};
 use postgres_backend::AuthType;
 use postgres_connection::{parse_host_port, PgConnectionConfig};
 use reqwest::blocking::{Client, RequestBuilder, Response};
@@ -316,8 +314,8 @@ impl PageServerNode {
        settings: HashMap<&str, &str>,
    ) -> anyhow::Result<TenantId> {
        let mut settings = settings.clone();
-        let request = TenantCreateRequest {
-            new_tenant_id,
+
+        let config = models::TenantConfig {
            checkpoint_distance: settings
                .remove("checkpoint_distance")
                .map(|x| x.parse::<u64>())
@@ -372,6 +370,10 @@ impl PageServerNode {
                .remove("evictions_low_residence_duration_metric_threshold")
                .map(|x| x.to_string()),
        };
+        let request = models::TenantCreateRequest {
+            new_tenant_id,
+            config,
+        };
        if !settings.is_empty() {
            bail!("Unrecognized tenant settings: {settings:?}")
        }
@@ -391,67 +393,81 @@ impl PageServerNode {
            })
    }

-    pub fn tenant_config(&self, tenant_id: TenantId, settings: HashMap<&str, &str>) -> Result<()> {
-        self.http_request(Method::PUT, format!("{}/tenant/config", self.http_base_url))?
-            .json(&TenantConfigRequest {
-                tenant_id,
+    pub fn tenant_config(
+        &self,
+        tenant_id: TenantId,
+        mut settings: HashMap<&str, &str>,
+    ) -> anyhow::Result<()> {
+        let config = {
+            // Braces to make the diff easier to read
+            models::TenantConfig {
                checkpoint_distance: settings
-                    .get("checkpoint_distance")
+                    .remove("checkpoint_distance")
                    .map(|x| x.parse::<u64>())
                    .transpose()
                    .context("Failed to parse 'checkpoint_distance' as an integer")?,
-                checkpoint_timeout: settings.get("checkpoint_timeout").map(|x| x.to_string()),
+                checkpoint_timeout: settings.remove("checkpoint_timeout").map(|x| x.to_string()),
                compaction_target_size: settings
-                    .get("compaction_target_size")
+                    .remove("compaction_target_size")
                    .map(|x| x.parse::<u64>())
                    .transpose()
                    .context("Failed to parse 'compaction_target_size' as an integer")?,
-                compaction_period: settings.get("compaction_period").map(|x| x.to_string()),
+                compaction_period: settings.remove("compaction_period").map(|x| x.to_string()),
                compaction_threshold: settings
-                    .get("compaction_threshold")
+                    .remove("compaction_threshold")
                    .map(|x| x.parse::<usize>())
                    .transpose()
                    .context("Failed to parse 'compaction_threshold' as an integer")?,
                gc_horizon: settings
-                    .get("gc_horizon")
+                    .remove("gc_horizon")
                    .map(|x| x.parse::<u64>())
                    .transpose()
                    .context("Failed to parse 'gc_horizon' as an integer")?,
-                gc_period: settings.get("gc_period").map(|x| x.to_string()),
+                gc_period: settings.remove("gc_period").map(|x| x.to_string()),
                image_creation_threshold: settings
-                    .get("image_creation_threshold")
+                    .remove("image_creation_threshold")
                    .map(|x| x.parse::<usize>())
                    .transpose()
                    .context("Failed to parse 'image_creation_threshold' as non zero integer")?,
-                pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
+                pitr_interval: settings.remove("pitr_interval").map(|x| x.to_string()),
                walreceiver_connect_timeout: settings
-                    .get("walreceiver_connect_timeout")
+                    .remove("walreceiver_connect_timeout")
+                    .map(|x| x.to_string()),
+                lagging_wal_timeout: settings
+                    .remove("lagging_wal_timeout")
                    .map(|x| x.to_string()),
-                lagging_wal_timeout: settings.get("lagging_wal_timeout").map(|x| x.to_string()),
                max_lsn_wal_lag: settings
-                    .get("max_lsn_wal_lag")
+                    .remove("max_lsn_wal_lag")
                    .map(|x| x.parse::<NonZeroU64>())
                    .transpose()
                    .context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
                trace_read_requests: settings
-                    .get("trace_read_requests")
+                    .remove("trace_read_requests")
                    .map(|x| x.parse::<bool>())
                    .transpose()
                    .context("Failed to parse 'trace_read_requests' as bool")?,
                eviction_policy: settings
-                    .get("eviction_policy")
-                    .map(|x| serde_json::from_str(x))
+                    .remove("eviction_policy")
+                    .map(serde_json::from_str)
                    .transpose()
                    .context("Failed to parse 'eviction_policy' json")?,
                min_resident_size_override: settings
-                    .get("min_resident_size_override")
+                    .remove("min_resident_size_override")
                    .map(|x| x.parse::<u64>())
                    .transpose()
                    .context("Failed to parse 'min_resident_size_override' as an integer")?,
                evictions_low_residence_duration_metric_threshold: settings
-                    .get("evictions_low_residence_duration_metric_threshold")
+                    .remove("evictions_low_residence_duration_metric_threshold")
                    .map(|x| x.to_string()),
-            })
+            }
+        };
+
+        if !settings.is_empty() {
+            bail!("Unrecognized tenant settings: {settings:?}")
+        }
+
+        self.http_request(Method::PUT, format!("{}/tenant/config", self.http_base_url))?
+            .json(&models::TenantConfigRequest { tenant_id, config })
            .send()?
            .error_from_body()?;

@@ -483,7 +499,7 @@ impl PageServerNode {
            Method::POST,
            format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
        )?
-        .json(&TimelineCreateRequest {
+        .json(&models::TimelineCreateRequest {
            new_timeline_id,
            ancestor_start_lsn,
            ancestor_timeline_id,
--- a/docs/pageserver-thread-mgmt.md
+++ b/docs/pageserver-thread-mgmt.md
@@ -4,6 +4,11 @@ The pageserver uses Tokio for handling concurrency. Everything runs in
 Tokio tasks, although some parts are written in blocking style and use
 spawn_blocking().

+We currently use std blocking functions for disk I/O, however.  The
+current model is that we consider disk I/Os to be short enough that we
+perform them while running in a Tokio task. Changing all the disk I/O
+calls to async is a TODO.
+
 Each Tokio task is tracked by the `task_mgr` module. It maintains a
 registry of tasks, and which tenant or timeline they are operating
 on.
@@ -21,19 +26,86 @@ also a `shudown_watcher()` Future that can be used with `tokio::select!`
 or similar, to wake up on shutdown.


-### Sync vs async
+### Async cancellation safety

-We use async to wait for incoming data on network connections, and to
-perform other long-running operations. For example, each WAL receiver
-connection is handled by a tokio Task. Once a piece of WAL has been
-received from the network, the task calls the blocking functions in
-the Repository to process the WAL.
+In async Rust, futures can be "cancelled" at any await point, by
+dropping the Future. For example, `tokio::select!` returns as soon as
+one of the Futures returns, and drops the others. `tokio::time::timeout` is
+another example. In the Rust ecosystem, some functions are
+cancellation-safe, meaning they can be safely dropped without
+side-effects, while others are not. See documentation of
+`tokio::select!` for examples.

-The core storage code in `layered_repository/` is synchronous, with
-blocking locks and I/O calls. The current model is that we consider
-disk I/Os to be short enough that we perform them while running in a
-Tokio task. If that becomes a problem, we should use `spawn_blocking`
-before entering the synchronous parts of the code, or switch to using
-tokio I/O functions.
+In the pageserver and safekeeper, async code is *not*
+cancellation-safe by default. Unless otherwise marked, any async
+function that you call cannot be assumed to be async
+cancellation-safe, and must be polled to completion.

-Be very careful when mixing sync and async code!
+The downside of non-cancellation safe code is that you have to be very
+careful when using `tokio::select!`, `tokio::time::timeout`, and other such
+functions that can cause a Future to be dropped. They can only be used
+with functions that are explicitly documented to be cancellation-safe,
+or you need to spawn a separate task to shield from the cancellation.
+
+At the entry points to the code, we also take care to poll futures to
+completion, or shield the rest of the code from surprise cancellations
+by spawning a separate task. The code that handles incoming HTTP
+requests, for example, spawns a separate task for each request,
+because Hyper will drop the request-handling Future if the HTTP
+connection is lost.  (FIXME: our HTTP handlers do not do that
+currently, but we should fix that. See [issue
+3478](https://github.com/neondatabase/neon/issues/3478)).
+
+
+#### How to cancel, then?
+
+If our code is not cancellation-safe, how do you cancel long-running
+tasks? Use CancellationTokens.
+
+TODO: More details on that. And we have an ongoing discussion on what
+to do if cancellations might come from multiple sources.
+
+#### Exceptions
+Some library functions are cancellation-safe, and are explicitly marked
+as such. For example, `utils::seqwait`.
+
+#### Rationale
+
+The alternative would be to make all async code cancellation-safe,
+unless otherwise marked. That way, you could use `tokio::select!` more
+liberally. The reasons we didn't choose that are explained in this
+section.
+
+Writing code in a cancellation-safe manner is tedious, as you need to
+scrutinize every `.await` and ensure that if the `.await` call never
+returns, the system is in a safe, consistent state. In some ways, you
+need to do that with `?` and early `return`s, too, but `.await`s are
+easier to miss. It is also easier to perform cleanup tasks when a
+function returns an `Err` than when an `.await` simply never
+returns. You can use `scopeguard` and Drop guards to perform cleanup
+tasks, but it is more tedious. An `.await` that never returns is more
+similar to a panic.
+
+Note that even if you only use building blocks that themselves are
+cancellation-safe, it doesn't mean that the code as a whole is
+cancellation-safe. For example, consider the following code:
+
+```
+while let Some(i) = work_inbox.recv().await {
+	// If cancelled at the `.await` below, the already-received `i` is lost.
+	if let Err(_) = results_outbox.send(i).await {
+		println!("receiver dropped");
+		return;
+	}
+}
+```
+
+It reads messages from one channel, sends them to another channel. If
+this code is cancelled at the `results_outbox.send(i).await`, the
+message read from the receiver is lost. That may or may not be OK,
+depending on the context.
+
+Another reason to not require cancellation-safety is historical: we
+already had a lot of async code that was not scrutinized for
+cancellation-safety when this issue was raised. Scrutinizing all
+existing code is no fun.
--- a/docs/rfcs/023-the-state-of-pageserver-tenant-relocation.md
+++ b/docs/rfcs/023-the-state-of-pageserver-tenant-relocation.md
@@ -0,0 +1,232 @@
+# The state of pageserver tenant relocation
+
+Created on 17.03.23
+
+## Motivation
+
+There were previous write-ups on the subject. The design of tenant relocation was planned at a time when we had quite a different landscape, i.e. there was no on-demand download/eviction. They were on the horizon, but we still planned for cases when they were not available. Some other things have changed, too: safekeepers now offload WAL to S3, so we're not risking overflowing their disks. Given all of the above, it makes sense to recap and take a look at the options we have now, which adjustments we'd like to make to the original process, etc.
+
+Related (in chronological order):
+
+- Tracking issue with initial discussion: [#886](https://github.com/neondatabase/neon/issues/886)
+- [015. Storage Messaging](015-storage-messaging.md)
+- [020. Pageserver S3 Coordination](020-pageserver-s3-coordination.md)
+
+## Summary
+
+The RFC consists of a walkthrough of prior art on tenant relocation and corresponding problems. It describes 3 approaches.
+
+1. Simplistic approach that uses ignore and is the fastest to implement. The main downside is a requirement of short downtime.
+2. More complicated approach that avoids even short downtime.
+3. Even more complicated approach that will allow multiple pageservers to operate concurrently on the same tenant, possibly allowing for HA cluster topologies and horizontal scaling of reads (i.e. compute talks to multiple pageservers).
+
+The order in which solutions are described is a bit different. We start from 2, then move to possible compromises (aka simplistic approach) and then move to discussing directions for solving HA/Pageserver replica case with 3.
+
+## Components
+
+pageserver, control-plane, safekeepers (a bit)
+
+## Requirements
+
+Relocation procedure should move tenant from one pageserver to another without downtime introduced by storage side. For now restarting compute for applying new configuration is fine.
+
+- component restarts
+- component outage
+- pageserver loss
+
+## The original proposed implementation
+
+The starting point is this sequence:
+
+```mermaid
+sequenceDiagram
+    autonumber
+    participant CP as Control Plane
+    participant PS1 as Pageserver 1
+    participant PS2 as Pageserver 2
+    participant S3
+
+    CP->>PS2: Attach tenant X
+    PS2->>S3: Fetch timelines, indexes for them
+    PS2->>CP: Accepted
+    CP->>CP: Change pageserver id in project
+    CP->>PS1: Detach
+```
+
+Which problems do we have with naive approach?
+
+### Concurrent GC and Compaction
+
+The problem is that they can run on both PS1 and PS2. Consider this example from [Pageserver S3 Coordination RFC](020-pageserver-s3-coordination.md):
+
+```mermaid
+sequenceDiagram
+    autonumber
+    participant PS1
+    participant S3
+    participant PS2
+
+    PS1->>S3: Uploads L1, L2 <br/> Index contains L1 L2
+    PS2->>S3: Attach called, sees L1, L2
+    PS1->>S3: Compaction comes <br/> Removes L1, adds L3
+    note over S3: Index now L2, L3
+    PS2->>S3: Uploads new layer L4 <br/> (added to previous view of the index)
+    note over S3: Index now L1, L2, L4
+```
+
+At this point it is not possible to restore the state from the index: it contains L1, which
+is no longer available in S3, and doesn't contain L3 added by compaction by the
+first pageserver. So if any of the pageservers restart, initial sync will fail
+(or in the on-demand world it will fail a bit later, during a page request from
+the missing layer).
+
+The problem lies in the shared index_part.json. Having intersecting layers from append-only edits is expected to work, though this is uncharted territory without tests.
+
+#### Options
+
+There are several options on how to restrict concurrent access to index file.
+
+First and the simplest one is external orchestration. Control plane which runs migration can use special api call on pageserver to stop background processes (gc, compaction), and even possibly all uploads.
+
+So the sequence becomes:
+
+```mermaid
+sequenceDiagram
+    autonumber
+    participant CP as Control Plane
+    participant PS1 as Pageserver 1
+    participant PS2 as Pageserver 2
+    participant S3
+
+    CP->>PS1: Pause background jobs, pause uploading new layers.
+    CP->>PS2: Attach tenant X.
+    PS2->>S3: Fetch timelines, index, start background operations
+    PS2->>CP: Accepted
+    CP->>CP: Monitor PS2 last record lsn, ensure OK lag
+    CP->>CP: Change pageserver id in project
+    CP->>PS1: Detach
+```
+
+The downside of this sequence is the potential rollback process. What if something goes wrong on new pageserver? Can we safely roll back to source pageserver?
+
+There are two questions:
+
+#### How can we detect that something went wrong?
+
+We can run usual availability check (consists of compute startup and an update of one row).
+Note that we can't run a separate compute for that before touching the compute that the client runs the actual workload on, because we can't have two simultaneous computes running in read-write mode on the same timeline (enforced by the safekeepers' consensus algorithm). So we can run some read-only check first (basebackup), then change the pageserver id and run the availability check. If it fails, we can roll back to the old pageserver.
+
+#### What can go wrong? And how can we safely roll back?
+
+In the sequence above during attach we start background processes/uploads. They change state in remote storage so it is possible that after rollback remote state will be different from one that was observed by source pageserver. So if target pageserver goes wild then source pageserver may fail to start with changed remote state.
+
+Proposed option would be to implement a barrier (read-only) mode when pageserver does not update remote state.
+
+So the sequence for happy path becomes this one:
+
+```mermaid
+sequenceDiagram
+    autonumber
+    participant CP as Control Plane
+    participant PS1 as Pageserver 1
+    participant PS2 as Pageserver 2
+    participant S3
+
+    CP->>PS1: Pause background jobs, pause uploading new layers.
+    CP->>PS2: Attach tenant X in remote readonly mode.
+    PS2->>S3: Fetch timelines, index
+    PS2->>CP: Accepted
+    CP->>CP: Monitor PS2 last record lsn, ensure OK lag
+    CP->>CP: Change pageserver id in project
+    CP->>CP: Run successful availability check
+    CP->>PS2: Start uploads, background tasks
+    CP->>PS1: Detach
+```
+
+With this sequence we restrict any changes to remote storage to one pageserver. So there is no concurrent access at all, not only for index_part.json, but for everything else too. This approach makes it possible to roll back after failure on new pageserver.
+
+The sequence with roll back process:
+
+```mermaid
+sequenceDiagram
+    autonumber
+    participant CP as Control Plane
+    participant PS1 as Pageserver 1
+    participant PS2 as Pageserver 2
+    participant S3
+
+    CP->>PS1: Pause background jobs, pause uploading new layers.
+    CP->>PS2: Attach tenant X in remote readonly mode.
+    PS2->>S3: Fetch timelines, index
+    PS2->>CP: Accepted
+    CP->>CP: Monitor PS2 last record lsn, ensure OK lag
+    CP->>CP: Change pageserver id in project
+    CP->>CP: Availability check Failed
+    CP->>CP: Change pageserver id back
+    CP->>PS1: Resume remote operations
+    CP->>PS2: Ignore (instead of detach for investigation purposes)
+```
+
+## Concurrent branch creation
+
+Another problem is the possibility of concurrent branch creation calls.
+
+I.e. during migration create_branch can be called on the old pageserver, and the newly created branch won't be seen on the new pageserver. Prior art includes prototyping an approach that tried to mirror such branches, but it has since lost its importance: attach is now fast because we don't need to download all the data, and additionally, to the best of my knowledge of control plane internals (cc @ololobus to confirm), operations on one project are executed sequentially, so such a case is not possible. So the branch create operation will be executed only when relocation is completed. As a safety measure we can forbid branch creation for tenants that are in the readonly remote state.
+
+## Simplistic approach
+
+The simplistic approach differs from the one described above in that it calls ignore on the source tenant first and only then calls attach on the target pageserver. The approach above does it in the opposite order, thus opening the possibility of the race conditions we strive to avoid.
+
+The approach largely follows this guide: <https://github.com/neondatabase/cloud/wiki/Cloud:-Ad-hoc-tenant-relocation>
+
+The happy path sequence:
+
+```mermaid
+sequenceDiagram
+    autonumber
+    participant CP as Control Plane
+    participant PS1 as Pageserver 1
+    participant PS2 as Pageserver 2
+    participant SK as Safekeeper
+    participant S3
+
+    CP->>CP: Enable maintenance mode
+    CP->>PS1: Ignore
+    CP->>PS2: Attach
+    PS2->>CP: Accepted
+    loop Delete layers for each timeline
+        CP->>PS2: Get last record lsn
+        CP->>SK: Get commit lsn
+        CP->>CP: OK? Timed out?
+    end
+    CP->>CP: Change pageserver id in project
+    CP->>CP: Run successful availability check
+    CP->>CP: Disable maintenance mode
+    CP->>PS1: Detach ignored
+```
+
+The sequence has exactly the same rollback problems as the previous approach described above. They can be resolved in the same way.
+
+Most probably we'd like to move forward without this safety measure and implement it on top of this approach to make progress towards the downtime-less one.
+
+## Lease based approach
+
+In order to allow for concurrent operation on the same data on remote storage for multiple pageservers we need to go further than external orchestration.
+
+NOTE: [020. Pageserver S3 Coordination](020-pageserver-s3-coordination.md) discusses one more approach that relies on duplication of index_part.json for each pageserver operating on the timeline. This approach still requires external coordination which makes certain things easier but requires additional bookkeeping to account for multiple index_part.json files. Discussion/comparison with proposed lease based approach
+
+The problems are outlined in [020. Pageserver S3 Coordination](020-pageserver-s3-coordination.md), and the suggested solution includes the [Coordination based approach](020-pageserver-s3-coordination.md#coordination-based-approach). This would allow basic leader election among pageservers, so they can decide which node will be responsible for running GC and compaction. The process is based on extensive communication via the storage broker and consists of a lease that is taken by one of the pageservers, which then keeps extending it to continue serving in the leader role.
+
+There are two options for ingesting new data into a pageserver in the follower role. One option is to avoid WAL ingestion altogether and rely on notifications from the leader to discover new layers on S3. The main downside of this approach is that the follower will always lag behind the primary node, because it won't have the last layer until it is uploaded to remote storage. In case of a primary failure, the follower will be required to reingest the last segment (up to 256 MB of WAL currently), which slows down recovery. Additionally, if a compute is connected to the follower pageserver, it will observe the latest data with a delay. Queries from the compute will likely experience bigger delays when a recent lsn is required.
+
+The second option is to consume the WAL stream on both pageservers. In this case the only problem is non-deterministic layer generation. Additional bookkeeping will be required to deduplicate layers from the primary with local ones. Some process needs to merge them somehow to remove duplicated data. Additionally, we need to have good test coverage to ensure that our implementation of `get_page@lsn` properly handles intersecting layers.
+
+There is another tradeoff. Approaches may be different in amount of traffic between system components. With first approach there can be increased traffic between follower and remote storage. But only in case follower has some activity that actually requests pages (!). With other approach traffic increase will be permanent and will be caused by two WAL streams instead of one.
+
+## Summary
+
+Proposed implementation strategy:
+
+Go with the simplest approach for now. Then work on tech debt and increase test coverage. Then gradually move forward to the second approach by implementing safety measures first, finishing with switching the order of the ignore and attach operations.
+
+And only then go to lease based approach to solve HA/Pageserver replica use cases.
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -131,11 +131,26 @@ pub struct TimelineCreateRequest {
 }

 #[serde_as]
-#[derive(Serialize, Deserialize, Default)]
+#[derive(Serialize, Deserialize, Debug, Default)]
+#[serde(deny_unknown_fields)]
 pub struct TenantCreateRequest {
    #[serde(default)]
    #[serde_as(as = "Option<DisplayFromStr>")]
    pub new_tenant_id: Option<TenantId>,
+    #[serde(flatten)]
+    pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it
+}
+
+impl std::ops::Deref for TenantCreateRequest {
+    type Target = TenantConfig;
+
+    fn deref(&self) -> &Self::Target {
+        &self.config
+    }
+}
+
+#[derive(Serialize, Deserialize, Debug, Default)]
+pub struct TenantConfig {
    pub checkpoint_distance: Option<u64>,
    pub checkpoint_timeout: Option<String>,
    pub compaction_target_size: Option<u64>,
@@ -178,37 +193,26 @@ impl TenantCreateRequest {
 }

 #[serde_as]
-#[derive(Serialize, Deserialize)]
+#[derive(Serialize, Deserialize, Debug)]
+#[serde(deny_unknown_fields)]
 pub struct TenantConfigRequest {
    #[serde_as(as = "DisplayFromStr")]
    pub tenant_id: TenantId,
-    #[serde(default)]
-    pub checkpoint_distance: Option<u64>,
-    pub checkpoint_timeout: Option<String>,
-    pub compaction_target_size: Option<u64>,
-    pub compaction_period: Option<String>,
-    pub compaction_threshold: Option<usize>,
-    pub gc_horizon: Option<u64>,
-    pub gc_period: Option<String>,
-    pub image_creation_threshold: Option<usize>,
-    pub pitr_interval: Option<String>,
-    pub walreceiver_connect_timeout: Option<String>,
-    pub lagging_wal_timeout: Option<String>,
-    pub max_lsn_wal_lag: Option<NonZeroU64>,
-    pub trace_read_requests: Option<bool>,
-    // We defer the parsing of the eviction_policy field to the request handler.
-    // Otherwise we'd have to move the types for eviction policy into this package.
-    // We might do that once the eviction feature has stabilizied.
-    // For now, this field is not even documented in the openapi_spec.yml.
-    pub eviction_policy: Option<serde_json::Value>,
-    pub min_resident_size_override: Option<u64>,
-    pub evictions_low_residence_duration_metric_threshold: Option<String>,
+    #[serde(flatten)]
+    pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it
+}
+
+impl std::ops::Deref for TenantConfigRequest {
+    type Target = TenantConfig;
+
+    fn deref(&self) -> &Self::Target {
+        &self.config
+    }
 }

 impl TenantConfigRequest {
    pub fn new(tenant_id: TenantId) -> TenantConfigRequest {
-        TenantConfigRequest {
-            tenant_id,
+        let config = TenantConfig {
            checkpoint_distance: None,
            checkpoint_timeout: None,
            compaction_target_size: None,
@@ -225,7 +229,30 @@ impl TenantConfigRequest {
            eviction_policy: None,
            min_resident_size_override: None,
            evictions_low_residence_duration_metric_threshold: None,
-        }
+        };
+        TenantConfigRequest { tenant_id, config }
+    }
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct TenantAttachRequest {
+    pub config: TenantAttachConfig,
+}
+
+/// Newtype to enforce deny_unknown_fields on TenantConfig for
+/// its usage inside `TenantAttachRequest`.
+#[derive(Debug, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct TenantAttachConfig {
+    #[serde(flatten)]
+    allowing_unknown_fields: TenantConfig,
+}
+
+impl std::ops::Deref for TenantAttachConfig {
+    type Target = TenantConfig;
+
+    fn deref(&self) -> &Self::Target {
+        &self.allowing_unknown_fields
    }
 }

@@ -765,4 +792,43 @@ mod tests {
        assert!(format!("{:?}", &original_broken.state).contains("reason"));
        assert!(format!("{:?}", &original_broken.state).contains("backtrace info"));
    }
+
+    #[test]
+    fn test_reject_unknown_field() {
+        let id = TenantId::generate();
+        let create_request = json!({
+            "new_tenant_id": id.to_string(),
+            "unknown_field": "unknown_value".to_string(),
+        });
+        let err = serde_json::from_value::<TenantCreateRequest>(create_request).unwrap_err();
+        assert!(
+            err.to_string().contains("unknown field `unknown_field`"),
+            "expect unknown field `unknown_field` error, got: {}",
+            err
+        );
+
+        let id = TenantId::generate();
+        let config_request = json!({
+            "tenant_id": id.to_string(),
+            "unknown_field": "unknown_value".to_string(),
+        });
+        let err = serde_json::from_value::<TenantConfigRequest>(config_request).unwrap_err();
+        assert!(
+            err.to_string().contains("unknown field `unknown_field`"),
+            "expect unknown field `unknown_field` error, got: {}",
+            err
+        );
+
+        let attach_request = json!({
+            "config": {
+                "unknown_field": "unknown_value".to_string(),
+            },
+        });
+        let err = serde_json::from_value::<TenantAttachRequest>(attach_request).unwrap_err();
+        assert!(
+            err.to_string().contains("unknown field `unknown_field`"),
+            "expect unknown field `unknown_field` error, got: {}",
+            err
+        );
+    }
 }
--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -12,6 +12,7 @@ aws-smithy-http.workspace = true
 aws-types.workspace = true
 aws-config.workspace = true
 aws-sdk-s3.workspace = true
+aws-credential-types.workspace = true
 hyper = { workspace = true, features = ["stream"] }
 serde.workspace = true
 serde_json.workspace = true
--- a/libs/remote_storage/src/s3_bucket.rs
+++ b/libs/remote_storage/src/s3_bucket.rs
@@ -9,14 +9,15 @@ use std::sync::Arc;
 use anyhow::Context;
 use aws_config::{
    environment::credentials::EnvironmentVariableCredentialsProvider,
-    imds::credentials::ImdsCredentialsProvider,
-    meta::credentials::{CredentialsProviderChain, LazyCachingCredentialsProvider},
+    imds::credentials::ImdsCredentialsProvider, meta::credentials::CredentialsProviderChain,
 };
+use aws_credential_types::cache::CredentialsCache;
 use aws_sdk_s3::{
-    config::Config,
-    error::{GetObjectError, GetObjectErrorKind},
-    types::{ByteStream, SdkError},
-    Client, Endpoint, Region,
+    config::{Config, Region},
+    error::SdkError,
+    operation::get_object::GetObjectError,
+    primitives::ByteStream,
+    Client,
 };
 use aws_smithy_http::body::SdkBody;
 use hyper::Body;
@@ -125,28 +126,23 @@ impl S3Bucket {

        let credentials_provider = {
            // uses "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"
-            let env_creds = EnvironmentVariableCredentialsProvider::new();
+            CredentialsProviderChain::first_try(
+                "env",
+                EnvironmentVariableCredentialsProvider::new(),
+            )
            // uses imds v2
-            let imds = ImdsCredentialsProvider::builder().build();
-
-            // finally add caching.
-            // this might change in future, see https://github.com/awslabs/aws-sdk-rust/issues/629
-            LazyCachingCredentialsProvider::builder()
-                .load(CredentialsProviderChain::first_try("env", env_creds).or_else("imds", imds))
-                .build()
+            .or_else("imds", ImdsCredentialsProvider::builder().build())
        };

        let mut config_builder = Config::builder()
            .region(Region::new(aws_config.bucket_region.clone()))
+            .credentials_cache(CredentialsCache::lazy())
            .credentials_provider(credentials_provider);

        if let Some(custom_endpoint) = aws_config.endpoint.clone() {
-            let endpoint = Endpoint::immutable(
-                custom_endpoint
-                    .parse()
-                    .expect("Failed to parse S3 custom endpoint"),
-            );
-            config_builder.set_endpoint_resolver(Some(Arc::new(endpoint)));
+            config_builder = config_builder
+                .endpoint_url(custom_endpoint)
+                .force_path_style(true);
        }
        let client = Client::from_conf(config_builder.build());

@@ -229,14 +225,9 @@ impl S3Bucket {
                    ))),
                })
            }
-            Err(SdkError::ServiceError {
-                err:
-                    GetObjectError {
-                        kind: GetObjectErrorKind::NoSuchKey(..),
-                        ..
-                    },
-                ..
-            }) => Err(DownloadError::NotFound),
+            Err(SdkError::ServiceError(e)) if matches!(e.err(), GetObjectError::NoSuchKey(_)) => {
+                Err(DownloadError::NotFound)
+            }
            Err(e) => {
                metrics::inc_get_object_fail();
                Err(DownloadError::Other(anyhow::anyhow!(
--- a/libs/utils/src/http/json.rs
+++ b/libs/utils/src/http/json.rs
@@ -8,12 +8,26 @@ use super::error::ApiError;
 pub async fn json_request<T: for<'de> Deserialize<'de>>(
    request: &mut Request<Body>,
 ) -> Result<T, ApiError> {
-    let whole_body = hyper::body::aggregate(request.body_mut())
+    json_request_or_empty_body(request)
+        .await?
+        .context("missing request body")
+        .map_err(ApiError::BadRequest)
+}
+
+/// Will be removed as part of https://github.com/neondatabase/neon/issues/4282
+pub async fn json_request_or_empty_body<T: for<'de> Deserialize<'de>>(
+    request: &mut Request<Body>,
+) -> Result<Option<T>, ApiError> {
+    let body = hyper::body::aggregate(request.body_mut())
        .await
        .context("Failed to read request body")
        .map_err(ApiError::BadRequest)?;
-    serde_json::from_reader(whole_body.reader())
+    if body.remaining() == 0 {
+        return Ok(None);
+    }
+    serde_json::from_reader(body.reader())
        .context("Failed to parse json request")
+        .map(Some)
        .map_err(ApiError::BadRequest)
 }

--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -60,10 +60,6 @@ pub mod tracing_span_assert;

 pub mod rate_limit;

-/// Primitive for coalescing operations into a single task which will not be cancelled by for
-/// example external http client closing the connection.
-pub mod shared_retryable;
-
 mod failpoint_macro_helpers {

    /// use with fail::cfg("$name", "return(2000)")
@@ -100,7 +96,6 @@ mod failpoint_macro_helpers {
        tracing::info!("failpoint {:?}: sleep done", name);
    }
 }
-
 pub use failpoint_macro_helpers::failpoint_sleep_helper;

 /// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
--- a/libs/utils/src/seqwait.rs
+++ b/libs/utils/src/seqwait.rs
@@ -144,6 +144,8 @@ where
    ///
    /// This call won't complete until someone has called `advance`
    /// with a number greater than or equal to the one we're waiting for.
+    ///
+    /// This function is async cancellation-safe.
    pub async fn wait_for(&self, num: V) -> Result<(), SeqWaitError> {
        match self.queue_for_wait(num) {
            Ok(None) => Ok(()),
@@ -159,6 +161,8 @@ where
    ///
    /// If that hasn't happened after the specified timeout duration,
    /// [`SeqWaitError::Timeout`] will be returned.
+    ///
+    /// This function is async cancellation-safe.
    pub async fn wait_for_timeout(
        &self,
        num: V,
--- a/libs/utils/src/shared_retryable.rs
+++ b/libs/utils/src/shared_retryable.rs
@@ -1,657 +0,0 @@
-use std::future::Future;
-use std::sync::Arc;
-
-/// Container using which many request handlers can come together and join a single task to
-/// completion instead of racing each other and their own cancellation.
-///
-/// In a picture:
-///
-/// ```text
-/// SharedRetryable::try_restart         Spawned task completes with only one concurrent attempt
-///                             \       /
-///      request handler 1 ---->|--X
-///      request handler 2 ---->|-------|
-///      request handler 3 ---->|-------|
-///                             |       |
-///                             v       |
-///       one spawned task      \------>/
-///
-/// (X = cancelled during await)
-/// ```
-///
-/// Implementation is cancel safe. Implementation and internal structure are hurt by the inability
-/// to just spawn the task, but this is needed for `pageserver` usage. Within `pageserver`, the
-/// `task_mgr` must be used to spawn the future because it will cause awaiting during shutdown.
-///
-/// Implementation exposes a fully decomposed [`SharedRetryable::try_restart`] which requires the
-/// caller to do the spawning before awaiting for the result. If the caller is dropped while this
-/// happens, a new attempt will be required, and all concurrent awaiters will see a
-/// [`RetriedTaskPanicked`] error.
-///
-/// There is another "family of APIs" [`SharedRetryable::attempt_spawn`] for infallible futures. It is
-/// just provided for completeness, and it does not have a fully decomposed version like
-/// `try_restart`.
-///
-/// For `try_restart_*` family of APIs, there is a concept of two leveled results. The inner level
-/// is returned by the executed future. It needs to be `Clone`. Most errors are not `Clone`, so
-/// implementation advice is to log the happened error, and not propagate more than a label as the
-/// "inner error" which will be used to build an outer error. The outer error will also have to be
-/// convertable from [`RetriedTaskPanicked`] to absorb that case as well.
-///
-/// ## Example
-///
-/// A shared service value completes the infallible work once, even if called concurrently by
-/// multiple cancellable tasks.
-///
-/// Example moved as a test `service_example`.
-#[derive(Clone)]
-pub struct SharedRetryable<V> {
-    inner: Arc<tokio::sync::Mutex<MaybeDone<V>>>,
-}
-
-impl<V> Default for SharedRetryable<V> {
-    fn default() -> Self {
-        Self {
-            inner: Arc::new(tokio::sync::Mutex::new(MaybeDone::default())),
-        }
-    }
-}
-
-/// Determine if an error is transient or permanent.
-pub trait Retryable {
-    fn is_permanent(&self) -> bool {
-        true
-    }
-}
-
-pub trait MakeFuture {
-    type Future: Future<Output = Self::Output> + Send + 'static;
-    type Output: Send + 'static;
-
-    fn make_future(self) -> Self::Future;
-}
-
-impl<Fun, Fut, R> MakeFuture for Fun
-where
-    Fun: FnOnce() -> Fut,
-    Fut: Future<Output = R> + Send + 'static,
-    R: Send + 'static,
-{
-    type Future = Fut;
-    type Output = R;
-
-    fn make_future(self) -> Self::Future {
-        self()
-    }
-}
-
-/// Retried task panicked, was cancelled, or never spawned (see [`SharedRetryable::try_restart`]).
-#[derive(Debug, PartialEq, Eq)]
-pub struct RetriedTaskPanicked;
-
-impl<T, E1> SharedRetryable<Result<T, E1>>
-where
-    T: Clone + std::fmt::Debug + Send + 'static,
-    E1: Retryable + Clone + std::fmt::Debug + Send + 'static,
-{
-    /// Restart a previously failed operation unless it already completed with a terminal result.
-    ///
-    /// Many futures can call this function and and get the terminal result from an earlier attempt
-    /// or start a new attempt, or join an existing one.
-    ///
-    /// Compared to `Self::try_restart`, this method also spawns the future to run, which would
-    /// otherwise have to be done manually.
-    #[cfg(test)]
-    pub async fn try_restart_spawn<E2>(
-        &self,
-        retry_with: impl MakeFuture<Output = Result<T, E1>>,
-    ) -> Result<T, E2>
-    where
-        E2: From<E1> + From<RetriedTaskPanicked> + Send + 'static,
-    {
-        let (recv, maybe_fut) = self.try_restart(retry_with).await;
-
-        if let Some(fut) = maybe_fut {
-            // top level function, we must spawn, pageserver cannot use this
-            tokio::spawn(fut);
-        }
-
-        recv.await
-    }
-
-    /// Restart a previously failed operation unless it already completed with a terminal result.
-    ///
-    /// Many futures can call this function and get the terminal result from an earlier attempt or
-    /// start a new attempt, or join an existing one.
-    ///
-    /// If a task calling this method is cancelled before spawning the returned future, this
-    /// attempt is immediatedly deemed as having panicked will happen, but without a panic ever
-    /// happening.
-    ///
-    /// Returns one future for waiting for the result and possibly another which needs to be
-    /// spawned when `Some`. Spawning has to happen before waiting is started, otherwise the first
-    /// future will never make progress.
-    ///
-    /// This complication exists because on `pageserver` we cannot use `tokio::spawn` directly
-    /// at this time.
-    pub async fn try_restart<E2>(
-        &self,
-        retry_with: impl MakeFuture<Output = Result<T, E1>>,
-    ) -> (
-        impl Future<Output = Result<T, E2>> + Send + 'static,
-        Option<impl Future<Output = ()> + Send + 'static>,
-    )
-    where
-        E2: From<E1> + From<RetriedTaskPanicked> + Send + 'static,
-    {
-        use futures::future::Either;
-
-        match self.decide_to_retry_or_join(retry_with).await {
-            Ok(terminal) => (Either::Left(async move { terminal }), None),
-            Err((rx, maybe_fut)) => {
-                let recv = Self::make_oneshot_alike_receiver(rx);
-
-                (Either::Right(recv), maybe_fut)
-            }
-        }
-    }
-
-    /// Returns a Ok if the previous attempt had resulted in a terminal result. Err is returned
-    /// when an attempt can be joined and possibly needs to be spawned.
-    async fn decide_to_retry_or_join<E2>(
-        &self,
-        retry_with: impl MakeFuture<Output = Result<T, E1>>,
-    ) -> Result<
-        Result<T, E2>,
-        (
-            tokio::sync::broadcast::Receiver<Result<T, E1>>,
-            Option<impl Future<Output = ()> + Send + 'static>,
-        ),
-    >
-    where
-        E2: From<E1> + From<RetriedTaskPanicked>,
-    {
-        let mut g = self.inner.lock().await;
-
-        let maybe_rx = match &*g {
-            MaybeDone::Done(Ok(t)) => return Ok(Ok(t.to_owned())),
-            MaybeDone::Done(Err(e)) if e.is_permanent() => return Ok(Err(E2::from(e.to_owned()))),
-            MaybeDone::Pending(weak) => {
-                // failure to upgrade can mean only one thing: there was an unexpected
-                // panic which we consider as a transient retryable error.
-                weak.upgrade()
-            }
-            MaybeDone::Done(Err(_retryable)) => None,
-            MaybeDone::NotStarted => None,
-        };
-
-        let (strong, maybe_fut) = match maybe_rx {
-            Some(strong) => (strong, None),
-            None => {
-                // new attempt
-                // panic safety: invoke the factory before configuring the pending value
-                let fut = retry_with.make_future();
-
-                let (strong, fut) = self.make_run_and_complete(fut, &mut g);
-                (strong, Some(fut))
-            }
-        };
-
-        // important: the Arc<Receiver> is not held after unlocking
-        // important: we resubscribe before lock is released to be sure to get a message which
-        // is sent once receiver is dropped
-        let rx = strong.resubscribe();
-        drop(strong);
-        Err((rx, maybe_fut))
-    }
-
-    /// Configure a new attempt, but leave spawning it to the caller.
-    ///
-    /// Returns an `Arc<Receiver<V>>` which is valid until the attempt completes, and the future
-    /// which will need to run to completion outside the lifecycle of the caller.
-    fn make_run_and_complete(
-        &self,
-        fut: impl Future<Output = Result<T, E1>> + Send + 'static,
-        g: &mut tokio::sync::MutexGuard<'_, MaybeDone<Result<T, E1>>>,
-    ) -> (
-        Arc<tokio::sync::broadcast::Receiver<Result<T, E1>>>,
-        impl Future<Output = ()> + Send + 'static,
-    ) {
-        #[cfg(debug_assertions)]
-        match &**g {
-            MaybeDone::Pending(weak) => {
-                assert!(
-                    weak.upgrade().is_none(),
-                    "when starting a restart, should no longer have an upgradeable channel"
-                );
-            }
-            MaybeDone::Done(Err(err)) => {
-                assert!(
-                    !err.is_permanent(),
-                    "when restarting, the err must be transient"
-                );
-            }
-            MaybeDone::Done(Ok(_)) => {
-                panic!("unexpected restart after a completion on MaybeDone");
-            }
-            MaybeDone::NotStarted => {}
-        }
-
-        self.make_run_and_complete_any(fut, g)
-    }
-
-    /// Oneshot alike as in it's a future which will be consumed by an `await`.
-    ///
-    /// Otherwise the caller might think it's beneficial or reasonable to poll the channel multiple
-    /// times.
-    async fn make_oneshot_alike_receiver<E2>(
-        mut rx: tokio::sync::broadcast::Receiver<Result<T, E1>>,
-    ) -> Result<T, E2>
-    where
-        E2: From<E1> + From<RetriedTaskPanicked>,
-    {
-        use tokio::sync::broadcast::error::RecvError;
-
-        match rx.recv().await {
-            Ok(Ok(t)) => Ok(t),
-            Ok(Err(e)) => Err(E2::from(e)),
-            Err(RecvError::Closed | RecvError::Lagged(_)) => {
-                // lagged doesn't mean anything with 1 send, but whatever, handle it the same
-                // this case should only ever happen if a panick happened in the `fut`.
-                Err(E2::from(RetriedTaskPanicked))
-            }
-        }
-    }
-}
-
-impl<V> SharedRetryable<V>
-where
-    V: std::fmt::Debug + Clone + Send + 'static,
-{
-    /// Attempt to run once a spawned future to completion.
-    ///
-    /// Any previous attempt which panicked will be retried, but the `RetriedTaskPanicked` will be
-    /// returned when the most recent attempt panicked.
-    #[cfg(test)]
-    pub async fn attempt_spawn(
-        &self,
-        attempt_with: impl MakeFuture<Output = V>,
-    ) -> Result<V, RetriedTaskPanicked> {
-        let (rx, maybe_fut) = {
-            let mut g = self.inner.lock().await;
-
-            let maybe_rx = match &*g {
-                MaybeDone::Done(v) => return Ok(v.to_owned()),
-                MaybeDone::Pending(weak) => {
-                    // see comment in try_restart
-                    weak.upgrade()
-                }
-                MaybeDone::NotStarted => None,
-            };
-
-            let (strong, maybe_fut) = match maybe_rx {
-                Some(strong) => (strong, None),
-                None => {
-                    let fut = attempt_with.make_future();
-
-                    let (strong, fut) = self.make_run_and_complete_any(fut, &mut g);
-                    (strong, Some(fut))
-                }
-            };
-
-            // see decide_to_retry_or_join for important notes
-            let rx = strong.resubscribe();
-            drop(strong);
-            (rx, maybe_fut)
-        };
-
-        if let Some(fut) = maybe_fut {
-            // this is a top level function, need to spawn directly
-            // from pageserver one wouldn't use this but more piecewise functions
-            tokio::spawn(fut);
-        }
-
-        let recv = Self::make_oneshot_alike_receiver_any(rx);
-
-        recv.await
-    }
-
-    /// Configure a new attempt, but leave spawning it to the caller.
-    ///
-    /// Forgetting the returned future is outside of scope of any correctness guarantees; all of
-    /// the waiters will then be deadlocked, and the MaybeDone will forever be pending. Dropping
-    /// and not running the future will then require a new attempt.
-    ///
-    /// Also returns an `Arc<Receiver<V>>` which is valid until the attempt completes.
-    fn make_run_and_complete_any(
-        &self,
-        fut: impl Future<Output = V> + Send + 'static,
-        g: &mut tokio::sync::MutexGuard<'_, MaybeDone<V>>,
-    ) -> (
-        Arc<tokio::sync::broadcast::Receiver<V>>,
-        impl Future<Output = ()> + Send + 'static,
-    ) {
-        let (tx, rx) = tokio::sync::broadcast::channel(1);
-        let strong = Arc::new(rx);
-
-        **g = MaybeDone::Pending(Arc::downgrade(&strong));
-
-        let retry = {
-            let strong = strong.clone();
-            self.clone().run_and_complete(fut, tx, strong)
-        };
-
-        #[cfg(debug_assertions)]
-        match &**g {
-            MaybeDone::Pending(weak) => {
-                let rx = weak.upgrade().expect("holding the weak and strong locally");
-                assert!(Arc::ptr_eq(&strong, &rx));
-            }
-            _ => unreachable!("MaybeDone::pending must be set after spawn_and_run_complete_any"),
-        }
-
-        (strong, retry)
-    }
-
-    /// Run the actual attempt, and communicate the response via both:
-    /// - setting the `MaybeDone::Done`
-    /// - the broadcast channel
-    async fn run_and_complete(
-        self,
-        fut: impl Future<Output = V>,
-        tx: tokio::sync::broadcast::Sender<V>,
-        strong: Arc<tokio::sync::broadcast::Receiver<V>>,
-    ) {
-        let res = fut.await;
-
-        {
-            let mut g = self.inner.lock().await;
-            g.complete(&strong, res.clone());
-
-            // make the weak un-upgradeable by dropping the final alive
-            // reference to it. it is final Arc because the Arc never escapes
-            // the critical section in `decide_to_retry_or_join` or `attempt_spawn`.
-            Arc::try_unwrap(strong).expect("expected this to be the only Arc<Receiver<V>>");
-        }
-
-        // now no one can get the Pending(weak) value to upgrade and they only see
-        // the Done(res).
-        //
-        // send the result value to listeners, if any
-        drop(tx.send(res));
-    }
-
-    #[cfg(test)]
-    async fn make_oneshot_alike_receiver_any(
-        mut rx: tokio::sync::broadcast::Receiver<V>,
-    ) -> Result<V, RetriedTaskPanicked> {
-        use tokio::sync::broadcast::error::RecvError;
-
-        match rx.recv().await {
-            Ok(t) => Ok(t),
-            Err(RecvError::Closed | RecvError::Lagged(_)) => {
-                // lagged doesn't mean anything with 1 send, but whatever, handle it the same
-                // this case should only ever happen if a panick happened in the `fut`.
-                Err(RetriedTaskPanicked)
-            }
-        }
-    }
-}
-
-/// MaybeDone handles synchronization for multiple requests and the single actual task.
-///
-/// If request handlers witness `Pending` which they are able to upgrade, they are guaranteed a
-/// useful `recv().await`, where useful means "value" or "disconnect" arrives. If upgrade fails,
-/// this means that "disconnect" has happened in the past.
-///
-/// On successful execution the one executing task will set this to `Done` variant, with the actual
-/// resulting value.
-#[derive(Debug, Default)]
-pub enum MaybeDone<V> {
-    Pending(std::sync::Weak<tokio::sync::broadcast::Receiver<V>>),
-    Done(V),
-    #[default]
-    NotStarted,
-}
-
-impl<V: std::fmt::Debug> MaybeDone<V> {
-    fn complete(&mut self, _strong: &Arc<tokio::sync::broadcast::Receiver<V>>, outcome: V) {
-        #[cfg(debug_assertions)]
-        match self {
-            MaybeDone::Pending(weak) => {
-                let same = weak
-                    .upgrade()
-                    // we don't yet have Receiver::same_channel
-                    .map(|rx| Arc::ptr_eq(_strong, &rx))
-                    .unwrap_or(false);
-                assert!(same, "different channel had been replaced or dropped");
-            }
-            other => panic!("unexpected MaybeDone: {other:?}"),
-        }
-
-        *self = MaybeDone::Done(outcome);
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::{RetriedTaskPanicked, Retryable, SharedRetryable};
-    use std::sync::Arc;
-
-    #[derive(Debug)]
-    enum OuterError {
-        AttemptPanicked,
-        Unlucky,
-    }
-
-    #[derive(Clone, Debug)]
-    enum InnerError {
-        Unlucky,
-    }
-
-    impl Retryable for InnerError {
-        fn is_permanent(&self) -> bool {
-            false
-        }
-    }
-
-    impl From<InnerError> for OuterError {
-        fn from(_: InnerError) -> Self {
-            OuterError::Unlucky
-        }
-    }
-
-    impl From<RetriedTaskPanicked> for OuterError {
-        fn from(_: RetriedTaskPanicked) -> Self {
-            OuterError::AttemptPanicked
-        }
-    }
-
-    #[tokio::test]
-    async fn restartable_until_permanent() {
-        let shr = SharedRetryable::<Result<u8, InnerError>>::default();
-
-        let res = shr
-            .try_restart_spawn(|| async move { panic!("really unlucky") })
-            .await;
-
-        assert!(matches!(res, Err(OuterError::AttemptPanicked)));
-
-        let res = shr
-            .try_restart_spawn(|| async move { Err(InnerError::Unlucky) })
-            .await;
-
-        assert!(matches!(res, Err(OuterError::Unlucky)));
-
-        let res = shr.try_restart_spawn(|| async move { Ok(42) }).await;
-
-        assert!(matches!(res, Ok::<u8, OuterError>(42)));
-
-        let res = shr
-            .try_restart_spawn(|| async move { panic!("rerun should clone Ok(42)") })
-            .await;
-
-        assert!(matches!(res, Ok::<u8, OuterError>(42)));
-    }
-
-    /// Demonstration of the SharedRetryable::attempt
-    #[tokio::test]
-    async fn attemptable_until_no_panic() {
-        let shr = SharedRetryable::<u8>::default();
-
-        let res = shr
-            .attempt_spawn(|| async move { panic!("should not interfere") })
-            .await;
-
-        assert!(matches!(res, Err(RetriedTaskPanicked)), "{res:?}");
-
-        let res = shr.attempt_spawn(|| async move { 42 }).await;
-
-        assert_eq!(res, Ok(42));
-
-        let res = shr
-            .attempt_spawn(|| async move { panic!("should not be called") })
-            .await;
-
-        assert_eq!(res, Ok(42));
-    }
-
-    #[tokio::test]
-    async fn cancelling_spawner_is_fine() {
-        let shr = SharedRetryable::<Result<u8, InnerError>>::default();
-
-        let (recv1, maybe_fut) = shr
-            .try_restart(|| async move { panic!("should not have been called") })
-            .await;
-        let should_be_spawned = maybe_fut.unwrap();
-
-        let (recv2, maybe_fut) = shr
-            .try_restart(|| async move {
-                panic!("should never be called because waiting on should_be_spawned")
-            })
-            .await;
-        assert!(
-            matches!(maybe_fut, None),
-            "only the first one should had created the future"
-        );
-
-        let mut recv1 = std::pin::pin!(recv1);
-        let mut recv2 = std::pin::pin!(recv2);
-
-        tokio::select! {
-            _ = tokio::time::sleep(std::time::Duration::from_millis(100)) => {},
-            _ = &mut recv1 => unreachable!("should not have completed because should_be_spawned not spawned"),
-            _ = &mut recv2 => unreachable!("should not have completed because should_be_spawned not spawned"),
-        }
-
-        drop(should_be_spawned);
-
-        let res = recv1.await;
-        assert!(matches!(res, Err(OuterError::AttemptPanicked)), "{res:?}");
-
-        let res = recv2.await;
-        assert!(matches!(res, Err(OuterError::AttemptPanicked)), "{res:?}");
-
-        // but we can still reach a terminal state if the api is not misused or the
-        // should_be_spawned winner is not cancelled
-
-        let recv1 = shr.try_restart_spawn::<OuterError>(|| async move { Ok(42) });
-        let recv2 = shr.try_restart_spawn::<OuterError>(|| async move { Ok(43) });
-
-        assert_eq!(recv1.await.unwrap(), 42);
-        assert_eq!(recv2.await.unwrap(), 42, "43 should never be returned");
-    }
-
-    #[tokio::test]
-    async fn service_example() {
-        #[derive(Debug, Clone, Copy)]
-        enum OneLevelError {
-            TaskPanicked,
-        }
-
-        impl Retryable for OneLevelError {
-            fn is_permanent(&self) -> bool {
-                // for a single level errors, this wording is weird
-                !matches!(self, OneLevelError::TaskPanicked)
-            }
-        }
-
-        impl From<RetriedTaskPanicked> for OneLevelError {
-            fn from(_: RetriedTaskPanicked) -> Self {
-                OneLevelError::TaskPanicked
-            }
-        }
-
-        #[derive(Clone, Default)]
-        struct Service(SharedRetryable<Result<u8, OneLevelError>>);
-
-        impl Service {
-            async fn work(
-                &self,
-                completions: Arc<std::sync::atomic::AtomicUsize>,
-            ) -> Result<u8, OneLevelError> {
-                self.0
-                    .try_restart_spawn(|| async move {
-                        // give time to cancel some of the tasks
-                        tokio::time::sleep(std::time::Duration::from_secs(1)).await;
-                        completions.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
-                        Self::work_once().await
-                    })
-                    .await
-            }
-
-            async fn work_once() -> Result<u8, OneLevelError> {
-                Ok(42)
-            }
-        }
-
-        let svc = Service::default();
-
-        let mut js = tokio::task::JoinSet::new();
-
-        let barrier = Arc::new(tokio::sync::Barrier::new(10 + 1));
-        let completions = Arc::new(std::sync::atomic::AtomicUsize::new(0));
-
-        let handles = (0..10)
-            .map(|_| {
-                js.spawn({
-                    let svc = svc.clone();
-                    let barrier = barrier.clone();
-                    let completions = completions.clone();
-                    async move {
-                        // make sure all tasks are ready to start at the same time
-                        barrier.wait().await;
-                        // after successfully starting the work, any of the futures could get cancelled
-                        svc.work(completions).await
-                    }
-                })
-            })
-            .collect::<Vec<_>>();
-
-        barrier.wait().await;
-
-        tokio::time::sleep(std::time::Duration::from_millis(100)).await;
-
-        handles[5].abort();
-
-        let mut cancellations = 0;
-
-        while let Some(res) = js.join_next().await {
-            // all complete with the same result
-            match res {
-                Ok(res) => assert_eq!(res.unwrap(), 42),
-                Err(je) => {
-                    // except for the one task we cancelled; it's cancelling
-                    // does not interfere with the result
-                    assert!(je.is_cancelled());
-                    cancellations += 1;
-                    assert_eq!(cancellations, 1, "only 6th task was aborted");
-                    // however we cannot assert that everytime we get to cancel the 6th task
-                }
-            }
-        }
-
-        // there will be at most one terminal completion
-        assert_eq!(completions.load(std::sync::atomic::Ordering::Relaxed), 1);
-    }
-}
--- a/pageserver/ctl/Cargo.toml
+++ b/pageserver/ctl/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "pagectl"
+version = "0.1.0"
+edition.workspace = true
+license.workspace = true
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+anyhow.workspace = true
+bytes.workspace = true
+clap = { workspace = true, features = ["string"] }
+git-version.workspace = true
+pageserver = { path = ".." }
+postgres_ffi.workspace = true
+utils.workspace = true
+svg_fmt.workspace = true
+workspace_hack.workspace = true
--- a/pageserver/ctl/src/draw_timeline_dir.rs
+++ b/pageserver/ctl/src/draw_timeline_dir.rs
@@ -12,7 +12,7 @@
 //! Example use:
 //! ```
 //! $ ls test_output/test_pgbench\[neon-45-684\]/repo/tenants/$TENANT/timelines/$TIMELINE | \
-//! $   grep "__" | cargo run --release --bin draw_timeline_dir > out.svg
+//! $   grep "__" | cargo run --release --bin pagectl draw-timeline-dir > out.svg
 //! $ firefox out.svg
 //! ```
 //!
@@ -62,7 +62,7 @@ fn parse_filename(name: &str) -> (Range<Key>, Range<Lsn>) {
    (keys, lsns)
 }

-fn main() -> Result<()> {
+pub fn main() -> Result<()> {
    // Parse layer filenames from stdin
    let mut ranges: Vec<(Range<Key>, Range<Lsn>)> = vec![];
    let stdin = io::stdin();
--- a/pageserver/ctl/src/layer_map_analyzer.rs
+++ b/pageserver/ctl/src/layer_map_analyzer.rs
@@ -6,7 +6,7 @@ use anyhow::Result;
 use std::cmp::Ordering;
 use std::collections::BinaryHeap;
 use std::ops::Range;
-use std::{env, fs, path::Path, path::PathBuf, str, str::FromStr};
+use std::{fs, path::Path, str};

 use pageserver::page_cache::PAGE_SZ;
 use pageserver::repository::{Key, KEY_SIZE};
@@ -18,12 +18,14 @@ use pageserver::virtual_file::VirtualFile;

 use utils::{bin_ser::BeSer, lsn::Lsn};

+use crate::AnalyzeLayerMapCmd;
+
 const MIN_HOLE_LENGTH: i128 = (128 * 1024 * 1024 / PAGE_SZ) as i128;
 const DEFAULT_MAX_HOLES: usize = 10;

 /// Wrapper for key range to provide reverse ordering by range length for BinaryHeap
 #[derive(PartialEq, Eq)]
-struct Hole(Range<Key>);
+pub struct Hole(Range<Key>);

 impl Ord for Hole {
    fn cmp(&self, other: &Self) -> Ordering {
@@ -39,11 +41,11 @@ impl PartialOrd for Hole {
    }
 }

-struct LayerFile {
-    key_range: Range<Key>,
-    lsn_range: Range<Lsn>,
-    is_delta: bool,
-    holes: Vec<Hole>,
+pub(crate) struct LayerFile {
+    pub key_range: Range<Key>,
+    pub lsn_range: Range<Lsn>,
+    pub is_delta: bool,
+    pub holes: Vec<Hole>,
 }

 impl LayerFile {
@@ -67,7 +69,7 @@ impl LayerFile {
    }
 }

-fn parse_filename(name: &str) -> Option<LayerFile> {
+pub(crate) fn parse_filename(name: &str) -> Option<LayerFile> {
    let split: Vec<&str> = name.split("__").collect();
    if split.len() != 2 {
        return None;
@@ -127,18 +129,9 @@ fn get_holes(path: &Path, max_holes: usize) -> Result<Vec<Hole>> {
    Ok(holes)
 }

-fn main() -> Result<()> {
-    let args: Vec<String> = env::args().collect();
-    if args.len() < 2 {
-        println!("Usage: layer_map_analyzer PAGESERVER_DATA_DIR [MAX_HOLES]");
-        return Ok(());
-    }
-    let storage_path = PathBuf::from_str(&args[1])?;
-    let max_holes = if args.len() > 2 {
-        args[2].parse::<usize>().unwrap()
-    } else {
-        DEFAULT_MAX_HOLES
-    };
+pub(crate) fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> {
+    let storage_path = &cmd.path;
+    let max_holes = cmd.max_holes.unwrap_or(DEFAULT_MAX_HOLES);

    // Initialize virtual_file (file desriptor cache) and page cache which are needed to access layer persistent B-Tree.
    pageserver::virtual_file::init(10);
--- a/pageserver/ctl/src/layers.rs
+++ b/pageserver/ctl/src/layers.rs
@@ -0,0 +1,169 @@
+use std::path::{Path, PathBuf};
+
+use anyhow::Result;
+use clap::Subcommand;
+use pageserver::tenant::block_io::BlockCursor;
+use pageserver::tenant::disk_btree::DiskBtreeReader;
+use pageserver::tenant::storage_layer::delta_layer::{BlobRef, Summary};
+use pageserver::{page_cache, virtual_file};
+use pageserver::{
+    repository::{Key, KEY_SIZE},
+    tenant::{
+        block_io::FileBlockReader, disk_btree::VisitDirection,
+        storage_layer::delta_layer::DELTA_KEY_SIZE,
+    },
+    virtual_file::VirtualFile,
+};
+use std::fs;
+use utils::bin_ser::BeSer;
+
+use crate::layer_map_analyzer::parse_filename;
+
+#[derive(Subcommand)]
+pub(crate) enum LayerCmd {
+    /// List all tenants and timelines under the pageserver path
+    ///
+    /// Example: `cargo run --bin pagectl layer list .neon/`
+    List { path: PathBuf },
+    /// List all layers of a given tenant and timeline
+    ///
+    /// Example: `cargo run --bin pagectl layer list-layer .neon/ $TENANT $TIMELINE`
+    ListLayer {
+        path: PathBuf,
+        tenant: String,
+        timeline: String,
+    },
+    /// Dump all information of a layer file
+    DumpLayer {
+        path: PathBuf,
+        tenant: String,
+        timeline: String,
+        /// The id from list-layer command
+        id: usize,
+    },
+}
+
+fn read_delta_file(path: impl AsRef<Path>) -> Result<()> {
+    use pageserver::tenant::blob_io::BlobCursor;
+    use pageserver::tenant::block_io::BlockReader;
+
+    let path = path.as_ref();
+    virtual_file::init(10);
+    page_cache::init(100);
+    let file = FileBlockReader::new(VirtualFile::open(path)?);
+    let summary_blk = file.read_blk(0)?;
+    let actual_summary = Summary::des_prefix(summary_blk.as_ref())?;
+    let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
+        actual_summary.index_start_blk,
+        actual_summary.index_root_blk,
+        &file,
+    );
+    // TODO(chi): dedup w/ `delta_layer.rs` by exposing the API.
+    let mut all = vec![];
+    tree_reader.visit(
+        &[0u8; DELTA_KEY_SIZE],
+        VisitDirection::Forwards,
+        |key, value_offset| {
+            let curr = Key::from_slice(&key[..KEY_SIZE]);
+            all.push((curr, BlobRef(value_offset)));
+            true
+        },
+    )?;
+    let mut cursor = BlockCursor::new(&file);
+    for (k, v) in all {
+        let value = cursor.read_blob(v.pos())?;
+        println!("key:{} value_len:{}", k, value.len());
+    }
+    // TODO(chi): special handling for last key?
+    Ok(())
+}
+
+pub(crate) fn main(cmd: &LayerCmd) -> Result<()> {
+    match cmd {
+        LayerCmd::List { path } => {
+            for tenant in fs::read_dir(path.join("tenants"))? {
+                let tenant = tenant?;
+                if !tenant.file_type()?.is_dir() {
+                    continue;
+                }
+                println!("tenant {}", tenant.file_name().to_string_lossy());
+                for timeline in fs::read_dir(tenant.path().join("timelines"))? {
+                    let timeline = timeline?;
+                    if !timeline.file_type()?.is_dir() {
+                        continue;
+                    }
+                    println!("- timeline {}", timeline.file_name().to_string_lossy());
+                }
+            }
+        }
+        LayerCmd::ListLayer {
+            path,
+            tenant,
+            timeline,
+        } => {
+            let timeline_path = path
+                .join("tenants")
+                .join(tenant)
+                .join("timelines")
+                .join(timeline);
+            let mut idx = 0;
+            for layer in fs::read_dir(timeline_path)? {
+                let layer = layer?;
+                if let Some(layer_file) = parse_filename(&layer.file_name().into_string().unwrap())
+                {
+                    println!(
+                        "[{:3}]  key:{}-{}\n       lsn:{}-{}\n       delta:{}",
+                        idx,
+                        layer_file.key_range.start,
+                        layer_file.key_range.end,
+                        layer_file.lsn_range.start,
+                        layer_file.lsn_range.end,
+                        layer_file.is_delta,
+                    );
+                    idx += 1;
+                }
+            }
+        }
+        LayerCmd::DumpLayer {
+            path,
+            tenant,
+            timeline,
+            id,
+        } => {
+            let timeline_path = path
+                .join("tenants")
+                .join(tenant)
+                .join("timelines")
+                .join(timeline);
+            let mut idx = 0;
+            for layer in fs::read_dir(timeline_path)? {
+                let layer = layer?;
+                if let Some(layer_file) = parse_filename(&layer.file_name().into_string().unwrap())
+                {
+                    if *id == idx {
+                        // TODO(chi): dedup code
+                        println!(
+                            "[{:3}]  key:{}-{}\n       lsn:{}-{}\n       delta:{}",
+                            idx,
+                            layer_file.key_range.start,
+                            layer_file.key_range.end,
+                            layer_file.lsn_range.start,
+                            layer_file.lsn_range.end,
+                            layer_file.is_delta,
+                        );
+
+                        if layer_file.is_delta {
+                            read_delta_file(layer.path())?;
+                        } else {
+                            anyhow::bail!("not supported yet :(");
+                        }
+
+                        break;
+                    }
+                    idx += 1;
+                }
+            }
+        }
+    }
+    Ok(())
+}
--- a/pageserver/ctl/src/main.rs
+++ b/pageserver/ctl/src/main.rs
@@ -0,0 +1,179 @@
+//! A helper tool to manage pageserver binary files.
+//! Accepts a file as an argument, attempts to parse it with all ways possible
+//! and prints its interpreted content.
+//!
+//! Separate, `metadata` subcommand allows to print and update pageserver's metadata file.
+
+mod draw_timeline_dir;
+mod layer_map_analyzer;
+mod layers;
+
+use clap::{Parser, Subcommand};
+use layers::LayerCmd;
+use pageserver::{
+    context::{DownloadBehavior, RequestContext},
+    page_cache,
+    task_mgr::TaskKind,
+    tenant::{dump_layerfile_from_path, metadata::TimelineMetadata},
+    virtual_file,
+};
+use postgres_ffi::ControlFileData;
+use std::path::{Path, PathBuf};
+use utils::{lsn::Lsn, project_git_version};
+
+project_git_version!(GIT_VERSION);
+
+#[derive(Parser)]
+#[command(
+    version = GIT_VERSION,
+    about = "Neon Pageserver binutils",
+    long_about = "Reads pageserver (and related) binary files management utility"
+)]
+#[command(propagate_version = true)]
+struct CliOpts {
+    #[command(subcommand)]
+    command: Commands,
+}
+
+#[derive(Subcommand)]
+enum Commands {
+    Metadata(MetadataCmd),
+    PrintLayerFile(PrintLayerFileCmd),
+    DrawTimeline {},
+    AnalyzeLayerMap(AnalyzeLayerMapCmd),
+    #[command(subcommand)]
+    Layer(LayerCmd),
+}
+
+/// Read and update pageserver metadata file
+#[derive(Parser)]
+struct MetadataCmd {
+    /// Input metadata file path
+    metadata_path: PathBuf,
+    /// Replace disk consistent Lsn
+    disk_consistent_lsn: Option<Lsn>,
+    /// Replace previous record Lsn
+    prev_record_lsn: Option<Lsn>,
+    /// Replace latest gc cutoff
+    latest_gc_cuttoff: Option<Lsn>,
+}
+
+#[derive(Parser)]
+struct PrintLayerFileCmd {
+    /// Pageserver data path
+    path: PathBuf,
+}
+
+#[derive(Parser)]
+struct AnalyzeLayerMapCmd {
+    /// Pageserver data path
+    path: PathBuf,
+    /// Max holes
+    max_holes: Option<usize>,
+}
+
+fn main() -> anyhow::Result<()> {
+    let cli = CliOpts::parse();
+
+    match cli.command {
+        Commands::Layer(cmd) => {
+            layers::main(&cmd)?;
+        }
+        Commands::Metadata(cmd) => {
+            handle_metadata(&cmd)?;
+        }
+        Commands::DrawTimeline {} => {
+            draw_timeline_dir::main()?;
+        }
+        Commands::AnalyzeLayerMap(cmd) => {
+            layer_map_analyzer::main(&cmd)?;
+        }
+        Commands::PrintLayerFile(cmd) => {
+            if let Err(e) = read_pg_control_file(&cmd.path) {
+                println!(
+                    "Failed to read input file as a pg control one: {e:#}\n\
+                    Attempting to read it as layer file"
+                );
+                print_layerfile(&cmd.path)?;
+            }
+        }
+    };
+    Ok(())
+}
+
+fn read_pg_control_file(control_file_path: &Path) -> anyhow::Result<()> {
+    let control_file = ControlFileData::decode(&std::fs::read(control_file_path)?)?;
+    println!("{control_file:?}");
+    let control_file_initdb = Lsn(control_file.checkPoint);
+    println!(
+        "pg_initdb_lsn: {}, aligned: {}",
+        control_file_initdb,
+        control_file_initdb.align()
+    );
+    Ok(())
+}
+
+fn print_layerfile(path: &Path) -> anyhow::Result<()> {
+    // Basic initialization of things that don't change after startup
+    virtual_file::init(10);
+    page_cache::init(100);
+    let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
+    dump_layerfile_from_path(path, true, &ctx)
+}
+
+fn handle_metadata(
+    MetadataCmd {
+        metadata_path: path,
+        disk_consistent_lsn,
+        prev_record_lsn,
+        latest_gc_cuttoff,
+    }: &MetadataCmd,
+) -> Result<(), anyhow::Error> {
+    let metadata_bytes = std::fs::read(path)?;
+    let mut meta = TimelineMetadata::from_bytes(&metadata_bytes)?;
+    println!("Current metadata:\n{meta:?}");
+    let mut update_meta = false;
+    if let Some(disk_consistent_lsn) = disk_consistent_lsn {
+        meta = TimelineMetadata::new(
+            *disk_consistent_lsn,
+            meta.prev_record_lsn(),
+            meta.ancestor_timeline(),
+            meta.ancestor_lsn(),
+            meta.latest_gc_cutoff_lsn(),
+            meta.initdb_lsn(),
+            meta.pg_version(),
+        );
+        update_meta = true;
+    }
+    if let Some(prev_record_lsn) = prev_record_lsn {
+        meta = TimelineMetadata::new(
+            meta.disk_consistent_lsn(),
+            Some(*prev_record_lsn),
+            meta.ancestor_timeline(),
+            meta.ancestor_lsn(),
+            meta.latest_gc_cutoff_lsn(),
+            meta.initdb_lsn(),
+            meta.pg_version(),
+        );
+        update_meta = true;
+    }
+    if let Some(latest_gc_cuttoff) = latest_gc_cuttoff {
+        meta = TimelineMetadata::new(
+            meta.disk_consistent_lsn(),
+            meta.prev_record_lsn(),
+            meta.ancestor_timeline(),
+            meta.ancestor_lsn(),
+            *latest_gc_cuttoff,
+            meta.initdb_lsn(),
+            meta.pg_version(),
+        );
+        update_meta = true;
+    }
+
+    if update_meta {
+        let metadata_bytes = meta.to_bytes()?;
+        std::fs::write(path, metadata_bytes)?;
+    }
+
+    Ok(())
+}
--- a/pageserver/src/bin/pageserver_binutils.rs
+++ b/pageserver/src/bin/pageserver_binutils.rs
@@ -1,157 +0,0 @@
-//! A helper tool to manage pageserver binary files.
-//! Accepts a file as an argument, attempts to parse it with all ways possible
-//! and prints its interpreted context.
-//!
-//! Separate, `metadata` subcommand allows to print and update pageserver's metadata file.
-use std::{
-    path::{Path, PathBuf},
-    str::FromStr,
-};
-
-use anyhow::Context;
-use clap::{value_parser, Arg, Command};
-
-use pageserver::{
-    context::{DownloadBehavior, RequestContext},
-    page_cache,
-    task_mgr::TaskKind,
-    tenant::{dump_layerfile_from_path, metadata::TimelineMetadata},
-    virtual_file,
-};
-use postgres_ffi::ControlFileData;
-use utils::{lsn::Lsn, project_git_version};
-
-project_git_version!(GIT_VERSION);
-
-const METADATA_SUBCOMMAND: &str = "metadata";
-
-fn main() -> anyhow::Result<()> {
-    let arg_matches = cli().get_matches();
-
-    match arg_matches.subcommand() {
-        Some((subcommand_name, subcommand_matches)) => {
-            let path = subcommand_matches
-                .get_one::<PathBuf>("metadata_path")
-                .context("'metadata_path' argument is missing")?
-                .to_path_buf();
-            anyhow::ensure!(
-                subcommand_name == METADATA_SUBCOMMAND,
-                "Unknown subcommand {subcommand_name}"
-            );
-            handle_metadata(&path, subcommand_matches)?;
-        }
-        None => {
-            let path = arg_matches
-                .get_one::<PathBuf>("path")
-                .context("'path' argument is missing")?
-                .to_path_buf();
-            println!(
-                "No subcommand specified, attempting to guess the format for file {}",
-                path.display()
-            );
-            if let Err(e) = read_pg_control_file(&path) {
-                println!(
-                    "Failed to read input file as a pg control one: {e:#}\n\
-                    Attempting to read it as layer file"
-                );
-                print_layerfile(&path)?;
-            }
-        }
-    };
-    Ok(())
-}
-
-fn read_pg_control_file(control_file_path: &Path) -> anyhow::Result<()> {
-    let control_file = ControlFileData::decode(&std::fs::read(control_file_path)?)?;
-    println!("{control_file:?}");
-    let control_file_initdb = Lsn(control_file.checkPoint);
-    println!(
-        "pg_initdb_lsn: {}, aligned: {}",
-        control_file_initdb,
-        control_file_initdb.align()
-    );
-    Ok(())
-}
-
-fn print_layerfile(path: &Path) -> anyhow::Result<()> {
-    // Basic initialization of things that don't change after startup
-    virtual_file::init(10);
-    page_cache::init(100);
-    let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
-    dump_layerfile_from_path(path, true, &ctx)
-}
-
-fn handle_metadata(path: &Path, arg_matches: &clap::ArgMatches) -> Result<(), anyhow::Error> {
-    let metadata_bytes = std::fs::read(path)?;
-    let mut meta = TimelineMetadata::from_bytes(&metadata_bytes)?;
-    println!("Current metadata:\n{meta:?}");
-    let mut update_meta = false;
-    if let Some(disk_consistent_lsn) = arg_matches.get_one::<String>("disk_consistent_lsn") {
-        meta = TimelineMetadata::new(
-            Lsn::from_str(disk_consistent_lsn)?,
-            meta.prev_record_lsn(),
-            meta.ancestor_timeline(),
-            meta.ancestor_lsn(),
-            meta.latest_gc_cutoff_lsn(),
-            meta.initdb_lsn(),
-            meta.pg_version(),
-        );
-        update_meta = true;
-    }
-    if let Some(prev_record_lsn) = arg_matches.get_one::<String>("prev_record_lsn") {
-        meta = TimelineMetadata::new(
-            meta.disk_consistent_lsn(),
-            Some(Lsn::from_str(prev_record_lsn)?),
-            meta.ancestor_timeline(),
-            meta.ancestor_lsn(),
-            meta.latest_gc_cutoff_lsn(),
-            meta.initdb_lsn(),
-            meta.pg_version(),
-        );
-        update_meta = true;
-    }
-
-    if update_meta {
-        let metadata_bytes = meta.to_bytes()?;
-        std::fs::write(path, metadata_bytes)?;
-    }
-
-    Ok(())
-}
-
-fn cli() -> Command {
-    Command::new("Neon Pageserver binutils")
-        .about("Reads pageserver (and related) binary files management utility")
-        .version(GIT_VERSION)
-        .arg(
-            Arg::new("path")
-                .help("Input file path")
-                .value_parser(value_parser!(PathBuf))
-                .required(false),
-        )
-        .subcommand(
-            Command::new(METADATA_SUBCOMMAND)
-                .about("Read and update pageserver metadata file")
-                .arg(
-                    Arg::new("metadata_path")
-                        .help("Input metadata file path")
-                        .value_parser(value_parser!(PathBuf))
-                        .required(false),
-                )
-                .arg(
-                    Arg::new("disk_consistent_lsn")
-                        .long("disk_consistent_lsn")
-                        .help("Replace disk consistent Lsn"),
-                )
-                .arg(
-                    Arg::new("prev_record_lsn")
-                        .long("prev_record_lsn")
-                        .help("Replace previous record Lsn"),
-                ),
-        )
-}
-
-#[test]
-fn verify_cli() {
-    cli().debug_assert();
-}
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -797,7 +797,8 @@ impl PageServerConf {
            )?);
        }
        if let Some(max_lsn_wal_lag) = item.get("max_lsn_wal_lag") {
-            t_conf.max_lsn_wal_lag = Some(parse_toml_from_str("max_lsn_wal_lag", max_lsn_wal_lag)?);
+            t_conf.max_lsn_wal_lag =
+                Some(deserialize_from_item("max_lsn_wal_lag", max_lsn_wal_lag)?);
        }
        if let Some(trace_read_requests) = item.get("trace_read_requests") {
            t_conf.trace_read_requests =
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -363,11 +363,29 @@ paths:
        * MUST NOT ASSUME that the request has been lost, based on the observation
          that a subsequent tenant status request returns 404. The request may
          still be in flight. It must be retried.
+
+        The client SHOULD supply a `TenantConfig` for the tenant in the request body.
+        Settings specified in the config override the pageserver's defaults.
+        It is guaranteed that the config settings are applied before the pageserver
+        starts operating on the tenant. E.g., if the config specifies a specific
+        PITR interval for a tenant, then that setting will be in effect before the
+        pageserver starts the garbage collection loop. This enables a client to
+        guarantee a specific PITR setting across detach/attach cycles.
+        The pageserver will reject the request if it cannot parse the config, or
+        if there are any unknown fields in it.
+
+        If the client does not supply a config, the pageserver will use its defaults.
+        This behavior is deprecated: https://github.com/neondatabase/neon/issues/4282
+      requestBody:
+        required: false
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/TenantAttachRequest"
      responses:
        "202":
          description: Tenant attaching scheduled
        "400":
-          description: Error when no tenant id found in path parameters
          content:
            application/json:
              schema:
@@ -741,13 +759,16 @@ paths:
                $ref: "#/components/schemas/Error"
    post:
      description: |
-        Create a tenant. Returns new tenant id on success.\
+        Create a tenant. Returns new tenant id on success.
+
        If no new tenant id is specified in parameters, it would be generated. It's an error to recreate the same tenant.
+
+        Invalid fields in the tenant config will cause the request to be rejected with status 400.
      requestBody:
        content:
          application/json:
            schema:
-              $ref: "#/components/schemas/TenantCreateInfo"
+              $ref: "#/components/schemas/TenantCreateRequest"
      responses:
        "201":
          description: New tenant created successfully
@@ -790,11 +811,13 @@ paths:
    put:
      description: |
        Update tenant's config.
+
+        Invalid fields in the tenant config will cause the request to be rejected with status 400.
      requestBody:
        content:
          application/json:
            schema:
-              $ref: "#/components/schemas/TenantConfigInfo"
+              $ref: "#/components/schemas/TenantConfigRequest"
      responses:
        "200":
          description: OK
@@ -846,7 +869,7 @@ paths:
          content:
            application/json:
              schema:
-                $ref: "#/components/schemas/TenantConfig"
+                $ref: "#/components/schemas/TenantConfigResponse"
        "400":
          description: Malformed get tenanant config request
          content:
@@ -909,35 +932,34 @@ components:
            See the tenant `/attach` endpoint for more information.
          type: string
          enum: [ "maybe", "attached" ]
-    TenantCreateInfo:
+    TenantCreateRequest:
+      allOf:
+        - $ref: '#/components/schemas/TenantConfig'
+        - type: object
+          properties:
+            new_tenant_id:
+              type: string
+              format: hex
+    TenantAttachRequest:
+      type: object
+      required:
+        - config
+      properties:
+        config:
+          $ref: '#/components/schemas/TenantConfig'
+    TenantConfigRequest:
+      allOf:
+        - $ref: '#/components/schemas/TenantConfig'
+        - type: object
+          required:
+            - tenant_id
+          properties:
+            tenant_id:
+              type: string
+              format: hex
+    TenantConfig:
      type: object
      properties:
-        new_tenant_id:
-          type: string
-          format: hex
-        tenant_id:
-          type: string
-          format: hex
-        gc_period:
-          type: string
-        gc_horizon:
-          type: integer
-        pitr_interval:
-          type: string
-        checkpoint_distance:
-          type: integer
-        checkpoint_timeout:
-          type: string
-        compaction_period:
-          type: string
-        compaction_threshold:
-          type: string
-    TenantConfigInfo:
-      type: object
-      properties:
-        tenant_id:
-          type: string
-          format: hex
        gc_period:
          type: string
        gc_horizon:
@@ -964,13 +986,13 @@ components:
          type: integer
        trace_read_requests:
          type: boolean
-    TenantConfig:
+    TenantConfigResponse:
      type: object
      properties:
        tenant_specific_overrides:
-          $ref: "#/components/schemas/TenantConfigInfo"
+          $ref: "#/components/schemas/TenantConfig"
        effective_config:
-          $ref: "#/components/schemas/TenantConfigInfo"
+          $ref: "#/components/schemas/TenantConfig"
    TimelineInfo:
      type: object
      required:
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -5,12 +5,13 @@ use anyhow::{anyhow, Context, Result};
 use hyper::StatusCode;
 use hyper::{Body, Request, Response, Uri};
 use metrics::launch_timestamp::LaunchTimestamp;
-use pageserver_api::models::DownloadRemoteLayersTaskSpawnRequest;
+use pageserver_api::models::{DownloadRemoteLayersTaskSpawnRequest, TenantAttachRequest};
 use remote_storage::GenericRemoteStorage;
 use tenant_size_model::{SizeResult, StorageModel};
 use tokio_util::sync::CancellationToken;
 use tracing::*;
 use utils::http::endpoint::RequestSpan;
+use utils::http::json::json_request_or_empty_body;
 use utils::http::request::{get_request_param, must_get_query_param, parse_query_param};

 use super::models::{
@@ -19,7 +20,7 @@ use super::models::{
 };
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::disk_usage_eviction_task;
-use crate::metrics::STORAGE_TIME_GLOBAL;
+use crate::metrics::{StorageTimeOperation, STORAGE_TIME_GLOBAL};
 use crate::pgdatadir_mapping::LsnForTimestamp;
 use crate::task_mgr::TaskKind;
 use crate::tenant::config::TenantConfOpt;
@@ -386,11 +387,16 @@ async fn get_lsn_by_timestamp_handler(request: Request<Body>) -> Result<Response
    json_response(StatusCode::OK, result)
 }

-// TODO makes sense to provide tenant config right away the same way as it handled in tenant_create
-async fn tenant_attach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
+async fn tenant_attach_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
    check_permission(&request, Some(tenant_id))?;

+    let maybe_body: Option<TenantAttachRequest> = json_request_or_empty_body(&mut request).await?;
+    let tenant_conf = match maybe_body {
+        Some(request) => TenantConfOpt::try_from(&*request.config).map_err(ApiError::BadRequest)?,
+        None => TenantConfOpt::default(),
+    };
+
    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);

    info!("Handling tenant attach {tenant_id}");
@@ -398,9 +404,15 @@ async fn tenant_attach_handler(request: Request<Body>) -> Result<Response<Body>,
    let state = get_state(&request);

    if let Some(remote_storage) = &state.remote_storage {
-        mgr::attach_tenant(state.conf, tenant_id, remote_storage.clone(), &ctx)
-            .instrument(info_span!("tenant_attach", tenant = %tenant_id))
-            .await?;
+        mgr::attach_tenant(
+            state.conf,
+            tenant_id,
+            tenant_conf,
+            remote_storage.clone(),
+            &ctx,
+        )
+        .instrument(info_span!("tenant_attach", tenant = %tenant_id))
+        .await?;
    } else {
        return Err(ApiError::BadRequest(anyhow!(
            "attach_tenant is not possible because pageserver was configured without remote storage"
@@ -710,7 +722,7 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
    check_permission(&request, None)?;

    let _timer = STORAGE_TIME_GLOBAL
-        .get_metric_with_label_values(&["create tenant"])
+        .get_metric_with_label_values(&[StorageTimeOperation::CreateTenant.into()])
        .expect("bug")
        .start_timer();

@@ -718,7 +730,8 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo

    let request_data: TenantCreateRequest = json_request(&mut request).await?;

-    let tenant_conf = TenantConfOpt::try_from(&request_data).map_err(ApiError::BadRequest)?;
+    let tenant_conf =
+        TenantConfOpt::try_from(&request_data.config).map_err(ApiError::BadRequest)?;

    let target_tenant_id = request_data
        .new_tenant_id
@@ -787,7 +800,8 @@ async fn update_tenant_config_handler(
    let tenant_id = request_data.tenant_id;
    check_permission(&request, Some(tenant_id))?;

-    let tenant_conf = TenantConfOpt::try_from(&request_data).map_err(ApiError::BadRequest)?;
+    let tenant_conf =
+        TenantConfOpt::try_from(&request_data.config).map_err(ApiError::BadRequest)?;

    let state = get_state(&request);
    mgr::set_new_tenant_config(state.conf, tenant_conf, tenant_id)
--- a/pageserver/src/keyspace.rs
+++ b/pageserver/src/keyspace.rs
@@ -5,7 +5,7 @@ use std::ops::Range;
 ///
 /// Represents a set of Keys, in a compact form.
 ///
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Default)]
 pub struct KeySpace {
    /// Contiguous ranges of keys that belong to the key space. In key order,
    /// and with no overlap.
@@ -61,6 +61,18 @@ impl KeySpace {

        KeyPartitioning { parts }
    }
+
+    ///
+    /// Check whether `range` overlaps any of the ranges in this key space
+    ///
+    pub fn overlaps(&self, range: &Range<Key>) -> bool {
+        // `ranges` is sorted and non-overlapping, so it suffices to test the
+        // last range that starts before `range.end`.
+        match self.ranges.binary_search_by_key(&range.end, |r| r.start) {
+            Ok(0) | Err(0) => false,
+            Ok(idx) | Err(idx) => self.ranges[idx - 1].end > range.start,
+        }
+    }
 }

 ///
@@ -129,3 +141,226 @@ impl KeySpaceAccum {
        }
    }
 }
+
+///
+/// A helper object, to collect a set of keys and key ranges into a KeySpace
+/// object. Key ranges may be inserted in any order and can overlap.
+///
+#[derive(Clone, Debug, Default)]
+pub struct KeySpaceRandomAccum {
+    ranges: Vec<Range<Key>>, // raw ranges exactly as inserted: unsorted, may overlap
+}
+
+impl KeySpaceRandomAccum {
+    pub fn new() -> Self {
+        Self { ranges: Vec::new() }
+    }
+
+    pub fn add_key(&mut self, key: Key) {
+        self.add_range(singleton_range(key))
+    }
+
+    pub fn add_range(&mut self, range: Range<Key>) {
+        self.ranges.push(range); // no merging here; that happens in to_keyspace()
+    }
+
+    pub fn to_keyspace(mut self) -> KeySpace {
+        let mut ranges = Vec::new();
+        if !self.ranges.is_empty() {
+            self.ranges.sort_by_key(|r| r.start); // order by start key before the sweep
+            let mut start = self.ranges.first().unwrap().start;
+            let mut end = self.ranges.first().unwrap().end;
+            for r in self.ranges {
+                assert!(r.start >= start); // guaranteed by the sort above
+                if r.start > end {
+                    ranges.push(start..end); // gap before `r`: emit the finished run
+                    start = r.start;
+                    end = r.end;
+                } else if r.end > end {
+                    end = r.end; // `r` overlaps or touches the current run: extend it
+                }
+            }
+            ranges.push(start..end); // emit the last run
+        }
+        KeySpace { ranges }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fmt::Write;
+
+    // Helper function to create a key range.
+    //
+    // Make the tests below less verbose.
+    fn kr(irange: Range<i128>) -> Range<Key> {
+        Key::from_i128(irange.start)..Key::from_i128(irange.end)
+    }
+
+    #[allow(dead_code)]
+    fn dump_keyspace(ks: &KeySpace) {
+        for r in ks.ranges.iter() {
+            println!("  {}..{}", r.start.to_i128(), r.end.to_i128());
+        }
+    }
+
+    fn assert_ks_eq(actual: &KeySpace, expected: Vec<Range<Key>>) {
+        if actual.ranges != expected {
+            let mut msg = String::new();
+
+            writeln!(msg, "expected:").unwrap();
+            for r in &expected {
+                writeln!(msg, "  {}..{}", r.start.to_i128(), r.end.to_i128()).unwrap();
+            }
+            writeln!(msg, "got:").unwrap();
+            for r in &actual.ranges {
+                writeln!(msg, "  {}..{}", r.start.to_i128(), r.end.to_i128()).unwrap();
+            }
+            panic!("{}", msg);
+        }
+    }
+
+    #[test]
+    fn keyspace_add_range() {
+        // two separate ranges
+        //
+        // #####
+        //         #####
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(0..10));
+        ks.add_range(kr(20..30));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..10), kr(20..30)]);
+
+        // two separate ranges, added in reverse order
+        //         #####
+        // #####
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(20..30));
+        ks.add_range(kr(0..10));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..10), kr(20..30)]);
+
+        // add range that is adjacent to the end of an existing range
+        // #####
+        //      #####
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(0..10));
+        ks.add_range(kr(10..30));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+
+        // add range that is adjacent to the start of an existing range
+        //
+        //      #####
+        // #####
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(10..30));
+        ks.add_range(kr(0..10));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+
+        // add range that overlaps with the end of an existing range
+        //
+        // #####
+        //    #####
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(0..10));
+        ks.add_range(kr(5..30));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+
+        // add range that overlaps with the start of an existing range
+        //
+        //    #####
+        // #####
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(5..30));
+        ks.add_range(kr(0..10));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+
+        // add range that is fully covered by an existing range
+        //
+        // #########
+        //   #####
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(0..30));
+        ks.add_range(kr(10..20));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+
+        // add range that extends an existing range from both ends
+        //
+        //   #####
+        // #########
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(10..20));
+        ks.add_range(kr(0..30));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+
+        // add a range that overlaps with two existing ranges, joining them
+        //
+        // #####   #####
+        //    #######
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(0..10));
+        ks.add_range(kr(20..30));
+        ks.add_range(kr(5..25));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+    }
+
+    #[test]
+    fn keyspace_overlaps() {
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(10..20));
+        ks.add_range(kr(30..40));
+        let ks = ks.to_keyspace();
+
+        //        #####      #####
+        // xxxx
+        assert!(!ks.overlaps(&kr(0..5)));
+
+        //        #####      #####
+        //   xxxx
+        assert!(!ks.overlaps(&kr(5..9)));
+
+        //        #####      #####
+        //    xxxx
+        assert!(!ks.overlaps(&kr(5..10)));
+
+        //        #####      #####
+        //     xxxx
+        assert!(ks.overlaps(&kr(5..11)));
+
+        //        #####      #####
+        //        xxxx
+        assert!(ks.overlaps(&kr(10..15)));
+
+        //        #####      #####
+        //         xxxx
+        assert!(ks.overlaps(&kr(15..20)));
+
+        //        #####      #####
+        //           xxxx
+        assert!(ks.overlaps(&kr(15..25)));
+
+        //        #####      #####
+        //              xxxx
+        assert!(!ks.overlaps(&kr(22..28)));
+
+        //        #####      #####
+        //               xxxx
+        assert!(!ks.overlaps(&kr(25..30)));
+
+        //        #####      #####
+        //                      xxxx
+        assert!(ks.overlaps(&kr(35..35))); // empty range inside the second range counts as overlapping
+
+        //        #####      #####
+        //                        xxxx
+        assert!(!ks.overlaps(&kr(40..45)));
+
+        //        #####      #####
+        //                        xxxx
+        assert!(!ks.overlaps(&kr(45..50)));
+
+        //        #####      #####
+        //        xxxxxxxxxxx
+        assert!(ks.overlaps(&kr(0..30))); // covers the first range and the gap up to the second
+    }
+}
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -8,6 +8,7 @@ use metrics::{
 use once_cell::sync::Lazy;
 use pageserver_api::models::TenantState;
 use strum::VariantNames;
+use strum_macros::{EnumVariantNames, IntoStaticStr};
 use utils::id::{TenantId, TimelineId};

 /// Prometheus histogram buckets (in seconds) for operations in the critical
@@ -24,17 +25,33 @@ const CRITICAL_OP_BUCKETS: &[f64] = &[
 ];

 // Metrics collected on operations on the storage repository.
-const STORAGE_TIME_OPERATIONS: &[&str] = &[
-    "layer flush",
-    "compact",
-    "create images",
-    "init logical size",
-    "logical size",
-    "imitate logical size",
-    "load layer map",
-    "gc",
-    "create tenant",
-];
+#[derive(Debug, EnumVariantNames, IntoStaticStr)]
+#[strum(serialize_all = "kebab_case")] // NOTE(review): each variant also sets an explicit name
+pub enum StorageTimeOperation {
+    #[strum(serialize = "layer flush")]
+    LayerFlush,
+
+    #[strum(serialize = "compact")]
+    Compact,
+
+    #[strum(serialize = "create images")]
+    CreateImages,
+
+    #[strum(serialize = "logical size")]
+    LogicalSize,
+
+    #[strum(serialize = "imitate logical size")]
+    ImitateLogicalSize,
+
+    #[strum(serialize = "load layer map")]
+    LoadLayerMap,
+
+    #[strum(serialize = "gc")]
+    Gc,
+
+    #[strum(serialize = "create tenant")]
+    CreateTenant,
+}

 pub static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {
    register_counter_vec!(
@@ -673,7 +690,9 @@ pub struct StorageTimeMetrics {
 }

 impl StorageTimeMetrics {
-    pub fn new(operation: &str, tenant_id: &str, timeline_id: &str) -> Self {
+    pub fn new(operation: StorageTimeOperation, tenant_id: &str, timeline_id: &str) -> Self {
+        let operation: &'static str = operation.into();
+
        let timeline_sum = STORAGE_TIME_SUM_PER_TIMELINE
            .get_metric_with_label_values(&[operation, tenant_id, timeline_id])
            .unwrap();
@@ -737,16 +756,23 @@ impl TimelineMetrics {
        let materialized_page_cache_hit_counter = MATERIALIZED_PAGE_CACHE_HIT
            .get_metric_with_label_values(&[&tenant_id, &timeline_id])
            .unwrap();
-        let flush_time_histo = StorageTimeMetrics::new("layer flush", &tenant_id, &timeline_id);
-        let compact_time_histo = StorageTimeMetrics::new("compact", &tenant_id, &timeline_id);
+        let flush_time_histo =
+            StorageTimeMetrics::new(StorageTimeOperation::LayerFlush, &tenant_id, &timeline_id);
+        let compact_time_histo =
+            StorageTimeMetrics::new(StorageTimeOperation::Compact, &tenant_id, &timeline_id);
        let create_images_time_histo =
-            StorageTimeMetrics::new("create images", &tenant_id, &timeline_id);
-        let logical_size_histo = StorageTimeMetrics::new("logical size", &tenant_id, &timeline_id);
-        let imitate_logical_size_histo =
-            StorageTimeMetrics::new("imitate logical size", &tenant_id, &timeline_id);
+            StorageTimeMetrics::new(StorageTimeOperation::CreateImages, &tenant_id, &timeline_id);
+        let logical_size_histo =
+            StorageTimeMetrics::new(StorageTimeOperation::LogicalSize, &tenant_id, &timeline_id);
+        let imitate_logical_size_histo = StorageTimeMetrics::new(
+            StorageTimeOperation::ImitateLogicalSize,
+            &tenant_id,
+            &timeline_id,
+        );
        let load_layer_map_histo =
-            StorageTimeMetrics::new("load layer map", &tenant_id, &timeline_id);
-        let garbage_collect_histo = StorageTimeMetrics::new("gc", &tenant_id, &timeline_id);
+            StorageTimeMetrics::new(StorageTimeOperation::LoadLayerMap, &tenant_id, &timeline_id);
+        let garbage_collect_histo =
+            StorageTimeMetrics::new(StorageTimeOperation::Gc, &tenant_id, &timeline_id);
        let last_record_gauge = LAST_RECORD_LSN
            .get_metric_with_label_values(&[&tenant_id, &timeline_id])
            .unwrap();
@@ -814,7 +840,7 @@ impl Drop for TimelineMetrics {
            .write()
            .unwrap()
            .remove(tenant_id, timeline_id);
-        for op in STORAGE_TIME_OPERATIONS {
+        for op in StorageTimeOperation::VARIANTS {
            let _ =
                STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[op, tenant_id, timeline_id]);
            let _ =
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -1600,9 +1600,7 @@ pub fn create_test_timeline(
    pg_version: u32,
    ctx: &RequestContext,
 ) -> anyhow::Result<std::sync::Arc<Timeline>> {
-    let tline = tenant
-        .create_empty_timeline(timeline_id, Lsn(8), pg_version, ctx)?
-        .initialize(ctx)?;
+    let tline = tenant.create_test_timeline(timeline_id, Lsn(8), pg_version, ctx)?;
    let mut m = tline.begin_modification(Lsn(8));
    m.init_empty()?;
    m.commit()?;
--- a/pageserver/src/task_mgr.rs
+++ b/pageserver/src/task_mgr.rs
@@ -272,9 +272,6 @@ pub enum TaskKind {

    #[cfg(test)]
    UnitTest,
-
-    /// Task which is the only task to delete this particular timeline
-    DeleteTimeline,
 }

 #[derive(Default)]
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
--- a/pageserver/src/tenant/config.rs
+++ b/pageserver/src/tenant/config.rs
@@ -9,7 +9,7 @@
 //! may lead to a data loss.
 //!
 use anyhow::Context;
-use pageserver_api::models::{TenantConfigRequest, TenantCreateRequest};
+use pageserver_api::models;
 use serde::{Deserialize, Serialize};
 use std::num::NonZeroU64;
 use std::time::Duration;
@@ -292,93 +292,77 @@ fn bad_duration<'a>(field_name: &'static str, value: &'a str) -> impl 'a + Fn()
    move || format!("Cannot parse `{field_name}` duration {value:?}")
 }

-impl TenantConfOpt {
-    #[allow(clippy::too_many_arguments)]
-    fn from_request(
-        checkpoint_distance: Option<u64>,
-        checkpoint_timeout: &Option<String>,
-        compaction_target_size: Option<u64>,
-        compaction_period: &Option<String>,
-        compaction_threshold: Option<usize>,
-        gc_horizon: Option<u64>,
-        gc_period: &Option<String>,
-        image_creation_threshold: Option<usize>,
-        pitr_interval: &Option<String>,
-        walreceiver_connect_timeout: &Option<String>,
-        lagging_wal_timeout: &Option<String>,
-        max_lsn_wal_lag: Option<NonZeroU64>,
-        trace_read_requests: Option<bool>,
-        eviction_policy: &Option<serde_json::Value>,
-        min_resident_size_override: Option<u64>,
-        evictions_low_residence_duration_metric_threshold: &Option<String>,
-    ) -> Result<Self, anyhow::Error> {
+impl TryFrom<&'_ models::TenantConfig> for TenantConfOpt {
+    type Error = anyhow::Error;
+
+    fn try_from(request_data: &'_ models::TenantConfig) -> Result<Self, Self::Error> {
        let mut tenant_conf = TenantConfOpt::default();

-        if let Some(gc_period) = &gc_period {
+        if let Some(gc_period) = &request_data.gc_period {
            tenant_conf.gc_period = Some(
                humantime::parse_duration(gc_period)
                    .with_context(bad_duration("gc_period", gc_period))?,
            );
        }
-        tenant_conf.gc_horizon = gc_horizon;
-        tenant_conf.image_creation_threshold = image_creation_threshold;
+        tenant_conf.gc_horizon = request_data.gc_horizon;
+        tenant_conf.image_creation_threshold = request_data.image_creation_threshold;

-        if let Some(pitr_interval) = &pitr_interval {
+        if let Some(pitr_interval) = &request_data.pitr_interval {
            tenant_conf.pitr_interval = Some(
                humantime::parse_duration(pitr_interval)
                    .with_context(bad_duration("pitr_interval", pitr_interval))?,
            );
        }

-        if let Some(walreceiver_connect_timeout) = &walreceiver_connect_timeout {
+        if let Some(walreceiver_connect_timeout) = &request_data.walreceiver_connect_timeout {
            tenant_conf.walreceiver_connect_timeout = Some(
                humantime::parse_duration(walreceiver_connect_timeout).with_context(
                    bad_duration("walreceiver_connect_timeout", walreceiver_connect_timeout),
                )?,
            );
        }
-        if let Some(lagging_wal_timeout) = &lagging_wal_timeout {
+        if let Some(lagging_wal_timeout) = &request_data.lagging_wal_timeout {
            tenant_conf.lagging_wal_timeout = Some(
                humantime::parse_duration(lagging_wal_timeout)
                    .with_context(bad_duration("lagging_wal_timeout", lagging_wal_timeout))?,
            );
        }
-        if let Some(max_lsn_wal_lag) = max_lsn_wal_lag {
+        if let Some(max_lsn_wal_lag) = request_data.max_lsn_wal_lag {
            tenant_conf.max_lsn_wal_lag = Some(max_lsn_wal_lag);
        }
-        if let Some(trace_read_requests) = trace_read_requests {
+        if let Some(trace_read_requests) = request_data.trace_read_requests {
            tenant_conf.trace_read_requests = Some(trace_read_requests);
        }

-        tenant_conf.checkpoint_distance = checkpoint_distance;
-        if let Some(checkpoint_timeout) = &checkpoint_timeout {
+        tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
+        if let Some(checkpoint_timeout) = &request_data.checkpoint_timeout {
            tenant_conf.checkpoint_timeout = Some(
                humantime::parse_duration(checkpoint_timeout)
                    .with_context(bad_duration("checkpoint_timeout", checkpoint_timeout))?,
            );
        }

-        tenant_conf.compaction_target_size = compaction_target_size;
-        tenant_conf.compaction_threshold = compaction_threshold;
+        tenant_conf.compaction_target_size = request_data.compaction_target_size;
+        tenant_conf.compaction_threshold = request_data.compaction_threshold;

-        if let Some(compaction_period) = &compaction_period {
+        if let Some(compaction_period) = &request_data.compaction_period {
            tenant_conf.compaction_period = Some(
                humantime::parse_duration(compaction_period)
                    .with_context(bad_duration("compaction_period", compaction_period))?,
            );
        }

-        if let Some(eviction_policy) = &eviction_policy {
+        if let Some(eviction_policy) = &request_data.eviction_policy {
            tenant_conf.eviction_policy = Some(
                serde::Deserialize::deserialize(eviction_policy)
                    .context("parse field `eviction_policy`")?,
            );
        }

-        tenant_conf.min_resident_size_override = min_resident_size_override;
+        tenant_conf.min_resident_size_override = request_data.min_resident_size_override;

        if let Some(evictions_low_residence_duration_metric_threshold) =
-            &evictions_low_residence_duration_metric_threshold
+            &request_data.evictions_low_residence_duration_metric_threshold
        {
            tenant_conf.evictions_low_residence_duration_metric_threshold = Some(
                humantime::parse_duration(evictions_low_residence_duration_metric_threshold)
@@ -393,56 +377,6 @@ impl TenantConfOpt {
    }
 }

-impl TryFrom<&'_ TenantCreateRequest> for TenantConfOpt {
-    type Error = anyhow::Error;
-
-    fn try_from(request_data: &TenantCreateRequest) -> Result<Self, Self::Error> {
-        Self::from_request(
-            request_data.checkpoint_distance,
-            &request_data.checkpoint_timeout,
-            request_data.compaction_target_size,
-            &request_data.compaction_period,
-            request_data.compaction_threshold,
-            request_data.gc_horizon,
-            &request_data.gc_period,
-            request_data.image_creation_threshold,
-            &request_data.pitr_interval,
-            &request_data.walreceiver_connect_timeout,
-            &request_data.lagging_wal_timeout,
-            request_data.max_lsn_wal_lag,
-            request_data.trace_read_requests,
-            &request_data.eviction_policy,
-            request_data.min_resident_size_override,
-            &request_data.evictions_low_residence_duration_metric_threshold,
-        )
-    }
-}
-
-impl TryFrom<&'_ TenantConfigRequest> for TenantConfOpt {
-    type Error = anyhow::Error;
-
-    fn try_from(request_data: &TenantConfigRequest) -> Result<Self, Self::Error> {
-        Self::from_request(
-            request_data.checkpoint_distance,
-            &request_data.checkpoint_timeout,
-            request_data.compaction_target_size,
-            &request_data.compaction_period,
-            request_data.compaction_threshold,
-            request_data.gc_horizon,
-            &request_data.gc_period,
-            request_data.image_creation_threshold,
-            &request_data.pitr_interval,
-            &request_data.walreceiver_connect_timeout,
-            &request_data.lagging_wal_timeout,
-            request_data.max_lsn_wal_lag,
-            request_data.trace_read_requests,
-            &request_data.eviction_policy,
-            request_data.min_resident_size_override,
-            &request_data.evictions_low_residence_duration_metric_threshold,
-        )
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/pageserver/src/tenant/layer_map.rs
+++ b/pageserver/src/tenant/layer_map.rs
@@ -56,6 +56,7 @@ use std::collections::VecDeque;
 use std::ops::Range;
 use std::sync::Arc;
 use utils::lsn::Lsn;
+use tracing::*;

 use historic_layer_coverage::BufferedHistoricLayerCoverage;
 pub use historic_layer_coverage::Replacement;
@@ -275,11 +276,14 @@ where
    ///
    pub(self) fn insert_historic_noflush(&mut self, layer: Arc<L>) {
        // TODO: See #3869, resulting #4088, attempted fix and repro #4094
-        self.historic.insert(
-            historic_layer_coverage::LayerKey::from(&*layer),
-            Arc::clone(&layer),
-        );
-
+        let key = historic_layer_coverage::LayerKey::from(&*layer);
+        if self.historic.contains(&key) {
+            error!(
+                "Attempt to insert duplicate layer {} in layer map",
+                layer.short_id()
+            );
+        }
+        self.historic.insert(key, Arc::clone(&layer));
        if Self::is_l0(&layer) {
            self.l0_delta_layers.push(layer);
        }
--- a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs
+++ b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs
@@ -417,7 +417,15 @@ impl<Value: Clone> BufferedHistoricLayerCoverage<Value> {
        }
    }

-    pub fn insert(&mut self, layer_key: LayerKey, value: Value) {
+    /// Returns whether `layer_key` is present, taking buffered operations into account.
+    pub fn contains(&self, layer_key: &LayerKey) -> bool {
+        match self.buffer.get(layer_key) {
+            Some(pending) => pending.is_some(), // buffered insert => true, buffered remove => false
+            None => self.layers.contains_key(layer_key), // no buffered ops for this layer
+        }
+    }
+
+    pub fn insert(&mut self, layer_key: LayerKey, value: Value) {
         self.buffer.insert(layer_key, Some(value));
     }

--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -19,7 +19,7 @@ use crate::config::PageServerConf;
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::task_mgr::{self, TaskKind};
 use crate::tenant::config::TenantConfOpt;
-use crate::tenant::{Tenant, TenantState};
+use crate::tenant::{create_tenant_files, CreateTenantFilesMode, Tenant, TenantState};
 use crate::IGNORED_TENANT_FILE_NAME;

 use utils::fs_ext::PathExt;
@@ -278,19 +278,24 @@ pub async fn create_tenant(
    remote_storage: Option<GenericRemoteStorage>,
    ctx: &RequestContext,
 ) -> Result<Arc<Tenant>, TenantMapInsertError> {
-    tenant_map_insert(tenant_id, |vacant_entry| {
+    tenant_map_insert(tenant_id, || {
        // We're holding the tenants lock in write mode while doing local IO.
        // If this section ever becomes contentious, introduce a new `TenantState::Creating`
        // and do the work in that state.
-        let tenant_directory = super::create_tenant_files(conf, tenant_conf, tenant_id)?;
+        let tenant_directory = super::create_tenant_files(conf, tenant_conf, tenant_id, CreateTenantFilesMode::Create)?;
+        // TODO: tenant directory remains on disk if we bail out from here on.
+        //       See https://github.com/neondatabase/neon/issues/4233
+
        let created_tenant =
            schedule_local_tenant_processing(conf, &tenant_directory, remote_storage, ctx)?;
+        // TODO: tenant object & its background loops remain, untracked in tenant map, if we fail here.
+        //      See https://github.com/neondatabase/neon/issues/4233
+
        let crated_tenant_id = created_tenant.tenant_id();
        anyhow::ensure!(
                tenant_id == crated_tenant_id,
                "loaded created tenant has unexpected tenant id (expect {tenant_id} != actual {crated_tenant_id})",
            );
-        vacant_entry.insert(Arc::clone(&created_tenant));
        Ok(created_tenant)
    }).await
 }
@@ -402,7 +407,7 @@ pub async fn load_tenant(
    remote_storage: Option<GenericRemoteStorage>,
    ctx: &RequestContext,
 ) -> Result<(), TenantMapInsertError> {
-    tenant_map_insert(tenant_id, |vacant_entry| {
+    tenant_map_insert(tenant_id, || {
        let tenant_path = conf.tenant_path(&tenant_id);
        let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(tenant_id);
        if tenant_ignore_mark.exists() {
@@ -415,9 +420,9 @@ pub async fn load_tenant(
                format!("Failed to schedule tenant processing in path {tenant_path:?}")
            })?;

-        vacant_entry.insert(new_tenant);
-        Ok(())
-    }).await
+        Ok(new_tenant)
+    }).await?;
+    Ok(())
 }

 pub async fn ignore_tenant(
@@ -466,22 +471,35 @@ pub async fn list_tenants() -> Result<Vec<(TenantId, TenantState)>, TenantMapLis
 pub async fn attach_tenant(
    conf: &'static PageServerConf,
    tenant_id: TenantId,
+    tenant_conf: TenantConfOpt,
    remote_storage: GenericRemoteStorage,
    ctx: &RequestContext,
 ) -> Result<(), TenantMapInsertError> {
-    tenant_map_insert(tenant_id, |vacant_entry| {
-        let tenant_path = conf.tenant_path(&tenant_id);
-        anyhow::ensure!(
-            !tenant_path.exists(),
-            "Cannot attach tenant {tenant_id}, local tenant directory already exists"
-        );
+    tenant_map_insert(tenant_id, || {
+        let tenant_dir = create_tenant_files(conf, tenant_conf, tenant_id, CreateTenantFilesMode::Attach)?;
+        // TODO: tenant directory remains on disk if we bail out from here on.
+        //       See https://github.com/neondatabase/neon/issues/4233

-        let tenant =
-            Tenant::spawn_attach(conf, tenant_id, remote_storage, ctx).context("spawn_attach")?;
-        vacant_entry.insert(tenant);
-        Ok(())
+        // Without the attach marker, schedule_local_tenant_processing will treat the attached tenant as fully attached
+        let marker_file_exists = conf
+            .tenant_attaching_mark_file_path(&tenant_id)
+            .try_exists()
+            .context("check for attach marker file existence")?;
+        anyhow::ensure!(marker_file_exists, "create_tenant_files should have created the attach marker file");
+
+        let attached_tenant = schedule_local_tenant_processing(conf, &tenant_dir, Some(remote_storage), ctx)?;
+        // TODO: tenant object & its background loops remain, untracked in tenant map, if we fail here.
+        //      See https://github.com/neondatabase/neon/issues/4233
+
+        let attached_tenant_id = attached_tenant.tenant_id();
+        anyhow::ensure!(
+            tenant_id == attached_tenant_id,
+            "loaded created tenant has unexpected tenant id (expect {tenant_id} != actual {attached_tenant_id})",
+        );
+        Ok(attached_tenant)
    })
-    .await
+    .await?;
+    Ok(())
 }

 #[derive(Debug, thiserror::Error)]
@@ -502,12 +520,12 @@ pub enum TenantMapInsertError {
 ///
 /// NB: the closure should return quickly because the current implementation of tenants map
 /// serializes access through an `RwLock`.
-async fn tenant_map_insert<F, V>(
+async fn tenant_map_insert<F>(
    tenant_id: TenantId,
    insert_fn: F,
-) -> Result<V, TenantMapInsertError>
+) -> Result<Arc<Tenant>, TenantMapInsertError>
 where
-    F: FnOnce(hash_map::VacantEntry<TenantId, Arc<Tenant>>) -> anyhow::Result<V>,
+    F: FnOnce() -> anyhow::Result<Arc<Tenant>>,
 {
    let mut guard = TENANTS.write().await;
    let m = match &mut *guard {
@@ -520,8 +538,11 @@ where
            tenant_id,
            e.get().current_state(),
        )),
-        hash_map::Entry::Vacant(v) => match insert_fn(v) {
-            Ok(v) => Ok(v),
+        hash_map::Entry::Vacant(v) => match insert_fn() {
+            Ok(tenant) => {
+                v.insert(tenant.clone());
+                Ok(tenant)
+            }
            Err(e) => Err(TenantMapInsertError::Closure(e)),
        },
    }
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -1264,9 +1264,7 @@ mod tests {
            let harness = TenantHarness::create(test_name)?;
            let (tenant, ctx) = runtime.block_on(harness.load());
            // create an empty timeline directory
-            let timeline =
-                tenant.create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)?;
-            let _ = timeline.initialize(&ctx).unwrap();
+            let _ = tenant.create_test_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)?;

            let remote_fs_dir = harness.conf.workdir.join("remote_fs");
            std::fs::create_dir_all(remote_fs_dir)?;
--- a/pageserver/src/tenant/storage_layer.rs
+++ b/pageserver/src/tenant/storage_layer.rs
@@ -542,7 +542,7 @@ impl From<LayerFileName> for LayerDescriptor {
 ///
 /// This is used by DeltaLayer and ImageLayer. Normally, this holds a reference to the
 /// global config, and paths to layer files are constructed using the tenant/timeline
-/// path from the config. But in the 'pageserver_binutils' binary, we need to construct a Layer
+/// path from the config. But in the 'pagectl' binary, we need to construct a Layer
 /// struct for a file on disk, without having a page server running, so that we have no
 /// config. In that case, we use the Path variant to hold the full path to the file on
 /// disk.
--- a/pageserver/src/tenant/storage_layer/delta_layer.rs
+++ b/pageserver/src/tenant/storage_layer/delta_layer.rs
@@ -110,7 +110,7 @@ const WILL_INIT: u64 = 1;
 /// reading/deserializing records themselves.
 ///
 #[derive(Debug, Serialize, Deserialize, Copy, Clone)]
-struct BlobRef(u64);
+pub struct BlobRef(pub u64);

 impl BlobRef {
    pub fn will_init(&self) -> bool {
@@ -619,7 +619,7 @@ impl DeltaLayer {

    /// Create a DeltaLayer struct representing an existing file on disk.
    ///
-    /// This variant is only used for debugging purposes, by the 'pageserver_binutils' binary.
+    /// This variant is only used for debugging purposes, by the 'pagectl' binary.
    pub fn new_for_path(path: &Path, file: File) -> Result<Self> {
        let mut summary_buf = Vec::new();
        summary_buf.resize(PAGE_SZ, 0);
--- a/pageserver/src/tenant/storage_layer/image_layer.rs
+++ b/pageserver/src/tenant/storage_layer/image_layer.rs
@@ -422,7 +422,7 @@ impl ImageLayer {

    /// Create an ImageLayer struct representing an existing file on disk.
    ///
-    /// This variant is only used for debugging purposes, by the 'pageserver_binutils' binary.
+    /// This variant is only used for debugging purposes, by the 'pagectl' binary.
    pub fn new_for_path(path: &Path, file: File) -> Result<ImageLayer> {
        let mut summary_buf = Vec::new();
        summary_buf.resize(PAGE_SZ, 0);
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -22,8 +22,7 @@ use tracing::*;
 use utils::id::TenantTimelineId;

 use std::cmp::{max, min, Ordering};
-use std::collections::BinaryHeap;
-use std::collections::HashMap;
+use std::collections::{BinaryHeap, HashMap};
 use std::fs;
 use std::ops::{Deref, Range};
 use std::path::{Path, PathBuf};
@@ -48,7 +47,7 @@ use crate::tenant::{
 };

 use crate::config::PageServerConf;
-use crate::keyspace::{KeyPartitioning, KeySpace};
+use crate::keyspace::{KeyPartitioning, KeySpace, KeySpaceRandomAccum};
 use crate::metrics::{TimelineMetrics, UNEXPECTED_ONDEMAND_DOWNLOADS};
 use crate::pgdatadir_mapping::LsnForTimestamp;
 use crate::pgdatadir_mapping::{is_rel_fsm_block_key, is_rel_vm_block_key};
@@ -123,6 +122,17 @@ pub struct Timeline {

    pub(super) layers: RwLock<LayerMap<dyn PersistentLayer>>,

+    /// Set of key ranges which should be covered by image layers to
+    /// allow GC to remove old layers. The set is built by GC and stored together with its cutoff LSN.
+    /// The compaction task consults it when it checks whether a new image layer should be created.
+    /// A newly created image layer doesn't help to remove the delta layer until the
+    /// newly created image layer falls off the PITR horizon. So on the next GC cycle,
+    /// gc_timeline may still want the new image layer to be created. To avoid creating redundant
+    /// image layers, we check whether an image layer already exists at or above the stored cutoff LSN.
+    /// This is why we need to remember the GC cutoff LSN.
+    ///
+    wanted_image_layers: Mutex<Option<(Lsn, KeySpace)>>,
+
    last_freeze_at: AtomicLsn,
    // Atomic would be more appropriate here.
    last_freeze_ts: RwLock<Instant>,
@@ -227,9 +237,6 @@ pub struct Timeline {
    state: watch::Sender<TimelineState>,

    eviction_task_timeline_state: tokio::sync::Mutex<EvictionTaskTimelineState>,
-
-    pub(super) delete_self:
-        utils::shared_retryable::SharedRetryable<Result<(), super::InnerDeleteTimelineError>>,
 }

 /// Internal structure to hold all data needed for logical size calculation.
@@ -948,7 +955,7 @@ impl Timeline {

    pub async fn wait_to_become_active(
        &self,
-        _ctx: &RequestContext, /* Prepare for use by cancellation */
+        _ctx: &RequestContext, // Prepare for use by cancellation
    ) -> Result<(), TimelineState> {
        let mut receiver = self.state.subscribe();
        loop {
@@ -1357,6 +1364,7 @@ impl Timeline {
                tenant_id,
                pg_version,
                layers: RwLock::new(LayerMap::default()),
+                wanted_image_layers: Mutex::new(None),

                walredo_mgr,
                walreceiver,
@@ -1424,8 +1432,6 @@ impl Timeline {
                eviction_task_timeline_state: tokio::sync::Mutex::new(
                    EvictionTaskTimelineState::default(),
                ),
-
-                delete_self: utils::shared_retryable::SharedRetryable::default(),
            };
            result.repartition_threshold = result.get_checkpoint_distance() / 10;
            result
@@ -2909,6 +2915,30 @@ impl Timeline {
        let layers = self.layers.read().unwrap();

        let mut max_deltas = 0;
+        {
+            let wanted_image_layers = self.wanted_image_layers.lock().unwrap();
+            if let Some((cutoff_lsn, wanted)) = &*wanted_image_layers {
+                let img_range =
+                    partition.ranges.first().unwrap().start..partition.ranges.last().unwrap().end;
+                if wanted.overlaps(&img_range) {
+                    //
+                    // gc_timeline only pays attention to image layers that are older than the GC cutoff,
+                    // but create_image_layers creates image layers at last-record-lsn.
+                    // So it's possible that gc_timeline wants a new image layer to be created for a key range,
+                    // but the range is already covered by image layers at more recent LSNs. Before we
+                    // create a new image layer, check if the range is already covered at more recent LSNs.
+                    if !layers
+                        .image_layer_exists(&img_range, &(Lsn::min(lsn, *cutoff_lsn)..lsn + 1))?
+                    {
+                        debug!(
+                            "Force generation of layer {}-{} wanted by GC, cutoff={}, lsn={})",
+                            img_range.start, img_range.end, cutoff_lsn, lsn
+                        );
+                        return Ok(true);
+                    }
+                }
+            }
+        }

        for part_range in &partition.ranges {
            let image_coverage = layers.image_coverage(part_range, lsn)?;
@@ -3028,6 +3058,12 @@ impl Timeline {
                image_layers.push(image_layer);
            }
        }
+        // All layers that the GC wanted us to create have now been created.
+        //
+        // It's possible that another GC cycle happened while we were compacting, and added
+        // something new to wanted_image_layers, and we now clear that before processing it.
+        // That's OK, because the next GC iteration will put it back in.
+        *self.wanted_image_layers.lock().unwrap() = None;

        // Sync the new layer to disk before adding it to the layer map, to make sure
        // we don't garbage collect something based on the new layer, before it has
@@ -3437,6 +3473,10 @@ impl Timeline {

        drop(all_keys_iter); // So that deltas_to_compact is no longer borrowed

+        fail_point!("compact-level0-phase1-finish", |_| {
+            Err(anyhow::anyhow!("failpoint compact-level0-phase1-finish").into())
+        });
+
        Ok(CompactLevel0Phase1Result {
            new_layers,
            deltas_to_compact,
@@ -3725,6 +3765,7 @@ impl Timeline {
        }

        let mut layers_to_remove = Vec::new();
+        let mut wanted_image_layers = KeySpaceRandomAccum::default();

        // Scan all layers in the timeline (remote or on-disk).
        //
@@ -3808,6 +3849,15 @@ impl Timeline {
                    "keeping {} because it is the latest layer",
                    l.filename().file_name()
                );
+                // Collect delta key ranges that need image layers to allow garbage
+                // collecting the layers.
+                // It is not so obvious whether we need to propagate information only about
+                // delta layers. Image layers can form "stairs", preventing an old image from being deleted.
+                // But image layers are in any case less sparse than delta layers. Also, we need some
+                // protection from replacing recent image layers with new ones after each GC iteration.
+                if l.is_incremental() && !LayerMap::is_l0(&*l) {
+                    wanted_image_layers.add_range(l.get_key_range());
+                }
                result.layers_not_updated += 1;
                continue 'outer;
            }
@@ -3820,6 +3870,10 @@ impl Timeline {
            );
            layers_to_remove.push(Arc::clone(&l));
        }
+        self.wanted_image_layers
+            .lock()
+            .unwrap()
+            .replace((new_gc_cutoff, wanted_image_layers.to_keyspace()));

        let mut updates = layers.batch_update();
        if !layers_to_remove.is_empty() {
@@ -4421,12 +4475,6 @@ pub(crate) fn debug_assert_current_span_has_tenant_and_timeline_id() {}
 pub(crate) fn debug_assert_current_span_has_tenant_and_timeline_id() {
    use utils::tracing_span_assert;

-    pub static TENANT_ID_EXTRACTOR: once_cell::sync::Lazy<
-        tracing_span_assert::MultiNameExtractor<2>,
-    > = once_cell::sync::Lazy::new(|| {
-        tracing_span_assert::MultiNameExtractor::new("TenantId", ["tenant_id", "tenant"])
-    });
-
    pub static TIMELINE_ID_EXTRACTOR: once_cell::sync::Lazy<
        tracing_span_assert::MultiNameExtractor<2>,
    > = once_cell::sync::Lazy::new(|| {
@@ -4434,7 +4482,7 @@ pub(crate) fn debug_assert_current_span_has_tenant_and_timeline_id() {
    });

    match tracing_span_assert::check_fields_present([
-        &*TENANT_ID_EXTRACTOR,
+        &*super::TENANT_ID_EXTRACTOR,
        &*TIMELINE_ID_EXTRACTOR,
    ]) {
        Ok(()) => (),
--- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
@@ -28,8 +28,8 @@ use storage_broker::proto::SubscribeSafekeeperInfoRequest;
 use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId;
 use storage_broker::BrokerClientChannel;
 use storage_broker::Streaming;
+use tokio::select;
 use tokio::sync::RwLock;
-use tokio::{select, sync::watch};
 use tracing::*;

 use crate::{exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS};
@@ -50,13 +50,13 @@ pub(super) async fn connection_manager_loop_step(
    ctx: &RequestContext,
    manager_status: &RwLock<Option<ConnectionManagerStatus>>,
 ) -> ControlFlow<(), ()> {
-    let mut timeline_state_updates = connection_manager_state
+    match connection_manager_state
        .timeline
-        .subscribe_for_state_updates();
-
-    match wait_for_active_timeline(&mut timeline_state_updates).await {
-        ControlFlow::Continue(()) => {}
-        ControlFlow::Break(()) => {
+        .wait_to_become_active(ctx)
+        .await
+    {
+        Ok(()) => {}
+        Err(_) => {
            info!("Timeline dropped state updates sender before becoming active, stopping wal connection manager loop");
            return ControlFlow::Break(());
        }
@@ -72,6 +72,10 @@ pub(super) async fn connection_manager_loop_step(
        timeline_id: connection_manager_state.timeline.timeline_id,
    };

+    let mut timeline_state_updates = connection_manager_state
+        .timeline
+        .subscribe_for_state_updates();
+
    // Subscribe to the broker updates. Stream shares underlying TCP connection
    // with other streams on this client (other connection managers). When
    // object goes out of scope, stream finishes in drop() automatically.
@@ -195,34 +199,6 @@ pub(super) async fn connection_manager_loop_step(
    }
 }

-async fn wait_for_active_timeline(
-    timeline_state_updates: &mut watch::Receiver<TimelineState>,
-) -> ControlFlow<(), ()> {
-    let current_state = *timeline_state_updates.borrow();
-    if current_state == TimelineState::Active {
-        return ControlFlow::Continue(());
-    }
-
-    loop {
-        match timeline_state_updates.changed().await {
-            Ok(()) => {
-                let new_state = *timeline_state_updates.borrow();
-                match new_state {
-                    TimelineState::Active => {
-                        debug!("Timeline state changed to active, continuing the walreceiver connection manager");
-                        return ControlFlow::Continue(());
-                    }
-                    state => {
-                        debug!("Not running the walreceiver connection manager, timeline is not active: {state:?}");
-                        continue;
-                    }
-                }
-            }
-            Err(_sender_dropped_error) => return ControlFlow::Break(()),
-        }
-    }
-}
-
 /// Endlessly try to subscribe for broker updates for a given timeline.
 async fn subscribe_for_timeline_updates(
    broker_client: &mut BrokerClientChannel,
@@ -1333,9 +1309,8 @@ mod tests {
    async fn dummy_state(harness: &TenantHarness<'_>) -> ConnectionManagerState {
        let (tenant, ctx) = harness.load().await;
        let timeline = tenant
-            .create_empty_timeline(TIMELINE_ID, Lsn(0), crate::DEFAULT_PG_VERSION, &ctx)
+            .create_test_timeline(TIMELINE_ID, Lsn(0), crate::DEFAULT_PG_VERSION, &ctx)
            .expect("Failed to create an empty timeline for dummy wal connection manager");
-        let timeline = timeline.initialize(&ctx).unwrap();

        ConnectionManagerState {
            id: TenantTimelineId {
--- a/pageserver/src/walrecord.rs
+++ b/pageserver/src/walrecord.rs
@@ -379,17 +379,6 @@ impl XlXactParsedRecord {
                });
            }
        }
-        if xinfo & pg_constants::XACT_XINFO_HAS_INVALS != 0 {
-            let nmsgs = buf.get_i32_le();
-            for _i in 0..nmsgs {
-                let sizeof_shared_invalidation_message = 0;
-                buf.advance(sizeof_shared_invalidation_message);
-            }
-        }
-        if xinfo & pg_constants::XACT_XINFO_HAS_TWOPHASE != 0 {
-            xid = buf.get_u32_le();
-            trace!("XLOG_XACT_COMMIT-XACT_XINFO_HAS_TWOPHASE");
-        }

        if xinfo & postgres_ffi::v15::bindings::XACT_XINFO_HAS_DROPPED_STATS != 0 {
            let nitems = buf.get_i32_le();
@@ -397,7 +386,23 @@ impl XlXactParsedRecord {
                "XLOG_XACT_COMMIT-XACT_XINFO_HAS_DROPPED_STAT nitems {}",
                nitems
            );
-            //FIXME: do we need to handle dropped stats here?
+            let sizeof_xl_xact_stats_item = 12;
+            buf.advance((nitems * sizeof_xl_xact_stats_item).try_into().unwrap());
+        }
+
+        if xinfo & pg_constants::XACT_XINFO_HAS_INVALS != 0 {
+            let nmsgs = buf.get_i32_le();
+            let sizeof_shared_invalidation_message = 16;
+            buf.advance(
+                (nmsgs * sizeof_shared_invalidation_message)
+                    .try_into()
+                    .unwrap(),
+            );
+        }
+
+        if xinfo & pg_constants::XACT_XINFO_HAS_TWOPHASE != 0 {
+            xid = buf.get_u32_le();
+            debug!("XLOG_XACT_COMMIT-XACT_XINFO_HAS_TWOPHASE xid {}", xid);
        }

        XlXactParsedRecord {
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand.

 [[package]]
 name = "aiohttp"
@@ -79,30 +79,30 @@ sa = ["sqlalchemy[postgresql-psycopg2binary] (>=1.3,<1.5)"]

 [[package]]
 name = "allure-pytest"
-version = "2.13.1"
+version = "2.13.2"
 description = "Allure pytest integration"
 category = "main"
 optional = false
 python-versions = "*"
 files = [
-    {file = "allure-pytest-2.13.1.tar.gz", hash = "sha256:68d69456eeb65af4061ec06a80bc941163b0616e8216554d36b070a6bf070e08"},
-    {file = "allure_pytest-2.13.1-py3-none-any.whl", hash = "sha256:a8de2fc3b3effe2d8f98801646920de3f055b779710f4c806dbee7c613c24633"},
+    {file = "allure-pytest-2.13.2.tar.gz", hash = "sha256:22243159e8ec81ce2b5254b4013802198821b1b42f118f69d4a289396607c7b3"},
+    {file = "allure_pytest-2.13.2-py3-none-any.whl", hash = "sha256:17de9dbee7f61c8e66a5b5e818b00e419dbcea44cb55c24319401ba813220690"},
 ]

 [package.dependencies]
-allure-python-commons = "2.13.1"
+allure-python-commons = "2.13.2"
 pytest = ">=4.5.0"

 [[package]]
 name = "allure-python-commons"
-version = "2.13.1"
+version = "2.13.2"
 description = "Common module for integrate allure with python-based frameworks"
 category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
-    {file = "allure-python-commons-2.13.1.tar.gz", hash = "sha256:3fc13e1da8ebb23f9ab5c9c72ad04595023cdd5078dbb8604939997faebed5cb"},
-    {file = "allure_python_commons-2.13.1-py3-none-any.whl", hash = "sha256:d08e04867bddf44fef55def3d67f4bc25af58a1bf9fcffcf4ec3331f7f2ef0d0"},
+    {file = "allure-python-commons-2.13.2.tar.gz", hash = "sha256:8a03681330231b1deadd86b97ff68841c6591320114ae638570f1ed60d7a2033"},
+    {file = "allure_python_commons-2.13.2-py3-none-any.whl", hash = "sha256:2bb3646ec3fbf5b36d178a5e735002bc130ae9f9ba80f080af97d368ba375051"},
 ]

 [package.dependencies]
@@ -172,17 +172,6 @@ dev = ["Cython (>=0.29.24,<0.30.0)", "Sphinx (>=4.1.2,<4.2.0)", "flake8 (>=5.0.4
 docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"]
 test = ["flake8 (>=5.0.4,<5.1.0)", "uvloop (>=0.15.3)"]

-[[package]]
-name = "atomicwrites"
-version = "1.4.1"
-description = "Atomic file writes."
-category = "main"
-optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
-files = [
-    {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"},
-]
-
 [[package]]
 name = "attrs"
 version = "21.4.0"
@@ -239,49 +228,49 @@ wrapt = "*"

 [[package]]
 name = "backoff"
-version = "1.11.1"
+version = "2.2.1"
 description = "Function decoration for backoff and retry"
 category = "main"
 optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+python-versions = ">=3.7,<4.0"
 files = [
-    {file = "backoff-1.11.1-py2.py3-none-any.whl", hash = "sha256:61928f8fa48d52e4faa81875eecf308eccfb1016b018bb6bd21e05b5d90a96c5"},
-    {file = "backoff-1.11.1.tar.gz", hash = "sha256:ccb962a2378418c667b3c979b504fdeb7d9e0d29c0579e3b13b86467177728cb"},
+    {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"},
+    {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"},
 ]

 [[package]]
 name = "black"
-version = "23.1.0"
+version = "23.3.0"
 description = "The uncompromising code formatter."
 category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "black-23.1.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:b6a92a41ee34b883b359998f0c8e6eb8e99803aa8bf3123bf2b2e6fec505a221"},
-    {file = "black-23.1.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:57c18c5165c1dbe291d5306e53fb3988122890e57bd9b3dcb75f967f13411a26"},
-    {file = "black-23.1.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:9880d7d419bb7e709b37e28deb5e68a49227713b623c72b2b931028ea65f619b"},
-    {file = "black-23.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e6663f91b6feca5d06f2ccd49a10f254f9298cc1f7f49c46e498a0771b507104"},
-    {file = "black-23.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:9afd3f493666a0cd8f8df9a0200c6359ac53940cbde049dcb1a7eb6ee2dd7074"},
-    {file = "black-23.1.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:bfffba28dc52a58f04492181392ee380e95262af14ee01d4bc7bb1b1c6ca8d27"},
-    {file = "black-23.1.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:c1c476bc7b7d021321e7d93dc2cbd78ce103b84d5a4cf97ed535fbc0d6660648"},
-    {file = "black-23.1.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:382998821f58e5c8238d3166c492139573325287820963d2f7de4d518bd76958"},
-    {file = "black-23.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bf649fda611c8550ca9d7592b69f0637218c2369b7744694c5e4902873b2f3a"},
-    {file = "black-23.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:121ca7f10b4a01fd99951234abdbd97728e1240be89fde18480ffac16503d481"},
-    {file = "black-23.1.0-cp37-cp37m-macosx_10_16_x86_64.whl", hash = "sha256:a8471939da5e824b891b25751955be52ee7f8a30a916d570a5ba8e0f2eb2ecad"},
-    {file = "black-23.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8178318cb74f98bc571eef19068f6ab5613b3e59d4f47771582f04e175570ed8"},
-    {file = "black-23.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:a436e7881d33acaf2536c46a454bb964a50eff59b21b51c6ccf5a40601fbef24"},
-    {file = "black-23.1.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:a59db0a2094d2259c554676403fa2fac3473ccf1354c1c63eccf7ae65aac8ab6"},
-    {file = "black-23.1.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:0052dba51dec07ed029ed61b18183942043e00008ec65d5028814afaab9a22fd"},
-    {file = "black-23.1.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:49f7b39e30f326a34b5c9a4213213a6b221d7ae9d58ec70df1c4a307cf2a1580"},
-    {file = "black-23.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:162e37d49e93bd6eb6f1afc3e17a3d23a823042530c37c3c42eeeaf026f38468"},
-    {file = "black-23.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b70eb40a78dfac24842458476135f9b99ab952dd3f2dab738c1881a9b38b753"},
-    {file = "black-23.1.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:a29650759a6a0944e7cca036674655c2f0f63806ddecc45ed40b7b8aa314b651"},
-    {file = "black-23.1.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:bb460c8561c8c1bec7824ecbc3ce085eb50005883a6203dcfb0122e95797ee06"},
-    {file = "black-23.1.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:c91dfc2c2a4e50df0026f88d2215e166616e0c80e86004d0003ece0488db2739"},
-    {file = "black-23.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a951cc83ab535d248c89f300eccbd625e80ab880fbcfb5ac8afb5f01a258ac9"},
-    {file = "black-23.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:0680d4380db3719ebcfb2613f34e86c8e6d15ffeabcf8ec59355c5e7b85bb555"},
-    {file = "black-23.1.0-py3-none-any.whl", hash = "sha256:7a0f701d314cfa0896b9001df70a530eb2472babb76086344e688829efd97d32"},
-    {file = "black-23.1.0.tar.gz", hash = "sha256:b0bd97bea8903f5a2ba7219257a44e3f1f9d00073d6cc1add68f0beec69692ac"},
+    {file = "black-23.3.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:0945e13506be58bf7db93ee5853243eb368ace1c08a24c65ce108986eac65915"},
+    {file = "black-23.3.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:67de8d0c209eb5b330cce2469503de11bca4085880d62f1628bd9972cc3366b9"},
+    {file = "black-23.3.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:7c3eb7cea23904399866c55826b31c1f55bbcd3890ce22ff70466b907b6775c2"},
+    {file = "black-23.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32daa9783106c28815d05b724238e30718f34155653d4d6e125dc7daec8e260c"},
+    {file = "black-23.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:35d1381d7a22cc5b2be2f72c7dfdae4072a3336060635718cc7e1ede24221d6c"},
+    {file = "black-23.3.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:a8a968125d0a6a404842fa1bf0b349a568634f856aa08ffaff40ae0dfa52e7c6"},
+    {file = "black-23.3.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:c7ab5790333c448903c4b721b59c0d80b11fe5e9803d8703e84dcb8da56fec1b"},
+    {file = "black-23.3.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:a6f6886c9869d4daae2d1715ce34a19bbc4b95006d20ed785ca00fa03cba312d"},
+    {file = "black-23.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f3c333ea1dd6771b2d3777482429864f8e258899f6ff05826c3a4fcc5ce3f70"},
+    {file = "black-23.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:11c410f71b876f961d1de77b9699ad19f939094c3a677323f43d7a29855fe326"},
+    {file = "black-23.3.0-cp37-cp37m-macosx_10_16_x86_64.whl", hash = "sha256:1d06691f1eb8de91cd1b322f21e3bfc9efe0c7ca1f0e1eb1db44ea367dff656b"},
+    {file = "black-23.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50cb33cac881766a5cd9913e10ff75b1e8eb71babf4c7104f2e9c52da1fb7de2"},
+    {file = "black-23.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e114420bf26b90d4b9daa597351337762b63039752bdf72bf361364c1aa05925"},
+    {file = "black-23.3.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:48f9d345675bb7fbc3dd85821b12487e1b9a75242028adad0333ce36ed2a6d27"},
+    {file = "black-23.3.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:714290490c18fb0126baa0fca0a54ee795f7502b44177e1ce7624ba1c00f2331"},
+    {file = "black-23.3.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:064101748afa12ad2291c2b91c960be28b817c0c7eaa35bec09cc63aa56493c5"},
+    {file = "black-23.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:562bd3a70495facf56814293149e51aa1be9931567474993c7942ff7d3533961"},
+    {file = "black-23.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:e198cf27888ad6f4ff331ca1c48ffc038848ea9f031a3b40ba36aced7e22f2c8"},
+    {file = "black-23.3.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:3238f2aacf827d18d26db07524e44741233ae09a584273aa059066d644ca7b30"},
+    {file = "black-23.3.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:f0bd2f4a58d6666500542b26354978218a9babcdc972722f4bf90779524515f3"},
+    {file = "black-23.3.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:92c543f6854c28a3c7f39f4d9b7694f9a6eb9d3c5e2ece488c327b6e7ea9b266"},
+    {file = "black-23.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a150542a204124ed00683f0db1f5cf1c2aaaa9cc3495b7a3b5976fb136090ab"},
+    {file = "black-23.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:6b39abdfb402002b8a7d030ccc85cf5afff64ee90fa4c5aebc531e3ad0175ddb"},
+    {file = "black-23.3.0-py3-none-any.whl", hash = "sha256:ec751418022185b0c1bb7d7736e6933d40bbb14c14a0abcf9123d1b159f98dd4"},
+    {file = "black-23.3.0.tar.gz", hash = "sha256:1c7b8d606e728a41ea1ccbd7264677e494e87cf630e399262ced92d4a8dac940"},
 ]

 [package.dependencies]
@@ -951,6 +940,21 @@ six = ">=1.9.0"
 gmpy = ["gmpy"]
 gmpy2 = ["gmpy2"]

+[[package]]
+name = "exceptiongroup"
+version = "1.1.1"
+description = "Backport of PEP 654 (exception groups)"
+category = "main"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"},
+    {file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"},
+]
+
+[package.extras]
+test = ["pytest (>=6)"]
+
 [[package]]
 name = "execnet"
 version = "1.9.0"
@@ -1410,38 +1414,38 @@ files = [

 [[package]]
 name = "mypy"
-version = "1.1.1"
+version = "1.3.0"
 description = "Optional static typing for Python"
 category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "mypy-1.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39c7119335be05630611ee798cc982623b9e8f0cff04a0b48dfc26100e0b97af"},
-    {file = "mypy-1.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:61bf08362e93b6b12fad3eab68c4ea903a077b87c90ac06c11e3d7a09b56b9c1"},
-    {file = "mypy-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbb19c9f662e41e474e0cff502b7064a7edc6764f5262b6cd91d698163196799"},
-    {file = "mypy-1.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:315ac73cc1cce4771c27d426b7ea558fb4e2836f89cb0296cbe056894e3a1f78"},
-    {file = "mypy-1.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:5cb14ff9919b7df3538590fc4d4c49a0f84392237cbf5f7a816b4161c061829e"},
-    {file = "mypy-1.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:26cdd6a22b9b40b2fd71881a8a4f34b4d7914c679f154f43385ca878a8297389"},
-    {file = "mypy-1.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5b5f81b40d94c785f288948c16e1f2da37203c6006546c5d947aab6f90aefef2"},
-    {file = "mypy-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21b437be1c02712a605591e1ed1d858aba681757a1e55fe678a15c2244cd68a5"},
-    {file = "mypy-1.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d809f88734f44a0d44959d795b1e6f64b2bbe0ea4d9cc4776aa588bb4229fc1c"},
-    {file = "mypy-1.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:a380c041db500e1410bb5b16b3c1c35e61e773a5c3517926b81dfdab7582be54"},
-    {file = "mypy-1.1.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b7c7b708fe9a871a96626d61912e3f4ddd365bf7f39128362bc50cbd74a634d5"},
-    {file = "mypy-1.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1c10fa12df1232c936830839e2e935d090fc9ee315744ac33b8a32216b93707"},
-    {file = "mypy-1.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0a28a76785bf57655a8ea5eb0540a15b0e781c807b5aa798bd463779988fa1d5"},
-    {file = "mypy-1.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:ef6a01e563ec6a4940784c574d33f6ac1943864634517984471642908b30b6f7"},
-    {file = "mypy-1.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d64c28e03ce40d5303450f547e07418c64c241669ab20610f273c9e6290b4b0b"},
-    {file = "mypy-1.1.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:64cc3afb3e9e71a79d06e3ed24bb508a6d66f782aff7e56f628bf35ba2e0ba51"},
-    {file = "mypy-1.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce61663faf7a8e5ec6f456857bfbcec2901fbdb3ad958b778403f63b9e606a1b"},
-    {file = "mypy-1.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2b0c373d071593deefbcdd87ec8db91ea13bd8f1328d44947e88beae21e8d5e9"},
-    {file = "mypy-1.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:2888ce4fe5aae5a673386fa232473014056967f3904f5abfcf6367b5af1f612a"},
-    {file = "mypy-1.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:19ba15f9627a5723e522d007fe708007bae52b93faab00f95d72f03e1afa9598"},
-    {file = "mypy-1.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:59bbd71e5c58eed2e992ce6523180e03c221dcd92b52f0e792f291d67b15a71c"},
-    {file = "mypy-1.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9401e33814cec6aec8c03a9548e9385e0e228fc1b8b0a37b9ea21038e64cdd8a"},
-    {file = "mypy-1.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4b398d8b1f4fba0e3c6463e02f8ad3346f71956b92287af22c9b12c3ec965a9f"},
-    {file = "mypy-1.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:69b35d1dcb5707382810765ed34da9db47e7f95b3528334a3c999b0c90fe523f"},
-    {file = "mypy-1.1.1-py3-none-any.whl", hash = "sha256:4e4e8b362cdf99ba00c2b218036002bdcdf1e0de085cdb296a49df03fb31dfc4"},
-    {file = "mypy-1.1.1.tar.gz", hash = "sha256:ae9ceae0f5b9059f33dbc62dea087e942c0ccab4b7a003719cb70f9b8abfa32f"},
+    {file = "mypy-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c1eb485cea53f4f5284e5baf92902cd0088b24984f4209e25981cc359d64448d"},
+    {file = "mypy-1.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4c99c3ecf223cf2952638da9cd82793d8f3c0c5fa8b6ae2b2d9ed1e1ff51ba85"},
+    {file = "mypy-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:550a8b3a19bb6589679a7c3c31f64312e7ff482a816c96e0cecec9ad3a7564dd"},
+    {file = "mypy-1.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cbc07246253b9e3d7d74c9ff948cd0fd7a71afcc2b77c7f0a59c26e9395cb152"},
+    {file = "mypy-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:a22435632710a4fcf8acf86cbd0d69f68ac389a3892cb23fbad176d1cddaf228"},
+    {file = "mypy-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6e33bb8b2613614a33dff70565f4c803f889ebd2f859466e42b46e1df76018dd"},
+    {file = "mypy-1.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7d23370d2a6b7a71dc65d1266f9a34e4cde9e8e21511322415db4b26f46f6b8c"},
+    {file = "mypy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:658fe7b674769a0770d4b26cb4d6f005e88a442fe82446f020be8e5f5efb2fae"},
+    {file = "mypy-1.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6e42d29e324cdda61daaec2336c42512e59c7c375340bd202efa1fe0f7b8f8ca"},
+    {file = "mypy-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:d0b6c62206e04061e27009481cb0ec966f7d6172b5b936f3ead3d74f29fe3dcf"},
+    {file = "mypy-1.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:76ec771e2342f1b558c36d49900dfe81d140361dd0d2df6cd71b3db1be155409"},
+    {file = "mypy-1.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebc95f8386314272bbc817026f8ce8f4f0d2ef7ae44f947c4664efac9adec929"},
+    {file = "mypy-1.3.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:faff86aa10c1aa4a10e1a301de160f3d8fc8703b88c7e98de46b531ff1276a9a"},
+    {file = "mypy-1.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8c5979d0deb27e0f4479bee18ea0f83732a893e81b78e62e2dda3e7e518c92ee"},
+    {file = "mypy-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c5d2cc54175bab47011b09688b418db71403aefad07cbcd62d44010543fc143f"},
+    {file = "mypy-1.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:87df44954c31d86df96c8bd6e80dfcd773473e877ac6176a8e29898bfb3501cb"},
+    {file = "mypy-1.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:473117e310febe632ddf10e745a355714e771ffe534f06db40702775056614c4"},
+    {file = "mypy-1.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:74bc9b6e0e79808bf8678d7678b2ae3736ea72d56eede3820bd3849823e7f305"},
+    {file = "mypy-1.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:44797d031a41516fcf5cbfa652265bb994e53e51994c1bd649ffcd0c3a7eccbf"},
+    {file = "mypy-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ddae0f39ca146972ff6bb4399f3b2943884a774b8771ea0a8f50e971f5ea5ba8"},
+    {file = "mypy-1.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1c4c42c60a8103ead4c1c060ac3cdd3ff01e18fddce6f1016e08939647a0e703"},
+    {file = "mypy-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e86c2c6852f62f8f2b24cb7a613ebe8e0c7dc1402c61d36a609174f63e0ff017"},
+    {file = "mypy-1.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f9dca1e257d4cc129517779226753dbefb4f2266c4eaad610fc15c6a7e14283e"},
+    {file = "mypy-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:95d8d31a7713510685b05fbb18d6ac287a56c8f6554d88c19e73f724a445448a"},
+    {file = "mypy-1.3.0-py3-none-any.whl", hash = "sha256:a8763e72d5d9574d45ce5881962bc8e9046bf7b375b0abf031f3e6811732a897"},
+    {file = "mypy-1.3.0.tar.gz", hash = "sha256:e1f4d16e296f5135624b34e8fb741eb0eadedca90862405b1f1fde2040b9bd11"},
 ]

 [package.dependencies]
@@ -1721,18 +1725,6 @@ files = [
    {file = "psycopg2_binary-2.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:accfe7e982411da3178ec690baaceaad3c278652998b2c45828aaac66cd8285f"},
 ]

-[[package]]
-name = "py"
-version = "1.11.0"
-description = "library with cross-python path, ini-parsing, io, code, log facilities"
-category = "main"
-optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
-files = [
-    {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
-    {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
-]
-
 [[package]]
 name = "pyasn1"
 version = "0.4.8"
@@ -1841,57 +1833,56 @@ files = [

 [[package]]
 name = "pytest"
-version = "6.2.5"
+version = "7.3.1"
 description = "pytest: simple powerful testing with Python"
 category = "main"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.7"
 files = [
-    {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"},
-    {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"},
+    {file = "pytest-7.3.1-py3-none-any.whl", hash = "sha256:3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362"},
+    {file = "pytest-7.3.1.tar.gz", hash = "sha256:434afafd78b1d78ed0addf160ad2b77a30d35d4bdf8af234fe621919d9ed15e3"},
 ]

 [package.dependencies]
-atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""}
-attrs = ">=19.2.0"
 colorama = {version = "*", markers = "sys_platform == \"win32\""}
+exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
 iniconfig = "*"
 packaging = "*"
 pluggy = ">=0.12,<2.0"
-py = ">=1.8.2"
-toml = "*"
+tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}

 [package.extras]
-testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"]
+testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]

 [[package]]
 name = "pytest-asyncio"
-version = "0.19.0"
+version = "0.21.0"
 description = "Pytest support for asyncio"
 category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "pytest-asyncio-0.19.0.tar.gz", hash = "sha256:ac4ebf3b6207259750bc32f4c1d8fcd7e79739edbc67ad0c58dd150b1d072fed"},
-    {file = "pytest_asyncio-0.19.0-py3-none-any.whl", hash = "sha256:7a97e37cfe1ed296e2e84941384bdd37c376453912d397ed39293e0916f521fa"},
+    {file = "pytest-asyncio-0.21.0.tar.gz", hash = "sha256:2b38a496aef56f56b0e87557ec313e11e1ab9276fc3863f6a7be0f1d0e415e1b"},
+    {file = "pytest_asyncio-0.21.0-py3-none-any.whl", hash = "sha256:f2b3366b7cd501a4056858bd39349d5af19742aed2d81660b7998b6341c7eb9c"},
 ]

 [package.dependencies]
-pytest = ">=6.1.0"
+pytest = ">=7.0.0"

 [package.extras]
+docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"]
 testing = ["coverage (>=6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy (>=0.931)", "pytest-trio (>=0.7.0)"]

 [[package]]
 name = "pytest-httpserver"
-version = "1.0.6"
+version = "1.0.8"
 description = "pytest-httpserver is a httpserver for pytest"
 category = "main"
 optional = false
-python-versions = ">=3.7,<4.0"
+python-versions = ">=3.8,<4.0"
 files = [
-    {file = "pytest_httpserver-1.0.6-py3-none-any.whl", hash = "sha256:ac2379acc91fe8bdbe2911c93af8dd130e33b5899fb9934d15669480739c6d32"},
-    {file = "pytest_httpserver-1.0.6.tar.gz", hash = "sha256:9040d07bf59ac45d8de3db1d4468fd2d1d607975e4da4c872ecc0402cdbf7b3e"},
+    {file = "pytest_httpserver-1.0.8-py3-none-any.whl", hash = "sha256:24cd3d9f6a0b927c7bfc400d0b3fda7442721b8267ce29942bf307b190f0bb09"},
+    {file = "pytest_httpserver-1.0.8.tar.gz", hash = "sha256:e052f69bc8a9073db02484681e8e47004dd1fb3763b0ae833bd899e5895c559a"},
 ]

 [package.dependencies]
@@ -1914,14 +1905,14 @@ pytest = ">=3.2.5"

 [[package]]
 name = "pytest-order"
-version = "1.0.1"
+version = "1.1.0"
 description = "pytest plugin to run your tests in a specific order"
 category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
-    {file = "pytest-order-1.0.1.tar.gz", hash = "sha256:5dd6b929fbd7eaa6d0ee07586f65c623babb0afe72b4843c5f15055d6b3b1b1f"},
-    {file = "pytest_order-1.0.1-py3-none-any.whl", hash = "sha256:bbe6e63a8e23741ab3e810d458d1ea7317e797b70f9550512d77d6e9e8fd1bbb"},
+    {file = "pytest-order-1.1.0.tar.gz", hash = "sha256:139d25b30826b78eebb42722f747eab14c44b88059d7a71d4f79d14a057269a5"},
+    {file = "pytest_order-1.1.0-py3-none-any.whl", hash = "sha256:3b3730969c97900fa5cd31ecff80847680ed56b2490954565c14949ba60d9371"},
 ]

 [package.dependencies]
@@ -1963,14 +1954,14 @@ pytest = ">=5.0.0"

 [[package]]
 name = "pytest-xdist"
-version = "3.0.2"
-description = "pytest xdist plugin for distributed testing and loop-on-failing modes"
+version = "3.3.1"
+description = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs"
 category = "main"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.7"
 files = [
-    {file = "pytest-xdist-3.0.2.tar.gz", hash = "sha256:688da9b814370e891ba5de650c9327d1a9d861721a524eb917e620eec3e90291"},
-    {file = "pytest_xdist-3.0.2-py3-none-any.whl", hash = "sha256:9feb9a18e1790696ea23e1434fa73b325ed4998b0e9fcb221f16fd1945e6df1b"},
+    {file = "pytest-xdist-3.3.1.tar.gz", hash = "sha256:d5ee0520eb1b7bcca50a60a518ab7a7707992812c578198f8b44fdfac78e8c93"},
+    {file = "pytest_xdist-3.3.1-py3-none-any.whl", hash = "sha256:ff9daa7793569e6a68544850fd3927cd257cc03a7ef76c95e86915355e82b5f2"},
 ]

 [package.dependencies]
@@ -2092,21 +2083,21 @@ files = [

 [[package]]
 name = "requests"
-version = "2.28.1"
+version = "2.31.0"
 description = "Python HTTP for Humans."
 category = "main"
 optional = false
-python-versions = ">=3.7, <4"
+python-versions = ">=3.7"
 files = [
-    {file = "requests-2.28.1-py3-none-any.whl", hash = "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"},
-    {file = "requests-2.28.1.tar.gz", hash = "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983"},
+    {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"},
+    {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"},
 ]

 [package.dependencies]
 certifi = ">=2017.4.17"
-charset-normalizer = ">=2,<3"
+charset-normalizer = ">=2,<4"
 idna = ">=2.5,<4"
-urllib3 = ">=1.21.1,<1.27"
+urllib3 = ">=1.21.1,<3"

 [package.extras]
 socks = ["PySocks (>=1.5.6,!=1.5.7)"]
@@ -2148,29 +2139,29 @@ pyasn1 = ">=0.1.3"

 [[package]]
 name = "ruff"
-version = "0.0.255"
+version = "0.0.269"
 description = "An extremely fast Python linter, written in Rust."
 category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "ruff-0.0.255-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:b2d71fb6a7e50501a2473864acffc85dee6b750c25db198f7e71fe1dbbff1aad"},
-    {file = "ruff-0.0.255-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:6c97d746861a6010f941179e84bba9feb8a871815667471d9ed6beb98d45c252"},
-    {file = "ruff-0.0.255-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a7fa60085079b91a298b963361be9b1b1c724582af6c84be954cbabdbd9309a"},
-    {file = "ruff-0.0.255-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c089f7141496334ab5a127b54ce55e41f0d6714e68a4453a1e09d2204cdea8c3"},
-    {file = "ruff-0.0.255-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0423908caa7d437a416b853214565b9c33bbd1106c4f88147982216dddcbbd96"},
-    {file = "ruff-0.0.255-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:981493e92547cacbb8e0874904ec049fe744507ee890dc8736caf89a8864f9a7"},
-    {file = "ruff-0.0.255-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:59d5193d2aedb35db180824462b374dbcfc306b2e76076245088afa6e5837df2"},
-    {file = "ruff-0.0.255-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd5e00733c9d160c8a34a22e62b390da9d1e9f326676402421cb8c1236beefc3"},
-    {file = "ruff-0.0.255-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:694418cf41838bd19c6229e4e1b2d04505b1e6b86fe3ab81165484fc96d36f01"},
-    {file = "ruff-0.0.255-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:5d0408985c9777369daebb5d3340a99e9f7294bdd7120642239261508185cf89"},
-    {file = "ruff-0.0.255-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:abd6376ef9d12f370d95a8c7c98682fbb9bfedfba59f40e84a816fef8ddcb8de"},
-    {file = "ruff-0.0.255-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f9b1a5df0bc09193cbef58a6f78e4a9a0b058a4f9733c0442866d078006d1bb9"},
-    {file = "ruff-0.0.255-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:6a25c5f4ff087445b2e1bbcb9963f2ae7c868d65e4a8d5f84c36c12f71571179"},
-    {file = "ruff-0.0.255-py3-none-win32.whl", hash = "sha256:1ff87a8310354f9f1a099625e54a27fdd6756d9cd2a40b45922f2e943daf982d"},
-    {file = "ruff-0.0.255-py3-none-win_amd64.whl", hash = "sha256:f3d8416be618f023f93ec4fd6ee3048585ef85dba9563b2a7e38fc7e5131d5b1"},
-    {file = "ruff-0.0.255-py3-none-win_arm64.whl", hash = "sha256:8ba124819624145d7b6b53add40c367c44318893215ffc1bfe3d72e0225a1c9c"},
-    {file = "ruff-0.0.255.tar.gz", hash = "sha256:f9eb1d3b2eecbeedae419fa494c4e2a5e4484baf93a1ce0f81eddb005e1919c5"},
+    {file = "ruff-0.0.269-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:3569bcdee679045c09c0161fabc057599759c49219a08d9a4aad2cc3982ccba3"},
+    {file = "ruff-0.0.269-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:56347da63757a56cbce7d4b3d6044ca4f1941cd1bbff3714f7554360c3361f83"},
+    {file = "ruff-0.0.269-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6da8ee25ef2f0cc6cc8e6e20942c1d44d25a36dce35070d7184655bc14f63f63"},
+    {file = "ruff-0.0.269-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd81b8e681b9eaa6cf15484f3985bd8bd97c3d114e95bff3e8ea283bf8865062"},
+    {file = "ruff-0.0.269-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f19f59ca3c28742955241fb452f3346241ddbd34e72ac5cb3d84fadebcf6bc8"},
+    {file = "ruff-0.0.269-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:f062059b8289a4fab7f6064601b811d447c2f9d3d432a17f689efe4d68988450"},
+    {file = "ruff-0.0.269-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f5dc7aac52c58e82510217e3c7efd80765c134c097c2815d59e40face0d1fe6"},
+    {file = "ruff-0.0.269-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e131b4dbe798c391090c6407641d6ab12c0fa1bb952379dde45e5000e208dabb"},
+    {file = "ruff-0.0.269-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a374434e588e06550df0f8dcb74777290f285678de991fda4e1063c367ab2eb2"},
+    {file = "ruff-0.0.269-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:cec2f4b84a14b87f1b121488649eb5b4eaa06467a2387373f750da74bdcb5679"},
+    {file = "ruff-0.0.269-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:374b161753a247904aec7a32d45e165302b76b6e83d22d099bf3ff7c232c888f"},
+    {file = "ruff-0.0.269-py3-none-musllinux_1_2_i686.whl", hash = "sha256:9ca0a1ddb1d835b5f742db9711c6cf59f213a1ad0088cb1e924a005fd399e7d8"},
+    {file = "ruff-0.0.269-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5a20658f0b97d207c7841c13d528f36d666bf445b00b01139f28a8ccb80093bb"},
+    {file = "ruff-0.0.269-py3-none-win32.whl", hash = "sha256:03ff42bc91ceca58e0f0f072cb3f9286a9208f609812753474e799a997cdad1a"},
+    {file = "ruff-0.0.269-py3-none-win_amd64.whl", hash = "sha256:f3b59ccff57b21ef0967ea8021fd187ec14c528ec65507d8bcbe035912050776"},
+    {file = "ruff-0.0.269-py3-none-win_arm64.whl", hash = "sha256:bbeb857b1e508a4487bdb02ca1e6d41dd8d5ac5335a5246e25de8a3dff38c1ff"},
+    {file = "ruff-0.0.269.tar.gz", hash = "sha256:11ddcfbab32cf5c420ea9dd5531170ace5a3e59c16d9251c7bd2581f7b16f602"},
 ]

 [[package]]
@@ -2271,7 +2262,7 @@ files = [
 name = "tomli"
 version = "2.0.1"
 description = "A lil' TOML parser"
-category = "dev"
+category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -2281,42 +2272,54 @@ files = [

 [[package]]
 name = "types-psutil"
-version = "5.9.5.4"
+version = "5.9.5.12"
 description = "Typing stubs for psutil"
 category = "main"
 optional = false
 python-versions = "*"
 files = [
-    {file = "types-psutil-5.9.5.4.tar.gz", hash = "sha256:aa09102b80c65a3b4573216614372398dab78972d650488eaff1ff05482cc18f"},
-    {file = "types_psutil-5.9.5.4-py3-none-any.whl", hash = "sha256:28e59764630187e462d43788efa16d59d5e77b510115f9e25901b2d4007fca62"},
+    {file = "types-psutil-5.9.5.12.tar.gz", hash = "sha256:61a91679d3fe737250013b624dca09375e7cc3ad77dcc734553746c429c02aca"},
+    {file = "types_psutil-5.9.5.12-py3-none-any.whl", hash = "sha256:e9a147b8561235c6afcce5aa1adb973fad9ab2c50cf89820697687f53510358f"},
 ]

 [[package]]
 name = "types-psycopg2"
-version = "2.9.18"
+version = "2.9.21.10"
 description = "Typing stubs for psycopg2"
 category = "main"
 optional = false
 python-versions = "*"
 files = [
-    {file = "types-psycopg2-2.9.18.tar.gz", hash = "sha256:9b0e9e1f097b15cd9fa8aad2596a9e3082fd72f8d9cfe52b190cfa709105b6c0"},
-    {file = "types_psycopg2-2.9.18-py3-none-any.whl", hash = "sha256:14c779dcab18c31453fa1cad3cf4b1601d33540a344adead3c47a6b8091cd2fa"},
+    {file = "types-psycopg2-2.9.21.10.tar.gz", hash = "sha256:c2600892312ae1c34e12f145749795d93dc4eac3ef7dbf8a9c1bfd45385e80d7"},
+    {file = "types_psycopg2-2.9.21.10-py3-none-any.whl", hash = "sha256:918224a0731a3650832e46633e720703b5beef7693a064e777d9748654fcf5e5"},
+]
+
+[[package]]
+name = "types-pytest-lazy-fixture"
+version = "0.6.3.3"
+description = "Typing stubs for pytest-lazy-fixture"
+category = "main"
+optional = false
+python-versions = "*"
+files = [
+    {file = "types-pytest-lazy-fixture-0.6.3.3.tar.gz", hash = "sha256:2ef79d66bcde0e50acdac8dc55074b9ae0d4cfaeabdd638f5522f4cac7c8a2c7"},
+    {file = "types_pytest_lazy_fixture-0.6.3.3-py3-none-any.whl", hash = "sha256:a56a55649147ff960ff79d4b2c781a4f769351abc1876873f3116d0bd0c96353"},
 ]

 [[package]]
 name = "types-requests"
-version = "2.28.5"
+version = "2.31.0.0"
 description = "Typing stubs for requests"
 category = "main"
 optional = false
 python-versions = "*"
 files = [
-    {file = "types-requests-2.28.5.tar.gz", hash = "sha256:ac618bfefcb3742eaf97c961e13e9e5a226e545eda4a3dbe293b898d40933ad1"},
-    {file = "types_requests-2.28.5-py3-none-any.whl", hash = "sha256:98ab647ae88b5e2c41d6d20cfcb5117da1bea561110000b6fdeeea07b3e89877"},
+    {file = "types-requests-2.31.0.0.tar.gz", hash = "sha256:c1c29d20ab8d84dff468d7febfe8e0cb0b4664543221b386605e14672b44ea25"},
+    {file = "types_requests-2.31.0.0-py3-none-any.whl", hash = "sha256:7c5cea7940f8e92ec560bbc468f65bf684aa3dcf0554a6f8c4710f5f708dc598"},
 ]

 [package.dependencies]
-types-urllib3 = "<1.27"
+types-urllib3 = "*"

 [[package]]
 name = "types-s3transfer"
@@ -2332,14 +2335,14 @@ files = [

 [[package]]
 name = "types-toml"
-version = "0.10.8"
+version = "0.10.8.6"
 description = "Typing stubs for toml"
 category = "main"
 optional = false
 python-versions = "*"
 files = [
-    {file = "types-toml-0.10.8.tar.gz", hash = "sha256:b7e7ea572308b1030dc86c3ba825c5210814c2825612ec679eb7814f8dd9295a"},
-    {file = "types_toml-0.10.8-py3-none-any.whl", hash = "sha256:8300fd093e5829eb9c1fba69cee38130347d4b74ddf32d0a7df650ae55c2b599"},
+    {file = "types-toml-0.10.8.6.tar.gz", hash = "sha256:6d3ac79e36c9ee593c5d4fb33a50cca0e3adceb6ef5cff8b8e5aef67b4c4aaf2"},
+    {file = "types_toml-0.10.8.6-py3-none-any.whl", hash = "sha256:de7b2bb1831d6f7a4b554671ffe5875e729753496961b3e9b202745e4955dafa"},
 ]

 [[package]]
@@ -2356,14 +2359,14 @@ files = [

 [[package]]
 name = "typing-extensions"
-version = "4.3.0"
+version = "4.6.1"
 description = "Backported and Experimental Type Hints for Python 3.7+"
 category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "typing_extensions-4.3.0-py3-none-any.whl", hash = "sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02"},
-    {file = "typing_extensions-4.3.0.tar.gz", hash = "sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6"},
+    {file = "typing_extensions-4.6.1-py3-none-any.whl", hash = "sha256:6bac751f4789b135c43228e72de18637e9a6c29d12777023a703fd1a6858469f"},
+    {file = "typing_extensions-4.6.1.tar.gz", hash = "sha256:558bc0c4145f01e6405f4a5fdbd82050bd221b119f4bf72a961a1cfd471349d6"},
 ]

 [[package]]
@@ -2611,4 +2614,4 @@ testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>=
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "b689ffd6eae32b966f1744b5ac3343fe0dd26b31ee1f50e13daf5045ee0623e1"
+content-hash = "c6c217033f50430c31b0979b74db222e6bab2301abd8b9f0cce5a9d5bccc578f"
--- a/proxy/README.md
+++ b/proxy/README.md
@@ -1,6 +1,6 @@
 # Proxy

-Proxy binary accepts `--auth-backend` CLI option, which determines auth scheme and cluster routing method. Following backends are currently implemented:
+The proxy binary accepts the `--auth-backend` CLI option, which determines the auth scheme and cluster routing method. The following routing backends are currently implemented:

 * console
  new SCRAM-based console API; uses SNI info to select the destination project (endpoint soon)
@@ -9,6 +9,90 @@ Proxy binary accepts `--auth-backend` CLI option, which determines auth scheme a
 * link
  sends login link for all usernames

+The proxy can also expose the following services to the external world:
+
+* postgres protocol over TCP -- usual postgres endpoint compatible with usual
+  postgres drivers
+* postgres protocol over WebSockets -- same protocol tunneled over websockets
+  for environments where TCP connection is not available. We have our own
+  implementation of a client that uses node-postgres and tunnels traffic through
+  websockets: https://github.com/neondatabase/serverless
+* SQL over HTTP -- service that accepts POST requests with SQL text over HTTP
+  and responds with JSON-serialised results.
+
+
+## SQL over HTTP
+
+Compared to the usual postgres protocol over TCP or WebSockets, a plain
+one-shot HTTP request achieves smaller amortized latencies in edge setups due to
+fewer round trips and better reuse of open connections by the v8 engine. Such
+an endpoint can also be used directly, without any driver.
+
+To play with it locally, one may start the proxy over a local postgres installation
+(see the end of this page for how to generate certs with openssl):
+
+```
+./target/debug/proxy -c server.crt -k server.key --auth-backend=postgres --auth-endpoint=postgres://stas@127.0.0.1:5432/stas --wss 0.0.0.0:4444
+```
+
+If both postgres and the proxy are running, you can send an SQL query:
+```json
+curl -k -X POST 'https://proxy.localtest.me:4444/sql' \
+  -H 'Neon-Connection-String: postgres://stas:pass@proxy.localtest.me:4444/postgres' \
+  -H 'Content-Type: application/json' \
+  --data '{
+    "query":"SELECT $1::int[] as arr, $2::jsonb as obj, 42 as num",
+    "params":[ "{{1,2},{\"3\",4}}", {"key":"val", "ikey":4242}]
+  }' | jq
+
+{
+  "command": "SELECT",
+  "fields": [
+    { "dataTypeID": 1007, "name": "arr" },
+    { "dataTypeID": 3802, "name": "obj" },
+    { "dataTypeID": 23, "name": "num" }
+  ],
+  "rowCount": 1,
+  "rows": [
+    {
+      "arr": [[1,2],[3,4]],
+      "num": 42,
+      "obj": {
+        "ikey": 4242,
+        "key": "val"
+      }
+    }
+  ]
+}
+```
+
+
+With the current approach we made the following design decisions:
+
+1. SQL injection protection: We employed the extended query protocol, modifying
+   the rust-postgres driver to send queries in one roundtrip using a text
+   protocol rather than binary, bypassing potential issues like those identified
+   in sfackler/rust-postgres#1030.
+
+2. Postgres type compatibility: As not all postgres types have binary
+   representations (e.g., acl's in pg_class), we adjusted rust-postgres to
+   respond with text protocol, simplifying serialization and fixing queries with
+   text-only types in response.
+
+3. Data type conversion: Considering JSON supports fewer data types than
+   Postgres, we perform conversions where possible, passing all other types as
+   strings. Key conversions include:
+   - postgres int2, int4, float4, float8 -> json number (NaN and Inf remain
+     text)
+   - postgres bool, null, text -> json bool, null, string
+   - postgres array -> json array
+   - postgres json and jsonb -> json object
+
+4. Alignment with node-postgres: To facilitate integration with js libraries,
+   we've matched the response structure of node-postgres, returning command tags
+   and column oids. Command tag capturing was added to the rust-postgres
+   functionality as part of this change.
+
 ## Using SNI-based routing on localhost

 Now proxy determines project name from the subdomain, request to the `round-rice-566201.somedomain.tld` will be routed to the project named `round-rice-566201`. Unfortunately, `/etc/hosts` does not support domain wildcards, so I usually use `*.localtest.me` which resolves to `127.0.0.1`. Now we can create self-signed certificate and play with proxy:
--- a/proxy/src/auth/backend.rs
+++ b/proxy/src/auth/backend.rs
@@ -139,6 +139,16 @@ async fn auth_quirks(
 }

 impl BackendType<'_, ClientCredentials<'_>> {
+    /// Get the compute endpoint name: the credentials' `project` for `Console`/`Postgres`, the fixed name "link" for `Link`.
+    pub fn get_endpoint(&self) -> Option<String> {
+        use BackendType::*;
+
+        match self {
+            Console(_, creds) => creds.project.clone(), // optional `project` is returned as-is
+            Postgres(_, creds) => creds.project.clone(), // same handling as `Console`
+            Link(_) => Some("link".to_owned()), // no credentials are read here; always "link"
+        }
+    }
    /// Authenticate the client via the requested backend, possibly using credentials.
    #[tracing::instrument(fields(allow_cleartext = allow_cleartext), skip_all)]
    pub async fn authenticate(
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -100,9 +100,10 @@ impl CertResolver {
        is_default: bool,
    ) -> anyhow::Result<()> {
        let priv_key = {
-            let key_bytes = std::fs::read(key_path).context("TLS key file")?;
-            let mut keys = rustls_pemfile::pkcs8_private_keys(&mut &key_bytes[..])
+            let key_bytes = std::fs::read(key_path)
                .context(format!("Failed to read TLS keys at '{key_path}'"))?;
+            let mut keys = rustls_pemfile::pkcs8_private_keys(&mut &key_bytes[..])
+                .context(format!("Failed to parse TLS keys at '{key_path}'"))?;

            ensure!(keys.len() == 1, "keys.len() = {} (should be 1)", keys.len());
            keys.pop().map(rustls::PrivateKey).unwrap()
--- a/proxy/src/http.rs
+++ b/proxy/src/http.rs
@@ -3,6 +3,7 @@
 //! directly relying on deps like `reqwest` (think loose coupling).

 pub mod server;
+pub mod sql_over_http;
 pub mod websocket;

 pub use reqwest::{Request, Response, StatusCode};
--- a/proxy/src/http/sql_over_http.rs
+++ b/proxy/src/http/sql_over_http.rs
@@ -0,0 +1,603 @@
+use futures::pin_mut;
+use futures::StreamExt;
+use hyper::body::HttpBody;
+use hyper::{Body, HeaderMap, Request};
+use pq_proto::StartupMessageParams;
+use serde_json::json;
+use serde_json::Map;
+use serde_json::Value;
+use tokio_postgres::types::Kind;
+use tokio_postgres::types::Type;
+use tokio_postgres::Row;
+use url::Url;
+
+use crate::{auth, config::ProxyConfig, console};
+
+#[derive(serde::Deserialize)] // parsed from the JSON request body in `handle`
+struct QueryData {
+    query: String, // SQL text handed to `query_raw_txt`
+    params: Vec<serde_json::Value>, // query parameters; converted by `json_to_pg_text`
+}
+
+const APP_NAME: &str = "sql_over_http"; // application_name reported by `handle`
+const MAX_RESPONSE_SIZE: usize = 1024 * 1024; // 1 MB; limit on backend message size and on summed row bodies
+const MAX_REQUEST_SIZE: u64 = 1024 * 1024; // 1 MB; limit on the request body's size hint
+
+//
+// Render JSON query parameters as text parameters for Postgres, one output
+// string per input value and in the same order.
+//
+fn json_to_pg_text(json: Vec<Value>) -> Result<Vec<String>, serde_json::Error> {
+    json.iter()
+        .map(|value| {
+            match value {
+                // a top-level string is passed through verbatim, no escaping
+                Value::String(s) => Ok(s.to_string()),
+
+                // arrays are rendered as Postgres array literals
+                Value::Array(_) => json_array_to_pg_array(value),
+
+                // null, booleans, numbers and objects use their JSON text form
+                Value::Null | Value::Bool(_) | Value::Number(_) | Value::Object(_) => {
+                    serde_json::to_string(value)
+                }
+            }
+        })
+        .collect()
+}
+
+//
+// Serialize a JSON array to a Postgres array literal. Unlike top-level string
+// params, strings nested in an array must be double-quoted, with only `\` and `"`
+// backslash-escaped: the array parser takes any escaped character literally, so
+// JSON escapes such as `\n` would be corrupted. Element types are left for
+// Postgres to check ('{1,"2",3}'::int[] is accepted).
+// Example of the same escaping in node-postgres: packages/pg/lib/utils.js
+//
+fn json_array_to_pg_array(value: &Value) -> Result<String, serde_json::Error> {
+    // quote a single array element the way Postgres array input expects
+    fn quote(s: &str) -> String {
+        format!("\"{}\"", s.replace('\\', "\\\\").replace('"', "\\\""))
+    }
+    match value {
+        // null, booleans and numbers need no quoting
+        Value::Null | Value::Bool(_) | Value::Number(_) => serde_json::to_string(value),
+        // objects contain braces, quotes and commas, so they are quoted as well
+        Value::Object(_) => serde_json::to_string(value).map(|s| quote(&s)),
+        Value::String(s) => Ok(quote(s)),
+        // recurse into array
+        Value::Array(arr) => {
+            let vals = arr
+                .iter()
+                .map(json_array_to_pg_array)
+                .collect::<Result<Vec<_>, _>>()?
+                .join(",");
+            Ok(format!("{{{}}}", vals))
+        }
+    }
+}
+
+// Extract (username, dbname, hostname, password) from the `Neon-Connection-String`
+// header, which must hold a postgres:// or postgresql:// URL whose host equals the
+// TLS SNI hostname and, when a `Host` header is present, that header's host part.
+// NOTE(review): `Url::username()`/`password()` return percent-encoded text per the
+// `url` docs, so credentials with escaped characters look undecoded here — confirm.
+fn get_conn_info(
+    headers: &HeaderMap,
+    sni_hostname: Option<String>,
+) -> Result<(String, String, String, String), anyhow::Error> {
+    let raw_conn_str = headers
+        .get("Neon-Connection-String")
+        .ok_or_else(|| anyhow::anyhow!("missing connection string"))?
+        .to_str()?;
+    let parsed = Url::parse(raw_conn_str)?;
+
+    if !matches!(parsed.scheme(), "postgres" | "postgresql") {
+        anyhow::bail!("connection string must start with postgres: or postgresql:");
+    }
+
+    // the first path segment is the database name
+    let dbname = parsed
+        .path_segments()
+        .ok_or_else(|| anyhow::anyhow!("missing database name"))?
+        .next()
+        .ok_or_else(|| anyhow::anyhow!("invalid database name"))?;
+
+    let username = parsed.username();
+    if username.is_empty() {
+        anyhow::bail!("missing username");
+    }
+
+    let password = parsed
+        .password()
+        .ok_or_else(|| anyhow::anyhow!("no password"))?;
+
+    // TLS certificate selector now based on SNI hostname, so if we are running here
+    // we are sure that SNI hostname is set to one of the configured domain names.
+    let sni_hostname = sni_hostname.ok_or_else(|| anyhow::anyhow!("no SNI hostname set"))?;
+
+    let hostname = parsed
+        .host_str()
+        .ok_or_else(|| anyhow::anyhow!("no host"))?;
+
+    // host part of the `Host` header, with any `:port` suffix dropped
+    let host_header = headers
+        .get("host")
+        .and_then(|h| h.to_str().ok())
+        .and_then(|h| h.split(':').next());
+
+    if hostname != sni_hostname {
+        anyhow::bail!("mismatched SNI hostname and hostname");
+    }
+    if host_header.map_or(false, |h| h != hostname) {
+        anyhow::bail!("mismatched host header and hostname");
+    }
+
+    Ok((
+        username.to_owned(),
+        dbname.to_owned(),
+        hostname.to_owned(),
+        password.to_owned(),
+    ))
+}
+
+// TODO: return different http error codes
+pub async fn handle(
+    config: &'static ProxyConfig,
+    request: Request<Body>,
+    sni_hostname: Option<String>,
+) -> anyhow::Result<Value> {
+    //
+    // Determine the destination and connection params
+    //
+    let headers = request.headers();
+    let (username, dbname, hostname, password) = get_conn_info(headers, sni_hostname)?;
+    let credential_params = StartupMessageParams::new([
+        ("user", &username),
+        ("database", &dbname),
+        ("application_name", APP_NAME),
+    ]);
+
+    //
+    // Wake up the destination if needed. Code here is a bit involved because
+    // we reuse the code from the usual proxy and we need to prepare few structures
+    // that this code expects.
+    //
+    let tls = config.tls_config.as_ref();
+    let common_names = tls.and_then(|tls| tls.common_names.clone());
+    let creds = config
+        .auth_backend
+        .as_ref()
+        .map(|_| auth::ClientCredentials::parse(&credential_params, Some(&hostname), common_names))
+        .transpose()?;
+    let extra = console::ConsoleReqExtra {
+        session_id: uuid::Uuid::new_v4(),
+        application_name: Some(APP_NAME),
+    };
+    let node = creds.wake_compute(&extra).await?.expect("msg");
+    let conf = node.value.config;
+    let port = *conf.get_ports().first().expect("no port");
+    let host = match conf.get_hosts().first().expect("no host") {
+        tokio_postgres::config::Host::Tcp(host) => host,
+        tokio_postgres::config::Host::Unix(_) => {
+            return Err(anyhow::anyhow!("unix socket is not supported"));
+        }
+    };
+
+    let request_content_length = match request.body().size_hint().upper() {
+        Some(v) => v,
+        None => MAX_REQUEST_SIZE + 1,
+    };
+
+    if request_content_length > MAX_REQUEST_SIZE {
+        return Err(anyhow::anyhow!(
+            "request is too large (max {MAX_REQUEST_SIZE} bytes)"
+        ));
+    }
+
+    //
+    // Read the query and query params from the request body
+    //
+    let body = hyper::body::to_bytes(request.into_body()).await?;
+    let QueryData { query, params } = serde_json::from_slice(&body)?;
+    let query_params = json_to_pg_text(params)?;
+
+    //
+    // Connenct to the destination
+    //
+    let (client, connection) = tokio_postgres::Config::new()
+        .host(host)
+        .port(port)
+        .user(&username)
+        .password(&password)
+        .dbname(&dbname)
+        .max_backend_message_size(MAX_RESPONSE_SIZE)
+        .connect(tokio_postgres::NoTls)
+        .await?;
+
+    tokio::spawn(async move {
+        if let Err(e) = connection.await {
+            eprintln!("connection error: {}", e);
+        }
+    });
+
+    //
+    // Now execute the query and return the result
+    //
+    let row_stream = client.query_raw_txt(query, query_params).await?;
+
+    // Manually drain the stream into a vector to leave row_stream hanging
+    // around to get a command tag. Also check that the response is not too
+    // big.
+    pin_mut!(row_stream);
+    let mut rows: Vec<tokio_postgres::Row> = Vec::new();
+    let mut curret_size = 0;
+    while let Some(row) = row_stream.next().await {
+        let row = row?;
+        curret_size += row.body_len();
+        rows.push(row);
+        if curret_size > MAX_RESPONSE_SIZE {
+            return Err(anyhow::anyhow!("response too large"));
+        }
+    }
+
+    // grab the command tag and number of rows affected
+    let command_tag = row_stream.command_tag().unwrap_or_default();
+    let mut command_tag_split = command_tag.split(' ');
+    let command_tag_name = command_tag_split.next().unwrap_or_default();
+    let command_tag_count = if command_tag_name == "INSERT" {
+        // INSERT returns OID first and then number of rows
+        command_tag_split.nth(1)
+    } else {
+        // other commands return number of rows (if any)
+        command_tag_split.next()
+    }
+    .and_then(|s| s.parse::<i64>().ok());
+
+    let fields = if !rows.is_empty() {
+        rows[0]
+            .columns()
+            .iter()
+            .map(|c| {
+                json!({
+                    "name": Value::String(c.name().to_owned()),
+                    "dataTypeID": Value::Number(c.type_().oid().into()),
+                })
+            })
+            .collect::<Vec<_>>()
+    } else {
+        Vec::new()
+    };
+
+    // convert rows to JSON
+    let rows = rows
+        .iter()
+        .map(pg_text_row_to_json)
+        .collect::<Result<Vec<_>, _>>()?;
+
+    // resulting JSON format is based on the format of node-postgres result
+    Ok(json!({
+        "command": command_tag_name,
+        "rowCount": command_tag_count,
+        "rows": rows,
+        "fields": fields,
+    }))
+}
+
+//
+// Turn one row of text-encoded values into a JSON object that maps each column
+// name to the value converted according to the column's Postgres type.
+//
+pub fn pg_text_row_to_json(row: &Row) -> Result<Value, anyhow::Error> {
+    let mut object: Map<String, Value> = Map::new();
+
+    for (idx, col) in row.columns().iter().enumerate() {
+        // a `None` text is converted to JSON null by pg_text_to_json
+        let text = row.as_text(idx)?;
+        let converted = pg_text_to_json(text, col.type_())?;
+
+        // a repeated column name replaces the earlier entry
+        object.insert(col.name().to_string(), converted);
+    }
+
+    Ok(Value::Object(object))
+}
+
+//
+// Convert postgres text-encoded value to JSON value. SQL NULL must be passed as
+// `None`; the text "NULL" is an ordinary value and gets no special treatment.
+//
+pub fn pg_text_to_json(pg_value: Option<&str>, pg_type: &Type) -> Result<Value, anyhow::Error> {
+    if let Some(val) = pg_value {
+        if let Kind::Array(elem_type) = pg_type.kind() {
+            return pg_array_parse(val, elem_type);
+        }
+
+        match *pg_type {
+            Type::BOOL => Ok(Value::Bool(val == "t")),
+            Type::INT2 | Type::INT4 => {
+                let val = val.parse::<i32>()?;
+                Ok(Value::Number(serde_json::Number::from(val)))
+            }
+            Type::FLOAT4 | Type::FLOAT8 => {
+                let fval = val.parse::<f64>()?;
+                let num = serde_json::Number::from_f64(fval);
+                if let Some(num) = num {
+                    Ok(Value::Number(num))
+                } else {
+                    // Pass NaN, Inf, -Inf as strings
+                    // JS JSON.stringify() converts them to null, but we
+                    // want to preserve them, so we pass them as strings
+                    Ok(Value::String(val.to_string()))
+                }
+            }
+            Type::JSON | Type::JSONB => Ok(serde_json::from_str(val)?),
+            _ => Ok(Value::String(val.to_string())),
+        }
+    } else {
+        Ok(Value::Null)
+    }
+}
+
+//
+// Parse postgres array into JSON array.
+//
+// This is a bit involved because we need to handle nested arrays and quoted
+// values. Unlike postgres we don't check that all nested arrays have the same
+// dimensions, we just return them as is.
+//
+fn pg_array_parse(pg_array: &str, elem_type: &Type) -> Result<Value, anyhow::Error> {
+    _pg_array_parse(pg_array, elem_type, false).map(|(v, _)| v)
+}
+
+// Worker for `pg_array_parse`. With `nested` set it stops at the brace closing the
+// array that starts at the beginning of `pg_array` and also returns how many
+// characters precede that brace; a top-level call returns 0 as the second member.
+fn _pg_array_parse(
+    pg_array: &str,
+    elem_type: &Type,
+    nested: bool,
+) -> Result<(Value, usize), anyhow::Error> {
+    let mut pg_array_chr = pg_array.char_indices();
+    let mut level = 0;
+    let mut quote = false;
+    let mut entries: Vec<Value> = Vec::new();
+    let mut entry = String::new();
+
+    // skip bounds decoration, i.e. everything up to and including the first '='
+    if pg_array.starts_with('[') {
+        pg_array_chr.by_ref().find(|&(_, c)| c == '=');
+    }
+
+    while let Some((mut i, mut c)) = pg_array_chr.next() {
+        let mut escaped = false;
+        if c == '\\' {
+            escaped = true;
+            (i, c) = pg_array_chr
+                .next()
+                .ok_or_else(|| anyhow::anyhow!("unterminated escape in array"))?;
+        }
+
+        match c {
+            '{' if !quote => {
+                level += 1;
+                if level > 1 {
+                    let (res, off) = _pg_array_parse(&pg_array[i..], elem_type, true)?;
+                    entries.push(res);
+                    // `off` counts characters; stop just before the nested closing brace
+                    for _ in 0..off - 1 {
+                        pg_array_chr.next();
+                    }
+                }
+            }
+            // braces inside a quoted element are data, not structure
+            '}' if !quote => {
+                level -= 1;
+                if level == 0 {
+                    if !entry.is_empty() {
+                        // only an unquoted NULL denotes SQL NULL
+                        let text = if entry == "NULL" { None } else { Some(entry.as_str()) };
+                        entries.push(pg_text_to_json(text, elem_type)?);
+                    }
+                    if nested {
+                        return Ok((Value::Array(entries), pg_array[..i].chars().count()));
+                    }
+                }
+            }
+            '"' if !escaped => {
+                if quote {
+                    // push even if empty
+                    entries.push(pg_text_to_json(Some(&entry), elem_type)?);
+                    entry = String::new();
+                }
+                quote = !quote;
+            }
+            ',' if !quote => {
+                if !entry.is_empty() {
+                    let text = if entry == "NULL" { None } else { Some(entry.as_str()) };
+                    entries.push(pg_text_to_json(text, elem_type)?);
+                    entry = String::new();
+                }
+            }
+            _ => entry.push(c),
+        }
+    }
+
+    if level != 0 {
+        return Err(anyhow::anyhow!("unbalanced array"));
+    }
+
+    Ok((Value::Array(entries), 0))
+}
+
+#[cfg(test)]
+mod tests { // unit tests for the JSON <-> Postgres text conversions in this file
+    use super::*;
+    use serde_json::json;
+
+    #[test] // scalar params: JSON text for bool/number/null, strings passed through verbatim
+    fn test_atomic_types_to_pg_params() {
+        let json = vec![Value::Bool(true), Value::Bool(false)];
+        let pg_params = json_to_pg_text(json).unwrap();
+        assert_eq!(pg_params, vec!["true", "false"]);
+
+        let json = vec![Value::Number(serde_json::Number::from(42))];
+        let pg_params = json_to_pg_text(json).unwrap();
+        assert_eq!(pg_params, vec!["42"]);
+
+        let json = vec![Value::String("foo\"".to_string())];
+        let pg_params = json_to_pg_text(json).unwrap();
+        assert_eq!(pg_params, vec!["foo\""]);
+
+        let json = vec![Value::Null];
+        let pg_params = json_to_pg_text(json).unwrap();
+        assert_eq!(pg_params, vec!["null"]);
+    }
+
+    #[test] // arrays become `{...}` literals; nested strings are quoted and escaped
+    fn test_json_array_to_pg_array() {
+        // atoms and escaping
+        let json = "[true, false, null, 42, \"foo\", \"bar\\\"-\\\\\"]";
+        let json: Value = serde_json::from_str(json).unwrap();
+        let pg_params = json_to_pg_text(vec![json]).unwrap();
+        assert_eq!(
+            pg_params,
+            vec!["{true,false,null,42,\"foo\",\"bar\\\"-\\\\\"}"]
+        );
+
+        // nested arrays
+        let json = "[[true, false], [null, 42], [\"foo\", \"bar\\\"-\\\\\"]]";
+        let json: Value = serde_json::from_str(json).unwrap();
+        let pg_params = json_to_pg_text(vec![json]).unwrap();
+        assert_eq!(
+            pg_params,
+            vec!["{{true,false},{null,42},{\"foo\",\"bar\\\"-\\\\\"}}"]
+        );
+    }
+
+    #[test] // text -> JSON for scalar types; INT8 and non-finite floats stay strings
+    fn test_atomic_types_parse() {
+        assert_eq!(
+            pg_text_to_json(Some("foo"), &Type::TEXT).unwrap(),
+            json!("foo")
+        );
+        assert_eq!(pg_text_to_json(None, &Type::TEXT).unwrap(), json!(null));
+        assert_eq!(pg_text_to_json(Some("42"), &Type::INT4).unwrap(), json!(42));
+        assert_eq!(pg_text_to_json(Some("42"), &Type::INT2).unwrap(), json!(42));
+        assert_eq!(
+            pg_text_to_json(Some("42"), &Type::INT8).unwrap(),
+            json!("42")
+        );
+        assert_eq!(
+            pg_text_to_json(Some("42.42"), &Type::FLOAT8).unwrap(),
+            json!(42.42)
+        );
+        assert_eq!(
+            pg_text_to_json(Some("42.42"), &Type::FLOAT4).unwrap(),
+            json!(42.42)
+        );
+        assert_eq!(
+            pg_text_to_json(Some("NaN"), &Type::FLOAT4).unwrap(),
+            json!("NaN")
+        );
+        assert_eq!(
+            pg_text_to_json(Some("Infinity"), &Type::FLOAT4).unwrap(),
+            json!("Infinity")
+        );
+        assert_eq!(
+            pg_text_to_json(Some("-Infinity"), &Type::FLOAT4).unwrap(),
+            json!("-Infinity")
+        );
+
+        let json: Value =
+            serde_json::from_str("{\"s\":\"str\",\"n\":42,\"f\":4.2,\"a\":[null,3,\"a\"]}")
+                .unwrap();
+        assert_eq!(
+            pg_text_to_json(
+                Some(r#"{"s":"str","n":42,"f":4.2,"a":[null,3,"a"]}"#),
+                &Type::JSONB
+            )
+            .unwrap(),
+            json
+        );
+    }
+
+    #[test] // text arrays: quoting, escapes, nesting, and NULL elements
+    fn test_pg_array_parse_text() {
+        fn pt(pg_arr: &str) -> Value {
+            pg_array_parse(pg_arr, &Type::TEXT).unwrap()
+        }
+        assert_eq!(
+            pt(r#"{"aa\"\\\,a",cha,"bbbb"}"#),
+            json!(["aa\"\\,a", "cha", "bbbb"])
+        );
+        assert_eq!(
+            pt(r#"{{"foo","bar"},{"bee","bop"}}"#),
+            json!([["foo", "bar"], ["bee", "bop"]])
+        );
+        assert_eq!(
+            pt(r#"{{{{"foo",NULL,"bop",bup}}}}"#),
+            json!([[[["foo", null, "bop", "bup"]]]])
+        );
+        assert_eq!(
+            pt(r#"{{"1",2,3},{4,NULL,6},{NULL,NULL,NULL}}"#),
+            json!([["1", "2", "3"], ["4", null, "6"], [null, null, null]])
+        );
+    }
+
+    #[test] // bool arrays, including nested ones and NULL elements
+    fn test_pg_array_parse_bool() {
+        fn pb(pg_arr: &str) -> Value {
+            pg_array_parse(pg_arr, &Type::BOOL).unwrap()
+        }
+        assert_eq!(pb(r#"{t,f,t}"#), json!([true, false, true]));
+        assert_eq!(pb(r#"{{t,f,t}}"#), json!([[true, false, true]]));
+        assert_eq!(
+            pb(r#"{{t,f},{f,t}}"#),
+            json!([[true, false], [false, true]])
+        );
+        assert_eq!(
+            pb(r#"{{t,NULL},{NULL,f}}"#),
+            json!([[true, null], [null, false]])
+        );
+    }
+
+    #[test] // numeric arrays; INT8 elements and NaN/Infinity come back as strings
+    fn test_pg_array_parse_numbers() {
+        fn pn(pg_arr: &str, ty: &Type) -> Value {
+            pg_array_parse(pg_arr, ty).unwrap()
+        }
+        assert_eq!(pn(r#"{1,2,3}"#, &Type::INT4), json!([1, 2, 3]));
+        assert_eq!(pn(r#"{1,2,3}"#, &Type::INT2), json!([1, 2, 3]));
+        assert_eq!(pn(r#"{1,2,3}"#, &Type::INT8), json!(["1", "2", "3"]));
+        assert_eq!(pn(r#"{1,2,3}"#, &Type::FLOAT4), json!([1.0, 2.0, 3.0]));
+        assert_eq!(pn(r#"{1,2,3}"#, &Type::FLOAT8), json!([1.0, 2.0, 3.0]));
+        assert_eq!(
+            pn(r#"{1.1,2.2,3.3}"#, &Type::FLOAT4),
+            json!([1.1, 2.2, 3.3])
+        );
+        assert_eq!(
+            pn(r#"{1.1,2.2,3.3}"#, &Type::FLOAT8),
+            json!([1.1, 2.2, 3.3])
+        );
+        assert_eq!(
+            pn(r#"{NaN,Infinity,-Infinity}"#, &Type::FLOAT4),
+            json!(["NaN", "Infinity", "-Infinity"])
+        );
+        assert_eq!(
+            pn(r#"{NaN,Infinity,-Infinity}"#, &Type::FLOAT8),
+            json!(["NaN", "Infinity", "-Infinity"])
+        );
+    }
+
+    #[test] // a leading bounds decoration such as `[1:1]...=` is skipped
+    fn test_pg_array_with_decoration() {
+        fn p(pg_arr: &str) -> Value {
+            pg_array_parse(pg_arr, &Type::INT2).unwrap()
+        }
+        assert_eq!(
+            p(r#"[1:1][-2:-1][3:5]={{{1,2,3},{4,5,6}}}"#),
+            json!([[[1, 2, 3], [4, 5, 6]]])
+        );
+    }
+}
--- a/proxy/src/http/websocket.rs
+++ b/proxy/src/http/websocket.rs
@@ -4,12 +4,17 @@ use crate::{
 use bytes::{Buf, Bytes};
 use futures::{Sink, Stream, StreamExt};
 use hyper::{
-    server::{accept, conn::AddrIncoming},
+    server::{
+        accept,
+        conn::{AddrIncoming, AddrStream},
+    },
    upgrade::Upgraded,
-    Body, Request, Response, StatusCode,
+    Body, Method, Request, Response, StatusCode,
 };
 use hyper_tungstenite::{tungstenite::Message, HyperWebsocket, WebSocketStream};
 use pin_project_lite::pin_project;
+use serde_json::{json, Value};
+
 use std::{
    convert::Infallible,
    future::ready,
@@ -21,6 +26,7 @@ use tls_listener::TlsListener;
 use tokio::{
    io::{self, AsyncBufRead, AsyncRead, AsyncWrite, ReadBuf},
    net::TcpListener,
+    select,
 };
 use tokio_util::sync::CancellationToken;
 use tracing::{error, info, info_span, warn, Instrument};
@@ -30,6 +36,8 @@ use utils::http::{error::ApiError, json::json_response};
 // Tracking issue: https://github.com/rust-lang/rust/issues/98407.
 use sync_wrapper::SyncWrapper;

+use super::sql_over_http;
+
 pin_project! {
    /// This is a wrapper around a [`WebSocketStream`] that
    /// implements [`AsyncRead`] and [`AsyncWrite`].
@@ -159,6 +167,7 @@ async fn ws_handler(
    config: &'static ProxyConfig,
    cancel_map: Arc<CancelMap>,
    session_id: uuid::Uuid,
+    sni_hostname: Option<String>,
 ) -> Result<Response<Body>, ApiError> {
    let host = request
        .headers()
@@ -181,8 +190,44 @@ async fn ws_handler(

        // Return the response so the spawned future can continue.
        Ok(response)
+    // TODO: that deserves a refactor as now this function also handles http json client besides websockets.
+    // Right now I don't want to blow up sql-over-http patch with file renames and do that as a follow up instead.
+    } else if request.uri().path() == "/sql" && request.method() == Method::POST {
+        let result = select! {
+            _ = tokio::time::sleep(std::time::Duration::from_secs(10)) => {
+                Err(anyhow::anyhow!("Query timed out"))
+            }
+            response = sql_over_http::handle(config, request, sni_hostname) => {
+                response
+            }
+        };
+        let status_code = match result {
+            Ok(_) => StatusCode::OK,
+            Err(_) => StatusCode::BAD_REQUEST,
+        };
+        let json = match result {
+            Ok(r) => r,
+            Err(e) => {
+                let message = format!("{:?}", e);
+                let code = match e.downcast_ref::<tokio_postgres::Error>() {
+                    Some(e) => match e.code() {
+                        Some(e) => serde_json::to_value(e.code()).unwrap(),
+                        None => Value::Null,
+                    },
+                    None => Value::Null,
+                };
+                json!({ "message": message, "code": code })
+            }
+        };
+        json_response(status_code, json).map(|mut r| {
+            r.headers_mut().insert(
+                "Access-Control-Allow-Origin",
+                hyper::http::HeaderValue::from_static("*"),
+            );
+            r
+        })
    } else {
-        json_response(StatusCode::OK, "Connect with a websocket client")
+        json_response(StatusCode::BAD_REQUEST, "query is not supported")
    }
 }

@@ -216,20 +261,27 @@ pub async fn task_main(
        }
    });

-    let make_svc = hyper::service::make_service_fn(|_stream| async move {
-        Ok::<_, Infallible>(hyper::service::service_fn(
-            move |req: Request<Body>| async move {
-                let cancel_map = Arc::new(CancelMap::default());
-                let session_id = uuid::Uuid::new_v4();
-                ws_handler(req, config, cancel_map, session_id)
-                    .instrument(info_span!(
-                        "ws-client",
-                        session = format_args!("{session_id}")
-                    ))
-                    .await
-            },
-        ))
-    });
+    let make_svc =
+        hyper::service::make_service_fn(|stream: &tokio_rustls::server::TlsStream<AddrStream>| {
+            let sni_name = stream.get_ref().1.sni_hostname().map(|s| s.to_string());
+
+            async move {
+                Ok::<_, Infallible>(hyper::service::service_fn(move |req: Request<Body>| {
+                    let sni_name = sni_name.clone();
+                    async move {
+                        let cancel_map = Arc::new(CancelMap::default());
+                        let session_id = uuid::Uuid::new_v4();
+
+                        ws_handler(req, config, cancel_map, session_id, sni_name)
+                            .instrument(info_span!(
+                                "ws-client",
+                                session = format_args!("{session_id}")
+                            ))
+                            .await
+                    }
+                }))
+            }
+        });

    hyper::Server::builder(accept::from_stream(tls_listener))
        .serve(make_svc)
--- a/proxy/src/proxy.rs
+++ b/proxy/src/proxy.rs
@@ -455,6 +455,9 @@ impl<'a, S> Client<'a, S> {

 impl<S: AsyncRead + AsyncWrite + Unpin> Client<'_, S> {
    /// Let the client authenticate and connect to the designated compute node.
+    // Instrumentation logs endpoint name everywhere. Doesn't work for link
+    // auth; strictly speaking we don't know endpoint name in its case.
+    #[tracing::instrument(name = "", fields(ep = self.creds.get_endpoint().unwrap_or("".to_owned())), skip_all)]
    async fn connect_to_db(
        self,
        session: cancellation::Session<'_>,
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,40 +6,41 @@ authors = []

 [tool.poetry.dependencies]
 python = "^3.9"
-pytest = "^6.2.5"
+pytest = "^7.3.1"
 psycopg2-binary = "^2.9.1"
-typing-extensions = "^4.1.0"
+typing-extensions = "^4.6.1"
 PyJWT = {version = "^2.1.0", extras = ["crypto"]}
-requests = "^2.26.0"
-pytest-xdist = "^3.0.2"
+requests = "^2.31.0"
+pytest-xdist = "^3.3.1"
 asyncpg = "^0.27.0"
 aiopg = "^1.3.1"
 Jinja2 = "^3.0.2"
-types-requests = "^2.28.5"
-types-psycopg2 = "^2.9.18"
+types-requests = "^2.31.0.0"
+types-psycopg2 = "^2.9.21.10"
 boto3 = "^1.26.16"
 boto3-stubs = {extras = ["s3"], version = "^1.26.16"}
 moto = {extras = ["server"], version = "^4.1.2"}
-backoff = "^1.11.1"
+backoff = "^2.2.1"
 pytest-lazy-fixture = "^0.6.3"
 prometheus-client = "^0.14.1"
 pytest-timeout = "^2.1.0"
 Werkzeug = "^2.2.3"
-pytest-order = "^1.0.1"
-allure-pytest = "^2.13.1"
-pytest-asyncio = "^0.19.0"
+pytest-order = "^1.1.0"
+allure-pytest = "^2.13.2"
+pytest-asyncio = "^0.21.0"
 toml = "^0.10.2"
 psutil = "^5.9.4"
-types-psutil = "^5.9.5.4"
-types-toml = "^0.10.8"
-pytest-httpserver = "^1.0.6"
+types-psutil = "^5.9.5.12"
+types-toml = "^0.10.8.6"
+pytest-httpserver = "^1.0.8"
 aiohttp = "3.7.4"
 pytest-rerunfailures = "^11.1.2"
+types-pytest-lazy-fixture = "^0.6.3.3"

 [tool.poetry.group.dev.dependencies]
-black = "^23.1.0"
-mypy = "==1.1.1"
-ruff = "^0.0.255"
+black = "^23.3.0"
+mypy = "==1.3.0"
+ruff = "^0.0.269"

 [build-system]
 requires = ["poetry-core>=1.0.0"]
--- a/scripts/export_import_between_pageservers.py
+++ b/scripts/export_import_between_pageservers.py
@@ -162,7 +162,7 @@ class PgProtocol:
        Returns psycopg2's connection object.
        This method passes all extra params to connstr.
        """
-        conn = psycopg2.connect(**self.conn_options(**kwargs))
+        conn: PgConnection = psycopg2.connect(**self.conn_options(**kwargs))

        # WARNING: this setting affects *all* tests!
        conn.autocommit = autocommit
--- a/scripts/ingest_perf_test_result.py
+++ b/scripts/ingest_perf_test_result.py
@@ -35,7 +35,7 @@ def get_connection_cursor():
    connstr = os.getenv("DATABASE_URL")
    if not connstr:
        err("DATABASE_URL environment variable is not set")
-    with psycopg2.connect(connstr) as conn:
+    with psycopg2.connect(connstr, connect_timeout=30) as conn:
        with conn.cursor() as cur:
            yield cur

--- a/scripts/pr-comment-test-report.js
+++ b/scripts/pr-comment-test-report.js
@@ -36,11 +36,9 @@ module.exports = async ({ github, context, fetch, report }) => {
    // Marker to find the comment in the subsequent runs
    const startMarker = `<!--AUTOMATIC COMMENT START #${context.payload.number}-->`
    // Let users know that the comment is updated automatically
-    const autoupdateNotice = `<div align="right"><sub>The comment gets automatically updated with the latest test results :recycle:</sub></div>`
+    const autoupdateNotice = `<div align="right"><sub>The comment gets automatically updated with the latest test results<br>${context.payload.pull_request.head.sha} at ${new Date().toISOString()} :recycle:</sub></div>`
    // GitHub bot id taken from (https://api.github.com/users/github-actions[bot])
    const githubActionsBotId = 41898282
-    // The latest commit in the PR URL
-    const commitUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/pull/${context.payload.number}/commits/${context.payload.pull_request.head.sha}`
    // Commend body itself
    let commentBody = `${startMarker}\n`

@@ -74,7 +72,6 @@ module.exports = async ({ github, context, fetch, report }) => {
    let flakyTestsCount = 0

    const pgVersions = new Set()
-    const buildTypes = new Set()

    for (const parentSuite of suites.children) {
        for (const suite of parentSuite.children) {
@@ -92,28 +89,29 @@ module.exports = async ({ github, context, fetch, report }) => {
                }

                pgVersions.add(pgVersion)
-                buildTypes.add(buildType)

                // Removing build type and PostgreSQL version from the test name to make it shorter
                const testName = test.name.replace(new RegExp(`${buildType}-pg${pgVersion}-?`), "").replace("[]", "")
                test.pytestName = `${parentSuite.name.replace(".", "/")}/${suite.name}.py::${testName}`
+                test.pgVersion = pgVersion
+                test.buildType = buildType

                if (test.status === "passed") {
-                    passedTests[pgVersion][buildType].push(test)
+                    passedTests[pgVersion][testName].push(test)
                    passedTestsCount += 1
                } else if (test.status === "failed" || test.status === "broken") {
-                    failedTests[pgVersion][buildType].push(test)
+                    failedTests[pgVersion][testName].push(test)
                    failedTestsCount += 1
                } else if (test.status === "skipped") {
-                    skippedTests[pgVersion][buildType].push(test)
+                    skippedTests[pgVersion][testName].push(test)
                    skippedTestsCount += 1
                }

                if (test.retriesCount > 0) {
-                    retriedTests[pgVersion][buildType].push(test)
+                    retriedTests[pgVersion][testName].push(test)

                    if (test.retriesStatusChange) {
-                        flakyTests[pgVersion][buildType].push(test)
+                        flakyTests[pgVersion][testName].push(test)
                        flakyTestsCount += 1
                    }
                }
@@ -122,39 +120,44 @@ module.exports = async ({ github, context, fetch, report }) => {
    }

    const totalTestsCount = failedTestsCount + passedTestsCount + skippedTestsCount
-    commentBody += `### ${totalTestsCount} tests run: ${passedTestsCount} passed, ${failedTestsCount} failed, ${skippedTestsCount} skipped ([full report](${reportUrl}) for ${commitUrl})\n___\n`
+    commentBody += `### ${totalTestsCount} tests run: ${passedTestsCount} passed, ${failedTestsCount} failed, ${skippedTestsCount} skipped ([full report](${reportUrl}))\n___\n`

-    // Print test resuls from the newest to the oldest PostgreSQL version for release and debug builds.
+    // Print test results from the newest to the oldest Postgres version for release and debug builds.
    for (const pgVersion of Array.from(pgVersions).sort().reverse()) {
-        for (const buildType of Array.from(buildTypes).sort().reverse()) {
-            if (failedTests[pgVersion][buildType].length > 0) {
-                commentBody += `#### PostgreSQL ${pgVersion} (${buildType} build)\n\n`
-                commentBody += `Failed tests:\n`
-                for (const test of failedTests[pgVersion][buildType]) {
+        if (Object.keys(failedTests[pgVersion]).length > 0) {
+            commentBody += `#### Failures on Posgres ${pgVersion}\n\n`
+            for (const [testName, tests] of Object.entries(failedTests[pgVersion])) {
+                const links = []
+                for (const test of tests) {
                    const allureLink = `${reportUrl}#suites/${test.parentUid}/${test.uid}`
-
-                    commentBody += `- [\`${test.pytestName}\`](${allureLink})`
-                    if (test.retriesCount > 0) {
-                        commentBody += ` (ran [${test.retriesCount + 1} times](${allureLink}/retries))`
-                    }
-                    commentBody += "\n"
+                    links.push(`[${test.buildType}](${allureLink})`)
                }
-                commentBody += "\n"
+                commentBody += `- \`${testName}\`: ${links.join(", ")}\n`
            }
+
+            const testsToRerun = Object.values(failedTests[pgVersion]).map(x => x[0].name)
+            const command = `DEFAULT_PG_VERSION=${pgVersion} scripts/pytest -k "${testsToRerun.join(" or ")}"`
+
+            commentBody += "```\n"
+            commentBody += `# Run failed on Postgres ${pgVersion} tests locally:\n`
+            commentBody += `${command}\n`
+            commentBody += "```\n"
        }
    }

    if (flakyTestsCount > 0) {
-        commentBody += "<details>\n<summary>Flaky tests</summary>\n\n"
+        commentBody += `<details>\n<summary>Flaky tests (${flakyTestsCount})</summary>\n\n`
        for (const pgVersion of Array.from(pgVersions).sort().reverse()) {
-            for (const buildType of Array.from(buildTypes).sort().reverse()) {
-                if (flakyTests[pgVersion][buildType].length > 0) {
-                    commentBody += `#### PostgreSQL ${pgVersion} (${buildType} build)\n\n`
-                    for (const test of flakyTests[pgVersion][buildType]) {
+            if (Object.keys(flakyTests[pgVersion]).length > 0) {
+                commentBody += `#### Postgres ${pgVersion}\n\n`
+                for (const [testName, tests] of Object.entries(flakyTests[pgVersion])) {
+                    const links = []
+                    for (const test of tests) {
+                        const allureLink = `${reportUrl}#suites/${test.parentUid}/${test.uid}/retries`
                        const status = test.status === "passed" ? ":white_check_mark:" : ":x:"
-                        commentBody += `- ${status} [\`${test.pytestName}\`](${reportUrl}#suites/${test.parentUid}/${test.uid}/retries)\n`
+                        links.push(`[${status} ${test.buildType}](${allureLink})`)
                    }
-                    commentBody += "\n"
+                    commentBody += `- \`${testName}\`: ${links.join(", ")}\n`
                }
            }
        }
--- a/test_runner/fixtures/compare_fixtures.py
+++ b/test_runner/fixtures/compare_fixtures.py
@@ -312,6 +312,6 @@ def neon_with_baseline(request: FixtureRequest) -> PgCompare:
    implementation-specific logic is widely useful across multiple tests, it might
    make sense to add methods to the PgCompare class.
    """
-    fixture = request.getfixturevalue(request.param)  # type: ignore
+    fixture = request.getfixturevalue(request.param)
    assert isinstance(fixture, PgCompare), f"test error: fixture {fixture} is not PgCompare"
    return fixture
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -26,7 +26,7 @@ from typing import Any, Dict, Iterator, List, Optional, Tuple, Type, Union, cast
 from urllib.parse import urlparse

 import asyncpg
-import backoff  # type: ignore
+import backoff
 import boto3
 import jwt
 import psycopg2
@@ -149,7 +149,7 @@ def top_output_dir(base_dir: Path) -> Iterator[Path]:

@pytest.fixture(scope="session")
 def versioned_pg_distrib_dir(pg_distrib_dir: Path, pg_version: PgVersion) -> Iterator[Path]:
-    versioned_dir = pg_distrib_dir / f"v{pg_version}"
+    versioned_dir = pg_distrib_dir / pg_version.v_prefixed

    psql_bin_path = versioned_dir / "bin/psql"
    postgres_bin_path = versioned_dir / "bin/postgres"
@@ -354,7 +354,7 @@ class PgProtocol:
        Returns psycopg2's connection object.
        This method passes all extra params to connstr.
        """
-        conn = psycopg2.connect(**self.conn_options(**kwargs))
+        conn: PgConnection = psycopg2.connect(**self.conn_options(**kwargs))

        # WARNING: this setting affects *all* tests!
        conn.autocommit = autocommit
@@ -1745,8 +1745,8 @@ class PgBin:
    def __init__(self, log_dir: Path, pg_distrib_dir: Path, pg_version: PgVersion):
        self.log_dir = log_dir
        self.pg_version = pg_version
-        self.pg_bin_path = pg_distrib_dir / f"v{pg_version}" / "bin"
-        self.pg_lib_dir = pg_distrib_dir / f"v{pg_version}" / "lib"
+        self.pg_bin_path = pg_distrib_dir / pg_version.v_prefixed / "bin"
+        self.pg_lib_dir = pg_distrib_dir / pg_version.v_prefixed / "lib"
        self.env = os.environ.copy()
        self.env["LD_LIBRARY_PATH"] = str(self.pg_lib_dir)

@@ -2042,15 +2042,19 @@ class NeonProxy(PgProtocol):
        proxy_port: int,
        http_port: int,
        mgmt_port: int,
+        external_http_port: int,
        auth_backend: NeonProxy.AuthBackend,
        metric_collection_endpoint: Optional[str] = None,
        metric_collection_interval: Optional[str] = None,
    ):
        host = "127.0.0.1"
-        super().__init__(dsn=auth_backend.default_conn_url, host=host, port=proxy_port)
+        domain = "proxy.localtest.me"  # resolves to 127.0.0.1
+        super().__init__(dsn=auth_backend.default_conn_url, host=domain, port=proxy_port)

+        self.domain = domain
        self.host = host
        self.http_port = http_port
+        self.external_http_port = external_http_port
        self.neon_binpath = neon_binpath
        self.test_output_dir = test_output_dir
        self.proxy_port = proxy_port
@@ -2062,11 +2066,42 @@ class NeonProxy(PgProtocol):

    def start(self) -> NeonProxy:
        assert self._popen is None
+
+        # generate a self-signed certificate and key if the key doesn't exist yet
+        crt_path = self.test_output_dir / "proxy.crt"
+        key_path = self.test_output_dir / "proxy.key"
+
+        if not key_path.exists():
+            r = subprocess.run(
+                [
+                    "openssl",
+                    "req",
+                    "-new",
+                    "-x509",
+                    "-days",
+                    "365",
+                    "-nodes",
+                    "-text",
+                    "-out",
+                    str(crt_path),
+                    "-keyout",
+                    str(key_path),
+                    "-subj",
+                    "/CN=*.localtest.me",
+                    "-addext",
+                    "subjectAltName = DNS:*.localtest.me",
+                ]
+            )
+            assert r.returncode == 0
+
        args = [
            str(self.neon_binpath / "proxy"),
            *["--http", f"{self.host}:{self.http_port}"],
            *["--proxy", f"{self.host}:{self.proxy_port}"],
            *["--mgmt", f"{self.host}:{self.mgmt_port}"],
+            *["--wss", f"{self.host}:{self.external_http_port}"],
+            *["-c", str(crt_path)],
+            *["-k", str(key_path)],
            *self.auth_backend.extra_args(),
        ]

@@ -2190,6 +2225,7 @@ def link_proxy(
    http_port = port_distributor.get_port()
    proxy_port = port_distributor.get_port()
    mgmt_port = port_distributor.get_port()
+    external_http_port = port_distributor.get_port()

    with NeonProxy(
        neon_binpath=neon_binpath,
@@ -2197,6 +2233,7 @@ def link_proxy(
        proxy_port=proxy_port,
        http_port=http_port,
        mgmt_port=mgmt_port,
+        external_http_port=external_http_port,
        auth_backend=NeonProxy.Link(),
    ) as proxy:
        proxy.start()
@@ -2224,6 +2261,7 @@ def static_proxy(
    proxy_port = port_distributor.get_port()
    mgmt_port = port_distributor.get_port()
    http_port = port_distributor.get_port()
+    external_http_port = port_distributor.get_port()

    with NeonProxy(
        neon_binpath=neon_binpath,
@@ -2231,6 +2269,7 @@ def static_proxy(
        proxy_port=proxy_port,
        http_port=http_port,
        mgmt_port=mgmt_port,
+        external_http_port=external_http_port,
        auth_backend=NeonProxy.Postgres(auth_endpoint),
    ) as proxy:
        proxy.start()
--- a/test_runner/fixtures/pageserver/http.py
+++ b/test_runner/fixtures/pageserver/http.py
@@ -1,5 +1,6 @@
 from __future__ import annotations

+import json
 import time
 from collections import defaultdict
 from dataclasses import dataclass
@@ -109,6 +110,10 @@ class PageserverHttpClient(requests.Session):
        if auth_token is not None:
            self.headers["Authorization"] = f"Bearer {auth_token}"

+    @property
+    def base_url(self) -> str:
+        return f"http://localhost:{self.port}"
+
    def verbose_error(self, res: requests.Response):
        try:
            res.raise_for_status()
@@ -149,11 +154,16 @@ class PageserverHttpClient(requests.Session):
        assert isinstance(res_json, list)
        return res_json

-    def tenant_create(self, new_tenant_id: Optional[TenantId] = None) -> TenantId:
+    def tenant_create(
+        self, new_tenant_id: Optional[TenantId] = None, conf: Optional[Dict[str, Any]] = None
+    ) -> TenantId:
+        if conf is not None:
+            assert "new_tenant_id" not in conf.keys()
        res = self.post(
            f"http://localhost:{self.port}/v1/tenant",
            json={
                "new_tenant_id": str(new_tenant_id) if new_tenant_id else None,
+                **(conf or {}),
            },
        )
        self.verbose_error(res)
@@ -163,8 +173,22 @@ class PageserverHttpClient(requests.Session):
        assert isinstance(new_tenant_id, str)
        return TenantId(new_tenant_id)

-    def tenant_attach(self, tenant_id: TenantId):
-        res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/attach")
+    def tenant_attach(
+        self, tenant_id: TenantId, config: None | Dict[str, Any] = None, config_null: bool = False
+    ):
+        if config_null:
+            assert config is None
+            body = "null"
+        else:
+            # null-config is prohibited by the API
+            if config is None:
+                config = {}
+            body = json.dumps({"config": config})
+        res = self.post(
+            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/attach",
+            data=body,
+            headers={"Content-Type": "application/json"},
+        )
        self.verbose_error(res)

    def tenant_detach(self, tenant_id: TenantId, detach_ignored=False):
--- a/test_runner/fixtures/pg_version.py
+++ b/test_runner/fixtures/pg_version.py
@@ -27,6 +27,12 @@ class PgVersion(str, enum.Enum):
    def __repr__(self) -> str:
        return f"'{self.value}'"

+    # In GitHub workflows we use the Postgres version with a v-prefix (e.g. v14 instead of just 14);
+    # sometimes we need to do the same in tests.
+    @property
+    def v_prefixed(self) -> str:
+        return f"v{self.value}"
+
    @classmethod
    def _missing_(cls, value) -> Optional["PgVersion"]:
        known_values = {v.value for _, v in cls.__members__.items()}
--- a/test_runner/performance/test_dup_key.py
+++ b/test_runner/performance/test_dup_key.py
@@ -2,7 +2,7 @@ from contextlib import closing

 import pytest
 from fixtures.compare_fixtures import PgCompare
-from pytest_lazyfixture import lazy_fixture  # type: ignore
+from pytest_lazyfixture import lazy_fixture


@pytest.mark.parametrize(
--- a/test_runner/performance/test_duplicate_layers.py
+++ b/test_runner/performance/test_duplicate_layers.py
@@ -0,0 +1,43 @@
+import time
+
+import pytest
+from fixtures.neon_fixtures import NeonEnvBuilder, PgBin
+
+
+# Test duplicate layer detection
+#
+# This test sets a failpoint at the end of the first compaction phase:
+# after flushing new L1 layers but before deletion of the L0 layers.
+# It should cause compaction to generate a duplicate L1 layer after restart.
+@pytest.mark.timeout(600)
+def test_duplicate_layers(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
+    env = neon_env_builder.init_start()
+
+    # These warnings are expected when the pageserver is restarted abruptly
+    env.pageserver.allowed_errors.append(".*found future image layer.*")
+    env.pageserver.allowed_errors.append(".*found future delta layer.*")
+    # env.pageserver.allowed_errors.append(".*duplicate layer.*")
+
+    pageserver_http = env.pageserver.http_client()
+
+    # Use aggressive compaction and checkpoint settings
+    tenant_id, _ = env.neon_cli.create_tenant(
+        conf={
+            "checkpoint_distance": f"{1024 ** 2}",
+            "compaction_target_size": f"{1024 ** 2}",
+            "compaction_period": "5 s",
+            "compaction_threshold": "5",
+        }
+    )
+    endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
+    connstr = endpoint.connstr(options="-csynchronous_commit=off")
+    pg_bin.run_capture(["pgbench", "-i", "-s10", connstr])
+
+    pageserver_http.configure_failpoints(("compact-level0-phase1-finish", "exit"))
+
+    with pytest.raises(Exception):  # pgbench is expected to fail (presumably once the failpoint fires)
+        pg_bin.run_capture(["pgbench", "-P1", "-N", "-c5", "-T500", "-Mprepared", connstr])
+    time.sleep(6)  # longer than the 5 s compaction_period, to give compaction time to run
+    env.pageserver.stop()
+    env.pageserver.start()
+    time.sleep(6)  # give compaction time to run again after the restart
--- a/test_runner/performance/test_gc_feedback.py
+++ b/test_runner/performance/test_gc_feedback.py
@@ -0,0 +1,76 @@
+import pytest
+from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
+from fixtures.log_helper import log
+from fixtures.neon_fixtures import NeonEnvBuilder
+
+
+@pytest.mark.timeout(10000)
+def test_gc_feedback(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker):
+    """
+    Test that GC is able to collect all old layers even if they form
+    "stairs" and there are not three delta layers since the last image layer.
+
+    Information about the image layers needed to collect old layers should
+    be propagated by GC to the compaction task, which should take it into account
+    when deciding which new image layers need to be created.
+    """
+    env = neon_env_builder.init_start()
+    client = env.pageserver.http_client()
+
+    tenant_id, _ = env.neon_cli.create_tenant(
+        conf={
+            # disable default GC and compaction
+            "gc_period": "1000 m",
+            "compaction_period": "0 s",
+            "gc_horizon": f"{1024 ** 2}",
+            "checkpoint_distance": f"{1024 ** 2}",
+            "compaction_target_size": f"{1024 ** 2}",
+            # set PITR interval to be small, so we can do GC
+            "pitr_interval": "10 s",
+            # "compaction_threshold": "3",
+            # "image_creation_threshold": "2",
+        }
+    )
+    endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
+    timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
+    n_steps = 10
+    n_update_iters = 100
+    step_size = 10000
+    with endpoint.cursor() as cur:
+        cur.execute("SET statement_timeout='1000s'")
+        cur.execute(
+            "CREATE TABLE t(step bigint, count bigint default 0, payload text default repeat(' ', 100))  with (fillfactor=50)"
+        )
+        cur.execute("CREATE INDEX ON t(step)")
+        # In each step, we insert 'step_size' new rows, and update the newly inserted rows
+        # 'n_update_iters' times. This creates a lot of churn and generates lots of WAL at the end of the table,
+        # without modifying the earlier parts of the table.
+        for step in range(n_steps):
+            cur.execute(f"INSERT INTO t (step) SELECT {step} FROM generate_series(1, {step_size})")
+            for i in range(n_update_iters):
+                cur.execute(f"UPDATE t set count=count+1 where step = {step}")
+                cur.execute("vacuum t")
+
+            # cur.execute("select pg_table_size('t')")
+            # logical_size = cur.fetchone()[0]
+            logical_size = client.timeline_detail(tenant_id, timeline_id)["current_logical_size"]
+            log.info(f"Logical storage size  {logical_size}")
+
+            client.timeline_checkpoint(tenant_id, timeline_id)
+
+            # Do compaction and GC
+            client.timeline_gc(tenant_id, timeline_id, 0)
+            client.timeline_compact(tenant_id, timeline_id)
+            # One more iteration to check that no excessive image layers are generated
+            client.timeline_gc(tenant_id, timeline_id, 0)
+            client.timeline_compact(tenant_id, timeline_id)
+
+            physical_size = client.timeline_detail(tenant_id, timeline_id)["current_physical_size"]
+            log.info(f"Physical storage size {physical_size}")
+
+    MB = 1024 * 1024
+    zenbenchmark.record("logical_size", logical_size // MB, "Mb", MetricReport.LOWER_IS_BETTER)
+    zenbenchmark.record("physical_size", physical_size // MB, "Mb", MetricReport.LOWER_IS_BETTER)
+    zenbenchmark.record(
+        "physical/logical ratio", physical_size / logical_size, "", MetricReport.LOWER_IS_BETTER
+    )
--- a/test_runner/performance/test_hot_page.py
+++ b/test_runner/performance/test_hot_page.py
@@ -2,7 +2,7 @@ from contextlib import closing

 import pytest
 from fixtures.compare_fixtures import PgCompare
-from pytest_lazyfixture import lazy_fixture  # type: ignore
+from pytest_lazyfixture import lazy_fixture


@pytest.mark.parametrize(
--- a/test_runner/performance/test_hot_table.py
+++ b/test_runner/performance/test_hot_table.py
@@ -2,7 +2,7 @@ from contextlib import closing

 import pytest
 from fixtures.compare_fixtures import PgCompare
-from pytest_lazyfixture import lazy_fixture  # type: ignore
+from pytest_lazyfixture import lazy_fixture


@pytest.mark.parametrize(
--- a/test_runner/performance/test_seqscans.py
+++ b/test_runner/performance/test_seqscans.py
@@ -6,7 +6,7 @@ import pytest
 from fixtures.benchmark_fixture import MetricReport
 from fixtures.compare_fixtures import PgCompare
 from fixtures.log_helper import log
-from pytest_lazyfixture import lazy_fixture  # type: ignore
+from pytest_lazyfixture import lazy_fixture


@pytest.mark.parametrize(
--- a/test_runner/regress/test_attach_tenant_config.py
+++ b/test_runner/regress/test_attach_tenant_config.py
@@ -0,0 +1,200 @@
+from dataclasses import dataclass
+from typing import Generator, Optional
+
+import pytest
+from fixtures.neon_fixtures import (
+    LocalFsStorage,
+    NeonEnv,
+    NeonEnvBuilder,
+    RemoteStorageKind,
+)
+from fixtures.pageserver.http import PageserverApiException, TenantConfig
+from fixtures.types import TenantId
+from fixtures.utils import wait_until
+
+
+@pytest.fixture
+def positive_env(neon_env_builder: NeonEnvBuilder) -> NeonEnv:
+    neon_env_builder.enable_remote_storage(
+        remote_storage_kind=RemoteStorageKind.LOCAL_FS,
+        test_name="test_attach_tenant_config",
+    )
+    env = neon_env_builder.init_start()
+    assert isinstance(env.remote_storage, LocalFsStorage)
+    return env
+
+
+@dataclass
+class NegativeTests:
+    neon_env: NeonEnv
+    tenant_id: TenantId
+    config_pre_detach: TenantConfig
+
+
+@pytest.fixture
+def negative_env(neon_env_builder: NeonEnvBuilder) -> Generator[NegativeTests, None, None]:
+    neon_env_builder.enable_remote_storage(
+        remote_storage_kind=RemoteStorageKind.LOCAL_FS,
+        test_name="test_attach_tenant_config",
+    )
+    env = neon_env_builder.init_start()
+    assert isinstance(env.remote_storage, LocalFsStorage)
+
+    ps_http = env.pageserver.http_client()
+    (tenant_id, _) = env.neon_cli.create_tenant()
+    assert ps_http.tenant_config(tenant_id).tenant_specific_overrides == {}
+    config_pre_detach = ps_http.tenant_config(tenant_id)
+    assert tenant_id in [TenantId(t["id"]) for t in ps_http.tenant_list()]
+    ps_http.tenant_detach(tenant_id)  # hand the test a detached tenant to try (and fail) to attach
+    assert tenant_id not in [TenantId(t["id"]) for t in ps_http.tenant_list()]
+
+    yield NegativeTests(env, tenant_id, config_pre_detach)
+
+    assert tenant_id not in [
+        TenantId(t["id"]) for t in ps_http.tenant_list()
+    ], "tenant should not be attached after negative test"
+
+    env.pageserver.allowed_errors.append(".*Error processing HTTP request: Bad request")
+
+    def log_contains_bad_request():  # NOTE(review): assumes wait_until retries while this raises
+        assert env.pageserver.log_contains(".*Error processing HTTP request: Bad request")
+
+    wait_until(50, 0.1, log_contains_bad_request)
+
+
+def test_null_body(negative_env: NegativeTests):
+    """
+    If we send `null` in the body, the request should be rejected with status 400.
+    """
+    env = negative_env.neon_env
+    tenant_id = negative_env.tenant_id
+    ps_http = env.pageserver.http_client()
+
+    res = ps_http.post(
+        f"{ps_http.base_url}/v1/tenant/{tenant_id}/attach",
+        data=b"null",
+        headers={"Content-Type": "application/json"},
+    )
+    assert res.status_code == 400
+
+
+def test_null_config(negative_env: NegativeTests):
+    """
+    If the `config` field is `null`, the request should be rejected with status 400.
+    """
+
+    env = negative_env.neon_env
+    tenant_id = negative_env.tenant_id
+    ps_http = env.pageserver.http_client()
+
+    res = ps_http.post(
+        f"{ps_http.base_url}/v1/tenant/{tenant_id}/attach",
+        data=b'{"config": null}',
+        headers={"Content-Type": "application/json"},
+    )
+    assert res.status_code == 400
+
+
+def test_config_with_unknown_keys_is_bad_request(negative_env: NegativeTests):
+    """
+    If we send a config with unknown keys, the request should be rejected with status 400.
+    """
+
+    env = negative_env.neon_env
+    tenant_id = negative_env.tenant_id
+    ps_http = env.pageserver.http_client()
+
+    config_with_unknown_keys = {
+        "compaction_period": "1h",
+        "this_key_does_not_exist": "some value",
+    }
+
+    with pytest.raises(PageserverApiException) as e:
+        ps_http.tenant_attach(tenant_id, config=config_with_unknown_keys)
+    assert e.type == PageserverApiException
+    assert e.value.status_code == 400
+
+
+@pytest.mark.parametrize("content_type", [None, "application/json"])
+def test_empty_body(positive_env: NeonEnv, content_type: Optional[str]):
+    """
+    For backwards-compatibility: if we send an empty body (with or without a Content-Type header),
+    the request should be accepted and the config should be the default config.
+    """
+    env = positive_env
+    ps_http = env.pageserver.http_client()
+    (tenant_id, _) = env.neon_cli.create_tenant()
+    assert ps_http.tenant_config(tenant_id).tenant_specific_overrides == {}
+    config_pre_detach = ps_http.tenant_config(tenant_id)
+    assert tenant_id in [TenantId(t["id"]) for t in ps_http.tenant_list()]
+    ps_http.tenant_detach(tenant_id)
+    assert tenant_id not in [TenantId(t["id"]) for t in ps_http.tenant_list()]
+
+    ps_http.post(
+        f"{ps_http.base_url}/v1/tenant/{tenant_id}/attach",
+        data=b"",
+        headers=None if content_type is None else {"Content-Type": content_type},
+    ).raise_for_status()
+
+    assert ps_http.tenant_config(tenant_id).tenant_specific_overrides == {}
+    assert ps_http.tenant_config(tenant_id).effective_config == config_pre_detach.effective_config
+
+
+def test_fully_custom_config(positive_env: NeonEnv):
+    """
+    If we send a valid config in the body, the request should be accepted and the config should be applied.
+    """
+    env = positive_env
+
+    fully_custom_config = {
+        "compaction_period": "1h",
+        "compaction_threshold": 13,
+        "compaction_target_size": 1048576,
+        "checkpoint_distance": 10000,
+        "checkpoint_timeout": "13m",
+        "eviction_policy": {
+            "kind": "LayerAccessThreshold",
+            "period": "20s",
+            "threshold": "23h",
+        },
+        "evictions_low_residence_duration_metric_threshold": "2days",
+        "gc_horizon": 23 * (1024 * 1024),
+        "gc_period": "2h 13m",
+        "image_creation_threshold": 7,
+        "pitr_interval": "1m",
+        "lagging_wal_timeout": "23m",
+        "max_lsn_wal_lag": 230000,
+        "min_resident_size_override": 23,
+        "trace_read_requests": True,
+        "walreceiver_connect_timeout": "13m",
+    }
+
+    ps_http = env.pageserver.http_client()
+
+    initial_tenant_config = ps_http.tenant_config(env.initial_tenant)
+    assert initial_tenant_config.tenant_specific_overrides == {}
+    assert set(initial_tenant_config.effective_config.keys()) == set(
+        fully_custom_config.keys()
+    ), "ensure we cover all config options"
+
+    (tenant_id, _) = env.neon_cli.create_tenant()
+    ps_http.set_tenant_config(tenant_id, fully_custom_config)
+    our_tenant_config = ps_http.tenant_config(tenant_id)
+    assert our_tenant_config.tenant_specific_overrides == fully_custom_config
+    assert set(our_tenant_config.effective_config.keys()) == set(
+        fully_custom_config.keys()
+    ), "ensure we cover all config options"
+    assert {
+        k: initial_tenant_config.effective_config[k] != our_tenant_config.effective_config[k]
+        for k in fully_custom_config.keys()
+    } == {
+        k: True for k in fully_custom_config.keys()
+    }, "ensure our custom config has different values than the default config for all config options, so we know we overrode everything"
+
+    ps_http.tenant_detach(tenant_id)
+    ps_http.tenant_attach(tenant_id, config=fully_custom_config)
+
+    assert ps_http.tenant_config(tenant_id).tenant_specific_overrides == fully_custom_config
+    assert set(ps_http.tenant_config(tenant_id).effective_config.keys()) == set(
+        fully_custom_config.keys()
+    ), "ensure we cover all config options"
--- a/test_runner/regress/test_compatibility.py
+++ b/test_runner/regress/test_compatibility.py
@@ -16,7 +16,7 @@ from fixtures.neon_fixtures import (
 )
 from fixtures.pageserver.http import PageserverHttpClient
 from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload
-from fixtures.pg_version import PgVersion, skip_on_postgres
+from fixtures.pg_version import PgVersion
 from fixtures.types import Lsn
 from pytest import FixtureRequest

@@ -41,7 +41,6 @@ check_ondisk_data_compatibility_if_enabled = pytest.mark.skipif(
 )


-@skip_on_postgres(PgVersion.V15, "Compatibility tests doesn't support Postgres 15 yet")
@pytest.mark.xdist_group("compatibility")
@pytest.mark.order(before="test_forward_compatibility")
 def test_create_snapshot(
@@ -49,12 +48,13 @@ def test_create_snapshot(
    pg_bin: PgBin,
    top_output_dir: Path,
    test_output_dir: Path,
+    pg_version: PgVersion,
 ):
    # The test doesn't really test anything
    # it creates a new snapshot for releases after we tested the current version against the previous snapshot in `test_backward_compatibility`.
    #
    # There's no cleanup here, it allows to adjust the data in `test_backward_compatibility` itself without re-collecting it.
-    neon_env_builder.pg_version = PgVersion.V14
+    neon_env_builder.pg_version = pg_version
    neon_env_builder.num_safekeepers = 3
    neon_env_builder.enable_local_fs_remote_storage()
    neon_env_builder.preserve_database_files = True
@@ -90,13 +90,14 @@ def test_create_snapshot(
    env.pageserver.stop()

    # Directory `compatibility_snapshot_dir` is uploaded to S3 in a workflow, keep the name in sync with it
-    compatibility_snapshot_dir = top_output_dir / "compatibility_snapshot_pg14"
+    compatibility_snapshot_dir = (
+        top_output_dir / f"compatibility_snapshot_pg{pg_version.v_prefixed}"
+    )
    if compatibility_snapshot_dir.exists():
        shutil.rmtree(compatibility_snapshot_dir)
    shutil.copytree(test_output_dir, compatibility_snapshot_dir)


-@skip_on_postgres(PgVersion.V15, "Compatibility tests doesn't support Postgres 15 yet")
@check_ondisk_data_compatibility_if_enabled
@pytest.mark.xdist_group("compatibility")
@pytest.mark.order(after="test_create_snapshot")
@@ -115,7 +116,7 @@ def test_backward_compatibility(
    compatibility_snapshot_dir_env = os.environ.get("COMPATIBILITY_SNAPSHOT_DIR")
    assert (
        compatibility_snapshot_dir_env is not None
-    ), "COMPATIBILITY_SNAPSHOT_DIR is not set. It should be set to `compatibility_snapshot_pg14` path generateted by test_create_snapshot (ideally generated by the previous version of Neon)"
+    ), f"COMPATIBILITY_SNAPSHOT_DIR is not set. It should be set to `compatibility_snapshot_pg{pg_version.v_prefixed}` path generateted by test_create_snapshot (ideally generated by the previous version of Neon)"
    compatibility_snapshot_dir = Path(compatibility_snapshot_dir_env).resolve()

    breaking_changes_allowed = (
@@ -155,7 +156,6 @@ def test_backward_compatibility(
    ), "Breaking changes are allowed by ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE, but the test has passed without any breakage"


-@skip_on_postgres(PgVersion.V15, "Compatibility tests doesn't support Postgres 15 yet")
@check_ondisk_data_compatibility_if_enabled
@pytest.mark.xdist_group("compatibility")
@pytest.mark.order(after="test_create_snapshot")
@@ -183,7 +183,9 @@ def test_forward_compatibility(
    ), "COMPATIBILITY_POSTGRES_DISTRIB_DIR is not set. It should be set to a pg_install directrory (ideally generated by the previous version of Neon)"
    compatibility_postgres_distrib_dir = Path(compatibility_postgres_distrib_dir_env).resolve()

-    compatibility_snapshot_dir = top_output_dir / "compatibility_snapshot_pg14"
+    compatibility_snapshot_dir = (
+        top_output_dir / f"compatibility_snapshot_pg{pg_version.v_prefixed}"
+    )

    breaking_changes_allowed = (
        os.environ.get("ALLOW_FORWARD_COMPATIBILITY_BREAKAGE", "false").lower() == "true"
--- a/test_runner/regress/test_disk_usage_eviction.py
+++ b/test_runner/regress/test_disk_usage_eviction.py
@@ -118,6 +118,11 @@ class EvictionEnv:

        wait_until(10, 1, statvfs_called)

+        # these can sometimes happen during startup before any tenants have been
+        # loaded, so nothing can be evicted, we just wait for next iteration which
+        # is able to evict.
+        self.neon_env.pageserver.allowed_errors.append(".*WARN.* disk usage still high.*")
+

@pytest.fixture
 def eviction_env(request, neon_env_builder: NeonEnvBuilder, pg_bin: PgBin) -> EvictionEnv:
--- a/test_runner/regress/test_hot_standby.py
+++ b/test_runner/regress/test_hot_standby.py
@@ -1,9 +1,7 @@
 import pytest
 from fixtures.neon_fixtures import NeonEnv
-from fixtures.pg_version import PgVersion, xfail_on_postgres


-@xfail_on_postgres(PgVersion.V15, reason="https://github.com/neondatabase/neon/pull/4182")
@pytest.mark.timeout(1800)
 def test_hot_standby(neon_simple_env: NeonEnv):
    env = neon_simple_env
--- a/test_runner/regress/test_metric_collection.py
+++ b/test_runner/regress/test_metric_collection.py
@@ -204,6 +204,7 @@ def proxy_with_metric_collector(
    http_port = port_distributor.get_port()
    proxy_port = port_distributor.get_port()
    mgmt_port = port_distributor.get_port()
+    external_http_port = port_distributor.get_port()

    (host, port) = httpserver_listen_address
    metric_collection_endpoint = f"http://{host}:{port}/billing/api/v1/usage_events"
@@ -215,6 +216,7 @@ def proxy_with_metric_collector(
        proxy_port=proxy_port,
        http_port=http_port,
        mgmt_port=mgmt_port,
+        external_http_port=external_http_port,
        metric_collection_endpoint=metric_collection_endpoint,
        metric_collection_interval=metric_collection_interval,
        auth_backend=NeonProxy.Link(),
--- a/test_runner/regress/test_ondemand_download.py
+++ b/test_runner/regress/test_ondemand_download.py
@@ -20,6 +20,7 @@ from fixtures.pageserver.utils import (
    assert_tenant_state,
    wait_for_last_record_lsn,
    wait_for_upload,
+    wait_for_upload_queue_empty,
    wait_until_tenant_state,
 )
 from fixtures.types import Lsn
@@ -63,12 +64,15 @@ def test_ondemand_download_large_rel(
    tenant, _ = env.neon_cli.create_tenant(
        conf={
            # disable background GC
-            "gc_period": "10 m",
+            "gc_period": "0s",
            "gc_horizon": f"{10 * 1024 ** 3}",  # 10 GB
            # small checkpoint distance to create more delta layer files
            "checkpoint_distance": f"{10 * 1024 ** 2}",  # 10 MB
+            # allow compaction with the checkpoint
            "compaction_threshold": "3",
            "compaction_target_size": f"{10 * 1024 ** 2}",  # 10 MB
+            # but don't run compaction in background or on restart
+            "compaction_period": "0s",
        }
    )
    env.initial_tenant = tenant
@@ -95,9 +99,17 @@ def test_ondemand_download_large_rel(

        current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))

-    # wait until pageserver receives that data
    wait_for_last_record_lsn(client, tenant_id, timeline_id, current_lsn)

+    # stop endpoint before checkpoint to stop wal generation
+    endpoint.stop()
+
+    # stopping the safekeepers now helps us avoid calculating the logical size
+    # after startup, so page requests should be the only thing triggering
+    # on-demand downloads of the layers
+    for sk in env.safekeepers:
+        sk.stop()
+
    # run checkpoint manually to be sure that data landed in remote storage
    client.timeline_checkpoint(tenant_id, timeline_id)

@@ -106,7 +118,6 @@ def test_ondemand_download_large_rel(
    log.info("uploads have finished")

    ##### Stop the first pageserver instance, erase all its data
-    endpoint.stop()
    env.pageserver.stop()

    # remove all the layer files
@@ -117,8 +128,13 @@ def test_ondemand_download_large_rel(
    ##### Second start, restore the data and ensure it's the same
    env.pageserver.start()

-    endpoint.start()
+    # start a readonly endpoint which we'll use to check the database.
+    # readonly (with lsn=) is required so that we don't try to connect to
+    # the safekeepers, which have now been shut down.
+    endpoint = env.endpoints.create_start("main", lsn=current_lsn)
+
    before_downloads = get_num_downloaded_layers(client, tenant_id, timeline_id)
+    assert before_downloads != 0, "basebackup should on-demand non-zero layers"

    # Probe in the middle of the table. There's a high chance that the beginning
    # and end of the table was stored together in the same layer files with data
@@ -149,6 +165,7 @@ def test_ondemand_download_timetravel(

    ##### First start, insert data and upload it to the remote storage
    env = neon_env_builder.init_start()
+    pageserver_http = env.pageserver.http_client()

    # Override defaults, to create more layers
    tenant, _ = env.neon_cli.create_tenant(
@@ -225,7 +242,8 @@ def test_ondemand_download_timetravel(
    assert filled_current_physical == filled_size, "we don't yet do layer eviction"

    # Wait until generated image layers are uploaded to S3
-    time.sleep(3)
+    if remote_storage_kind is not None:
+        wait_for_upload_queue_empty(pageserver_http, env.initial_tenant, timeline_id)

    env.pageserver.stop()

--- a/test_runner/regress/test_pg_regress.py
+++ b/test_runner/regress/test_pg_regress.py
@@ -5,7 +5,6 @@ from pathlib import Path

 import pytest
 from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
-from fixtures.pg_version import PgVersion, xfail_on_postgres


 # Run the main PostgreSQL regression tests, in src/test/regress.
@@ -33,8 +32,8 @@ def test_pg_regress(
    (runpath / "testtablespace").mkdir(parents=True)

    # Compute all the file locations that pg_regress will need.
-    build_path = pg_distrib_dir / f"build/v{env.pg_version}/src/test/regress"
-    src_path = base_dir / f"vendor/postgres-v{env.pg_version}/src/test/regress"
+    build_path = pg_distrib_dir / f"build/{env.pg_version.v_prefixed}/src/test/regress"
+    src_path = base_dir / f"vendor/postgres-{env.pg_version.v_prefixed}/src/test/regress"
    bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
    schedule = src_path / "parallel_schedule"
    pg_regress = build_path / "pg_regress"
@@ -72,7 +71,6 @@ def test_pg_regress(
 #
 # This runs for a long time, especially in debug mode, so use a larger-than-default
 # timeout.
-@xfail_on_postgres(PgVersion.V15, reason="https://github.com/neondatabase/neon/pull/4213")
@pytest.mark.timeout(1800)
 def test_isolation(
    neon_simple_env: NeonEnv,
@@ -97,8 +95,8 @@ def test_isolation(
    (runpath / "testtablespace").mkdir(parents=True)

    # Compute all the file locations that pg_isolation_regress will need.
-    build_path = pg_distrib_dir / f"build/v{env.pg_version}/src/test/isolation"
-    src_path = base_dir / f"vendor/postgres-v{env.pg_version}/src/test/isolation"
+    build_path = pg_distrib_dir / f"build/{env.pg_version.v_prefixed}/src/test/isolation"
+    src_path = base_dir / f"vendor/postgres-{env.pg_version.v_prefixed}/src/test/isolation"
    bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
    schedule = src_path / "isolation_schedule"
    pg_isolation_regress = build_path / "pg_isolation_regress"
--- a/test_runner/regress/test_proxy.py
+++ b/test_runner/regress/test_proxy.py
@@ -1,22 +1,32 @@
+import json
 import subprocess
+from typing import Any, List

 import psycopg2
 import pytest
+import requests
 from fixtures.neon_fixtures import PSQL, NeonProxy, VanillaPostgres


-@pytest.mark.parametrize("option_name", ["project", "endpoint"])
-def test_proxy_select_1(static_proxy: NeonProxy, option_name: str):
+def test_proxy_select_1(static_proxy: NeonProxy):
    """
    A simplest smoke test: check proxy against a local postgres instance.
    """

-    out = static_proxy.safe_psql("select 1", options=f"{option_name}=generic-project-name")
+    # no SNI, deprecated `options=project` syntax (from before a project had several endpoints)
+    out = static_proxy.safe_psql("select 1", sslsni=0, options="project=generic-project-name")
    assert out[0][0] == 1

+    # no SNI, new `options=endpoint` syntax
+    out = static_proxy.safe_psql("select 1", sslsni=0, options="endpoint=generic-project-name")
+    assert out[0][0] == 1

-@pytest.mark.parametrize("option_name", ["project", "endpoint"])
-def test_password_hack(static_proxy: NeonProxy, option_name: str):
+    # with SNI
+    out = static_proxy.safe_psql("select 42", host="generic-project-name.localtest.me")
+    assert out[0][0] == 42
+
+
+def test_password_hack(static_proxy: NeonProxy):
    """
    Check the PasswordHack auth flow: an alternative to SCRAM auth for
    clients which can't provide the project/endpoint name via SNI or `options`.
@@ -24,14 +34,16 @@ def test_password_hack(static_proxy: NeonProxy, option_name: str):

    user = "borat"
    password = "password"
-    static_proxy.safe_psql(
-        f"create role {user} with login password '{password}'",
-        options=f"{option_name}=irrelevant",
-    )
+    static_proxy.safe_psql(f"create role {user} with login password '{password}'")

    # Note the format of `magic`!
-    magic = f"{option_name}=irrelevant;{password}"
-    static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic)
+    magic = f"project=irrelevant;{password}"
+    out = static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic)
+    assert out[0][0] == 1
+
+    magic = f"endpoint=irrelevant;{password}"
+    out = static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic)
+    assert out[0][0] == 1

    # Must also check that invalid magic won't be accepted.
    with pytest.raises(psycopg2.OperationalError):
@@ -69,52 +81,55 @@ def test_proxy_options(static_proxy: NeonProxy, option_name: str):
    """

    options = f"{option_name}=irrelevant -cproxytest.option=value"
-    out = static_proxy.safe_psql("show proxytest.option", options=options)
+    out = static_proxy.safe_psql("show proxytest.option", options=options, sslsni=0)
    assert out[0][0] == "value"

    options = f"-c proxytest.foo=\\ str {option_name}=irrelevant"
+    out = static_proxy.safe_psql("show proxytest.foo", options=options, sslsni=0)
+    assert out[0][0] == " str"
+
+    options = "-cproxytest.option=value"
+    out = static_proxy.safe_psql("show proxytest.option", options=options)
+    assert out[0][0] == "value"
+
+    options = "-c proxytest.foo=\\ str"
    out = static_proxy.safe_psql("show proxytest.foo", options=options)
    assert out[0][0] == " str"


-@pytest.mark.parametrize("option_name", ["project", "endpoint"])
-def test_auth_errors(static_proxy: NeonProxy, option_name: str):
+def test_auth_errors(static_proxy: NeonProxy):
    """
    Check that we throw very specific errors in some unsuccessful auth scenarios.
    """

    # User does not exist
    with pytest.raises(psycopg2.Error) as exprinfo:
-        static_proxy.connect(user="pinocchio", options=f"{option_name}=irrelevant")
+        static_proxy.connect(user="pinocchio")
    text = str(exprinfo.value).strip()
-    assert text.endswith("password authentication failed for user 'pinocchio'")
+    assert text.find("password authentication failed for user 'pinocchio'") != -1

    static_proxy.safe_psql(
        "create role pinocchio with login password 'magic'",
-        options=f"{option_name}=irrelevant",
    )

    # User exists, but password is missing
    with pytest.raises(psycopg2.Error) as exprinfo:
-        static_proxy.connect(user="pinocchio", password=None, options=f"{option_name}=irrelevant")
+        static_proxy.connect(user="pinocchio", password=None)
    text = str(exprinfo.value).strip()
-    assert text.endswith("password authentication failed for user 'pinocchio'")
+    assert text.find("password authentication failed for user 'pinocchio'") != -1

    # User exists, but password is wrong
    with pytest.raises(psycopg2.Error) as exprinfo:
-        static_proxy.connect(user="pinocchio", password="bad", options=f"{option_name}=irrelevant")
+        static_proxy.connect(user="pinocchio", password="bad")
    text = str(exprinfo.value).strip()
-    assert text.endswith("password authentication failed for user 'pinocchio'")
+    assert text.find("password authentication failed for user 'pinocchio'") != -1

    # Finally, check that the user can connect
-    with static_proxy.connect(
-        user="pinocchio", password="magic", options=f"{option_name}=irrelevant"
-    ):
+    with static_proxy.connect(user="pinocchio", password="magic"):
        pass


-@pytest.mark.parametrize("option_name", ["project", "endpoint"])
-def test_forward_params_to_client(static_proxy: NeonProxy, option_name: str):
+def test_forward_params_to_client(static_proxy: NeonProxy):
    """
    Check that we forward all necessary PostgreSQL server params to client.
    """
@@ -140,7 +155,7 @@ def test_forward_params_to_client(static_proxy: NeonProxy, option_name: str):
        where name = any(%s)
    """

-    with static_proxy.connect(options=f"{option_name}=irrelevant") as conn:
+    with static_proxy.connect() as conn:
        with conn.cursor() as cur:
            cur.execute(query, (reported_params_subset,))
            for name, value in cur.fetchall():
@@ -148,18 +163,65 @@ def test_forward_params_to_client(static_proxy: NeonProxy, option_name: str):
                assert conn.get_parameter_status(name) == value


-@pytest.mark.parametrize("option_name", ["project", "endpoint"])
@pytest.mark.timeout(5)
-def test_close_on_connections_exit(static_proxy: NeonProxy, option_name: str):
+def test_close_on_connections_exit(static_proxy: NeonProxy):
    # Open two connections, send SIGTERM, then ensure that proxy doesn't exit
    # until after connections close.
-    with static_proxy.connect(options=f"{option_name}=irrelevant"), static_proxy.connect(
-        options=f"{option_name}=irrelevant"
-    ):
+    with static_proxy.connect(), static_proxy.connect():
        static_proxy.terminate()
        with pytest.raises(subprocess.TimeoutExpired):
            static_proxy.wait_for_exit(timeout=2)
        # Ensure we don't accept any more connections
        with pytest.raises(psycopg2.OperationalError):
-            static_proxy.connect(options=f"{option_name}=irrelevant")
+            static_proxy.connect()
    static_proxy.wait_for_exit()
+
+
+def test_sql_over_http(static_proxy: NeonProxy):
+    static_proxy.safe_psql("create role http with login password 'http' superuser")
+
+    def q(sql: str, params: List[Any] = []) -> Any:
+        connstr = f"postgresql://http:http@{static_proxy.domain}:{static_proxy.proxy_port}/postgres"
+        response = requests.post(
+            f"https://{static_proxy.domain}:{static_proxy.external_http_port}/sql",
+            data=json.dumps({"query": sql, "params": params}),
+            headers={"Content-Type": "application/sql", "Neon-Connection-String": connstr},
+            verify=str(static_proxy.test_output_dir / "proxy.crt"),
+        )
+        assert response.status_code == 200
+        return response.json()
+
+    rows = q("select 42 as answer")["rows"]
+    assert rows == [{"answer": 42}]
+
+    rows = q("select $1 as answer", [42])["rows"]
+    assert rows == [{"answer": "42"}]
+
+    rows = q("select $1 * 1 as answer", [42])["rows"]
+    assert rows == [{"answer": 42}]
+
+    rows = q("select $1::int[] as answer", [[1, 2, 3]])["rows"]
+    assert rows == [{"answer": [1, 2, 3]}]
+
+    rows = q("select $1::json->'a' as answer", [{"a": {"b": 42}}])["rows"]
+    assert rows == [{"answer": {"b": 42}}]
+
+    rows = q("select * from pg_class limit 1")["rows"]
+    assert len(rows) == 1
+
+    res = q("create table t(id serial primary key, val int)")
+    assert res["command"] == "CREATE"
+    assert res["rowCount"] is None
+
+    res = q("insert into t(val) values (10), (20), (30) returning id")
+    assert res["command"] == "INSERT"
+    assert res["rowCount"] == 3
+    assert res["rows"] == [{"id": 1}, {"id": 2}, {"id": 3}]
+
+    res = q("select * from t")
+    assert res["command"] == "SELECT"
+    assert res["rowCount"] == 3
+
+    res = q("drop table t")
+    assert res["command"] == "DROP"
+    assert res["rowCount"] is None
--- a/test_runner/regress/test_remote_storage.py
+++ b/test_runner/regress/test_remote_storage.py
@@ -83,9 +83,7 @@ def test_remote_storage_backup_and_restore(
    env.pageserver.allowed_errors.append(".*failed to load remote timeline.*")
    # we have a bunch of pytest.raises for these below
    env.pageserver.allowed_errors.append(".*tenant .*? already exists, state:.*")
-    env.pageserver.allowed_errors.append(
-        ".*Cannot attach tenant .*?, local tenant directory already exists.*"
-    )
+    env.pageserver.allowed_errors.append(".*tenant directory already exists.*")
    env.pageserver.allowed_errors.append(".*simulated failure of remote operation.*")

    pageserver_http = env.pageserver.http_client()
--- a/test_runner/regress/test_sni_router.py
+++ b/test_runner/regress/test_sni_router.py
@@ -4,7 +4,7 @@ from pathlib import Path
 from types import TracebackType
 from typing import Optional, Type

-import backoff  # type: ignore
+import backoff
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import PgProtocol, PortDistributor, VanillaPostgres

--- a/test_runner/regress/test_tenant_conf.py
+++ b/test_runner/regress/test_tenant_conf.py
@@ -62,6 +62,7 @@ eviction_policy = { "kind" = "LayerAccessThreshold", period = "20s", threshold =
            log.info(f"show {env.initial_tenant}")
            pscur.execute(f"show {env.initial_tenant}")
            res = pscur.fetchone()
+            assert res is not None
            assert all(
                i in res.items()
                for i in {
@@ -101,6 +102,7 @@ eviction_policy = { "kind" = "LayerAccessThreshold", period = "20s", threshold =
            pscur.execute(f"show {tenant}")
            res = pscur.fetchone()
            log.info(f"res: {res}")
+            assert res is not None
            assert all(
                i in res.items()
                for i in {
@@ -151,6 +153,7 @@ eviction_policy = { "kind" = "LayerAccessThreshold", period = "20s", threshold =
        "eviction_policy": json.dumps(
            {"kind": "LayerAccessThreshold", "period": "80s", "threshold": "42h"}
        ),
+        "max_lsn_wal_lag": "13000000",
    }
    env.neon_cli.config_tenant(
        tenant_id=tenant,
@@ -162,6 +165,7 @@ eviction_policy = { "kind" = "LayerAccessThreshold", period = "20s", threshold =
            pscur.execute(f"show {tenant}")
            res = pscur.fetchone()
            log.info(f"after config res: {res}")
+            assert res is not None
            assert all(
                i in res.items()
                for i in {
@@ -206,6 +210,7 @@ eviction_policy = { "kind" = "LayerAccessThreshold", period = "20s", threshold =
    assert updated_effective_config["gc_horizon"] == 67108864
    assert updated_effective_config["image_creation_threshold"] == 2
    assert updated_effective_config["pitr_interval"] == "7days"
+    assert updated_effective_config["max_lsn_wal_lag"] == 13000000

    # restart the pageserver and ensure that the config is still correct
    env.pageserver.stop()
@@ -216,6 +221,7 @@ eviction_policy = { "kind" = "LayerAccessThreshold", period = "20s", threshold =
            pscur.execute(f"show {tenant}")
            res = pscur.fetchone()
            log.info(f"after restart res: {res}")
+            assert res is not None
            assert all(
                i in res.items()
                for i in {
@@ -265,6 +271,7 @@ eviction_policy = { "kind" = "LayerAccessThreshold", period = "20s", threshold =
        "period": "20s",
        "threshold": "23h",
    }
+    assert final_effective_config["max_lsn_wal_lag"] == 10 * 1024 * 1024

    # restart the pageserver and ensure that the config is still correct
    env.pageserver.stop()
@@ -275,6 +282,7 @@ eviction_policy = { "kind" = "LayerAccessThreshold", period = "20s", threshold =
            pscur.execute(f"show {tenant}")
            res = pscur.fetchone()
            log.info(f"after restart res: {res}")
+            assert res is not None
            assert all(
                i in res.items()
                for i in {
--- a/test_runner/regress/test_tenant_detach.py
+++ b/test_runner/regress/test_tenant_detach.py
@@ -685,12 +685,10 @@ def test_load_attach_negatives(

    pageserver_http.tenant_ignore(tenant_id)

-    env.pageserver.allowed_errors.append(
-        ".*Cannot attach tenant .*?, local tenant directory already exists.*"
-    )
+    env.pageserver.allowed_errors.append(".*tenant directory already exists.*")
    with pytest.raises(
        expected_exception=PageserverApiException,
-        match=f"Cannot attach tenant {tenant_id}, local tenant directory already exists",
+        match="tenant directory already exists",
    ):
        pageserver_http.tenant_attach(tenant_id)

@@ -734,12 +732,10 @@ def test_ignore_while_attaching(
    pageserver_http.tenant_ignore(tenant_id)

    # Cannot attach it due to some local files existing
-    env.pageserver.allowed_errors.append(
-        ".*Cannot attach tenant .*?, local tenant directory already exists.*"
-    )
+    env.pageserver.allowed_errors.append(".*tenant directory already exists.*")
    with pytest.raises(
        expected_exception=PageserverApiException,
-        match=f"Cannot attach tenant {tenant_id}, local tenant directory already exists",
+        match="tenant directory already exists",
    ):
        pageserver_http.tenant_attach(tenant_id)

--- a/test_runner/regress/test_timeline_delete.py
+++ b/test_runner/regress/test_timeline_delete.py
@@ -324,7 +324,11 @@ def test_concurrent_timeline_delete_if_first_stuck_at_index_upload(
    If we're stuck uploading the index file with the is_delete flag,
    eventually console will hand up and retry.
    If we're still stuck at the retry time, ensure that the retry
-    eventually completes with the same status.
+    fails with status 500, signalling to console that it should retry
+    later.
+    Ideally, timeline_delete should return 202 Accepted and require
+    console to poll for completion, but that would require changing
+    the API contract.
    """

    neon_env_builder.enable_remote_storage(
@@ -338,34 +342,24 @@ def test_concurrent_timeline_delete_if_first_stuck_at_index_upload(

    ps_http = env.pageserver.http_client()

+    # make the first call sleep practically forever
    failpoint_name = "persist_index_part_with_deleted_flag_after_set_before_upload_pause"
    ps_http.configure_failpoints((failpoint_name, "pause"))

-    def delete_timeline_call(name, result_queue, barrier):
-        if barrier:
-            barrier.wait()
+    def first_call(result_queue):
        try:
-            log.info(f"{name} call start")
+            log.info("first call start")
            ps_http.timeline_delete(env.initial_tenant, child_timeline_id, timeout=10)
-            log.info(f"{name} call success")
+            log.info("first call success")
            result_queue.put("success")
        except Exception:
-            log.exception(f"{name} call failed")
+            log.exception("first call failed")
            result_queue.put("failure, see log for stack trace")

-    delete_results: queue.Queue[str] = queue.Queue()
-    first_call_thread = threading.Thread(
-        target=delete_timeline_call,
-        args=(
-            "1st",
-            delete_results,
-            None,
-        ),
-    )
+    first_call_result: queue.Queue[str] = queue.Queue()
+    first_call_thread = threading.Thread(target=first_call, args=(first_call_result,))
    first_call_thread.start()

-    second_call_thread = None
-
    try:

        def first_call_hit_failpoint():
@@ -375,53 +369,38 @@ def test_concurrent_timeline_delete_if_first_stuck_at_index_upload(

        wait_until(50, 0.1, first_call_hit_failpoint)

-        barrier = threading.Barrier(2)
-        second_call_thread = threading.Thread(
-            target=delete_timeline_call,
-            args=(
-                "2nd",
-                delete_results,
-                barrier,
-            ),
-        )
-        second_call_thread.start()
-
-        barrier.wait()
-
-        # release the pause
-        ps_http.configure_failpoints((failpoint_name, "off"))
-
-        # both should had succeeded: the second call will coalesce with the already-ongoing first call
-        result = delete_results.get()
-        assert result == "success"
-        result = delete_results.get()
-        assert result == "success"
-
-        # the second call will try to transition the timeline into Stopping state, but it's already in that state
-        # (the transition to Stopping state is not part of the request coalescing, because Tenant and Timeline states are a mess already)
+        # make the second call and assert behavior
+        log.info("second call start")
+        error_msg_re = "another task is already setting the deleted_flag, started at"
+        with pytest.raises(PageserverApiException, match=error_msg_re) as second_call_err:
+            ps_http.timeline_delete(env.initial_tenant, child_timeline_id)
+        assert second_call_err.value.status_code == 500
+        env.pageserver.allowed_errors.append(f".*{child_timeline_id}.*{error_msg_re}.*")
+        # the second call will try to transition the timeline into Stopping state as well
        env.pageserver.allowed_errors.append(
            f".*{child_timeline_id}.*Ignoring new state, equal to the existing one: Stopping"
        )
+        log.info("second call failed as expected")

-        def second_call_attempt():
-            assert env.pageserver.log_contains(
-                f".*{child_timeline_id}.*Ignoring new state, equal to the existing one: Stopping"
-            )
+        # by now we know that the second call failed, let's ensure the first call will finish
+        ps_http.configure_failpoints((failpoint_name, "off"))
+
+        result = first_call_result.get()
+        assert result == "success"

-        wait_until(50, 0.1, second_call_attempt)
    finally:
-        log.info("joining 1st thread")
+        log.info("joining first call thread")
        # in any case, make sure the lifetime of the thread is bounded to this test
        first_call_thread.join()

-        if second_call_thread:
-            log.info("joining 2nd thread")
-            second_call_thread.join()
-

 def test_delete_timeline_client_hangup(neon_env_builder: NeonEnvBuilder):
    """
-    Make sure the timeline_delete runs to completion even if first request is cancelled because of a timeout.
+    If the client hangs up before we start the index part upload but after we mark it
+    deleted in local memory, a subsequent delete_timeline call should be able to do
+    another delete timeline operation.
+
+    This tests cancel safety up to the given failpoint.
    """
    neon_env_builder.enable_remote_storage(
        remote_storage_kind=RemoteStorageKind.MOCK_S3,
@@ -458,12 +437,12 @@ def test_delete_timeline_client_hangup(neon_env_builder: NeonEnvBuilder):

    wait_until(50, 0.1, got_hangup_log_message)

-    # after disabling the failpoint pause, the original attempt should complete eventually
+    # ok, retry without failpoint, it should succeed
    ps_http.configure_failpoints((failpoint_name, "off"))

-    def delete_timeline_completes():
-        assert [env.initial_timeline] == [
-            timeline_id for (_, timeline_id) in env.neon_cli.list_timelines()
-        ]
-
-    wait_until(50, 0.5, delete_timeline_completes)
+    # this should succeed
+    ps_http.timeline_delete(env.initial_tenant, child_timeline_id, timeout=2)
+    # the second call will try to transition the timeline into Stopping state, but it's already in that state
+    env.pageserver.allowed_errors.append(
+        f".*{child_timeline_id}.*Ignoring new state, equal to the existing one: Stopping"
+    )