Check postgres version and ensure that public schema exists

before running GRANT query on it
Merge pull request #2668 from neondatabase/main
2026-02-10 22:20:38 +00:00 · 2022-10-22 02:14:26 +03:00 · 2022-10-21 15:21:42 +03:00 · 2022-10-21 14:21:22 +03:00 · 2022-10-21 10:58:43 +00:00 · 2022-10-21 12:47:06 +03:00
193 changed files with 13829 additions and 3837 deletions
--- a/.github/PULL_REQUEST_TEMPLATE/release-pr.md
+++ b/.github/PULL_REQUEST_TEMPLATE/release-pr.md
@@ -10,7 +10,7 @@
 <!-- List everything that should be done **before** release, any issues / setting changes / etc -->

 ### Checklist after release
- [ ] Based on the merged commits write release notes and open a PR into `website` repo ([example](https://github.com/neondatabase/website/pull/120/files))
+- [ ] Based on the merged commits write release notes and open a PR into `website` repo ([example](https://github.com/neondatabase/website/pull/219/files))
 - [ ] Check [#dev-production-stream](https://neondb.slack.com/archives/C03F5SM1N02) Slack channel
 - [ ] Check [stuck projects page](https://console.neon.tech/admin/projects?sort=last_active&order=desc&stuck=true)
 - [ ] Check [recent operation failures](https://console.neon.tech/admin/operations?action=create_timeline%2Cstart_compute%2Cstop_compute%2Csuspend_compute%2Capply_config%2Cdelete_timeline%2Cdelete_tenant%2Ccreate_branch%2Ccheck_availability&sort=updated_at&order=desc&had_retries=some)
--- a/.github/actions/allure-report/action.yml
+++ b/.github/actions/allure-report/action.yml
@@ -47,7 +47,7 @@ runs:
        else
          key=branch-$(echo ${GITHUB_REF#refs/heads/} | tr -c "[:alnum:]._-" "-")
        fi
-        echo "::set-output name=KEY::${key}"
+        echo "KEY=${key}" >> $GITHUB_OUTPUT

    - uses: actions/setup-java@v3
      if: ${{ inputs.action == 'generate' }}
@@ -186,7 +186,7 @@ runs:
        aws s3 cp --only-show-errors ./index.html "s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html"

        echo "[Allure Report](${REPORT_URL})" >> ${GITHUB_STEP_SUMMARY}
-        echo "::set-output name=report-url::${REPORT_URL}"
+        echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT

    - name: Release Allure lock
      if: ${{ inputs.action == 'generate' && always() }}
--- a/.github/actions/download/action.yml
+++ b/.github/actions/download/action.yml
@@ -12,6 +12,9 @@ inputs:
    description: "Allow to skip if file doesn't exist, fail otherwise"
    default: false
    required: false
+  prefix:
+    description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
+    required: false

 runs:
  using: "composite"
@@ -23,23 +26,23 @@ runs:
        TARGET: ${{ inputs.path }}
        ARCHIVE: /tmp/downloads/${{ inputs.name }}.tar.zst
        SKIP_IF_DOES_NOT_EXIST: ${{ inputs.skip-if-does-not-exist }}
+        PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}', github.run_id, github.run_attempt) }}
      run: |
        BUCKET=neon-github-public-dev
-        PREFIX=artifacts/${GITHUB_RUN_ID}
        FILENAME=$(basename $ARCHIVE)

-        S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${PREFIX} | jq -r '.Contents[].Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true)
+        S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${PREFIX%$GITHUB_RUN_ATTEMPT} | jq -r '.Contents[].Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true)
        if [ -z "${S3_KEY}" ]; then
          if [ "${SKIP_IF_DOES_NOT_EXIST}" = "true" ]; then
-            echo '::set-output name=SKIPPED::true'
+            echo 'SKIPPED=true' >> $GITHUB_OUTPUT
            exit 0
          else
-            echo 2>&1 "Neither s3://${BUCKET}/${PREFIX}/${GITHUB_RUN_ATTEMPT}/${FILENAME} nor its version from previous attempts exist"
+            echo >&2 "Neither s3://${BUCKET}/${PREFIX}/${FILENAME} nor its version from previous attempts exist"  # error text goes to stderr
            exit 1
          fi
        fi

-        echo '::set-output name=SKIPPED::false'
+        echo 'SKIPPED=false' >> $GITHUB_OUTPUT

        mkdir -p $(dirname $ARCHIVE)
        time aws s3 cp --only-show-errors s3://${BUCKET}/${S3_KEY} ${ARCHIVE}
--- a/.github/actions/neon-project-create/action.yml
+++ b/.github/actions/neon-project-create/action.yml
@@ -41,8 +41,8 @@ runs:
            ;;
        esac

-        echo "::set-output name=api_host::${API_HOST}"
-        echo "::set-output name=region_id::${REGION_ID}"
+        echo "api_host=${API_HOST}" >> $GITHUB_OUTPUT
+        echo "region_id=${REGION_ID}" >> $GITHUB_OUTPUT
      env:
        ENVIRONMENT: ${{ inputs.environment }}
        REGION_ID: ${{ inputs.region_id }}
@@ -72,10 +72,10 @@ runs:

        dsn=$(echo $project | jq --raw-output '.roles[] | select(.name != "web_access") | .dsn')/main
        echo "::add-mask::${dsn}"
-        echo "::set-output name=dsn::${dsn}"
+        echo "dsn=${dsn}" >> $GITHUB_OUTPUT

        project_id=$(echo $project | jq --raw-output '.id')
-        echo "::set-output name=project_id::${project_id}"
+        echo "project_id=${project_id}" >> $GITHUB_OUTPUT
      env:
        API_KEY: ${{ inputs.api_key }}
        API_HOST: ${{ steps.parse-input.outputs.api_host }}
--- a/.github/actions/neon-project-delete/action.yml
+++ b/.github/actions/neon-project-delete/action.yml
@@ -32,7 +32,7 @@ runs:
            ;;
        esac

-        echo "::set-output name=api_host::${API_HOST}"
+        echo "api_host=${API_HOST}" >> $GITHUB_OUTPUT
      env:
        ENVIRONMENT: ${{ inputs.environment }}

--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -85,7 +85,8 @@ runs:
        # PLATFORM will be embedded in the perf test report
        # and it is needed to distinguish different environments
        export PLATFORM=${PLATFORM:-github-actions-selfhosted}
-        export POSTGRES_DISTRIB_DIR=${POSTGRES_DISTRIB_DIR:-/tmp/neon/pg_install/v14}
+        export POSTGRES_DISTRIB_DIR=${POSTGRES_DISTRIB_DIR:-/tmp/neon/pg_install}
+        export DEFAULT_PG_VERSION=${DEFAULT_PG_VERSION:-14}

        if [ "${BUILD_TYPE}" = "remote" ]; then
          export REMOTE_ENV=1
@@ -126,7 +127,7 @@ runs:

        # Wake up the cluster if we use remote neon instance
        if [ "${{ inputs.build_type }}" = "remote" ] && [ -n "${BENCHMARK_CONNSTR}" ]; then
-          ${POSTGRES_DISTRIB_DIR}/bin/psql ${BENCHMARK_CONNSTR} -c "SELECT version();"
+          ${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/psql ${BENCHMARK_CONNSTR} -c "SELECT version();"
        fi

        # Run the tests.
--- a/.github/actions/upload/action.yml
+++ b/.github/actions/upload/action.yml
@@ -7,6 +7,9 @@ inputs:
  path:
    description: "A directory or file to upload"
    required: true
+  prefix:
+    description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
+    required: false

 runs:
  using: "composite"
@@ -42,14 +45,14 @@ runs:
      env:
        SOURCE: ${{ inputs.path }}
        ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst
+        PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}', github.run_id, github.run_attempt) }}
      run: |
        BUCKET=neon-github-public-dev
-        PREFIX=artifacts/${GITHUB_RUN_ID}
        FILENAME=$(basename $ARCHIVE)

        FILESIZE=$(du -sh ${ARCHIVE} | cut -f1)

-        time aws s3 mv --only-show-errors ${ARCHIVE} s3://${BUCKET}/${PREFIX}/${GITHUB_RUN_ATTEMPT}/${FILENAME}
+        time aws s3 mv --only-show-errors ${ARCHIVE} s3://${BUCKET}/${PREFIX}/${FILENAME}

        # Ref https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-job-summary
-        echo "[${FILENAME}](https://${BUCKET}.s3.amazonaws.com/${PREFIX}/${GITHUB_RUN_ATTEMPT}/${FILENAME}) ${FILESIZE}" >> ${GITHUB_STEP_SUMMARY}
+        echo "[${FILENAME}](https://${BUCKET}.s3.amazonaws.com/${PREFIX}/${FILENAME}) ${FILESIZE}" >> ${GITHUB_STEP_SUMMARY}
--- a/.github/ansible/.gitignore
+++ b/.github/ansible/.gitignore
@@ -2,3 +2,6 @@ zenith_install.tar.gz
 .zenith_current_version
 neon_install.tar.gz
 .neon_current_version
+
+collections/*
+!collections/.keep
--- a/.github/ansible/ansible.cfg
+++ b/.github/ansible/ansible.cfg
@@ -3,6 +3,7 @@
 localhost_warning = False
 host_key_checking = False
 timeout = 30
+collections_paths = ./collections

 [ssh_connection]
 ssh_args   = -F ./ansible.ssh.cfg
--- a/.github/ansible/collections/.keep
+++ b/.github/ansible/collections/.keep
--- a/.github/ansible/deploy.yaml
+++ b/.github/ansible/deploy.yaml
@@ -1,7 +1,7 @@
 - name: Upload Neon binaries
  hosts: storage
  gather_facts: False
-  remote_user: admin
+  remote_user: "{{ remote_user }}"

  tasks:

@@ -14,7 +14,8 @@
      - safekeeper

    - name: inform about versions
-      debug: msg="Version to deploy - {{ current_version }}"
+      debug:
+        msg: "Version to deploy - {{ current_version }}"
      tags:
      - pageserver
      - safekeeper
@@ -35,7 +36,7 @@
 - name: Deploy pageserver
  hosts: pageservers
  gather_facts: False
-  remote_user: admin
+  remote_user: "{{ remote_user }}"

  tasks:

@@ -58,23 +59,37 @@
        creates: "/storage/pageserver/data/tenants"
      environment:
        NEON_REPO_DIR: "/storage/pageserver/data"
-        LD_LIBRARY_PATH: "/usr/local/lib"
+        LD_LIBRARY_PATH: "/usr/local/v14/lib"
      become: true
      tags:
      - pageserver

-    # - name: update remote storage (s3) config
-    #   lineinfile:
-    #     path: /storage/pageserver/data/pageserver.toml
-    #     line: "{{ item }}"
-    #   loop:
-    #     - "[remote_storage]"
-    #     - "bucket_name = '{{ bucket_name }}'"
-    #     - "bucket_region = '{{ bucket_region }}'"
-    #     - "prefix_in_bucket = '{{ inventory_hostname }}'"
-    #   become: true
-    #   tags:
-    #   - pageserver
+    - name: read the existing remote pageserver config
+      ansible.builtin.slurp:
+        src: /storage/pageserver/data/pageserver.toml
+      register: _remote_ps_config
+      tags:
+      - pageserver
+
+    - name: parse the existing pageserver configuration
+      ansible.builtin.set_fact:
+        _existing_ps_config: "{{ _remote_ps_config['content'] | b64decode | sivel.toiletwater.from_toml }}"
+      tags:
+      - pageserver
+
+    - name: construct the final pageserver configuration dict
+      ansible.builtin.set_fact:
+        pageserver_config: "{{ pageserver_config_stub | combine({'id': _existing_ps_config.id }) }}"
+      tags:
+      - pageserver
+
+    - name: template the pageserver config
+      template:
+        src: templates/pageserver.toml.j2
+        dest: /storage/pageserver/data/pageserver.toml
+      become: true
+      tags:
+      - pageserver

    - name: upload systemd service definition
      ansible.builtin.template:
@@ -87,15 +102,15 @@
      tags:
      - pageserver

-    # - name: start systemd service
-    #   ansible.builtin.systemd:
-    #     daemon_reload: yes
-    #     name: pageserver
-    #     enabled: yes
-    #     state: restarted
-    #   become: true
-    #   tags:
-    #   - pageserver
+    - name: start systemd service
+      ansible.builtin.systemd:
+        daemon_reload: yes
+        name: pageserver
+        enabled: yes
+        state: restarted
+      become: true
+      tags:
+      - pageserver

    - name: post version to console
      when: console_mgmt_base_url is defined
@@ -109,7 +124,7 @@
 - name: Deploy safekeeper
  hosts: safekeepers
  gather_facts: False
-  remote_user: admin
+  remote_user: "{{ remote_user }}"

  tasks:

@@ -132,7 +147,7 @@
        creates: "/storage/safekeeper/data/safekeeper.id"
      environment:
        NEON_REPO_DIR: "/storage/safekeeper/data"
-        LD_LIBRARY_PATH: "/usr/local/lib"
+        LD_LIBRARY_PATH: "/usr/local/v14/lib"
      become: true
      tags:
      - safekeeper
--- a/.github/ansible/get_binaries.sh
+++ b/.github/ansible/get_binaries.sh
@@ -21,10 +21,15 @@ docker pull --quiet neondatabase/neon:${DOCKER_TAG}
 ID=$(docker create neondatabase/neon:${DOCKER_TAG})
 docker cp ${ID}:/data/postgres_install.tar.gz .
 tar -xzf postgres_install.tar.gz -C neon_install
+mkdir neon_install/bin/
 docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/
+docker cp ${ID}:/usr/local/bin/pageserver_binutils neon_install/bin/
 docker cp ${ID}:/usr/local/bin/safekeeper neon_install/bin/
 docker cp ${ID}:/usr/local/bin/proxy neon_install/bin/
-docker cp ${ID}:/usr/local/bin/postgres neon_install/bin/
+docker cp ${ID}:/usr/local/v14/bin/ neon_install/v14/bin/
+docker cp ${ID}:/usr/local/v15/bin/ neon_install/v15/bin/
+docker cp ${ID}:/usr/local/v14/lib/ neon_install/v14/lib/
+docker cp ${ID}:/usr/local/v15/lib/ neon_install/v15/lib/
 docker rm -vf ${ID}

 # store version to file (for ansible playbooks) and create binaries tarball
--- a/.github/ansible/neon-stress.hosts
+++ b/.github/ansible/neon-stress.hosts
@@ -1,20 +0,0 @@
-[pageservers]
-neon-stress-ps-1 console_region_id=1
-neon-stress-ps-2 console_region_id=1
-
-[safekeepers]
-neon-stress-sk-1 console_region_id=1
-neon-stress-sk-2 console_region_id=1
-neon-stress-sk-3 console_region_id=1
-
-[storage:children]
-pageservers
-safekeepers
-
-[storage:vars]
-env_name = neon-stress
-console_mgmt_base_url = http://neon-stress-console.local
-bucket_name           = neon-storage-ireland
-bucket_region         = eu-west-1
-etcd_endpoints        = etcd-stress.local:2379
-safekeeper_enable_s3_offload = false
--- a/.github/ansible/neon-stress.hosts.yaml
+++ b/.github/ansible/neon-stress.hosts.yaml
@@ -0,0 +1,31 @@
+storage:
+  vars:
+    bucket_name: neon-storage-ireland
+    bucket_region: eu-west-1
+    console_mgmt_base_url: http://neon-stress-console.local
+    env_name: neon-stress
+    etcd_endpoints: neon-stress-etcd.local:2379
+    safekeeper_enable_s3_offload: 'false'  # quoted, so this is the string 'false', not a YAML boolean
+    pageserver_config_stub:  # deploy.yaml adds the existing pageserver id to this stub, then renders it as pageserver.toml
+      pg_distrib_dir: /usr/local
+      remote_storage:
+        bucket_name: "{{ bucket_name }}"
+        bucket_region: "{{ bucket_region }}"
+        prefix_in_bucket: "{{ inventory_hostname }}"
+    hostname_suffix: ".local"  # appended to inventory_hostname in the safekeeper.service listen addresses
+    remote_user: admin  # read by the deploy.yaml plays via "{{ remote_user }}"
+  children:
+    pageservers:
+      hosts:
+        neon-stress-ps-1:
+          console_region_id: aws-eu-west-1  # rendered as the quoted "region_id" in the init_*.sh registration payloads
+        neon-stress-ps-2:
+          console_region_id: aws-eu-west-1
+    safekeepers:
+      hosts:
+        neon-stress-sk-1:
+          console_region_id: aws-eu-west-1
+        neon-stress-sk-2:
+          console_region_id: aws-eu-west-1
+        neon-stress-sk-3:
+          console_region_id: aws-eu-west-1
--- a/.github/ansible/production.hosts
+++ b/.github/ansible/production.hosts
@@ -1,20 +0,0 @@
-[pageservers]
-#zenith-1-ps-1 console_region_id=1
-zenith-1-ps-2 console_region_id=1
-zenith-1-ps-3 console_region_id=1
-
-[safekeepers]
-zenith-1-sk-1 console_region_id=1
-zenith-1-sk-2 console_region_id=1
-zenith-1-sk-3 console_region_id=1
-
-[storage:children]
-pageservers
-safekeepers
-
-[storage:vars]
-env_name = prod-1
-console_mgmt_base_url = http://console-release.local
-bucket_name           = zenith-storage-oregon
-bucket_region         = us-west-2
-etcd_endpoints        = zenith-1-etcd.local:2379
--- a/.github/ansible/production.hosts.yaml
+++ b/.github/ansible/production.hosts.yaml
@@ -0,0 +1,33 @@
+---
+storage:
+  vars:
+    env_name: prod-1
+    console_mgmt_base_url: http://console-release.local
+    bucket_name: zenith-storage-oregon
+    bucket_region: us-west-2
+    etcd_endpoints: zenith-1-etcd.local:2379
+    pageserver_config_stub:  # deploy.yaml adds the existing pageserver id to this stub, then renders it as pageserver.toml
+      pg_distrib_dir: /usr/local
+      remote_storage:
+        bucket_name: "{{ bucket_name }}"
+        bucket_region: "{{ bucket_region }}"
+        prefix_in_bucket: "{{ inventory_hostname }}"
+    hostname_suffix: ".local"  # appended to inventory_hostname in the safekeeper.service listen addresses
+    remote_user: admin  # read by the deploy.yaml plays via "{{ remote_user }}"
+
+  children:
+    pageservers:
+      hosts:
+        zenith-1-ps-2:
+          console_region_id: aws-us-west-2  # rendered as the quoted "region_id" in the init_*.sh registration payloads
+        zenith-1-ps-3:
+          console_region_id: aws-us-west-2
+
+    safekeepers:
+      hosts:
+        zenith-1-sk-1:
+          console_region_id: aws-us-west-2
+        zenith-1-sk-2:
+          console_region_id: aws-us-west-2
+        zenith-1-sk-3:
+          console_region_id: aws-us-west-2
--- a/.github/ansible/scripts/init_pageserver.sh
+++ b/.github/ansible/scripts/init_pageserver.sh
@@ -12,18 +12,19 @@ cat <<EOF | tee /tmp/payload
  "version": 1,
  "host": "${HOST}",
  "port": 6400,
-  "region_id": {{ console_region_id }},
+  "region_id": "{{ console_region_id }}",
  "instance_id": "${INSTANCE_ID}",
  "http_host": "${HOST}",
-  "http_port": 9898
+  "http_port": 9898,
+  "active": false
 }
 EOF

 # check if pageserver already registered or not
-if ! curl -sf -X PATCH -d '{}' {{ console_mgmt_base_url }}/api/v1/pageservers/${INSTANCE_ID} -o /dev/null; then
+if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers/${INSTANCE_ID} -o /dev/null; then

    # not registered, so register it now
-    ID=$(curl -sf -X POST {{ console_mgmt_base_url }}/api/v1/pageservers -d@/tmp/payload | jq -r '.ID')
+    ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers -d@/tmp/payload | jq -r '.id')

    # init pageserver
    sudo -u pageserver /usr/local/bin/pageserver -c "id=${ID}" -c "pg_distrib_dir='/usr/local'" --init -D /storage/pageserver/data
--- a/.github/ansible/scripts/init_safekeeper.sh
+++ b/.github/ansible/scripts/init_safekeeper.sh
@@ -14,18 +14,18 @@ cat <<EOF | tee /tmp/payload
  "host": "${HOST}",
  "port": 6500,
  "http_port": 7676,
-  "region_id": {{ console_region_id }},
+  "region_id": "{{ console_region_id }}",
  "instance_id": "${INSTANCE_ID}",
-  "availability_zone_id": "${AZ_ID}"
+  "availability_zone_id": "${AZ_ID}",
+  "active": false
 }
 EOF

 # check if safekeeper already registered or not
-if ! curl -sf -X PATCH -d '{}' {{ console_mgmt_base_url }}/api/v1/safekeepers/${INSTANCE_ID} -o /dev/null; then
+if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers/${INSTANCE_ID} -o /dev/null; then

    # not registered, so register it now
-    ID=$(curl -sf -X POST {{ console_mgmt_base_url }}/api/v1/safekeepers -d@/tmp/payload | jq -r '.ID')
-
+    ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers -d@/tmp/payload | jq -r '.id')
    # init safekeeper
    sudo -u safekeeper /usr/local/bin/safekeeper --id ${ID} --init -D /storage/safekeeper/data
 fi
--- a/.github/ansible/ssm_config
+++ b/.github/ansible/ssm_config
@@ -0,0 +1,3 @@
+ansible_connection: aws_ssm
+ansible_aws_ssm_bucket_name: neon-dev-bucket
+ansible_python_interpreter: /usr/bin/python3
--- a/.github/ansible/staging.hosts
+++ b/.github/ansible/staging.hosts
@@ -1,21 +0,0 @@
-[pageservers]
-#zenith-us-stage-ps-1 console_region_id=27
-zenith-us-stage-ps-2 console_region_id=27
-zenith-us-stage-ps-3 console_region_id=27
-zenith-us-stage-ps-4 console_region_id=27
-
-[safekeepers]
-zenith-us-stage-sk-4 console_region_id=27
-zenith-us-stage-sk-5 console_region_id=27
-zenith-us-stage-sk-6 console_region_id=27
-
-[storage:children]
-pageservers
-safekeepers
-
-[storage:vars]
-env_name = us-stage
-console_mgmt_base_url = http://console-staging.local
-bucket_name           = zenith-staging-storage-us-east-1
-bucket_region         = us-east-1
-etcd_endpoints        = zenith-us-stage-etcd.local:2379
--- a/.github/ansible/staging.hosts.yaml
+++ b/.github/ansible/staging.hosts.yaml
@@ -0,0 +1,34 @@
+storage:
+  vars:
+    bucket_name: zenith-staging-storage-us-east-1
+    bucket_region: us-east-1
+    console_mgmt_base_url: http://console-staging.local
+    env_name: us-stage
+    etcd_endpoints: zenith-us-stage-etcd.local:2379
+    pageserver_config_stub:  # deploy.yaml adds the existing pageserver id to this stub, then renders it as pageserver.toml
+      pg_distrib_dir: /usr/local
+      remote_storage:
+        bucket_name: "{{ bucket_name }}"
+        bucket_region: "{{ bucket_region }}"
+        prefix_in_bucket: "{{ inventory_hostname }}"
+    hostname_suffix: ".local"  # appended to inventory_hostname in the safekeeper.service listen addresses
+    remote_user: admin  # read by the deploy.yaml plays via "{{ remote_user }}"
+
+  children:
+    pageservers:
+      hosts:
+        zenith-us-stage-ps-2:
+          console_region_id: aws-us-east-1  # rendered as the quoted "region_id" in the init_*.sh registration payloads
+        zenith-us-stage-ps-3:
+          console_region_id: aws-us-east-1
+        zenith-us-stage-ps-4:
+          console_region_id: aws-us-east-1
+
+    safekeepers:
+      hosts:
+        zenith-us-stage-sk-4:
+          console_region_id: aws-us-east-1
+        zenith-us-stage-sk-5:
+          console_region_id: aws-us-east-1
+        zenith-us-stage-sk-6:
+          console_region_id: aws-us-east-1
--- a/.github/ansible/staging.us-east-2.hosts.yaml
+++ b/.github/ansible/staging.us-east-2.hosts.yaml
@@ -0,0 +1,32 @@
+storage:
+  vars:
+    bucket_name: neon-staging-storage-us-east-2
+    bucket_region: us-east-2
+    console_mgmt_base_url: http://console-staging.local
+    env_name: us-stage
+    etcd_endpoints: etcd-0.us-east-2.aws.neon.build:2379
+    pageserver_config_stub:  # deploy.yaml adds the existing pageserver id to this stub, then renders it as pageserver.toml
+      pg_distrib_dir: /usr/local
+      remote_storage:
+        bucket_name: "{{ bucket_name }}"
+        bucket_region: "{{ bucket_region }}"
+        prefix_in_bucket: "pageserver/v1"
+    hostname_suffix: ""  # empty: safekeeper.service then listens on the bare inventory_hostname
+    remote_user: ssm-user  # read by the deploy.yaml plays via "{{ remote_user }}"
+    ansible_aws_ssm_region: us-east-2
+    console_region_id: aws-us-east-2  # group-wide here; rendered as the quoted "region_id" in the init_*.sh payloads
+
+  children:
+    pageservers:
+      hosts:
+        pageserver-0.us-east-2.aws.neon.build:
+          ansible_host: i-0c3e70929edb5d691  # NOTE(review): looks like an EC2 instance id for the aws_ssm connection — confirm
+
+    safekeepers:
+      hosts:
+        safekeeper-0.us-east-2.aws.neon.build:
+          ansible_host: i-027662bd552bf5db0
+        safekeeper-1.us-east-2.aws.neon.build:
+          ansible_host: i-0171efc3604a7b907
+        safekeeper-2.us-east-2.aws.neon.build:
+          ansible_host: i-0de0b03a51676a6ce
--- a/.github/ansible/systemd/pageserver.service
+++ b/.github/ansible/systemd/pageserver.service
@@ -1,11 +1,11 @@
 [Unit]
-Description=Zenith pageserver
+Description=Neon pageserver
 After=network.target auditd.service

 [Service]
 Type=simple
 User=pageserver
-Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/lib
+Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/v14/lib
 ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoints=['{{ etcd_endpoints }}']" -D /storage/pageserver/data
 ExecReload=/bin/kill -HUP $MAINPID
 KillMode=mixed
--- a/.github/ansible/systemd/safekeeper.service
+++ b/.github/ansible/systemd/safekeeper.service
@@ -1,12 +1,12 @@
 [Unit]
-Description=Zenith safekeeper
+Description=Neon safekeeper
 After=network.target auditd.service

 [Service]
 Type=simple
 User=safekeeper
-Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
-ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ env_name }}/wal"}'
+Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/v14/lib
+ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}{{ hostname_suffix }}:6500 --listen-http {{ inventory_hostname }}{{ hostname_suffix }}:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ env_name }}/wal"}'
 ExecReload=/bin/kill -HUP $MAINPID
 KillMode=mixed
 KillSignal=SIGINT
--- a/.github/ansible/templates/pageserver.toml.j2
+++ b/.github/ansible/templates/pageserver.toml.j2
@@ -0,0 +1 @@
+{{ pageserver_config | sivel.toiletwater.to_toml }}
--- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml
@@ -0,0 +1,31 @@
+# Helm chart values for neon-proxy-scram.
+# This is a YAML-formatted file.
+
+image:
+  repository: neondatabase/neon
+
+settings:
+  authBackend: "console"
+  authEndpoint: "http://console-staging.local/management/api/v2"
+  domain: "*.us-east-2.aws.neon.build"
+
+# -- Additional labels for neon-proxy pods
+podLabels:
+  zenith_service: proxy-scram
+  zenith_env: dev
+  zenith_region: us-east-2
+  zenith_region_slug: us-east-2
+
+exposedService:
+  annotations:
+    service.beta.kubernetes.io/aws-load-balancer-type: external
+    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
+    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
+    external-dns.alpha.kubernetes.io/hostname: us-east-2.aws.neon.build
+
+#metrics:
+#  enabled: true
+#  serviceMonitor:
+#    enabled: true
+#    selector:
+#      release: kube-prometheus-stack
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -46,7 +46,8 @@ jobs:
    runs-on: [self-hosted, zenith-benchmarker]

    env:
-      POSTGRES_DISTRIB_DIR: "/usr/pgsql-14"
+      POSTGRES_DISTRIB_DIR: /usr/pgsql
+      DEFAULT_PG_VERSION: 14

    steps:
    - name: Checkout zenith repo
@@ -71,7 +72,7 @@ jobs:
        echo Poetry
        poetry --version
        echo Pgbench
-        $POSTGRES_DISTRIB_DIR/bin/pgbench --version
+        ${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version

    - name: Create Neon Project
      id: create-neon-project
@@ -137,21 +138,31 @@ jobs:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

  pgbench-compare:
-    env:
-      TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
-      TEST_PG_BENCH_SCALES_MATRIX: "10gb"
-      POSTGRES_DISTRIB_DIR: /usr
-      TEST_OUTPUT: /tmp/test_output
-      BUILD_TYPE: remote
-      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
-
    strategy:
      fail-fast: false
      matrix:
        # neon-captest-new: Run pgbench in a freshly created project
        # neon-captest-reuse: Same, but reusing existing project
        # neon-captest-prefetch: Same, with prefetching enabled (new project)
-        platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch, rds-aurora ]
+        platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch ]
+        db_size: [ 10gb ]
+        include:
+          - platform: neon-captest-new
+            db_size: 50gb
+          - platform: neon-captest-prefetch
+            db_size: 50gb
+          - platform: rds-aurora
+            db_size: 50gb
+
+    env:
+      TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
+      TEST_PG_BENCH_SCALES_MATRIX: ${{ matrix.db_size }}
+      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
+      DEFAULT_PG_VERSION: 14
+      TEST_OUTPUT: /tmp/test_output
+      BUILD_TYPE: remote
+      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
+      PLATFORM: ${{ matrix.platform }}

    runs-on: dev
    container:
@@ -163,13 +174,20 @@ jobs:
    steps:
    - uses: actions/checkout@v3

-    - name: Install Deps
+    - name: Download Neon artifact
+      uses: ./.github/actions/download
+      with:
+        name: neon-${{ runner.os }}-release-artifact
+        path: /tmp/neon/
+        prefix: latest
+
+    - name: Add Postgres binaries to PATH
      run: |
-        sudo apt -y update
-        sudo apt install -y postgresql-14
+        ${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
+        echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH

    - name: Create Neon Project
-      if: matrix.platform != 'neon-captest-reuse'
+      if: contains(fromJson('["neon-captest-new", "neon-captest-prefetch"]'), matrix.platform)
      id: create-neon-project
      uses: ./.github/actions/neon-project-create
      with:
@@ -195,11 +213,9 @@ jobs:
            ;;
        esac

-        echo "::set-output name=connstr::${CONNSTR}"
+        echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT

        psql ${CONNSTR} -c "SELECT version();"
-      env:
-        PLATFORM: ${{ matrix.platform }}

    - name: Set database options
      if: matrix.platform == 'neon-captest-prefetch'
@@ -218,7 +234,6 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
      env:
-        PLATFORM: ${{ matrix.platform }}
        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -232,7 +247,6 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
      env:
-        PLATFORM: ${{ matrix.platform }}
        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -246,7 +260,6 @@ jobs:
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
      env:
-        PLATFORM: ${{ matrix.platform }}
        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -259,7 +272,7 @@ jobs:
        build_type: ${{ env.BUILD_TYPE }}

    - name: Delete Neon Project
-      if: ${{ matrix.platform != 'neon-captest-reuse' && always() }}
+      if: ${{ steps.create-neon-project.outputs.project_id && always() }}
      uses: ./.github/actions/neon-project-delete
      with:
        environment: dev
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -35,12 +35,12 @@ jobs:
          echo ref:$GITHUB_REF_NAME
          echo rev:$(git rev-list --count HEAD)
          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
-            echo "::set-output name=tag::$(git rev-list --count HEAD)"
+            echo "tag=$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
          elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
-            echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
+            echo "tag=release-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
          else
            echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
-            echo "::set-output name=tag::$GITHUB_RUN_ID"
+            echo "tag=$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
          fi
        shell: bash
        id: build-tag
@@ -78,12 +78,12 @@ jobs:

      - name: Set pg 14 revision for caching
        id: pg_v14_rev
-        run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres-v14)
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
        shell: bash -euxo pipefail {0}

      - name: Set pg 15 revision for caching
        id: pg_v15_rev
-        run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres-v15)
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
        shell: bash -euxo pipefail {0}

      # Set some environment variables used by all the steps.
@@ -127,8 +127,8 @@ jobs:
            target/
          # Fall back to older versions of the key, if no cache for current Cargo.lock was found
          key: |
-            v8-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}
-            v8-${{ runner.os }}-${{ matrix.build_type }}-cargo-
+            v9-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}
+            v9-${{ runner.os }}-${{ matrix.build_type }}-cargo-

      - name: Cache postgres v14 build
        id: cache_pg_14
@@ -268,6 +268,32 @@ jobs:
        if: matrix.build_type == 'debug'
        uses: ./.github/actions/save-coverage-data

+  # Publishes the artifacts of the current run under artifacts/latest/ in the
+  # public dev bucket. Runs only on main, after regress-tests has finished.
+  upload-latest-artifacts:
+    runs-on: dev
+    container:
+      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
+      options: --init
+    needs: [ regress-tests ]
+    if: github.ref_name == 'main'
+    steps:
+      - name: Copy Neon artifact to the latest directory
+        shell: bash -euxo pipefail {0}
+        env:
+          BUCKET: neon-github-public-dev
+          PREFIX: artifacts/${{ github.run_id }}
+        run: |
+          for build_type in debug release; do
+            FILENAME=neon-${{ runner.os }}-${build_type}-artifact.tar.zst
+
+            # Pick the highest version-sorted key under this run's prefix that matches the file name.
+            # `|| true` keeps `pipefail` from aborting here, so the empty-result case is reported below.
+            S3_KEY=$(aws s3api list-objects-v2 --bucket "${BUCKET}" --prefix "${PREFIX}" | jq -r '.Contents[].Key' | grep "${FILENAME}" | sort --version-sort | tail -1 || true)
+            if [ -z "${S3_KEY}" ]; then
+              # Diagnostics go to stderr (`>&2`); `2>&1` would have left the message on stdout.
+              echo >&2 "Neither s3://${BUCKET}/${PREFIX}/${FILENAME} nor its version from previous attempts exist"
+              exit 1
+            fi
+
+            time aws s3 cp --only-show-errors "s3://${BUCKET}/${S3_KEY}" "s3://${BUCKET}/artifacts/latest/${FILENAME}"
+          done
+
  benchmarks:
    runs-on: dev
    container:
@@ -335,9 +361,6 @@ jobs:
          curl --fail --output suites.json ${REPORT_URL%/index.html}/data/suites.json
          ./scripts/pysync

-          # Workaround for https://github.com/neondatabase/cloud/issues/2188
-          psql "$TEST_RESULT_CONNSTR" -c "SELECT 1;" || sleep 10
-
          DATABASE_URL="$TEST_RESULT_CONNSTR" poetry run python3 scripts/ingest_regress_test_result.py --revision ${SHA} --reference ${GITHUB_REF} --build-type ${BUILD_TYPE} --ingest suites.json

  coverage-report:
@@ -366,7 +389,7 @@ jobs:
            !~/.cargo/registry/src
            ~/.cargo/git/
            target/
-          key: v8-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}
+          key: v9-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ hashFiles('Cargo.lock') }}

      - name: Get Neon artifact
        uses: ./.github/actions/download
@@ -458,6 +481,7 @@ jobs:

  neon-image:
    runs-on: dev
+    needs: [ tag ]
    container: gcr.io/kaniko-project/executor:v1.9.0-debug

    steps:
@@ -471,10 +495,11 @@ jobs:
        run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json

      - name: Kaniko build neon
-        run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:$GITHUB_RUN_ID
+        run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}}

  compute-tools-image:
    runs-on: dev
+    needs: [ tag ]
    container: gcr.io/kaniko-project/executor:v1.9.0-debug

    steps:
@@ -485,11 +510,12 @@ jobs:
        run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json

      - name: Kaniko build compute tools
-        run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:$GITHUB_RUN_ID
+        run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}

  compute-node-image:
    runs-on: dev
    container: gcr.io/kaniko-project/executor:v1.9.0-debug
+    needs: [ tag ]
    steps:
      - name: Checkout
        uses: actions/checkout@v1 # v3 won't work with kaniko
@@ -504,11 +530,12 @@ jobs:
        # cloud repo depends on this image name, thus duplicating it
        # remove compute-node when cloud repo is updated
      - name: Kaniko build compute node with extensions v14 (compatibility)
-        run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:$GITHUB_RUN_ID
+        run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}}

  compute-node-image-v14:
    runs-on: dev
    container: gcr.io/kaniko-project/executor:v1.9.0-debug
+    needs: [ tag ]
    steps:
      - name: Checkout
        uses: actions/checkout@v1 # v3 won't work with kaniko
@@ -520,12 +547,13 @@ jobs:
        run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json

      - name: Kaniko build compute node with extensions v14
-        run: /kaniko/executor --skip-unused-stages  --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache  --context . --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:$GITHUB_RUN_ID
+        run: /kaniko/executor --skip-unused-stages  --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache  --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v14 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}}


  compute-node-image-v15:
    runs-on: dev
    container: gcr.io/kaniko-project/executor:v1.9.0-debug
+    needs: [ tag ]
    steps:
      - name: Checkout
        uses: actions/checkout@v1 # v3 won't work with kaniko
@@ -537,11 +565,11 @@ jobs:
        run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json

      - name: Kaniko build compute node with extensions v15
-        run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --dockerfile Dockerfile.compute-node-v15 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:$GITHUB_RUN_ID
+        run: /kaniko/executor --skip-unused-stages --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-node-v15 --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}}

  promote-images:
    runs-on: dev
-    needs: [ neon-image, compute-node-image, compute-node-image-v14, compute-tools-image ]
+    needs: [ tag, neon-image, compute-node-image, compute-node-image-v14, compute-node-image-v15, compute-tools-image ]
    if: github.event_name != 'workflow_dispatch'
    container: amazon/aws-cli
    strategy:
@@ -550,12 +578,13 @@ jobs:
        # compute-node uses postgres 14, which is default now
        # cloud repo depends on this image name, thus duplicating it
        # remove compute-node when cloud repo is updated
-        name: [ neon, compute-node, compute-node-v14, compute-tools ]
+        name: [ neon, compute-node, compute-node-v14, compute-node-v15, compute-tools ]

    steps:
      - name: Promote image to latest
-        run:
-          MANIFEST=$(aws ecr batch-get-image --repository-name ${{ matrix.name }} --image-ids imageTag=$GITHUB_RUN_ID --query 'images[].imageManifest' --output text) && aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"
+        run: |
+          # Fetch the manifest of the image tagged with this build's tag.
+          # Plain assignment instead of `export VAR=$(...)`: with `export`, the exit
+          # status is that of `export` itself, so a failing `aws ecr batch-get-image`
+          # would go unnoticed and put-image would run with an empty manifest.
+          MANIFEST=$(aws ecr batch-get-image --repository-name ${{ matrix.name }} --image-ids imageTag=${{needs.tag.outputs.build-tag}} --query 'images[].imageManifest' --output text)
+          # Re-register the same manifest under the `latest` tag.
+          aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"

  push-docker-hub:
    runs-on: dev
@@ -574,21 +603,35 @@ jobs:
          echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json

      - name: Pull neon image from ECR
-        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:latest neon
+        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} neon

      - name: Pull compute tools image from ECR
-        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest compute-tools
+        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} compute-tools

      - name: Pull compute node image from ECR
-        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:latest compute-node
+        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}} compute-node

      - name: Pull compute node v14 image from ECR
-        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:latest compute-node-v14
+        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} compute-node-v14
+
+      - name: Pull compute node v15 image from ECR
+        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} compute-node-v15

      - name: Pull rust image from ECR
        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned rust

-      - name: Configure docker login
+      - name: Push images to production ECR
+        if: |
+          (github.ref_name == 'main' || github.ref_name == 'release') &&
+          github.event_name != 'workflow_dispatch'
+        run: |
+          # Copy each image built for this tag from the source ECR to the production ECR as `latest`.
+          for image in neon compute-tools compute-node compute-node-v14 compute-node-v15; do
+            crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/${image}:latest
+          done
+
+      - name: Configure Docker Hub login
        run: |
          # ECR Credential Helper & Docker Hub don't work together in config, hence reset
          echo "" > /github/home/.docker/config.json
@@ -606,10 +649,13 @@ jobs:
      - name: Push compute node v14 image to Docker Hub
        run: crane push compute-node-v14 neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}}

+      - name: Push compute node v15 image to Docker Hub
+        run: crane push compute-node-v15 neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}}
+
      - name: Push rust image to Docker Hub
        run: crane push rust neondatabase/rust:pinned

-      - name: Add latest tag to images
+      - name: Add latest tag to images in Docker Hub
        if: |
          (github.ref_name == 'main' || github.ref_name == 'release') &&
          github.event_name != 'workflow_dispatch'
@@ -618,6 +664,7 @@ jobs:
          crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest
          crane tag neondatabase/compute-node:${{needs.tag.outputs.build-tag}} latest
          crane tag neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}} latest
+          crane tag neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}} latest

  calculate-deploy-targets:
    runs-on: [ self-hosted, Linux, k8s-runner ]
@@ -630,12 +677,12 @@ jobs:
      - id: set-matrix
        run: |
          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
-            STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA"}'
-            NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA"}'
-            echo "::set-output name=include::[$STAGING, $NEON_STRESS]"
+            STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA", "console_api_key_secret": "NEON_STAGING_API_KEY"}'
+            NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA", "console_api_key_secret": "NEON_CAPTEST_API_KEY"}'
+            echo "include=[$STAGING, $NEON_STRESS]" >> $GITHUB_OUTPUT
          elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
-            PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA"}'
-            echo "::set-output name=include::[$PRODUCTION]"
+            PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA", "console_api_key_secret": "NEON_PRODUCTION_API_KEY"}'
+            echo "include=[$PRODUCTION]" >> $GITHUB_OUTPUT
          else
            echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
            exit 1
@@ -671,7 +718,7 @@ jobs:
      - name: Setup ansible
        run: |
          export PATH="/root/.local/bin:$PATH"
-          pip install --progress-bar off --user ansible boto3
+          pip install --progress-bar off --user ansible boto3 toml

      - name: Redeploy
        run: |
@@ -693,8 +740,48 @@ jobs:
          chmod 0600 ssh-key
          ssh-add ssh-key
          rm -f ssh-key ssh-key-cert.pub
+          ansible-galaxy collection install sivel.toiletwater
+          ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts.yaml -e CONSOLE_API_TOKEN=${{ secrets[matrix.console_api_key_secret] }}
+          rm -f neon_install.tar.gz .neon_current_version

-          ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts
+  # Runs the Ansible deploy playbook against the staging.us-east-2 inventory,
+  # using the image tag produced by the `tag` job.
+  deploy-new:
+    runs-on: dev
+    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
+    # We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
+    # If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
+    needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
+    # Gated to the main branch and to non-manual runs. Because of this gate the
+    # 'release' branch of the Redeploy script below is not reachable from this job.
+    if: |
+      (github.ref_name == 'main') &&
+      github.event_name != 'workflow_dispatch'
+    defaults:
+      run:
+        shell: bash
+    env:
+      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
+    steps:
+      # Full history and submodules are checked out (fetch-depth: 0, submodules: true).
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          fetch-depth: 0
+
+      # Fetches binaries via get_binaries.sh (DOCKER_TAG exported for it), installs the
+      # sivel.toiletwater collection, runs deploy.yaml, then removes the downloaded files.
+      - name: Redeploy
+        run: |
+          export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
+          cd "$(pwd)/.github/ansible"
+
+          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
+            ./get_binaries.sh
+          elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
+            RELEASE=true ./get_binaries.sh
+          else
+            echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
+            exit 1
+          fi
+
+          ansible-galaxy collection install sivel.toiletwater
+          ansible-playbook deploy.yaml -i staging.us-east-2.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
          rm -f neon_install.tar.gz .neon_current_version

  deploy-proxy:
@@ -736,5 +823,33 @@ jobs:
      - name: Re-deploy proxy
        run: |
          DOCKER_TAG=${{needs.tag.outputs.build-tag}}
-          helm upgrade ${{ matrix.proxy_job }}       neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
-          helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
+          helm upgrade ${{ matrix.proxy_job }}       neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
+          helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
+
+  # Deploys the neon-proxy-scram Helm release to the dev-us-east-2-beta EKS cluster,
+  # for non-manual runs on main or release.
+  deploy-proxy-new:
+    runs-on: dev
+    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
+    # Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
+    needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
+    if: |
+      (github.ref_name == 'main' || github.ref_name == 'release') &&
+      github.event_name != 'workflow_dispatch'
+    defaults:
+      run:
+        shell: bash
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          fetch-depth: 0
+
+      # Registers the neondatabase Helm chart repo and writes a kubeconfig for the target cluster.
+      - name: Configure environment
+        run: |
+          helm repo add neondatabase https://neondatabase.github.io/helm-charts
+          aws --region us-east-2 eks update-kubeconfig --name dev-us-east-2-beta --role-arn arn:aws:iam::369495373322:role/github-runner
+
+      # Installs or upgrades the release with the image tag from the `tag` job and
+      # waits up to 15 minutes for the rollout.
+      - name: Re-deploy proxy
+        run: |
+          DOCKER_TAG=${{needs.tag.outputs.build-tag}}
+          helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
--- a/.github/workflows/codestyle.yml
+++ b/.github/workflows/codestyle.yml
@@ -36,7 +36,7 @@ jobs:

    steps:
      - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
        with:
          submodules: true
          fetch-depth: 2
@@ -56,12 +56,12 @@ jobs:

      - name: Set pg 14 revision for caching
        id: pg_v14_rev
-        run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres-v14)
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
        shell: bash -euxo pipefail {0}

      - name: Set pg 15 revision for caching
        id: pg_v15_rev
-        run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres-v15)
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
        shell: bash -euxo pipefail {0}

      - name: Cache postgres v14 build
@@ -106,7 +106,7 @@ jobs:
            !~/.cargo/registry/src
            ~/.cargo/git
            target
-          key: v4-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust
+          key: v5-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust

      - name: Run cargo clippy
        run: ./run_clippy.sh
--- a/.github/workflows/pg_clients.yml
+++ b/.github/workflows/pg_clients.yml
@@ -58,12 +58,12 @@ jobs:
      env:
        REMOTE_ENV: 1
        BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
-        POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install/v14
+        POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
      shell: bash -euxo pipefail {0}
      run: |
        # Test framework expects we have psql binary;
        # but since we don't really need it in this test, let's mock it
-        mkdir -p "$POSTGRES_DISTRIB_DIR/bin" && touch "$POSTGRES_DISTRIB_DIR/bin/psql";
+        mkdir -p "$POSTGRES_DISTRIB_DIR/v14/bin" && touch "$POSTGRES_DISTRIB_DIR/v14/bin/psql";
        ./scripts/pytest \
          --junitxml=$TEST_OUTPUT/junit.xml \
          --tb=short \
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,3 +1,14 @@
+# 'named-profiles' feature was stabilized in cargo 1.57. This line makes the
+# build work with older cargo versions.
+#
+# We have this because as of this writing, the latest cargo Debian package
+# that's available is 1.56. (Confusingly, the Debian package version number
+# is 0.57, whereas 'cargo --version' says 1.56.)
+#
+# See https://tracker.debian.org/pkg/cargo for the current status of the
+# package. When that gets updated, we can remove this.
+cargo-features = ["named-profiles"]
+
 [workspace]
 members = [
    "compute_tools",
--- a/21
+++ b/21
@@ -19,9 +19,8 @@ COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
 ENV BUILD_TYPE release
 RUN set -e \
    && mold -run make -j $(nproc) -s neon-pg-ext \
-    && rm -rf pg_install/v14/build \
-    && rm -rf pg_install/v15/build \
-    && tar -C pg_install/v14 -czf /home/nonroot/postgres_install.tar.gz .
+    && rm -rf pg_install/build \
+    && tar -C pg_install -czf /home/nonroot/postgres_install.tar.gz .

 # Build neon binaries
 FROM $REPOSITORY/$IMAGE:$TAG AS build
@@ -45,7 +44,7 @@ COPY . .
 # Show build caching stats to check if it was used in the end.
 # Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
 RUN set -e \
-&& mold -run cargo build --bin pageserver --bin safekeeper --bin proxy --locked --release \
+&& mold -run cargo build --bin pageserver --bin pageserver_binutils --bin safekeeper --bin proxy --locked --release \
    && cachepot -s

 # Build final image
@@ -64,12 +63,13 @@ RUN set -e \
    && useradd -d /data neon \
    && chown -R neon:neon /data

-COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver /usr/local/bin
-COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper /usr/local/bin
-COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy      /usr/local/bin
+COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver          /usr/local/bin
+COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver_binutils /usr/local/bin
+COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper          /usr/local/bin
+COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy               /usr/local/bin

-# v14 is default for now
-COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/
+COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
+COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
 COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/

 # By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.
@@ -78,7 +78,7 @@ RUN mkdir -p /data/.neon/ && chown -R neon:neon /data/.neon/ \
    && /usr/local/bin/pageserver -D /data/.neon/ --init \
       -c "id=1234" \
       -c "broker_endpoints=['http://etcd:2379']" \
-       -c "pg_distrib_dir='/usr/local'" \
+       -c "pg_distrib_dir='/usr/local/'" \
       -c "listen_pg_addr='0.0.0.0:6400'" \
       -c "listen_http_addr='0.0.0.0:9898'"

@@ -86,4 +86,3 @@ VOLUME ["/data"]
 USER neon
 EXPOSE 6400
 EXPOSE 9898
-CMD ["/bin/bash"]
--- a/Dockerfile.compute-node-v14
+++ b/Dockerfile.compute-node-v14
@@ -8,9 +8,12 @@ ARG TAG=pinned
 # Layer "build-deps"
 #
 FROM debian:bullseye-slim AS build-deps
+RUN echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
+    echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
+    apt update
 RUN apt update &&  \
    apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \
-    libcurl4-openssl-dev libossp-uuid-dev
+    libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libglib2.0-dev

 #
 # Layer "pg-build"
@@ -37,7 +40,7 @@ RUN cd postgres && \
 FROM build-deps AS postgis-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 RUN apt update && \
-    apt install -y gdal-bin libgdal-dev libprotobuf-c-dev protobuf-c-compiler xsltproc wget
+    apt install -y gdal-bin libgdal-dev libprotobuf-c-dev protobuf-c-compiler xsltproc

 RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.0.tar.gz && \
    tar xvzf postgis-3.3.0.tar.gz && \
@@ -59,32 +62,65 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.0.tar.gz && \
 # Build plv8
 #
 FROM build-deps AS plv8-build
-COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 RUN apt update && \
-    apt install -y git curl wget make ninja-build build-essential libncurses5 python3-dev pkg-config libc++-dev libc++abi-dev libglib2.0-dev
+    apt install -y ninja-build python3-dev libc++-dev libc++abi-dev libncurses5

 # https://github.com/plv8/plv8/issues/475
 # Debian bullseye provides binutils 2.35 when >= 2.38 is necessary
-RUN echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
-    echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
-    apt update && \
+RUN apt update && \
    apt install -y --no-install-recommends -t testing binutils

+# Sed is used to patch for https://github.com/plv8/plv8/issues/503
 RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.4.tar.gz && \
    tar xvzf v3.1.4.tar.gz && \
    cd plv8-3.1.4 && \
    export PATH="/usr/local/pgsql/bin:$PATH" && \
+    sed -i 's/MemoryContextAlloc(/MemoryContextAllocZero(/' plv8.cc && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
    rm -rf /plv8-* && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control

+#
+# Layer "h3-pg-build"
+# Build h3_pg
+#
+FROM build-deps AS h3-pg-build
+COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
+
+# packaged cmake is too old
+RUN apt update && \
+    apt install -y --no-install-recommends -t testing cmake
+
+# Build the H3 C library. It is installed under /h3 (via DESTDIR) so that other
+# stages can copy /h3/usr, and also copied into / -- presumably so the h3-pg
+# build below can find it.
+# Cleanup note: leave the build directory before removing it; `rm -rf build`
+# executed from inside it would look for build/build and delete nothing.
+RUN wget https://github.com/uber/h3/archive/refs/tags/v4.0.1.tar.gz -O h3.tgz && \
+    tar xvzf h3.tgz  && \
+    cd h3-4.0.1 && \
+    mkdir build && \
+    cd build && \
+    cmake .. -DCMAKE_BUILD_TYPE=Release && \
+    make -j $(getconf _NPROCESSORS_ONLN) && \
+    DESTDIR=/h3 make install && \
+    cp -R /h3/usr / && \
+    cd .. && \
+    rm -rf build
+
+# Build and install the h3-pg extension against the PostgreSQL in /usr/local/pgsql,
+# then mark the h3 extension as trusted in its control file.
+RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.0.1.tar.gz -O h3-pg.tgz && \
+    tar xvzf h3-pg.tgz && \
+    cd h3-pg-4.0.1 && \
+    export PATH="/usr/local/pgsql/bin:$PATH" && \
+    make -j $(getconf _NPROCESSORS_ONLN) && \
+    make -j $(getconf _NPROCESSORS_ONLN) install && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control
+
 #
 # Layer "neon-pg-ext-build"
 # compile neon extensions
 #
 FROM build-deps AS neon-pg-ext-build
 COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=h3-pg-build /h3/usr /
 COPY pgxn/ pgxn/

 RUN make -j $(getconf _NPROCESSORS_ONLN) \
@@ -132,8 +168,6 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
    chmod 0750 /var/db/postgres/compute && \
    echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig

-# TODO: Check if we can make the extension setup more modular versus a linear build
-# currently plv8-build copies the output /usr/local/pgsql from postgis-build, etc#
 COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
 COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl

--- a/Dockerfile.compute-node-v15
+++ b/Dockerfile.compute-node-v15
@@ -5,7 +5,7 @@

 ARG TAG=pinned
 # apparently, ARGs don't get replaced in RUN commands in kaniko
-# ARG POSTGIS_VERSION=3.3.0
+# ARG POSTGIS_VERSION=3.3.1
 # ARG PLV8_VERSION=3.1.4
 # ARG PG_VERSION=v15

@@ -13,9 +13,12 @@ ARG TAG=pinned
 # Layer "build-deps"
 #
 FROM debian:bullseye-slim AS build-deps
+RUN echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
+    echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
+    apt update
 RUN apt update &&  \
    apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \
-    libcurl4-openssl-dev libossp-uuid-dev
+    libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libglib2.0-dev

 #
 # Layer "pg-build"
@@ -42,11 +45,11 @@ RUN cd postgres && \
 FROM build-deps AS postgis-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 RUN apt update && \
-    apt install -y gdal-bin libgdal-dev libprotobuf-c-dev protobuf-c-compiler xsltproc wget
+    apt install -y gdal-bin libgdal-dev libprotobuf-c-dev protobuf-c-compiler xsltproc

-RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.0.tar.gz && \
-    tar xvzf postgis-3.3.0.tar.gz && \
-    cd postgis-3.3.0 && \
+RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
+    tar xvzf postgis-3.3.1.tar.gz && \
+    cd postgis-3.3.1 && \
    ./autogen.sh && \
    export PATH="/usr/local/pgsql/bin:$PATH" && \
    ./configure && \
@@ -64,32 +67,65 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.0.tar.gz && \
 # Build plv8
 #
 FROM build-deps AS plv8-build
-COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 RUN apt update && \
-    apt install -y git curl wget make ninja-build build-essential libncurses5 python3-dev pkg-config libc++-dev libc++abi-dev libglib2.0-dev
+    apt install -y ninja-build python3-dev libc++-dev libc++abi-dev libncurses5

 # https://github.com/plv8/plv8/issues/475
 # Debian bullseye provides binutils 2.35 when >= 2.38 is necessary
-RUN echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
-    echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
-    apt update && \
+RUN apt update && \
    apt install -y --no-install-recommends -t testing binutils

+# Sed is used to patch for https://github.com/plv8/plv8/issues/503
 RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.4.tar.gz && \
    tar xvzf v3.1.4.tar.gz && \
    cd plv8-3.1.4 && \
    export PATH="/usr/local/pgsql/bin:$PATH" && \
+    sed -i 's/MemoryContextAlloc(/MemoryContextAllocZero(/' plv8.cc && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
    rm -rf /plv8-* && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control

+#
+# Layer "h3-pg-build"
+# Build h3_pg
+#
+FROM build-deps AS h3-pg-build
+COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
+
+# packaged cmake is too old
+RUN apt update && \
+    apt install -y --no-install-recommends -t testing cmake
+
+# Build the H3 C library. It is installed under /h3 (via DESTDIR) so that other
+# stages can copy /h3/usr, and also copied into / -- presumably so the h3-pg
+# build below can find it.
+# Cleanup note: leave the build directory before removing it; `rm -rf build`
+# executed from inside it would look for build/build and delete nothing.
+RUN wget https://github.com/uber/h3/archive/refs/tags/v4.0.1.tar.gz -O h3.tgz && \
+    tar xvzf h3.tgz  && \
+    cd h3-4.0.1 && \
+    mkdir build && \
+    cd build && \
+    cmake .. -DCMAKE_BUILD_TYPE=Release && \
+    make -j $(getconf _NPROCESSORS_ONLN) && \
+    DESTDIR=/h3 make install && \
+    cp -R /h3/usr / && \
+    cd .. && \
+    rm -rf build
+
+# Build and install the h3-pg extension against the PostgreSQL in /usr/local/pgsql,
+# then mark the h3 extension as trusted in its control file.
+RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.0.1.tar.gz -O h3-pg.tgz && \
+    tar xvzf h3-pg.tgz && \
+    cd h3-pg-4.0.1 && \
+    export PATH="/usr/local/pgsql/bin:$PATH" && \
+    make -j $(getconf _NPROCESSORS_ONLN) && \
+    make -j $(getconf _NPROCESSORS_ONLN) install && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control
+
 #
 # Layer "neon-pg-ext-build"
 # compile neon extensions
 #
 FROM build-deps AS neon-pg-ext-build
 COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=h3-pg-build /h3/usr /
 COPY pgxn/ pgxn/

 RUN make -j $(getconf _NPROCESSORS_ONLN) \
@@ -137,8 +173,6 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
    chmod 0750 /var/db/postgres/compute && \
    echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig

-# TODO: Check if we can make the extension setup more modular versus a linear build
-# currently plv8-build copies the output /usr/local/pgsql from postgis-build, etc#
 COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
 COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl

--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -6,10 +6,12 @@ edition = "2021"
 [dependencies]
 anyhow = "1.0"
 chrono = "0.4"
-clap = "3.0"
+clap = "4.0"
 env_logger = "0.9"
+futures = "0.3.13"
 hyper = { version = "0.14", features = ["full"] }
 log = { version = "0.4", features = ["std", "serde"] }
+notify = "5.0.0"
 postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
 regex = "1"
 serde = { version = "1.0", features = ["derive"] }
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -51,53 +51,19 @@ fn main() -> Result<()> {
    // TODO: re-use `utils::logging` later
    init_logger(DEFAULT_LOG_LEVEL)?;

-    // Env variable is set by `cargo`
-    let version: Option<&str> = option_env!("CARGO_PKG_VERSION");
-    let matches = clap::App::new("compute_ctl")
-        .version(version.unwrap_or("unknown"))
-        .arg(
-            Arg::new("connstr")
-                .short('C')
-                .long("connstr")
-                .value_name("DATABASE_URL")
-                .required(true),
-        )
-        .arg(
-            Arg::new("pgdata")
-                .short('D')
-                .long("pgdata")
-                .value_name("DATADIR")
-                .required(true),
-        )
-        .arg(
-            Arg::new("pgbin")
-                .short('b')
-                .long("pgbin")
-                .value_name("POSTGRES_PATH"),
-        )
-        .arg(
-            Arg::new("spec")
-                .short('s')
-                .long("spec")
-                .value_name("SPEC_JSON"),
-        )
-        .arg(
-            Arg::new("spec-path")
-                .short('S')
-                .long("spec-path")
-                .value_name("SPEC_PATH"),
-        )
-        .get_matches();
+    let matches = cli().get_matches();

-    let pgdata = matches.value_of("pgdata").expect("PGDATA path is required");
+    let pgdata = matches
+        .get_one::<String>("pgdata")
+        .expect("PGDATA path is required");
    let connstr = matches
-        .value_of("connstr")
+        .get_one::<String>("connstr")
        .expect("Postgres connection string is required");
-    let spec = matches.value_of("spec");
-    let spec_path = matches.value_of("spec-path");
+    let spec = matches.get_one::<String>("spec");
+    let spec_path = matches.get_one::<String>("spec-path");

    // Try to use just 'postgres' if no path is provided
-    let pgbin = matches.value_of("pgbin").unwrap_or("postgres");
+    let pgbin = matches.get_one::<String>("pgbin").unwrap();

    let spec: ComputeSpec = match spec {
        // First, try to get cluster spec from the cli argument
@@ -173,3 +139,48 @@ fn main() -> Result<()> {
        }
    }
 }
+
+/// Builds the `compute_ctl` command-line definition.
+///
+/// `--connstr` and `--pgdata` are required, `--pgbin` defaults to `postgres`,
+/// and `--spec` / `--spec-path` are optional.
+fn cli() -> clap::Command {
+    let connstr = Arg::new("connstr")
+        .long("connstr")
+        .short('C')
+        .value_name("DATABASE_URL")
+        .required(true);
+
+    let pgdata = Arg::new("pgdata")
+        .long("pgdata")
+        .short('D')
+        .value_name("DATADIR")
+        .required(true);
+
+    // Falls back to plain `postgres` when no path is given.
+    let pgbin = Arg::new("pgbin")
+        .long("pgbin")
+        .short('b')
+        .value_name("POSTGRES_PATH")
+        .default_value("postgres");
+
+    let spec = Arg::new("spec")
+        .long("spec")
+        .short('s')
+        .value_name("SPEC_JSON");
+
+    let spec_path = Arg::new("spec-path")
+        .long("spec-path")
+        .short('S')
+        .value_name("SPEC_PATH");
+
+    // `CARGO_PKG_VERSION` is set by `cargo` at compile time.
+    let version = match option_env!("CARGO_PKG_VERSION") {
+        Some(pkg_version) => pkg_version,
+        None => "unknown",
+    };
+
+    clap::Command::new("compute_ctl")
+        .version(version)
+        .arg(connstr)
+        .arg(pgdata)
+        .arg(pgbin)
+        .arg(spec)
+        .arg(spec_path)
+}
+
+/// Runs clap's built-in consistency checks on the `compute_ctl` CLI definition.
+#[test]
+fn verify_cli() {
+    let command = cli();
+    command.debug_assert();
+}
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -178,7 +178,6 @@ impl ComputeNode {
            .args(&["--sync-safekeepers"])
            .env("PGDATA", &self.pgdata) // we cannot use -D in this mode
            .stdout(Stdio::piped())
-            .stderr(Stdio::piped())
            .spawn()
            .expect("postgres --sync-safekeepers failed to start");

@@ -191,10 +190,10 @@ impl ComputeNode {

        if !sync_output.status.success() {
            anyhow::bail!(
-                "postgres --sync-safekeepers exited with non-zero status: {}. stdout: {}, stderr: {}",
+                "postgres --sync-safekeepers exited with non-zero status: {}. stdout: {}",
                sync_output.status,
-                String::from_utf8(sync_output.stdout).expect("postgres --sync-safekeepers exited, and stdout is not utf-8"),
-                String::from_utf8(sync_output.stderr).expect("postgres --sync-safekeepers exited, and stderr is not utf-8"),
+                String::from_utf8(sync_output.stdout)
+                    .expect("postgres --sync-safekeepers exited, and stdout is not utf-8"),
            );
        }

@@ -258,14 +257,7 @@ impl ComputeNode {
            .spawn()
            .expect("cannot start postgres process");

-        // Try default Postgres port if it is not provided
-        let port = self
-            .spec
-            .cluster
-            .settings
-            .find("port")
-            .unwrap_or_else(|| "5432".to_string());
-        wait_for_postgres(&mut pg, &port, pgdata_path)?;
+        wait_for_postgres(&mut pg, pgdata_path)?;

        // If connection fails,
        // it may be the old node with `zenith_admin` superuser.
--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -1,18 +1,18 @@
 use std::fmt::Write;
+use std::fs;
 use std::fs::File;
 use std::io::{BufRead, BufReader};
-use std::net::{SocketAddr, TcpStream};
 use std::os::unix::fs::PermissionsExt;
 use std::path::Path;
 use std::process::Child;
-use std::str::FromStr;
-use std::{fs, thread, time};
+use std::time::{Duration, Instant};

 use anyhow::{bail, Result};
+use notify::{RecursiveMode, Watcher};
 use postgres::{Client, Transaction};
 use serde::Deserialize;

-const POSTGRES_WAIT_TIMEOUT: u64 = 60 * 1000; // milliseconds
+const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds

 /// Rust representation of Postgres role info with only those fields
 /// that matter for us.
@@ -168,7 +168,7 @@ impl Database {
    /// it may require a proper quoting too.
    pub fn to_pg_options(&self) -> String {
        let mut params: String = self.options.as_pg_options();
-        write!(params, " OWNER {}", &self.owner.quote())
+        write!(params, " OWNER {}", &self.owner.pg_quote())
            .expect("String is documented to not to error during write operations");

        params
@@ -179,18 +179,17 @@ impl Database {
 /// intended to be used for DB / role names.
 pub type PgIdent = String;

-/// Generic trait used to provide quoting for strings used in the
-/// Postgres SQL queries. Currently used only to implement quoting
-/// of identifiers, but could be used for literals in the future.
-pub trait PgQuote {
-    fn quote(&self) -> String;
+/// Generic trait used to provide quoting / encoding for strings used in the
+/// Postgres SQL queries and DATABASE_URL.
+pub trait Escaping {
+    fn pg_quote(&self) -> String;
 }

-impl PgQuote for PgIdent {
+impl Escaping for PgIdent {
    /// This is intended to mimic Postgres quote_ident(), but for simplicity it
-    /// always quotes provided string with `""` and escapes every `"`. Not idempotent,
-    /// i.e. if string is already escaped it will be escaped again.
-    fn quote(&self) -> String {
+    /// always quotes provided string with `""` and escapes every `"`.
+    /// **Not idempotent**, i.e. if string is already escaped it will be escaped again.
+    fn pg_quote(&self) -> String {
        let result = format!("\"{}\"", self.replace('"', "\"\""));
        result
    }
@@ -230,52 +229,112 @@ pub fn get_existing_dbs(client: &mut Client) -> Result<Vec<Database>> {
    Ok(postgres_dbs)
 }

-/// Wait for Postgres to become ready to accept connections:
-/// - state should be `ready` in the `pgdata/postmaster.pid`
-/// - and we should be able to connect to 127.0.0.1:5432
-pub fn wait_for_postgres(pg: &mut Child, port: &str, pgdata: &Path) -> Result<()> {
+/// Wait for Postgres to become ready to accept connections. It's ready to
+/// accept connections when the state-field in `pgdata/postmaster.pid` says
+/// 'ready'.
+///
+/// Returns an error if the file watcher cannot be set up or re-pointed, if
+/// Postgres exits while we are waiting, or if `POSTGRES_WAIT_TIMEOUT` elapses
+/// before the pid file reports 'ready'.
+pub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> {
    let pid_path = pgdata.join("postmaster.pid");
-    let mut slept: u64 = 0; // ms
-    let pause = time::Duration::from_millis(100);

-    let timeout = time::Duration::from_millis(10);
-    let addr = SocketAddr::from_str(&format!("127.0.0.1:{}", port)).unwrap();
+    // PostgreSQL writes line "ready" to the postmaster.pid file, when it has
+    // completed initialization and is ready to accept connections. We want to
+    // react quickly and perform the rest of our initialization as soon as
+    // PostgreSQL starts accepting connections. Use 'notify' to be notified
+    // whenever the PID file is changed, and whenever it changes, read it to
+    // check if it's now "ready".
+    //
+    // You cannot actually watch a file before it exists, so we first watch the
+    // data directory, and once the postmaster.pid file appears, we switch to
+    // watch the file instead. We also wake up every 100 ms to poll, just in
+    // case we miss some events for some reason. Not strictly necessary, but
+    // better safe than sorry.
+    let (tx, rx) = std::sync::mpsc::channel();
+    let (mut watcher, rx): (Box<dyn Watcher>, _) = match notify::recommended_watcher(move |res| {
+        let _ = tx.send(res);
+    }) {
+        Ok(watcher) => (Box::new(watcher), rx),
+        Err(e) => {
+            match e.kind {
+                notify::ErrorKind::Io(os) if os.raw_os_error() == Some(38) => {
+                    // docker on m1 macs does not support recommended_watcher
+                    // but return "Function not implemented (os error 38)"
+                    // see https://github.com/notify-rs/notify/issues/423
+                    let (tx, rx) = std::sync::mpsc::channel();

-    loop {
-        // Sleep POSTGRES_WAIT_TIMEOUT at max (a bit longer actually if consider a TCP timeout,
-        // but postgres starts listening almost immediately, even if it is not really
-        // ready to accept connections).
-        if slept >= POSTGRES_WAIT_TIMEOUT {
-            bail!("timed out while waiting for Postgres to start");
+                    // let's poll it faster than what we check the results for (100ms)
+                    let config =
+                        notify::Config::default().with_poll_interval(Duration::from_millis(50));
+
+                    let watcher = notify::PollWatcher::new(
+                        move |res| {
+                            let _ = tx.send(res);
+                        },
+                        config,
+                    )?;
+
+                    (Box::new(watcher), rx)
+                }
+                _ => return Err(e.into()),
+            }
        }
+    };

+    watcher.watch(pgdata, RecursiveMode::NonRecursive)?;
+
+    let started_at = Instant::now();
+    let mut postmaster_pid_seen = false;
+    loop {
        if let Ok(Some(status)) = pg.try_wait() {
            // Postgres exited, that is not what we expected, bail out earlier.
            let code = status.code().unwrap_or(-1);
            bail!("Postgres exited unexpectedly with code {}", code);
        }

+        let res = rx.recv_timeout(Duration::from_millis(100));
+        log::debug!("woken up by notify: {res:?}");
+        // If there are multiple events in the channel already, we only need to
+        // check once. Swallow the extra events before we go ahead and check the
+        // pid file.
+        while let Ok(res) = rx.try_recv() {
+            log::debug!("swallowing extra event: {res:?}");
+        }
+
        // Check that we can open pid file first.
        if let Ok(file) = File::open(&pid_path) {
+            if !postmaster_pid_seen {
+                log::debug!("postmaster.pid appeared");
+                // Switch from watching the data directory to watching the pid
+                // file itself. Failures are reported to the caller instead of
+                // panicking, like the initial `watch` call above.
+                watcher.unwatch(pgdata).map_err(|e| {
+                    anyhow::Error::from(e).context("Failed to remove pgdata dir watch")
+                })?;
+                watcher
+                    .watch(&pid_path, RecursiveMode::NonRecursive)
+                    .map_err(|e| {
+                        anyhow::Error::from(e).context("Failed to add postmaster.pid file watch")
+                    })?;
+                postmaster_pid_seen = true;
+            }
+
            let file = BufReader::new(file);
            let last_line = file.lines().last();

            // Pid file could be there and we could read it, but it could be empty, for example.
            if let Some(Ok(line)) = last_line {
                let status = line.trim();
-                let can_connect = TcpStream::connect_timeout(&addr, timeout).is_ok();
+                log::debug!("last line of postmaster.pid: {status:?}");

                // Now Postgres is ready to accept connections
-                if status == "ready" && can_connect {
+                if status == "ready" {
                    break;
                }
            }
        }

-        thread::sleep(pause);
-        slept += 100;
+        // Give up after POSTGRES_WAIT_TIMEOUT.
+        let duration = started_at.elapsed();
+        if duration >= POSTGRES_WAIT_TIMEOUT {
+            bail!("timed out while waiting for Postgres to start");
+        }
    }

+    log::info!("PostgreSQL is now running, continuing to configure it");
+
    Ok(())
 }

--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -1,7 +1,9 @@
 use std::path::Path;
+use std::str::FromStr;

 use anyhow::Result;
 use log::{info, log_enabled, warn, Level};
+use postgres::config::Config;
 use postgres::{Client, NoTls};
 use serde::Deserialize;

@@ -115,8 +117,8 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
                    if existing_roles.iter().any(|r| r.name == op.name) {
                        let query: String = format!(
                            "ALTER ROLE {} RENAME TO {}",
-                            op.name.quote(),
-                            new_name.quote()
+                            op.name.pg_quote(),
+                            new_name.pg_quote()
                        );

                        warn!("renaming role '{}' to '{}'", op.name, new_name);
@@ -162,7 +164,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
            }

            if update_role {
-                let mut query: String = format!("ALTER ROLE {} ", name.quote());
+                let mut query: String = format!("ALTER ROLE {} ", name.pg_quote());
                info_print!(" -> update");

                query.push_str(&role.to_pg_options());
@@ -170,7 +172,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
            }
        } else {
            info!("role name: '{}'", &name);
-            let mut query: String = format!("CREATE ROLE {} ", name.quote());
+            let mut query: String = format!("CREATE ROLE {} ", name.pg_quote());
            info!("role create query: '{}'", &query);
            info_print!(" -> create");

@@ -179,7 +181,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {

            let grant_query = format!(
                "GRANT pg_read_all_data, pg_write_all_data TO {}",
-                name.quote()
+                name.pg_quote()
            );
            xact.execute(grant_query.as_str(), &[])?;
            info!("role grant query: '{}'", &grant_query);
@@ -215,7 +217,7 @@ pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<
            // We do not check either role exists or not,
            // Postgres will take care of it for us
            if op.action == "delete_role" {
-                let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.quote());
+                let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.pg_quote());

                warn!("deleting role '{}'", &op.name);
                xact.execute(query.as_str(), &[])?;
@@ -230,17 +232,16 @@ pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<
 fn reassign_owned_objects(node: &ComputeNode, role_name: &PgIdent) -> Result<()> {
    for db in &node.spec.cluster.databases {
        if db.owner != *role_name {
-            let mut connstr = node.connstr.clone();
-            // database name is always the last and the only component of the path
-            connstr.set_path(&db.name);
+            let mut conf = Config::from_str(node.connstr.as_str())?;
+            conf.dbname(&db.name);

-            let mut client = Client::connect(connstr.as_str(), NoTls)?;
+            let mut client = conf.connect(NoTls)?;

            // This will reassign all dependent objects to the db owner
            let reassign_query = format!(
                "REASSIGN OWNED BY {} TO {}",
-                role_name.quote(),
-                db.owner.quote()
+                role_name.pg_quote(),
+                db.owner.pg_quote()
            );
            info!(
                "reassigning objects owned by '{}' in db '{}' to '{}'",
@@ -249,7 +250,7 @@ fn reassign_owned_objects(node: &ComputeNode, role_name: &PgIdent) -> Result<()>
            client.simple_query(&reassign_query)?;

            // This now will only drop privileges of the role
-            let drop_query = format!("DROP OWNED BY {}", role_name.quote());
+            let drop_query = format!("DROP OWNED BY {}", role_name.pg_quote());
            client.simple_query(&drop_query)?;
        }
    }
@@ -279,7 +280,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
                // We do not check either DB exists or not,
                // Postgres will take care of it for us
                "delete_db" => {
-                    let query: String = format!("DROP DATABASE IF EXISTS {}", &op.name.quote());
+                    let query: String = format!("DROP DATABASE IF EXISTS {}", &op.name.pg_quote());

                    warn!("deleting database '{}'", &op.name);
                    client.execute(query.as_str(), &[])?;
@@ -291,8 +292,8 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
                    if existing_dbs.iter().any(|r| r.name == op.name) {
                        let query: String = format!(
                            "ALTER DATABASE {} RENAME TO {}",
-                            op.name.quote(),
-                            new_name.quote()
+                            op.name.pg_quote(),
+                            new_name.pg_quote()
                        );

                        warn!("renaming database '{}' to '{}'", op.name, new_name);
@@ -320,7 +321,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
            // XXX: db owner name is returned as quoted string from Postgres,
            // when quoting is needed.
            let new_owner = if r.owner.starts_with('"') {
-                db.owner.quote()
+                db.owner.pg_quote()
            } else {
                db.owner.clone()
            };
@@ -328,15 +329,15 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
            if new_owner != r.owner {
                let query: String = format!(
                    "ALTER DATABASE {} OWNER TO {}",
-                    name.quote(),
-                    db.owner.quote()
+                    name.pg_quote(),
+                    db.owner.pg_quote()
                );
                info_print!(" -> update");

                client.execute(query.as_str(), &[])?;
            }
        } else {
-            let mut query: String = format!("CREATE DATABASE {} ", name.quote());
+            let mut query: String = format!("CREATE DATABASE {} ", name.pg_quote());
            info_print!(" -> create");

            query.push_str(&db.to_pg_options());
@@ -366,7 +367,7 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
        .cluster
        .roles
        .iter()
-        .map(|r| r.name.quote())
+        .map(|r| r.name.pg_quote())
        .collect::<Vec<_>>();

    for db in &spec.cluster.databases {
@@ -374,7 +375,7 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {

        let query: String = format!(
            "GRANT CREATE ON DATABASE {} TO {}",
-            dbname.quote(),
+            dbname.pg_quote(),
            roles.join(", ")
        );
        info!("grant query {}", &query);
@@ -385,12 +386,11 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
    // Do some per-database access adjustments. We'd better do this at db creation time,
    // but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
    // atomically.
-    let mut db_connstr = node.connstr.clone();
    for db in &node.spec.cluster.databases {
-        // database name is always the last and the only component of the path
-        db_connstr.set_path(&db.name);
+        let mut conf = Config::from_str(node.connstr.as_str())?;
+        conf.dbname(&db.name);

-        let mut db_client = Client::connect(db_connstr.as_str(), NoTls)?;
+        let mut db_client = conf.connect(NoTls)?;

        // This will only change ownership on the schema itself, not the objects
        // inside it. Without it owner of the `public` schema will be `cloud_admin`
@@ -419,9 +419,36 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
                    END IF;\n\
                END\n\
            $$;",
-            db.owner.quote()
+            db.owner.pg_quote()
        );
        db_client.simple_query(&alter_query)?;
+
+        // Explicitly grant CREATE ON SCHEMA PUBLIC to the web_access user.
+        // This is needed since postgres 15, where this privilege is removed by default.
+        let grant_query = "DO $$\n\
+                BEGIN\n\
+                    IF EXISTS(\n\
+                        SELECT nspname\n\
+                        FROM pg_catalog.pg_namespace\n\
+                        WHERE nspname = 'public'\n\
+                    ) AND\n\
+                    version() LIKE 'PostgreSQL 15%'\n\
+                    THEN\n\
+                        IF EXISTS(\n\
+                            SELECT rolname\n\
+                            FROM pg_catalog.pg_roles\n\
+                            WHERE rolname = 'web_access'\n\
+                        )\n\
+                        THEN\n\
+                            GRANT CREATE ON SCHEMA public TO web_access;\n\
+                        END IF;\n\
+                    END IF;\n\
+                END\n\
+            $$;"
+        .to_string();
+
+        info!("grant query for db {} : {}", &db.name, &grant_query);
+        db_client.simple_query(&grant_query)?;
    }

    Ok(())
--- a/compute_tools/tests/pg_helpers_tests.rs
+++ b/compute_tools/tests/pg_helpers_tests.rs
@@ -33,9 +33,9 @@ mod pg_helpers_tests {
    }

    #[test]
-    fn quote_ident() {
+    fn ident_pg_quote() {
        let ident: PgIdent = PgIdent::from("\"name\";\\n select 1;");

-        assert_eq!(ident.quote(), "\"\"\"name\"\";\\n select 1;\"");
+        assert_eq!(ident.pg_quote(), "\"\"\"name\"\";\\n select 1;\"");
    }
 }
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -4,22 +4,24 @@ version = "0.1.0"
 edition = "2021"

 [dependencies]
-clap = "3.0"
-comfy-table = "5.0.1"
+clap = "4.0"
+comfy-table = "6.1"
 git-version = "0.3.5"
 tar = "0.4.38"
-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
+postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
 serde = { version = "1.0", features = ["derive"] }
-serde_with = "1.12.0"
+serde_with = "2.0"
 toml = "0.5"
 once_cell = "1.13.0"
 regex = "1"
 anyhow = "1.0"
 thiserror = "1"
-nix = "0.23"
+nix = "0.25"
 reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }

-pageserver = { path = "../pageserver" }
-safekeeper = { path = "../safekeeper" }
+# Note: Do not directly depend on pageserver or safekeeper; use pageserver_api or safekeeper_api
+# instead, so that recompile times are better.
+pageserver_api = { path = "../libs/pageserver_api" }
+safekeeper_api = { path = "../libs/safekeeper_api" }
 utils = { path = "../libs/utils" }
 workspace_hack = { version = "0.1", path = "../workspace_hack" }
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -6,18 +6,18 @@
 //! rely on `neon_local` to set up the environment for each test.
 //!
 use anyhow::{anyhow, bail, Context, Result};
-use clap::{App, AppSettings, Arg, ArgMatches};
+use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
 use control_plane::compute::ComputeControlPlane;
 use control_plane::local_env::{EtcdBroker, LocalEnv};
 use control_plane::safekeeper::SafekeeperNode;
 use control_plane::storage::PageServerNode;
 use control_plane::{etcd, local_env};
-use pageserver::config::defaults::{
+use pageserver_api::models::TimelineInfo;
+use pageserver_api::{
    DEFAULT_HTTP_LISTEN_ADDR as DEFAULT_PAGESERVER_HTTP_ADDR,
    DEFAULT_PG_LISTEN_ADDR as DEFAULT_PAGESERVER_PG_ADDR,
 };
-use pageserver::http::models::TimelineInfo;
-use safekeeper::defaults::{
+use safekeeper_api::{
    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
    DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
 };
@@ -39,6 +39,8 @@ const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);
 const DEFAULT_BRANCH_NAME: &str = "main";
 project_git_version!(GIT_VERSION);

+const DEFAULT_PG_VERSION: &str = "14";
+
 fn default_conf(etcd_binary_path: &Path) -> String {
    format!(
        r#"
@@ -83,196 +85,7 @@ struct TimelineTreeEl {
 //   * Providing CLI api to the pageserver
 //   * TODO: export/import to/from usual postgres
 fn main() -> Result<()> {
-    let branch_name_arg = Arg::new("branch-name")
-        .long("branch-name")
-        .takes_value(true)
-        .help("Name of the branch to be created or used as an alias for other services")
-        .required(false);
-
-    let pg_node_arg = Arg::new("node").help("Postgres node name").required(false);
-
-    let safekeeper_id_arg = Arg::new("id").help("safekeeper id").required(false);
-
-    let tenant_id_arg = Arg::new("tenant-id")
-        .long("tenant-id")
-        .help("Tenant id. Represented as a hexadecimal string 32 symbols length")
-        .takes_value(true)
-        .required(false);
-
-    let timeline_id_arg = Arg::new("timeline-id")
-        .long("timeline-id")
-        .help("Timeline id. Represented as a hexadecimal string 32 symbols length")
-        .takes_value(true)
-        .required(false);
-
-    let port_arg = Arg::new("port")
-        .long("port")
-        .required(false)
-        .value_name("port");
-
-    let stop_mode_arg = Arg::new("stop-mode")
-        .short('m')
-        .takes_value(true)
-        .possible_values(&["fast", "immediate"])
-        .help("If 'immediate', don't flush repository data at shutdown")
-        .required(false)
-        .value_name("stop-mode");
-
-    let pageserver_config_args = Arg::new("pageserver-config-override")
-        .long("pageserver-config-override")
-        .takes_value(true)
-        .number_of_values(1)
-        .multiple_occurrences(true)
-        .help("Additional pageserver's configuration options or overrides, refer to pageserver's 'config-override' CLI parameter docs for more")
-        .required(false);
-
-    let lsn_arg = Arg::new("lsn")
-        .long("lsn")
-        .help("Specify Lsn on the timeline to start from. By default, end of the timeline would be used.")
-        .takes_value(true)
-        .required(false);
-
-    let matches = App::new("Neon CLI")
-        .setting(AppSettings::ArgRequiredElseHelp)
-        .version(GIT_VERSION)
-        .subcommand(
-            App::new("init")
-                .about("Initialize a new Neon repository")
-                .arg(pageserver_config_args.clone())
-                .arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
-                .arg(
-                    Arg::new("config")
-                        .long("config")
-                        .required(false)
-                        .value_name("config"),
-                )
-        )
-        .subcommand(
-            App::new("timeline")
-            .about("Manage timelines")
-            .subcommand(App::new("list")
-                .about("List all timelines, available to this pageserver")
-                .arg(tenant_id_arg.clone()))
-            .subcommand(App::new("branch")
-                .about("Create a new timeline, using another timeline as a base, copying its data")
-                .arg(tenant_id_arg.clone())
-                .arg(branch_name_arg.clone())
-                .arg(Arg::new("ancestor-branch-name").long("ancestor-branch-name").takes_value(true)
-                    .help("Use last Lsn of another timeline (and its data) as base when creating the new timeline. The timeline gets resolved by its branch name.").required(false))
-                .arg(Arg::new("ancestor-start-lsn").long("ancestor-start-lsn").takes_value(true)
-                    .help("When using another timeline as base, use a specific Lsn in it instead of the latest one").required(false)))
-            .subcommand(App::new("create")
-                .about("Create a new blank timeline")
-                .arg(tenant_id_arg.clone())
-                .arg(branch_name_arg.clone()))
-            .subcommand(App::new("import")
-                .about("Import timeline from basebackup directory")
-                .arg(tenant_id_arg.clone())
-                .arg(timeline_id_arg.clone())
-                .arg(Arg::new("node-name").long("node-name").takes_value(true)
-                    .help("Name to assign to the imported timeline"))
-                .arg(Arg::new("base-tarfile").long("base-tarfile").takes_value(true)
-                    .help("Basebackup tarfile to import"))
-                .arg(Arg::new("base-lsn").long("base-lsn").takes_value(true)
-                    .help("Lsn the basebackup starts at"))
-                .arg(Arg::new("wal-tarfile").long("wal-tarfile").takes_value(true)
-                    .help("Wal to add after base"))
-                .arg(Arg::new("end-lsn").long("end-lsn").takes_value(true)
-                    .help("Lsn the basebackup ends at")))
-        ).subcommand(
-            App::new("tenant")
-            .setting(AppSettings::ArgRequiredElseHelp)
-            .about("Manage tenants")
-            .subcommand(App::new("list"))
-            .subcommand(App::new("create")
-                .arg(tenant_id_arg.clone())
-                .arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
-                .arg(Arg::new("config").short('c').takes_value(true).multiple_occurrences(true).required(false))
-                )
-            .subcommand(App::new("config")
-                .arg(tenant_id_arg.clone())
-                .arg(Arg::new("config").short('c').takes_value(true).multiple_occurrences(true).required(false))
-                )
-        )
-        .subcommand(
-            App::new("pageserver")
-                .setting(AppSettings::ArgRequiredElseHelp)
-                .about("Manage pageserver")
-                .subcommand(App::new("status"))
-                .subcommand(App::new("start").about("Start local pageserver").arg(pageserver_config_args.clone()))
-                .subcommand(App::new("stop").about("Stop local pageserver")
-                            .arg(stop_mode_arg.clone()))
-                .subcommand(App::new("restart").about("Restart local pageserver").arg(pageserver_config_args.clone()))
-        )
-        .subcommand(
-            App::new("safekeeper")
-                .setting(AppSettings::ArgRequiredElseHelp)
-                .about("Manage safekeepers")
-                .subcommand(App::new("start")
-                            .about("Start local safekeeper")
-                            .arg(safekeeper_id_arg.clone())
-                )
-                .subcommand(App::new("stop")
-                            .about("Stop local safekeeper")
-                            .arg(safekeeper_id_arg.clone())
-                            .arg(stop_mode_arg.clone())
-                )
-                .subcommand(App::new("restart")
-                            .about("Restart local safekeeper")
-                            .arg(safekeeper_id_arg.clone())
-                            .arg(stop_mode_arg.clone())
-                )
-        )
-        .subcommand(
-            App::new("pg")
-                .setting(AppSettings::ArgRequiredElseHelp)
-                .about("Manage postgres instances")
-                .subcommand(App::new("list").arg(tenant_id_arg.clone()))
-                .subcommand(App::new("create")
-                    .about("Create a postgres compute node")
-                    .arg(pg_node_arg.clone())
-                    .arg(branch_name_arg.clone())
-                    .arg(tenant_id_arg.clone())
-                    .arg(lsn_arg.clone())
-                    .arg(port_arg.clone())
-                    .arg(
-                        Arg::new("config-only")
-                            .help("Don't do basebackup, create compute node with only config files")
-                            .long("config-only")
-                            .required(false)
-                    ))
-                .subcommand(App::new("start")
-                    .about("Start a postgres compute node.\n This command actually creates new node from scratch, but preserves existing config files")
-                    .arg(pg_node_arg.clone())
-                    .arg(tenant_id_arg.clone())
-                    .arg(branch_name_arg.clone())
-                    .arg(timeline_id_arg.clone())
-                    .arg(lsn_arg.clone())
-                    .arg(port_arg.clone()))
-                .subcommand(
-                    App::new("stop")
-                    .arg(pg_node_arg.clone())
-                    .arg(tenant_id_arg.clone())
-                    .arg(
-                        Arg::new("destroy")
-                            .help("Also delete data directory (now optional, should be default in future)")
-                            .long("destroy")
-                            .required(false)
-                    )
-                    )
-
-        )
-        .subcommand(
-            App::new("start")
-                .about("Start page server and safekeepers")
-                .arg(pageserver_config_args)
-        )
-        .subcommand(
-            App::new("stop")
-                .about("Stop page server and safekeepers")
-                .arg(stop_mode_arg.clone())
-        )
-        .get_matches();
+    let matches = cli().get_matches();

    let (sub_name, sub_args) = match matches.subcommand() {
        Some(subcommand_data) => subcommand_data,
@@ -340,9 +153,7 @@ fn print_timelines_tree(

    // Memorize all direct children of each timeline.
    for timeline in timelines.iter() {
-        if let Some(ancestor_timeline_id) =
-            timeline.local.as_ref().and_then(|l| l.ancestor_timeline_id)
-        {
+        if let Some(ancestor_timeline_id) = timeline.ancestor_timeline_id {
            timelines_hash
                .get_mut(&ancestor_timeline_id)
                .context("missing timeline info in the HashMap")?
@@ -353,13 +164,7 @@ fn print_timelines_tree(

    for timeline in timelines_hash.values() {
        // Start with root local timelines (no ancestors) first.
-        if timeline
-            .info
-            .local
-            .as_ref()
-            .and_then(|l| l.ancestor_timeline_id)
-            .is_none()
-        {
+        if timeline.info.ancestor_timeline_id.is_none() {
            print_timeline(0, &Vec::from([true]), timeline, &timelines_hash)?;
        }
    }
@@ -376,17 +181,8 @@ fn print_timeline(
    timeline: &TimelineTreeEl,
    timelines: &HashMap<TimelineId, TimelineTreeEl>,
 ) -> Result<()> {
-    let local_remote = match (timeline.info.local.as_ref(), timeline.info.remote.as_ref()) {
-        (None, None) => unreachable!("in this case no info for a timeline is found"),
-        (None, Some(_)) => "(R)",
-        (Some(_), None) => "(L)",
-        (Some(_), Some(_)) => "(L+R)",
-    };
-    // Draw main padding
-    print!("{} ", local_remote);
-
    if nesting_level > 0 {
-        let ancestor_lsn = match timeline.info.local.as_ref().and_then(|i| i.ancestor_lsn) {
+        let ancestor_lsn = match timeline.info.ancestor_lsn {
            Some(lsn) => lsn.to_string(),
            None => "Unknown Lsn".to_string(),
        };
@@ -474,16 +270,16 @@ fn get_tenant_id(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::R

+/// Reads the optional `tenant-id` argument and parses it into a `TenantId`.
+///
+/// Returns `Ok(None)` when the argument was not supplied, and an error carrying
+/// the context message below when the supplied string fails `TenantId::from_str`.
 fn parse_tenant_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TenantId>> {
    sub_match
-        .value_of("tenant-id")
-        .map(TenantId::from_str)
+        .get_one::<String>("tenant-id")
+        .map(|tenant_id| TenantId::from_str(tenant_id))
        .transpose()
        .context("Failed to parse tenant id from the argument string")
 }

+/// Reads the optional `timeline-id` argument and parses it into a `TimelineId`.
+///
+/// Returns `Ok(None)` when the argument was not supplied, and an error carrying
+/// the context message below when the supplied string fails `TimelineId::from_str`.
 fn parse_timeline_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TimelineId>> {
    sub_match
-        .value_of("timeline-id")
-        .map(TimelineId::from_str)
+        .get_one::<String>("timeline-id")
+        .map(|timeline_id| TimelineId::from_str(timeline_id))
        .transpose()
        .context("Failed to parse timeline id from the argument string")
 }
@@ -492,18 +288,28 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
    let initial_timeline_id_arg = parse_timeline_id(init_match)?;

    // Create config file
-    let toml_file: String = if let Some(config_path) = init_match.value_of("config") {
+    let toml_file: String = if let Some(config_path) = init_match.get_one::<PathBuf>("config") {
        // load and parse the file
-        std::fs::read_to_string(std::path::Path::new(config_path))
-            .with_context(|| format!("Could not read configuration file '{config_path}'"))?
+        std::fs::read_to_string(config_path).with_context(|| {
+            format!(
+                "Could not read configuration file '{}'",
+                config_path.display()
+            )
+        })?
    } else {
        // Built-in default config
        default_conf(&EtcdBroker::locate_etcd()?)
    };

+    let pg_version = init_match
+        .get_one::<u32>("pg-version")
+        .copied()
+        .context("Failed to parse postgres version from the argument string")?;
+
    let mut env =
        LocalEnv::parse_config(&toml_file).context("Failed to create neon configuration")?;
-    env.init().context("Failed to initialize neon repository")?;
+    env.init(pg_version)
+        .context("Failed to initialize neon repository")?;
    let initial_tenant_id = env
        .default_tenant_id
        .expect("default_tenant_id should be generated by the `env.init()` call above");
@@ -515,6 +321,7 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
            Some(initial_tenant_id),
            initial_timeline_id_arg,
            &pageserver_config_overrides(init_match),
+            pg_version,
        )
        .unwrap_or_else(|e| {
            eprintln!("pageserver init failed: {e}");
@@ -532,9 +339,10 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {

+/// Collects every `pageserver-config-override` value given on the command line
+/// as borrowed string slices; the result is empty when the argument is absent.
 fn pageserver_config_overrides(init_match: &ArgMatches) -> Vec<&str> {
    init_match
-        .values_of("pageserver-config-override")
+        .get_many::<String>("pageserver-config-override")
        .into_iter()
        .flatten()
+        .map(|s| s.as_str())
        .collect()
 }

@@ -549,7 +357,7 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
        Some(("create", create_match)) => {
            let initial_tenant_id = parse_tenant_id(create_match)?;
            let tenant_conf: HashMap<_, _> = create_match
-                .values_of("config")
+                .get_many::<String>("config")
                .map(|vals| vals.flat_map(|c| c.split_once(':')).collect())
                .unwrap_or_default();
            let new_tenant_id = pageserver.tenant_create(initial_tenant_id, tenant_conf)?;
@@ -557,13 +365,20 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an

            // Create an initial timeline for the new tenant
            let new_timeline_id = parse_timeline_id(create_match)?;
-            let timeline_info =
-                pageserver.timeline_create(new_tenant_id, new_timeline_id, None, None)?;
+            let pg_version = create_match
+                .get_one::<u32>("pg-version")
+                .copied()
+                .context("Failed to parse postgres version from the argument string")?;
+
+            let timeline_info = pageserver.timeline_create(
+                new_tenant_id,
+                new_timeline_id,
+                None,
+                None,
+                Some(pg_version),
+            )?;
            let new_timeline_id = timeline_info.timeline_id;
-            let last_record_lsn = timeline_info
-                .local
-                .context(format!("Failed to get last record LSN: no local timeline info for timeline {new_timeline_id}"))?
-                .last_record_lsn;
+            let last_record_lsn = timeline_info.last_record_lsn;

            env.register_branch_mapping(
                DEFAULT_BRANCH_NAME.to_string(),
@@ -578,7 +393,7 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
        Some(("config", create_match)) => {
            let tenant_id = get_tenant_id(create_match, env)?;
            let tenant_conf: HashMap<_, _> = create_match
-                .values_of("config")
+                .get_many::<String>("config")
                .map(|vals| vals.flat_map(|c| c.split_once(':')).collect())
                .unwrap_or_default();

@@ -605,15 +420,19 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
        Some(("create", create_match)) => {
            let tenant_id = get_tenant_id(create_match, env)?;
            let new_branch_name = create_match
-                .value_of("branch-name")
+                .get_one::<String>("branch-name")
                .ok_or_else(|| anyhow!("No branch name provided"))?;
-            let timeline_info = pageserver.timeline_create(tenant_id, None, None, None)?;
+
+            let pg_version = create_match
+                .get_one::<u32>("pg-version")
+                .copied()
+                .context("Failed to parse postgres version from the argument string")?;
+
+            let timeline_info =
+                pageserver.timeline_create(tenant_id, None, None, None, Some(pg_version))?;
            let new_timeline_id = timeline_info.timeline_id;

-            let last_record_lsn = timeline_info
-                .local
-                .expect("no local timeline info")
-                .last_record_lsn;
+            let last_record_lsn = timeline_info.last_record_lsn;
            env.register_branch_mapping(new_branch_name.to_string(), tenant_id, new_timeline_id)?;

            println!(
@@ -625,46 +444,51 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
            let tenant_id = get_tenant_id(import_match, env)?;
            let timeline_id = parse_timeline_id(import_match)?.expect("No timeline id provided");
            let name = import_match
-                .value_of("node-name")
+                .get_one::<String>("node-name")
                .ok_or_else(|| anyhow!("No node name provided"))?;

            // Parse base inputs
            let base_tarfile = import_match
-                .value_of("base-tarfile")
-                .map(|s| PathBuf::from_str(s).unwrap())
-                .ok_or_else(|| anyhow!("No base-tarfile provided"))?;
+                .get_one::<PathBuf>("base-tarfile")
+                .ok_or_else(|| anyhow!("No base-tarfile provided"))?
+                .to_owned();
            let base_lsn = Lsn::from_str(
                import_match
-                    .value_of("base-lsn")
+                    .get_one::<String>("base-lsn")
                    .ok_or_else(|| anyhow!("No base-lsn provided"))?,
            )?;
            let base = (base_lsn, base_tarfile);

            // Parse pg_wal inputs
-            let wal_tarfile = import_match
-                .value_of("wal-tarfile")
-                .map(|s| PathBuf::from_str(s).unwrap());
+            let wal_tarfile = import_match.get_one::<PathBuf>("wal-tarfile").cloned();
            let end_lsn = import_match
-                .value_of("end-lsn")
+                .get_one::<String>("end-lsn")
                .map(|s| Lsn::from_str(s).unwrap());
            // TODO validate both or none are provided
            let pg_wal = end_lsn.zip(wal_tarfile);

+            let pg_version = import_match
+                .get_one::<u32>("pg-version")
+                .copied()
+                .context("Failed to parse postgres version from the argument string")?;
+
            let mut cplane = ComputeControlPlane::load(env.clone())?;
            println!("Importing timeline into pageserver ...");
-            pageserver.timeline_import(tenant_id, timeline_id, base, pg_wal)?;
+            pageserver.timeline_import(tenant_id, timeline_id, base, pg_wal, pg_version)?;
            println!("Creating node for imported timeline ...");
            env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?;
-            cplane.new_node(tenant_id, name, timeline_id, None, None)?;
+
+            cplane.new_node(tenant_id, name, timeline_id, None, None, pg_version)?;
            println!("Done");
        }
        Some(("branch", branch_match)) => {
            let tenant_id = get_tenant_id(branch_match, env)?;
            let new_branch_name = branch_match
-                .value_of("branch-name")
+                .get_one::<String>("branch-name")
                .ok_or_else(|| anyhow!("No branch name provided"))?;
            let ancestor_branch_name = branch_match
-                .value_of("ancestor-branch-name")
+                .get_one::<String>("ancestor-branch-name")
+                .map(|s| s.as_str())
                .unwrap_or(DEFAULT_BRANCH_NAME);
            let ancestor_timeline_id = env
                .get_branch_timeline_id(ancestor_branch_name, tenant_id)
@@ -673,8 +497,8 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
                })?;

            let start_lsn = branch_match
-                .value_of("ancestor-start-lsn")
-                .map(Lsn::from_str)
+                .get_one::<String>("ancestor-start-lsn")
+                .map(|lsn_str| Lsn::from_str(lsn_str))
                .transpose()
                .context("Failed to parse ancestor start Lsn from the request")?;
            let timeline_info = pageserver.timeline_create(
@@ -682,13 +506,11 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
                None,
                start_lsn,
                Some(ancestor_timeline_id),
+                None,
            )?;
            let new_timeline_id = timeline_info.timeline_id;

-            let last_record_lsn = timeline_info
-                .local
-                .expect("no local timeline info")
-                .last_record_lsn;
+            let last_record_lsn = timeline_info.last_record_lsn;

            env.register_branch_mapping(new_branch_name.to_string(), tenant_id, new_timeline_id)?;

@@ -748,7 +570,7 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
                        // Use the LSN at the end of the timeline.
                        timeline_infos
                            .get(&node.timeline_id)
-                            .and_then(|bi| bi.local.as_ref().map(|l| l.last_record_lsn.to_string()))
+                            .map(|bi| bi.last_record_lsn.to_string())
                            .unwrap_or_else(|| "?".to_string())
                    }
                    Some(lsn) => {
@@ -777,38 +599,39 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
        }
        "create" => {
            let branch_name = sub_args
-                .value_of("branch-name")
+                .get_one::<String>("branch-name")
+                .map(|s| s.as_str())
                .unwrap_or(DEFAULT_BRANCH_NAME);
            let node_name = sub_args
-                .value_of("node")
-                .map(ToString::to_string)
-                .unwrap_or_else(|| format!("{}_node", branch_name));
+                .get_one::<String>("node")
+                .map(|node_name| node_name.to_string())
+                .unwrap_or_else(|| format!("{branch_name}_node"));

            let lsn = sub_args
-                .value_of("lsn")
-                .map(Lsn::from_str)
+                .get_one::<String>("lsn")
+                .map(|lsn_str| Lsn::from_str(lsn_str))
                .transpose()
                .context("Failed to parse Lsn from the request")?;
            let timeline_id = env
                .get_branch_timeline_id(branch_name, tenant_id)
-                .ok_or_else(|| anyhow!("Found no timeline id for branch name '{}'", branch_name))?;
+                .ok_or_else(|| anyhow!("Found no timeline id for branch name '{branch_name}'"))?;

-            let port: Option<u16> = match sub_args.value_of("port") {
-                Some(p) => Some(p.parse()?),
-                None => None,
-            };
-            cplane.new_node(tenant_id, &node_name, timeline_id, lsn, port)?;
+            let port: Option<u16> = sub_args.get_one::<u16>("port").copied();
+
+            let pg_version = sub_args
+                .get_one::<u32>("pg-version")
+                .copied()
+                .context("Failed to parse postgres version from the argument string")?;
+
+            cplane.new_node(tenant_id, &node_name, timeline_id, lsn, port, pg_version)?;
        }
        "start" => {
-            let port: Option<u16> = match sub_args.value_of("port") {
-                Some(p) => Some(p.parse()?),
-                None => None,
-            };
+            let port: Option<u16> = sub_args.get_one::<u16>("port").copied();
            let node_name = sub_args
-                .value_of("node")
+                .get_one::<String>("node")
                .ok_or_else(|| anyhow!("No node name was provided to start"))?;

-            let node = cplane.nodes.get(&(tenant_id, node_name.to_owned()));
+            let node = cplane.nodes.get(&(tenant_id, node_name.to_string()));

            let auth_token = if matches!(env.pageserver.auth_type, AuthType::NeonJWT) {
                let claims = Claims::new(Some(tenant_id), Scope::Tenant);
@@ -819,49 +642,53 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
            };

            if let Some(node) = node {
-                println!("Starting existing postgres {}...", node_name);
+                println!("Starting existing postgres {node_name}...");
                node.start(&auth_token)?;
            } else {
                let branch_name = sub_args
-                    .value_of("branch-name")
+                    .get_one::<String>("branch-name")
+                    .map(|s| s.as_str())
                    .unwrap_or(DEFAULT_BRANCH_NAME);
                let timeline_id = env
                    .get_branch_timeline_id(branch_name, tenant_id)
                    .ok_or_else(|| {
-                        anyhow!("Found no timeline id for branch name '{}'", branch_name)
+                        anyhow!("Found no timeline id for branch name '{branch_name}'")
                    })?;
                let lsn = sub_args
-                    .value_of("lsn")
-                    .map(Lsn::from_str)
+                    .get_one::<String>("lsn")
+                    .map(|lsn_str| Lsn::from_str(lsn_str))
                    .transpose()
                    .context("Failed to parse Lsn from the request")?;
+                let pg_version = sub_args
+                    .get_one::<u32>("pg-version")
+                    .copied()
+                    .context("Failed to parse `pg-version` from the argument string")?;
                // when used with custom port this results in non obvious behaviour
                // port is remembered from first start command, i e
                // start --port X
                // stop
                // start <-- will also use port X even without explicit port argument
-                println!(
-                    "Starting new postgres {} on timeline {} ...",
-                    node_name, timeline_id
-                );
-                let node = cplane.new_node(tenant_id, node_name, timeline_id, lsn, port)?;
+                println!("Starting new postgres (v{pg_version}) {node_name} on timeline {timeline_id} ...");
+
+                let node =
+                    cplane.new_node(tenant_id, node_name, timeline_id, lsn, port, pg_version)?;
                node.start(&auth_token)?;
            }
        }
        "stop" => {
            let node_name = sub_args
-                .value_of("node")
+                .get_one::<String>("node")
                .ok_or_else(|| anyhow!("No node name was provided to stop"))?;
-            let destroy = sub_args.is_present("destroy");
+            let destroy = sub_args.get_flag("destroy");

            let node = cplane
                .nodes
-                .get(&(tenant_id, node_name.to_owned()))
-                .with_context(|| format!("postgres {} is not found", node_name))?;
+                .get(&(tenant_id, node_name.to_string()))
+                .with_context(|| format!("postgres {node_name} is not found"))?;
            node.stop(destroy)?;
        }

-        _ => bail!("Unexpected pg subcommand '{}'", sub_name),
+        _ => bail!("Unexpected pg subcommand '{sub_name}'"),
    }

    Ok(())
@@ -879,7 +706,10 @@ fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
        }

        Some(("stop", stop_match)) => {
-            let immediate = stop_match.value_of("stop-mode") == Some("immediate");
+            let immediate = stop_match
+                .get_one::<String>("stop-mode")
+                .map(|s| s.as_str())
+                == Some("immediate");

            if let Err(e) = pageserver.stop(immediate) {
                eprintln!("pageserver stop failed: {}", e);
@@ -929,7 +759,7 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
    };

    // All the commands take an optional safekeeper name argument
-    let sk_id = if let Some(id_str) = sub_args.value_of("id") {
+    let sk_id = if let Some(id_str) = sub_args.get_one::<String>("id") {
        NodeId(id_str.parse().context("while parsing safekeeper id")?)
    } else {
        DEFAULT_SAFEKEEPER_ID
@@ -945,7 +775,8 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
        }

        "stop" => {
-            let immediate = sub_args.value_of("stop-mode") == Some("immediate");
+            let immediate =
+                sub_args.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");

            if let Err(e) = safekeeper.stop(immediate) {
                eprintln!("safekeeper stop failed: {}", e);
@@ -954,7 +785,8 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
        }

        "restart" => {
-            let immediate = sub_args.value_of("stop-mode") == Some("immediate");
+            let immediate =
+                sub_args.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");

            if let Err(e) = safekeeper.stop(immediate) {
                eprintln!("safekeeper stop failed: {}", e);
@@ -998,7 +830,8 @@ fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow
 }

 fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
-    let immediate = sub_match.value_of("stop-mode") == Some("immediate");
+    let immediate =
+        sub_match.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");

    let pageserver = PageServerNode::from_env(env);

@@ -1031,3 +864,219 @@ fn try_stop_etcd_process(env: &local_env::LocalEnv) {
        eprintln!("etcd stop failed: {e}");
    }
 }
+
+/// Builds the complete clap command tree for the Neon CLI.
+///
+/// Argument definitions that are shared between several subcommands are created
+/// once up front and cloned into every subcommand that accepts them; the last
+/// user of each definition takes it by value.
+fn cli() -> Command {
+    let branch_name_arg = Arg::new("branch-name")
+        .long("branch-name")
+        .help("Name of the branch to be created or used as an alias for other services")
+        .required(false);
+
+    let pg_node_arg = Arg::new("node").help("Postgres node name").required(false);
+
+    let safekeeper_id_arg = Arg::new("id").help("safekeeper id").required(false);
+
+    let tenant_id_arg = Arg::new("tenant-id")
+        .long("tenant-id")
+        .help("Tenant id. Represented as a hexadecimal string 32 symbols length")
+        .required(false);
+
+    let timeline_id_arg = Arg::new("timeline-id")
+        .long("timeline-id")
+        .help("Timeline id. Represented as a hexadecimal string 32 symbols length")
+        .required(false);
+
+    // Shared by `init`, `timeline create/import`, `tenant create` and `pg create/start`,
+    // so the help text must not be specific to the initial tenant.
+    // Parsed as `u32`; falls back to `DEFAULT_PG_VERSION` when not given.
+    let pg_version_arg = Arg::new("pg-version")
+        .long("pg-version")
+        .help("Postgres version to use for the tenant, timeline or compute node being created")
+        .required(false)
+        .value_parser(value_parser!(u32))
+        .default_value(DEFAULT_PG_VERSION);
+
+    let port_arg = Arg::new("port")
+        .long("port")
+        .required(false)
+        .value_parser(value_parser!(u16))
+        .value_name("port");
+
+    // Only "fast" and "immediate" are accepted; anything else is rejected by clap.
+    let stop_mode_arg = Arg::new("stop-mode")
+        .short('m')
+        .value_parser(["fast", "immediate"])
+        .help("If 'immediate', don't flush repository data at shutdown")
+        .required(false)
+        .value_name("stop-mode");
+
+    // May be repeated; each occurrence takes exactly one value and all of them are kept.
+    let pageserver_config_args = Arg::new("pageserver-config-override")
+        .long("pageserver-config-override")
+        .num_args(1)
+        .action(ArgAction::Append)
+        .help("Additional pageserver's configuration options or overrides, refer to pageserver's 'config-override' CLI parameter docs for more")
+        .required(false);
+
+    let lsn_arg = Arg::new("lsn")
+        .long("lsn")
+        .help("Specify Lsn on the timeline to start from. By default, end of the timeline would be used.")
+        .required(false);
+
+    Command::new("Neon CLI")
+        .arg_required_else_help(true)
+        .version(GIT_VERSION)
+        .subcommand(
+            Command::new("init")
+                .about("Initialize a new Neon repository")
+                .arg(pageserver_config_args.clone())
+                .arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
+                .arg(
+                    Arg::new("config")
+                        .long("config")
+                        .required(false)
+                        .value_parser(value_parser!(PathBuf))
+                        .value_name("config"),
+                )
+                .arg(pg_version_arg.clone())
+        )
+        .subcommand(
+            Command::new("timeline")
+            .about("Manage timelines")
+            .subcommand(Command::new("list")
+                .about("List all timelines, available to this pageserver")
+                .arg(tenant_id_arg.clone()))
+            .subcommand(Command::new("branch")
+                .about("Create a new timeline, using another timeline as a base, copying its data")
+                .arg(tenant_id_arg.clone())
+                .arg(branch_name_arg.clone())
+                .arg(Arg::new("ancestor-branch-name").long("ancestor-branch-name")
+                    .help("Use last Lsn of another timeline (and its data) as base when creating the new timeline. The timeline gets resolved by its branch name.").required(false))
+                .arg(Arg::new("ancestor-start-lsn").long("ancestor-start-lsn")
+                    .help("When using another timeline as base, use a specific Lsn in it instead of the latest one").required(false)))
+            .subcommand(Command::new("create")
+                .about("Create a new blank timeline")
+                .arg(tenant_id_arg.clone())
+                .arg(branch_name_arg.clone())
+                .arg(pg_version_arg.clone())
+            )
+            .subcommand(Command::new("import")
+                .about("Import timeline from basebackup directory")
+                .arg(tenant_id_arg.clone())
+                .arg(timeline_id_arg.clone())
+                .arg(Arg::new("node-name").long("node-name")
+                    .help("Name to assign to the imported timeline"))
+                .arg(Arg::new("base-tarfile")
+                    .long("base-tarfile")
+                    .value_parser(value_parser!(PathBuf))
+                    .help("Basebackup tarfile to import")
+                )
+                .arg(Arg::new("base-lsn").long("base-lsn")
+                    .help("Lsn the basebackup starts at"))
+                .arg(Arg::new("wal-tarfile")
+                    .long("wal-tarfile")
+                    .value_parser(value_parser!(PathBuf))
+                    .help("Wal to add after base")
+                )
+                .arg(Arg::new("end-lsn").long("end-lsn")
+                    .help("Lsn the basebackup ends at"))
+                .arg(pg_version_arg.clone())
+            )
+        ).subcommand(
+            Command::new("tenant")
+            .arg_required_else_help(true)
+            .about("Manage tenants")
+            .subcommand(Command::new("list"))
+            .subcommand(Command::new("create")
+                .arg(tenant_id_arg.clone())
+                .arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
+                .arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false))
+                .arg(pg_version_arg.clone())
+                )
+            .subcommand(Command::new("config")
+                .arg(tenant_id_arg.clone())
+                .arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false))
+                )
+        )
+        .subcommand(
+            Command::new("pageserver")
+                .arg_required_else_help(true)
+                .about("Manage pageserver")
+                .subcommand(Command::new("status"))
+                .subcommand(Command::new("start").about("Start local pageserver").arg(pageserver_config_args.clone()))
+                .subcommand(Command::new("stop").about("Stop local pageserver")
+                            .arg(stop_mode_arg.clone()))
+                .subcommand(Command::new("restart").about("Restart local pageserver").arg(pageserver_config_args.clone()))
+        )
+        .subcommand(
+            Command::new("safekeeper")
+                .arg_required_else_help(true)
+                .about("Manage safekeepers")
+                .subcommand(Command::new("start")
+                            .about("Start local safekeeper")
+                            .arg(safekeeper_id_arg.clone())
+                )
+                .subcommand(Command::new("stop")
+                            .about("Stop local safekeeper")
+                            .arg(safekeeper_id_arg.clone())
+                            .arg(stop_mode_arg.clone())
+                )
+                .subcommand(Command::new("restart")
+                            .about("Restart local safekeeper")
+                            .arg(safekeeper_id_arg)
+                            .arg(stop_mode_arg.clone())
+                )
+        )
+        .subcommand(
+            Command::new("pg")
+                .arg_required_else_help(true)
+                .about("Manage postgres instances")
+                .subcommand(Command::new("list").arg(tenant_id_arg.clone()))
+                .subcommand(Command::new("create")
+                    .about("Create a postgres compute node")
+                    .arg(pg_node_arg.clone())
+                    .arg(branch_name_arg.clone())
+                    .arg(tenant_id_arg.clone())
+                    .arg(lsn_arg.clone())
+                    .arg(port_arg.clone())
+                    .arg(
+                        // Boolean switch: without an explicit `SetTrue` action clap would
+                        // treat `--config-only` as an option that expects a value.
+                        // NOTE(review): the `pg create` handler does not appear to read this flag — confirm.
+                        Arg::new("config-only")
+                            .help("Don't do basebackup, create compute node with only config files")
+                            .long("config-only")
+                            .action(ArgAction::SetTrue)
+                            .required(false))
+                    .arg(pg_version_arg.clone())
+                )
+                .subcommand(Command::new("start")
+                    .about("Start a postgres compute node.\n This command actually creates new node from scratch, but preserves existing config files")
+                    .arg(pg_node_arg.clone())
+                    .arg(tenant_id_arg.clone())
+                    .arg(branch_name_arg)
+                    .arg(timeline_id_arg)
+                    .arg(lsn_arg)
+                    .arg(port_arg)
+                    .arg(pg_version_arg)
+                )
+                .subcommand(
+                    Command::new("stop")
+                    .arg(pg_node_arg)
+                    .arg(tenant_id_arg)
+                    .arg(
+                        // Boolean switch, read with `get_flag("destroy")` by the `pg stop` handler.
+                        Arg::new("destroy")
+                            .help("Also delete data directory (now optional, should be default in future)")
+                            .long("destroy")
+                            .action(ArgAction::SetTrue)
+                            .required(false)
+                        )
+                )
+
+        )
+        .subcommand(
+            Command::new("start")
+                .about("Start page server and safekeepers")
+                .arg(pageserver_config_args)
+        )
+        .subcommand(
+            Command::new("stop")
+                .about("Stop page server and safekeepers")
+                .arg(stop_mode_arg)
+        )
+}
+
+// Runs clap's `debug_assert` self-check on the command tree built by `cli()`.
+#[test]
+fn verify_cli() {
+    cli().debug_assert();
+}
--- a/control_plane/src/compute.rs
+++ b/control_plane/src/compute.rs
@@ -18,7 +18,7 @@ use utils::{
    postgres_backend::AuthType,
 };

-use crate::local_env::LocalEnv;
+use crate::local_env::{LocalEnv, DEFAULT_PG_VERSION};
 use crate::postgresql_conf::PostgresConf;
 use crate::storage::PageServerNode;

@@ -81,6 +81,7 @@ impl ComputeControlPlane {
        timeline_id: TimelineId,
        lsn: Option<Lsn>,
        port: Option<u16>,
+        pg_version: u32,
    ) -> Result<Arc<PostgresNode>> {
        let port = port.unwrap_or_else(|| self.get_port());
        let node = Arc::new(PostgresNode {
@@ -93,6 +94,7 @@ impl ComputeControlPlane {
            lsn,
            tenant_id,
            uses_wal_proposer: false,
+            pg_version,
        });

        node.create_pgdata()?;
@@ -118,6 +120,7 @@ pub struct PostgresNode {
    pub lsn: Option<Lsn>, // if it's a read-only node. None for primary
    pub tenant_id: TenantId,
    uses_wal_proposer: bool,
+    pg_version: u32,
 }

 impl PostgresNode {
@@ -152,6 +155,14 @@ impl PostgresNode {
        let tenant_id: TenantId = conf.parse_field("neon.tenant_id", &context)?;
        let uses_wal_proposer = conf.get("neon.safekeepers").is_some();

+        // Read postgres version from PG_VERSION file to determine which postgres version binary to use.
+        // If it doesn't exist, assume broken data directory and use default pg version.
+        let pg_version_path = entry.path().join("PG_VERSION");
+        // Trim before parsing: a trailing newline in the file would make `u32::from_str` fail.
+        let pg_version_str =
+            fs::read_to_string(pg_version_path).unwrap_or_else(|_| DEFAULT_PG_VERSION.to_string());
+        let pg_version = u32::from_str(pg_version_str.trim())?;
+
        // parse recovery_target_lsn, if any
        let recovery_target_lsn: Option<Lsn> =
            conf.parse_field_optional("recovery_target_lsn", &context)?;
@@ -167,17 +178,24 @@ impl PostgresNode {
            lsn: recovery_target_lsn,
            tenant_id,
            uses_wal_proposer,
+            pg_version,
        })
    }

-    fn sync_safekeepers(&self, auth_token: &Option<String>) -> Result<Lsn> {
-        let pg_path = self.env.pg_bin_dir().join("postgres");
+    fn sync_safekeepers(&self, auth_token: &Option<String>, pg_version: u32) -> Result<Lsn> {
+        let pg_path = self.env.pg_bin_dir(pg_version)?.join("postgres");
        let mut cmd = Command::new(&pg_path);

        cmd.arg("--sync-safekeepers")
            .env_clear()
-            .env("LD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
-            .env("DYLD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
+            .env(
+                "LD_LIBRARY_PATH",
+                self.env.pg_lib_dir(pg_version)?.to_str().unwrap(),
+            )
+            .env(
+                "DYLD_LIBRARY_PATH",
+                self.env.pg_lib_dir(pg_version)?.to_str().unwrap(),
+            )
            .env("PGDATA", self.pgdata().to_str().unwrap())
            .stdout(Stdio::piped())
            // Comment this to avoid capturing stderr (useful if command hangs)
@@ -259,8 +277,8 @@ impl PostgresNode {
            })
    }

-    // Connect to a page server, get base backup, and untar it to initialize a
-    // new data directory
+    // Write postgresql.conf with default configuration
+    // and PG_VERSION file to the data directory of a new node.
    fn setup_pg_conf(&self, auth_type: AuthType) -> Result<()> {
        let mut conf = PostgresConf::new();
        conf.append("max_wal_senders", "10");
@@ -357,6 +375,9 @@ impl PostgresNode {
        let mut file = File::create(self.pgdata().join("postgresql.conf"))?;
        file.write_all(conf.to_string().as_bytes())?;

+        let mut file = File::create(self.pgdata().join("PG_VERSION"))?;
+        file.write_all(self.pg_version.to_string().as_bytes())?;
+
        Ok(())
    }

@@ -368,7 +389,7 @@ impl PostgresNode {
            // latest data from the pageserver. That is a bit clumsy but whole bootstrap
            // procedure evolves quite actively right now, so let's think about it again
            // when things would be more stable (TODO).
-            let lsn = self.sync_safekeepers(auth_token)?;
+            let lsn = self.sync_safekeepers(auth_token, self.pg_version)?;
            if lsn == Lsn(0) {
                None
            } else {
@@ -401,7 +422,7 @@ impl PostgresNode {
    }

    fn pg_ctl(&self, args: &[&str], auth_token: &Option<String>) -> Result<()> {
-        let pg_ctl_path = self.env.pg_bin_dir().join("pg_ctl");
+        let pg_ctl_path = self.env.pg_bin_dir(self.pg_version)?.join("pg_ctl");
        let mut cmd = Command::new(pg_ctl_path);
        cmd.args(
            [
@@ -417,8 +438,14 @@ impl PostgresNode {
            .concat(),
        )
        .env_clear()
-        .env("LD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
-        .env("DYLD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap());
+        .env(
+            "LD_LIBRARY_PATH",
+            self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),
+        )
+        .env(
+            "DYLD_LIBRARY_PATH",
+            self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),
+        );
        if let Some(token) = auth_token {
            cmd.env("ZENITH_AUTH_TOKEN", token);
        }
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -20,6 +20,8 @@ use utils::{

 use crate::safekeeper::SafekeeperNode;

+pub const DEFAULT_PG_VERSION: u32 = 14;
+
 //
 // This data structures represents neon_local CLI config
 //
@@ -195,12 +197,33 @@ impl Default for SafekeeperConf {
 }

 impl LocalEnv {
-    // postgres installation paths
-    pub fn pg_bin_dir(&self) -> PathBuf {
-        self.pg_distrib_dir.join("bin")
+    pub fn pg_distrib_dir_raw(&self) -> PathBuf {
+        self.pg_distrib_dir.clone()
    }
-    pub fn pg_lib_dir(&self) -> PathBuf {
-        self.pg_distrib_dir.join("lib")
+
+    /// Installation root for the given postgres major version: the
+    /// `v{pg_version}` subdirectory of the configured distrib dir.
+    /// Only versions 14 and 15 are accepted; any other value is an error.
+    pub fn pg_distrib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
+        match pg_version {
+            14 | 15 => Ok(self.pg_distrib_dir.join(format!("v{pg_version}"))),
+            _ => bail!("Unsupported postgres version: {}", pg_version),
+        }
+    }
+
+    /// Directory with the postgres executables for `pg_version`.
+    ///
+    /// Version validation is delegated to `pg_distrib_dir`, which fails with
+    /// "Unsupported postgres version" for anything it does not recognize.
+    pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
+        Ok(self.pg_distrib_dir(pg_version)?.join("bin"))
+    }
+    /// Directory with the postgres shared libraries for `pg_version`.
+    ///
+    /// Version validation is delegated to `pg_distrib_dir`, which fails with
+    /// "Unsupported postgres version" for anything it does not recognize.
+    pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
+        Ok(self.pg_distrib_dir(pg_version)?.join("lib"))
    }

    pub fn pageserver_bin(&self) -> anyhow::Result<PathBuf> {
@@ -289,13 +312,15 @@ impl LocalEnv {
        let mut env: LocalEnv = toml::from_str(toml)?;

        // Find postgres binaries.
-        // Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "pg_install/v14".
+        // Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "pg_install".
+        // Note that later in the code we assume, that distrib dirs follow the same pattern
+        // for all postgres versions.
        if env.pg_distrib_dir == Path::new("") {
            if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
                env.pg_distrib_dir = postgres_bin.into();
            } else {
                let cwd = env::current_dir()?;
-                env.pg_distrib_dir = cwd.join("pg_install/v14")
+                env.pg_distrib_dir = cwd.join("pg_install")
            }
        }

@@ -384,7 +409,7 @@ impl LocalEnv {
    //
    // Initialize a new Neon repository
    //
-    pub fn init(&mut self) -> anyhow::Result<()> {
+    pub fn init(&mut self, pg_version: u32) -> anyhow::Result<()> {
        // check if config already exists
        let base_path = &self.base_data_dir;
        ensure!(
@@ -397,10 +422,10 @@ impl LocalEnv {
            "directory '{}' already exists. Perhaps already initialized?",
            base_path.display()
        );
-        if !self.pg_distrib_dir.join("bin/postgres").exists() {
+        if !self.pg_bin_dir(pg_version)?.join("postgres").exists() {
            bail!(
                "Can't find postgres binary at {}",
-                self.pg_distrib_dir.display()
+                self.pg_bin_dir(pg_version)?.display()
            );
        }
        for binary in ["pageserver", "safekeeper"] {
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -12,13 +12,8 @@ use nix::unistd::Pid;
 use postgres::Config;
 use reqwest::blocking::{Client, RequestBuilder, Response};
 use reqwest::{IntoUrl, Method};
-use safekeeper::http::models::TimelineCreateRequest;
 use thiserror::Error;
-use utils::{
-    connstring::connection_address,
-    http::error::HttpErrorBody,
-    id::{NodeId, TenantId, TimelineId},
-};
+use utils::{connstring::connection_address, http::error::HttpErrorBody, id::NodeId};

 use crate::local_env::{LocalEnv, SafekeeperConf};
 use crate::storage::PageServerNode;
@@ -281,24 +276,4 @@ impl SafekeeperNode {
            .error_from_body()?;
        Ok(())
    }
-
-    pub fn timeline_create(
-        &self,
-        tenant_id: TenantId,
-        timeline_id: TimelineId,
-        peer_ids: Vec<NodeId>,
-    ) -> Result<()> {
-        Ok(self
-            .http_request(
-                Method::POST,
-                format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
-            )
-            .json(&TimelineCreateRequest {
-                timeline_id,
-                peer_ids,
-            })
-            .send()?
-            .error_from_body()?
-            .json()?)
-    }
 }
--- a/control_plane/src/storage.rs
+++ b/control_plane/src/storage.rs
@@ -11,7 +11,7 @@ use anyhow::{bail, Context};
 use nix::errno::Errno;
 use nix::sys::signal::{kill, Signal};
 use nix::unistd::Pid;
-use pageserver::http::models::{
+use pageserver_api::models::{
    TenantConfigRequest, TenantCreateRequest, TenantInfo, TimelineCreateRequest, TimelineInfo,
 };
 use postgres::{Config, NoTls};
@@ -112,11 +112,15 @@ impl PageServerNode {
        create_tenant: Option<TenantId>,
        initial_timeline_id: Option<TimelineId>,
        config_overrides: &[&str],
+        pg_version: u32,
    ) -> anyhow::Result<TimelineId> {
        let id = format!("id={}", self.env.pageserver.id);
        // FIXME: the paths should be shell-escaped to handle paths with spaces, quotas etc.
-        let pg_distrib_dir_param =
-            format!("pg_distrib_dir='{}'", self.env.pg_distrib_dir.display());
+        let pg_distrib_dir_param = format!(
+            "pg_distrib_dir='{}'",
+            self.env.pg_distrib_dir_raw().display()
+        );
+
        let authg_type_param = format!("auth_type='{}'", self.env.pageserver.auth_type);
        let listen_http_addr_param = format!(
            "listen_http_addr='{}'",
@@ -159,7 +163,7 @@ impl PageServerNode {

        self.start_node(&init_config_overrides, &self.env.base_data_dir, true)?;
        let init_result = self
-            .try_init_timeline(create_tenant, initial_timeline_id)
+            .try_init_timeline(create_tenant, initial_timeline_id, pg_version)
            .context("Failed to create initial tenant and timeline for pageserver");
        match &init_result {
            Ok(initial_timeline_id) => {
@@ -175,10 +179,16 @@ impl PageServerNode {
        &self,
        new_tenant_id: Option<TenantId>,
        new_timeline_id: Option<TimelineId>,
+        pg_version: u32,
    ) -> anyhow::Result<TimelineId> {
        let initial_tenant_id = self.tenant_create(new_tenant_id, HashMap::new())?;
-        let initial_timeline_info =
-            self.timeline_create(initial_tenant_id, new_timeline_id, None, None)?;
+        let initial_timeline_info = self.timeline_create(
+            initial_tenant_id,
+            new_timeline_id,
+            None,
+            None,
+            Some(pg_version),
+        )?;
        Ok(initial_timeline_info.timeline_id)
    }

@@ -497,6 +507,7 @@ impl PageServerNode {
        new_timeline_id: Option<TimelineId>,
        ancestor_start_lsn: Option<Lsn>,
        ancestor_timeline_id: Option<TimelineId>,
+        pg_version: Option<u32>,
    ) -> anyhow::Result<TimelineInfo> {
        self.http_request(
            Method::POST,
@@ -506,6 +517,7 @@ impl PageServerNode {
            new_timeline_id,
            ancestor_start_lsn,
            ancestor_timeline_id,
+            pg_version,
        })
        .send()?
        .error_from_body()?
@@ -535,6 +547,7 @@ impl PageServerNode {
        timeline_id: TimelineId,
        base: (Lsn, PathBuf),
        pg_wal: Option<(Lsn, PathBuf)>,
+        pg_version: u32,
    ) -> anyhow::Result<()> {
        let mut client = self.pg_connection_config.connect(NoTls).unwrap();

@@ -553,8 +566,9 @@ impl PageServerNode {
        };

        // Import base
-        let import_cmd =
-            format!("import basebackup {tenant_id} {timeline_id} {start_lsn} {end_lsn}");
+        let import_cmd = format!(
+            "import basebackup {tenant_id} {timeline_id} {start_lsn} {end_lsn} {pg_version}"
+        );
        let mut writer = client.copy_in(&import_cmd)?;
        io::copy(&mut base_reader, &mut writer)?;
        writer.finish()?;
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -80,4 +80,6 @@
 - [015-storage-messaging](rfcs/015-storage-messaging.md)
 - [016-connection-routing](rfcs/016-connection-routing.md)
 - [017-timeline-data-management](rfcs/017-timeline-data-management.md)
+- [018-storage-messaging-2](rfcs/018-storage-messaging-2.md)
+- [019-tenant-timeline-lifecycles](rfcs/019-tenant-timeline-lifecycles.md)
 - [cluster-size-limits](rfcs/cluster-size-limits.md)
--- a/docs/rfcs/018-storage-messaging-2.md
+++ b/docs/rfcs/018-storage-messaging-2.md
@@ -0,0 +1,163 @@
+# Storage messaging
+
+Safekeepers need to communicate to each other to
+* Trim WAL on safekeepers;
+* Decide on which SK should push WAL to the S3;
+* Decide on when to shut down SK<->pageserver connection;
+* Understand state of each other to perform peer recovery;
+
+Pageservers need to communicate to safekeepers to decide which SK should provide
+WAL to the pageserver.
+
+This is an iteration on [015-storage-messaging](https://github.com/neondatabase/neon/blob/main/docs/rfcs/015-storage-messaging.md) describing current situation,
+potential performance issue and ways to address it.
+
+## Background
+
+What we have currently is very close to etcd variant described in
+015-storage-messaging. Basically, we have single `SkTimelineInfo` message
+periodically sent by all safekeepers to etcd for each timeline.
+* Safekeepers subscribe to it to learn status of peers (currently they subscribe to
+  'everything', but they can and should fetch data only for timelines they hold).
+* Pageserver subscribes to it (separate watch per timeline) to learn safekeepers
+  positions; based on that, it decides from which safekeepers to pull WAL.
+
+Also, safekeepers use etcd elections API to make sure only single safekeeper
+offloads WAL.
+
+It works, and callmemaybe is gone. However, this has a performance
+hazard. Currently deployed etcd can do about 6k puts per second (using its own
+`benchmark` tool); on my 6 core laptop, while running on tmpfs, this gets to
+35k. Making benchmark closer to our usage [etcd watch bench](https://github.com/arssher/etcd-client/blob/watch-bench/examples/watch_bench.rs),
+I get ~10k received messages per second with various number of publisher-subscribers
+(laptop, tmpfs). Dividing this by 12 (3 sks generate msg, 1 ps + 3 sk consume them) we
+get about 800 active timelines, if message is sent each second. Not extremely
+low, but quite reachable.
+
+A lot of idle watches seem to be ok though -- which is good, as pageserver
+subscribes to all its timelines regardless of their activity.
+
+Also, running etcd with fsyncs disabled is messy -- data dir must be wiped on
+each restart or there is a risk of corruption errors.
+
+The reason is that etcd does much more than we need; it is a fault tolerant
+store with strong consistency, but I claim all we need here is just simplest pub
+sub with best effort delivery, because
+* We already have centralized source of truth for long running data, like which
+  tlis are on which nodes  -- the console.
+* Momentary data (safekeeper/pageserver progress) doesn't make sense to persist.
+  Instead of putting each change to the broker and expecting reliable delivery,
+  it is better to just have constant flow of data for active timelines: 1) they
+  serve as natural heartbeats -- if node can't send, we shouldn't pull WAL from
+  it 2) it is simpler -- no need to track delivery to/from the broker.
+  Moreover, latency here is important: the faster we obtain fresh data, the
+  faster we can switch to proper safekeeper after failure.
+* As for WAL offloading leader election, it is trivial to achieve through these
+  heartbeats -- just take suitable node through deterministic rule (min node
+  id).  Once network is stable, this is a converging process (well, except
+  complicated failure topology, but even then making it converge is not
+  hard). Such elections bear some risk of several offloaders running
+  concurrently for a short period of time, but that's harmless.
+
+  Generally, if one needs strong consistency, electing leader per se is not
+  enough; it must be accompanied with number (logical clock ts), checked at
+  every action to track causality. s3 doesn't provide CAS, so it can't
+  differentiate old/new leader, this must be solved differently.
+
+  We could use etcd CAS (its most powerful/useful primitive actually) to issue
+  these leader numbers (and e.g. prefix files in s3), but currently I don't see
+  need for that.
+
+
+Obviously best effort pub sub is much simpler and more performant; the one proposed is
+
+## gRPC broker
+
+I took tonic and [prototyped](https://github.com/neondatabase/neon/blob/asher/neon-broker/broker/src/broker.rs) the replacement of functionality we currently use
+with grpc streams and tokio mpsc channels. The implementation description is at the file header.
+
+It is just 500 lines of code and core functionality is complete. 1-1 pub sub
+gives about 120k received messages per second; having multiple subscribers in
+different connections quickly scales to 1 million received messages per second.
+I had concerns about many concurrent streams in a single connection, but 2^20
+subscribers still work (though eat memory, with 10 publishers 20GB are consumed;
+in this implementation each publisher holds full copy of all subscribers). There
+is `bench.rs` nearby which I used for testing.
+
+`SkTimelineInfo` is wired here, but another message can be added (e.g. if
+pageservers want to communicate with each other) with templating.
+
+### Fault tolerance
+
+Since such broker is stateless, we can run it under k8s. Or add proxying to
+other members, with best-effort this is simple.
+
+### Security implications
+
+Communication happens in a private network that is not exposed to users;
+additionally we can add auth to the broker.
+
+## Alternative: get existing pub-sub
+
+We could take some existing pub sub solution, e.g. RabbitMQ, Redis. But in this
+case IMV simplicity of our own outweighs external dependency costs (RabbitMQ is
+much more complicated and needs VM; Redis Rust client maintenance is not
+ideal...). Also note that projects like CockroachDB and TiDB are based on gRPC
+as well.
+
+## Alternative: direct communication
+
+Apart from being transport, broker solves one more task: discovery, i.e. letting
+safekeepers and pageservers find each other. We can let safekeepers know, for
+each timeline, both other safekeepers for this timeline and pageservers serving
+it. In this case direct communication is possible:
+ - each safekeeper pushes to each other safekeeper status of timelines residing
+   on both of them, letting remove WAL, decide who offloads, decide on peer
+   recovery;
+ - each safekeeper pushes to each pageserver status of timelines residing on
+   both of them, letting pageserver choose from which sk to pull WAL;
+
+It was mostly described in [014-safekeepers-gossip](https://github.com/neondatabase/neon/blob/main/docs/rfcs/014-safekeepers-gossip.md), but I want to recap on that.
+
+The main pro is one less dependency: fewer moving parts, easier to run Neon
+locally/manually, fewer places to monitor. The broker fault tolerance problem
+disappears, no kuber or something. To me this is a big thing.
+
+Also (though not a big thing) idle watches for inactive timelines disappear:
+naturally safekeepers learn about compute connection first and start pushing
+status to pageserver(s), notifying it should pull.
+
+Importantly, I think that eventually knowing and persisting peers and
+pageservers on safekeepers is inevitable:
+- Knowing peer safekeepers for the timeline is required for correct
+  automatic membership change -- new member set must be hardened on old
+  majority before proceeding. It is required to get rid of sync-safekeepers
+  as well (peer recovery up to flush_lsn).
+- Knowing pageservers where the timeline is attached is needed to
+  1. Understand when to shut down activity on the timeline, i.e. push data to
+     the broker. We can have a lot of timelines sleeping quietly which
+     shouldn't occupy resources.
+  2. Preserve WAL for these (currently we offload to s3 and take it from there,
+     but serving locally is better, and we get one less condition on which WAL
+     can be removed from s3).
+
+I suppose this membership data should be passed to safekeepers directly from the
+console because
+1. Console is the original source of this data, conceptually this is the
+   simplest way (rather than passing it through compute or something).
+2. We already have similar code for deleting timeline on safekeepers
+   (and attaching/detaching timeline on pageserver), this is a typical
+   action -- queue operation against storage node and execute it until it
+   completes (or timeline is dropped).
+
+Cons of direct communication are
+- It is more complicated: each safekeeper should maintain set of peers it talks
+  to, and set of timelines for each such peer -- they ought to be multiplexed
+  into single connection.
+- Totally, we have O(n^2) connections instead of O(n) with broker schema
+  (still O(n) on each node). However, these are relatively stable, async and
+  thus not very expensive, I don't think this is a big problem. Up to 10k
+  storage nodes I doubt connection overhead would be noticeable.
+
+I'd use gRPC for direct communication, and in this sense gRPC based broker is a
+step towards it.
--- a/docs/rfcs/019-tenant-timeline-lifecycles.md
+++ b/docs/rfcs/019-tenant-timeline-lifecycles.md
@@ -0,0 +1,91 @@
+# Managing Tenant and Timeline lifecycles
+
+## Summary
+
+The pageserver has a Tenant object in memory for each tenant it manages, and a
+Timeline for each timeline. There are a lot of tasks that operate on the tenants
+and timelines with references to those objects. We have some mechanisms to track
+which tasks are operating on each Tenant and Timeline, and to request them to
+shutdown when a tenant or timeline is deleted, but it does not cover all uses,
+and as a result we have many race conditions around tenant/timeline shutdown.
+
+## Motivation
+
+We have a bunch of race conditions that can produce weird errors and can be hard
+to track down.
+
+## Non Goals
+
+This RFC only covers the problem of ensuring that a task/thread isn't operating
+on a Tenant or Timeline. It does not cover what states, aside from Active and
+non-Active, each Tenant and Timeline should have, or when exactly the transitions
+should happen.
+
+## Impacted components (e.g. pageserver, safekeeper, console, etc)
+
+Pageserver. Although I wonder if the safekeeper should have a similar mechanism.
+
+## Current situation
+
+Most pageserver tasks are managed by task_mgr.rs:
+
+- LibpqEndpointListener
+- HttpEndPointListener
+- WalReceiverManager and -Connection
+- GarbageCollector and Compaction
+- InitialLogicalSizeCalculation
+
+In addition to those tasks, the walreceiver performs some direct tokio::spawn
+calls to spawn tasks that are not registered with 'task_mgr'. And all of these
+tasks can spawn extra operations with tokio spawn_blocking.
+
+Whenever a tenant or timeline is removed from the system, by pageserver
+shutdown, delete_timeline or tenant-detach operation, we rely on the task
+registry in 'task_mgr.rs' to wait until there are no tasks operating on the
+tenant or timeline, before its Tenant/Timeline object is removed. That relies on
+each task to register itself with the tenant/timeline ID in
+'task_mgr.rs'. However, there are many gaps in that. For example,
+GarbageCollection and Compaction tasks are registered with the tenant, but when
+they proceed to operate on a particular timeline of the tenant, they don't
+register with timeline ID. Because of that, the timeline can be deleted while GC
+or compaction is running on it, causing failures in the GC or compaction (see
+https://github.com/neondatabase/neon/issues/2442).
+
+Another problem is that the task registry only works for tokio Tasks. There is
+no way to register a piece of code that runs inside spawn_blocking(), for
+example.
+
+## Proposed implementation
+
+This "voluntary" registration of tasks is fragile. Let's use Rust language features
+to enforce that a tenant/timeline cannot be removed from the system when there is
+still some code operating on it.
+
+Let's introduce new Guard objects for Tenant and Timeline, and do all actions through
+the Guard object. Something like:
+
+TenantActiveGuard: Guard object over Arc<Tenant>. When you acquire the guard,
+the code checks that the tenant is in Active state. If it's not, you get an
+error. You can change the state of the tenant to Stopping while there are
+TenantActiveGuard objects still on it, to prevent new TenantActiveGuards from
+being acquired, but the Tenant cannot be removed until all the guards are gone.
+
+TenantMaintenanceGuard: Like TenantActiveGuard, but can be held even when the
+tenant is not in Active state. Used for operations like attach/detach. Perhaps
+allow only one such guard on a Tenant at a time.
+
+Similarly for Timelines. We don't currently have a "state" on Timeline, but I think
+we need at least two states: Active and Stopping. The Stopping state is used at
+deletion, to prevent new TimelineActiveGuards from appearing, while you wait for
+existing TimelineActiveGuards to die out.
+
+The shutdown-signaling, using shutdown_watcher() and is_shutdown_requested(),
+probably also needs changes to deal with the new Guards. The rule is that if you
+have a TenantActiveGuard, and the tenant's state changes from Active to
+Stopping, the is_shutdown_requested() function should return true, and
+shutdown_watcher() future should return.
+
+This signaling doesn't necessarily need to cover all cases. For example, if you
+have a block of code in spawn_blocking(), it might be acceptable if
+is_shutdown_requested() doesn't return true even though the tenant is in
+Stopping state, as long as the code finishes reasonably fast.
--- a/docs/settings.md
+++ b/docs/settings.md
@@ -155,6 +155,8 @@ for other files and for sockets for incoming connections.
 #### pg_distrib_dir

 A directory with Postgres installation to use during pageserver activities.
+Since pageserver supports several postgres versions, `pg_distrib_dir` contains
+a subdirectory for each version with naming convention `v{PG_MAJOR_VERSION}/`.
 Inside that dir, a `bin/postgres` binary should be present.

 The default distrib dir is `./pg_install/`.
--- a/docs/sourcetree.md
+++ b/docs/sourcetree.md
@@ -96,7 +96,7 @@ A single virtual environment with all dependencies is described in the single `P
      sudo apt install python3.9
      ```
 - Install `poetry`
-    - Exact version of `poetry` is not important, see installation instructions available at poetry's [website](https://python-poetry.org/docs/#installation)`.
+    - Exact version of `poetry` is not important, see installation instructions available at poetry's [website](https://python-poetry.org/docs/#installation).
 - Install dependencies via `./scripts/pysync`.
    - Note that CI uses specific Python version (look for `PYTHON_VERSION` [here](https://github.com/neondatabase/docker-images/blob/main/rust/Dockerfile))
      so if you have different version some linting tools can yield different result locally vs in the CI.
--- a/libs/etcd_broker/Cargo.toml
+++ b/libs/etcd_broker/Cargo.toml
@@ -8,7 +8,7 @@
 regex = "1.4.5"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
- serde_with = "1.12.0"
+ serde_with = "2.0"
 once_cell = "1.13.0"

 utils = { path = "../utils" }
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -3,7 +3,7 @@
 //! Otherwise, we might not see all metrics registered via
 //! a default registry.
 use once_cell::sync::Lazy;
-use prometheus::core::{AtomicU64, GenericGauge, GenericGaugeVec};
+use prometheus::core::{AtomicU64, Collector, GenericGauge, GenericGaugeVec};
 pub use prometheus::opts;
 pub use prometheus::register;
 pub use prometheus::{core, default_registry, proto};
@@ -17,6 +17,7 @@ pub use prometheus::{register_int_counter_vec, IntCounterVec};
 pub use prometheus::{register_int_gauge, IntGauge};
 pub use prometheus::{register_int_gauge_vec, IntGaugeVec};
 pub use prometheus::{Encoder, TextEncoder};
+use prometheus::{Registry, Result};

 mod wrappers;
 pub use wrappers::{CountedReader, CountedWriter};
@@ -32,13 +33,27 @@ macro_rules! register_uint_gauge_vec {
    }};
 }

+/// Special internal registry, to collect metrics independently from the default registry.
+/// Was introduced to fix deadlock with lazy registration of metrics in the default registry.
+static INTERNAL_REGISTRY: Lazy<Registry> = Lazy::new(Registry::new);
+
+/// Register a collector in the internal registry. MUST be called before the first call to `gather()`.
+/// Otherwise, we can have a deadlock in the `gather()` call, trying to register a new collector
+/// while holding the lock.
+pub fn register_internal(c: Box<dyn Collector>) -> Result<()> {
+    INTERNAL_REGISTRY.register(c)
+}
+
 /// Gathers all Prometheus metrics and records the I/O stats just before that.
 ///
 /// Metrics gathering is a relatively simple and standalone operation, so
 /// it might be fine to do it this way to keep things simple.
 pub fn gather() -> Vec<prometheus::proto::MetricFamily> {
    update_rusage_metrics();
-    prometheus::gather()
+    let mut mfs = prometheus::gather();
+    let mut internal_mfs = INTERNAL_REGISTRY.gather();
+    mfs.append(&mut internal_mfs);
+    mfs
 }

 static DISK_IO_BYTES: Lazy<IntGaugeVec> = Lazy::new(|| {
@@ -62,6 +77,16 @@ pub const DISK_WRITE_SECONDS_BUCKETS: &[f64] = &[
    0.000_050, 0.000_100, 0.000_500, 0.001, 0.003, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5,
 ];

+pub fn set_build_info_metric(revision: &str) {
+    let metric = register_int_gauge_vec!(
+        "libmetrics_build_info",
+        "Build/version information",
+        &["revision"]
+    )
+    .expect("Failed to register build info metric");
+    metric.with_label_values(&[revision]).set(1);
+}
+
 // Records I/O stats in a "cross-platform" way.
 // Compiles both on macOS and Linux, but current macOS implementation always returns 0 as values for I/O stats.
 // An alternative is to read procfs (`/proc/[pid]/io`) which does not work under macOS at all, hence abandoned.
--- a/libs/pageserver_api/Cargo.toml
+++ b/libs/pageserver_api/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "pageserver_api"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+serde = { version = "1.0", features = ["derive"] }
+serde_with = "2.0"
+const_format = "0.2.21"
+
+utils = { path = "../utils" }
+workspace_hack = { version = "0.1", path = "../../workspace_hack" }
--- a/libs/pageserver_api/src/lib.rs
+++ b/libs/pageserver_api/src/lib.rs
@@ -0,0 +1,9 @@
+use const_format::formatcp;
+
+/// Public API types
+pub mod models;
+
+pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
+pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
+pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
+pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -7,7 +7,17 @@ use utils::{
    lsn::Lsn,
 };

-use crate::tenant::TenantState;
+/// The state of a tenant in the pageserver's memory.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+pub enum TenantState {
+    /// The tenant is fully operational; its background jobs may or may not be running.
+    Active { background_jobs_running: bool },
+    /// The tenant is recognized by the pageserver but is not yet ready to operate,
+    /// e.g. it is not present locally and is being downloaded, or is being read into memory from the file system.
+    Paused,
+    /// The tenant is recognized by the pageserver but is no longer used for any operations, because it failed to get activated.
+    Broken,
+}

 #[serde_as]
 #[derive(Serialize, Deserialize)]
@@ -21,6 +31,7 @@ pub struct TimelineCreateRequest {
    #[serde(default)]
    #[serde_as(as = "Option<DisplayFromStr>")]
    pub ancestor_start_lsn: Option<Lsn>,
+    pub pg_version: Option<u32>,
 }

 #[serde_as]
@@ -112,9 +123,15 @@ pub struct TenantInfo {
    pub has_in_progress_downloads: Option<bool>,
 }

+/// This represents the output of the "timeline_detail" and "timeline_list" API calls.
 #[serde_as]
 #[derive(Debug, Serialize, Deserialize, Clone)]
-pub struct LocalTimelineInfo {
+pub struct TimelineInfo {
+    #[serde_as(as = "DisplayFromStr")]
+    pub tenant_id: TenantId,
+    #[serde_as(as = "DisplayFromStr")]
+    pub timeline_id: TimelineId,
+
    #[serde_as(as = "Option<DisplayFromStr>")]
    pub ancestor_timeline_id: Option<TimelineId>,
    #[serde_as(as = "Option<DisplayFromStr>")]
@@ -137,28 +154,34 @@ pub struct LocalTimelineInfo {
    pub last_received_msg_lsn: Option<Lsn>,
    /// the timestamp (in microseconds) of the last received message
    pub last_received_msg_ts: Option<u128>,
+    pub pg_version: u32,
+
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    pub remote_consistent_lsn: Option<Lsn>,
+    pub awaits_download: bool,
+
+    // Some of the above fields are duplicated in 'local' and 'remote', for backwards-
+    // compatibility with older clients.
+    pub local: LocalTimelineInfo,
+    pub remote: RemoteTimelineInfo,
+}
+
+#[serde_as]
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct LocalTimelineInfo {
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    pub ancestor_timeline_id: Option<TimelineId>,
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    pub ancestor_lsn: Option<Lsn>,
+    pub current_logical_size: Option<u64>, // `None` when the timeline is Unloaded
+    pub current_physical_size: Option<u64>, // `None` when the timeline is Unloaded
 }

 #[serde_as]
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct RemoteTimelineInfo {
-    #[serde_as(as = "DisplayFromStr")]
-    pub remote_consistent_lsn: Lsn,
-    pub awaits_download: bool,
-}
-
-///
-/// This represents the output of the "timeline_detail" API call.
-///
-#[serde_as]
-#[derive(Debug, Serialize, Deserialize, Clone)]
-pub struct TimelineInfo {
-    #[serde_as(as = "DisplayFromStr")]
-    pub tenant_id: TenantId,
-    #[serde_as(as = "DisplayFromStr")]
-    pub timeline_id: TimelineId,
-    pub local: Option<LocalTimelineInfo>,
-    pub remote: Option<RemoteTimelineInfo>,
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    pub remote_consistent_lsn: Option<Lsn>,
 }

 pub type ConfigureFailpointsRequest = Vec<FailpointConfig>;
--- a/libs/postgres_ffi/Cargo.toml
+++ b/libs/postgres_ffi/Cargo.toml
@@ -13,7 +13,7 @@ crc32c = "0.6.0"
 hex = "0.4.3"
 once_cell = "1.13.0"
 log = "0.4.14"
-memoffset = "0.6.2"
+memoffset = "0.7"
 thiserror = "1.0"
 serde = { version = "1.0", features = ["derive"] }
 utils = { path = "../utils" }
@@ -26,4 +26,4 @@ wal_craft = { path = "wal_craft" }

 [build-dependencies]
 anyhow = "1.0"
-bindgen = "0.60.1"
+bindgen = "0.61"
--- a/libs/postgres_ffi/src/lib.rs
+++ b/libs/postgres_ffi/src/lib.rs
@@ -3,10 +3,14 @@
 #![allow(non_snake_case)]
 // bindgen creates some unsafe code with no doc comments.
 #![allow(clippy::missing_safety_doc)]
-// suppress warnings on rust 1.53 due to bindgen unit tests.
-// https://github.com/rust-lang/rust-bindgen/issues/1651
-#![allow(deref_nullptr)]
+// noted at 1.63 that in many cases there are u32 -> u32 transmutes in bindgen code.
+#![allow(clippy::useless_transmute)]
+// modules included with the postgres_ffi macro depend on the specific version's
+// types, and trigger an overly eager lint.
+#![allow(clippy::duplicate_mod)]

+use bytes::Bytes;
+use utils::bin_ser::SerializeError;
 use utils::lsn::Lsn;

 macro_rules! postgres_ffi {
@@ -24,12 +28,12 @@ macro_rules! postgres_ffi {
                    stringify!($version),
                    ".rs"
                ));
+
+                include!(concat!("pg_constants_", stringify!($version), ".rs"));
            }
            pub mod controlfile_utils;
            pub mod nonrelfile_utils;
-            pub mod pg_constants;
-            pub mod relfile_utils;
-            pub mod waldecoder;
+            pub mod waldecoder_handler;
            pub mod xlog_utils;

            pub const PG_MAJORVERSION: &str = stringify!($version);
@@ -44,6 +48,9 @@ macro_rules! postgres_ffi {
 postgres_ffi!(v14);
 postgres_ffi!(v15);

+pub mod pg_constants;
+pub mod relfile_utils;
+
 // Export some widely used datatypes that are unlikely to change across Postgres versions
 pub use v14::bindings::{uint32, uint64, Oid};
 pub use v14::bindings::{BlockNumber, OffsetNumber};
@@ -52,8 +59,11 @@ pub use v14::bindings::{TimeLineID, TimestampTz, XLogRecPtr, XLogSegNo};

 // Likewise for these, although the assumption that these don't change is a little more iffy.
 pub use v14::bindings::{MultiXactOffset, MultiXactStatus};
+pub use v14::bindings::{PageHeaderData, XLogRecord};
 pub use v14::xlog_utils::{XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD};

+pub use v14::bindings::{CheckPoint, ControlFileData};
+
 // from pg_config.h. These can be changed with configure options --with-blocksize=BLOCKSIZE and
 // --with-segsize=SEGSIZE, but assume the defaults for now.
 pub const BLCKSZ: u16 = 8192;
@@ -63,6 +73,49 @@ pub const WAL_SEGMENT_SIZE: usize = 16 * 1024 * 1024;

 pub const MAX_SEND_SIZE: usize = XLOG_BLCKSZ * 16;

+// Export some version independent functions that are used outside of this mod
+pub use v14::xlog_utils::encode_logical_message;
+pub use v14::xlog_utils::get_current_timestamp;
+pub use v14::xlog_utils::to_pg_timestamp;
+pub use v14::xlog_utils::XLogFileName;
+
+pub use v14::bindings::DBState_DB_SHUTDOWNED;
+
+pub fn bkpimage_is_compressed(bimg_info: u8, version: u32) -> anyhow::Result<bool> {
+    use v15::bindings::{BKPIMAGE_COMPRESS_LZ4, BKPIMAGE_COMPRESS_PGLZ, BKPIMAGE_COMPRESS_ZSTD};
+    let compressed_bits = match version {
+        14 => v14::bindings::BKPIMAGE_IS_COMPRESSED, // v14: one flag; v15: one per method
+        15 => BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD,
+        _ => anyhow::bail!("Unknown version {}", version),
+    };
+    Ok(bimg_info & compressed_bits != 0)
+}
+
+pub fn generate_wal_segment(
+    segno: u64,
+    system_id: u64,
+    pg_version: u32,
+) -> Result<Bytes, SerializeError> {
+    match pg_version {
+        14 => v14::xlog_utils::generate_wal_segment(segno, system_id),
+        15 => v15::xlog_utils::generate_wal_segment(segno, system_id),
+        _ => Err(SerializeError::BadInput), // any version other than 14 or 15
+    }
+}
+
+pub fn generate_pg_control(
+    pg_control_bytes: &[u8],
+    checkpoint_bytes: &[u8],
+    lsn: Lsn,
+    pg_version: u32,
+) -> anyhow::Result<(Bytes, u64)> {
+    match pg_version {
+        14 => v14::xlog_utils::generate_pg_control(pg_control_bytes, checkpoint_bytes, lsn),
+        15 => v15::xlog_utils::generate_pg_control(pg_control_bytes, checkpoint_bytes, lsn),
+        _ => anyhow::bail!("Unknown version {}", pg_version), // only 14 and 15 are dispatched
+    }
+}
+
 // PG timeline is always 1, changing it doesn't have any useful meaning in Neon.
 //
 // NOTE: this is not to be confused with Neon timelines; different concept!
@@ -74,7 +127,7 @@ pub const PG_TLI: u32 = 1;

 //  See TransactionIdIsNormal in transam.h
 pub const fn transaction_id_is_normal(id: TransactionId) -> bool {
-    id > v14::pg_constants::FIRST_NORMAL_TRANSACTION_ID
+    id >= pg_constants::FIRST_NORMAL_TRANSACTION_ID // the first normal XID is itself normal
 }

 // See TransactionIdPrecedes in transam.c
@@ -109,3 +162,76 @@ pub fn page_set_lsn(pg: &mut [u8], lsn: Lsn) {
    pg[0..4].copy_from_slice(&((lsn.0 >> 32) as u32).to_le_bytes());
    pg[4..8].copy_from_slice(&(lsn.0 as u32).to_le_bytes());
 }
+
+pub mod waldecoder {
+    //! Version-independent types of the WAL stream decoder; decoding is dispatched per version.
+    use crate::{v14, v15};
+    use bytes::{Buf, Bytes, BytesMut};
+    use std::num::NonZeroU32;
+    use thiserror::Error;
+    use utils::lsn::Lsn;
+
+    pub enum State {
+        WaitingForRecord,
+        ReassemblingRecord {
+            recordbuf: BytesMut,
+            contlen: NonZeroU32,
+        },
+        SkippingEverything {
+            skip_until_lsn: Lsn,
+        },
+    }
+
+    pub struct WalStreamDecoder {
+        pub lsn: Lsn,
+        pub pg_version: u32, // selects the per-version handler in `poll_decode()`
+        pub inputbuf: BytesMut, // bytes appended by `feed_bytes()`
+        pub state: State,
+    }
+
+    #[derive(Error, Debug, Clone)]
+    #[error("{msg} at {lsn}")]
+    pub struct WalDecodeError {
+        pub msg: String,
+        pub lsn: Lsn,
+    }
+
+    impl WalStreamDecoder {
+        pub fn new(lsn: Lsn, pg_version: u32) -> WalStreamDecoder {
+            WalStreamDecoder {
+                lsn,
+                pg_version,
+                inputbuf: BytesMut::new(),
+                state: State::WaitingForRecord,
+            }
+        }
+
+        /// The latest LSN position fed to the decoder.
+        pub fn available(&self) -> Lsn {
+            self.lsn + self.inputbuf.remaining() as u64
+        }
+
+        pub fn feed_bytes(&mut self, buf: &[u8]) {
+            self.inputbuf.extend_from_slice(buf);
+        }
+
+        pub fn poll_decode(&mut self) -> Result<Option<(Lsn, Bytes)>, WalDecodeError> {
+            match self.pg_version {
+                // This is a trick to support both versions simultaneously: each arm brings a
+                // different version's trait into scope. See WalStreamDecoderHandler comments.
+                14 => {
+                    use self::v14::waldecoder_handler::WalStreamDecoderHandler;
+                    self.poll_decode_internal()
+                }
+                15 => {
+                    use self::v15::waldecoder_handler::WalStreamDecoderHandler;
+                    self.poll_decode_internal()
+                }
+                _ => Err(WalDecodeError {
+                    msg: format!("Unknown version {}", self.pg_version),
+                    lsn: self.lsn,
+                }),
+            }
+        }
+    }
+}
--- a/libs/postgres_ffi/src/nonrelfile_utils.rs
+++ b/libs/postgres_ffi/src/nonrelfile_utils.rs
@@ -1,7 +1,7 @@
 //!
 //! Common utilities for dealing with PostgreSQL non-relation files.
 //!
-use super::pg_constants;
+use crate::pg_constants;
 use crate::transaction_id_precedes;
 use bytes::BytesMut;
 use log::*;
--- a/libs/postgres_ffi/src/pg_constants.rs
+++ b/libs/postgres_ffi/src/pg_constants.rs
@@ -1,14 +1,16 @@
 //!
 //! Misc constants, copied from PostgreSQL headers.
 //!
+//! Only place version-independent constants here.
+//!
 //! TODO: These probably should be auto-generated using bindgen,
 //! rather than copied by hand. Although on the other hand, it's nice
 //! to have them all here in one place, and have the ability to add
 //! comments on them.
 //!

-use super::bindings::{PageHeaderData, XLogRecord};
 use crate::BLCKSZ;
+use crate::{PageHeaderData, XLogRecord};

 //
 // From pg_tablespace_d.h
@@ -16,14 +18,6 @@ use crate::BLCKSZ;
 pub const DEFAULTTABLESPACE_OID: u32 = 1663;
 pub const GLOBALTABLESPACE_OID: u32 = 1664;

-//
-// Fork numbers, from relpath.h
-//
-pub const MAIN_FORKNUM: u8 = 0;
-pub const FSM_FORKNUM: u8 = 1;
-pub const VISIBILITYMAP_FORKNUM: u8 = 2;
-pub const INIT_FORKNUM: u8 = 3;
-
 // From storage_xlog.h
 pub const XLOG_SMGR_CREATE: u8 = 0x10;
 pub const XLOG_SMGR_TRUNCATE: u8 = 0x20;
@@ -114,7 +108,6 @@ pub const XLOG_NEXTOID: u8 = 0x30;
 pub const XLOG_SWITCH: u8 = 0x40;
 pub const XLOG_FPI_FOR_HINT: u8 = 0xA0;
 pub const XLOG_FPI: u8 = 0xB0;
-pub const DB_SHUTDOWNED: u32 = 1;

 // From multixact.h
 pub const FIRST_MULTIXACT_ID: u32 = 1;
@@ -169,10 +162,6 @@ pub const RM_HEAP_ID: u8 = 10;
 pub const XLR_INFO_MASK: u8 = 0x0F;
 pub const XLR_RMGR_INFO_MASK: u8 = 0xF0;

-// from dbcommands_xlog.h
-pub const XLOG_DBASE_CREATE: u8 = 0x00;
-pub const XLOG_DBASE_DROP: u8 = 0x10;
-
 pub const XLOG_TBLSPC_CREATE: u8 = 0x00;
 pub const XLOG_TBLSPC_DROP: u8 = 0x10;

@@ -197,8 +186,6 @@ pub const BKPBLOCK_SAME_REL: u8 = 0x80; /* RelFileNode omitted, same as previous

 /* Information stored in bimg_info */
 pub const BKPIMAGE_HAS_HOLE: u8 = 0x01; /* page image has "hole" */
-pub const BKPIMAGE_IS_COMPRESSED: u8 = 0x02; /* page image is compressed */
-pub const BKPIMAGE_APPLY: u8 = 0x04; /* page image should be restored during replay */

 /* From transam.h */
 pub const FIRST_NORMAL_TRANSACTION_ID: u32 = 3;
--- a/libs/postgres_ffi/src/pg_constants_v14.rs
+++ b/libs/postgres_ffi/src/pg_constants_v14.rs
@@ -0,0 +1,5 @@
+pub const XLOG_DBASE_CREATE: u8 = 0x00; // from dbcommands_xlog.h
+pub const XLOG_DBASE_DROP: u8 = 0x10;
+
+pub const BKPIMAGE_IS_COMPRESSED: u8 = 0x02; /* page image is compressed */
+pub const BKPIMAGE_APPLY: u8 = 0x04; /* page image should be restored during replay */
--- a/libs/postgres_ffi/src/pg_constants_v15.rs
+++ b/libs/postgres_ffi/src/pg_constants_v15.rs
@@ -0,0 +1,10 @@
+pub const XACT_XINFO_HAS_DROPPED_STATS: u32 = 1u32 << 8;
+
+pub const XLOG_DBASE_CREATE_FILE_COPY: u8 = 0x00; // from dbcommands_xlog.h
+pub const XLOG_DBASE_CREATE_WAL_LOG: u8 = 0x10; // must differ from CREATE_FILE_COPY
+pub const XLOG_DBASE_DROP: u8 = 0x20;
+
+pub const BKPIMAGE_APPLY: u8 = 0x02; /* page image should be restored during replay */
+pub const BKPIMAGE_COMPRESS_PGLZ: u8 = 0x04; /* page image is compressed with pglz */
+pub const BKPIMAGE_COMPRESS_LZ4: u8 = 0x08; /* page image is compressed with lz4 */
+pub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed with zstd */
--- a/libs/postgres_ffi/src/relfile_utils.rs
+++ b/libs/postgres_ffi/src/relfile_utils.rs
@@ -1,10 +1,17 @@
 //!
 //! Common utilities for dealing with PostgreSQL relation files.
 //!
-use super::pg_constants;
 use once_cell::sync::OnceCell;
 use regex::Regex;

+//
+// Fork numbers, from relpath.h
+//
+pub const MAIN_FORKNUM: u8 = 0;
+pub const FSM_FORKNUM: u8 = 1;
+pub const VISIBILITYMAP_FORKNUM: u8 = 2;
+pub const INIT_FORKNUM: u8 = 3;
+
 #[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
 pub enum FilePathError {
    #[error("invalid relation fork name")]
@@ -23,10 +30,10 @@ impl From<core::num::ParseIntError> for FilePathError {
 pub fn forkname_to_number(forkname: Option<&str>) -> Result<u8, FilePathError> {
    match forkname {
        // "main" is not in filenames, it's implicit if the fork name is not present
-        None => Ok(pg_constants::MAIN_FORKNUM),
-        Some("fsm") => Ok(pg_constants::FSM_FORKNUM),
-        Some("vm") => Ok(pg_constants::VISIBILITYMAP_FORKNUM),
-        Some("init") => Ok(pg_constants::INIT_FORKNUM),
+        None => Ok(MAIN_FORKNUM),
+        Some("fsm") => Ok(FSM_FORKNUM),
+        Some("vm") => Ok(VISIBILITYMAP_FORKNUM),
+        Some("init") => Ok(INIT_FORKNUM),
        Some(_) => Err(FilePathError::InvalidForkName),
    }
 }
@@ -34,10 +41,10 @@ pub fn forkname_to_number(forkname: Option<&str>) -> Result<u8, FilePathError> {
 /// Convert Postgres fork number to the right suffix of the relation data file.
 pub fn forknumber_to_name(forknum: u8) -> Option<&'static str> {
    match forknum {
-        pg_constants::MAIN_FORKNUM => None,
-        pg_constants::FSM_FORKNUM => Some("fsm"),
-        pg_constants::VISIBILITYMAP_FORKNUM => Some("vm"),
-        pg_constants::INIT_FORKNUM => Some("init"),
+        MAIN_FORKNUM => None,
+        FSM_FORKNUM => Some("fsm"),
+        VISIBILITYMAP_FORKNUM => Some("vm"),
+        INIT_FORKNUM => Some("init"),
        _ => Some("UNKNOWN FORKNUM"),
    }
 }
--- a/libs/postgres_ffi/src/waldecoder_handler.rs
+++ b/libs/postgres_ffi/src/waldecoder_handler.rs
@@ -8,6 +8,7 @@
 //! to look deeper into the WAL records to also understand which blocks they modify, the code
 //! for that is in pageserver/src/walrecord.rs
 //!
+use super::super::waldecoder::{State, WalDecodeError, WalStreamDecoder};
 use super::bindings::{XLogLongPageHeaderData, XLogPageHeaderData, XLogRecord, XLOG_PAGE_MAGIC};
 use super::xlog_utils::*;
 use crate::WAL_SEGMENT_SIZE;
@@ -16,55 +17,26 @@ use crc32c::*;
 use log::*;
 use std::cmp::min;
 use std::num::NonZeroU32;
-use thiserror::Error;
 use utils::lsn::Lsn;

-enum State {
-    WaitingForRecord,
-    ReassemblingRecord {
-        recordbuf: BytesMut,
-        contlen: NonZeroU32,
-    },
-    SkippingEverything {
-        skip_until_lsn: Lsn,
-    },
-}
-
-pub struct WalStreamDecoder {
-    lsn: Lsn,
-    inputbuf: BytesMut,
-    state: State,
-}
-
-#[derive(Error, Debug, Clone)]
-#[error("{msg} at {lsn}")]
-pub struct WalDecodeError {
-    msg: String,
-    lsn: Lsn,
+pub trait WalStreamDecoderHandler {
+    fn validate_page_header(&self, hdr: &XLogPageHeaderData) -> Result<(), WalDecodeError>;
+    fn poll_decode_internal(&mut self) -> Result<Option<(Lsn, Bytes)>, WalDecodeError>;
+    fn complete_record(&mut self, recordbuf: Bytes) -> Result<(Lsn, Bytes), WalDecodeError>;
 }

 //
-// WalRecordStream is a Stream that returns a stream of WAL records
-// FIXME: This isn't a proper rust stream
+// This is a trick to support several postgres versions simultaneously.
 //
-impl WalStreamDecoder {
-    pub fn new(lsn: Lsn) -> WalStreamDecoder {
-        WalStreamDecoder {
-            lsn,
-            inputbuf: BytesMut::new(),
-            state: State::WaitingForRecord,
-        }
-    }
-
-    // The latest LSN position fed to the decoder.
-    pub fn available(&self) -> Lsn {
-        self.lsn + self.inputbuf.remaining() as u64
-    }
-
-    pub fn feed_bytes(&mut self, buf: &[u8]) {
-        self.inputbuf.extend_from_slice(buf);
-    }
-
+// Page decoding code depends on postgres bindings, so it is compiled for each version.
+// Thus WalStreamDecoder implements several WalStreamDecoderHandler traits.
+// WalStreamDecoder poll_decode() method dispatches to the right handler based on the postgres version.
+// Other methods are internal and are not dispatched.
+//
+// It is similar to having several impl blocks for the same struct,
+// but the impls here are in different modules, so need to use a trait.
+//
+impl WalStreamDecoderHandler for WalStreamDecoder {
    fn validate_page_header(&self, hdr: &XLogPageHeaderData) -> Result<(), WalDecodeError> {
        let validate_impl = || {
            if hdr.xlp_magic != XLOG_PAGE_MAGIC as u16 {
@@ -125,7 +97,7 @@ impl WalStreamDecoder {
    ///     Ok(None): there is not enough data in the input buffer. Feed more by calling the `feed_bytes` function
    ///     Err(WalDecodeError): an error occurred while decoding, meaning the input was invalid.
    ///
-    pub fn poll_decode(&mut self) -> Result<Option<(Lsn, Bytes)>, WalDecodeError> {
+    fn poll_decode_internal(&mut self) -> Result<Option<(Lsn, Bytes)>, WalDecodeError> {
        // Run state machine that validates page headers, and reassembles records
        // that cross page boundaries.
        loop {
--- a/libs/postgres_ffi/src/xlog_utils.rs
+++ b/libs/postgres_ffi/src/xlog_utils.rs
@@ -9,12 +9,13 @@

 use crc32c::crc32c_append;

+use super::super::waldecoder::WalStreamDecoder;
 use super::bindings::{
-    CheckPoint, FullTransactionId, TimeLineID, TimestampTz, XLogLongPageHeaderData,
-    XLogPageHeaderData, XLogRecPtr, XLogRecord, XLogSegNo, XLOG_PAGE_MAGIC,
+    CheckPoint, ControlFileData, DBState_DB_SHUTDOWNED, FullTransactionId, TimeLineID, TimestampTz,
+    XLogLongPageHeaderData, XLogPageHeaderData, XLogRecPtr, XLogRecord, XLogSegNo, XLOG_PAGE_MAGIC,
 };
-use super::pg_constants;
-use super::waldecoder::WalStreamDecoder;
+use super::PG_MAJORVERSION;
+use crate::pg_constants;
 use crate::PG_TLI;
 use crate::{uint32, uint64, Oid};
 use crate::{WAL_SEGMENT_SIZE, XLOG_BLCKSZ};
@@ -56,12 +57,10 @@ pub const SIZE_OF_XLOG_RECORD_DATA_HEADER_SHORT: usize = 1 * 2;
 /// in order to let CLOG_TRUNCATE mechanism correctly extend CLOG.
 const XID_CHECKPOINT_INTERVAL: u32 = 1024;

-#[allow(non_snake_case)]
 pub fn XLogSegmentsPerXLogId(wal_segsz_bytes: usize) -> XLogSegNo {
    (0x100000000u64 / wal_segsz_bytes as u64) as XLogSegNo
 }

-#[allow(non_snake_case)]
 pub fn XLogSegNoOffsetToRecPtr(
    segno: XLogSegNo,
    offset: u32,
@@ -70,7 +69,6 @@ pub fn XLogSegNoOffsetToRecPtr(
    segno * (wal_segsz_bytes as u64) + (offset as u64)
 }

-#[allow(non_snake_case)]
 pub fn XLogFileName(tli: TimeLineID, logSegNo: XLogSegNo, wal_segsz_bytes: usize) -> String {
    format!(
        "{:>08X}{:>08X}{:>08X}",
@@ -80,7 +78,6 @@ pub fn XLogFileName(tli: TimeLineID, logSegNo: XLogSegNo, wal_segsz_bytes: usize
    )
 }

-#[allow(non_snake_case)]
 pub fn XLogFromFileName(fname: &str, wal_seg_size: usize) -> (XLogSegNo, TimeLineID) {
    let tli = u32::from_str_radix(&fname[0..8], 16).unwrap();
    let log = u32::from_str_radix(&fname[8..16], 16).unwrap() as XLogSegNo;
@@ -88,12 +85,10 @@ pub fn XLogFromFileName(fname: &str, wal_seg_size: usize) -> (XLogSegNo, TimeLin
    (log * XLogSegmentsPerXLogId(wal_seg_size) + seg, tli)
 }

-#[allow(non_snake_case)]
 pub fn IsXLogFileName(fname: &str) -> bool {
    return fname.len() == XLOG_FNAME_LEN && fname.chars().all(|c| c.is_ascii_hexdigit());
 }

-#[allow(non_snake_case)]
 pub fn IsPartialXLogFileName(fname: &str) -> bool {
    fname.ends_with(".partial") && IsXLogFileName(&fname[0..fname.len() - 8])
 }
@@ -113,6 +108,30 @@ pub fn normalize_lsn(lsn: Lsn, seg_sz: usize) -> Lsn {
    }
 }

+pub fn generate_pg_control(
+    pg_control_bytes: &[u8],
+    checkpoint_bytes: &[u8],
+    lsn: Lsn,
+) -> anyhow::Result<(Bytes, u64)> {
+    let mut control_file = ControlFileData::decode(pg_control_bytes)?;
+    let mut bootstrap_checkpoint = CheckPoint::decode(checkpoint_bytes)?;
+
+    // The new pg_control is needed for bootstrap: redo starts at the normalized `lsn`.
+    bootstrap_checkpoint.redo = normalize_lsn(lsn, WAL_SEGMENT_SIZE).0;
+
+    // Reset fields that we don't want to preserve.
+    // TODO: check this; the value may need to be determined from twophase data.
+    bootstrap_checkpoint.oldestActiveXid = 0;
+
+    // Save the new values in the control file and set its state to DB_SHUTDOWNED.
+    control_file.state = DBState_DB_SHUTDOWNED;
+    control_file.checkPoint = 0;
+    control_file.checkPointCopy = bootstrap_checkpoint;
+
+    let system_id = control_file.system_identifier;
+    Ok((control_file.encode(), system_id))
+}
+
 pub fn get_current_timestamp() -> TimestampTz {
    to_pg_timestamp(SystemTime::now())
 }
@@ -144,7 +163,10 @@ pub fn find_end_of_wal(
    let mut result = start_lsn;
    let mut curr_lsn = start_lsn;
    let mut buf = [0u8; XLOG_BLCKSZ];
-    let mut decoder = WalStreamDecoder::new(start_lsn);
+    let pg_version = PG_MAJORVERSION[1..3].parse::<u32>().unwrap();
+    debug!("find_end_of_wal PG_VERSION: {}", pg_version);
+
+    let mut decoder = WalStreamDecoder::new(start_lsn, pg_version);

    // loop over segments
    loop {
@@ -154,7 +176,7 @@ pub fn find_end_of_wal(
        match open_wal_segment(&seg_file_path)? {
            None => {
                // no more segments
-                info!(
+                debug!(
                    "find_end_of_wal reached end at {:?}, segment {:?} doesn't exist",
                    result, seg_file_path
                );
@@ -177,7 +199,7 @@ pub fn find_end_of_wal(
                        match decoder.poll_decode() {
                            Ok(Some(record)) => result = record.0,
                            Err(e) => {
-                                info!(
+                                debug!(
                                    "find_end_of_wal reached end at {:?}, decode error: {:?}",
                                    result, e
                                );
@@ -438,12 +460,15 @@ mod tests {
    fn test_end_of_wal<C: wal_craft::Crafter>(test_name: &str) {
        use wal_craft::*;

+        let pg_version = PG_MAJORVERSION[1..3].parse::<u32>().unwrap();
+
        // Craft some WAL
        let top_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("..")
            .join("..");
        let cfg = Conf {
-            pg_distrib_dir: top_path.join(format!("pg_install/{PG_MAJORVERSION}")),
+            pg_version,
+            pg_distrib_dir: top_path.join("pg_install"),
            datadir: top_path.join(format!("test_output/{}-{PG_MAJORVERSION}", test_name)),
        };
        if cfg.datadir.exists() {
--- a/libs/postgres_ffi/wal_craft/Cargo.toml
+++ b/libs/postgres_ffi/wal_craft/Cargo.toml
@@ -7,7 +7,7 @@ edition = "2021"

 [dependencies]
 anyhow = "1.0"
-clap = "3.0"
+clap = "4.0"
 env_logger = "0.9"
 log = "0.4"
 once_cell = "1.13.0"
--- a/libs/postgres_ffi/wal_craft/src/bin/wal_craft.rs
+++ b/libs/postgres_ffi/wal_craft/src/bin/wal_craft.rs
@@ -1,61 +1,19 @@
 use anyhow::*;
-use clap::{App, Arg, ArgMatches};
-use std::str::FromStr;
+use clap::{value_parser, Arg, ArgMatches, Command};
+use std::{path::PathBuf, str::FromStr};
 use wal_craft::*;

 fn main() -> Result<()> {
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("wal_craft=info"))
        .init();
-    let type_arg = &Arg::new("type")
-        .takes_value(true)
-        .help("Type of WAL to craft")
-        .possible_values([
-            Simple::NAME,
-            LastWalRecordXlogSwitch::NAME,
-            LastWalRecordXlogSwitchEndsOnPageBoundary::NAME,
-            WalRecordCrossingSegmentFollowedBySmallOne::NAME,
-            LastWalRecordCrossingSegment::NAME,
-        ])
-        .required(true);
-    let arg_matches = App::new("Postgres WAL crafter")
-        .about("Crafts Postgres databases with specific WAL properties")
-        .subcommand(
-            App::new("print-postgres-config")
-                .about("Print the configuration required for PostgreSQL server before running this script")
-        )
-        .subcommand(
-            App::new("with-initdb")
-                .about("Craft WAL in a new data directory first initialized with initdb")
-                .arg(type_arg)
-                .arg(
-                    Arg::new("datadir")
-                        .takes_value(true)
-                        .help("Data directory for the Postgres server")
-                        .required(true)
-                )
-                .arg(
-                    Arg::new("pg-distrib-dir")
-                        .long("pg-distrib-dir")
-                        .takes_value(true)
-                        .help("Directory with Postgres distribution (bin and lib directories, e.g. pg_install/v14)")
-                        .default_value("/usr/local")
-                )
-        )
-        .subcommand(
-            App::new("in-existing")
-                .about("Craft WAL at an existing recently created Postgres database. Note that server may append new WAL entries on shutdown.")
-                .arg(type_arg)
-                .arg(
-                    Arg::new("connection")
-                        .takes_value(true)
-                        .help("Connection string to the Postgres database to populate")
-                        .required(true)
-                )
-        )
-        .get_matches();
+    let arg_matches = cli().get_matches();

    let wal_craft = |arg_matches: &ArgMatches, client| {
-        let (intermediate_lsns, end_of_wal_lsn) = match arg_matches.value_of("type").unwrap() {
+        let (intermediate_lsns, end_of_wal_lsn) = match arg_matches
+            .get_one::<String>("type")
+            .map(|s| s.as_str())
+            .context("'type' is required")?
+        {
            Simple::NAME => Simple::craft(client)?,
            LastWalRecordXlogSwitch::NAME => LastWalRecordXlogSwitch::craft(client)?,
            LastWalRecordXlogSwitchEndsOnPageBoundary::NAME => {
@@ -65,12 +23,12 @@ fn main() -> Result<()> {
                WalRecordCrossingSegmentFollowedBySmallOne::craft(client)?
            }
            LastWalRecordCrossingSegment::NAME => LastWalRecordCrossingSegment::craft(client)?,
-            a => panic!("Unknown --type argument: {}", a),
+            a => panic!("Unknown --type argument: {a}"),
        };
        for lsn in intermediate_lsns {
-            println!("intermediate_lsn = {}", lsn);
+            println!("intermediate_lsn = {lsn}");
        }
-        println!("end_of_wal = {}", end_of_wal_lsn);
+        println!("end_of_wal = {end_of_wal_lsn}");
        Ok(())
    };

@@ -78,14 +36,24 @@ fn main() -> Result<()> {
        None => panic!("No subcommand provided"),
        Some(("print-postgres-config", _)) => {
            for cfg in REQUIRED_POSTGRES_CONFIG.iter() {
-                println!("{}", cfg);
+                println!("{cfg}");
            }
            Ok(())
        }
+
        Some(("with-initdb", arg_matches)) => {
            let cfg = Conf {
-                pg_distrib_dir: arg_matches.value_of("pg-distrib-dir").unwrap().into(),
-                datadir: arg_matches.value_of("datadir").unwrap().into(),
+                pg_version: *arg_matches
+                    .get_one::<u32>("pg-version")
+                    .context("'pg-version' is required")?,
+                pg_distrib_dir: arg_matches
+                    .get_one::<PathBuf>("pg-distrib-dir")
+                    .context("'pg-distrib-dir' is required")?
+                    .to_owned(),
+                datadir: arg_matches
+                    .get_one::<PathBuf>("datadir")
+                    .context("'datadir' is required")?
+                    .to_owned(),
            };
            cfg.initdb()?;
            let srv = cfg.start_server()?;
@@ -95,9 +63,77 @@ fn main() -> Result<()> {
        }
        Some(("in-existing", arg_matches)) => wal_craft(
            arg_matches,
-            &mut postgres::Config::from_str(arg_matches.value_of("connection").unwrap())?
-                .connect(postgres::NoTls)?,
+            &mut postgres::Config::from_str(
+                arg_matches
+                    .get_one::<String>("connection")
+                    .context("'connection' is required")?,
+            )
+            .context(
+                "'connection' argument value could not be parsed as a postgres connection string",
+            )?
+            .connect(postgres::NoTls)?,
        ),
        Some(_) => panic!("Unknown subcommand"),
    }
 }
+
+/// Builds the clap command-line definition of the WAL crafter.
+fn cli() -> Command {
+    let type_arg = &Arg::new("type")
+        .help("Type of WAL to craft")
+        .value_parser([
+            Simple::NAME,
+            LastWalRecordXlogSwitch::NAME,
+            LastWalRecordXlogSwitchEndsOnPageBoundary::NAME,
+            WalRecordCrossingSegmentFollowedBySmallOne::NAME,
+            LastWalRecordCrossingSegment::NAME,
+        ])
+        .required(true);
+
+    Command::new("Postgres WAL crafter")
+        .about("Crafts Postgres databases with specific WAL properties")
+        .subcommand(
+            Command::new("print-postgres-config")
+                .about("Print the configuration required for PostgreSQL server before running this script")
+        )
+        .subcommand(
+            Command::new("with-initdb")
+                .about("Craft WAL in a new data directory first initialized with initdb")
+                .arg(type_arg)
+                .arg(
+                    Arg::new("datadir")
+                        .help("Data directory for the Postgres server")
+                        .value_parser(value_parser!(PathBuf))
+                        .required(true)
+                )
+                .arg(
+                    Arg::new("pg-distrib-dir")
+                        .long("pg-distrib-dir")
+                        .value_parser(value_parser!(PathBuf))
+                        .help("Directory with Postgres distributions (e.g. pg_install containing `v14/bin` and `v14/lib`)")
+                        .default_value("/usr/local")
+                )
+                .arg(
+                    Arg::new("pg-version")
+                        .long("pg-version")
+                        .help("Postgres major version to craft WAL with (14 or 15)")
+                        .value_parser(value_parser!(u32))
+                        .required(true)
+                )
+        )
+        .subcommand(
+            Command::new("in-existing")
+                .about("Craft WAL at an existing recently created Postgres database. Note that server may append new WAL entries on shutdown.")
+                .arg(type_arg)
+                .arg(
+                    Arg::new("connection")
+                        .help("Connection string to the Postgres database to populate")
+                        .required(true)
+                )
+        )
+}
+
+#[test]
+fn verify_cli() {
+    cli().debug_assert(); // clap's own consistency check of the command definition
+}
--- a/libs/postgres_ffi/wal_craft/src/lib.rs
+++ b/libs/postgres_ffi/wal_craft/src/lib.rs
@@ -15,6 +15,7 @@ use tempfile::{tempdir, TempDir};

 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct Conf {
+    pub pg_version: u32,
    pub pg_distrib_dir: PathBuf,
    pub datadir: PathBuf,
 }
@@ -36,12 +37,22 @@ pub static REQUIRED_POSTGRES_CONFIG: Lazy<Vec<&'static str>> = Lazy::new(|| {
 });

 impl Conf {
-    fn pg_bin_dir(&self) -> PathBuf {
-        self.pg_distrib_dir.join("bin")
+    /// Root of the distribution for `pg_version`: the `v<version>` subdirectory of `pg_distrib_dir`.
+    pub fn pg_distrib_dir(&self) -> anyhow::Result<PathBuf> {
+        match self.pg_version {
+            14 | 15 => Ok(self
+                .pg_distrib_dir
+                .join(format!("v{}", self.pg_version))),
+            _ => bail!("Unsupported postgres version: {}", self.pg_version),
+        }
    }

-    fn pg_lib_dir(&self) -> PathBuf {
-        self.pg_distrib_dir.join("lib")
+    fn pg_bin_dir(&self) -> anyhow::Result<PathBuf> {
+        self.pg_distrib_dir().map(|root| root.join("bin")) // `bin` under the versioned root
+    }
+
+    fn pg_lib_dir(&self) -> anyhow::Result<PathBuf> {
+        self.pg_distrib_dir().map(|root| root.join("lib")) // `lib` under the versioned root
    }

    pub fn wal_dir(&self) -> PathBuf {
@@ -49,12 +60,12 @@ impl Conf {
    }

    fn new_pg_command(&self, command: impl AsRef<Path>) -> Result<Command> {
-        let path = self.pg_bin_dir().join(command);
+        let path = self.pg_bin_dir()?.join(command);
        ensure!(path.exists(), "Command {:?} does not exist", path);
        let mut cmd = Command::new(path);
        cmd.env_clear()
-            .env("LD_LIBRARY_PATH", self.pg_lib_dir())
-            .env("DYLD_LIBRARY_PATH", self.pg_lib_dir());
+            .env("LD_LIBRARY_PATH", self.pg_lib_dir()?)
+            .env("DYLD_LIBRARY_PATH", self.pg_lib_dir()?);
        Ok(cmd)
    }

--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -15,7 +15,7 @@ serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
 tokio = { version = "1.17", features = ["sync", "macros", "fs", "io-util"] }
 tokio-util = { version = "0.7", features = ["io"] }
-toml_edit = { version = "0.13", features = ["easy"] }
+toml_edit = { version = "0.14", features = ["easy"] }
 tracing = "0.1.27"

 workspace_hack = { version = "0.1", path = "../../workspace_hack" }
--- a/libs/remote_storage/src/local_fs.rs
+++ b/libs/remote_storage/src/local_fs.rs
@@ -16,7 +16,7 @@ use tokio::{
    io::{self, AsyncReadExt, AsyncSeekExt, AsyncWriteExt},
 };
 use tracing::*;
-use utils::crashsafe_dir::path_with_suffix_extension;
+use utils::crashsafe::path_with_suffix_extension;

 use crate::{Download, DownloadError, RemoteObjectId};

--- a/libs/safekeeper_api/Cargo.toml
+++ b/libs/safekeeper_api/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "safekeeper_api"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+serde = { version = "1.0", features = ["derive"] }
+serde_with = "2.0"
+const_format = "0.2.21"
+
+utils = { path = "../utils" }
+workspace_hack = { version = "0.1", path = "../../workspace_hack" }
--- a/libs/safekeeper_api/src/lib.rs
+++ b/libs/safekeeper_api/src/lib.rs
@@ -0,0 +1,10 @@
+use const_format::formatcp;
+
+/// Public API types
+pub mod models;
+
+pub const DEFAULT_PG_LISTEN_PORT: u16 = 5454;
+pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
+
+pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 7676;
+pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
--- a/libs/safekeeper_api/src/models.rs
+++ b/libs/safekeeper_api/src/models.rs
@@ -0,0 +1,24 @@
+use serde::{Deserialize, Serialize};
+use serde_with::{serde_as, DisplayFromStr};
+
+use utils::{
+    id::{NodeId, TenantId, TimelineId},
+    lsn::Lsn,
+};
+
+#[serde_as]
+#[derive(Serialize, Deserialize)]
+pub struct TimelineCreateRequest {
+    #[serde_as(as = "DisplayFromStr")]
+    pub tenant_id: TenantId,
+    #[serde_as(as = "DisplayFromStr")]
+    pub timeline_id: TimelineId,
+    pub peer_ids: Option<Vec<NodeId>>,
+    pub pg_version: u32,
+    pub system_id: Option<u64>,
+    pub wal_seg_size: Option<u32>,
+    #[serde_as(as = "DisplayFromStr")]
+    pub commit_lsn: Lsn,
+    // If not passed, it is assigned to the beginning of commit_lsn segment.
+    pub local_start_lsn: Option<Lsn>,
+}
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -20,7 +20,7 @@ tokio = { version = "1.17", features = ["macros"]}
 tokio-rustls = "0.23"
 tracing = "0.1"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
-nix = "0.23.0"
+nix = "0.25"
 signal-hook = "0.3.10"
 rand = "0.8.3"
 jsonwebtoken = "8"
@@ -28,7 +28,7 @@ hex = { version = "0.4.3", features = ["serde"] }
 rustls = "0.20.2"
 rustls-split = "0.3.0"
 git-version = "0.3.5"
-serde_with = "1.12.0"
+serde_with = "2.0"
 once_cell = "1.13.0"


@@ -40,7 +40,7 @@ byteorder = "1.4.3"
 bytes = "1.0.1"
 hex-literal = "0.3"
 tempfile = "3.2"
-criterion = "0.3"
+criterion = "0.4"
 rustls-pemfile = "1"

 [[bench]]
--- a/libs/utils/src/crashsafe_dir.rs
+++ b/libs/utils/src/crashsafe_dir.rs
@@ -12,16 +12,8 @@ pub fn create_dir(path: impl AsRef<Path>) -> io::Result<()> {
    let path = path.as_ref();

    fs::create_dir(path)?;
-    File::open(path)?.sync_all()?;
-
-    if let Some(parent) = path.parent() {
-        File::open(parent)?.sync_all()
-    } else {
-        Err(io::Error::new(
-            io::ErrorKind::InvalidInput,
-            "can't find parent",
-        ))
-    }
+    fsync_file_and_parent(path)?;
+    Ok(())
 }

 /// Similar to [`std::fs::create_dir_all`], except we fsync all
@@ -65,12 +57,12 @@ pub fn create_dir_all(path: impl AsRef<Path>) -> io::Result<()> {

    // Fsync the created directories from child to parent.
    for &path in dirs_to_create.iter() {
-        File::open(path)?.sync_all()?;
+        fsync(path)?;
    }

    // If we created any new directories, fsync the parent.
    if !dirs_to_create.is_empty() {
-        File::open(path)?.sync_all()?;
+        fsync(path)?;
    }

    Ok(())
@@ -92,6 +84,33 @@ pub fn path_with_suffix_extension(original_path: impl AsRef<Path>, suffix: &str)
        .with_extension(new_extension.as_ref())
 }

+/// Fsyncs `file_path` itself and then its parent directory.
+pub fn fsync_file_and_parent(file_path: &Path) -> io::Result<()> {
+    // Resolve the parent first, so a path without one fails before anything is synced.
+    let parent = match file_path.parent() {
+        Some(dir) => dir,
+        None => {
+            let msg = format!("File {file_path:?} has no parent");
+            return Err(io::Error::new(io::ErrorKind::Other, msg));
+        }
+    };
+    fsync(file_path).and_then(|()| fsync(parent))
+}
+
+/// Opens `path` and flushes its data and metadata to disk via `sync_all`.
+pub fn fsync(path: &Path) -> io::Result<()> {
+    // Every failure gets the same outer context; the original error kind is kept.
+    let outer = |e: io::Error| io::Error::new(e.kind(), format!("Failed to fsync file {path:?}: {e}"));
+    let file = File::open(path)
+        .map_err(|e| io::Error::new(e.kind(), format!("Failed to open the file {path:?}: {e}")))
+        .map_err(outer)?;
+    let synced = file.sync_all().map_err(|e| {
+        let msg = format!("Failed to sync file {path:?} data and metadata: {e}");
+        io::Error::new(e.kind(), msg)
+    });
+    synced.map_err(outer)
+}
+
 #[cfg(test)]
 mod tests {
    use tempfile::tempdir;
--- a/libs/utils/src/http/endpoint.rs
+++ b/libs/utils/src/http/endpoint.rs
@@ -9,6 +9,7 @@ use once_cell::sync::Lazy;
 use routerify::ext::RequestExt;
 use routerify::RequestInfo;
 use routerify::{Middleware, Router, RouterBuilder, RouterService};
+use tokio::task::JoinError;
 use tracing::info;

 use std::future::Future;
@@ -35,7 +36,13 @@ async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<Body
    let mut buffer = vec![];
    let encoder = TextEncoder::new();

-    let metrics = metrics::gather();
+    let metrics = tokio::task::spawn_blocking(move || {
+        // Currently we take a lot of mutexes while collecting metrics, so it's
+        // better to spawn a blocking task to avoid blocking the event loop.
+        metrics::gather()
+    })
+    .await
+    .map_err(|e: JoinError| ApiError::InternalServerError(e.into()))?;
    encoder.encode(&metrics, &mut buffer).unwrap();

    let response = Response::builder()
--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -22,8 +22,8 @@ pub mod pq_proto;
 // dealing with connstring parsing and handy access to it's parts
 pub mod connstring;

-// helper functions for creating and fsyncing directories/trees
-pub mod crashsafe_dir;
+// helper functions for creating and fsyncing
+pub mod crashsafe;

 // common authentication routines
 pub mod auth;
--- a/libs/utils/src/lsn.rs
+++ b/libs/utils/src/lsn.rs
@@ -66,6 +66,11 @@ impl Lsn {
        (self.0 % seg_sz as u64) as usize
    }

+    /// Compute the LSN at which the segment containing this LSN starts (rounds down to a multiple of `seg_sz`).
+    pub fn segment_lsn(self, seg_sz: usize) -> Lsn {
+        Lsn(self.0 - (self.0 % seg_sz as u64))
+    }
+
    /// Compute the segment number
    pub fn segment_number(self, seg_sz: usize) -> u64 {
        self.0 / seg_sz as u64
--- a/libs/utils/src/postgres_backend_async.rs
+++ b/libs/utils/src/postgres_backend_async.rs
@@ -15,7 +15,7 @@ use std::sync::Arc;
 use std::task::Poll;
 use tracing::{debug, error, trace};

-use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
+use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, BufReader};
 use tokio_rustls::TlsAcceptor;

 #[async_trait::async_trait]
@@ -66,8 +66,8 @@ pub enum ProcessMsgResult {
 /// Always-writeable sock_split stream.
 /// May not be readable. See [`PostgresBackend::take_stream_in`]
 pub enum Stream {
-    Unencrypted(tokio::net::TcpStream),
-    Tls(Box<tokio_rustls::server::TlsStream<tokio::net::TcpStream>>),
+    Unencrypted(BufReader<tokio::net::TcpStream>),
+    Tls(Box<tokio_rustls::server::TlsStream<BufReader<tokio::net::TcpStream>>>),
    Broken,
 }

@@ -157,7 +157,7 @@ impl PostgresBackend {
        let peer_addr = socket.peer_addr()?;

        Ok(Self {
-            stream: Stream::Unencrypted(socket),
+            stream: Stream::Unencrypted(BufReader::new(socket)),
            buf_out: BytesMut::with_capacity(10 * 1024),
            state: ProtoState::Initialization,
            md5_salt: [0u8; 4],
--- a/libs/utils/src/pq_proto.rs
+++ b/libs/utils/src/pq_proto.rs
@@ -10,6 +10,7 @@ use serde::{Deserialize, Serialize};
 use std::{
    borrow::Cow,
    collections::HashMap,
+    fmt,
    future::Future,
    io::{self, Cursor},
    str,
@@ -124,6 +125,19 @@ pub struct CancelKeyData {
    pub cancel_key: i32,
 }

+impl fmt::Display for CancelKeyData {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Pack both halves into one 64-bit id. `cancel_key` is an `i32`, and a plain
+        // `as u64` would sign-extend negative keys over the `backend_pid` bits.
+        let hi = (self.backend_pid as u64) << 32;
+        let lo = self.cancel_key as u32 as u64;
+        // This format is more compact and might work better for logs.
+        f.debug_tuple("CancelKeyData")
+            .field(&format_args!("{:x}", hi | lo))
+            .finish()
+    }
+}
+
 use rand::distributions::{Distribution, Standard};
 impl Distribution<CancelKeyData> for Standard {
    fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> CancelKeyData {
--- a/libs/utils/src/seqwait.rs
+++ b/libs/utils/src/seqwait.rs
@@ -240,7 +240,6 @@ where
 mod tests {
    use super::*;
    use std::sync::Arc;
-    use std::thread::sleep;
    use std::time::Duration;

    impl MonotonicCounter<i32> for i32 {
@@ -258,17 +257,19 @@ mod tests {
        let seq = Arc::new(SeqWait::new(0));
        let seq2 = Arc::clone(&seq);
        let seq3 = Arc::clone(&seq);
-        tokio::task::spawn(async move {
+        let jh1 = tokio::task::spawn(async move {
            seq2.wait_for(42).await.expect("wait_for 42");
            let old = seq2.advance(100);
            assert_eq!(old, 99);
-            seq2.wait_for(999).await.expect_err("no 999");
+            seq2.wait_for_timeout(999, Duration::from_millis(100))
+                .await
+                .expect_err("no 999");
        });
-        tokio::task::spawn(async move {
+        let jh2 = tokio::task::spawn(async move {
            seq3.wait_for(42).await.expect("wait_for 42");
            seq3.wait_for(0).await.expect("wait_for 0");
        });
-        sleep(Duration::from_secs(1));
+        tokio::time::sleep(Duration::from_millis(200)).await;
        let old = seq.advance(99);
        assert_eq!(old, 0);
        seq.wait_for(100).await.expect("wait_for 100");
@@ -277,6 +278,9 @@ mod tests {
        assert_eq!(seq.advance(98), 100);
        assert_eq!(seq.load(), 100);

+        jh1.await.unwrap();
+        jh2.await.unwrap();
+
        seq.shutdown();
    }

@@ -284,15 +288,18 @@ mod tests {
    async fn seqwait_timeout() {
        let seq = Arc::new(SeqWait::new(0));
        let seq2 = Arc::clone(&seq);
-        tokio::task::spawn(async move {
+        let jh = tokio::task::spawn(async move {
            let timeout = Duration::from_millis(1);
            let res = seq2.wait_for_timeout(42, timeout).await;
            assert_eq!(res, Err(SeqWaitError::Timeout));
        });
-        tokio::time::sleep(Duration::from_secs(1)).await;
+        tokio::time::sleep(Duration::from_millis(200)).await;
        // This will attempt to wake, but nothing will happen
        // because the waiter already dropped its Receiver.
        let old = seq.advance(99);
-        assert_eq!(old, 0)
+        assert_eq!(old, 0);
+        jh.await.unwrap();
+
+        seq.shutdown();
    }
 }
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -23,7 +23,7 @@ futures = "0.3.13"
 hex = "0.4.3"
 hyper = "0.14"
 itertools = "0.10.3"
-clap = "3.0"
+clap = { version = "4.0", features = ["string"] }
 daemonize = "0.4.1"
 tokio = { version = "1.17", features = ["process", "sync", "macros", "fs", "rt", "io-util", "time"] }
 tokio-util = { version = "0.7.3", features = ["io", "io-util"] }
@@ -38,26 +38,27 @@ tar = "0.4.33"
 humantime = "2.1.0"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
-serde_with = "1.12.0"
+serde_with = "2.0"
 humantime-serde = "1.1.1"

 pprof = { git = "https://github.com/neondatabase/pprof-rs.git", branch = "wallclock-profiling", features = ["flamegraph"], optional = true }

-toml_edit = { version = "0.13", features = ["easy"] }
+toml_edit = { version = "0.14", features = ["easy"] }
 scopeguard = "1.1.0"
 const_format = "0.2.21"
 tracing = "0.1.36"
 signal-hook = "0.3.10"
 url = "2"
-nix = "0.23"
+nix = "0.25"
 once_cell = "1.13.0"
 crossbeam-utils = "0.8.5"
 fail = "0.5.0"
 git-version = "0.3.5"
 rstar = "0.9.3"
 num-traits = "0.2.15"
-amplify_num = "0.4.1"
+amplify_num = { git = "https://github.com/hlinnaka/rust-amplify.git", branch = "unsigned-int-perf" }

+pageserver_api = { path = "../libs/pageserver_api" }
 postgres_ffi = { path = "../libs/postgres_ffi" }
 etcd_broker = { path = "../libs/etcd_broker" }
 metrics = { path = "../libs/metrics" }
@@ -68,5 +69,10 @@ close_fds = "0.3.2"
 walkdir = "2.3.2"

 [dev-dependencies]
+criterion = "0.4"
 hex-literal = "0.3"
 tempfile = "3.2"
+
+[[bench]]
+name = "bench_layer_map"
+harness = false
--- a/pageserver/benches/bench_layer_map.rs
+++ b/pageserver/benches/bench_layer_map.rs
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -25,10 +25,10 @@ use tracing::*;
 use crate::reltag::{RelTag, SlruKind};
 use crate::tenant::Timeline;

-use postgres_ffi::v14::pg_constants;
-use postgres_ffi::v14::xlog_utils::{generate_wal_segment, normalize_lsn, XLogFileName};
-use postgres_ffi::v14::{CheckPoint, ControlFileData};
+use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
+use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PGDATA_SUBDIRS, PG_HBA};
 use postgres_ffi::TransactionId;
+use postgres_ffi::XLogFileName;
 use postgres_ffi::PG_TLI;
 use postgres_ffi::{BLCKSZ, RELSEG_SIZE, WAL_SEGMENT_SIZE};
 use utils::lsn::Lsn;
@@ -129,15 +129,15 @@ where
        // TODO include checksum

        // Create pgdata subdirs structure
-        for dir in pg_constants::PGDATA_SUBDIRS.iter() {
+        for dir in PGDATA_SUBDIRS.iter() {
            let header = new_tar_header_dir(*dir)?;
            self.ar.append(&header, &mut io::empty())?;
        }

        // Send empty config files.
-        for filepath in pg_constants::PGDATA_SPECIAL_FILES.iter() {
+        for filepath in PGDATA_SPECIAL_FILES.iter() {
            if *filepath == "pg_hba.conf" {
-                let data = pg_constants::PG_HBA.as_bytes();
+                let data = PG_HBA.as_bytes();
                let header = new_tar_header(filepath, data.len() as u64)?;
                self.ar.append(&header, data)?;
            } else {
@@ -267,16 +267,12 @@ where
            None
        };

-        // TODO pass this as a parameter
-        let pg_version = "14";
+        if spcnode == GLOBALTABLESPACE_OID {
+            let pg_version_str = self.timeline.pg_version.to_string();
+            let header = new_tar_header("PG_VERSION", pg_version_str.len() as u64)?;
+            self.ar.append(&header, pg_version_str.as_bytes())?;

-        if spcnode == pg_constants::GLOBALTABLESPACE_OID {
-            let version_bytes = pg_version.as_bytes();
-            let header = new_tar_header("PG_VERSION", version_bytes.len() as u64)?;
-            self.ar.append(&header, version_bytes)?;
-
-            let header = new_tar_header("global/PG_VERSION", version_bytes.len() as u64)?;
-            self.ar.append(&header, version_bytes)?;
+            info!("timeline.pg_version {}", self.timeline.pg_version);

            if let Some(img) = relmap_img {
                // filenode map for global tablespace
@@ -305,7 +301,7 @@ where
                return Ok(());
            }
            // User defined tablespaces are not supported
-            ensure!(spcnode == pg_constants::DEFAULTTABLESPACE_OID);
+            ensure!(spcnode == DEFAULTTABLESPACE_OID);

            // Append dir path for each database
            let path = format!("base/{}", dbnode);
@@ -314,9 +310,10 @@ where

            if let Some(img) = relmap_img {
                let dst_path = format!("base/{}/PG_VERSION", dbnode);
-                let version_bytes = pg_version.as_bytes();
-                let header = new_tar_header(&dst_path, version_bytes.len() as u64)?;
-                self.ar.append(&header, version_bytes)?;
+
+                let pg_version_str = self.timeline.pg_version.to_string();
+                let header = new_tar_header(&dst_path, pg_version_str.len() as u64)?;
+                self.ar.append(&header, pg_version_str.as_bytes())?;

                let relmap_path = format!("base/{}/pg_filenode.map", dbnode);
                let header = new_tar_header(&relmap_path, img.len() as u64)?;
@@ -348,30 +345,6 @@ where
    // Also send zenith.signal file with extra bootstrap data.
    //
    fn add_pgcontrol_file(&mut self) -> anyhow::Result<()> {
-        let checkpoint_bytes = self
-            .timeline
-            .get_checkpoint(self.lsn)
-            .context("failed to get checkpoint bytes")?;
-        let pg_control_bytes = self
-            .timeline
-            .get_control_file(self.lsn)
-            .context("failed get control bytes")?;
-        let mut pg_control = ControlFileData::decode(&pg_control_bytes)?;
-        let mut checkpoint = CheckPoint::decode(&checkpoint_bytes)?;
-
-        // Generate new pg_control needed for bootstrap
-        checkpoint.redo = normalize_lsn(self.lsn, WAL_SEGMENT_SIZE).0;
-
-        //reset some fields we don't want to preserve
-        //TODO Check this.
-        //We may need to determine the value from twophase data.
-        checkpoint.oldestActiveXid = 0;
-
-        //save new values in pg_control
-        pg_control.checkPoint = 0;
-        pg_control.checkPointCopy = checkpoint;
-        pg_control.state = pg_constants::DB_SHUTDOWNED;
-
        // add zenith.signal file
        let mut zenith_signal = String::new();
        if self.prev_record_lsn == Lsn(0) {
@@ -388,8 +361,23 @@ where
            zenith_signal.as_bytes(),
        )?;

+        let checkpoint_bytes = self
+            .timeline
+            .get_checkpoint(self.lsn)
+            .context("failed to get checkpoint bytes")?;
+        let pg_control_bytes = self
+            .timeline
+            .get_control_file(self.lsn)
+            .context("failed get control bytes")?;
+
+        let (pg_control_bytes, system_identifier) = postgres_ffi::generate_pg_control(
+            &pg_control_bytes,
+            &checkpoint_bytes,
+            self.lsn,
+            self.timeline.pg_version,
+        )?;
+
        //send pg_control
-        let pg_control_bytes = pg_control.encode();
        let header = new_tar_header("global/pg_control", pg_control_bytes.len() as u64)?;
        self.ar.append(&header, &pg_control_bytes[..])?;

@@ -398,8 +386,10 @@ where
        let wal_file_name = XLogFileName(PG_TLI, segno, WAL_SEGMENT_SIZE);
        let wal_file_path = format!("pg_wal/{}", wal_file_name);
        let header = new_tar_header(&wal_file_path, WAL_SEGMENT_SIZE as u64)?;
-        let wal_seg = generate_wal_segment(segno, pg_control.system_identifier)
-            .map_err(|e| anyhow!(e).context("Failed generating wal segment"))?;
+
+        let wal_seg =
+            postgres_ffi::generate_wal_segment(segno, system_identifier, self.timeline.pg_version)
+                .map_err(|e| anyhow!(e).context("Failed generating wal segment"))?;
        ensure!(wal_seg.len() == WAL_SEGMENT_SIZE);
        self.ar.append(&header, &wal_seg[..])?;
        Ok(())
--- a/pageserver/src/bin/dump_layerfile.rs
+++ b/pageserver/src/bin/dump_layerfile.rs
@@ -1,35 +0,0 @@
-//! Main entry point for the dump_layerfile executable
-//!
-//! A handy tool for debugging, that's all.
-use anyhow::Result;
-use clap::{App, Arg};
-use pageserver::page_cache;
-use pageserver::tenant::dump_layerfile_from_path;
-use pageserver::virtual_file;
-use std::path::PathBuf;
-use utils::project_git_version;
-
-project_git_version!(GIT_VERSION);
-
-fn main() -> Result<()> {
-    let arg_matches = App::new("Neon dump_layerfile utility")
-        .about("Dump contents of one layer file, for debugging")
-        .version(GIT_VERSION)
-        .arg(
-            Arg::new("path")
-                .help("Path to file to dump")
-                .required(true)
-                .index(1),
-        )
-        .get_matches();
-
-    let path = PathBuf::from(arg_matches.value_of("path").unwrap());
-
-    // Basic initialization of things that don't change after startup
-    virtual_file::init(10);
-    page_cache::init(100);
-
-    dump_layerfile_from_path(&path, true)?;
-
-    Ok(())
-}
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -6,10 +6,12 @@ use tracing::*;

 use anyhow::{anyhow, bail, Context, Result};

-use clap::{App, Arg};
+use clap::{Arg, ArgAction, Command};
 use daemonize::Daemonize;

 use fail::FailScenario;
+use metrics::set_build_info_metric;
+
 use pageserver::{
    config::{defaults::*, PageServerConf},
    http, page_cache, page_service, profiling, task_mgr,
@@ -31,72 +33,35 @@ use utils::{

 project_git_version!(GIT_VERSION);

+/// Compile-time features of this crate enabled in the build (shown by `--enabled-features`).
+const FEATURES: &[&str] = &[
+    #[cfg(feature = "testing")]
+    "testing",
+    // `fail/failpoints` is a dependency's feature, invisible to `cfg(feature)`; see `version()`.
+    #[cfg(feature = "profiling")]
+    "profiling",
+];
+
 fn version() -> String {
    format!(
-        "{GIT_VERSION} profiling:{} failpoints:{}",
-        cfg!(feature = "profiling"),
-        fail::has_failpoints()
+        "{GIT_VERSION} failpoints: {}, features: {:?}",
+        fail::has_failpoints(),
+        FEATURES,
    )
 }

 fn main() -> anyhow::Result<()> {
-    let arg_matches = App::new("Neon page server")
-        .about("Materializes WAL stream to pages and serves them to the postgres")
-        .version(&*version())
-        .arg(
+    let arg_matches = cli().get_matches();

-            Arg::new("daemonize")
-                .short('d')
-                .long("daemonize")
-                .takes_value(false)
-                .help("Run in the background"),
-        )
-        .arg(
-            Arg::new("init")
-                .long("init")
-                .takes_value(false)
-                .help("Initialize pageserver with all given config overrides"),
-        )
-        .arg(
-            Arg::new("workdir")
-                .short('D')
-                .long("workdir")
-                .takes_value(true)
-                .help("Working directory for the pageserver"),
-        )
-        // See `settings.md` for more details on the extra configuration patameters pageserver can process
-        .arg(
-            Arg::new("config-override")
-                .short('c')
-                .takes_value(true)
-                .number_of_values(1)
-                .multiple_occurrences(true)
-                .help("Additional configuration overrides of the ones from the toml config file (or new ones to add there).
-                Any option has to be a valid toml document, example: `-c=\"foo='hey'\"` `-c=\"foo={value=1}\"`"),
-        )
-        .arg(Arg::new("update-config").long("update-config").takes_value(false).help(
-            "Update the config file when started",
-        ))
-        .arg(
-            Arg::new("enabled-features")
-                .long("enabled-features")
-                .takes_value(false)
-                .help("Show enabled compile time features"),
-        )
-        .get_matches();
-
-    if arg_matches.is_present("enabled-features") {
-        let features: &[&str] = &[
-            #[cfg(feature = "testing")]
-            "testing",
-            #[cfg(feature = "profiling")]
-            "profiling",
-        ];
-        println!("{{\"features\": {features:?} }}");
+    if arg_matches.get_flag("enabled-features") {
+        println!("{{\"features\": {FEATURES:?} }}");
        return Ok(());
    }

-    let workdir = Path::new(arg_matches.value_of("workdir").unwrap_or(".neon"));
+    let workdir = arg_matches
+        .get_one::<String>("workdir")
+        .map(Path::new)
+        .unwrap_or_else(|| Path::new(".neon"));
    let workdir = workdir
        .canonicalize()
        .with_context(|| format!("Error opening workdir '{}'", workdir.display()))?;
@@ -110,7 +75,7 @@ fn main() -> anyhow::Result<()> {
        )
    })?;

-    let daemonize = arg_matches.is_present("daemonize");
+    let daemonize = arg_matches.get_flag("daemonize");

    let conf = match initialize_config(&cfg_file_path, arg_matches, &workdir)? {
        ControlFlow::Continue(conf) => conf,
@@ -122,7 +87,7 @@ fn main() -> anyhow::Result<()> {

    let tenants_path = conf.tenants_path();
    if !tenants_path.exists() {
-        utils::crashsafe_dir::create_dir_all(conf.tenants_path()).with_context(|| {
+        utils::crashsafe::create_dir_all(conf.tenants_path()).with_context(|| {
            format!(
                "Failed to create tenants root dir at '{}'",
                tenants_path.display()
@@ -148,8 +113,8 @@ fn initialize_config(
    arg_matches: clap::ArgMatches,
    workdir: &Path,
 ) -> anyhow::Result<ControlFlow<(), &'static PageServerConf>> {
-    let init = arg_matches.is_present("init");
-    let update_config = init || arg_matches.is_present("update-config");
+    let init = arg_matches.get_flag("init");
+    let update_config = init || arg_matches.get_flag("update-config");

    let (mut toml, config_file_exists) = if cfg_file_path.is_file() {
        if init {
@@ -191,13 +156,10 @@ fn initialize_config(
        )
    };

-    if let Some(values) = arg_matches.values_of("config-override") {
+    if let Some(values) = arg_matches.get_many::<String>("config-override") {
        for option_line in values {
            let doc = toml_edit::Document::from_str(option_line).with_context(|| {
-                format!(
-                    "Option '{}' could not be parsed as a toml document",
-                    option_line
-                )
+                format!("Option '{option_line}' could not be parsed as a toml document")
            })?;

            for (key, item) in doc.iter() {
@@ -239,7 +201,7 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
    // Initialize logger
    let log_file = logging::init(LOG_FILE_NAME, daemonize)?;

-    info!("version: {GIT_VERSION}");
+    info!("version: {}", version());

    // TODO: Check that it looks like a valid repository before going further

@@ -356,6 +318,8 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
        },
    );

+    set_build_info_metric(GIT_VERSION);
+
    // All started up! Now just sit and wait for shutdown signal.
    signals.handle(|signal| match signal {
        Signal::Quit => {
@@ -378,3 +342,55 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
        }
    })
 }
+
+fn cli() -> Command {
+    Command::new("Neon page server")
+        .about("Materializes WAL stream to pages and serves them to the postgres")
+        .version(version())
+        .arg(
+            // Boolean switches use `ArgAction::SetTrue`; they are read back with `get_flag`.
+            Arg::new("daemonize")
+                .short('d')
+                .long("daemonize")
+                .action(ArgAction::SetTrue)
+                .help("Run in the background"),
+        )
+        .arg(
+            Arg::new("init")
+                .long("init")
+                .action(ArgAction::SetTrue)
+                .help("Initialize pageserver with all given config overrides"),
+        )
+        .arg(
+            Arg::new("workdir")
+                .short('D')
+                .long("workdir")
+                .help("Working directory for the pageserver"),
+        )
+        // See `settings.md` for more details on the extra configuration parameters pageserver can process
+        .arg(
+            Arg::new("config-override")
+                .short('c')
+                .num_args(1)
+                .action(ArgAction::Append)
+                .help("Additional configuration overrides of the ones from the toml config file (or new ones to add there). \
+                Any option has to be a valid toml document, example: `-c=\"foo='hey'\"` `-c=\"foo={value=1}\"`"),
+        )
+        .arg(
+            Arg::new("update-config")
+                .long("update-config")
+                .action(ArgAction::SetTrue)
+                .help("Update the config file when started"),
+        )
+        .arg(
+            Arg::new("enabled-features")
+                .long("enabled-features")
+                .action(ArgAction::SetTrue)
+                .help("Show enabled compile time features"),
+        )
+}
+
+#[test]
+fn verify_cli() {
+    cli().debug_assert();
+}
--- a/pageserver/src/bin/pageserver_binutils.rs
+++ b/pageserver/src/bin/pageserver_binutils.rs
@@ -0,0 +1,154 @@
+//! A helper tool to manage pageserver binary files.
+//! Accepts a file as an argument, attempts to parse it in every supported way
+//! and prints its interpreted contents.
+//!
+//! A separate `metadata` subcommand prints and updates the pageserver's metadata file.
+use std::{
+    path::{Path, PathBuf},
+    str::FromStr,
+};
+
+use anyhow::Context;
+use clap::{value_parser, Arg, Command};
+
+use pageserver::{
+    page_cache,
+    tenant::{dump_layerfile_from_path, metadata::TimelineMetadata},
+    virtual_file,
+};
+use postgres_ffi::ControlFileData;
+use utils::{lsn::Lsn, project_git_version};
+
+project_git_version!(GIT_VERSION);
+
+const METADATA_SUBCOMMAND: &str = "metadata";
+
+/// Entry point of the binutils tool.
+///
+/// With the `metadata` subcommand, the metadata file given as `metadata_path`
+/// is printed and optionally updated (see `handle_metadata`). Without a
+/// subcommand, the file given as `path` is first read as a Postgres control
+/// file and, if that fails, dumped as a layer file.
+///
+/// Returns an error when the required path argument is absent, when the
+/// subcommand is not the `metadata` one, or when the file cannot be processed.
+fn main() -> anyhow::Result<()> {
+    let arg_matches = cli().get_matches();
+
+    match arg_matches.subcommand() {
+        Some((subcommand_name, subcommand_matches)) => {
+            // Validate the subcommand name before touching its arguments, so an
+            // unexpected subcommand is reported as such instead of surfacing as
+            // a lookup failure for an argument it never defined.
+            anyhow::ensure!(
+                subcommand_name == METADATA_SUBCOMMAND,
+                "Unknown subcommand {subcommand_name}"
+            );
+            let path = subcommand_matches
+                .get_one::<PathBuf>("metadata_path")
+                .context("'metadata_path' argument is missing")?;
+            handle_metadata(path, subcommand_matches)?;
+        }
+        None => {
+            let path = arg_matches
+                .get_one::<PathBuf>("path")
+                .context("'path' argument is missing")?;
+            println!(
+                "No subcommand specified, attempting to guess the format for file {}",
+                path.display()
+            );
+            // Best-effort format detection: a control-file parse failure is not
+            // fatal, the layer-file dump is tried next and its error is returned.
+            if let Err(e) = read_pg_control_file(path) {
+                println!(
+                    "Failed to read input file as a pg control one: {e:#}\n\
+                    Attempting to read it as layer file"
+                );
+                print_layerfile(path)?;
+            }
+        }
+    };
+    Ok(())
+}
+
+/// Reads the file at `control_file_path`, decodes it as `ControlFileData` and
+/// prints the decoded structure followed by its checkpoint LSN, both as-is and
+/// aligned.
+///
+/// Fails if the file cannot be read or decoded.
+fn read_pg_control_file(control_file_path: &Path) -> anyhow::Result<()> {
+    let raw_bytes = std::fs::read(control_file_path)?;
+    let control_file = ControlFileData::decode(&raw_bytes)?;
+    println!("{control_file:?}");
+
+    // The checkpoint position stored in the control file is what gets reported
+    // as `pg_initdb_lsn`.
+    let checkpoint_lsn = Lsn(control_file.checkPoint);
+    let aligned_lsn = checkpoint_lsn.align();
+    println!("pg_initdb_lsn: {checkpoint_lsn}, aligned: {aligned_lsn}");
+    Ok(())
+}
+
+/// Dumps the layer file at `path` through `dump_layerfile_from_path`.
+///
+/// `virtual_file` and `page_cache` are initialized first, on every call.
+fn print_layerfile(path: &Path) -> anyhow::Result<()> {
+    // Basic initialization of things that don't change after startup
+    // NOTE(review): 10 and 100 are presumably the sizes of the two caches — confirm.
+    virtual_file::init(10);
+    page_cache::init(100);
+    // NOTE(review): the `true` flag presumably requests a verbose dump — confirm.
+    dump_layerfile_from_path(path, true)
+}
+
+/// Prints the timeline metadata stored at `path` and, when requested, rewrites it.
+///
+/// `--disk_consistent_lsn` and `--prev_record_lsn` (read from `arg_matches`)
+/// each replace the corresponding metadata field; all other fields are carried
+/// over from the current metadata. The file is written back only if at least
+/// one of the two options was given.
+///
+/// Fails if the file cannot be read, parsed or written, or if an LSN argument
+/// does not parse.
+fn handle_metadata(path: &Path, arg_matches: &clap::ArgMatches) -> Result<(), anyhow::Error> {
+    let original_bytes = std::fs::read(path)?;
+    let mut meta = TimelineMetadata::from_bytes(&original_bytes)?;
+    println!("Current metadata:\n{meta:?}");
+
+    let mut needs_write = false;
+
+    let requested_disk_consistent_lsn = arg_matches.get_one::<String>("disk_consistent_lsn");
+    if let Some(raw_lsn) = requested_disk_consistent_lsn {
+        let new_disk_consistent_lsn = Lsn::from_str(raw_lsn)?;
+        meta = TimelineMetadata::new(
+            new_disk_consistent_lsn,
+            meta.prev_record_lsn(),
+            meta.ancestor_timeline(),
+            meta.ancestor_lsn(),
+            meta.latest_gc_cutoff_lsn(),
+            meta.initdb_lsn(),
+            meta.pg_version(),
+        );
+        needs_write = true;
+    }
+
+    // Applied on top of the result of the previous step, so both options can be
+    // combined in a single invocation.
+    let requested_prev_record_lsn = arg_matches.get_one::<String>("prev_record_lsn");
+    if let Some(raw_lsn) = requested_prev_record_lsn {
+        let new_prev_record_lsn = Lsn::from_str(raw_lsn)?;
+        meta = TimelineMetadata::new(
+            meta.disk_consistent_lsn(),
+            Some(new_prev_record_lsn),
+            meta.ancestor_timeline(),
+            meta.ancestor_lsn(),
+            meta.latest_gc_cutoff_lsn(),
+            meta.initdb_lsn(),
+            meta.pg_version(),
+        );
+        needs_write = true;
+    }
+
+    if needs_write {
+        let updated_bytes = meta.to_bytes()?;
+        std::fs::write(path, &updated_bytes)?;
+    }
+
+    Ok(())
+}
+
+/// Builds the clap command-line definition of the binutils tool: an optional
+/// positional `path`, plus the `metadata` subcommand with its own optional
+/// positional `metadata_path` and the two LSN replacement options.
+fn cli() -> Command {
+    let input_path = Arg::new("path")
+        .help("Input file path")
+        .value_parser(value_parser!(PathBuf))
+        .required(false);
+
+    let metadata_path = Arg::new("metadata_path")
+        .help("Input metadata file path")
+        .value_parser(value_parser!(PathBuf))
+        .required(false);
+    let disk_consistent_lsn = Arg::new("disk_consistent_lsn")
+        .long("disk_consistent_lsn")
+        .help("Replace disk consistent Lsn");
+    let prev_record_lsn = Arg::new("prev_record_lsn")
+        .long("prev_record_lsn")
+        .help("Replace previous record Lsn");
+
+    let metadata_subcommand = Command::new(METADATA_SUBCOMMAND)
+        .about("Read and update pageserver metadata file")
+        .arg(metadata_path)
+        .arg(disk_consistent_lsn)
+        .arg(prev_record_lsn);
+
+    Command::new("Neon Pageserver binutils")
+        .about("Reads pageserver (and related) binary files management utility")
+        .version(GIT_VERSION)
+        .arg(input_path)
+        .subcommand(metadata_subcommand)
+}
+
+/// Runs clap's `debug_assert` over the command definition returned by `cli()`,
+/// so a malformed argument or subcommand declaration fails this test.
+#[test]
+fn verify_cli() {
+    cli().debug_assert();
+}
--- a/pageserver/src/bin/update_metadata.rs
+++ b/pageserver/src/bin/update_metadata.rs
@@ -1,73 +0,0 @@
-//! Main entry point for the edit_metadata executable
-//!
-//! A handy tool for debugging, that's all.
-use anyhow::Result;
-use clap::{App, Arg};
-use pageserver::tenant::metadata::TimelineMetadata;
-use std::path::PathBuf;
-use std::str::FromStr;
-use utils::{lsn::Lsn, project_git_version};
-
-project_git_version!(GIT_VERSION);
-
-fn main() -> Result<()> {
-    let arg_matches = App::new("Neon update metadata utility")
-        .about("Dump or update metadata file")
-        .version(GIT_VERSION)
-        .arg(
-            Arg::new("path")
-                .help("Path to metadata file")
-                .required(true),
-        )
-        .arg(
-            Arg::new("disk_lsn")
-                .short('d')
-                .long("disk_lsn")
-                .takes_value(true)
-                .help("Replace disk constistent lsn"),
-        )
-        .arg(
-            Arg::new("prev_lsn")
-                .short('p')
-                .long("prev_lsn")
-                .takes_value(true)
-                .help("Previous record LSN"),
-        )
-        .get_matches();
-
-    let path = PathBuf::from(arg_matches.value_of("path").unwrap());
-    let metadata_bytes = std::fs::read(&path)?;
-    let mut meta = TimelineMetadata::from_bytes(&metadata_bytes)?;
-    println!("Current metadata:\n{:?}", &meta);
-
-    let mut update_meta = false;
-
-    if let Some(disk_lsn) = arg_matches.value_of("disk_lsn") {
-        meta = TimelineMetadata::new(
-            Lsn::from_str(disk_lsn)?,
-            meta.prev_record_lsn(),
-            meta.ancestor_timeline(),
-            meta.ancestor_lsn(),
-            meta.latest_gc_cutoff_lsn(),
-            meta.initdb_lsn(),
-        );
-        update_meta = true;
-    }
-
-    if let Some(prev_lsn) = arg_matches.value_of("prev_lsn") {
-        meta = TimelineMetadata::new(
-            meta.disk_consistent_lsn(),
-            Some(Lsn::from_str(prev_lsn)?),
-            meta.ancestor_timeline(),
-            meta.ancestor_lsn(),
-            meta.latest_gc_cutoff_lsn(),
-            meta.initdb_lsn(),
-        );
-        update_meta = true;
-    }
-    if update_meta {
-        let metadata_bytes = meta.to_bytes()?;
-        std::fs::write(&path, &metadata_bytes)?;
-    }
-    Ok(())
-}
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -7,6 +7,7 @@
 use anyhow::{anyhow, bail, ensure, Context, Result};
 use remote_storage::RemoteStorageConfig;
 use std::env;
+use utils::crashsafe::path_with_suffix_extension;

 use std::path::{Path, PathBuf};
 use std::str::FromStr;
@@ -24,16 +25,17 @@ use crate::tenant_config::{TenantConf, TenantConfOpt};

 /// The name of the metadata file pageserver creates per timeline.
 pub const METADATA_FILE_NAME: &str = "metadata";
+pub const TIMELINE_UNINIT_MARK_SUFFIX: &str = "___uninit";
 const TENANT_CONFIG_NAME: &str = "config";

 pub mod defaults {
    use crate::tenant_config::defaults::*;
    use const_format::formatcp;

-    pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
-    pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
-    pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
-    pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
+    pub use pageserver_api::{
+        DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_HTTP_LISTEN_PORT, DEFAULT_PG_LISTEN_ADDR,
+        DEFAULT_PG_LISTEN_PORT,
+    };

    pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "60 s";
    pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";
@@ -209,7 +211,7 @@ impl Default for PageServerConfigBuilder {
            workdir: Set(PathBuf::new()),
            pg_distrib_dir: Set(env::current_dir()
                .expect("cannot access current directory")
-                .join("pg_install/v14")),
+                .join("pg_install")),
            auth_type: Set(AuthType::Trust),
            auth_validation_public_key_path: Set(None),
            remote_storage_config: Set(None),
@@ -364,6 +366,17 @@ impl PageServerConf {
        self.timelines_path(tenant_id).join(timeline_id.to_string())
    }

+    /// Path of the "uninit" mark for a timeline: the timeline's directory path
+    /// with `TIMELINE_UNINIT_MARK_SUFFIX` appended as a suffix extension.
+    ///
+    /// Note the argument order (tenant first, timeline second), which is the
+    /// reverse of the `timeline_path` call made here.
+    pub fn timeline_uninit_mark_file_path(
+        &self,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+    ) -> PathBuf {
+        let timeline_dir = self.timeline_path(&timeline_id, &tenant_id);
+        path_with_suffix_extension(timeline_dir, TIMELINE_UNINIT_MARK_SUFFIX)
+    }
+
    /// Points to a place in pageserver's local directory,
    /// where certain timeline's metadata file should be located.
    pub fn metadata_path(&self, timeline_id: TimelineId, tenant_id: TenantId) -> PathBuf {
@@ -374,13 +387,29 @@ impl PageServerConf {
    //
    // Postgres distribution paths
    //
+    /// Returns the distribution directory of the given Postgres major version,
+    /// i.e. `<pg_distrib_dir>/v<pg_version>`.
+    ///
+    /// Only versions 14 and 15 are accepted; any other value yields an
+    /// "Unsupported postgres version" error.
+    pub fn pg_distrib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
+        let versioned_dir = self.pg_distrib_dir.join(format!("v{pg_version}"));

-    pub fn pg_bin_dir(&self) -> PathBuf {
-        self.pg_distrib_dir.join("bin")
+        match pg_version {
+            // All supported versions share the same `v<N>` directory layout.
+            14 | 15 => Ok(versioned_dir),
+            _ => bail!("Unsupported postgres version: {}", pg_version),
+        }
    }

-    pub fn pg_lib_dir(&self) -> PathBuf {
-        self.pg_distrib_dir.join("lib")
+    /// Returns the `bin` directory inside the distribution directory of the
+    /// given Postgres version.
+    ///
+    /// Version validation is delegated to `pg_distrib_dir`, which rejects
+    /// unsupported versions with an "Unsupported postgres version" error, so
+    /// the list of supported versions lives in one place only.
+    pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
+        Ok(self.pg_distrib_dir(pg_version)?.join("bin"))
+    }
+    /// Returns the `lib` directory inside the distribution directory of the
+    /// given Postgres version.
+    ///
+    /// Version validation is delegated to `pg_distrib_dir`, which rejects
+    /// unsupported versions with an "Unsupported postgres version" error, so
+    /// the list of supported versions lives in one place only.
+    pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
+        Ok(self.pg_distrib_dir(pg_version)?.join("lib"))
    }

    /// Parse a configuration file (pageserver.toml) into a PageServerConf struct,
@@ -449,13 +478,6 @@ impl PageServerConf {
            );
        }

-        if !conf.pg_distrib_dir.join("bin/postgres").exists() {
-            bail!(
-                "Can't find postgres binary at {}",
-                conf.pg_distrib_dir.display()
-            );
-        }
-
        conf.default_tenant_conf = t_conf.merge(TenantConf::default());

        Ok(conf)
@@ -625,6 +647,7 @@ mod tests {
    use tempfile::{tempdir, TempDir};

    use super::*;
+    use crate::DEFAULT_PG_VERSION;

    const ALL_BASE_VALUES_TOML: &str = r#"
 # Initial configuration file created by 'pageserver --init'
@@ -864,8 +887,9 @@ broker_endpoints = ['{broker_endpoint}']
        fs::create_dir_all(&workdir)?;

        let pg_distrib_dir = tempdir_path.join("pg_distrib");
-        fs::create_dir_all(&pg_distrib_dir)?;
-        let postgres_bin_dir = pg_distrib_dir.join("bin");
+        let pg_distrib_dir_versioned = pg_distrib_dir.join(format!("v{DEFAULT_PG_VERSION}"));
+        fs::create_dir_all(&pg_distrib_dir_versioned)?;
+        let postgres_bin_dir = pg_distrib_dir_versioned.join("bin");
        fs::create_dir_all(&postgres_bin_dir)?;
        fs::write(postgres_bin_dir.join("postgres"), "I'm postgres, trust me")?;

--- a/pageserver/src/http/mod.rs
+++ b/pageserver/src/http/mod.rs
@@ -1,3 +1,4 @@
-pub mod models;
 pub mod routes;
 pub use routes::make_router;
+
+pub use pageserver_api::models;
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -1,7 +1,11 @@
 openapi: "3.0.2"
 info:
  title: Page Server API
+  description: Neon Pageserver API
  version: "1.0"
+  license:
+    name: "Apache"
+    url: https://github.com/neondatabase/neon/blob/main/LICENSE
 servers:
  - url: ""
 paths:
@@ -207,6 +211,61 @@ paths:
              schema:
                $ref: "#/components/schemas/Error"

+  # Looks up the LSN that corresponds to a point in time on one timeline.
+  /v1/tenant/{tenant_id}/timeline/{timeline_id}/get_lsn_by_timestamp:
+    parameters:
+      - name: tenant_id
+        in: path
+        required: true
+        schema:
+          type: string
+          format: hex
+      - name: timeline_id
+        in: path
+        required: true
+        schema:
+          type: string
+          format: hex
+    get:
+      description: Get LSN by a timestamp
+      parameters:
+        - name: timestamp
+          in: query
+          required: true
+          schema:
+            type: string
+            format: date-time
+          description: A timestamp to get the LSN
+      responses:
+        "200":
+          description: OK
+          content:
+            application/json:
+              # The body is a single JSON string: either the LSN found for the
+              # timestamp, or one of the literals "future", "past" or "nodata",
+              # which the handler returns for its Future / Past / NoData results.
+              schema:
+                type: string
+        "400":
+          description: Error when no tenant id found in path, no timeline id or invalid timestamp
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
  /v1/tenant/{tenant_id}/attach:
    parameters:
      - name: tenant_id
@@ -307,6 +366,7 @@ paths:
      description: |
        Create a timeline. Returns new timeline id on success.\
        If no new timeline id is specified in parameters, it would be generated. It's an error to recreate the same timeline.
+        If no pg_version is specified, assume DEFAULT_PG_VERSION hardcoded in the pageserver.
      requestBody:
        content:
          application/json:
@@ -322,6 +382,8 @@ paths:
                ancestor_start_lsn:
                  type: string
                  format: hex
+                pg_version:
+                  type: integer
      responses:
        "201":
          description: TimelineInfo
@@ -553,6 +615,9 @@ components:
      required:
        - timeline_id
        - tenant_id
+        - last_record_lsn
+        - disk_consistent_lsn
+        - awaits_download
      properties:
        timeline_id:
          type: string
@@ -560,33 +625,15 @@ components:
        tenant_id:
          type: string
          format: hex
-        local:
-          $ref: "#/components/schemas/LocalTimelineInfo"
-        remote:
-          $ref: "#/components/schemas/RemoteTimelineInfo"
-    RemoteTimelineInfo:
-      type: object
-      required:
-        - awaits_download
-        - remote_consistent_lsn
-      properties:
-        awaits_download:
-          type: boolean
-        remote_consistent_lsn:
-          type: string
-          format: hex
-    LocalTimelineInfo:
-      type: object
-      required:
-        - last_record_lsn
-        - disk_consistent_lsn
-      properties:
        last_record_lsn:
          type: string
          format: hex
        disk_consistent_lsn:
          type: string
          format: hex
+        remote_consistent_lsn:
+          type: string
+          format: hex
        ancestor_timeline_id:
          type: string
          format: hex
@@ -611,7 +658,39 @@ components:
          format: hex
        last_received_msg_ts:
          type: integer
+        awaits_download:
+          type: boolean

+        # These 'local' and 'remote' fields just duplicate some of the fields
+        # above. They are kept for backwards-compatibility. They can be removed,
+        # when the control plane has been updated to look at the above fields
+        # directly.
+        local:
+          $ref: "#/components/schemas/LocalTimelineInfo"
+        remote:
+          $ref: "#/components/schemas/RemoteTimelineInfo"
+
+    LocalTimelineInfo:
+      type: object
+      properties:
+        ancestor_timeline_id:
+          type: string
+          format: hex
+        ancestor_lsn:
+          type: string
+          format: hex
+        current_logical_size:
+          type: integer
+        current_physical_size:
+          type: integer
+    RemoteTimelineInfo:
+      type: object
+      required:
+        - remote_consistent_lsn
+      properties:
+        remote_consistent_lsn:
+          type: string
+          format: hex
    Error:
      type: object
      required:
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -12,6 +12,7 @@ use super::models::{
    StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse, TenantInfo,
    TimelineCreateRequest,
 };
+use crate::pgdatadir_mapping::LsnForTimestamp;
 use crate::storage_sync;
 use crate::storage_sync::index::{RemoteIndex, RemoteTimeline};
 use crate::tenant::{TenantState, Timeline};
@@ -78,13 +79,13 @@ fn get_config(request: &Request<Body>) -> &'static PageServerConf {
    get_state(request).conf
 }

-// Helper functions to construct a LocalTimelineInfo struct for a timeline
-
-fn local_timeline_info_from_timeline(
+// Helper function to construct a TimelineInfo struct for a timeline
+async fn build_timeline_info(
+    state: &State,
    timeline: &Arc<Timeline>,
    include_non_incremental_logical_size: bool,
    include_non_incremental_physical_size: bool,
-) -> anyhow::Result<LocalTimelineInfo> {
+) -> anyhow::Result<TimelineInfo> {
    let last_record_lsn = timeline.get_last_record_lsn();
    let (wal_source_connstr, last_received_msg_lsn, last_received_msg_ts) = {
        let guard = timeline.last_received_wal.lock().unwrap();
@@ -99,24 +100,47 @@ fn local_timeline_info_from_timeline(
        }
    };

-    let info = LocalTimelineInfo {
-        ancestor_timeline_id: timeline.get_ancestor_timeline_id(),
-        ancestor_lsn: {
-            match timeline.get_ancestor_lsn() {
-                Lsn(0) => None,
-                lsn @ Lsn(_) => Some(lsn),
-            }
-        },
+    let (remote_consistent_lsn, awaits_download) = if let Some(remote_entry) = state
+        .remote_index
+        .read()
+        .await
+        .timeline_entry(&TenantTimelineId {
+            tenant_id: timeline.tenant_id,
+            timeline_id: timeline.timeline_id,
+        }) {
+        (
+            Some(remote_entry.metadata.disk_consistent_lsn()),
+            remote_entry.awaits_download,
+        )
+    } else {
+        (None, false)
+    };
+
+    let ancestor_timeline_id = timeline.get_ancestor_timeline_id();
+    let ancestor_lsn = match timeline.get_ancestor_lsn() {
+        Lsn(0) => None,
+        lsn @ Lsn(_) => Some(lsn),
+    };
+    let current_logical_size = match timeline.get_current_logical_size() {
+        Ok(size) => Some(size),
+        Err(err) => {
+            error!("Timeline info creation failed to get current logical size: {err:?}");
+            None
+        }
+    };
+    let current_physical_size = Some(timeline.get_physical_size());
+
+    let info = TimelineInfo {
+        tenant_id: timeline.tenant_id,
+        timeline_id: timeline.timeline_id,
+        ancestor_timeline_id,
+        ancestor_lsn,
        disk_consistent_lsn: timeline.get_disk_consistent_lsn(),
        last_record_lsn,
        prev_record_lsn: Some(timeline.get_prev_record_lsn()),
        latest_gc_cutoff_lsn: *timeline.get_latest_gc_cutoff_lsn(),
-        current_logical_size: Some(
-            timeline
-                .get_current_logical_size()
-                .context("Timeline info creation failed to get current logical size")?,
-        ),
-        current_physical_size: Some(timeline.get_physical_size()),
+        current_logical_size,
+        current_physical_size,
        current_logical_size_non_incremental: if include_non_incremental_logical_size {
            Some(timeline.get_current_logical_size_non_incremental(last_record_lsn)?)
        } else {
@@ -130,32 +154,26 @@ fn local_timeline_info_from_timeline(
        wal_source_connstr,
        last_received_msg_lsn,
        last_received_msg_ts,
+        pg_version: timeline.pg_version,
+
+        remote_consistent_lsn,
+        awaits_download,
+
+        // Duplicate some fields in 'local' and 'remote' fields, for backwards compatibility
+        // with the control plane.
+        local: LocalTimelineInfo {
+            ancestor_timeline_id,
+            ancestor_lsn,
+            current_logical_size,
+            current_physical_size,
+        },
+        remote: RemoteTimelineInfo {
+            remote_consistent_lsn,
+        },
    };
    Ok(info)
 }

-fn list_local_timelines(
-    tenant_id: TenantId,
-    include_non_incremental_logical_size: bool,
-    include_non_incremental_physical_size: bool,
-) -> Result<Vec<(TimelineId, LocalTimelineInfo)>> {
-    let tenant = tenant_mgr::get_tenant(tenant_id, true)?;
-    let timelines = tenant.list_timelines();
-
-    let mut local_timeline_info = Vec::with_capacity(timelines.len());
-    for (timeline_id, repository_timeline) in timelines {
-        local_timeline_info.push((
-            timeline_id,
-            local_timeline_info_from_timeline(
-                &repository_timeline,
-                include_non_incremental_logical_size,
-                include_non_incremental_physical_size,
-            )?,
-        ))
-    }
-    Ok(local_timeline_info)
-}
-
 // healthcheck handler
 async fn status_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
    let config = get_config(&request);
@@ -167,29 +185,28 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
    let request_data: TimelineCreateRequest = json_request(&mut request).await?;
    check_permission(&request, Some(tenant_id))?;

+    let state = get_state(&request);
+
    let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
    let new_timeline_info = async {
        match tenant.create_timeline(
            request_data.new_timeline_id.map(TimelineId::from),
            request_data.ancestor_timeline_id.map(TimelineId::from),
            request_data.ancestor_start_lsn,
+            request_data.pg_version.unwrap_or(crate::DEFAULT_PG_VERSION)
        ).await {
            Ok(Some(new_timeline)) => {
                // Created. Construct a TimelineInfo for it.
-                let local_info = local_timeline_info_from_timeline(&new_timeline, false, false)
+                let timeline_info = build_timeline_info(state, &new_timeline, false, false)
+                    .await
                    .map_err(ApiError::InternalServerError)?;
-                Ok(Some(TimelineInfo {
-                    tenant_id,
-                    timeline_id: new_timeline.timeline_id,
-                    local: Some(local_info),
-                    remote: None,
-                }))
+                Ok(Some(timeline_info))
            }
            Ok(None) => Ok(None), // timeline already exists
            Err(err) => Err(ApiError::InternalServerError(err)),
        }
    }
-    .instrument(info_span!("timeline_create", tenant = %tenant_id, new_timeline = ?request_data.new_timeline_id, lsn=?request_data.ancestor_start_lsn))
+    .instrument(info_span!("timeline_create", tenant = %tenant_id, new_timeline = ?request_data.new_timeline_id, lsn=?request_data.ancestor_start_lsn, pg_version=?request_data.pg_version))
        .await?;

    Ok(match new_timeline_info {
@@ -206,6 +223,8 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,
        query_param_present(&request, "include-non-incremental-physical-size");
    check_permission(&request, Some(tenant_id))?;

+    let state = get_state(&request);
+
    let timelines = tokio::task::spawn_blocking(move || {
        let _enter = info_span!("timeline_list", tenant = %tenant_id).entered();
        let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
@@ -215,36 +234,18 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,
    .map_err(|e: JoinError| ApiError::InternalServerError(e.into()))??;

    let mut response_data = Vec::with_capacity(timelines.len());
-    for (timeline_id, timeline) in timelines {
-        let local = match local_timeline_info_from_timeline(
+    for timeline in timelines {
+        // Build the error context with `format!`: a plain `.context("...")`
+        // literal is not interpolated, so `{timeline_id}` would be emitted
+        // verbatim. The underlying error stays in the anyhow chain and needs
+        // no placeholder of its own.
+        let timeline_info = build_timeline_info(
+            state,
            &timeline,
            include_non_incremental_logical_size,
            include_non_incremental_physical_size,
-        ) {
-            Ok(local) => Some(local),
-            Err(e) => {
-                error!("Failed to convert tenant timeline {timeline_id} into the local one: {e:?}");
-                None
-            }
-        };
+        )
+        .await
+        .with_context(|| {
+            format!(
+                "Failed to convert tenant timeline {} into the local one",
+                timeline.timeline_id
+            )
+        })
+        .map_err(ApiError::InternalServerError)?;

-        response_data.push(TimelineInfo {
-            tenant_id,
-            timeline_id,
-            local,
-            remote: get_state(&request)
-                .remote_index
-                .read()
-                .await
-                .timeline_entry(&TenantTimelineId {
-                    tenant_id,
-                    timeline_id,
-                })
-                .map(|remote_entry| RemoteTimelineInfo {
-                    remote_consistent_lsn: remote_entry.metadata.disk_consistent_lsn(),
-                    awaits_download: remote_entry.awaits_download,
-                }),
-        })
+        response_data.push(timeline_info);
    }

    json_response(StatusCode::OK, response_data)
@@ -263,6 +264,23 @@ fn query_param_present(request: &Request<Body>, param: &str) -> bool {
        .unwrap_or(false)
 }

+/// Extracts the value of the query parameter `param_name` from the request URI.
+///
+/// Returns the first matching value after URL-decoding. Responds with
+/// `ApiError::BadRequest` when the request has no query string at all or when
+/// the parameter is not present in it.
+fn get_query_param(request: &Request<Body>, param_name: &str) -> Result<String, ApiError> {
+    let raw_query = request
+        .uri()
+        .query()
+        .ok_or_else(|| ApiError::BadRequest(anyhow!("empty query in request")))?;
+
+    url::form_urlencoded::parse(raw_query.as_bytes())
+        .into_owned()
+        .find(|(key, _)| key == param_name)
+        .map(|(_, value)| value)
+        .ok_or_else(|| {
+            ApiError::BadRequest(anyhow!(
+                "no {param_name} specified in query parameters"
+            ))
+        })
+}
+
 async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
@@ -272,59 +290,60 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
        query_param_present(&request, "include-non-incremental-physical-size");
    check_permission(&request, Some(tenant_id))?;

-    let (local_timeline_info, remote_timeline_info) = async {
+    let state = get_state(&request);
+
+    let timeline_info = async {
        let timeline = tokio::task::spawn_blocking(move || {
            tenant_mgr::get_tenant(tenant_id, true)?.get_timeline(timeline_id)
        })
        .await
        .map_err(|e: JoinError| ApiError::InternalServerError(e.into()))?;

-        let local_timeline_info = match timeline.and_then(|timeline| {
-            local_timeline_info_from_timeline(
-                &timeline,
-                include_non_incremental_logical_size,
-                include_non_incremental_physical_size,
-            )
-        }) {
-            Ok(local_info) => Some(local_info),
-            Err(e) => {
-                error!("Failed to get local timeline info: {e:#}");
-                None
-            }
-        };
+        let timeline = timeline.map_err(ApiError::NotFound)?;

-        let remote_timeline_info = {
-            let remote_index_read = get_state(&request).remote_index.read().await;
-            remote_index_read
-                .timeline_entry(&TenantTimelineId {
-                    tenant_id,
-                    timeline_id,
-                })
-                .map(|remote_entry| RemoteTimelineInfo {
-                    remote_consistent_lsn: remote_entry.metadata.disk_consistent_lsn(),
-                    awaits_download: remote_entry.awaits_download,
-                })
-        };
-        Ok::<_, ApiError>((local_timeline_info, remote_timeline_info))
+        // The context message is a plain literal: `.context` does not
+        // interpolate, so a `{e:#}` placeholder would be emitted verbatim. The
+        // underlying error is preserved in the anyhow chain regardless.
+        let timeline_info = build_timeline_info(
+            state,
+            &timeline,
+            include_non_incremental_logical_size,
+            include_non_incremental_physical_size,
+        )
+        .await
+        .context("Failed to get local timeline info")
+        .map_err(ApiError::InternalServerError)?;
+
+        Ok::<_, ApiError>(timeline_info)
    }
    .instrument(info_span!("timeline_detail", tenant = %tenant_id, timeline = %timeline_id))
    .await?;

-    if local_timeline_info.is_none() && remote_timeline_info.is_none() {
-        Err(ApiError::NotFound(anyhow!(
-            "Timeline {tenant_id}/{timeline_id} is not found neither locally nor remotely"
-        )))
-    } else {
-        json_response(
-            StatusCode::OK,
-            TimelineInfo {
-                tenant_id,
-                timeline_id,
-                local: local_timeline_info,
-                remote: remote_timeline_info,
-            },
-        )
-    }
+    json_response(StatusCode::OK, timeline_info)
+}
+
+/// HTTP handler that maps the `timestamp` query parameter to an LSN on the
+/// timeline named by the `tenant_id` / `timeline_id` path parameters.
+///
+/// The timestamp is parsed with `humantime::parse_rfc3339`. On success the
+/// response is a JSON string holding either the LSN or one of the literals
+/// `"future"`, `"past"`, `"nodata"`, depending on the `LsnForTimestamp` variant
+/// returned by the timeline.
+///
+/// Errors: `BadRequest` for a missing or unparsable timestamp, `NotFound` when
+/// the tenant or timeline cannot be obtained, `InternalServerError` when the
+/// lookup itself fails.
+async fn get_lsn_by_timestamp_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
+    // Permissions are checked as soon as the tenant id is known, before any
+    // other request parameter is examined.
+    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+    check_permission(&request, Some(tenant_id))?;
+
+    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
+    let raw_timestamp = get_query_param(&request, "timestamp")?;
+    let parsed_time = humantime::parse_rfc3339(&raw_timestamp)
+        .with_context(|| format!("Invalid time: {raw_timestamp:?}"))
+        .map_err(ApiError::BadRequest)?;
+    let pg_timestamp = postgres_ffi::to_pg_timestamp(parsed_time);
+
+    let timeline_lookup =
+        tenant_mgr::get_tenant(tenant_id, true).and_then(|tenant| tenant.get_timeline(timeline_id));
+    let timeline = timeline_lookup
+        .with_context(|| format!("No timeline {timeline_id} in repository for tenant {tenant_id}"))
+        .map_err(ApiError::NotFound)?;
+
+    let lsn_lookup = timeline
+        .find_lsn_for_timestamp(pg_timestamp)
+        .map_err(ApiError::InternalServerError)?;
+    let result = match lsn_lookup {
+        LsnForTimestamp::Present(lsn) => lsn.to_string(),
+        LsnForTimestamp::Future(_) => "future".to_owned(),
+        LsnForTimestamp::Past(_) => "past".to_owned(),
+        LsnForTimestamp::NoData(_) => "nodata".to_owned(),
+    };
+    json_response(StatusCode::OK, result)
 }

 // TODO makes sense to provide tenant config right away the same way as it handled in tenant_create
@@ -335,9 +354,16 @@ async fn tenant_attach_handler(request: Request<Body>) -> Result<Response<Body>,
    info!("Handling tenant attach {tenant_id}");

    tokio::task::spawn_blocking(move || match tenant_mgr::get_tenant(tenant_id, false) {
-        Ok(_) => Err(ApiError::Conflict(
-            "Tenant is already present locally".to_owned(),
-        )),
+        Ok(tenant) => {
+            if tenant.list_timelines().is_empty() {
+                info!("Attaching to tenant {tenant_id} with zero timelines");
+                Ok(())
+            } else {
+                Err(ApiError::Conflict(
+                    "Tenant is already present locally".to_owned(),
+                ))
+            }
+        }
        Err(_) => Ok(()),
    })
    .await
@@ -360,7 +386,7 @@ async fn tenant_attach_handler(request: Request<Body>) -> Result<Response<Body>,
        }
        return json_response(StatusCode::ACCEPTED, ());
    }
-    // no tenant in the index, release the lock to make the potentially lengthy download opetation
+    // no tenant in the index, release the lock to make the potentially lengthy download operation
    drop(index_accessor);

    // download index parts for every tenant timeline
@@ -512,36 +538,27 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
            false
        });

-    let tenant_state = match tenant {
-        Ok(tenant) => tenant.current_state(),
+    let (tenant_state, current_physical_size) = match tenant {
+        Ok(tenant) => {
+            let timelines = tenant.list_timelines();
+            // Calculate total physical size of all timelines
+            let mut current_physical_size = 0;
+            for timeline in timelines {
+                current_physical_size += timeline.get_physical_size();
+            }
+
+            (tenant.current_state(), Some(current_physical_size))
+        }
        Err(e) => {
            error!("Failed to get local tenant state: {e:#}");
            if has_in_progress_downloads {
-                TenantState::Paused
+                (TenantState::Paused, None)
            } else {
-                TenantState::Broken
+                (TenantState::Broken, None)
            }
        }
    };

-    let current_physical_size =
-        match tokio::task::spawn_blocking(move || list_local_timelines(tenant_id, false, false))
-            .await
-            .map_err(|e: JoinError| ApiError::InternalServerError(e.into()))?
-        {
-            Err(err) => {
-                // Getting local timelines can fail when no local tenant directory is on disk (e.g, when tenant data is being downloaded).
-                // In that case, put a warning message into log and operate normally.
-                warn!("Failed to get local timelines for tenant {tenant_id}: {err}");
-                None
-            }
-            Ok(local_timeline_infos) => Some(
-                local_timeline_infos
-                    .into_iter()
-                    .fold(0, |acc, x| acc + x.1.current_physical_size.unwrap()),
-            ),
-        };
-
    json_response(
        StatusCode::OK,
        TenantInfo {
@@ -730,7 +747,7 @@ async fn tenant_config_handler(mut request: Request<Body>) -> Result<Response<Bo
    json_response(StatusCode::OK, ())
 }

-#[cfg(any(feature = "testing", feature = "failpoints"))]
+#[cfg(feature = "testing")]
 async fn failpoints_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
    if !fail::has_failpoints() {
        return Err(ApiError::BadRequest(anyhow!(
@@ -764,11 +781,6 @@ async fn failpoints_handler(mut request: Request<Body>) -> Result<Response<Body>
 }

 // Run GC immediately on given timeline.
-// FIXME: This is just for tests. See test_runner/regress/test_gc.py.
-// This probably should require special authentication or a global flag to
-// enable, I don't think we want to or need to allow regular clients to invoke
-// GC.
-//     @hllinnaka in commits ec44f4b29, 3aca717f3
 #[cfg(feature = "testing")]
 async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
@@ -794,9 +806,6 @@ async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body
 }

 // Run compaction immediately on given timeline.
-// FIXME This is just for tests. Don't expect this to be exposed to
-// the users or the api.
-//     @dhammika in commit a0781f229
 #[cfg(feature = "testing")]
 async fn timeline_compact_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
@@ -899,6 +908,10 @@ pub fn make_router(
            "/v1/tenant/:tenant_id/timeline/:timeline_id",
            timeline_detail_handler,
        )
+        .get(
+            "/v1/tenant/:tenant_id/timeline/:timeline_id/get_lsn_by_timestamp",
+            get_lsn_by_timestamp_handler,
+        )
        .put(
            "/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc",
            testing_api!("run timeline GC", timeline_gc_handler),
--- a/pageserver/src/import_datadir.rs
+++ b/pageserver/src/import_datadir.rs
@@ -16,11 +16,13 @@ use crate::reltag::{RelTag, SlruKind};
 use crate::tenant::Timeline;
 use crate::walingest::WalIngest;
 use crate::walrecord::DecodedWALRecord;
-use postgres_ffi::v14::relfile_utils::*;
-use postgres_ffi::v14::waldecoder::*;
-use postgres_ffi::v14::xlog_utils::*;
-use postgres_ffi::v14::{pg_constants, ControlFileData, DBState_DB_SHUTDOWNED};
+use postgres_ffi::pg_constants;
+use postgres_ffi::relfile_utils::*;
+use postgres_ffi::waldecoder::WalStreamDecoder;
+use postgres_ffi::ControlFileData;
+use postgres_ffi::DBState_DB_SHUTDOWNED;
 use postgres_ffi::Oid;
+use postgres_ffi::XLogFileName;
 use postgres_ffi::{BLCKSZ, WAL_SEGMENT_SIZE};
 use utils::lsn::Lsn;

@@ -41,19 +43,19 @@ pub fn get_lsn_from_controlfile(path: &Path) -> Result<Lsn> {
 /// The code that deals with the checkpoint would not work right if the
 /// cluster was not shut down cleanly.
 pub fn import_timeline_from_postgres_datadir(
-    path: &Path,
    tline: &Timeline,
-    lsn: Lsn,
+    pgdata_path: &Path,
+    pgdata_lsn: Lsn,
 ) -> Result<()> {
    let mut pg_control: Option<ControlFileData> = None;

    // TODO this shoud be start_lsn, which is not necessarily equal to end_lsn (aka lsn)
    // Then fishing out pg_control would be unnecessary
-    let mut modification = tline.begin_modification(lsn);
+    let mut modification = tline.begin_modification(pgdata_lsn);
    modification.init_empty()?;

    // Import all but pg_wal
-    let all_but_wal = WalkDir::new(path)
+    let all_but_wal = WalkDir::new(pgdata_path)
        .into_iter()
        .filter_entry(|entry| !entry.path().ends_with("pg_wal"));
    for entry in all_but_wal {
@@ -61,7 +63,7 @@ pub fn import_timeline_from_postgres_datadir(
        let metadata = entry.metadata().expect("error getting dir entry metadata");
        if metadata.is_file() {
            let absolute_path = entry.path();
-            let relative_path = absolute_path.strip_prefix(path)?;
+            let relative_path = absolute_path.strip_prefix(pgdata_path)?;

            let file = File::open(absolute_path)?;
            let len = metadata.len() as usize;
@@ -82,7 +84,7 @@ pub fn import_timeline_from_postgres_datadir(
        "Postgres cluster was not shut down cleanly"
    );
    ensure!(
-        pg_control.checkPointCopy.redo == lsn.0,
+        pg_control.checkPointCopy.redo == pgdata_lsn.0,
        "unexpected checkpoint REDO pointer"
    );

@@ -90,10 +92,10 @@ pub fn import_timeline_from_postgres_datadir(
    // this reads the checkpoint record itself, advancing the tip of the timeline to
    // *after* the checkpoint record. And crucially, it initializes the 'prev_lsn'.
    import_wal(
-        &path.join("pg_wal"),
+        &pgdata_path.join("pg_wal"),
        tline,
        Lsn(pg_control.checkPointCopy.redo),
-        lsn,
+        pgdata_lsn,
    )?;

    Ok(())
@@ -236,7 +238,7 @@ fn import_slru<Reader: Read>(
 /// Scan PostgreSQL WAL files in given directory and load all records between
 /// 'startpoint' and 'endpoint' into the repository.
 fn import_wal(walpath: &Path, tline: &Timeline, startpoint: Lsn, endpoint: Lsn) -> Result<()> {
-    let mut waldecoder = WalStreamDecoder::new(startpoint);
+    let mut waldecoder = WalStreamDecoder::new(startpoint, tline.pg_version);

    let mut segno = startpoint.segment_number(WAL_SEGMENT_SIZE);
    let mut offset = startpoint.segment_offset(WAL_SEGMENT_SIZE);
@@ -354,7 +356,7 @@ pub fn import_wal_from_tar<Reader: Read>(
    end_lsn: Lsn,
 ) -> Result<()> {
    // Set up walingest mutable state
-    let mut waldecoder = WalStreamDecoder::new(start_lsn);
+    let mut waldecoder = WalStreamDecoder::new(start_lsn, tline.pg_version);
    let mut segno = start_lsn.segment_number(WAL_SEGMENT_SIZE);
    let mut offset = start_lsn.segment_offset(WAL_SEGMENT_SIZE);
    let mut last_lsn = start_lsn;
@@ -439,7 +441,7 @@ fn import_file<Reader: Read>(
    len: usize,
 ) -> Result<Option<ControlFileData>> {
    if file_path.starts_with("global") {
-        let spcnode = pg_constants::GLOBALTABLESPACE_OID;
+        let spcnode = postgres_ffi::pg_constants::GLOBALTABLESPACE_OID;
        let dbnode = 0;

        match file_path
@@ -467,7 +469,7 @@ fn import_file<Reader: Read>(
                debug!("imported relmap file")
            }
            "PG_VERSION" => {
-                debug!("ignored");
+                debug!("ignored PG_VERSION file");
            }
            _ => {
                import_rel(modification, file_path, spcnode, dbnode, reader, len)?;
@@ -495,7 +497,7 @@ fn import_file<Reader: Read>(
                debug!("imported relmap file")
            }
            "PG_VERSION" => {
-                debug!("ignored");
+                debug!("ignored PG_VERSION file");
            }
            _ => {
                import_rel(modification, file_path, spcnode, dbnode, reader, len)?;
--- a/pageserver/src/lib.rs
+++ b/pageserver/src/lib.rs
@@ -31,17 +31,23 @@ use crate::task_mgr::TaskKind;

 /// Current storage format version
 ///
-/// This is embedded in the metadata file, and also in the header of all the
-/// layer files. If you make any backwards-incompatible changes to the storage
+/// This is embedded in the header of all the layer files.
+/// If you make any backwards-incompatible changes to the storage
 /// format, bump this!
+/// Note that TimelineMetadata uses its own version number to track
+/// backwards-compatible changes to the metadata format.
 pub const STORAGE_FORMAT_VERSION: u16 = 3;

+pub const DEFAULT_PG_VERSION: u32 = 14;
+
 // Magic constants used to identify different kinds of files
 pub const IMAGE_FILE_MAGIC: u16 = 0x5A60;
 pub const DELTA_FILE_MAGIC: u16 = 0x5A61;

 pub const LOG_FILE_NAME: &str = "pageserver.log";

+static ZERO_PAGE: bytes::Bytes = bytes::Bytes::from_static(&[0u8; 8192]);
+
 /// Config for the Repository checkpointer
 #[derive(Debug, Clone, Copy)]
 pub enum CheckpointConfig {
@@ -115,32 +121,6 @@ impl<T> TenantTimelineValues<T> {
    fn new() -> Self {
        Self(HashMap::new())
    }
-
-    fn with_capacity(capacity: usize) -> Self {
-        Self(HashMap::with_capacity(capacity))
-    }
-
-    /// A convenience method to map certain values and omit some of them, if needed.
-    /// Tenants that won't have any timeline entries due to the filtering, will still be preserved
-    /// in the structure.
-    fn filter_map<F, NewT>(self, map: F) -> TenantTimelineValues<NewT>
-    where
-        F: Fn(T) -> Option<NewT>,
-    {
-        let capacity = self.0.len();
-        self.0.into_iter().fold(
-            TenantTimelineValues::<NewT>::with_capacity(capacity),
-            |mut new_values, (tenant_id, old_values)| {
-                let new_timeline_values = new_values.0.entry(tenant_id).or_default();
-                for (timeline_id, old_value) in old_values {
-                    if let Some(new_value) = map(old_value) {
-                        new_timeline_values.insert(timeline_id, new_value);
-                    }
-                }
-                new_values
-            },
-        )
-    }
 }

 /// A suffix to be used during file sync from the remote storage,
@@ -177,35 +157,3 @@ mod backoff_defaults_tests {
        );
    }
 }
-
-#[cfg(test)]
-mod tests {
-    use crate::tenant::harness::TIMELINE_ID;
-
-    use super::*;
-
-    #[test]
-    fn tenant_timeline_value_mapping() {
-        let first_tenant = TenantId::generate();
-        let second_tenant = TenantId::generate();
-        assert_ne!(first_tenant, second_tenant);
-
-        let mut initial = TenantTimelineValues::new();
-        initial
-            .0
-            .entry(first_tenant)
-            .or_default()
-            .insert(TIMELINE_ID, "test_value");
-        let _ = initial.0.entry(second_tenant).or_default();
-        assert_eq!(initial.0.len(), 2, "Should have entries for both tenants");
-
-        let filtered = initial.filter_map(|_| None::<&str>).0;
-        assert_eq!(
-            filtered.len(),
-            2,
-            "Should have entries for both tenants even after filtering away all entries"
-        );
-        assert!(filtered.contains_key(&first_tenant));
-        assert!(filtered.contains_key(&second_tenant));
-    }
-}
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -1,8 +1,9 @@
 use metrics::core::{AtomicU64, GenericCounter};
 use metrics::{
-    register_histogram, register_histogram_vec, register_int_counter, register_int_counter_vec,
-    register_int_gauge, register_int_gauge_vec, register_uint_gauge_vec, Histogram, HistogramVec,
-    IntCounter, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
+    register_gauge_vec, register_histogram, register_histogram_vec, register_int_counter,
+    register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge_vec,
+    GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge,
+    UIntGaugeVec,
 };
 use once_cell::sync::Lazy;
 use utils::id::{TenantId, TimelineId};
@@ -106,18 +107,20 @@ static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {

 // Metrics for cloud upload. These metrics reflect data uploaded to cloud storage,
 // or in testing they estimate how much we would upload if we did.
-static NUM_PERSISTENT_FILES_CREATED: Lazy<IntCounter> = Lazy::new(|| {
-    register_int_counter!(
+static NUM_PERSISTENT_FILES_CREATED: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
        "pageserver_created_persistent_files_total",
        "Number of files created that are meant to be uploaded to cloud storage",
+        &["tenant_id", "timeline_id"]
    )
    .expect("failed to define a metric")
 });

-static PERSISTENT_BYTES_WRITTEN: Lazy<IntCounter> = Lazy::new(|| {
-    register_int_counter!(
+static PERSISTENT_BYTES_WRITTEN: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
        "pageserver_written_persistent_bytes_total",
        "Total bytes written that are meant to be uploaded to cloud storage",
+        &["tenant_id", "timeline_id"]
    )
    .expect("failed to define a metric")
 });
@@ -204,12 +207,34 @@ pub static REMAINING_SYNC_ITEMS: Lazy<IntGauge> = Lazy::new(|| {
    .expect("failed to register pageserver remote storage remaining sync items int gauge")
 });

-pub static IMAGE_SYNC_TIME: Lazy<HistogramVec> = Lazy::new(|| {
+pub static IMAGE_SYNC_TIME: Lazy<GaugeVec> = Lazy::new(|| {
+    register_gauge_vec!(
+        "pageserver_remote_storage_image_sync_duration",
+        "Time spent to synchronize (up/download) a whole pageserver image",
+        &["tenant_id", "timeline_id"],
+    )
+    .expect("failed to register per-timeline pageserver image sync time vec")
+});
+
+pub static IMAGE_SYNC_OPERATION_KINDS: &[&str] = &["upload", "download", "delete"];
+pub static IMAGE_SYNC_STATUS: &[&str] = &["success", "failure", "abort"];
+
+pub static IMAGE_SYNC_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "pageserver_remote_storage_image_sync_count",
+        "Number of synchronization operations executed for pageserver images. \
+        Grouped by tenant, timeline, operation_kind and status",
+        &["tenant_id", "timeline_id", "operation_kind", "status"]
+    )
+    .expect("failed to register pageserver image sync count vec")
+});
+
+pub static IMAGE_SYNC_TIME_HISTOGRAM: Lazy<HistogramVec> = Lazy::new(|| {
    register_histogram_vec!(
        "pageserver_remote_storage_image_sync_seconds",
        "Time took to synchronize (download or upload) a whole pageserver image. \
-        Grouped by tenant and timeline ids, `operation_kind` (upload|download) and `status` (success|failure)",
-        &["tenant_id", "timeline_id", "operation_kind", "status"],
+        Grouped by operation_kind and status",
+        &["operation_kind", "status"],
        vec![0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 3.0, 10.0, 20.0]
    )
    .expect("failed to register pageserver image sync time histogram vec")
@@ -252,11 +277,15 @@ pub static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
 /// smallest redo processing times. These buckets allow us to measure down
 /// to 5us, which equates to 200'000 pages/sec, which equates to 1.6GB/sec.
 /// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec.
+///
+/// Values up to 1s are recorded because metrics show that we have redo
+/// durations and lock times larger than 0.250s.
 macro_rules! redo_histogram_time_buckets {
    () => {
        vec![
            0.000_005, 0.000_010, 0.000_025, 0.000_050, 0.000_100, 0.000_250, 0.000_500, 0.001_000,
-            0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000,
+            0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000, 0.500_000,
+            1.000_000,
        ]
    };
 }
@@ -271,6 +300,17 @@ macro_rules! redo_histogram_count_buckets {
    };
 }

+macro_rules! redo_bytes_histogram_count_buckets {
+    () => {
+        // powers of (2^.5), from 2^4.5 to 2^15 (22 buckets)
+        // rounded up to the next multiple of 8 to capture any MAXALIGNed record of that size, too.
+        vec![
+            24.0, 32.0, 48.0, 64.0, 96.0, 128.0, 184.0, 256.0, 368.0, 512.0, 728.0, 1024.0, 1456.0,
+            2048.0, 2904.0, 4096.0, 5800.0, 8192.0, 11592.0, 16384.0, 23176.0, 32768.0,
+        ]
+    };
+}
+
 pub static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
    register_histogram!(
        "pageserver_wal_redo_seconds",
@@ -298,6 +338,15 @@ pub static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
    .expect("failed to define a metric")
 });

+pub static WAL_REDO_BYTES_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
+    register_histogram!(
+        "pageserver_wal_redo_bytes_histogram",
+        "Histogram of number of bytes of WAL records replayed per redo",
+        redo_bytes_histogram_count_buckets!(),
+    )
+    .expect("failed to define a metric")
+});
+
 pub static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!(
        "pageserver_replayed_wal_records_total",
@@ -363,8 +412,12 @@ impl TimelineMetrics {
        let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
            .get_metric_with_label_values(&[&tenant_id, &timeline_id])
            .unwrap();
-        let num_persistent_files_created = NUM_PERSISTENT_FILES_CREATED.clone();
-        let persistent_bytes_written = PERSISTENT_BYTES_WRITTEN.clone();
+        let num_persistent_files_created = NUM_PERSISTENT_FILES_CREATED
+            .get_metric_with_label_values(&[&tenant_id, &timeline_id])
+            .unwrap();
+        let persistent_bytes_written = PERSISTENT_BYTES_WRITTEN
+            .get_metric_with_label_values(&[&tenant_id, &timeline_id])
+            .unwrap();

        TimelineMetrics {
            tenant_id,
@@ -396,6 +449,8 @@ impl Drop for TimelineMetrics {
        let _ = WAIT_LSN_TIME.remove_label_values(&[tenant_id, timeline_id]);
        let _ = CURRENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
        let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
+        let _ = NUM_PERSISTENT_FILES_CREATED.remove_label_values(&[tenant_id, timeline_id]);
+        let _ = PERSISTENT_BYTES_WRITTEN.remove_label_values(&[tenant_id, timeline_id]);

        for op in STORAGE_TIME_OPERATIONS {
            let _ = STORAGE_TIME.remove_label_values(&[op, tenant_id, timeline_id]);
@@ -411,6 +466,14 @@ impl Drop for TimelineMetrics {
        for op in SMGR_QUERY_TIME_OPERATIONS {
            let _ = SMGR_QUERY_TIME.remove_label_values(&[op, tenant_id, timeline_id]);
        }
+
+        for op in IMAGE_SYNC_OPERATION_KINDS {
+            for status in IMAGE_SYNC_STATUS {
+                let _ = IMAGE_SYNC_COUNT.remove_label_values(&[tenant_id, timeline_id, op, status]);
+            }
+        }
+
+        let _ = IMAGE_SYNC_TIME.remove_label_values(&[tenant_id, timeline_id]);
    }
 }

--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -12,7 +12,6 @@
 use anyhow::{bail, ensure, Context, Result};
 use bytes::{Buf, BufMut, Bytes, BytesMut};
 use futures::{Stream, StreamExt};
-use regex::Regex;
 use std::io;
 use std::net::TcpListener;
 use std::str;
@@ -33,9 +32,8 @@ use utils::{

 use crate::basebackup;
 use crate::config::{PageServerConf, ProfilingConfig};
-use crate::import_datadir::{import_basebackup_from_tar, import_wal_from_tar};
+use crate::import_datadir::import_wal_from_tar;
 use crate::metrics::{LIVE_CONNECTIONS_COUNT, SMGR_QUERY_TIME};
-use crate::pgdatadir_mapping::LsnForTimestamp;
 use crate::profiling::profpoint_start;
 use crate::reltag::RelTag;
 use crate::task_mgr;
@@ -43,9 +41,8 @@ use crate::task_mgr::TaskKind;
 use crate::tenant::Timeline;
 use crate::tenant_mgr;
 use crate::CheckpointConfig;
-use postgres_ffi::v14::xlog_utils::to_pg_timestamp;

-use postgres_ffi::v14::pg_constants::DEFAULTTABLESPACE_OID;
+use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
 use postgres_ffi::BLCKSZ;

 // Wrapped in libpq CopyData
@@ -498,12 +495,13 @@ impl PageServerHandler {
        timeline_id: TimelineId,
        base_lsn: Lsn,
        _end_lsn: Lsn,
+        pg_version: u32,
    ) -> anyhow::Result<()> {
        task_mgr::associate_with(Some(tenant_id), Some(timeline_id));
        // Create empty timeline
        info!("creating new timeline");
-        let timeline = tenant_mgr::get_tenant(tenant_id, true)?
-            .create_empty_timeline(timeline_id, base_lsn)?;
+        let tenant = tenant_mgr::get_tenant(tenant_id, true)?;
+        let timeline = tenant.create_empty_timeline(timeline_id, base_lsn, pg_version)?;

        // TODO mark timeline as not ready until it reaches end_lsn.
        // We might have some wal to import as well, and we should prevent compute
@@ -526,7 +524,8 @@ impl PageServerHandler {
        // - use block_in_place()
        let mut copyin_stream = Box::pin(copyin_stream(pgb));
        let reader = SyncIoBridge::new(StreamReader::new(&mut copyin_stream));
-        tokio::task::block_in_place(|| import_basebackup_from_tar(&timeline, reader, base_lsn))?;
+        tokio::task::block_in_place(|| timeline.import_basebackup_from_tar(reader, base_lsn))?;
+        timeline.initialize()?;

        // Drain the rest of the Copy data
        let mut bytes_after_tar = 0;
@@ -543,12 +542,6 @@ impl PageServerHandler {
        // It wouldn't work if base came from vanilla postgres though,
        // since we discard some log files.

-        // Flush data to disk, then upload to s3
-        info!("flushing layers");
-        timeline.checkpoint(CheckpointConfig::Flush)?;
-
-        timeline.launch_wal_receiver()?;
-
        info!("done");
        Ok(())
    }
@@ -663,7 +656,7 @@ impl PageServerHandler {
        Ok(lsn)
    }

-    #[instrument(skip(timeline, req), fields(rel = %req.rel, req_lsn = %req.lsn))]
+    #[instrument(skip(self, timeline, req), fields(rel = %req.rel, req_lsn = %req.lsn))]
    async fn handle_get_rel_exists_request(
        &self,
        timeline: &Timeline,
@@ -680,7 +673,7 @@ impl PageServerHandler {
        }))
    }

-    #[instrument(skip(timeline, req), fields(rel = %req.rel, req_lsn = %req.lsn))]
+    #[instrument(skip(self, timeline, req), fields(rel = %req.rel, req_lsn = %req.lsn))]
    async fn handle_get_nblocks_request(
        &self,
        timeline: &Timeline,
@@ -697,7 +690,7 @@ impl PageServerHandler {
        }))
    }

-    #[instrument(skip(timeline, req), fields(dbnode = %req.dbnode, req_lsn = %req.lsn))]
+    #[instrument(skip(self, timeline, req), fields(dbnode = %req.dbnode, req_lsn = %req.lsn))]
    async fn handle_db_size_request(
        &self,
        timeline: &Timeline,
@@ -717,7 +710,7 @@ impl PageServerHandler {
        }))
    }

-    #[instrument(skip(timeline, req), fields(rel = %req.rel, blkno = %req.blkno, req_lsn = %req.lsn))]
+    #[instrument(skip(self, timeline, req), fields(rel = %req.rel, blkno = %req.blkno, req_lsn = %req.lsn))]
    async fn handle_get_page_at_lsn_request(
        &self,
        timeline: &Timeline,
@@ -955,19 +948,27 @@ impl postgres_backend_async::Handler for PageServerHandler {
            // 1. Get start/end LSN from backup_manifest file
            // 2. Run:
            // cat my_backup/base.tar | psql -h $PAGESERVER \
-            //     -c "import basebackup $TENANT $TIMELINE $START_LSN $END_LSN"
+            //     -c "import basebackup $TENANT $TIMELINE $START_LSN $END_LSN $PG_VERSION"
            let (_, params_raw) = query_string.split_at("import basebackup ".len());
            let params = params_raw.split_whitespace().collect::<Vec<_>>();
-            ensure!(params.len() == 4);
+            ensure!(params.len() == 5);
            let tenant_id = TenantId::from_str(params[0])?;
            let timeline_id = TimelineId::from_str(params[1])?;
            let base_lsn = Lsn::from_str(params[2])?;
            let end_lsn = Lsn::from_str(params[3])?;
+            let pg_version = u32::from_str(params[4])?;

            self.check_permission(Some(tenant_id))?;

            match self
-                .handle_import_basebackup(pgb, tenant_id, timeline_id, base_lsn, end_lsn)
+                .handle_import_basebackup(
+                    pgb,
+                    tenant_id,
+                    timeline_id,
+                    base_lsn,
+                    end_lsn,
+                    pg_version,
+                )
                .await
            {
                Ok(()) => pgb.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?,
@@ -1011,6 +1012,9 @@ impl postgres_backend_async::Handler for PageServerHandler {
            let params = params_raw.split(' ').collect::<Vec<_>>();
            ensure!(params.len() == 1, "invalid param number for config command");
            let tenant_id = TenantId::from_str(params[0])?;
+
+            self.check_permission(Some(tenant_id))?;
+
            let tenant = tenant_mgr::get_tenant(tenant_id, true)?;
            pgb.write_message(&BeMessage::RowDescription(&[
                RowDescriptor::int8_col(b"checkpoint_distance"),
@@ -1047,33 +1051,6 @@ impl postgres_backend_async::Handler for PageServerHandler {
                Some(tenant.get_pitr_interval().as_secs().to_string().as_bytes()),
            ]))?
            .write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
-        } else if query_string.starts_with("get_lsn_by_timestamp ") {
-            // Locate LSN of last transaction with timestamp less or equal than sppecified
-            // TODO lazy static
-            let re = Regex::new(r"^get_lsn_by_timestamp ([[:xdigit:]]+) ([[:xdigit:]]+) '(.*)'$")
-                .unwrap();
-            let caps = re
-                .captures(query_string)
-                .with_context(|| format!("invalid get_lsn_by_timestamp: '{}'", query_string))?;
-
-            let tenant_id = TenantId::from_str(caps.get(1).unwrap().as_str())?;
-            let timeline_id = TimelineId::from_str(caps.get(2).unwrap().as_str())?;
-            let timeline = get_local_timeline(tenant_id, timeline_id)?;
-
-            let timestamp = humantime::parse_rfc3339(caps.get(3).unwrap().as_str())?;
-            let timestamp_pg = to_pg_timestamp(timestamp);
-
-            pgb.write_message(&BeMessage::RowDescription(&[RowDescriptor::text_col(
-                b"lsn",
-            )]))?;
-            let result = match timeline.find_lsn_for_timestamp(timestamp_pg)? {
-                LsnForTimestamp::Present(lsn) => format!("{}", lsn),
-                LsnForTimestamp::Future(_lsn) => "future".into(),
-                LsnForTimestamp::Past(_lsn) => "past".into(),
-                LsnForTimestamp::NoData(_lsn) => "nodata".into(),
-            };
-            pgb.write_message(&BeMessage::DataRow(&[Some(result.as_bytes())]))?;
-            pgb.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
        } else {
            bail!("unknown command");
        }
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -13,7 +13,7 @@ use crate::tenant::Timeline;
 use crate::walrecord::NeonWalRecord;
 use anyhow::{bail, ensure, Result};
 use bytes::{Buf, Bytes};
-use postgres_ffi::v14::pg_constants;
+use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
 use postgres_ffi::BLCKSZ;
 use postgres_ffi::{Oid, TimestampTz, TransactionId};
 use serde::{Deserialize, Serialize};
@@ -125,8 +125,7 @@ impl Timeline {
            return Ok(nblocks);
        }

-        if (tag.forknum == pg_constants::FSM_FORKNUM
-            || tag.forknum == pg_constants::VISIBILITYMAP_FORKNUM)
+        if (tag.forknum == FSM_FORKNUM || tag.forknum == VISIBILITYMAP_FORKNUM)
            && !self.get_rel_exists(tag, lsn, latest)?
        {
            // FIXME: Postgres sometimes calls smgrcreate() to create
@@ -1090,6 +1089,7 @@ static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; BLCKSZ as usize]);
 // 03 misc
 //    controlfile
 //    checkpoint
+//    pg_version
 //
 // Below is a full list of the keyspace allocation:
 //
@@ -1128,7 +1128,6 @@ static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; BLCKSZ as usize]);
 //
 // Checkpoint:
 // 03 00000000 00000000 00000000 00   00000001
-
 //-- Section 01: relation data and metadata

 const DBDIR_KEY: Key = Key {
@@ -1374,6 +1373,17 @@ fn is_rel_block_key(key: Key) -> bool {
    key.field1 == 0x00 && key.field4 != 0
 }

+pub fn is_rel_fsm_block_key(key: Key) -> bool {
+    key.field1 == 0x00 && key.field4 != 0 && key.field5 == FSM_FORKNUM && key.field6 != 0xffffffff
+}
+
+pub fn is_rel_vm_block_key(key: Key) -> bool {
+    key.field1 == 0x00
+        && key.field4 != 0
+        && key.field5 == VISIBILITYMAP_FORKNUM
+        && key.field6 != 0xffffffff
+}
+
 pub fn key_to_slru_block(key: Key) -> Result<(SlruKind, u32, BlockNumber)> {
    Ok(match key.field1 {
        0x01 => {
@@ -1402,8 +1412,11 @@ fn is_slru_block_key(key: Key) -> bool {
 pub fn create_test_timeline(
    tenant: &crate::tenant::Tenant,
    timeline_id: utils::id::TimelineId,
+    pg_version: u32,
 ) -> Result<std::sync::Arc<Timeline>> {
-    let tline = tenant.create_empty_timeline(timeline_id, Lsn(8))?;
+    let tline = tenant
+        .create_empty_timeline(timeline_id, Lsn(8), pg_version)?
+        .initialize()?;
    let mut m = tline.begin_modification(Lsn(8));
    m.init_empty()?;
    m.commit()?;
--- a/pageserver/src/reltag.rs
+++ b/pageserver/src/reltag.rs
@@ -2,8 +2,8 @@ use serde::{Deserialize, Serialize};
 use std::cmp::Ordering;
 use std::fmt;

-use postgres_ffi::v14::pg_constants;
-use postgres_ffi::v14::relfile_utils::forknumber_to_name;
+use postgres_ffi::pg_constants::GLOBALTABLESPACE_OID;
+use postgres_ffi::relfile_utils::forknumber_to_name;
 use postgres_ffi::Oid;

 ///
@@ -78,7 +78,7 @@ impl fmt::Display for RelTag {

 impl RelTag {
    pub fn to_segfile_name(&self, segno: u32) -> String {
-        let mut name = if self.spcnode == pg_constants::GLOBALTABLESPACE_OID {
+        let mut name = if self.spcnode == GLOBALTABLESPACE_OID {
            "global/".to_string()
        } else {
            format!("base/{}/", self.dbnode)
--- a/pageserver/src/storage_sync.rs
+++ b/pageserver/src/storage_sync.rs
@@ -169,15 +169,21 @@ use self::{
    upload::{upload_index_part, upload_timeline_layers, UploadedTimeline},
 };
 use crate::{
-    config::PageServerConf, exponential_backoff, storage_sync::index::RemoteIndex, task_mgr,
-    task_mgr::TaskKind, task_mgr::BACKGROUND_RUNTIME, tenant::metadata::TimelineMetadata,
-    tenant_mgr::attach_local_tenants,
+    config::PageServerConf,
+    exponential_backoff,
+    storage_sync::index::{LayerFileMetadata, RemoteIndex},
+    task_mgr,
+    task_mgr::TaskKind,
+    task_mgr::BACKGROUND_RUNTIME,
+    tenant::metadata::TimelineMetadata,
+    tenant_mgr::{attach_local_tenants, TenantAttachData},
 };
 use crate::{
    metrics::{IMAGE_SYNC_TIME, REMAINING_SYNC_ITEMS, REMOTE_INDEX_UPLOAD},
    TenantTimelineValues,
 };

+use crate::metrics::{IMAGE_SYNC_COUNT, IMAGE_SYNC_TIME_HISTOGRAM};
 use utils::id::{TenantId, TenantTimelineId, TimelineId};

 use self::download::download_index_parts;
@@ -187,7 +193,7 @@ static SYNC_QUEUE: OnceCell<SyncQueue> = OnceCell::new();

 /// A timeline status to share with pageserver's sync counterpart,
 /// after comparing local and remote timeline state.
-#[derive(Clone)]
+#[derive(Clone, PartialEq, Eq)]
 pub enum LocalTimelineInitStatus {
    /// The timeline has every remote layer present locally.
    /// There could be some layers requiring uploading,
@@ -310,7 +316,7 @@ impl SyncQueue {

 /// A task to run in the async download/upload loop.
 /// Limited by the number of retries, after certain threshold the failing task gets evicted and the timeline disabled.
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq, Eq)]
 enum SyncTask {
    /// A checkpoint outcome with possible local file updates that need actualization in the remote storage.
    /// Not necessary more fresh than the one already uploaded.
@@ -421,7 +427,7 @@ impl SyncTaskBatch {
                            .extend(new_delete.data.deleted_layers.iter().cloned());
                    }
                    if let Some(batch_upload) = &mut self.upload {
-                        let not_deleted = |layer: &PathBuf| {
+                        let not_deleted = |layer: &PathBuf, _: &mut LayerFileMetadata| {
                            !new_delete.data.layers_to_delete.contains(layer)
                                && !new_delete.data.deleted_layers.contains(layer)
                        };
@@ -449,21 +455,35 @@ impl SyncTaskBatch {
 #[derive(Debug, Clone, PartialEq, Eq)]
 struct LayersUpload {
    /// Layer file path in the pageserver workdir, that were added for the corresponding checkpoint.
-    layers_to_upload: HashSet<PathBuf>,
+    layers_to_upload: HashMap<PathBuf, LayerFileMetadata>,
    /// Already uploaded layers. Used to store the data about the uploads between task retries
    /// and to record the data into the remote index after the task got completed or evicted.
-    uploaded_layers: HashSet<PathBuf>,
+    uploaded_layers: HashMap<PathBuf, LayerFileMetadata>,
    metadata: Option<TimelineMetadata>,
 }

 /// A timeline download task.
 /// Does not contain the file list to download, to allow other
 /// parts of the pageserer code to schedule the task
-/// without using the remote index or any other ways to list the remote timleine files.
+/// without using the remote index or any other ways to list the remote timeline files.
 /// Skips the files that are already downloaded.
 #[derive(Debug, Clone, PartialEq, Eq)]
 struct LayersDownload {
    layers_to_skip: HashSet<PathBuf>,
+
+    /// Paths which have been downloaded, and had their metadata verified or generated.
+    ///
+    /// Metadata generation happens when upgrading from past version of `IndexPart`.
+    gathered_metadata: HashMap<PathBuf, LayerFileMetadata>,
+}
+
+impl LayersDownload {
+    fn from_skipped_layers(layers_to_skip: HashSet<PathBuf>) -> Self {
+        LayersDownload {
+            layers_to_skip,
+            gathered_metadata: HashMap::default(),
+        }
+    }
 }

 #[derive(Debug, Clone, PartialEq, Eq)]
@@ -485,7 +505,7 @@ struct LayersDeletion {
 pub fn schedule_layer_upload(
    tenant_id: TenantId,
    timeline_id: TimelineId,
-    layers_to_upload: HashSet<PathBuf>,
+    layers_to_upload: HashMap<PathBuf, LayerFileMetadata>,
    metadata: Option<TimelineMetadata>,
 ) {
    let sync_queue = match SYNC_QUEUE.get() {
@@ -502,7 +522,7 @@ pub fn schedule_layer_upload(
        },
        SyncTask::upload(LayersUpload {
            layers_to_upload,
-            uploaded_layers: HashSet::new(),
+            uploaded_layers: HashMap::new(),
            metadata,
        }),
    );
@@ -560,18 +580,44 @@ pub fn schedule_layer_download(tenant_id: TenantId, timeline_id: TimelineId) {
            tenant_id,
            timeline_id,
        },
-        SyncTask::download(LayersDownload {
-            layers_to_skip: HashSet::new(),
-        }),
+        SyncTask::download(LayersDownload::from_skipped_layers(HashSet::new())),
    );
    debug!("Download task for tenant {tenant_id}, timeline {timeline_id} sent")
 }

+/// Local existing timeline files
+///
+/// Values of this type serve different purposes in different contexts: they are used on startup,
+/// when collected timelines come with the full collected file information, and when signalling
+/// readiness to attach after a completed download. After the download the file information is no
+/// longer carried, because it is already merged into [`RemoteTimeline`].
+#[derive(Debug)]
+pub struct TimelineLocalFiles(TimelineMetadata, HashMap<PathBuf, LayerFileMetadata>);
+
+impl TimelineLocalFiles {
+    pub fn metadata(&self) -> &TimelineMetadata {
+        &self.0
+    }
+
+    /// Called during startup, for all of the local files with full metadata.
+    pub(crate) fn collected(
+        metadata: TimelineMetadata,
+        timeline_files: HashMap<PathBuf, LayerFileMetadata>,
+    ) -> TimelineLocalFiles {
+        TimelineLocalFiles(metadata, timeline_files)
+    }
+
+    /// Called near the end of tenant initialization, to signal readiness to attach tenants.
+    pub(crate) fn ready(metadata: TimelineMetadata) -> Self {
+        TimelineLocalFiles(metadata, HashMap::new())
+    }
+}
+
 /// Launch a thread to perform remote storage sync tasks.
 /// See module docs for loop step description.
 pub fn spawn_storage_sync_task(
    conf: &'static PageServerConf,
-    local_timeline_files: TenantTimelineValues<(TimelineMetadata, HashSet<PathBuf>)>,
+    local_timeline_files: HashMap<TenantId, HashMap<TimelineId, TimelineLocalFiles>>,
    storage: GenericRemoteStorage,
    max_concurrent_timelines_sync: NonZeroUsize,
    max_sync_errors: NonZeroU32,
@@ -594,7 +640,7 @@ pub fn spawn_storage_sync_task(
    let mut keys_for_index_part_downloads = HashSet::new();
    let mut timelines_to_sync = HashMap::new();

-    for (tenant_id, timeline_data) in local_timeline_files.0 {
+    for (tenant_id, timeline_data) in local_timeline_files {
        if timeline_data.is_empty() {
            info!("got empty tenant {}", tenant_id);
            let _ = empty_tenants.0.entry(tenant_id).or_default();
@@ -638,6 +684,7 @@ pub fn spawn_storage_sync_task(
                (storage, remote_index_clone, sync_queue),
                max_sync_errors,
            )
+            .instrument(info_span!("storage_sync_loop"))
            .await;
            Ok(())
        },
@@ -696,7 +743,7 @@ async fn storage_sync_loop(
                        "Sync loop step completed, {} new tenant state update(s)",
                        updated_tenants.len()
                    );
-                    let mut timelines_to_attach = TenantTimelineValues::new();
+                    let mut timelines_to_attach = HashMap::new();
                    let index_accessor = index.read().await;
                    for tenant_id in updated_tenants {
                        let tenant_entry = match index_accessor.tenant_entry(&tenant_id) {
@@ -722,12 +769,16 @@ async fn storage_sync_loop(
                            // and register them all at once in a tenant for download
                            // to be submitted in a single operation to tenant
                            // so it can apply them at once to internal timeline map.
-                            timelines_to_attach.0.insert(
+                            timelines_to_attach.insert(
                                tenant_id,
-                                tenant_entry
-                                    .iter()
-                                    .map(|(&id, entry)| (id, entry.metadata.clone()))
-                                    .collect(),
+                                TenantAttachData::Ready(
+                                    tenant_entry
+                                        .iter()
+                                        .map(|(&id, entry)| {
+                                            (id, TimelineLocalFiles::ready(entry.metadata.clone()))
+                                        })
+                                        .collect(),
+                                ),
                            );
                        }
                    }
@@ -835,7 +886,6 @@ async fn process_sync_task_batch(
                            sync_id,
                            upload_data,
                            sync_start,
-                            "upload",
                        )
                        .await
                    }
@@ -879,7 +929,6 @@ async fn process_sync_task_batch(
                            sync_id,
                            download_data,
                            sync_start,
-                            "download",
                        )
                        .await;
                    }
@@ -911,7 +960,6 @@ async fn process_sync_task_batch(
                            sync_id,
                            delete_data,
                            sync_start,
-                            "delete",
                        )
                        .instrument(info_span!("delete_timeline_data"))
                        .await;
@@ -948,8 +996,9 @@ async fn download_timeline_data(
    sync_id: TenantTimelineId,
    new_download_data: SyncData<LayersDownload>,
    sync_start: Instant,
-    task_name: &str,
 ) -> DownloadStatus {
+    static TASK_NAME: &str = "download";
+
    match download_timeline_layers(
        conf,
        storage,
@@ -961,30 +1010,42 @@ async fn download_timeline_data(
    .await
    {
        DownloadedTimeline::Abort => {
-            register_sync_status(sync_id, sync_start, task_name, None);
+            register_sync_status(sync_id, sync_start, TASK_NAME, None);
            if let Err(e) = index.write().await.set_awaits_download(&sync_id, false) {
                error!("Timeline {sync_id} was expected to be in the remote index after a download attempt, but it's absent: {e:?}");
            }
        }
        DownloadedTimeline::FailedAndRescheduled => {
-            register_sync_status(sync_id, sync_start, task_name, Some(false));
+            register_sync_status(sync_id, sync_start, TASK_NAME, Some(false));
        }
        DownloadedTimeline::Successful(mut download_data) => {
            match update_local_metadata(conf, sync_id, current_remote_timeline).await {
-                Ok(()) => match index.write().await.set_awaits_download(&sync_id, false) {
-                    Ok(()) => {
-                        register_sync_status(sync_id, sync_start, task_name, Some(true));
-                        return DownloadStatus::Downloaded;
-                    }
-                    Err(e) => {
-                        error!("Timeline {sync_id} was expected to be in the remote index after a successful download, but it's absent: {e:?}");
-                    }
-                },
+                Ok(()) => {
+                    let mut g = index.write().await;
+
+                    match g.set_awaits_download(&sync_id, false) {
+                        Ok(()) => {
+                            let timeline = g
+                                .timeline_entry_mut(&sync_id)
+                                .expect("set_awaits_download verified existence");
+
+                            timeline.merge_metadata_from_downloaded(
+                                &download_data.data.gathered_metadata,
+                            );
+
+                            register_sync_status(sync_id, sync_start, TASK_NAME, Some(true));
+                            return DownloadStatus::Downloaded;
+                        }
+                        Err(e) => {
+                            error!("Timeline {sync_id} was expected to be in the remote index after a successful download, but it's absent: {e:?}");
+                        }
+                    };
+                }
                Err(e) => {
                    error!("Failed to update local timeline metadata: {e:?}");
                    download_data.retries += 1;
                    sync_queue.push(sync_id, SyncTask::Download(download_data));
-                    register_sync_status(sync_id, sync_start, task_name, Some(false));
+                    register_sync_status(sync_id, sync_start, TASK_NAME, Some(false));
                }
            }
        }
@@ -1060,8 +1121,9 @@ async fn delete_timeline_data(
    sync_id: TenantTimelineId,
    mut new_delete_data: SyncData<LayersDeletion>,
    sync_start: Instant,
-    task_name: &str,
 ) {
+    static TASK_NAME: &str = "delete";
+
    let timeline_delete = &mut new_delete_data.data;

    if !timeline_delete.deletion_registered {
@@ -1077,14 +1139,14 @@ async fn delete_timeline_data(
            error!("Failed to update remote timeline {sync_id}: {e:?}");
            new_delete_data.retries += 1;
            sync_queue.push(sync_id, SyncTask::Delete(new_delete_data));
-            register_sync_status(sync_id, sync_start, task_name, Some(false));
+            register_sync_status(sync_id, sync_start, TASK_NAME, Some(false));
            return;
        }
    }
    timeline_delete.deletion_registered = true;

    let sync_status = delete_timeline_layers(storage, sync_queue, sync_id, new_delete_data).await;
-    register_sync_status(sync_id, sync_start, task_name, Some(sync_status));
+    register_sync_status(sync_id, sync_start, TASK_NAME, Some(sync_status));
 }

 async fn read_metadata_file(metadata_path: &Path) -> anyhow::Result<TimelineMetadata> {
@@ -1103,8 +1165,8 @@ async fn upload_timeline_data(
    sync_id: TenantTimelineId,
    new_upload_data: SyncData<LayersUpload>,
    sync_start: Instant,
-    task_name: &str,
 ) -> UploadStatus {
+    static TASK_NAME: &str = "upload";
    let mut uploaded_data = match upload_timeline_layers(
        storage,
        sync_queue,
@@ -1115,7 +1177,7 @@ async fn upload_timeline_data(
    .await
    {
        UploadedTimeline::FailedAndRescheduled(e) => {
-            register_sync_status(sync_id, sync_start, task_name, Some(false));
+            register_sync_status(sync_id, sync_start, TASK_NAME, Some(false));
            return UploadStatus::Failed(e);
        }
        UploadedTimeline::Successful(upload_data) => upload_data,
@@ -1134,14 +1196,14 @@ async fn upload_timeline_data(
    .await
    {
        Ok(()) => {
-            register_sync_status(sync_id, sync_start, task_name, Some(true));
+            register_sync_status(sync_id, sync_start, TASK_NAME, Some(true));
            UploadStatus::Uploaded
        }
        Err(e) => {
            error!("Failed to update remote timeline {sync_id}: {e:?}");
            uploaded_data.retries += 1;
            sync_queue.push(sync_id, SyncTask::Upload(uploaded_data));
-            register_sync_status(sync_id, sync_start, task_name, Some(false));
+            register_sync_status(sync_id, sync_start, TASK_NAME, Some(false));
            UploadStatus::Failed(e)
        }
    }
@@ -1181,11 +1243,18 @@ async fn update_remote_data(
                        }
                        if upload_failed {
                            existing_entry.add_upload_failures(
-                                uploaded_data.layers_to_upload.iter().cloned(),
+                                uploaded_data
+                                    .layers_to_upload
+                                    .iter()
+                                    .map(|(k, v)| (k.to_owned(), v.to_owned())),
                            );
                        } else {
-                            existing_entry
-                                .add_timeline_layers(uploaded_data.uploaded_layers.iter().cloned());
+                            existing_entry.add_timeline_layers(
+                                uploaded_data
+                                    .uploaded_layers
+                                    .iter()
+                                    .map(|(k, v)| (k.to_owned(), v.to_owned())),
+                            );
                        }
                    }
                    RemoteDataUpdate::Delete(layers_to_remove) => {
@@ -1205,11 +1274,19 @@ async fn update_remote_data(
                    };
                    let mut new_remote_timeline = RemoteTimeline::new(new_metadata.clone());
                    if upload_failed {
-                        new_remote_timeline
-                            .add_upload_failures(uploaded_data.layers_to_upload.iter().cloned());
+                        new_remote_timeline.add_upload_failures(
+                            uploaded_data
+                                .layers_to_upload
+                                .iter()
+                                .map(|(k, v)| (k.to_owned(), v.to_owned())),
+                        );
                    } else {
-                        new_remote_timeline
-                            .add_timeline_layers(uploaded_data.uploaded_layers.iter().cloned());
+                        new_remote_timeline.add_timeline_layers(
+                            uploaded_data
+                                .uploaded_layers
+                                .iter()
+                                .map(|(k, v)| (k.to_owned(), v.to_owned())),
+                        );
                    }

                    index_accessor.add_timeline_entry(sync_id, new_remote_timeline.clone());
@@ -1257,13 +1334,14 @@ async fn validate_task_retries(
 fn schedule_first_sync_tasks(
    index: &mut RemoteTimelineIndex,
    sync_queue: &SyncQueue,
-    local_timeline_files: HashMap<TenantTimelineId, (TimelineMetadata, HashSet<PathBuf>)>,
+    local_timeline_files: HashMap<TenantTimelineId, TimelineLocalFiles>,
 ) -> TenantTimelineValues<LocalTimelineInitStatus> {
    let mut local_timeline_init_statuses = TenantTimelineValues::new();

    let mut new_sync_tasks = VecDeque::with_capacity(local_timeline_files.len());

-    for (sync_id, (local_metadata, local_files)) in local_timeline_files {
+    for (sync_id, local_timeline) in local_timeline_files {
+        let TimelineLocalFiles(local_metadata, local_files) = local_timeline;
        match index.timeline_entry_mut(&sync_id) {
            Some(remote_timeline) => {
                let (timeline_status, awaits_download) = compare_local_and_remote_timeline(
@@ -1307,7 +1385,7 @@ fn schedule_first_sync_tasks(
                    sync_id,
                    SyncTask::upload(LayersUpload {
                        layers_to_upload: local_files,
-                        uploaded_layers: HashSet::new(),
+                        uploaded_layers: HashMap::new(),
                        metadata: Some(local_metadata.clone()),
                    }),
                ));
@@ -1334,20 +1412,46 @@ fn compare_local_and_remote_timeline(
    new_sync_tasks: &mut VecDeque<(TenantTimelineId, SyncTask)>,
    sync_id: TenantTimelineId,
    local_metadata: TimelineMetadata,
-    local_files: HashSet<PathBuf>,
+    local_files: HashMap<PathBuf, LayerFileMetadata>,
    remote_entry: &RemoteTimeline,
 ) -> (LocalTimelineInitStatus, bool) {
    let _entered = info_span!("compare_local_and_remote_timeline", sync_id = %sync_id).entered();

-    let remote_files = remote_entry.stored_files();
+    let needed_to_download_files = remote_entry
+        .stored_files()
+        .iter()
+        .filter_map(|(layer_file, remote_metadata)| {
+            if let Some(local_metadata) = local_files.get(layer_file) {
+                match (remote_metadata.file_size(), local_metadata.file_size()) {
+                    (Some(x), Some(y)) if x == y => { None },
+                    (None, Some(_)) => {
+                        // upgrading from an earlier IndexPart without metadata
+                        None
+                    },
+                    _ => {
+                        // having to deal with other than (Some(x), Some(y)) where x != y here is a
+                        // bummer, but see #2582 and #2610 for attempts and discussion.
+                        warn!("Redownloading locally existing {layer_file:?} due to size mismatch, size on index: {:?}, on disk: {:?}", remote_metadata.file_size(), local_metadata.file_size());
+                        Some(layer_file)
+                    },
+                }
+            } else {
+                // doesn't exist locally
+                Some(layer_file)
+            }
+        })
+        .collect::<HashSet<_>>();

-    let number_of_layers_to_download = remote_files.difference(&local_files).count();
-    let (initial_timeline_status, awaits_download) = if number_of_layers_to_download > 0 {
+    let (initial_timeline_status, awaits_download) = if !needed_to_download_files.is_empty() {
        new_sync_tasks.push_back((
            sync_id,
-            SyncTask::download(LayersDownload {
-                layers_to_skip: local_files.clone(),
-            }),
+            SyncTask::download(LayersDownload::from_skipped_layers(
+                local_files
+                    .keys()
+                    .filter(|path| !needed_to_download_files.contains(path))
+                    .cloned()
+                    .collect(),
+            )),
        ));
        info!("NeedsSync");
        (LocalTimelineInitStatus::NeedsSync, true)
@@ -1362,15 +1466,22 @@ fn compare_local_and_remote_timeline(
    };

    let layers_to_upload = local_files
-        .difference(remote_files)
-        .cloned()
-        .collect::<HashSet<_>>();
+        .iter()
+        .filter_map(|(local_file, metadata)| {
+            if !remote_entry.stored_files().contains_key(local_file) {
+                Some((local_file.to_owned(), metadata.to_owned()))
+            } else {
+                None
+            }
+        })
+        .collect::<HashMap<_, _>>();
+
    if !layers_to_upload.is_empty() {
        new_sync_tasks.push_back((
            sync_id,
            SyncTask::upload(LayersUpload {
                layers_to_upload,
-                uploaded_layers: HashSet::new(),
+                uploaded_layers: HashMap::new(),
                metadata: Some(local_metadata),
            }),
        ));
@@ -1391,16 +1502,22 @@ fn register_sync_status(

    let tenant_id = sync_id.tenant_id.to_string();
    let timeline_id = sync_id.timeline_id.to_string();
-    match sync_status {
-        Some(true) => {
-            IMAGE_SYNC_TIME.with_label_values(&[&tenant_id, &timeline_id, sync_name, "success"])
-        }
-        Some(false) => {
-            IMAGE_SYNC_TIME.with_label_values(&[&tenant_id, &timeline_id, sync_name, "failure"])
-        }
-        None => return,
-    }
-    .observe(secs_elapsed)
+
+    let sync_status = match sync_status {
+        Some(true) => "success",
+        Some(false) => "failure",
+        None => "abort",
+    };
+
+    IMAGE_SYNC_TIME_HISTOGRAM
+        .with_label_values(&[sync_name, sync_status])
+        .observe(secs_elapsed);
+    IMAGE_SYNC_TIME
+        .with_label_values(&[&tenant_id, &timeline_id])
+        .add(secs_elapsed);
+    IMAGE_SYNC_COUNT
+        .with_label_values(&[&tenant_id, &timeline_id, sync_name, sync_status])
+        .inc();
 }

 #[cfg(test)]
@@ -1420,11 +1537,12 @@ mod test_utils {
        let timeline_path = harness.timeline_path(&timeline_id);
        fs::create_dir_all(&timeline_path).await?;

-        let mut layers_to_upload = HashSet::with_capacity(filenames.len());
+        let mut layers_to_upload = HashMap::with_capacity(filenames.len());
        for &file in filenames {
            let file_path = timeline_path.join(file);
            fs::write(&file_path, dummy_contents(file).into_bytes()).await?;
-            layers_to_upload.insert(file_path);
+            let metadata = LayerFileMetadata::new(file_path.metadata()?.len());
+            layers_to_upload.insert(file_path, metadata);
        }

        fs::write(
@@ -1435,7 +1553,7 @@ mod test_utils {

        Ok(LayersUpload {
            layers_to_upload,
-            uploaded_layers: HashSet::new(),
+            uploaded_layers: HashMap::new(),
            metadata: Some(metadata),
        })
    }
@@ -1445,7 +1563,17 @@ mod test_utils {
    }

    pub(super) fn dummy_metadata(disk_consistent_lsn: Lsn) -> TimelineMetadata {
-        TimelineMetadata::new(disk_consistent_lsn, None, None, Lsn(0), Lsn(0), Lsn(0))
+        TimelineMetadata::new(
+            disk_consistent_lsn,
+            None,
+            None,
+            Lsn(0),
+            Lsn(0),
+            Lsn(0),
+            // Any version will do
+            // but it should be consistent with the one in the tests
+            crate::DEFAULT_PG_VERSION,
+        )
    }
 }

@@ -1480,12 +1608,13 @@ mod tests {
        assert!(sync_id_2 != sync_id_3);
        assert!(sync_id_3 != TEST_SYNC_ID);

-        let download_task = SyncTask::download(LayersDownload {
-            layers_to_skip: HashSet::from([PathBuf::from("sk")]),
-        });
+        let download_task =
+            SyncTask::download(LayersDownload::from_skipped_layers(HashSet::from([
+                PathBuf::from("sk"),
+            ])));
        let upload_task = SyncTask::upload(LayersUpload {
-            layers_to_upload: HashSet::from([PathBuf::from("up")]),
-            uploaded_layers: HashSet::from([PathBuf::from("upl")]),
+            layers_to_upload: HashMap::from([(PathBuf::from("up"), LayerFileMetadata::new(123))]),
+            uploaded_layers: HashMap::from([(PathBuf::from("upl"), LayerFileMetadata::new(123))]),
            metadata: Some(dummy_metadata(Lsn(2))),
        });
        let delete_task = SyncTask::delete(LayersDeletion {
@@ -1529,12 +1658,10 @@ mod tests {
        let sync_queue = SyncQueue::new(NonZeroUsize::new(100).unwrap());
        assert_eq!(sync_queue.len(), 0);

-        let download = LayersDownload {
-            layers_to_skip: HashSet::from([PathBuf::from("sk")]),
-        };
+        let download = LayersDownload::from_skipped_layers(HashSet::from([PathBuf::from("sk")]));
        let upload = LayersUpload {
-            layers_to_upload: HashSet::from([PathBuf::from("up")]),
-            uploaded_layers: HashSet::from([PathBuf::from("upl")]),
+            layers_to_upload: HashMap::from([(PathBuf::from("up"), LayerFileMetadata::new(123))]),
+            uploaded_layers: HashMap::from([(PathBuf::from("upl"), LayerFileMetadata::new(123))]),
            metadata: Some(dummy_metadata(Lsn(2))),
        };
        let delete = LayersDeletion {
@@ -1582,18 +1709,10 @@ mod tests {
    #[tokio::test]
    async fn same_task_id_same_tasks_batch() {
        let sync_queue = SyncQueue::new(NonZeroUsize::new(1).unwrap());
-        let download_1 = LayersDownload {
-            layers_to_skip: HashSet::from([PathBuf::from("sk1")]),
-        };
-        let download_2 = LayersDownload {
-            layers_to_skip: HashSet::from([PathBuf::from("sk2")]),
-        };
-        let download_3 = LayersDownload {
-            layers_to_skip: HashSet::from([PathBuf::from("sk3")]),
-        };
-        let download_4 = LayersDownload {
-            layers_to_skip: HashSet::from([PathBuf::from("sk4")]),
-        };
+        let download_1 = LayersDownload::from_skipped_layers(HashSet::from([PathBuf::from("sk1")]));
+        let download_2 = LayersDownload::from_skipped_layers(HashSet::from([PathBuf::from("sk2")]));
+        let download_3 = LayersDownload::from_skipped_layers(HashSet::from([PathBuf::from("sk3")]));
+        let download_4 = LayersDownload::from_skipped_layers(HashSet::from([PathBuf::from("sk4")]));

        let sync_id_2 = TenantTimelineId {
            tenant_id: TenantId::from_array(hex!("22223344556677881122334455667788")),
@@ -1617,15 +1736,15 @@ mod tests {
            Some(SyncTaskBatch {
                download: Some(SyncData {
                    retries: 0,
-                    data: LayersDownload {
-                        layers_to_skip: {
+                    data: LayersDownload::from_skipped_layers(
+                        {
                            let mut set = HashSet::new();
                            set.extend(download_1.layers_to_skip.into_iter());
                            set.extend(download_2.layers_to_skip.into_iter());
                            set.extend(download_4.layers_to_skip.into_iter());
                            set
                        },
-                    }
+                    )
                }),
                upload: None,
                delete: None,
@@ -1641,4 +1760,148 @@ mod tests {
            "Should have one task left out of the batch"
        );
    }
+
+    mod local_and_remote_comparisons {
+        use super::*;
+
+        #[test]
+        fn ready() { // local and remote list the same single layer with equal metadata
+            let mut new_sync_tasks = VecDeque::default();
+            let sync_id = TenantTimelineId::generate();
+            let local_metadata = dummy_metadata(0x02.into());
+            let local_files =
+                HashMap::from([(PathBuf::from("first_file"), LayerFileMetadata::new(123))]);
+            let mut remote_entry = RemoteTimeline::new(local_metadata.clone()); // same metadata as local
+            remote_entry
+                .add_timeline_layers([(PathBuf::from("first_file"), LayerFileMetadata::new(123))]);
+
+            let (status, sync_needed) = compare_local_and_remote_timeline(
+                &mut new_sync_tasks,
+                sync_id,
+                local_metadata.clone(),
+                local_files,
+                &remote_entry,
+            );
+
+            assert_eq!(
+                status,
+                LocalTimelineInitStatus::LocallyComplete(local_metadata)
+            );
+            assert!(!sync_needed); // identical local and remote state: no sync requested
+
+            assert!(new_sync_tasks.is_empty(), "{:?}", new_sync_tasks); // and no task was queued
+        }
+
+        #[test]
+        fn needs_download() { // remote has a layer that is absent from the (empty) local file set
+            let mut new_sync_tasks = VecDeque::default();
+            let sync_id = TenantTimelineId::generate();
+            let local_metadata = dummy_metadata(0x02.into());
+            let local_files = HashMap::default(); // no layer files locally
+            let mut remote_entry = RemoteTimeline::new(local_metadata.clone());
+            remote_entry
+                .add_timeline_layers([(PathBuf::from("first_file"), LayerFileMetadata::new(123))]);
+
+            let (status, sync_needed) = compare_local_and_remote_timeline(
+                &mut new_sync_tasks,
+                sync_id,
+                local_metadata,
+                local_files.clone(),
+                &remote_entry,
+            );
+
+            assert_eq!(status, LocalTimelineInitStatus::NeedsSync);
+            assert!(sync_needed);
+
+            let new_sync_tasks = new_sync_tasks.into_iter().collect::<Vec<_>>(); // for slice comparison
+
+            assert_eq!(
+                &new_sync_tasks,
+                &[(
+                    sync_id,
+                    SyncTask::download(LayersDownload::from_skipped_layers(
+                        local_files.keys().cloned().collect() // skip set = local file names (empty here)
+                    ))
+                )]
+            );
+        }
+
+        #[test]
+        fn redownload_is_not_needed_on_upgrade() {
+            // Regression test: the implementation originally missed the `(None, Some(_))` case in
+            // its match and therefore always redownloaded when remote metadata was unavailable.
+
+            let mut new_sync_tasks = VecDeque::default();
+            let sync_id = TenantTimelineId::generate();
+
+            let local_metadata = dummy_metadata(0x02.into());
+
+            // The type system would in general allow a LayerFileMetadata to be created with
+            // `file_size: None`; however, `LayerFileMetadata::default` is only available in
+            // tests, so wherever the system creates a valid LayerFileMetadata, it does so
+            // through `::new`.
+            let local_files =
+                HashMap::from([(PathBuf::from("first_file"), LayerFileMetadata::new(123))]);
+
+            let mut remote_entry = RemoteTimeline::new(local_metadata.clone());
+
+            // Simulate a RemoteTimeline built from an older-version IndexPart, which did not
+            // carry any layer file metadata.
+            remote_entry
+                .add_timeline_layers([(PathBuf::from("first_file"), LayerFileMetadata::default())]);
+
+            let (status, sync_needed) = compare_local_and_remote_timeline(
+                &mut new_sync_tasks,
+                sync_id,
+                local_metadata.clone(),
+                local_files,
+                &remote_entry,
+            );
+
+            assert_eq!(
+                status,
+                LocalTimelineInitStatus::LocallyComplete(local_metadata)
+            );
+            assert!(!sync_needed); // missing remote layer metadata alone must not request a sync
+        }
+
+        #[test]
+        fn needs_upload() { // a layer exists locally while the remote entry lists no layers
+            let mut new_sync_tasks = VecDeque::default();
+            let sync_id = TenantTimelineId::generate();
+            let local_metadata = dummy_metadata(0x02.into());
+            let local_files =
+                HashMap::from([(PathBuf::from("first_file"), LayerFileMetadata::new(123))]);
+            let mut remote_entry = RemoteTimeline::new(local_metadata.clone());
+            remote_entry.add_timeline_layers([]); // remote knows the timeline but has no layers
+
+            let (status, sync_needed) = compare_local_and_remote_timeline(
+                &mut new_sync_tasks,
+                sync_id,
+                local_metadata.clone(),
+                local_files.clone(),
+                &remote_entry,
+            );
+
+            assert_eq!(
+                status,
+                LocalTimelineInitStatus::LocallyComplete(local_metadata.clone())
+            );
+            assert!(!sync_needed); // stays false even though an upload task is queued (checked below)
+
+            let new_sync_tasks = new_sync_tasks.into_iter().collect::<Vec<_>>(); // for slice comparison
+
+            assert_eq!(
+                &new_sync_tasks,
+                &[(
+                    sync_id,
+                    SyncTask::upload(LayersUpload {
+                        layers_to_upload: local_files, // every local layer is scheduled for upload
+                        uploaded_layers: HashMap::default(), // nothing uploaded yet
+                        metadata: Some(local_metadata),
+                    })
+                )]
+            );
+        }
+    }
 }
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`{{ pageserver_config \| sivel.toiletwater.to_toml }}`