Merge branch 'main' into bojan-get-page-tests

Bojan Serafimov
2022-04-27 13:05:27 -04:00
187 changed files with 5704 additions and 4343 deletions


@@ -1,14 +1,14 @@
- name: Upload Zenith binaries
- name: Upload Neon binaries
hosts: storage
gather_facts: False
remote_user: admin
tasks:
- name: get latest version of Zenith binaries
- name: get latest version of Neon binaries
register: current_version_file
set_fact:
current_version: "{{ lookup('file', '.zenith_current_version') | trim }}"
current_version: "{{ lookup('file', '.neon_current_version') | trim }}"
tags:
- pageserver
- safekeeper
@@ -19,11 +19,11 @@
- pageserver
- safekeeper
- name: upload and extract Zenith binaries to /usr/local
- name: upload and extract Neon binaries to /usr/local
ansible.builtin.unarchive:
owner: root
group: root
src: zenith_install.tar.gz
src: neon_install.tar.gz
dest: /usr/local
become: true
tags:


@@ -4,10 +4,10 @@ set -e
RELEASE=${RELEASE:-false}
# look at docker hub for latest tag fo zenith docker image
# look at docker hub for latest tag for neon docker image
if [ "${RELEASE}" = "true" ]; then
echo "search latest relase tag"
VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/zenithdb/zenith/tags |jq -r -S '.[].name' | grep release | sed 's/release-//g' | tail -1)
VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags |jq -r -S '.[].name' | grep release | sed 's/release-//g' | tail -1)
if [ -z "${VERSION}" ]; then
echo "no any docker tags found, exiting..."
exit 1
@@ -16,7 +16,7 @@ if [ "${RELEASE}" = "true" ]; then
fi
else
echo "search latest dev tag"
VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/zenithdb/zenith/tags |jq -r -S '.[].name' | grep -v release | tail -1)
VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags |jq -r -S '.[].name' | grep -v release | tail -1)
if [ -z "${VERSION}" ]; then
echo "no any docker tags found, exiting..."
exit 1
@@ -28,25 +28,25 @@ fi
echo "found ${VERSION}"
# do initial cleanup
rm -rf zenith_install postgres_install.tar.gz zenith_install.tar.gz .zenith_current_version
mkdir zenith_install
rm -rf neon_install postgres_install.tar.gz neon_install.tar.gz .neon_current_version
mkdir neon_install
# retrive binaries from docker image
echo "getting binaries from docker image"
docker pull --quiet zenithdb/zenith:${TAG}
ID=$(docker create zenithdb/zenith:${TAG})
docker pull --quiet neondatabase/neon:${TAG}
ID=$(docker create neondatabase/neon:${TAG})
docker cp ${ID}:/data/postgres_install.tar.gz .
tar -xzf postgres_install.tar.gz -C zenith_install
docker cp ${ID}:/usr/local/bin/pageserver zenith_install/bin/
docker cp ${ID}:/usr/local/bin/safekeeper zenith_install/bin/
docker cp ${ID}:/usr/local/bin/proxy zenith_install/bin/
docker cp ${ID}:/usr/local/bin/postgres zenith_install/bin/
tar -xzf postgres_install.tar.gz -C neon_install
docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/
docker cp ${ID}:/usr/local/bin/safekeeper neon_install/bin/
docker cp ${ID}:/usr/local/bin/proxy neon_install/bin/
docker cp ${ID}:/usr/local/bin/postgres neon_install/bin/
docker rm -vf ${ID}
# store version to file (for ansible playbooks) and create binaries tarball
echo ${VERSION} > zenith_install/.zenith_current_version
echo ${VERSION} > .zenith_current_version
tar -czf zenith_install.tar.gz -C zenith_install .
echo ${VERSION} > neon_install/.neon_current_version
echo ${VERSION} > .neon_current_version
tar -czf neon_install.tar.gz -C neon_install .
# do final cleaup
rm -rf zenith_install postgres_install.tar.gz
rm -rf neon_install postgres_install.tar.gz


@@ -14,3 +14,4 @@ safekeepers
console_mgmt_base_url = http://console-release.local
bucket_name = zenith-storage-oregon
bucket_region = us-west-2
etcd_endpoints = etcd-release.local:2379


@@ -15,3 +15,4 @@ safekeepers
console_mgmt_base_url = http://console-staging.local
bucket_name = zenith-staging-storage-us-east-1
bucket_region = us-east-1
etcd_endpoints = etcd-staging.local:2379


@@ -6,7 +6,7 @@ After=network.target auditd.service
Type=simple
User=safekeeper
Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -p {{ first_pageserver }}:6400 -D /storage/safekeeper/data
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -p {{ first_pageserver }}:6400 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }}
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT


@@ -1,18 +1,18 @@
version: 2.1
executors:
zenith-xlarge-executor:
neon-xlarge-executor:
resource_class: xlarge
docker:
# NB: when changed, do not forget to update rust image tag in all Dockerfiles
- image: zimg/rust:1.56
zenith-executor:
- image: zimg/rust:1.58
neon-executor:
docker:
- image: zimg/rust:1.56
- image: zimg/rust:1.58
jobs:
check-codestyle-rust:
executor: zenith-xlarge-executor
executor: neon-xlarge-executor
steps:
- checkout
- run:
@@ -22,7 +22,7 @@ jobs:
# A job to build postgres
build-postgres:
executor: zenith-xlarge-executor
executor: neon-xlarge-executor
parameters:
build_type:
type: enum
@@ -67,9 +67,9 @@ jobs:
paths:
- tmp_install
# A job to build zenith rust code
build-zenith:
executor: zenith-xlarge-executor
# A job to build Neon rust code
build-neon:
executor: neon-xlarge-executor
parameters:
build_type:
type: enum
@@ -113,7 +113,7 @@ jobs:
CARGO_FLAGS=
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
CARGO_FLAGS=--release
CARGO_FLAGS="--release --features profiling"
fi
export CARGO_INCREMENTAL=0
@@ -132,20 +132,6 @@ jobs:
- ~/.cargo/git
- target
# Run style checks
# has to run separately from cargo fmt section
# since needs to run with dependencies
- run:
name: cargo clippy
command: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
fi
"${cov_prefix[@]}" ./run_clippy.sh
# Run rust unit tests
- run:
name: cargo test
@@ -223,7 +209,7 @@ jobs:
- "*"
check-codestyle-python:
executor: zenith-executor
executor: neon-executor
steps:
- checkout
- restore_cache:
@@ -246,7 +232,7 @@ jobs:
command: poetry run mypy .
run-pytest:
executor: zenith-executor
executor: neon-executor
parameters:
# pytest args to specify the tests to run.
#
@@ -369,7 +355,7 @@ jobs:
when: always
command: |
du -sh /tmp/test_output/*
find /tmp/test_output -type f ! -name "pg.log" ! -name "pageserver.log" ! -name "safekeeper.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" -delete
find /tmp/test_output -type f ! -name "pg.log" ! -name "pageserver.log" ! -name "safekeeper.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" -delete
du -sh /tmp/test_output/*
- store_artifacts:
path: /tmp/test_output
@@ -390,7 +376,7 @@ jobs:
- "*"
coverage-report:
executor: zenith-xlarge-executor
executor: neon-xlarge-executor
steps:
- attach_workspace:
at: /tmp/zenith
@@ -420,7 +406,7 @@ jobs:
COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1
scripts/git-upload \
--repo=https://$VIP_VAP_ACCESS_TOKEN@github.com/zenithdb/zenith-coverage-data.git \
--repo=https://$VIP_VAP_ACCESS_TOKEN@github.com/neondatabase/zenith-coverage-data.git \
--message="Add code coverage for $COMMIT_URL" \
copy /tmp/zenith/coverage/report $CIRCLE_SHA1 # COPY FROM TO_RELATIVE
@@ -437,7 +423,7 @@ jobs:
\"target_url\": \"$REPORT_URL\"
}"
# Build zenithdb/zenith:latest image and push it to Docker hub
# Build neondatabase/neon:latest image and push it to Docker hub
docker-image:
docker:
- image: cimg/base:2021.04
@@ -451,18 +437,18 @@ jobs:
- run:
name: Build and push Docker image
command: |
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
DOCKER_TAG=$(git log --oneline|wc -l)
docker build \
--pull \
--build-arg GIT_VERSION=${CIRCLE_SHA1} \
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
--tag zenithdb/zenith:${DOCKER_TAG} --tag zenithdb/zenith:latest .
docker push zenithdb/zenith:${DOCKER_TAG}
docker push zenithdb/zenith:latest
--tag neondatabase/neon:${DOCKER_TAG} --tag neondatabase/neon:latest .
docker push neondatabase/neon:${DOCKER_TAG}
docker push neondatabase/neon:latest
# Build zenithdb/compute-node:latest image and push it to Docker hub
# Build neondatabase/compute-node:latest image and push it to Docker hub
docker-image-compute:
docker:
- image: cimg/base:2021.04
@@ -470,31 +456,31 @@ jobs:
- checkout
- setup_remote_docker:
docker_layer_caching: true
# Build zenithdb/compute-tools:latest image and push it to Docker hub
# Build neondatabase/compute-tools:latest image and push it to Docker hub
# TODO: this should probably also use versioned tag, not just :latest.
# XXX: but should it? We build and use it only locally now.
- run:
name: Build and push compute-tools Docker image
command: |
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
docker build \
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
--tag zenithdb/compute-tools:latest -f Dockerfile.compute-tools .
docker push zenithdb/compute-tools:latest
--tag neondatabase/compute-tools:latest -f Dockerfile.compute-tools .
docker push neondatabase/compute-tools:latest
- run:
name: Init postgres submodule
command: git submodule update --init --depth 1
- run:
name: Build and push compute-node Docker image
command: |
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
DOCKER_TAG=$(git log --oneline|wc -l)
docker build --tag zenithdb/compute-node:${DOCKER_TAG} --tag zenithdb/compute-node:latest vendor/postgres
docker push zenithdb/compute-node:${DOCKER_TAG}
docker push zenithdb/compute-node:latest
docker build --tag neondatabase/compute-node:${DOCKER_TAG} --tag neondatabase/compute-node:latest vendor/postgres
docker push neondatabase/compute-node:${DOCKER_TAG}
docker push neondatabase/compute-node:latest
# Build production zenithdb/zenith:release image and push it to Docker hub
# Build production neondatabase/neon:release image and push it to Docker hub
docker-image-release:
docker:
- image: cimg/base:2021.04
@@ -508,18 +494,18 @@ jobs:
- run:
name: Build and push Docker image
command: |
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
DOCKER_TAG="release-$(git log --oneline|wc -l)"
docker build \
--pull \
--build-arg GIT_VERSION=${CIRCLE_SHA1} \
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
--tag zenithdb/zenith:${DOCKER_TAG} --tag zenithdb/zenith:release .
docker push zenithdb/zenith:${DOCKER_TAG}
docker push zenithdb/zenith:release
--tag neondatabase/neon:${DOCKER_TAG} --tag neondatabase/neon:release .
docker push neondatabase/neon:${DOCKER_TAG}
docker push neondatabase/neon:release
# Build production zenithdb/compute-node:release image and push it to Docker hub
# Build production neondatabase/compute-node:release image and push it to Docker hub
docker-image-compute-release:
docker:
- image: cimg/base:2021.04
@@ -527,29 +513,29 @@ jobs:
- checkout
- setup_remote_docker:
docker_layer_caching: true
# Build zenithdb/compute-tools:release image and push it to Docker hub
# Build neondatabase/compute-tools:release image and push it to Docker hub
# TODO: this should probably also use versioned tag, not just :latest.
# XXX: but should it? We build and use it only locally now.
- run:
name: Build and push compute-tools Docker image
command: |
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
docker build \
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
--tag zenithdb/compute-tools:release -f Dockerfile.compute-tools .
docker push zenithdb/compute-tools:release
--tag neondatabase/compute-tools:release -f Dockerfile.compute-tools .
docker push neondatabase/compute-tools:release
- run:
name: Init postgres submodule
command: git submodule update --init --depth 1
- run:
name: Build and push compute-node Docker image
command: |
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
DOCKER_TAG="release-$(git log --oneline|wc -l)"
docker build --tag zenithdb/compute-node:${DOCKER_TAG} --tag zenithdb/compute-node:release vendor/postgres
docker push zenithdb/compute-node:${DOCKER_TAG}
docker push zenithdb/compute-node:release
docker build --tag neondatabase/compute-node:${DOCKER_TAG} --tag neondatabase/compute-node:release vendor/postgres
docker push neondatabase/compute-node:${DOCKER_TAG}
docker push neondatabase/compute-node:release
deploy-staging:
docker:
@@ -575,7 +561,7 @@ jobs:
rm -f ssh-key ssh-key-cert.pub
ansible-playbook deploy.yaml -i staging.hosts
rm -f zenith_install.tar.gz .zenith_current_version
rm -f neon_install.tar.gz .neon_current_version
deploy-staging-proxy:
docker:
@@ -625,7 +611,7 @@ jobs:
rm -f ssh-key ssh-key-cert.pub
ansible-playbook deploy.yaml -i production.hosts
rm -f zenith_install.tar.gz .zenith_current_version
rm -f neon_install.tar.gz .neon_current_version
deploy-release-proxy:
docker:
@@ -704,8 +690,8 @@ workflows:
matrix:
parameters:
build_type: ["debug", "release"]
- build-zenith:
name: build-zenith-<< matrix.build_type >>
- build-neon:
name: build-neon-<< matrix.build_type >>
matrix:
parameters:
build_type: ["debug", "release"]
@@ -720,7 +706,7 @@ workflows:
test_selection: batch_pg_regress
needs_postgres_source: true
requires:
- build-zenith-<< matrix.build_type >>
- build-neon-<< matrix.build_type >>
- run-pytest:
name: other-tests-<< matrix.build_type >>
matrix:
@@ -728,7 +714,7 @@ workflows:
build_type: ["debug", "release"]
test_selection: batch_others
requires:
- build-zenith-<< matrix.build_type >>
- build-neon-<< matrix.build_type >>
- run-pytest:
name: benchmarks
context: PERF_TEST_RESULT_CONNSTR
@@ -737,7 +723,7 @@ workflows:
run_in_parallel: false
save_perf_report: true
requires:
- build-zenith-release
- build-neon-release
- coverage-report:
# Context passes credentials for gh api
context: CI_ACCESS_TOKEN
@@ -833,6 +819,6 @@ workflows:
# XXX: Successful build doesn't mean everything is OK, but
# the job to be triggered takes so much time to complete (~22 min)
# that it's better not to wait for the commented-out steps
- build-zenith-release
- build-neon-release
# - pg_regress-tests-release
# - other-tests-release


@@ -1,6 +1,9 @@
# Helm chart values for zenith-proxy.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authEndpoint: "https://console.zenith.tech/authenticate_proxy_request/"
uri: "https://console.zenith.tech/psql_session/"


@@ -1,6 +1,9 @@
# Helm chart values for zenith-proxy.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authEndpoint: "https://console.stage.zenith.tech/authenticate_proxy_request/"
uri: "https://console.stage.zenith.tech/psql_session/"


@@ -10,6 +10,8 @@ dep-format-version = "2"
# Hakari works much better with the new feature resolver.
# For more about the new feature resolver, see:
# https://blog.rust-lang.org/2021/03/25/Rust-1.51.0.html#cargos-new-feature-resolver
# Have to keep the resolver still here since hakari requires this field,
# despite it's now the default for 2021 edition & cargo.
resolver = "2"
# Add triples corresponding to platforms commonly used by developers here.


@@ -36,8 +36,7 @@ jobs:
- name: Install macOs postgres dependencies
if: matrix.os == 'macos-latest'
run: |
brew install flex bison
run: brew install flex bison
- name: Set pg revision for caching
id: pg_ver
@@ -53,8 +52,7 @@ jobs:
- name: Build postgres
if: steps.cache_pg.outputs.cache-hit != 'true'
run: |
make postgres
run: make postgres
- name: Cache cargo deps
id: cache_cargo
@@ -64,13 +62,10 @@ jobs:
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
key: ${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}
# Use `env CARGO_INCREMENTAL=0` to mitigate https://github.com/rust-lang/rust/issues/91696 for rustc 1.57.0
- name: Run cargo build
run: |
env CARGO_INCREMENTAL=0 cargo build --workspace --bins --examples --tests
- name: Run cargo clippy
run: ./run_clippy.sh
- name: Run cargo test
run: |
env CARGO_INCREMENTAL=0 cargo test -- --nocapture --test-threads=1
run: cargo test --all --all-targets

Cargo.lock (generated): 919 changed lines; file diff suppressed because it is too large.


@@ -3,22 +3,19 @@ members = [
"compute_tools",
"control_plane",
"pageserver",
"postgres_ffi",
"proxy",
"walkeeper",
"safekeeper",
"workspace_hack",
"zenith",
"zenith_metrics",
"zenith_utils",
"libs/*",
]
resolver = "2"
[profile.release]
# This is useful for profiling and, to some extent, debug.
# Besides, debug info should not affect the performance.
debug = true
# This is only needed for proxy's tests
# TODO: we should probably fork tokio-postgres-rustls instead
# This is only needed for proxy's tests.
# TODO: we should probably fork `tokio-postgres-rustls` instead.
[patch.crates-io]
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }


@@ -1,7 +1,5 @@
# Build Postgres
#
#FROM zimg/rust:1.56 AS pg-build
FROM zenithdb/build:buster-20220309 AS pg-build
FROM zimg/rust:1.58 AS pg-build
WORKDIR /pg
USER root
@@ -11,27 +9,26 @@ COPY Makefile Makefile
ENV BUILD_TYPE release
RUN set -e \
&& make -j $(nproc) -s postgres \
&& mold -run make -j $(nproc) -s postgres \
&& rm -rf tmp_install/build \
&& tar -C tmp_install -czf /postgres_install.tar.gz .
# Build zenith binaries
#
#FROM zimg/rust:1.56 AS build
FROM zenithdb/build:buster-20220309 AS build
FROM zimg/rust:1.58 AS build
ARG GIT_VERSION=local
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
ARG AWS_ACCESS_KEY_ID
ARG AWS_SECRET_ACCESS_KEY
ENV RUSTC_WRAPPER /usr/local/cargo/bin/cachepot
COPY --from=pg-build /pg/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
COPY . .
# Show build caching stats to check if it was used in the end.
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, loosing the compilation stats.
RUN cargo build --release && /usr/local/cargo/bin/cachepot -s
RUN set -e \
&& sudo -E "PATH=$PATH" mold -run cargo build --release \
&& cachepot -s
# Build final image
#


@@ -1,23 +0,0 @@
FROM rust:1.56.1-slim-buster
WORKDIR /home/circleci/project
RUN set -e \
&& apt-get update \
&& apt-get -yq install \
automake \
libtool \
build-essential \
bison \
flex \
libreadline-dev \
zlib1g-dev \
libxml2-dev \
libseccomp-dev \
pkg-config \
libssl-dev \
clang
RUN set -e \
&& rustup component add clippy \
&& cargo install cargo-audit \
&& cargo install --git https://github.com/paritytech/cachepot


@@ -1,19 +1,18 @@
# First transient image to build compute_tools binaries
# NB: keep in sync with rust image version in .circle/config.yml
FROM zenithdb/build:buster-20220309 AS rust-build
WORKDIR /zenith
FROM zimg/rust:1.58 AS rust-build
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
ARG AWS_ACCESS_KEY_ID
ARG AWS_SECRET_ACCESS_KEY
ENV RUSTC_WRAPPER /usr/local/cargo/bin/cachepot
COPY . .
RUN cargo build -p compute_tools --release && /usr/local/cargo/bin/cachepot -s
RUN set -e \
&& sudo -E "PATH=$PATH" mold -run cargo build -p compute_tools --release \
&& cachepot -s
# Final image that only has one binary
FROM debian:buster-slim
COPY --from=rust-build /zenith/target/release/zenith_ctl /usr/local/bin/zenith_ctl
COPY --from=rust-build /home/circleci/project/target/release/zenith_ctl /usr/local/bin/zenith_ctl


@@ -31,7 +31,7 @@ apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libsec
libssl-dev clang pkg-config libpq-dev
```
[Rust] 1.56.1 or later is also required.
[Rust] 1.58 or later is also required.
To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `tmp_install/bin` and `tmp_install/lib`, respectively.


@@ -11,11 +11,11 @@ clap = "3.0"
env_logger = "0.9"
hyper = { version = "0.14", features = ["full"] }
log = { version = "0.4", features = ["std", "serde"] }
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
regex = "1"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
tar = "0.4"
tokio = { version = "1.17", features = ["macros", "rt", "rt-multi-thread"] }
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
workspace_hack = { version = "0.1", path = "../workspace_hack" }


@@ -129,6 +129,7 @@ fn run_compute(state: &Arc<RwLock<ComputeState>>) -> Result<ExitStatus> {
handle_roles(&read_state.spec, &mut client)?;
handle_databases(&read_state.spec, &mut client)?;
handle_grants(&read_state.spec, &mut client)?;
create_writablity_check_data(&mut client)?;
// 'Close' connection
@@ -157,7 +158,7 @@ fn run_compute(state: &Arc<RwLock<ComputeState>>) -> Result<ExitStatus> {
}
fn main() -> Result<()> {
// TODO: re-use `zenith_utils::logging` later
// TODO: re-use `utils::logging` later
init_logger(DEFAULT_LOG_LEVEL)?;
// Env variable is set by `cargo`


@@ -132,7 +132,14 @@ impl Role {
let mut params: String = "LOGIN".to_string();
if let Some(pass) = &self.encrypted_password {
params.push_str(&format!(" PASSWORD 'md5{}'", pass));
// Some time ago we supported only md5 and treated all encrypted_password as md5.
// Now we also support SCRAM-SHA-256 and to preserve compatibility
// we treat all encrypted_password as md5 unless they starts with SCRAM-SHA-256.
if pass.starts_with("SCRAM-SHA-256") {
params.push_str(&format!(" PASSWORD '{}'", pass));
} else {
params.push_str(&format!(" PASSWORD 'md5{}'", pass));
}
} else {
params.push_str(" PASSWORD NULL");
}


@@ -244,3 +244,24 @@ pub fn handle_databases(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
Ok(())
}
// Grant CREATE ON DATABASE to the database owner
// to allow clients create trusted extensions.
pub fn handle_grants(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
info!("cluster spec grants:");
for db in &spec.cluster.databases {
let dbname = &db.name;
let query: String = format!(
"GRANT CREATE ON DATABASE {} TO {}",
dbname.quote(),
db.owner.quote()
);
info!("grant query {}", &query);
client.execute(query.as_str(), &[])?;
}
Ok(())
}


@@ -5,7 +5,7 @@ edition = "2021"
[dependencies]
tar = "0.4.33"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
serde = { version = "1.0", features = ["derive"] }
serde_with = "1.12.0"
toml = "0.5"
@@ -18,6 +18,6 @@ url = "2.2.2"
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
pageserver = { path = "../pageserver" }
walkeeper = { path = "../walkeeper" }
zenith_utils = { path = "../zenith_utils" }
safekeeper = { path = "../safekeeper" }
utils = { path = "../libs/utils" }
workspace_hack = { version = "0.1", path = "../workspace_hack" }


@@ -11,11 +11,12 @@ use std::sync::Arc;
use std::time::Duration;
use anyhow::{Context, Result};
use zenith_utils::connstring::connection_host_port;
use zenith_utils::lsn::Lsn;
use zenith_utils::postgres_backend::AuthType;
use zenith_utils::zid::ZTenantId;
use zenith_utils::zid::ZTimelineId;
use utils::{
connstring::connection_host_port,
lsn::Lsn,
postgres_backend::AuthType,
zid::{ZTenantId, ZTimelineId},
};
use crate::local_env::LocalEnv;
use crate::postgresql_conf::PostgresConf;
@@ -272,12 +273,7 @@ impl PostgresNode {
conf.append("wal_sender_timeout", "5s");
conf.append("listen_addresses", &self.address.ip().to_string());
conf.append("port", &self.address.port().to_string());
// Never clean up old WAL. TODO: We should use a replication
// slot or something proper, to prevent the compute node
// from removing WAL that hasn't been streamed to the safekeeper or
// page server yet. (gh issue #349)
conf.append("wal_keep_size", "10TB");
conf.append("wal_keep_size", "0");
// Configure the node to fetch pages from pageserver
let pageserver_connstr = {
@@ -331,14 +327,14 @@ impl PostgresNode {
// Configure the node to connect to the safekeepers
conf.append("synchronous_standby_names", "walproposer");
let wal_acceptors = self
let safekeepers = self
.env
.safekeepers
.iter()
.map(|sk| format!("localhost:{}", sk.pg_port))
.collect::<Vec<String>>()
.join(",");
conf.append("wal_acceptors", &wal_acceptors);
conf.append("wal_acceptors", &safekeepers);
} else {
// We only use setup without safekeepers for tests,
// and don't care about data durability on pageserver,
@@ -420,10 +416,15 @@ impl PostgresNode {
if let Some(token) = auth_token {
cmd.env("ZENITH_AUTH_TOKEN", token);
}
let pg_ctl = cmd.status().context("pg_ctl failed")?;
if !pg_ctl.success() {
anyhow::bail!("pg_ctl failed");
let pg_ctl = cmd.output().context("pg_ctl failed")?;
if !pg_ctl.status.success() {
anyhow::bail!(
"pg_ctl failed, exit code: {}, stdout: {}, stderr: {}",
pg_ctl.status,
String::from_utf8_lossy(&pg_ctl.stdout),
String::from_utf8_lossy(&pg_ctl.stderr),
);
}
Ok(())
}


@@ -11,9 +11,11 @@ use std::env;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use zenith_utils::auth::{encode_from_key_file, Claims, Scope};
use zenith_utils::postgres_backend::AuthType;
use zenith_utils::zid::{ZNodeId, ZTenantId, ZTenantTimelineId, ZTimelineId};
use utils::{
auth::{encode_from_key_file, Claims, Scope},
postgres_backend::AuthType,
zid::{ZNodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
};
use crate::safekeeper::SafekeeperNode;


@@ -13,15 +13,17 @@ use nix::unistd::Pid;
use postgres::Config;
use reqwest::blocking::{Client, RequestBuilder, Response};
use reqwest::{IntoUrl, Method};
use safekeeper::http::models::TimelineCreateRequest;
use thiserror::Error;
use walkeeper::http::models::TimelineCreateRequest;
use zenith_utils::http::error::HttpErrorBody;
use zenith_utils::zid::{ZNodeId, ZTenantId, ZTimelineId};
use utils::{
connstring::connection_address,
http::error::HttpErrorBody,
zid::{ZNodeId, ZTenantId, ZTimelineId},
};
use crate::local_env::{LocalEnv, SafekeeperConf};
use crate::storage::PageServerNode;
use crate::{fill_rust_env_vars, read_pidfile};
use zenith_utils::connstring::connection_address;
#[derive(Error, Debug)]
pub enum SafekeeperHttpError {


@@ -1,3 +1,4 @@
use std::collections::HashMap;
use std::io::Write;
use std::net::TcpStream;
use std::path::PathBuf;
@@ -9,21 +10,23 @@ use anyhow::{bail, Context};
use nix::errno::Errno;
use nix::sys::signal::{kill, Signal};
use nix::unistd::Pid;
use pageserver::http::models::{TenantCreateRequest, TimelineCreateRequest};
use pageserver::http::models::{TenantConfigRequest, TenantCreateRequest, TimelineCreateRequest};
use pageserver::timelines::TimelineInfo;
use postgres::{Config, NoTls};
use reqwest::blocking::{Client, RequestBuilder, Response};
use reqwest::{IntoUrl, Method};
use thiserror::Error;
use zenith_utils::http::error::HttpErrorBody;
use zenith_utils::lsn::Lsn;
use zenith_utils::postgres_backend::AuthType;
use zenith_utils::zid::{ZTenantId, ZTimelineId};
use utils::{
connstring::connection_address,
http::error::HttpErrorBody,
lsn::Lsn,
postgres_backend::AuthType,
zid::{ZTenantId, ZTimelineId},
};
use crate::local_env::LocalEnv;
use crate::{fill_rust_env_vars, read_pidfile};
use pageserver::tenant_mgr::TenantInfo;
use zenith_utils::connstring::connection_address;
#[derive(Error, Debug)]
pub enum PageserverHttpError {
@@ -342,10 +345,32 @@ impl PageServerNode {
pub fn tenant_create(
&self,
new_tenant_id: Option<ZTenantId>,
settings: HashMap<&str, &str>,
) -> anyhow::Result<Option<ZTenantId>> {
let tenant_id_string = self
.http_request(Method::POST, format!("{}/tenant", self.http_base_url))
.json(&TenantCreateRequest { new_tenant_id })
.json(&TenantCreateRequest {
new_tenant_id,
checkpoint_distance: settings
.get("checkpoint_distance")
.map(|x| x.parse::<u64>())
.transpose()?,
compaction_target_size: settings
.get("compaction_target_size")
.map(|x| x.parse::<u64>())
.transpose()?,
compaction_period: settings.get("compaction_period").map(|x| x.to_string()),
compaction_threshold: settings
.get("compaction_threshold")
.map(|x| x.parse::<usize>())
.transpose()?,
gc_horizon: settings
.get("gc_horizon")
.map(|x| x.parse::<u64>())
.transpose()?,
gc_period: settings.get("gc_period").map(|x| x.to_string()),
pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
})
.send()?
.error_from_body()?
.json::<Option<String>>()?;
@@ -362,6 +387,32 @@ impl PageServerNode {
.transpose()
}
pub fn tenant_config(&self, tenant_id: ZTenantId, settings: HashMap<&str, &str>) -> Result<()> {
self.http_request(Method::PUT, format!("{}/tenant/config", self.http_base_url))
.json(&TenantConfigRequest {
tenant_id,
checkpoint_distance: settings
.get("checkpoint_distance")
.map(|x| x.parse::<u64>().unwrap()),
compaction_target_size: settings
.get("compaction_target_size")
.map(|x| x.parse::<u64>().unwrap()),
compaction_period: settings.get("compaction_period").map(|x| x.to_string()),
compaction_threshold: settings
.get("compaction_threshold")
.map(|x| x.parse::<usize>().unwrap()),
gc_horizon: settings
.get("gc_horizon")
.map(|x| x.parse::<u64>().unwrap()),
gc_period: settings.get("gc_period").map(|x| x.to_string()),
pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
})
.send()?
.error_from_body()?;
Ok(())
}
pub fn timeline_list(&self, tenant_id: &ZTenantId) -> anyhow::Result<Vec<TimelineInfo>> {
let timeline_infos: Vec<TimelineInfo> = self
.http_request(

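The new `tenant_create`/`tenant_config` plumbing above accepts per-tenant settings as a `HashMap<&str, &str>` and parses them into the typed `TenantCreateRequest`/`TenantConfigRequest` fields. A minimal caller-side sketch of what that API shape implies — the setting names come from the diff, while the import paths, the numeric values, and the helper function are assumptions for illustration:

```rust
use std::collections::HashMap;

use control_plane::storage::PageServerNode; // assumed module path for the type shown above
use utils::zid::ZTenantId;

// Hypothetical helper: pushes a few of the settings parsed by `tenant_config` above.
fn tune_tenant(pageserver: &PageServerNode, tenant_id: ZTenantId) -> anyhow::Result<()> {
    let mut settings: HashMap<&str, &str> = HashMap::new();
    settings.insert("checkpoint_distance", "268435456"); // parsed into u64 by tenant_config
    settings.insert("compaction_period", "20s");         // passed through as a string
    settings.insert("gc_horizon", "67108864");

    // Issues PUT {http_base_url}/tenant/config with a TenantConfigRequest built from `settings`.
    pageserver.tenant_config(tenant_id, settings)?;
    Ok(())
}
```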

@@ -8,7 +8,7 @@
- [multitenancy.md](multitenancy.md) — how multitenancy is organized in the pageserver and Zenith CLI.
- [sourcetree.md](sourcetree.md) — Overview of the source tree layeout.
- [pageserver/README](/pageserver/README) — pageserver overview.
- [postgres_ffi/README](/postgres_ffi/README) — Postgres FFI overview.
- [postgres_ffi/README](/libs/postgres_ffi/README) — Postgres FFI overview.
- [test_runner/README.md](/test_runner/README.md) — tests infrastructure overview.
- [walkeeper/README](/walkeeper/README) — WAL service overview.
- [safekeeper/README](/safekeeper/README) — WAL service overview.
- [core_changes.md](core_changes.md) - Description of Zenith changes in Postgres core


@@ -27,4 +27,4 @@ management_token = jwt.encode({"scope": "pageserverapi"}, auth_keys.priv, algori
tenant_token = jwt.encode({"scope": "tenant", "tenant_id": ps.initial_tenant}, auth_keys.priv, algorithm="RS256")
```
Utility functions to work with jwts in rust are located in zenith_utils/src/auth.rs
Utility functions to work with jwts in rust are located in libs/utils/src/auth.rs


@@ -29,7 +29,7 @@ Each Branch lives in a corresponding timeline[] and has an ancestor[].
NOTE: This is an overloaded term.
A checkpoint record in the WAL marks a point in the WAL sequence at which it is guaranteed that all data files have been updated with all information from shared memory modified before that checkpoint;
A checkpoint record in the WAL marks a point in the WAL sequence at which it is guaranteed that all data files have been updated with all information from shared memory modified before that checkpoint;
### Checkpoint (Layered repository)
@@ -108,10 +108,10 @@ PostgreSQL LSNs and functions to monitor them:
* `pg_current_wal_lsn()` - Returns the current write-ahead log write location.
* `pg_current_wal_flush_lsn()` - Returns the current write-ahead log flush location.
* `pg_last_wal_receive_lsn()` - Returns the last write-ahead log location that has been received and synced to disk by streaming replication. While streaming replication is in progress this will increase monotonically.
* `pg_last_wal_replay_lsn ()` - Returns the last write-ahead log location that has been replayed during recovery. If recovery is still in progress this will increase monotonically.
* `pg_last_wal_replay_lsn ()` - Returns the last write-ahead log location that has been replayed during recovery. If recovery is still in progress this will increase monotonically.
[source PostgreSQL documentation](https://www.postgresql.org/docs/devel/functions-admin.html):
Zenith safekeeper LSNs. For more check [walkeeper/README_PROTO.md](/walkeeper/README_PROTO.md)
Zenith safekeeper LSNs. For more check [safekeeper/README_PROTO.md](/safekeeper/README_PROTO.md)
* `CommitLSN`: position in WAL confirmed by quorum safekeepers.
* `RestartLSN`: position in WAL confirmed by all safekeepers.
* `FlushLSN`: part of WAL persisted to the disk by safekeeper.
@@ -190,7 +190,7 @@ or we do not support them in zenith yet (pg_commit_ts).
Tenant represents a single customer, interacting with Zenith.
Wal redo[] activity, timelines[], layers[] are managed for each tenant independently.
One pageserver[] can serve multiple tenants at once.
One safekeeper
One safekeeper
See `docs/multitenancy.md` for more.


@@ -12,7 +12,7 @@ Init empty pageserver using `initdb` in temporary directory.
`--storage_dest=FILE_PREFIX | S3_PREFIX |...` option defines object storage type, all other parameters are passed via env variables. Inspired by WAL-G style naming : https://wal-g.readthedocs.io/STORAGES/.
Save`storage_dest` and other parameters in config.
Save`storage_dest` and other parameters in config.
Push snapshots to `storage_dest` in background.
```
@@ -21,7 +21,7 @@ zenith start
```
#### 2. Restart pageserver (manually or crash-recovery).
Take `storage_dest` from pageserver config, start pageserver from latest snapshot in `storage_dest`.
Take `storage_dest` from pageserver config, start pageserver from latest snapshot in `storage_dest`.
Push snapshots to `storage_dest` in background.
```
@@ -32,7 +32,7 @@ zenith start
Start pageserver from existing snapshot.
Path to snapshot provided via `--snapshot_path=FILE_PREFIX | S3_PREFIX | ...`
Do not save `snapshot_path` and `snapshot_format` in config, as it is a one-time operation.
Save`storage_dest` parameters in config.
Save`storage_dest` parameters in config.
Push snapshots to `storage_dest` in background.
```
//I.e. we want to start zenith on top of existing $PGDATA and use s3 as a persistent storage.
@@ -42,15 +42,15 @@ zenith start
How to pass credentials needed for `snapshot_path`?
#### 4. Export.
Manually push snapshot to `snapshot_path` which differs from `storage_dest`
Manually push snapshot to `snapshot_path` which differs from `storage_dest`
Optionally set `snapshot_format`, which can be plain pgdata format or zenith format.
```
zenith export --snapshot_path=FILE_PREFIX --snapshot_format=pgdata
```
#### Notes and questions
- walkeeper s3_offload should use same (similar) syntax for storage. How to set it in UI?
- safekeeper s3_offload should use same (similar) syntax for storage. How to set it in UI?
- Why do we need `zenith init` as a separate command? Can't we init everything at first start?
- We can think of better names for all options.
- Export to plain postgres format will be useless, if we are not 100% compatible on page level.
I can recall at least one such difference - PD_WAL_LOGGED flag in pages.
I can recall at least one such difference - PD_WAL_LOGGED flag in pages.


@@ -0,0 +1,79 @@
Cluster size limits
==================
## Summary
One of the resource consumption limits for free-tier users is a cluster size limit.
To enforce it, we need to calculate the timeline size and check if the limit is reached before relation create/extend operations.
If the limit is reached, the query must fail with some meaningful error/warning.
We may want to exempt some operations from the quota to allow users free space to fit back into the limit.
The stateless compute node that performs validation is separate from the storage that calculates the usage, so we need to exchange cluster size information between those components.
## Motivation
Limit the maximum size of a PostgreSQL instance to limit free tier users (and other tiers in the future).
First of all, this is needed to control our free tier production costs.
Another reason to limit resources is risk management — we haven't (fully) tested and optimized zenith for big clusters,
so we don't want to give users access to the functionality that we don't think is ready.
## Components
* pageserver - calculate the size consumed by a timeline and add it to the feedback message.
* safekeeper - pass feedback message from pageserver to compute.
* compute - receive feedback message, enforce size limit based on GUC `zenith.max_cluster_size`.
* console - set and update `zenith.max_cluster_size` setting
## Proposed implementation
First of all, it's necessary to define timeline size.
The current approach is to count all data, including SLRUs. (not including WAL)
Here we think of it as a physical disk underneath the Postgres cluster.
This is how the `LOGICAL_TIMELINE_SIZE` metric is implemented in the pageserver.
Alternatively, we could count only relation data. As in pg_database_size().
This approach is somewhat more user-friendly because it is the data that is really affected by the user.
On the other hand, it puts us in a weaker position than other services, i.e., RDS.
We will need to refactor the timeline_size counter or add another counter to implement it.
Timeline size is updated during wal digestion. It is not versioned and is valid at the last_received_lsn moment.
Then this size should be reported to compute node.
`current_timeline_size` value is included in the walreceiver's custom feedback message: `ZenithFeedback.`
(PR about protocol changes https://github.com/zenithdb/zenith/pull/1037).
This message is received by the safekeeper and propagated to compute node as a part of `AppendResponse`.
Finally, when compute node receives the `current_timeline_size` from safekeeper (or from pageserver directly), it updates the global variable.
And then every zenith_extend() operation checks if limit is reached `(current_timeline_size > zenith.max_cluster_size)` and throws `ERRCODE_DISK_FULL` error if so.
(see Postgres error codes [https://www.postgresql.org/docs/devel/errcodes-appendix.html](https://www.postgresql.org/docs/devel/errcodes-appendix.html))
TODO:
We can allow autovacuum processes to bypass this check, simply checking `IsAutoVacuumWorkerProcess()`.
It would be nice to allow manual VACUUM and VACUUM FULL to bypass the check, but it's uneasy to distinguish these operations at the low level.
See issues https://github.com/neondatabase/neon/issues/1245
https://github.com/zenithdb/zenith/issues/1445
TODO:
We should warn users if the limit is soon to be reached.
### **Reliability, failure modes and corner cases**
1. `current_timeline_size` is valid at the last received and digested by pageserver lsn.
If pageserver lags behind compute node, `current_timeline_size` will lag too. This lag can be tuned using backpressure, but it is not expected to be 0 all the time.
So transactions that happen in this lsn range may cause limit overflow. Especially operations that generate (i.e., CREATE DATABASE) or free (i.e., TRUNCATE) a lot of data pages while generating a small amount of WAL. Are there other operations like this?
Currently, CREATE DATABASE operations are restricted in the console. So this is not an issue.
### **Security implications**
We treat compute as an untrusted component. That's why we try to isolate it with secure container runtime or a VM.
Malicious users may change the `zenith.max_cluster_size`, so we need an extra size limit check.
To cover this case, we also monitor the compute node size in the console.
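The enforcement rule above is described only in prose in this RFC; the real check is meant to live in the compute node's Postgres extension. The following Rust sketch is purely illustrative — all names are hypothetical, and only the size comparison, the autovacuum exemption, and the disk-full error come from the text:

```rust
// Hypothetical sketch of the check described in the RFC; the actual implementation
// would live in the compute node, not in this repository's Rust crates.
struct SizeLimiter {
    /// Latest `current_timeline_size` received via the walreceiver feedback message.
    current_timeline_size: u64,
    /// Value of the `zenith.max_cluster_size` GUC; `None` means the limit is disabled.
    max_cluster_size: Option<u64>,
}

impl SizeLimiter {
    /// Returns an error equivalent to ERRCODE_DISK_FULL when the limit is exceeded.
    fn check_extend(&self, is_autovacuum: bool) -> Result<(), String> {
        // Autovacuum is allowed to bypass the check so it can free space.
        if is_autovacuum {
            return Ok(());
        }
        match self.max_cluster_size {
            Some(limit) if self.current_timeline_size > limit => Err(
                "could not extend relation: cluster size limit reached (ERRCODE_DISK_FULL)".into(),
            ),
            _ => Ok(()),
        }
    }
}
```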


@@ -156,6 +156,9 @@ access_key_id = 'SOMEKEYAAAAASADSAH*#'
# Secret access key to connect to the bucket ("password" part of the credentials)
secret_access_key = 'SOMEsEcReTsd292v'
# S3 API query limit to avoid getting errors/throttling from AWS.
concurrency_limit = 100
```
###### General remote storage configuration
@@ -167,8 +170,8 @@ Besides, there are parameters common for all types of remote storage that can be
```toml
[remote_storage]
# Max number of concurrent connections to open for uploading to or downloading from the remote storage.
max_concurrent_sync = 100
# Max number of concurrent timeline synchronized (layers uploaded or downloaded) with the remote storage at the same time.
max_concurrent_timelines_sync = 50
# Max number of errors a single task can have before it's considered failed and not attempted to run anymore.
max_sync_errors = 10


@@ -30,11 +30,6 @@ The pageserver has a few different duties:
For more detailed info, see `/pageserver/README`
`/postgres_ffi`:
Utility functions for interacting with PostgreSQL file formats.
Misc constants, copied from PostgreSQL headers.
`/proxy`:
Postgres protocol proxy/router.
@@ -57,12 +52,12 @@ PostgreSQL extension that implements storage manager API and network communicati
PostgreSQL extension that contains functions needed for testing and debugging.
`/walkeeper`:
`/safekeeper`:
The zenith WAL service that receives WAL from a primary compute nodes and streams it to the pageserver.
It acts as a holding area and redistribution center for recently generated WAL.
For more detailed info, see `/walkeeper/README`
For more detailed info, see `/safekeeper/README`
`/workspace_hack`:
The workspace_hack crate exists only to pin down some dependencies.
@@ -74,14 +69,21 @@ We use [cargo-hakari](https://crates.io/crates/cargo-hakari) for automation.
Main entry point for the 'zenith' CLI utility.
TODO: Doesn't it belong to control_plane?
`/zenith_metrics`:
`/libs`:
Unites granular neon helper crates under the hood.
`/libs/postgres_ffi`:
Utility functions for interacting with PostgreSQL file formats.
Misc constants, copied from PostgreSQL headers.
`/libs/utils`:
Generic helpers that are shared between other crates in this repository.
A subject for future modularization.
`/libs/metrics`:
Helpers for exposing Prometheus metrics from the server.
`/zenith_utils`:
Helpers that are shared between other crates in this repository.
## Using Python
Note that Debian/Ubuntu Python packages are stale, as it commonly happens,
so manual installation of dependencies is not recommended.


@@ -1,5 +1,5 @@
[package]
name = "zenith_metrics"
name = "metrics"
version = "0.1.0"
edition = "2021"
@@ -8,4 +8,4 @@ prometheus = {version = "0.13", default_features=false} # removes protobuf depen
libc = "0.2"
lazy_static = "1.4"
once_cell = "1.8.0"
workspace_hack = { version = "0.1", path = "../workspace_hack" }
workspace_hack = { version = "0.1", path = "../../workspace_hack" }


@@ -8,8 +8,8 @@ use std::io::{Read, Result, Write};
///
/// ```
/// # use std::io::{Result, Read};
/// # use zenith_metrics::{register_int_counter, IntCounter};
/// # use zenith_metrics::CountedReader;
/// # use metrics::{register_int_counter, IntCounter};
/// # use metrics::CountedReader;
/// #
/// # lazy_static::lazy_static! {
/// # static ref INT_COUNTER: IntCounter = register_int_counter!(
@@ -83,8 +83,8 @@ impl<T: Read> Read for CountedReader<'_, T> {
///
/// ```
/// # use std::io::{Result, Write};
/// # use zenith_metrics::{register_int_counter, IntCounter};
/// # use zenith_metrics::CountedWriter;
/// # use metrics::{register_int_counter, IntCounter};
/// # use metrics::CountedWriter;
/// #
/// # lazy_static::lazy_static! {
/// # static ref INT_COUNTER: IntCounter = register_int_counter!(


@@ -17,8 +17,8 @@ log = "0.4.14"
memoffset = "0.6.2"
thiserror = "1.0"
serde = { version = "1.0", features = ["derive"] }
zenith_utils = { path = "../zenith_utils" }
workspace_hack = { version = "0.1", path = "../workspace_hack" }
utils = { path = "../utils" }
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
[build-dependencies]
bindgen = "0.59.1"


@@ -88,8 +88,8 @@ fn main() {
// 'pg_config --includedir-server' would perhaps be the more proper way to find it,
// but this will do for now.
//
.clang_arg("-I../tmp_install/include/server")
.clang_arg("-I../tmp_install/include/postgresql/server")
.clang_arg("-I../../tmp_install/include/server")
.clang_arg("-I../../tmp_install/include/postgresql/server")
//
// Finish the builder and generate the bindings.
//


@@ -43,7 +43,7 @@ impl ControlFileData {
/// Interpret a slice of bytes as a Postgres control file.
///
pub fn decode(buf: &[u8]) -> Result<ControlFileData> {
use zenith_utils::bin_ser::LeSer;
use utils::bin_ser::LeSer;
// Check that the slice has the expected size. The control file is
// padded with zeros up to a 512 byte sector size, so accept a
@@ -77,7 +77,7 @@ impl ControlFileData {
///
/// The CRC is recomputed to match the contents of the fields.
pub fn encode(&self) -> Bytes {
use zenith_utils::bin_ser::LeSer;
use utils::bin_ser::LeSer;
// Serialize into a new buffer.
let b = self.ser().unwrap();


@@ -4,7 +4,7 @@
//! This understands the WAL page and record format, enough to figure out where the WAL record
//! boundaries are, and to reassemble WAL records that cross page boundaries.
//!
//! This functionality is needed by both the pageserver and the walkeepers. The pageserver needs
//! This functionality is needed by both the pageserver and the safekeepers. The pageserver needs
//! to look deeper into the WAL records to also understand which blocks they modify, the code
//! for that is in pageserver/src/walrecord.rs
//!
@@ -18,7 +18,7 @@ use crc32c::*;
use log::*;
use std::cmp::min;
use thiserror::Error;
use zenith_utils::lsn::Lsn;
use utils::lsn::Lsn;
pub struct WalStreamDecoder {
lsn: Lsn,


@@ -28,7 +28,7 @@ use std::io::prelude::*;
use std::io::SeekFrom;
use std::path::{Path, PathBuf};
use std::time::SystemTime;
use zenith_utils::lsn::Lsn;
use utils::lsn::Lsn;
pub const XLOG_FNAME_LEN: usize = 24;
pub const XLOG_BLCKSZ: usize = 8192;
@@ -351,17 +351,17 @@ pub fn main() {
impl XLogRecord {
pub fn from_slice(buf: &[u8]) -> XLogRecord {
use zenith_utils::bin_ser::LeSer;
use utils::bin_ser::LeSer;
XLogRecord::des(buf).unwrap()
}
pub fn from_bytes<B: Buf>(buf: &mut B) -> XLogRecord {
use zenith_utils::bin_ser::LeSer;
use utils::bin_ser::LeSer;
XLogRecord::des_from(&mut buf.reader()).unwrap()
}
pub fn encode(&self) -> Bytes {
use zenith_utils::bin_ser::LeSer;
use utils::bin_ser::LeSer;
self.ser().unwrap().into()
}
@@ -373,19 +373,19 @@ impl XLogRecord {
impl XLogPageHeaderData {
pub fn from_bytes<B: Buf>(buf: &mut B) -> XLogPageHeaderData {
use zenith_utils::bin_ser::LeSer;
use utils::bin_ser::LeSer;
XLogPageHeaderData::des_from(&mut buf.reader()).unwrap()
}
}
impl XLogLongPageHeaderData {
pub fn from_bytes<B: Buf>(buf: &mut B) -> XLogLongPageHeaderData {
use zenith_utils::bin_ser::LeSer;
use utils::bin_ser::LeSer;
XLogLongPageHeaderData::des_from(&mut buf.reader()).unwrap()
}
pub fn encode(&self) -> Bytes {
use zenith_utils::bin_ser::LeSer;
use utils::bin_ser::LeSer;
self.ser().unwrap().into()
}
}
@@ -394,12 +394,12 @@ pub const SIZEOF_CHECKPOINT: usize = std::mem::size_of::<CheckPoint>();
impl CheckPoint {
pub fn encode(&self) -> Bytes {
use zenith_utils::bin_ser::LeSer;
use utils::bin_ser::LeSer;
self.ser().unwrap().into()
}
pub fn decode(buf: &[u8]) -> Result<CheckPoint, anyhow::Error> {
use zenith_utils::bin_ser::LeSer;
use utils::bin_ser::LeSer;
Ok(CheckPoint::des(buf)?)
}
@@ -477,7 +477,9 @@ mod tests {
#[test]
pub fn test_find_end_of_wal() {
// 1. Run initdb to generate some WAL
let top_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("..");
let top_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("..")
.join("..");
let data_dir = top_path.join("test_output/test_find_end_of_wal");
let initdb_path = top_path.join("tmp_install/bin/initdb");
let lib_path = top_path.join("tmp_install/lib");


@@ -1,5 +1,5 @@
[package]
name = "zenith_utils"
name = "utils"
version = "0.1.0"
edition = "2021"
@@ -10,8 +10,8 @@ bytes = "1.0.1"
hyper = { version = "0.14.7", features = ["full"] }
lazy_static = "1.4.0"
pin-project-lite = "0.2.7"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
routerify = "3"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
@@ -22,23 +22,23 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] }
nix = "0.23.0"
signal-hook = "0.3.10"
rand = "0.8.3"
jsonwebtoken = "7"
jsonwebtoken = "8"
hex = { version = "0.4.3", features = ["serde"] }
rustls = "0.19.1"
rustls-split = "0.2.1"
rustls = "0.20.2"
rustls-split = "0.3.0"
git-version = "0.3.5"
serde_with = "1.12.0"
zenith_metrics = { path = "../zenith_metrics" }
workspace_hack = { version = "0.1", path = "../workspace_hack" }
metrics = { path = "../metrics" }
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
[dev-dependencies]
byteorder = "1.4.3"
bytes = "1.0.1"
hex-literal = "0.3"
tempfile = "3.2"
webpki = "0.21"
criterion = "0.3"
rustls-pemfile = "0.2.1"
[[bench]]
name = "benchmarks"


@@ -1,7 +1,7 @@
#![allow(unused)]
use criterion::{criterion_group, criterion_main, Criterion};
use zenith_utils::zid;
use utils::zid;
pub fn bench_zid_stringify(c: &mut Criterion) {
// Can only use public methods.


@@ -1,10 +1,11 @@
#!/bin/bash
PG_BIN=$1
WAL_PATH=$2
DATA_DIR=$3
PORT=$4
SYSID=`od -A n -j 24 -N 8 -t d8 $WAL_PATH/000000010000000000000002* | cut -c 3-`
rm -fr $DATA_DIR
env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -D $DATA_DIR --sysid=$SYSID
env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U zenith_admin -D $DATA_DIR --sysid=$SYSID
echo port=$PORT >> $DATA_DIR/postgresql.conf
REDO_POS=0x`$PG_BIN/pg_controldata -D $DATA_DIR | fgrep "REDO location"| cut -c 42-`
declare -i WAL_SIZE=$REDO_POS+114


@@ -5,7 +5,7 @@
/// For example, to calculate the smallest value among some integers:
///
/// ```
/// use zenith_utils::accum::Accum;
/// use utils::accum::Accum;
///
/// let values = [1, 2, 3];
///


@@ -1,8 +1,6 @@
// For details about authentication see docs/authentication.md
// TODO there are two issues for our use case in jsonwebtoken library which will be resolved in next release
// The first one is that there is no way to disable expiration claim, but it can be excluded from validation, so use this as a workaround for now.
// Relevant issue: https://github.com/Keats/jsonwebtoken/issues/190
// The second one is that we wanted to use ed25519 keys, but they are also not supported until next version. So we go with RSA keys for now.
//
// TODO: use ed25519 keys
// Relevant issue: https://github.com/Keats/jsonwebtoken/issues/162
use serde;
@@ -59,19 +57,19 @@ pub fn check_permission(claims: &Claims, tenantid: Option<ZTenantId>) -> Result<
}
pub struct JwtAuth {
decoding_key: DecodingKey<'static>,
decoding_key: DecodingKey,
validation: Validation,
}
impl JwtAuth {
pub fn new(decoding_key: DecodingKey<'_>) -> Self {
pub fn new(decoding_key: DecodingKey) -> Self {
let mut validation = Validation::new(JWT_ALGORITHM);
// The default 'required_spec_claims' is 'exp'. But we don't want to require
// expiration.
validation.required_spec_claims = [].into();
Self {
decoding_key: decoding_key.into_static(),
validation: Validation {
algorithms: vec![JWT_ALGORITHM],
validate_exp: false,
..Default::default()
},
decoding_key,
validation,
}
}


@@ -5,12 +5,11 @@ use anyhow::anyhow;
use hyper::header::AUTHORIZATION;
use hyper::{header::CONTENT_TYPE, Body, Request, Response, Server};
use lazy_static::lazy_static;
use metrics::{new_common_metric_name, register_int_counter, Encoder, IntCounter, TextEncoder};
use routerify::ext::RequestExt;
use routerify::RequestInfo;
use routerify::{Middleware, Router, RouterBuilder, RouterService};
use tracing::info;
use zenith_metrics::{new_common_metric_name, register_int_counter, IntCounter};
use zenith_metrics::{Encoder, TextEncoder};
use std::future::Future;
use std::net::TcpListener;
@@ -36,7 +35,7 @@ async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<Body
let mut buffer = vec![];
let encoder = TextEncoder::new();
let metrics = zenith_metrics::gather();
let metrics = metrics::gather();
encoder.encode(&metrics, &mut buffer).unwrap();
let response = Response::builder()


@@ -1,4 +1,4 @@
//! zenith_utils is intended to be a place to put code that is shared
//! `utils` is intended to be a place to put code that is shared
//! between other crates in this repository.
#![allow(clippy::manual_range_contains)]
@@ -70,7 +70,7 @@ pub mod signals;
// So the build script will be run only when GIT_VERSION envvar has changed.
//
// Why not to use buildscript to get git commit sha directly without procmacro from different crate?
// Caching and workspaces complicates that. In case zenith_utils is not
// Caching and workspaces complicates that. In case `utils` is not
// recompiled due to caching then version may become outdated.
// git_version crate handles that case by introducing a dependency on .git internals via include_bytes! macro,
// so if we changed the index state git_version will pick that up and rerun the macro.


@@ -304,8 +304,8 @@ impl PostgresBackend {
pub fn start_tls(&mut self) -> anyhow::Result<()> {
match self.stream.take() {
Some(Stream::Bidirectional(bidi_stream)) => {
let session = rustls::ServerSession::new(&self.tls_config.clone().unwrap());
self.stream = Some(Stream::Bidirectional(bidi_stream.start_tls(session)?));
let conn = rustls::ServerConnection::new(self.tls_config.clone().unwrap())?;
self.stream = Some(Stream::Bidirectional(bidi_stream.start_tls(conn)?));
Ok(())
}
stream => {


@@ -100,6 +100,21 @@ pub struct FeExecuteMessage {
#[derive(Debug)]
pub struct FeCloseMessage {}
/// Retry a read on EINTR
///
/// This runs the enclosed expression, and if it returns
/// Err(io::ErrorKind::Interrupted), retries it.
macro_rules! retry_read {
( $x:expr ) => {
loop {
match $x {
Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
res => break res,
}
}
};
}
impl FeMessage {
/// Read one message from the stream.
/// This function returns `Ok(None)` in case of EOF.
@@ -107,7 +122,7 @@ impl FeMessage {
///
/// ```
/// # use std::io;
/// # use zenith_utils::pq_proto::FeMessage;
/// # use utils::pq_proto::FeMessage;
/// #
/// # fn process_message(msg: FeMessage) -> anyhow::Result<()> {
/// # Ok(())
@@ -141,12 +156,12 @@ impl FeMessage {
// Each libpq message begins with a message type byte, followed by message length
// If the client closes the connection, return None. But if the client closes the
// connection in the middle of a message, we will return an error.
let tag = match stream.read_u8().await {
let tag = match retry_read!(stream.read_u8().await) {
Ok(b) => b,
Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None),
Err(e) => return Err(e.into()),
};
let len = stream.read_u32().await?;
let len = retry_read!(stream.read_u32().await)?;
// The message length includes itself, so it better be at least 4
let bodylen = len
@@ -207,7 +222,7 @@ impl FeStartupPacket {
// reading 4 bytes, to be precise), return None to indicate that the connection
// was closed. This matches the PostgreSQL server's behavior, which avoids noise
// in the log if the client opens connection but closes it immediately.
let len = match stream.read_u32().await {
let len = match retry_read!(stream.read_u32().await) {
Ok(len) => len as usize,
Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None),
Err(e) => return Err(e.into()),
@@ -217,7 +232,7 @@ impl FeStartupPacket {
bail!("invalid message length");
}
let request_code = stream.read_u32().await?;
let request_code = retry_read!(stream.read_u32().await)?;
// the rest of startup packet are params
let params_len = len - 8;
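
For clarity, this is roughly what a single `retry_read!(stream.read_u8().await)` call amounts to, written out as a helper instead of a macro; the names here are illustrative only and not part of this commit.

```rust
use std::io;
use tokio::io::{AsyncRead, AsyncReadExt};

// Illustrative only: roughly what `retry_read!(stream.read_u8().await)` expands to.
// Reads interrupted by a signal (EINTR) are retried; success, EOF and all other
// errors are handed straight back to the caller.
async fn read_u8_retrying<R: AsyncRead + Unpin>(stream: &mut R) -> io::Result<u8> {
    loop {
        match stream.read_u8().await {
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            res => break res,
        }
    }
}
```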

View File

@@ -4,7 +4,7 @@ use std::{
sync::Arc,
};
use rustls::Session;
use rustls::Connection;
/// Wrapper supporting reads of a shared TcpStream.
pub struct ArcTcpRead(Arc<TcpStream>);
@@ -56,7 +56,7 @@ impl BufStream {
pub enum ReadStream {
Tcp(BufReader<ArcTcpRead>),
Tls(rustls_split::ReadHalf<rustls::ServerSession>),
Tls(rustls_split::ReadHalf),
}
impl io::Read for ReadStream {
@@ -79,7 +79,7 @@ impl ReadStream {
pub enum WriteStream {
Tcp(Arc<TcpStream>),
Tls(rustls_split::WriteHalf<rustls::ServerSession>),
Tls(rustls_split::WriteHalf),
}
impl WriteStream {
@@ -107,11 +107,11 @@ impl io::Write for WriteStream {
}
}
type TlsStream<T> = rustls::StreamOwned<rustls::ServerSession, T>;
type TlsStream<T> = rustls::StreamOwned<rustls::ServerConnection, T>;
pub enum BidiStream {
Tcp(BufStream),
/// This variant is boxed, because [`rustls::ServerSession`] is quite larger than [`BufStream`].
/// This variant is boxed, because [`rustls::ServerConnection`] is considerably larger than [`BufStream`].
Tls(Box<TlsStream<BufStream>>),
}
@@ -127,7 +127,7 @@ impl BidiStream {
if how == Shutdown::Read {
tls_boxed.sock.get_ref().shutdown(how)
} else {
tls_boxed.sess.send_close_notify();
tls_boxed.conn.send_close_notify();
let res = tls_boxed.flush();
tls_boxed.sock.get_ref().shutdown(how)?;
res
@@ -154,19 +154,23 @@ impl BidiStream {
// TODO would be nice to avoid the Arc here
let socket = Arc::try_unwrap(reader.into_inner().0).unwrap();
let (read_half, write_half) =
rustls_split::split(socket, tls_boxed.sess, read_buf_cfg, write_buf_cfg);
let (read_half, write_half) = rustls_split::split(
socket,
Connection::Server(tls_boxed.conn),
read_buf_cfg,
write_buf_cfg,
);
(ReadStream::Tls(read_half), WriteStream::Tls(write_half))
}
}
}
pub fn start_tls(self, mut session: rustls::ServerSession) -> io::Result<Self> {
pub fn start_tls(self, mut conn: rustls::ServerConnection) -> io::Result<Self> {
match self {
Self::Tcp(mut stream) => {
session.complete_io(&mut stream)?;
assert!(!session.is_handshaking());
Ok(Self::Tls(Box::new(TlsStream::new(session, stream))))
conn.complete_io(&mut stream)?;
assert!(!conn.is_handshaking());
Ok(Self::Tls(Box::new(TlsStream::new(conn, stream))))
}
Self::Tls { .. } => Err(io::Error::new(
io::ErrorKind::InvalidInput,

View File

@@ -29,7 +29,7 @@ impl<S, T: Future> SyncFuture<S, T> {
/// Example:
///
/// ```
/// # use zenith_utils::sync::SyncFuture;
/// # use utils::sync::SyncFuture;
/// # use std::future::Future;
/// # use tokio::io::AsyncReadExt;
/// #

View File

@@ -2,7 +2,7 @@ use bytes::{Buf, BytesMut};
use hex_literal::hex;
use serde::Deserialize;
use std::io::Read;
use zenith_utils::bin_ser::LeSer;
use utils::bin_ser::LeSer;
#[derive(Debug, PartialEq, Deserialize)]
pub struct HeaderData {

View File

@@ -8,9 +8,8 @@ use std::{
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use bytes::{Buf, BufMut, Bytes, BytesMut};
use lazy_static::lazy_static;
use rustls::Session;
use zenith_utils::postgres_backend::{AuthType, Handler, PostgresBackend};
use utils::postgres_backend::{AuthType, Handler, PostgresBackend};
fn make_tcp_pair() -> (TcpStream, TcpStream) {
let listener = TcpListener::bind("127.0.0.1:0").unwrap();
@@ -23,11 +22,11 @@ fn make_tcp_pair() -> (TcpStream, TcpStream) {
lazy_static! {
static ref KEY: rustls::PrivateKey = {
let mut cursor = Cursor::new(include_bytes!("key.pem"));
rustls::internal::pemfile::rsa_private_keys(&mut cursor).unwrap()[0].clone()
rustls::PrivateKey(rustls_pemfile::rsa_private_keys(&mut cursor).unwrap()[0].clone())
};
static ref CERT: rustls::Certificate = {
let mut cursor = Cursor::new(include_bytes!("cert.pem"));
rustls::internal::pemfile::certs(&mut cursor).unwrap()[0].clone()
rustls::Certificate(rustls_pemfile::certs(&mut cursor).unwrap()[0].clone())
};
}
@@ -45,17 +44,23 @@ fn ssl() {
let ssl_response = client_sock.read_u8().unwrap();
assert_eq!(b'S', ssl_response);
let mut cfg = rustls::ClientConfig::new();
cfg.root_store.add(&CERT).unwrap();
let cfg = rustls::ClientConfig::builder()
.with_safe_defaults()
.with_root_certificates({
let mut store = rustls::RootCertStore::empty();
store.add(&CERT).unwrap();
store
})
.with_no_client_auth();
let client_config = Arc::new(cfg);
let dns_name = webpki::DNSNameRef::try_from_ascii_str("localhost").unwrap();
let mut session = rustls::ClientSession::new(&client_config, dns_name);
let dns_name = "localhost".try_into().unwrap();
let mut conn = rustls::ClientConnection::new(client_config, dns_name).unwrap();
session.complete_io(&mut client_sock).unwrap();
assert!(!session.is_handshaking());
conn.complete_io(&mut client_sock).unwrap();
assert!(!conn.is_handshaking());
let mut stream = rustls::Stream::new(&mut session, &mut client_sock);
let mut stream = rustls::Stream::new(&mut conn, &mut client_sock);
// StartupMessage
stream.write_u32::<BigEndian>(9).unwrap();
@@ -105,8 +110,10 @@ fn ssl() {
}
let mut handler = TestHandler { got_query: false };
let mut cfg = rustls::ServerConfig::new(rustls::NoClientAuth::new());
cfg.set_single_cert(vec![CERT.clone()], KEY.clone())
let cfg = rustls::ServerConfig::builder()
.with_safe_defaults()
.with_no_client_auth()
.with_single_cert(vec![CERT.clone()], KEY.clone())
.unwrap();
let tls_config = Some(Arc::new(cfg));
@@ -209,8 +216,10 @@ fn server_forces_ssl() {
}
let mut handler = TestHandler;
let mut cfg = rustls::ServerConfig::new(rustls::NoClientAuth::new());
cfg.set_single_cert(vec![CERT.clone()], KEY.clone())
let cfg = rustls::ServerConfig::builder()
.with_safe_defaults()
.with_no_client_auth()
.with_single_cert(vec![CERT.clone()], KEY.clone())
.unwrap();
let tls_config = Some(Arc::new(cfg));
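
The certificate and key fixtures now go through the standalone `rustls-pemfile` crate instead of the removed `rustls::internal::pemfile` module, wrapping the raw DER bytes in `rustls::Certificate` / `rustls::PrivateKey` by hand. A small sketch of that loading step, assuming PEM files on disk; the paths are placeholders and this is not code from this commit.

```rust
use std::fs::File;
use std::io::BufReader;

// Sketch (not from this commit): loading TLS material with the rustls-pemfile
// crate, which replaces the removed `rustls::internal::pemfile` module.
fn load_tls_material() -> anyhow::Result<(Vec<rustls::Certificate>, rustls::PrivateKey)> {
    let mut cert_reader = BufReader::new(File::open("cert.pem")?);
    // rustls-pemfile returns raw DER byte vectors; wrap them in the rustls types.
    let certs = rustls_pemfile::certs(&mut cert_reader)?
        .into_iter()
        .map(rustls::Certificate)
        .collect::<Vec<_>>();

    let mut key_reader = BufReader::new(File::open("key.pem")?);
    let key = rustls_pemfile::rsa_private_keys(&mut key_reader)?
        .first()
        .cloned()
        .map(rustls::PrivateKey)
        .ok_or_else(|| anyhow::anyhow!("no RSA private key found in key.pem"))?;

    Ok((certs, key))
}
```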

View File

@@ -3,6 +3,10 @@ name = "pageserver"
version = "0.1.0"
edition = "2021"
[features]
default = []
profiling = ["pprof"]
[dependencies]
chrono = "0.4.19"
rand = "0.8.3"
@@ -19,10 +23,10 @@ clap = { version = "3.1.8", features = ["derive"] }
daemonize = "0.4.1"
tokio = { version = "1.17", features = ["process", "sync", "macros", "fs", "rt", "io-util", "time"] }
tokio-util = { version = "0.7", features = ["io"] }
postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
tokio-stream = "0.1.8"
anyhow = { version = "1.0", features = ["backtrace"] }
crc32c = "0.6.0"
@@ -33,6 +37,8 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
serde_with = "1.12.0"
pprof = { git = "https://github.com/neondatabase/pprof-rs.git", branch = "wallclock-profiling", features = ["flamegraph"], optional = true }
toml_edit = { version = "0.13", features = ["easy"] }
scopeguard = "1.1.0"
const_format = "0.2.21"
@@ -47,11 +53,10 @@ fail = "0.5.0"
rusoto_core = "0.47"
rusoto_s3 = "0.47"
async-trait = "0.1"
async-compression = {version = "0.3", features = ["zstd", "tokio"]}
postgres_ffi = { path = "../postgres_ffi" }
zenith_metrics = { path = "../zenith_metrics" }
zenith_utils = { path = "../zenith_utils" }
postgres_ffi = { path = "../libs/postgres_ffi" }
metrics = { path = "../libs/metrics" }
utils = { path = "../libs/utils" }
workspace_hack = { version = "0.1", path = "../workspace_hack" }
[dev-dependencies]

View File

@@ -12,20 +12,20 @@
//!
use anyhow::{ensure, Context, Result};
use bytes::{BufMut, BytesMut};
use log::*;
use std::fmt::Write as FmtWrite;
use std::io;
use std::io::Write;
use std::sync::Arc;
use std::time::SystemTime;
use tar::{Builder, EntryType, Header};
use tracing::*;
use crate::reltag::SlruKind;
use crate::repository::Timeline;
use crate::DatadirTimelineImpl;
use postgres_ffi::xlog_utils::*;
use postgres_ffi::*;
use zenith_utils::lsn::Lsn;
use utils::lsn::Lsn;
/// This is short-living object only for the time of tarball creation,
/// created mostly to avoid passing a lot of parameters between various functions

View File

@@ -7,7 +7,7 @@ use pageserver::layered_repository::dump_layerfile_from_path;
use pageserver::page_cache;
use pageserver::virtual_file;
use std::path::PathBuf;
use zenith_utils::GIT_VERSION;
use utils::GIT_VERSION;
fn main() -> Result<()> {
let arg_matches = Command::new("Zenith dump_layerfile utility")

View File

@@ -2,14 +2,6 @@
use std::{env, path::Path, str::FromStr};
use tracing::*;
use zenith_utils::{
auth::JwtAuth,
logging,
postgres_backend::AuthType,
tcp_listener,
zid::{ZTenantId, ZTimelineId},
GIT_VERSION,
};
use anyhow::{bail, Context, Result};
@@ -18,22 +10,34 @@ use daemonize::Daemonize;
use pageserver::{
config::{defaults::*, PageServerConf},
http, page_cache, page_service,
http, page_cache, page_service, profiling,
remote_storage::{self, SyncStartupData},
repository::{Repository, TimelineSyncStatusUpdate},
tenant_mgr, thread_mgr,
thread_mgr::ThreadKind,
timelines, virtual_file, LOG_FILE_NAME,
};
use zenith_utils::http::endpoint;
use zenith_utils::shutdown::exit_now;
use zenith_utils::signals::{self, Signal};
use utils::{
auth::JwtAuth,
http::endpoint,
logging,
postgres_backend::AuthType,
shutdown::exit_now,
signals::{self, Signal},
tcp_listener,
zid::{ZTenantId, ZTimelineId},
GIT_VERSION,
};
fn version() -> String {
format!("{} profiling:{}", GIT_VERSION, cfg!(feature = "profiling"))
}
fn main() -> anyhow::Result<()> {
zenith_metrics::set_common_metrics_prefix("pageserver");
metrics::set_common_metrics_prefix("pageserver");
let arg_matches = Command::new("Zenith page server")
.about("Materializes WAL stream to pages and serves them to the postgres")
.version(GIT_VERSION)
.version(&*version())
.arg(
Arg::new("daemonize")
.short('d')
@@ -245,11 +249,12 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
for (tenant_id, local_timeline_init_statuses) in local_timeline_init_statuses {
// initialize local tenant
let repo = tenant_mgr::load_local_repo(conf, tenant_id, &remote_index);
let repo = tenant_mgr::load_local_repo(conf, tenant_id, &remote_index)
.with_context(|| format!("Failed to load repo for tenant {}", tenant_id))?;
for (timeline_id, init_status) in local_timeline_init_statuses {
match init_status {
remote_storage::LocalTimelineInitStatus::LocallyComplete => {
debug!("timeline {} for tenant {} is locally complete, registering it in repository", tenant_id, timeline_id);
debug!("timeline {} for tenant {} is locally complete, registering it in repository", timeline_id, tenant_id);
// Let's fail here loudly to be on the safe side.
// XXX: It may be a better api to actually distinguish between repository startup
// and processing of newly downloaded timelines.
@@ -286,6 +291,9 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
};
info!("Using auth: {:#?}", conf.auth_type);
// start profiler (if enabled)
let profiler_guard = profiling::init_profiler(conf);
// Spawn a new thread for the http endpoint
// bind before launching separate thread so the error reported before startup exits
let auth_cloned = auth.clone();
@@ -296,7 +304,7 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
"http_endpoint_thread",
false,
move || {
let router = http::make_router(conf, auth_cloned, remote_index);
let router = http::make_router(conf, auth_cloned, remote_index)?;
endpoint::serve_thread_main(router, http_listener, thread_mgr::shutdown_watcher())
},
)?;
@@ -318,6 +326,7 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
"Got {}. Terminating in immediate shutdown mode",
signal.name()
);
profiling::exit_profiler(conf, &profiler_guard);
std::process::exit(111);
}
@@ -326,6 +335,7 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
"Got {}. Terminating gracefully in fast shutdown mode",
signal.name()
);
profiling::exit_profiler(conf, &profiler_guard);
pageserver::shutdown_pageserver();
unreachable!()
}
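
The new `profiling` Cargo feature gates an optional pprof-based profiler whose guard is created at startup and flushed on shutdown. The actual `pageserver::profiling` module is not shown in this diff, and the Cargo.toml points at a forked pprof with wall-clock profiling, so the following is only a hypothetical sketch of how such a feature-gated guard could look.

```rust
// Hypothetical sketch only: the real `pageserver::profiling` module is not part
// of this hunk, so the names and calls below are assumptions about how a
// feature-gated guard could look.

#[cfg(feature = "profiling")]
fn init_profiler_sketch() -> Option<pprof::ProfilerGuard<'static>> {
    // Sample roughly 100 times per second while the guard is alive.
    pprof::ProfilerGuard::new(100).ok()
}

#[cfg(feature = "profiling")]
fn dump_flamegraph_sketch(guard: &pprof::ProfilerGuard<'static>) {
    // With pprof's `flamegraph` feature (enabled in the Cargo.toml above),
    // write an SVG flamegraph on shutdown.
    if let (Ok(report), Ok(mut file)) = (
        guard.report().build(),
        std::fs::File::create("pageserver-profile.svg"),
    ) {
        let _ = report.flamegraph(&mut file);
    }
}
```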

View File

@@ -1,334 +0,0 @@
//! A CLI helper to deal with remote storage (S3, usually) blobs as archives.
//! See [`compression`] for more details about the archives.
use std::{collections::BTreeSet, path::Path};
use anyhow::{bail, ensure, Context};
use clap::{Arg, Command};
use pageserver::{
layered_repository::metadata::{TimelineMetadata, METADATA_FILE_NAME},
remote_storage::compression,
};
use tokio::{fs, io};
use zenith_utils::GIT_VERSION;
const LIST_SUBCOMMAND: &str = "list";
const ARCHIVE_ARG_NAME: &str = "archive";
const EXTRACT_SUBCOMMAND: &str = "extract";
const TARGET_DIRECTORY_ARG_NAME: &str = "target_directory";
const CREATE_SUBCOMMAND: &str = "create";
const SOURCE_DIRECTORY_ARG_NAME: &str = "source_directory";
#[tokio::main(flavor = "current_thread")]
async fn main() -> anyhow::Result<()> {
let arg_matches = Command::new("pageserver zst blob [un]compressor utility")
.version(GIT_VERSION)
.subcommands(vec![
Command::new(LIST_SUBCOMMAND)
.about("List the archive contents")
.arg(
Arg::new(ARCHIVE_ARG_NAME)
.required(true)
.takes_value(true)
.help("An archive to list the contents of"),
),
Command::new(EXTRACT_SUBCOMMAND)
.about("Extracts the archive into the directory")
.arg(
Arg::new(ARCHIVE_ARG_NAME)
.required(true)
.takes_value(true)
.help("An archive to extract"),
)
.arg(
Arg::new(TARGET_DIRECTORY_ARG_NAME)
.required(false)
.takes_value(true)
.help("A directory to extract the archive into. Optional, will use the current directory if not specified"),
),
Command::new(CREATE_SUBCOMMAND)
.about("Creates an archive with the contents of a directory (only the first level files are taken, metadata file has to be present in the same directory)")
.arg(
Arg::new(SOURCE_DIRECTORY_ARG_NAME)
.required(true)
.takes_value(true)
.help("A directory to use for creating the archive"),
)
.arg(
Arg::new(TARGET_DIRECTORY_ARG_NAME)
.required(false)
.takes_value(true)
.help("A directory to create the archive in. Optional, will use the current directory if not specified"),
),
])
.get_matches();
let subcommand_name = match arg_matches.subcommand_name() {
Some(name) => name,
None => bail!("No subcommand specified"),
};
let subcommand_matches = match arg_matches.subcommand_matches(subcommand_name) {
Some(matches) => matches,
None => bail!(
"No subcommand arguments were recognized for subcommand '{}'",
subcommand_name
),
};
let target_dir = Path::new(
subcommand_matches
.value_of(TARGET_DIRECTORY_ARG_NAME)
.unwrap_or("./"),
);
match subcommand_name {
LIST_SUBCOMMAND => {
let archive = match subcommand_matches.value_of(ARCHIVE_ARG_NAME) {
Some(archive) => Path::new(archive),
None => bail!("No '{}' argument is specified", ARCHIVE_ARG_NAME),
};
list_archive(archive).await
}
EXTRACT_SUBCOMMAND => {
let archive = match subcommand_matches.value_of(ARCHIVE_ARG_NAME) {
Some(archive) => Path::new(archive),
None => bail!("No '{}' argument is specified", ARCHIVE_ARG_NAME),
};
extract_archive(archive, target_dir).await
}
CREATE_SUBCOMMAND => {
let source_dir = match subcommand_matches.value_of(SOURCE_DIRECTORY_ARG_NAME) {
Some(source) => Path::new(source),
None => bail!("No '{}' argument is specified", SOURCE_DIRECTORY_ARG_NAME),
};
create_archive(source_dir, target_dir).await
}
unknown => bail!("Unknown subcommand {}", unknown),
}
}
async fn list_archive(archive: &Path) -> anyhow::Result<()> {
let archive = archive.canonicalize().with_context(|| {
format!(
"Failed to get the absolute path for the archive path '{}'",
archive.display()
)
})?;
ensure!(
archive.is_file(),
"Path '{}' is not an archive file",
archive.display()
);
println!("Listing an archive at path '{}'", archive.display());
let archive_name = match archive.file_name().and_then(|name| name.to_str()) {
Some(name) => name,
None => bail!(
"Failed to get the archive name from the path '{}'",
archive.display()
),
};
let archive_bytes = fs::read(&archive)
.await
.context("Failed to read the archive bytes")?;
let header = compression::read_archive_header(archive_name, &mut archive_bytes.as_slice())
.await
.context("Failed to read the archive header")?;
let empty_path = Path::new("");
println!("-------------------------------");
let longest_path_in_archive = header
.files
.iter()
.filter_map(|file| Some(file.subpath.as_path(empty_path).to_str()?.len()))
.max()
.unwrap_or_default()
.max(METADATA_FILE_NAME.len());
for regular_file in &header.files {
println!(
"File: {:width$} uncompressed size: {} bytes",
regular_file.subpath.as_path(empty_path).display(),
regular_file.size,
width = longest_path_in_archive,
)
}
println!(
"File: {:width$} uncompressed size: {} bytes",
METADATA_FILE_NAME,
header.metadata_file_size,
width = longest_path_in_archive,
);
println!("-------------------------------");
Ok(())
}
async fn extract_archive(archive: &Path, target_dir: &Path) -> anyhow::Result<()> {
let archive = archive.canonicalize().with_context(|| {
format!(
"Failed to get the absolute path for the archive path '{}'",
archive.display()
)
})?;
ensure!(
archive.is_file(),
"Path '{}' is not an archive file",
archive.display()
);
let archive_name = match archive.file_name().and_then(|name| name.to_str()) {
Some(name) => name,
None => bail!(
"Failed to get the archive name from the path '{}'",
archive.display()
),
};
if !target_dir.exists() {
fs::create_dir_all(target_dir).await.with_context(|| {
format!(
"Failed to create the target dir at path '{}'",
target_dir.display()
)
})?;
}
let target_dir = target_dir.canonicalize().with_context(|| {
format!(
"Failed to get the absolute path for the target dir path '{}'",
target_dir.display()
)
})?;
ensure!(
target_dir.is_dir(),
"Path '{}' is not a directory",
target_dir.display()
);
let mut dir_contents = fs::read_dir(&target_dir)
.await
.context("Failed to list the target directory contents")?;
let dir_entry = dir_contents
.next_entry()
.await
.context("Failed to list the target directory contents")?;
ensure!(
dir_entry.is_none(),
"Target directory '{}' is not empty",
target_dir.display()
);
println!(
"Extracting an archive at path '{}' into directory '{}'",
archive.display(),
target_dir.display()
);
let mut archive_file = fs::File::open(&archive).await.with_context(|| {
format!(
"Failed to get the archive name from the path '{}'",
archive.display()
)
})?;
let header = compression::read_archive_header(archive_name, &mut archive_file)
.await
.context("Failed to read the archive header")?;
compression::uncompress_with_header(&BTreeSet::new(), &target_dir, header, &mut archive_file)
.await
.context("Failed to extract the archive")
}
async fn create_archive(source_dir: &Path, target_dir: &Path) -> anyhow::Result<()> {
let source_dir = source_dir.canonicalize().with_context(|| {
format!(
"Failed to get the absolute path for the source dir path '{}'",
source_dir.display()
)
})?;
ensure!(
source_dir.is_dir(),
"Path '{}' is not a directory",
source_dir.display()
);
if !target_dir.exists() {
fs::create_dir_all(target_dir).await.with_context(|| {
format!(
"Failed to create the target dir at path '{}'",
target_dir.display()
)
})?;
}
let target_dir = target_dir.canonicalize().with_context(|| {
format!(
"Failed to get the absolute path for the target dir path '{}'",
target_dir.display()
)
})?;
ensure!(
target_dir.is_dir(),
"Path '{}' is not a directory",
target_dir.display()
);
println!(
"Compressing directory '{}' and creating resulting archive in directory '{}'",
source_dir.display(),
target_dir.display()
);
let mut metadata_file_contents = None;
let mut files_co_archive = Vec::new();
let mut source_dir_contents = fs::read_dir(&source_dir)
.await
.context("Failed to read the source directory contents")?;
while let Some(source_dir_entry) = source_dir_contents
.next_entry()
.await
.context("Failed to read a source dir entry")?
{
let entry_path = source_dir_entry.path();
if entry_path.is_file() {
if entry_path.file_name().and_then(|name| name.to_str()) == Some(METADATA_FILE_NAME) {
let metadata_bytes = fs::read(entry_path)
.await
.context("Failed to read metata file bytes in the source dir")?;
metadata_file_contents = Some(
TimelineMetadata::from_bytes(&metadata_bytes)
.context("Failed to parse metata file contents in the source dir")?,
);
} else {
files_co_archive.push(entry_path);
}
}
}
let metadata = match metadata_file_contents {
Some(metadata) => metadata,
None => bail!(
"No metadata file found in the source dir '{}', cannot create the archive",
source_dir.display()
),
};
let _ = compression::archive_files_as_stream(
&source_dir,
files_co_archive.iter(),
&metadata,
move |mut archive_streamer, archive_name| async move {
let archive_target = target_dir.join(&archive_name);
let mut archive_file = fs::File::create(&archive_target).await?;
io::copy(&mut archive_streamer, &mut archive_file).await?;
Ok(archive_target)
},
)
.await
.context("Failed to create an archive")?;
Ok(())
}

View File

@@ -28,8 +28,8 @@ use std::{
};
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::net::TcpStream;
use zenith_utils::zid::{ZTenantId, ZTimelineId};
use zenith_utils::{
use utils::zid::{ZTenantId, ZTimelineId};
use utils::{
lsn::Lsn,
pq_proto::{BeMessage, FeMessage},
};

View File

@@ -6,8 +6,7 @@ use clap::{Arg, Command};
use pageserver::layered_repository::metadata::TimelineMetadata;
use std::path::PathBuf;
use std::str::FromStr;
use zenith_utils::lsn::Lsn;
use zenith_utils::GIT_VERSION;
use utils::{lsn::Lsn, GIT_VERSION};
fn main() -> Result<()> {
let arg_matches = Command::new("Zenith update metadata utility")

View File

@@ -4,22 +4,24 @@
//! file, or on the command line.
//! See also `settings.md` for better description on every parameter.
use anyhow::{bail, ensure, Context, Result};
use toml_edit;
use toml_edit::{Document, Item};
use zenith_utils::postgres_backend::AuthType;
use zenith_utils::zid::{ZNodeId, ZTenantId, ZTimelineId};
use std::convert::TryInto;
use anyhow::{anyhow, bail, ensure, Context, Result};
use std::env;
use std::num::{NonZeroU32, NonZeroUsize};
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::time::Duration;
use toml_edit;
use toml_edit::{Document, Item};
use utils::{
postgres_backend::AuthType,
zid::{ZNodeId, ZTenantId, ZTimelineId},
};
use crate::layered_repository::TIMELINES_SEGMENT_NAME;
use crate::tenant_config::{TenantConf, TenantConfOpt};
pub mod defaults {
use crate::tenant_config::defaults::*;
use const_format::formatcp;
pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
@@ -27,27 +29,22 @@ pub mod defaults {
pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
// FIXME: This current value is very low. I would imagine something like 1 GB or 10 GB
// would be more appropriate. But a low value forces the code to be exercised more,
// which is good for now to trigger bugs.
// This parameter actually determines L0 layer file size.
pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 256 * 1024 * 1024;
// Target file size, when creating image and delta layers.
// This parameter determines L1 layer file size.
pub const DEFAULT_COMPACTION_TARGET_SIZE: u64 = 128 * 1024 * 1024;
pub const DEFAULT_COMPACTION_PERIOD: &str = "1 s";
pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;
pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
pub const DEFAULT_GC_PERIOD: &str = "100 s";
pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "60 s";
pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";
pub const DEFAULT_SUPERUSER: &str = "zenith_admin";
pub const DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNC: usize = 10;
/// How many different timelines can be processed simultaneously when synchronizing layers with the remote storage.
/// During regular work, the pageserver produces one layer file per timeline checkpoint, with bursts of concurrency
/// during startup (when local and remote timelines are compared and initial sync tasks are scheduled) and timeline attach.
/// Both cases may trigger timeline downloads that fetch many layers; the clients limit that concurrency internally, as needed.
pub const DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_TIMELINES_SYNC: usize = 50;
pub const DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS: u32 = 10;
/// Currently, sync happens with AWS S3, which has two limits on requests per second:
/// ~200 RPS for IAM services
/// https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/UsingWithRDS.IAMDBAuth.html
/// ~3500 PUT/COPY/POST/DELETE or 5500 GET/HEAD S3 requests
/// https://aws.amazon.com/premiumsupport/knowledge-center/s3-request-limit-avoid-throttling/
pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;
@@ -62,14 +59,6 @@ pub mod defaults {
#listen_pg_addr = '{DEFAULT_PG_LISTEN_ADDR}'
#listen_http_addr = '{DEFAULT_HTTP_LISTEN_ADDR}'
#checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
#compaction_target_size = {DEFAULT_COMPACTION_TARGET_SIZE} # in bytes
#compaction_period = '{DEFAULT_COMPACTION_PERIOD}'
#compaction_threshold = '{DEFAULT_COMPACTION_THRESHOLD}'
#gc_period = '{DEFAULT_GC_PERIOD}'
#gc_horizon = {DEFAULT_GC_HORIZON}
#wait_lsn_timeout = '{DEFAULT_WAIT_LSN_TIMEOUT}'
#wal_redo_timeout = '{DEFAULT_WAL_REDO_TIMEOUT}'
@@ -78,6 +67,16 @@ pub mod defaults {
# initial superuser role name to use when creating a new tenant
#initial_superuser_name = '{DEFAULT_SUPERUSER}'
# [tenant_config]
#checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
#compaction_target_size = {DEFAULT_COMPACTION_TARGET_SIZE} # in bytes
#compaction_period = '{DEFAULT_COMPACTION_PERIOD}'
#compaction_threshold = '{DEFAULT_COMPACTION_THRESHOLD}'
#gc_period = '{DEFAULT_GC_PERIOD}'
#gc_horizon = {DEFAULT_GC_HORIZON}
#pitr_interval = '{DEFAULT_PITR_INTERVAL}'
# [remote_storage]
"###
@@ -95,25 +94,6 @@ pub struct PageServerConf {
/// Example (default): 127.0.0.1:9898
pub listen_http_addr: String,
// Flush out an inmemory layer, if it's holding WAL older than this
// This puts a backstop on how much WAL needs to be re-digested if the
// page server crashes.
// This parameter actually determines L0 layer file size.
pub checkpoint_distance: u64,
// Target file size, when creating image and delta layers.
// This parameter determines L1 layer file size.
pub compaction_target_size: u64,
// How often to check if there's compaction work to be done.
pub compaction_period: Duration,
// Level0 delta layer threshold for compaction.
pub compaction_threshold: usize,
pub gc_horizon: u64,
pub gc_period: Duration,
// Timeout when waiting for WAL receiver to catch up to an LSN given in a GetPage@LSN call.
pub wait_lsn_timeout: Duration,
// How long to wait for WAL redo to complete.
@@ -140,6 +120,27 @@ pub struct PageServerConf {
pub remote_storage_config: Option<RemoteStorageConfig>,
pub emit_wal_metadata: bool,
pub profiling: ProfilingConfig,
pub default_tenant_conf: TenantConf,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ProfilingConfig {
Disabled,
PageRequests,
}
impl FromStr for ProfilingConfig {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<ProfilingConfig, Self::Err> {
let result = match s {
"disabled" => ProfilingConfig::Disabled,
"page_requests" => ProfilingConfig::PageRequests,
_ => bail!("invalid value \"{s}\" for profiling option, valid values are \"disabled\" and \"page_requests\""),
};
Ok(result)
}
}
// use dedicated enum for builder to better indicate the intention
@@ -164,15 +165,6 @@ struct PageServerConfigBuilder {
listen_http_addr: BuilderValue<String>,
checkpoint_distance: BuilderValue<u64>,
compaction_target_size: BuilderValue<u64>,
compaction_period: BuilderValue<Duration>,
compaction_threshold: BuilderValue<usize>,
gc_horizon: BuilderValue<u64>,
gc_period: BuilderValue<Duration>,
wait_lsn_timeout: BuilderValue<Duration>,
wal_redo_timeout: BuilderValue<Duration>,
@@ -194,6 +186,7 @@ struct PageServerConfigBuilder {
id: BuilderValue<ZNodeId>,
emit_wal_metadata: BuilderValue<bool>,
profiling: BuilderValue<ProfilingConfig>,
}
impl Default for PageServerConfigBuilder {
@@ -203,14 +196,6 @@ impl Default for PageServerConfigBuilder {
Self {
listen_pg_addr: Set(DEFAULT_PG_LISTEN_ADDR.to_string()),
listen_http_addr: Set(DEFAULT_HTTP_LISTEN_ADDR.to_string()),
checkpoint_distance: Set(DEFAULT_CHECKPOINT_DISTANCE),
compaction_target_size: Set(DEFAULT_COMPACTION_TARGET_SIZE),
compaction_period: Set(humantime::parse_duration(DEFAULT_COMPACTION_PERIOD)
.expect("cannot parse default compaction period")),
compaction_threshold: Set(DEFAULT_COMPACTION_THRESHOLD),
gc_horizon: Set(DEFAULT_GC_HORIZON),
gc_period: Set(humantime::parse_duration(DEFAULT_GC_PERIOD)
.expect("cannot parse default gc period")),
wait_lsn_timeout: Set(humantime::parse_duration(DEFAULT_WAIT_LSN_TIMEOUT)
.expect("cannot parse default wait lsn timeout")),
wal_redo_timeout: Set(humantime::parse_duration(DEFAULT_WAL_REDO_TIMEOUT)
@@ -227,6 +212,7 @@ impl Default for PageServerConfigBuilder {
remote_storage_config: Set(None),
id: NotSet,
emit_wal_metadata: Set(false),
profiling: Set(ProfilingConfig::Disabled),
}
}
}
@@ -240,30 +226,6 @@ impl PageServerConfigBuilder {
self.listen_http_addr = BuilderValue::Set(listen_http_addr)
}
pub fn checkpoint_distance(&mut self, checkpoint_distance: u64) {
self.checkpoint_distance = BuilderValue::Set(checkpoint_distance)
}
pub fn compaction_target_size(&mut self, compaction_target_size: u64) {
self.compaction_target_size = BuilderValue::Set(compaction_target_size)
}
pub fn compaction_period(&mut self, compaction_period: Duration) {
self.compaction_period = BuilderValue::Set(compaction_period)
}
pub fn compaction_threshold(&mut self, compaction_threshold: usize) {
self.compaction_threshold = BuilderValue::Set(compaction_threshold)
}
pub fn gc_horizon(&mut self, gc_horizon: u64) {
self.gc_horizon = BuilderValue::Set(gc_horizon)
}
pub fn gc_period(&mut self, gc_period: Duration) {
self.gc_period = BuilderValue::Set(gc_period)
}
pub fn wait_lsn_timeout(&mut self, wait_lsn_timeout: Duration) {
self.wait_lsn_timeout = BuilderValue::Set(wait_lsn_timeout)
}
@@ -315,58 +277,49 @@ impl PageServerConfigBuilder {
self.emit_wal_metadata = BuilderValue::Set(value)
}
pub fn profiling(&mut self, profiling: ProfilingConfig) {
self.profiling = BuilderValue::Set(profiling)
}
pub fn build(self) -> Result<PageServerConf> {
Ok(PageServerConf {
listen_pg_addr: self
.listen_pg_addr
.ok_or(anyhow::anyhow!("missing listen_pg_addr"))?,
.ok_or(anyhow!("missing listen_pg_addr"))?,
listen_http_addr: self
.listen_http_addr
.ok_or(anyhow::anyhow!("missing listen_http_addr"))?,
checkpoint_distance: self
.checkpoint_distance
.ok_or(anyhow::anyhow!("missing checkpoint_distance"))?,
compaction_target_size: self
.compaction_target_size
.ok_or(anyhow::anyhow!("missing compaction_target_size"))?,
compaction_period: self
.compaction_period
.ok_or(anyhow::anyhow!("missing compaction_period"))?,
compaction_threshold: self
.compaction_threshold
.ok_or(anyhow::anyhow!("missing compaction_threshold"))?,
gc_horizon: self
.gc_horizon
.ok_or(anyhow::anyhow!("missing gc_horizon"))?,
gc_period: self.gc_period.ok_or(anyhow::anyhow!("missing gc_period"))?,
.ok_or(anyhow!("missing listen_http_addr"))?,
wait_lsn_timeout: self
.wait_lsn_timeout
.ok_or(anyhow::anyhow!("missing wait_lsn_timeout"))?,
.ok_or(anyhow!("missing wait_lsn_timeout"))?,
wal_redo_timeout: self
.wal_redo_timeout
.ok_or(anyhow::anyhow!("missing wal_redo_timeout"))?,
superuser: self.superuser.ok_or(anyhow::anyhow!("missing superuser"))?,
.ok_or(anyhow!("missing wal_redo_timeout"))?,
superuser: self.superuser.ok_or(anyhow!("missing superuser"))?,
page_cache_size: self
.page_cache_size
.ok_or(anyhow::anyhow!("missing page_cache_size"))?,
.ok_or(anyhow!("missing page_cache_size"))?,
max_file_descriptors: self
.max_file_descriptors
.ok_or(anyhow::anyhow!("missing max_file_descriptors"))?,
workdir: self.workdir.ok_or(anyhow::anyhow!("missing workdir"))?,
.ok_or(anyhow!("missing max_file_descriptors"))?,
workdir: self.workdir.ok_or(anyhow!("missing workdir"))?,
pg_distrib_dir: self
.pg_distrib_dir
.ok_or(anyhow::anyhow!("missing pg_distrib_dir"))?,
auth_type: self.auth_type.ok_or(anyhow::anyhow!("missing auth_type"))?,
.ok_or(anyhow!("missing pg_distrib_dir"))?,
auth_type: self.auth_type.ok_or(anyhow!("missing auth_type"))?,
auth_validation_public_key_path: self
.auth_validation_public_key_path
.ok_or(anyhow::anyhow!("missing auth_validation_public_key_path"))?,
.ok_or(anyhow!("missing auth_validation_public_key_path"))?,
remote_storage_config: self
.remote_storage_config
.ok_or(anyhow::anyhow!("missing remote_storage_config"))?,
id: self.id.ok_or(anyhow::anyhow!("missing id"))?,
.ok_or(anyhow!("missing remote_storage_config"))?,
id: self.id.ok_or(anyhow!("missing id"))?,
emit_wal_metadata: self
.emit_wal_metadata
.ok_or(anyhow::anyhow!("emit_wal_metadata not specifiec"))?,
.ok_or(anyhow!("emit_wal_metadata not specifiec"))?,
profiling: self.profiling.ok_or(anyhow!("missing profiling"))?,
// TenantConf is handled separately
default_tenant_conf: TenantConf::default(),
})
}
}
@@ -375,7 +328,7 @@ impl PageServerConfigBuilder {
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RemoteStorageConfig {
/// Max allowed number of concurrent sync operations between pageserver and the remote storage.
pub max_concurrent_sync: NonZeroUsize,
pub max_concurrent_timelines_sync: NonZeroUsize,
/// Max allowed errors before the sync task is considered failed and evicted.
pub max_sync_errors: NonZeroU32,
/// The storage connection configuration.
@@ -416,6 +369,9 @@ pub struct S3Config {
///
/// Example: `http://127.0.0.1:5000`
pub endpoint: Option<String>,
/// AWS S3 has various limits on its API calls, which we must not exceed.
/// See [`defaults::DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT`] for more details.
pub concurrency_limit: NonZeroUsize,
}
impl std::fmt::Debug for S3Config {
@@ -424,6 +380,7 @@ impl std::fmt::Debug for S3Config {
.field("bucket_name", &self.bucket_name)
.field("bucket_region", &self.bucket_region)
.field("prefix_in_bucket", &self.prefix_in_bucket)
.field("concurrency_limit", &self.concurrency_limit)
.finish()
}
}
@@ -469,20 +426,12 @@ impl PageServerConf {
let mut builder = PageServerConfigBuilder::default();
builder.workdir(workdir.to_owned());
let mut t_conf: TenantConfOpt = Default::default();
for (key, item) in toml.iter() {
match key {
"listen_pg_addr" => builder.listen_pg_addr(parse_toml_string(key, item)?),
"listen_http_addr" => builder.listen_http_addr(parse_toml_string(key, item)?),
"checkpoint_distance" => builder.checkpoint_distance(parse_toml_u64(key, item)?),
"compaction_target_size" => {
builder.compaction_target_size(parse_toml_u64(key, item)?)
}
"compaction_period" => builder.compaction_period(parse_toml_duration(key, item)?),
"compaction_threshold" => {
builder.compaction_threshold(parse_toml_u64(key, item)? as usize)
}
"gc_horizon" => builder.gc_horizon(parse_toml_u64(key, item)?),
"gc_period" => builder.gc_period(parse_toml_duration(key, item)?),
"wait_lsn_timeout" => builder.wait_lsn_timeout(parse_toml_duration(key, item)?),
"wal_redo_timeout" => builder.wal_redo_timeout(parse_toml_duration(key, item)?),
"initial_superuser_name" => builder.superuser(parse_toml_string(key, item)?),
@@ -496,13 +445,17 @@ impl PageServerConf {
"auth_validation_public_key_path" => builder.auth_validation_public_key_path(Some(
PathBuf::from(parse_toml_string(key, item)?),
)),
"auth_type" => builder.auth_type(parse_toml_auth_type(key, item)?),
"auth_type" => builder.auth_type(parse_toml_from_str(key, item)?),
"remote_storage" => {
builder.remote_storage_config(Some(Self::parse_remote_storage_config(item)?))
}
"tenant_conf" => {
t_conf = Self::parse_toml_tenant_conf(item)?;
}
"id" => builder.id(ZNodeId(parse_toml_u64(key, item)?)),
"emit_wal_metadata" => builder.emit_wal_metadata(true),
_ => bail!("unrecognized pageserver option '{}'", key),
"profiling" => builder.profiling(parse_toml_from_str(key, item)?),
_ => bail!("unrecognized pageserver option '{key}'"),
}
}
@@ -528,41 +481,75 @@ impl PageServerConf {
);
}
conf.default_tenant_conf = t_conf.merge(TenantConf::default());
Ok(conf)
}
// subroutine of parse_and_validate to parse `[tenant_conf]` section
pub fn parse_toml_tenant_conf(item: &toml_edit::Item) -> Result<TenantConfOpt> {
let mut t_conf: TenantConfOpt = Default::default();
if let Some(checkpoint_distance) = item.get("checkpoint_distance") {
t_conf.checkpoint_distance =
Some(parse_toml_u64("checkpoint_distance", checkpoint_distance)?);
}
if let Some(compaction_target_size) = item.get("compaction_target_size") {
t_conf.compaction_target_size = Some(parse_toml_u64(
"compaction_target_size",
compaction_target_size,
)?);
}
if let Some(compaction_period) = item.get("compaction_period") {
t_conf.compaction_period =
Some(parse_toml_duration("compaction_period", compaction_period)?);
}
if let Some(compaction_threshold) = item.get("compaction_threshold") {
t_conf.compaction_threshold =
Some(parse_toml_u64("compaction_threshold", compaction_threshold)?.try_into()?);
}
if let Some(gc_horizon) = item.get("gc_horizon") {
t_conf.gc_horizon = Some(parse_toml_u64("gc_horizon", gc_horizon)?);
}
if let Some(gc_period) = item.get("gc_period") {
t_conf.gc_period = Some(parse_toml_duration("gc_period", gc_period)?);
}
if let Some(pitr_interval) = item.get("pitr_interval") {
t_conf.pitr_interval = Some(parse_toml_duration("pitr_interval", pitr_interval)?);
}
Ok(t_conf)
}
/// subroutine of parse_config(), to parse the `[remote_storage]` table.
fn parse_remote_storage_config(toml: &toml_edit::Item) -> anyhow::Result<RemoteStorageConfig> {
let local_path = toml.get("local_path");
let bucket_name = toml.get("bucket_name");
let bucket_region = toml.get("bucket_region");
let max_concurrent_sync: NonZeroUsize = if let Some(s) = toml.get("max_concurrent_sync") {
parse_toml_u64("max_concurrent_sync", s)
.and_then(|toml_u64| {
toml_u64.try_into().with_context(|| {
format!("'max_concurrent_sync' value {} is too large", toml_u64)
})
})
.ok()
.and_then(NonZeroUsize::new)
.context("'max_concurrent_sync' must be a non-zero positive integer")?
} else {
NonZeroUsize::new(defaults::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNC).unwrap()
};
let max_sync_errors: NonZeroU32 = if let Some(s) = toml.get("max_sync_errors") {
parse_toml_u64("max_sync_errors", s)
.and_then(|toml_u64| {
toml_u64.try_into().with_context(|| {
format!("'max_sync_errors' value {} is too large", toml_u64)
})
})
.ok()
.and_then(NonZeroU32::new)
.context("'max_sync_errors' must be a non-zero positive integer")?
} else {
NonZeroU32::new(defaults::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS).unwrap()
};
let max_concurrent_timelines_sync = NonZeroUsize::new(
parse_optional_integer("max_concurrent_timelines_sync", toml)?
.unwrap_or(defaults::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_TIMELINES_SYNC),
)
.context("Failed to parse 'max_concurrent_timelines_sync' as a positive integer")?;
let max_sync_errors = NonZeroU32::new(
parse_optional_integer("max_sync_errors", toml)?
.unwrap_or(defaults::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS),
)
.context("Failed to parse 'max_sync_errors' as a positive integer")?;
let concurrency_limit = NonZeroUsize::new(
parse_optional_integer("concurrency_limit", toml)?
.unwrap_or(defaults::DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT),
)
.context("Failed to parse 'concurrency_limit' as a positive integer")?;
let storage = match (local_path, bucket_name, bucket_region) {
(None, None, None) => bail!("no 'local_path' nor 'bucket_name' option"),
@@ -593,6 +580,7 @@ impl PageServerConf {
.get("endpoint")
.map(|endpoint| parse_toml_string("endpoint", endpoint))
.transpose()?,
concurrency_limit,
}),
(Some(local_path), None, None) => RemoteStorageKind::LocalFs(PathBuf::from(
parse_toml_string("local_path", local_path)?,
@@ -601,7 +589,7 @@ impl PageServerConf {
};
Ok(RemoteStorageConfig {
max_concurrent_sync,
max_concurrent_timelines_sync,
max_sync_errors,
storage,
})
@@ -609,19 +597,13 @@ impl PageServerConf {
#[cfg(test)]
pub fn test_repo_dir(test_name: &str) -> PathBuf {
PathBuf::from(format!("../tmp_check/test_{}", test_name))
PathBuf::from(format!("../tmp_check/test_{test_name}"))
}
#[cfg(test)]
pub fn dummy_conf(repo_dir: PathBuf) -> Self {
PageServerConf {
id: ZNodeId(0),
checkpoint_distance: defaults::DEFAULT_CHECKPOINT_DISTANCE,
compaction_target_size: 4 * 1024 * 1024,
compaction_period: Duration::from_secs(10),
compaction_threshold: defaults::DEFAULT_COMPACTION_THRESHOLD,
gc_horizon: defaults::DEFAULT_GC_HORIZON,
gc_period: Duration::from_secs(10),
wait_lsn_timeout: Duration::from_secs(60),
wal_redo_timeout: Duration::from_secs(60),
page_cache_size: defaults::DEFAULT_PAGE_CACHE_SIZE,
@@ -635,6 +617,8 @@ impl PageServerConf {
auth_validation_public_key_path: None,
remote_storage_config: None,
emit_wal_metadata: false,
profiling: ProfilingConfig::Disabled,
default_tenant_conf: TenantConf::dummy_conf(),
}
}
}
@@ -644,7 +628,7 @@ impl PageServerConf {
fn parse_toml_string(name: &str, item: &Item) -> Result<String> {
let s = item
.as_str()
.with_context(|| format!("configure option {} is not a string", name))?;
.with_context(|| format!("configure option {name} is not a string"))?;
Ok(s.to_string())
}
@@ -653,26 +637,46 @@ fn parse_toml_u64(name: &str, item: &Item) -> Result<u64> {
// for our use, though.
let i: i64 = item
.as_integer()
.with_context(|| format!("configure option {} is not an integer", name))?;
.with_context(|| format!("configure option {name} is not an integer"))?;
if i < 0 {
bail!("configure option {} cannot be negative", name);
bail!("configure option {name} cannot be negative");
}
Ok(i as u64)
}
fn parse_optional_integer<I, E>(name: &str, item: &toml_edit::Item) -> anyhow::Result<Option<I>>
where
I: TryFrom<i64, Error = E>,
E: std::error::Error + Send + Sync + 'static,
{
let toml_integer = match item.get(name) {
Some(item) => item
.as_integer()
.with_context(|| format!("configure option {name} is not an integer"))?,
None => return Ok(None),
};
I::try_from(toml_integer)
.map(Some)
.with_context(|| format!("configure option {name} is too large"))
}
fn parse_toml_duration(name: &str, item: &Item) -> Result<Duration> {
let s = item
.as_str()
.with_context(|| format!("configure option {} is not a string", name))?;
.with_context(|| format!("configure option {name} is not a string"))?;
Ok(humantime::parse_duration(s)?)
}
fn parse_toml_auth_type(name: &str, item: &Item) -> Result<AuthType> {
fn parse_toml_from_str<T>(name: &str, item: &Item) -> Result<T>
where
T: FromStr<Err = anyhow::Error>,
{
let v = item
.as_str()
.with_context(|| format!("configure option {} is not a string", name))?;
AuthType::from_str(v)
.with_context(|| format!("configure option {name} is not a string"))?;
T::from_str(v)
}
#[cfg(test)]
@@ -689,15 +693,6 @@ mod tests {
listen_pg_addr = '127.0.0.1:64000'
listen_http_addr = '127.0.0.1:9898'
checkpoint_distance = 111 # in bytes
compaction_target_size = 111 # in bytes
compaction_period = '111 s'
compaction_threshold = 2
gc_period = '222 s'
gc_horizon = 222
wait_lsn_timeout = '111 s'
wal_redo_timeout = '111 s'
@@ -718,10 +713,8 @@ id = 10
let config_string = format!("pg_distrib_dir='{}'\nid=10", pg_distrib_dir.display());
let toml = config_string.parse()?;
let parsed_config =
PageServerConf::parse_and_validate(&toml, &workdir).unwrap_or_else(|e| {
panic!("Failed to parse config '{}', reason: {}", config_string, e)
});
let parsed_config = PageServerConf::parse_and_validate(&toml, &workdir)
.unwrap_or_else(|e| panic!("Failed to parse config '{config_string}', reason: {e}"));
assert_eq!(
parsed_config,
@@ -729,12 +722,6 @@ id = 10
id: ZNodeId(10),
listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
checkpoint_distance: defaults::DEFAULT_CHECKPOINT_DISTANCE,
compaction_target_size: defaults::DEFAULT_COMPACTION_TARGET_SIZE,
compaction_period: humantime::parse_duration(defaults::DEFAULT_COMPACTION_PERIOD)?,
compaction_threshold: defaults::DEFAULT_COMPACTION_THRESHOLD,
gc_horizon: defaults::DEFAULT_GC_HORIZON,
gc_period: humantime::parse_duration(defaults::DEFAULT_GC_PERIOD)?,
wait_lsn_timeout: humantime::parse_duration(defaults::DEFAULT_WAIT_LSN_TIMEOUT)?,
wal_redo_timeout: humantime::parse_duration(defaults::DEFAULT_WAL_REDO_TIMEOUT)?,
superuser: defaults::DEFAULT_SUPERUSER.to_string(),
@@ -746,6 +733,8 @@ id = 10
auth_validation_public_key_path: None,
remote_storage_config: None,
emit_wal_metadata: false,
profiling: ProfilingConfig::Disabled,
default_tenant_conf: TenantConf::default(),
},
"Correct defaults should be used when no config values are provided"
);
@@ -759,16 +748,13 @@ id = 10
let (workdir, pg_distrib_dir) = prepare_fs(&tempdir)?;
let config_string = format!(
"{}pg_distrib_dir='{}'",
ALL_BASE_VALUES_TOML,
"{ALL_BASE_VALUES_TOML}pg_distrib_dir='{}'",
pg_distrib_dir.display()
);
let toml = config_string.parse()?;
let parsed_config =
PageServerConf::parse_and_validate(&toml, &workdir).unwrap_or_else(|e| {
panic!("Failed to parse config '{}', reason: {}", config_string, e)
});
let parsed_config = PageServerConf::parse_and_validate(&toml, &workdir)
.unwrap_or_else(|e| panic!("Failed to parse config '{config_string}', reason: {e}"));
assert_eq!(
parsed_config,
@@ -776,12 +762,6 @@ id = 10
id: ZNodeId(10),
listen_pg_addr: "127.0.0.1:64000".to_string(),
listen_http_addr: "127.0.0.1:9898".to_string(),
checkpoint_distance: 111,
compaction_target_size: 111,
compaction_period: Duration::from_secs(111),
compaction_threshold: 2,
gc_horizon: 222,
gc_period: Duration::from_secs(222),
wait_lsn_timeout: Duration::from_secs(111),
wal_redo_timeout: Duration::from_secs(111),
superuser: "zzzz".to_string(),
@@ -793,6 +773,8 @@ id = 10
auth_validation_public_key_path: None,
remote_storage_config: None,
emit_wal_metadata: false,
profiling: ProfilingConfig::Disabled,
default_tenant_conf: TenantConf::default(),
},
"Should be able to parse all basic config values correctly"
);
@@ -821,37 +803,33 @@ local_path = '{}'"#,
for remote_storage_config_str in identical_toml_declarations {
let config_string = format!(
r#"{}
r#"{ALL_BASE_VALUES_TOML}
pg_distrib_dir='{}'
{}"#,
ALL_BASE_VALUES_TOML,
{remote_storage_config_str}"#,
pg_distrib_dir.display(),
remote_storage_config_str,
);
let toml = config_string.parse()?;
let parsed_remote_storage_config = PageServerConf::parse_and_validate(&toml, &workdir)
.unwrap_or_else(|e| {
panic!("Failed to parse config '{}', reason: {}", config_string, e)
})
.unwrap_or_else(|e| panic!("Failed to parse config '{config_string}', reason: {e}"))
.remote_storage_config
.expect("Should have remote storage config for the local FS");
assert_eq!(
parsed_remote_storage_config,
RemoteStorageConfig {
max_concurrent_sync: NonZeroUsize::new(
defaults::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNC
)
.unwrap(),
max_sync_errors: NonZeroU32::new(defaults::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS)
parsed_remote_storage_config,
RemoteStorageConfig {
max_concurrent_timelines_sync: NonZeroUsize::new(
defaults::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_TIMELINES_SYNC
)
.unwrap(),
storage: RemoteStorageKind::LocalFs(local_storage_path.clone()),
},
"Remote storage config should correctly parse the local FS config and fill other storage defaults"
);
max_sync_errors: NonZeroU32::new(defaults::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS)
.unwrap(),
storage: RemoteStorageKind::LocalFs(local_storage_path.clone()),
},
"Remote storage config should correctly parse the local FS config and fill other storage defaults"
);
}
Ok(())
}
@@ -867,52 +845,49 @@ pg_distrib_dir='{}'
let access_key_id = "SOMEKEYAAAAASADSAH*#".to_string();
let secret_access_key = "SOMEsEcReTsd292v".to_string();
let endpoint = "http://localhost:5000".to_string();
let max_concurrent_sync = NonZeroUsize::new(111).unwrap();
let max_concurrent_timelines_sync = NonZeroUsize::new(111).unwrap();
let max_sync_errors = NonZeroU32::new(222).unwrap();
let s3_concurrency_limit = NonZeroUsize::new(333).unwrap();
let identical_toml_declarations = &[
format!(
r#"[remote_storage]
max_concurrent_sync = {}
max_sync_errors = {}
bucket_name = '{}'
bucket_region = '{}'
prefix_in_bucket = '{}'
access_key_id = '{}'
secret_access_key = '{}'
endpoint = '{}'"#,
max_concurrent_sync, max_sync_errors, bucket_name, bucket_region, prefix_in_bucket, access_key_id, secret_access_key, endpoint
max_concurrent_timelines_sync = {max_concurrent_timelines_sync}
max_sync_errors = {max_sync_errors}
bucket_name = '{bucket_name}'
bucket_region = '{bucket_region}'
prefix_in_bucket = '{prefix_in_bucket}'
access_key_id = '{access_key_id}'
secret_access_key = '{secret_access_key}'
endpoint = '{endpoint}'
concurrency_limit = {s3_concurrency_limit}"#
),
format!(
"remote_storage={{max_concurrent_sync={}, max_sync_errors={}, bucket_name='{}', bucket_region='{}', prefix_in_bucket='{}', access_key_id='{}', secret_access_key='{}', endpoint='{}'}}",
max_concurrent_sync, max_sync_errors, bucket_name, bucket_region, prefix_in_bucket, access_key_id, secret_access_key, endpoint
"remote_storage={{max_concurrent_timelines_sync={max_concurrent_timelines_sync}, max_sync_errors={max_sync_errors}, bucket_name='{bucket_name}',\
bucket_region='{bucket_region}', prefix_in_bucket='{prefix_in_bucket}', access_key_id='{access_key_id}', secret_access_key='{secret_access_key}', endpoint='{endpoint}', concurrency_limit={s3_concurrency_limit}}}",
),
];
for remote_storage_config_str in identical_toml_declarations {
let config_string = format!(
r#"{}
r#"{ALL_BASE_VALUES_TOML}
pg_distrib_dir='{}'
{}"#,
ALL_BASE_VALUES_TOML,
{remote_storage_config_str}"#,
pg_distrib_dir.display(),
remote_storage_config_str,
);
let toml = config_string.parse()?;
let parsed_remote_storage_config = PageServerConf::parse_and_validate(&toml, &workdir)
.unwrap_or_else(|e| {
panic!("Failed to parse config '{}', reason: {}", config_string, e)
})
.unwrap_or_else(|e| panic!("Failed to parse config '{config_string}', reason: {e}"))
.remote_storage_config
.expect("Should have remote storage config for S3");
assert_eq!(
parsed_remote_storage_config,
RemoteStorageConfig {
max_concurrent_sync,
max_concurrent_timelines_sync,
max_sync_errors,
storage: RemoteStorageKind::AwsS3(S3Config {
bucket_name: bucket_name.clone(),
@@ -920,7 +895,8 @@ pg_distrib_dir='{}'
access_key_id: Some(access_key_id.clone()),
secret_access_key: Some(secret_access_key.clone()),
prefix_in_bucket: Some(prefix_in_bucket.clone()),
endpoint: Some(endpoint.clone())
endpoint: Some(endpoint.clone()),
concurrency_limit: s3_concurrency_limit,
}),
},
"Remote storage config should correctly parse the S3 config"

View File

@@ -1,6 +1,6 @@
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use zenith_utils::{
use utils::{
lsn::Lsn,
zid::{ZNodeId, ZTenantId, ZTimelineId},
};
@@ -20,11 +20,18 @@ pub struct TimelineCreateRequest {
}
#[serde_as]
#[derive(Serialize, Deserialize)]
#[derive(Serialize, Deserialize, Default)]
pub struct TenantCreateRequest {
#[serde(default)]
#[serde_as(as = "Option<DisplayFromStr>")]
pub new_tenant_id: Option<ZTenantId>,
pub checkpoint_distance: Option<u64>,
pub compaction_target_size: Option<u64>,
pub compaction_period: Option<String>,
pub compaction_threshold: Option<usize>,
pub gc_horizon: Option<u64>,
pub gc_period: Option<String>,
pub pitr_interval: Option<String>,
}
#[serde_as]
@@ -36,3 +43,42 @@ pub struct TenantCreateResponse(#[serde_as(as = "DisplayFromStr")] pub ZTenantId
pub struct StatusResponse {
pub id: ZNodeId,
}
impl TenantCreateRequest {
pub fn new(new_tenant_id: Option<ZTenantId>) -> TenantCreateRequest {
TenantCreateRequest {
new_tenant_id,
..Default::default()
}
}
}
#[serde_as]
#[derive(Serialize, Deserialize)]
pub struct TenantConfigRequest {
pub tenant_id: ZTenantId,
#[serde(default)]
#[serde_as(as = "Option<DisplayFromStr>")]
pub checkpoint_distance: Option<u64>,
pub compaction_target_size: Option<u64>,
pub compaction_period: Option<String>,
pub compaction_threshold: Option<usize>,
pub gc_horizon: Option<u64>,
pub gc_period: Option<String>,
pub pitr_interval: Option<String>,
}
impl TenantConfigRequest {
pub fn new(tenant_id: ZTenantId) -> TenantConfigRequest {
TenantConfigRequest {
tenant_id,
checkpoint_distance: None,
compaction_target_size: None,
compaction_period: None,
compaction_threshold: None,
gc_horizon: None,
gc_period: None,
pitr_interval: None,
}
}
}
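
`TenantConfigRequest` mirrors the optional tuning knobs of `TenantCreateRequest`, with `new()` leaving everything unset. A hedged sketch of building a partial update with struct-update syntax; the module paths and the way a `ZTenantId` is obtained are assumptions, not part of this diff.

```rust
// Module paths are assumptions (the diff only shows `super::models`); how a
// ZTenantId is obtained is outside this change, so it is taken as a parameter.
use pageserver::http::models::TenantConfigRequest;
use utils::zid::ZTenantId;

// Set a few tuning knobs and leave the rest as None via `TenantConfigRequest::new`.
fn build_gc_tuning_request(tenant_id: ZTenantId) -> TenantConfigRequest {
    TenantConfigRequest {
        gc_horizon: Some(64 * 1024 * 1024),
        gc_period: Some("100 s".to_string()),
        pitr_interval: Some("7 days".to_string()),
        ..TenantConfigRequest::new(tenant_id)
    }
}
```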

View File

@@ -328,11 +328,7 @@ paths:
content:
application/json:
schema:
type: object
properties:
new_tenant_id:
type: string
format: hex
$ref: "#/components/schemas/TenantCreateInfo"
responses:
"201":
description: New tenant created successfully
@@ -371,7 +367,48 @@ paths:
application/json:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/config:
put:
description: |
Update tenant's config.
requestBody:
content:
application/json:
schema:
$ref: "#/components/schemas/TenantConfigInfo"
responses:
"200":
description: OK
content:
application/json:
schema:
type: array
items:
$ref: "#/components/schemas/TenantInfo"
"400":
description: Malformed tenant config request
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"401":
description: Unauthorized Error
content:
application/json:
schema:
$ref: "#/components/schemas/UnauthorizedError"
"403":
description: Forbidden Error
content:
application/json:
schema:
$ref: "#/components/schemas/ForbiddenError"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
components:
securitySchemes:
JWT:
@@ -389,6 +426,45 @@ components:
type: string
state:
type: string
TenantCreateInfo:
type: object
properties:
new_tenant_id:
type: string
format: hex
tenant_id:
type: string
format: hex
gc_period:
type: string
gc_horizon:
type: integer
pitr_interval:
type: string
checkpoint_distance:
type: integer
compaction_period:
type: string
compaction_threshold:
type: integer
TenantConfigInfo:
type: object
properties:
tenant_id:
type: string
format: hex
gc_period:
type: string
gc_horizon:
type: integer
pitr_interval:
type: string
checkpoint_distance:
type: integer
compaction_period:
type: string
compaction_threshold:
type: integer
TimelineInfo:
type: object
required:
@@ -409,6 +485,7 @@ components:
type: object
required:
- awaits_download
- remote_consistent_lsn
properties:
awaits_download:
type: boolean
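
For reference, a hedged sketch of exercising the new `/v1/tenant/config` endpoint from Rust with reqwest against the default management address 127.0.0.1:9898; the tenant id is a placeholder and the field names follow the `TenantConfigInfo` schema above.

```rust
use serde_json::json;

// Assumes reqwest (with its `json` feature) and tokio; the tenant id is a
// placeholder and the address is the default management listen address.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let body = json!({
        "tenant_id": "00000000000000000000000000000000",
        "gc_period": "100 s",
        "gc_horizon": 67108864,
        "pitr_interval": "7 days",
    });

    let response = reqwest::Client::new()
        .put("http://127.0.0.1:9898/v1/tenant/config")
        .json(&body)
        .send()
        .await?;

    // Per the spec above: 200 returns the updated tenant list, 4xx/5xx return an Error body.
    println!("status: {}", response.status());
    Ok(())
}
```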

View File

@@ -1,36 +1,45 @@
use std::sync::Arc;
use anyhow::Result;
use anyhow::{Context, Result};
use hyper::StatusCode;
use hyper::{Body, Request, Response, Uri};
use tracing::*;
use zenith_utils::auth::JwtAuth;
use zenith_utils::http::endpoint::attach_openapi_ui;
use zenith_utils::http::endpoint::auth_middleware;
use zenith_utils::http::endpoint::check_permission;
use zenith_utils::http::error::ApiError;
use zenith_utils::http::{
endpoint,
error::HttpErrorBody,
json::{json_request, json_response},
request::parse_request_param,
};
use zenith_utils::http::{RequestExt, RouterBuilder};
use zenith_utils::zid::{ZTenantTimelineId, ZTimelineId};
use super::models::{
StatusResponse, TenantCreateRequest, TenantCreateResponse, TimelineCreateRequest,
StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse,
TimelineCreateRequest,
};
use crate::config::RemoteStorageKind;
use crate::remote_storage::{
download_index_part, schedule_timeline_download, LocalFs, RemoteIndex, RemoteTimeline, S3Bucket,
};
use crate::remote_storage::{schedule_timeline_download, RemoteIndex};
use crate::repository::Repository;
use crate::tenant_config::TenantConfOpt;
use crate::timelines::{LocalTimelineInfo, RemoteTimelineInfo, TimelineInfo};
use crate::{config::PageServerConf, tenant_mgr, timelines, ZTenantId};
use crate::{config::PageServerConf, tenant_mgr, timelines};
use utils::{
auth::JwtAuth,
http::{
endpoint::{self, attach_openapi_ui, auth_middleware, check_permission},
error::{ApiError, HttpErrorBody},
json::{json_request, json_response},
request::parse_request_param,
RequestExt, RouterBuilder,
},
zid::{ZTenantId, ZTenantTimelineId, ZTimelineId},
};
struct State {
conf: &'static PageServerConf,
auth: Option<Arc<JwtAuth>>,
remote_index: RemoteIndex,
allowlist_routes: Vec<Uri>,
remote_storage: Option<GenericRemoteStorage>,
}
enum GenericRemoteStorage {
Local(LocalFs),
S3(S3Bucket),
}
impl State {
@@ -38,17 +47,34 @@ impl State {
conf: &'static PageServerConf,
auth: Option<Arc<JwtAuth>>,
remote_index: RemoteIndex,
) -> Self {
) -> anyhow::Result<Self> {
let allowlist_routes = ["/v1/status", "/v1/doc", "/swagger.yml"]
.iter()
.map(|v| v.parse().unwrap())
.collect::<Vec<_>>();
Self {
// Note that this remote storage is created separately from the main one in the sync_loop.
// It's fine since it's stateless, and a bit of code duplication saves us from bloating the code with generics.
let remote_storage = conf
.remote_storage_config
.as_ref()
.map(|storage_config| match &storage_config.storage {
RemoteStorageKind::LocalFs(root) => {
LocalFs::new(root.clone(), &conf.workdir).map(GenericRemoteStorage::Local)
}
RemoteStorageKind::AwsS3(s3_config) => {
S3Bucket::new(s3_config, &conf.workdir).map(GenericRemoteStorage::S3)
}
})
.transpose()
.context("Failed to init generic remote storage")?;
Ok(Self {
conf,
auth,
allowlist_routes,
remote_index,
}
remote_storage,
})
}
}
@@ -122,8 +148,8 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,
timeline_id,
})
.map(|remote_entry| RemoteTimelineInfo {
remote_consistent_lsn: remote_entry.disk_consistent_lsn(),
awaits_download: remote_entry.get_awaits_download(),
remote_consistent_lsn: remote_entry.metadata.disk_consistent_lsn(),
awaits_download: remote_entry.awaits_download,
}),
})
}
@@ -184,8 +210,8 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
timeline_id,
})
.map(|remote_entry| RemoteTimelineInfo {
remote_consistent_lsn: remote_entry.disk_consistent_lsn(),
awaits_download: remote_entry.get_awaits_download(),
remote_consistent_lsn: remote_entry.metadata.disk_consistent_lsn(),
awaits_download: remote_entry.awaits_download,
})
};
@@ -212,41 +238,105 @@ async fn timeline_attach_handler(request: Request<Body>) -> Result<Response<Body
check_permission(&request, Some(tenant_id))?;
let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
let span = info_span!("timeline_attach_handler", tenant = %tenant_id, timeline = %timeline_id);
info!(
"Handling timeline {} attach for tenant: {}",
timeline_id, tenant_id,
);
let span = tokio::task::spawn_blocking(move || {
let entered = span.entered();
tokio::task::spawn_blocking(move || {
if tenant_mgr::get_timeline_for_tenant_load(tenant_id, timeline_id).is_ok() {
// TODO: maybe answer with 304 Not Modified here?
anyhow::bail!("Timeline is already present locally")
};
Ok(entered.exit())
Ok(())
})
.await
.map_err(ApiError::from_err)??;
let mut remote_index_write = get_state(&request).remote_index.write().await;
let sync_id = ZTenantTimelineId {
tenant_id,
timeline_id,
};
let state = get_state(&request);
let remote_index = &state.remote_index;
let _enter = span.entered(); // entered guard cannot live across awaits (non Send)
let index_entry = remote_index_write
.timeline_entry_mut(&ZTenantTimelineId {
tenant_id,
timeline_id,
})
.ok_or_else(|| ApiError::NotFound("Unknown remote timeline".to_string()))?;
let mut index_accessor = remote_index.write().await;
if let Some(remote_timeline) = index_accessor.timeline_entry_mut(&sync_id) {
if remote_timeline.awaits_download {
return Err(ApiError::Conflict(
"Timeline download is already in progress".to_string(),
));
}
if index_entry.get_awaits_download() {
return Err(ApiError::Conflict(
"Timeline download is already in progress".to_string(),
));
remote_timeline.awaits_download = true;
schedule_timeline_download(tenant_id, timeline_id);
return json_response(StatusCode::ACCEPTED, ());
} else {
// no timeline in the index, release the lock before starting the potentially lengthy download operation
drop(index_accessor);
}
index_entry.set_awaits_download(true);
schedule_timeline_download(tenant_id, timeline_id);
let new_timeline = match try_download_shard_data(state, sync_id).await {
Ok(Some(mut new_timeline)) => {
tokio::fs::create_dir_all(state.conf.timeline_path(&timeline_id, &tenant_id))
.await
.context("Failed to create new timeline directory")?;
new_timeline.awaits_download = true;
new_timeline
}
Ok(None) => return Err(ApiError::NotFound("Unknown remote timeline".to_string())),
Err(e) => {
error!("Failed to retrieve remote timeline data: {:?}", e);
return Err(ApiError::NotFound(
"Failed to retrieve remote timeline".to_string(),
));
}
};
let mut index_accessor = remote_index.write().await;
match index_accessor.timeline_entry_mut(&sync_id) {
Some(remote_timeline) => {
if remote_timeline.awaits_download {
return Err(ApiError::Conflict(
"Timeline download is already in progress".to_string(),
));
}
remote_timeline.awaits_download = true;
}
None => index_accessor.add_timeline_entry(sync_id, new_timeline),
}
schedule_timeline_download(tenant_id, timeline_id);
json_response(StatusCode::ACCEPTED, ())
}
async fn try_download_shard_data(
state: &State,
sync_id: ZTenantTimelineId,
) -> anyhow::Result<Option<RemoteTimeline>> {
let shard = match state.remote_storage.as_ref() {
Some(GenericRemoteStorage::Local(local_storage)) => {
download_index_part(state.conf, local_storage, sync_id).await
}
Some(GenericRemoteStorage::S3(s3_storage)) => {
download_index_part(state.conf, s3_storage, sync_id).await
}
None => return Ok(None),
}
.with_context(|| format!("Failed to download index shard for timeline {}", sync_id))?;
let timeline_path = state
.conf
.timeline_path(&sync_id.timeline_id, &sync_id.tenant_id);
RemoteTimeline::from_index_part(&timeline_path, shard)
.map(Some)
.with_context(|| {
format!(
"Failed to convert index shard into remote timeline for timeline {}",
sync_id
)
})
}
async fn timeline_detach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
@@ -287,6 +377,27 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
let request_data: TenantCreateRequest = json_request(&mut request).await?;
let remote_index = get_state(&request).remote_index.clone();
let mut tenant_conf: TenantConfOpt = Default::default();
if let Some(gc_period) = request_data.gc_period {
tenant_conf.gc_period =
Some(humantime::parse_duration(&gc_period).map_err(ApiError::from_err)?);
}
tenant_conf.gc_horizon = request_data.gc_horizon;
if let Some(pitr_interval) = request_data.pitr_interval {
tenant_conf.pitr_interval =
Some(humantime::parse_duration(&pitr_interval).map_err(ApiError::from_err)?);
}
tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
tenant_conf.compaction_target_size = request_data.compaction_target_size;
tenant_conf.compaction_threshold = request_data.compaction_threshold;
if let Some(compaction_period) = request_data.compaction_period {
tenant_conf.compaction_period =
Some(humantime::parse_duration(&compaction_period).map_err(ApiError::from_err)?);
}
let target_tenant_id = request_data
.new_tenant_id
.map(ZTenantId::from)
@@ -294,8 +405,9 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
let new_tenant_id = tokio::task::spawn_blocking(move || {
let _enter = info_span!("tenant_create", tenant = ?target_tenant_id).entered();
let conf = get_config(&request);
tenant_mgr::create_tenant_repository(get_config(&request), target_tenant_id, remote_index)
tenant_mgr::create_tenant_repository(conf, tenant_conf, target_tenant_id, remote_index)
})
.await
.map_err(ApiError::from_err)??;
@@ -306,6 +418,44 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
})
}
async fn tenant_config_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
let request_data: TenantConfigRequest = json_request(&mut request).await?;
let tenant_id = request_data.tenant_id;
// check for management permission
check_permission(&request, Some(tenant_id))?;
let mut tenant_conf: TenantConfOpt = Default::default();
if let Some(gc_period) = request_data.gc_period {
tenant_conf.gc_period =
Some(humantime::parse_duration(&gc_period).map_err(ApiError::from_err)?);
}
tenant_conf.gc_horizon = request_data.gc_horizon;
if let Some(pitr_interval) = request_data.pitr_interval {
tenant_conf.pitr_interval =
Some(humantime::parse_duration(&pitr_interval).map_err(ApiError::from_err)?);
}
tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
tenant_conf.compaction_target_size = request_data.compaction_target_size;
tenant_conf.compaction_threshold = request_data.compaction_threshold;
if let Some(compaction_period) = request_data.compaction_period {
tenant_conf.compaction_period =
Some(humantime::parse_duration(&compaction_period).map_err(ApiError::from_err)?);
}
tokio::task::spawn_blocking(move || {
let _enter = info_span!("tenant_config", tenant = ?tenant_id).entered();
tenant_mgr::update_tenant_config(tenant_conf, tenant_id)
})
.await
.map_err(ApiError::from_err)??;
json_response(StatusCode::OK, ())
}
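// [Illustrative sketch, not part of this handler file.] The duration-valued
// fields accepted by the tenant create/config handlers above (gc_period,
// pitr_interval, compaction_period) are parsed with humantime::parse_duration;
// the function name and the concrete values below are made up and only
// demonstrate the accepted format.
fn humantime_format_example() {
    use std::time::Duration;
    assert_eq!(
        humantime::parse_duration("100s").unwrap(),
        Duration::from_secs(100)
    );
    assert_eq!(
        humantime::parse_duration("1h 30m").unwrap(),
        Duration::from_secs(90 * 60)
    );
}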
async fn handler_404(_: Request<Body>) -> Result<Response<Body>, ApiError> {
json_response(
StatusCode::NOT_FOUND,
@@ -317,7 +467,7 @@ pub fn make_router(
conf: &'static PageServerConf,
auth: Option<Arc<JwtAuth>>,
remote_index: RemoteIndex,
) -> RouterBuilder<hyper::Body, ApiError> {
) -> anyhow::Result<RouterBuilder<hyper::Body, ApiError>> {
let spec = include_bytes!("openapi_spec.yml");
let mut router = attach_openapi_ui(endpoint::make_router(), spec, "/swagger.yml", "/v1/doc");
if auth.is_some() {
@@ -331,11 +481,14 @@ pub fn make_router(
}))
}
router
.data(Arc::new(State::new(conf, auth, remote_index)))
Ok(router
.data(Arc::new(
State::new(conf, auth, remote_index).context("Failed to initialize router state")?,
))
.get("/v1/status", status_handler)
.get("/v1/tenant", tenant_list_handler)
.post("/v1/tenant", tenant_create_handler)
.put("/v1/tenant/config", tenant_config_handler)
.get("/v1/tenant/:tenant_id/timeline", timeline_list_handler)
.post("/v1/tenant/:tenant_id/timeline", timeline_create_handler)
.get(
@@ -350,5 +503,5 @@ pub fn make_router(
"/v1/tenant/:tenant_id/timeline/:timeline_id/detach",
timeline_detach_handler,
)
.any(handler_404)
.any(handler_404))
}

View File

@@ -20,7 +20,7 @@ use postgres_ffi::waldecoder::*;
use postgres_ffi::xlog_utils::*;
use postgres_ffi::{pg_constants, ControlFileData, DBState_DB_SHUTDOWNED};
use postgres_ffi::{Oid, TransactionId};
use zenith_utils::lsn::Lsn;
use utils::lsn::Lsn;
///
/// Import all relation data pages from local disk into the repository.

View File

@@ -29,11 +29,13 @@ use std::ops::{Bound::Included, Deref, Range};
use std::path::{Path, PathBuf};
use std::sync::atomic::{self, AtomicBool};
use std::sync::{Arc, Mutex, MutexGuard, RwLock, RwLockReadGuard, TryLockError};
use std::time::Instant;
use std::time::{Duration, Instant, SystemTime};
use self::metadata::{metadata_path, TimelineMetadata, METADATA_FILE_NAME};
use crate::config::PageServerConf;
use crate::keyspace::KeySpace;
use crate::tenant_config::{TenantConf, TenantConfOpt};
use crate::page_cache;
use crate::remote_storage::{schedule_timeline_checkpoint_upload, RemoteIndex};
use crate::repository::{
@@ -46,15 +48,18 @@ use crate::virtual_file::VirtualFile;
use crate::walreceiver::IS_WAL_RECEIVER;
use crate::walredo::WalRedoManager;
use crate::CheckpointConfig;
use crate::{ZTenantId, ZTimelineId};
use zenith_metrics::{
register_histogram_vec, register_int_counter, register_int_gauge_vec, Histogram, HistogramVec,
IntCounter, IntGauge, IntGaugeVec,
use metrics::{
register_histogram_vec, register_int_counter, register_int_counter_vec, register_int_gauge_vec,
Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec,
};
use toml_edit;
use utils::{
crashsafe_dir,
lsn::{AtomicLsn, Lsn, RecordLsn},
seqwait::SeqWait,
zid::{ZTenantId, ZTimelineId},
};
use zenith_utils::crashsafe_dir;
use zenith_utils::lsn::{AtomicLsn, Lsn, RecordLsn};
use zenith_utils::seqwait::SeqWait;
mod blob_io;
pub mod block_io;
@@ -101,6 +106,21 @@ lazy_static! {
.expect("failed to define a metric");
}
lazy_static! {
static ref MATERIALIZED_PAGE_CACHE_HIT: IntCounterVec = register_int_counter_vec!(
"materialize_page_cache_hits",
"Number of cache hits from materialized page cache",
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric");
static ref WAIT_LSN_TIME: HistogramVec = register_histogram_vec!(
"wait_lsn_time",
"Time spent waiting for WAL to arrive",
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric");
}
lazy_static! {
static ref LAST_RECORD_LSN: IntGaugeVec = register_int_gauge_vec!(
"pageserver_last_record_lsn",
@@ -132,7 +152,15 @@ pub const TIMELINES_SEGMENT_NAME: &str = "timelines";
/// Repository consists of multiple timelines. Keep them in a hash table.
///
pub struct LayeredRepository {
// Global pageserver config parameters
pub conf: &'static PageServerConf,
// Overridden tenant-specific config parameters.
// We keep the TenantConfOpt struct here to preserve the information
// about parameters that are not set.
// This is necessary to allow global config updates.
tenant_conf: Arc<RwLock<TenantConfOpt>>,
tenantid: ZTenantId,
timelines: Mutex<HashMap<ZTimelineId, LayeredTimelineEntry>>,
// This mutex prevents creation of new timelines during GC.
@@ -202,6 +230,7 @@ impl Repository for LayeredRepository {
let timeline = LayeredTimeline::new(
self.conf,
Arc::clone(&self.tenant_conf),
metadata,
None,
timelineid,
@@ -285,6 +314,7 @@ impl Repository for LayeredRepository {
&self,
target_timelineid: Option<ZTimelineId>,
horizon: u64,
pitr: Duration,
checkpoint_before_gc: bool,
) -> Result<GcResult> {
let timeline_str = target_timelineid
@@ -294,7 +324,7 @@ impl Repository for LayeredRepository {
STORAGE_TIME
.with_label_values(&["gc", &self.tenantid.to_string(), &timeline_str])
.observe_closure_duration(|| {
self.gc_iteration_internal(target_timelineid, horizon, checkpoint_before_gc)
self.gc_iteration_internal(target_timelineid, horizon, pitr, checkpoint_before_gc)
})
}
@@ -387,8 +417,6 @@ impl Repository for LayeredRepository {
timeline_id, timeline_sync_status_update
);
match timeline_sync_status_update {
TimelineSyncStatusUpdate::Uploaded => { /* nothing to do, remote consistent lsn is managed by the remote storage */
}
TimelineSyncStatusUpdate::Downloaded => {
match self.timelines.lock().unwrap().entry(timeline_id) {
Entry::Occupied(_) => bail!("We completed a download for a timeline that already exists in repository. This is a bug."),
@@ -465,6 +493,64 @@ impl From<LayeredTimelineEntry> for RepositoryTimeline<LayeredTimeline> {
/// Private functions
impl LayeredRepository {
pub fn get_checkpoint_distance(&self) -> u64 {
let tenant_conf = self.tenant_conf.read().unwrap();
tenant_conf
.checkpoint_distance
.unwrap_or(self.conf.default_tenant_conf.checkpoint_distance)
}
pub fn get_compaction_target_size(&self) -> u64 {
let tenant_conf = self.tenant_conf.read().unwrap();
tenant_conf
.compaction_target_size
.unwrap_or(self.conf.default_tenant_conf.compaction_target_size)
}
pub fn get_compaction_period(&self) -> Duration {
let tenant_conf = self.tenant_conf.read().unwrap();
tenant_conf
.compaction_period
.unwrap_or(self.conf.default_tenant_conf.compaction_period)
}
pub fn get_compaction_threshold(&self) -> usize {
let tenant_conf = self.tenant_conf.read().unwrap();
tenant_conf
.compaction_threshold
.unwrap_or(self.conf.default_tenant_conf.compaction_threshold)
}
pub fn get_gc_horizon(&self) -> u64 {
let tenant_conf = self.tenant_conf.read().unwrap();
tenant_conf
.gc_horizon
.unwrap_or(self.conf.default_tenant_conf.gc_horizon)
}
pub fn get_gc_period(&self) -> Duration {
let tenant_conf = self.tenant_conf.read().unwrap();
tenant_conf
.gc_period
.unwrap_or(self.conf.default_tenant_conf.gc_period)
}
pub fn get_pitr_interval(&self) -> Duration {
let tenant_conf = self.tenant_conf.read().unwrap();
tenant_conf
.pitr_interval
.unwrap_or(self.conf.default_tenant_conf.pitr_interval)
}
pub fn update_tenant_config(&self, new_tenant_conf: TenantConfOpt) -> Result<()> {
let mut tenant_conf = self.tenant_conf.write().unwrap();
tenant_conf.update(&new_tenant_conf);
LayeredRepository::persist_tenant_config(self.conf, self.tenantid, *tenant_conf)?;
Ok(())
}
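// [Illustrative sketch.] The update above relies on TenantConfOpt keeping
// unset parameters as None, so a per-tenant override only replaces the fields
// that were explicitly provided. A minimal standalone version of that merge
// semantic, with a made-up two-field struct (the real TenantConfOpt has more
// fields):
#[derive(Default, Clone, Copy)]
struct TenantConfOptSketch {
    gc_horizon: Option<u64>,
    checkpoint_distance: Option<u64>,
}
impl TenantConfOptSketch {
    fn update(&mut self, other: &TenantConfOptSketch) {
        // Only explicitly-set fields override; None keeps the current value,
        // which in turn falls back to the global default at read time.
        if let Some(v) = other.gc_horizon {
            self.gc_horizon = Some(v);
        }
        if let Some(v) = other.checkpoint_distance {
            self.checkpoint_distance = Some(v);
        }
    }
}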
// Implementation of the public `get_timeline` function.
// Differences from the public:
// * interface in that the caller must already hold the mutex on the 'timelines' hashmap.
@@ -538,8 +624,10 @@ impl LayeredRepository {
.flatten()
.map(LayeredTimelineEntry::Loaded);
let _enter = info_span!("loading local timeline").entered();
let timeline = LayeredTimeline::new(
self.conf,
Arc::clone(&self.tenant_conf),
metadata,
ancestor,
timelineid,
@@ -556,6 +644,7 @@ impl LayeredRepository {
pub fn new(
conf: &'static PageServerConf,
tenant_conf: TenantConfOpt,
walredo_mgr: Arc<dyn WalRedoManager + Send + Sync>,
tenantid: ZTenantId,
remote_index: RemoteIndex,
@@ -564,6 +653,7 @@ impl LayeredRepository {
LayeredRepository {
tenantid,
conf,
tenant_conf: Arc::new(RwLock::new(tenant_conf)),
timelines: Mutex::new(HashMap::new()),
gc_cs: Mutex::new(()),
walredo_mgr,
@@ -572,6 +662,71 @@ impl LayeredRepository {
}
}
/// Locate and load config
pub fn load_tenant_config(
conf: &'static PageServerConf,
tenantid: ZTenantId,
) -> anyhow::Result<TenantConfOpt> {
let target_config_path = TenantConf::path(conf, tenantid);
info!("load tenantconf from {}", target_config_path.display());
// FIXME If the config file is not found, assume that we're attaching
// a detached tenant and config is passed via attach command.
// https://github.com/neondatabase/neon/issues/1555
if !target_config_path.exists() {
info!(
"Zenith tenant config is not found in {}",
target_config_path.display()
);
return Ok(Default::default());
}
// load and parse file
let config = fs::read_to_string(target_config_path)?;
let toml = config.parse::<toml_edit::Document>()?;
let mut tenant_conf: TenantConfOpt = Default::default();
for (key, item) in toml.iter() {
match key {
"tenant_conf" => {
tenant_conf = PageServerConf::parse_toml_tenant_conf(item)?;
}
_ => bail!("unrecognized pageserver option '{}'", key),
}
}
Ok(tenant_conf)
}
pub fn persist_tenant_config(
conf: &'static PageServerConf,
tenantid: ZTenantId,
tenant_conf: TenantConfOpt,
) -> anyhow::Result<()> {
let _enter = info_span!("saving tenantconf").entered();
let target_config_path = TenantConf::path(conf, tenantid);
info!("save tenantconf to {}", target_config_path.display());
let mut conf_content = r#"# This file contains a specific per-tenant's config.
# It is read in case of pageserver restart.
# [tenant_config]
"#
.to_string();
// Convert the config to a toml file.
conf_content += &toml_edit::easy::to_string(&tenant_conf)?;
fs::write(&target_config_path, conf_content).with_context(|| {
format!(
"Failed to write config file into path '{}'",
target_config_path.display()
)
})
}
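// [Illustrative sketch.] load_tenant_config above iterates over the top-level
// keys of the persisted TOML document and only recognizes "tenant_conf". A
// minimal standalone example of that parsing step; the function name, file
// contents and field values here are made up:
fn parse_tenant_conf_example() -> anyhow::Result<()> {
    let config = r#"
[tenant_conf]
gc_horizon = 67108864
compaction_threshold = 10
"#;
    let toml = config.parse::<toml_edit::Document>()?;
    for (key, item) in toml.iter() {
        match key {
            "tenant_conf" => assert!(item.is_table()),
            other => anyhow::bail!("unrecognized option '{}'", other),
        }
    }
    Ok(())
}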
/// Save timeline metadata to file
fn save_metadata(
conf: &'static PageServerConf,
@@ -647,10 +802,12 @@ impl LayeredRepository {
&self,
target_timelineid: Option<ZTimelineId>,
horizon: u64,
pitr: Duration,
checkpoint_before_gc: bool,
) -> Result<GcResult> {
let _span_guard =
info_span!("gc iteration", tenant = %self.tenantid, timeline = ?target_timelineid);
info_span!("gc iteration", tenant = %self.tenantid, timeline = ?target_timelineid)
.entered();
let mut totals: GcResult = Default::default();
let now = Instant::now();
@@ -722,7 +879,7 @@ impl LayeredRepository {
timeline.checkpoint(CheckpointConfig::Forced)?;
info!("timeline {} checkpoint_before_gc done", timelineid);
}
timeline.update_gc_info(branchpoints, cutoff);
timeline.update_gc_info(branchpoints, cutoff, pitr);
let result = timeline.gc()?;
totals += result;
@@ -737,6 +894,7 @@ impl LayeredRepository {
pub struct LayeredTimeline {
conf: &'static PageServerConf,
tenant_conf: Arc<RwLock<TenantConfOpt>>,
tenantid: ZTenantId,
timelineid: ZTimelineId,
@@ -779,10 +937,12 @@ pub struct LayeredTimeline {
// Metrics
reconstruct_time_histo: Histogram,
materialized_page_cache_hit_counter: IntCounter,
flush_time_histo: Histogram,
compact_time_histo: Histogram,
create_images_time_histo: Histogram,
last_record_gauge: IntGauge,
wait_lsn_time_histo: Histogram,
/// If `true`, will backup its files that appear after each checkpointing to the remote storage.
upload_layers: AtomicBool,
@@ -839,6 +999,11 @@ struct GcInfo {
///
/// FIXME: is this inclusive or exclusive?
cutoff: Lsn,
/// In addition to 'retain_lsns', keep everything newer than 'SystemTime::now()'
/// minus 'pitr_interval'
///
pitr: Duration,
}
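// [Illustrative sketch.] GC combines the LSN cutoff above with a wall-clock
// check: a layer is also kept if its file modification time falls within the
// PITR window. A minimal standalone version of that check (the helper name is
// made up; the real logic lives in the gc() implementation further down in
// this file):
fn is_within_pitr_window(
    last_modified: std::time::SystemTime,
    pitr: std::time::Duration,
) -> bool {
    match std::time::SystemTime::now().duration_since(last_modified) {
        Ok(age) => age < pitr,
        // `duration_since` fails if the file looks newer than "now" (clock
        // skew); treat it as recent and keep the layer.
        Err(_) => true,
    }
}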
/// Public interface functions
@@ -862,14 +1027,15 @@ impl Timeline for LayeredTimeline {
"wait_lsn called by WAL receiver thread"
);
self.last_record_lsn
.wait_for_timeout(lsn, self.conf.wait_lsn_timeout)
.with_context(|| {
format!(
"Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}",
lsn, self.get_last_record_lsn(), self.get_disk_consistent_lsn()
)
})?;
self.wait_lsn_time_histo.observe_closure_duration(
|| self.last_record_lsn
.wait_for_timeout(lsn, self.conf.wait_lsn_timeout)
.with_context(|| {
format!(
"Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}",
lsn, self.get_last_record_lsn(), self.get_disk_consistent_lsn()
)
}))?;
Ok(())
}
@@ -968,12 +1134,34 @@ impl Timeline for LayeredTimeline {
}
impl LayeredTimeline {
fn get_checkpoint_distance(&self) -> u64 {
let tenant_conf = self.tenant_conf.read().unwrap();
tenant_conf
.checkpoint_distance
.unwrap_or(self.conf.default_tenant_conf.checkpoint_distance)
}
fn get_compaction_target_size(&self) -> u64 {
let tenant_conf = self.tenant_conf.read().unwrap();
tenant_conf
.compaction_target_size
.unwrap_or(self.conf.default_tenant_conf.compaction_target_size)
}
fn get_compaction_threshold(&self) -> usize {
let tenant_conf = self.tenant_conf.read().unwrap();
tenant_conf
.compaction_threshold
.unwrap_or(self.conf.default_tenant_conf.compaction_threshold)
}
/// Open a Timeline handle.
///
/// Loads the metadata for the timeline into memory, but not the layer map.
#[allow(clippy::too_many_arguments)]
fn new(
conf: &'static PageServerConf,
tenant_conf: Arc<RwLock<TenantConfOpt>>,
metadata: TimelineMetadata,
ancestor: Option<LayeredTimelineEntry>,
timelineid: ZTimelineId,
@@ -984,6 +1172,9 @@ impl LayeredTimeline {
let reconstruct_time_histo = RECONSTRUCT_TIME
.get_metric_with_label_values(&[&tenantid.to_string(), &timelineid.to_string()])
.unwrap();
let materialized_page_cache_hit_counter = MATERIALIZED_PAGE_CACHE_HIT
.get_metric_with_label_values(&[&tenantid.to_string(), &timelineid.to_string()])
.unwrap();
let flush_time_histo = STORAGE_TIME
.get_metric_with_label_values(&[
"layer flush",
@@ -1008,9 +1199,13 @@ impl LayeredTimeline {
let last_record_gauge = LAST_RECORD_LSN
.get_metric_with_label_values(&[&tenantid.to_string(), &timelineid.to_string()])
.unwrap();
let wait_lsn_time_histo = WAIT_LSN_TIME
.get_metric_with_label_values(&[&tenantid.to_string(), &timelineid.to_string()])
.unwrap();
LayeredTimeline {
conf,
tenant_conf,
timelineid,
tenantid,
layers: RwLock::new(LayerMap::default()),
@@ -1030,10 +1225,12 @@ impl LayeredTimeline {
ancestor_lsn: metadata.ancestor_lsn(),
reconstruct_time_histo,
materialized_page_cache_hit_counter,
flush_time_histo,
compact_time_histo,
create_images_time_histo,
last_record_gauge,
wait_lsn_time_histo,
upload_layers: AtomicBool::new(upload_layers),
@@ -1044,6 +1241,7 @@ impl LayeredTimeline {
gc_info: RwLock::new(GcInfo {
retain_lsns: Vec::new(),
cutoff: Lsn(0),
pitr: Duration::ZERO,
}),
latest_gc_cutoff_lsn: RwLock::new(metadata.latest_gc_cutoff_lsn()),
@@ -1150,6 +1348,12 @@ impl LayeredTimeline {
let mut path: Vec<(ValueReconstructResult, Lsn, Arc<dyn Layer>)> = Vec::new();
let cached_lsn = if let Some((cached_lsn, _)) = &reconstruct_state.img {
*cached_lsn
} else {
Lsn(0)
};
// 'prev_lsn' tracks the last LSN that we were at in our search. It's used
// to check that each iteration makes some progress, to break infinite
// looping if something goes wrong.
@@ -1160,10 +1364,15 @@ impl LayeredTimeline {
'outer: loop {
// The function should have updated 'state'
//info!("CALLED for {} at {}: {:?} with {} records", reconstruct_state.key, reconstruct_state.lsn, result, reconstruct_state.records.len());
//info!("CALLED for {} at {}: {:?} with {} records, cached {}", key, cont_lsn, result, reconstruct_state.records.len(), cached_lsn);
match result {
ValueReconstructResult::Complete => return Ok(()),
ValueReconstructResult::Continue => {
// If we reached an earlier cached page image, we're done.
if cont_lsn == cached_lsn + 1 {
self.materialized_page_cache_hit_counter.inc_by(1);
return Ok(());
}
if prev_lsn <= cont_lsn {
// Didn't make any progress in last iteration. Error out to avoid
// getting stuck in the loop.
@@ -1217,12 +1426,15 @@ impl LayeredTimeline {
let start_lsn = open_layer.get_lsn_range().start;
if cont_lsn > start_lsn {
//info!("CHECKING for {} at {} on open layer {}", key, cont_lsn, open_layer.filename().display());
// Get all the data needed to reconstruct the page version from this layer.
// But if we have an older cached page image, no need to go past that.
let lsn_floor = max(cached_lsn + 1, start_lsn);
result = open_layer.get_value_reconstruct_data(
key,
open_layer.get_lsn_range().start..cont_lsn,
lsn_floor..cont_lsn,
reconstruct_state,
)?;
cont_lsn = start_lsn;
cont_lsn = lsn_floor;
path.push((result, cont_lsn, open_layer.clone()));
continue;
}
@@ -1231,12 +1443,13 @@ impl LayeredTimeline {
let start_lsn = frozen_layer.get_lsn_range().start;
if cont_lsn > start_lsn {
//info!("CHECKING for {} at {} on frozen layer {}", key, cont_lsn, frozen_layer.filename().display());
let lsn_floor = max(cached_lsn + 1, start_lsn);
result = frozen_layer.get_value_reconstruct_data(
key,
frozen_layer.get_lsn_range().start..cont_lsn,
lsn_floor..cont_lsn,
reconstruct_state,
)?;
cont_lsn = start_lsn;
cont_lsn = lsn_floor;
path.push((result, cont_lsn, frozen_layer.clone()));
continue 'outer;
}
@@ -1245,6 +1458,7 @@ impl LayeredTimeline {
if let Some(SearchResult { lsn_floor, layer }) = layers.search(key, cont_lsn)? {
//info!("CHECKING for {} at {} on historic layer {}", key, cont_lsn, layer.filename().display());
let lsn_floor = max(cached_lsn + 1, lsn_floor);
result = layer.get_value_reconstruct_data(
key,
lsn_floor..cont_lsn,
@@ -1252,10 +1466,10 @@ impl LayeredTimeline {
)?;
cont_lsn = lsn_floor;
path.push((result, cont_lsn, layer));
} else if self.ancestor_timeline.is_some() {
} else if timeline.ancestor_timeline.is_some() {
// Nothing on this timeline. Traverse to parent
result = ValueReconstructResult::Continue;
cont_lsn = Lsn(self.ancestor_lsn.0 + 1);
cont_lsn = Lsn(timeline.ancestor_lsn.0 + 1);
} else {
// Nothing found
result = ValueReconstructResult::Missing;
@@ -1388,7 +1602,7 @@ impl LayeredTimeline {
let last_lsn = self.get_last_record_lsn();
let distance = last_lsn.widening_sub(self.last_freeze_at.load());
if distance >= self.conf.checkpoint_distance.into() {
if distance >= self.get_checkpoint_distance().into() {
self.freeze_inmem_layer(true);
self.last_freeze_at.store(last_lsn);
}
@@ -1548,7 +1762,7 @@ impl LayeredTimeline {
schedule_timeline_checkpoint_upload(
self.tenantid,
self.timelineid,
vec![new_delta_path],
new_delta_path,
metadata,
);
}
@@ -1597,13 +1811,15 @@ impl LayeredTimeline {
// above. Rewrite it.
let _compaction_cs = self.compaction_cs.lock().unwrap();
let target_file_size = self.conf.checkpoint_distance;
let target_file_size = self.get_checkpoint_distance();
// Define partitioning schema if needed
if let Ok(pgdir) = tenant_mgr::get_timeline_for_tenant_load(self.tenantid, self.timelineid)
{
let (partitioning, lsn) =
pgdir.repartition(self.get_last_record_lsn(), self.conf.compaction_target_size)?;
let (partitioning, lsn) = pgdir.repartition(
self.get_last_record_lsn(),
self.get_compaction_target_size(),
)?;
let timer = self.create_images_time_histo.start_timer();
// 2. Create new image layers for partitions that have been modified
// "enough".
@@ -1704,7 +1920,7 @@ impl LayeredTimeline {
// We compact or "shuffle" the level-0 delta layers when they've
// accumulated over the compaction threshold.
if level0_deltas.len() < self.conf.compaction_threshold {
if level0_deltas.len() < self.get_compaction_threshold() {
return Ok(());
}
drop(layers);
@@ -1827,10 +2043,11 @@ impl LayeredTimeline {
/// the latest LSN subtracted by a constant, and doesn't do anything smart
/// to figure out what read-only nodes might actually need.)
///
fn update_gc_info(&self, retain_lsns: Vec<Lsn>, cutoff: Lsn) {
fn update_gc_info(&self, retain_lsns: Vec<Lsn>, cutoff: Lsn, pitr: Duration) {
let mut gc_info = self.gc_info.write().unwrap();
gc_info.retain_lsns = retain_lsns;
gc_info.cutoff = cutoff;
gc_info.pitr = pitr;
}
///
@@ -1841,7 +2058,7 @@ impl LayeredTimeline {
/// obsolete.
///
fn gc(&self) -> Result<GcResult> {
let now = Instant::now();
let now = SystemTime::now();
let mut result: GcResult = Default::default();
let disk_consistent_lsn = self.get_disk_consistent_lsn();
@@ -1850,6 +2067,7 @@ impl LayeredTimeline {
let gc_info = self.gc_info.read().unwrap();
let retain_lsns = &gc_info.retain_lsns;
let cutoff = gc_info.cutoff;
let pitr = gc_info.pitr;
let _enter = info_span!("garbage collection", timeline = %self.timelineid, tenant = %self.tenantid, cutoff = %cutoff).entered();
@@ -1867,8 +2085,9 @@ impl LayeredTimeline {
//
// Garbage collect the layer if all conditions are satisfied:
// 1. it is older than cutoff LSN;
// 2. it doesn't need to be retained for 'retain_lsns';
// 3. newer on-disk image layers cover the layer's whole key range
// 2. it is older than PITR interval;
// 3. it doesn't need to be retained for 'retain_lsns';
// 4. newer on-disk image layers cover the layer's whole key range
//
let mut layers = self.layers.write().unwrap();
'outer: for l in layers.iter_historic_layers() {
@@ -1894,8 +2113,31 @@ impl LayeredTimeline {
result.layers_needed_by_cutoff += 1;
continue 'outer;
}
// 2. Is it needed by a child branch?
// 2. Is it newer than the PITR interval?
// We use the modification time of the layer file to estimate its update time.
// This estimate is not precise, but maintaining an LSN->timestamp map seems like overkill.
// Users are not expected to need high precision here, and the estimate is
// conservative: a file's modification time is always newer than the actual time the
// version was created, so it is safe for users.
// TODO A possible "bloat" issue still persists here.
// If modification time changes because of layer upload/download, we will keep these files
// longer than necessary.
// https://github.com/neondatabase/neon/issues/1554
//
if let Ok(metadata) = fs::metadata(&l.filename()) {
let last_modified = metadata.modified()?;
if now.duration_since(last_modified)? < pitr {
debug!(
"keeping {} because it's modification time {:?} is newer than PITR {:?}",
l.filename().display(),
last_modified,
pitr
);
result.layers_needed_by_pitr += 1;
continue 'outer;
}
}
// 3. Is it needed by a child branch?
// NOTE: With that we would keep data that
// might be referenced by child branches forever.
// We can track this in child timeline GC and delete parent layers when
@@ -1914,7 +2156,7 @@ impl LayeredTimeline {
}
}
// 3. Is there a later on-disk layer for this relation?
// 4. Is there a later on-disk layer for this relation?
//
// The end-LSN is exclusive, while disk_consistent_lsn is
// inclusive. For example, if disk_consistent_lsn is 100, it is
@@ -1955,7 +2197,7 @@ impl LayeredTimeline {
result.layers_removed += 1;
}
result.elapsed = now.elapsed();
result.elapsed = now.elapsed()?;
Ok(result)
}
@@ -2232,7 +2474,8 @@ pub mod tests {
}
let cutoff = tline.get_last_record_lsn();
tline.update_gc_info(Vec::new(), cutoff);
tline.update_gc_info(Vec::new(), cutoff, Duration::ZERO);
tline.checkpoint(CheckpointConfig::Forced)?;
tline.compact()?;
tline.gc()?;
@@ -2302,7 +2545,7 @@ pub mod tests {
// Perform a cycle of checkpoint, compaction, and GC
println!("checkpointing {}", lsn);
let cutoff = tline.get_last_record_lsn();
tline.update_gc_info(Vec::new(), cutoff);
tline.update_gc_info(Vec::new(), cutoff, Duration::ZERO);
tline.checkpoint(CheckpointConfig::Forced)?;
tline.compact()?;
tline.gc()?;
@@ -2379,7 +2622,7 @@ pub mod tests {
// Perform a cycle of checkpoint, compaction, and GC
println!("checkpointing {}", lsn);
let cutoff = tline.get_last_record_lsn();
tline.update_gc_info(Vec::new(), cutoff);
tline.update_gc_info(Vec::new(), cutoff, Duration::ZERO);
tline.checkpoint(CheckpointConfig::Forced)?;
tline.compact()?;
tline.gc()?;
@@ -2387,4 +2630,61 @@ pub mod tests {
Ok(())
}
#[test]
fn test_traverse_ancestors() -> Result<()> {
let repo = RepoHarness::create("test_traverse_ancestors")?.load();
let mut tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?;
const NUM_KEYS: usize = 100;
const NUM_TLINES: usize = 50;
let mut test_key = Key::from_hex("012222222233333333444444445500000000").unwrap();
// Track page mutation lsns across different timelines.
let mut updated = [[Lsn(0); NUM_KEYS]; NUM_TLINES];
let mut lsn = Lsn(0);
let mut tline_id = TIMELINE_ID;
#[allow(clippy::needless_range_loop)]
for idx in 0..NUM_TLINES {
let new_tline_id = ZTimelineId::generate();
repo.branch_timeline(tline_id, new_tline_id, lsn)?;
tline = repo.get_timeline_load(new_tline_id)?;
tline_id = new_tline_id;
for _ in 0..NUM_KEYS {
lsn = Lsn(lsn.0 + 0x10);
let blknum = thread_rng().gen_range(0..NUM_KEYS);
test_key.field6 = blknum as u32;
let writer = tline.writer();
writer.put(
test_key,
lsn,
Value::Image(TEST_IMG(&format!("{} {} at {}", idx, blknum, lsn))),
)?;
println!("updating [{}][{}] at {}", idx, blknum, lsn);
writer.finish_write(lsn);
drop(writer);
updated[idx][blknum] = lsn;
}
}
// Read pages from leaf timeline across all ancestors.
for (idx, lsns) in updated.iter().enumerate() {
for (blknum, lsn) in lsns.iter().enumerate() {
// Skip empty mutations.
if lsn.0 == 0 {
continue;
}
println!("chekcking [{}][{}] at {}", idx, blknum, lsn);
test_key.field6 = blknum as u32;
assert_eq!(
tline.get(test_key, *lsn)?,
TEST_IMG(&format!("{} {} at {}", idx, blknum, lsn))
);
}
}
Ok(())
}
}

View File

@@ -1,12 +1,20 @@
//!
//! Functions for reading and writing variable-sized "blobs".
//!
//! Each blob begins with a 4-byte length, followed by the actual data.
//! Each blob begins with a 1- or 4-byte length field, followed by the
//! actual data. If the length is smaller than 128 bytes, the length
//! is written as one byte. If it's larger than that, the length
//! is written as a four-byte integer, in big-endian, with the high
//! bit set. This way, we can detect whether it's a 1- or 4-byte header
//! by peeking at the first byte.
//!
//! len < 128: 0XXXXXXX
//! len >= 128: 1XXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX
//!
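// [Illustrative sketch.] A standalone encoder/decoder for the length-header
// format described above; the function names are made up, and the real
// read/write paths (BlobCursor::read_blob / write_blob below) additionally
// handle headers that are split across page boundaries.
fn encode_len(len: usize) -> Vec<u8> {
    if len < 0x80 {
        // 1-byte header: 0XXXXXXX
        vec![len as u8]
    } else {
        // 4-byte header: big-endian length with the high bit set
        assert!(len <= 0x7fff_ffff, "blob too large");
        let mut buf = (len as u32).to_be_bytes().to_vec();
        buf[0] |= 0x80;
        buf
    }
}
fn decode_len(header: &[u8]) -> (usize, usize) {
    // Returns (blob length, number of header bytes consumed).
    if header[0] < 0x80 {
        (header[0] as usize, 1)
    } else {
        let len_buf = [header[0] & 0x7f, header[1], header[2], header[3]];
        (u32::from_be_bytes(len_buf) as usize, 4)
    }
}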
use crate::layered_repository::block_io::{BlockCursor, BlockReader};
use crate::page_cache::PAGE_SZ;
use std::cmp::min;
use std::io::Error;
use std::io::{Error, ErrorKind};
/// For reading
pub trait BlobCursor {
@@ -40,21 +48,30 @@ where
let mut buf = self.read_blk(blknum)?;
// read length
let mut len_buf = [0u8; 4];
let thislen = PAGE_SZ - off;
if thislen < 4 {
// it is split across two pages
len_buf[..thislen].copy_from_slice(&buf[off..PAGE_SZ]);
blknum += 1;
buf = self.read_blk(blknum)?;
len_buf[thislen..].copy_from_slice(&buf[0..4 - thislen]);
off = 4 - thislen;
// peek at the first byte, to determine if it's a 1- or 4-byte length
let first_len_byte = buf[off];
let len: usize = if first_len_byte < 0x80 {
// 1-byte length header
off += 1;
first_len_byte as usize
} else {
len_buf.copy_from_slice(&buf[off..off + 4]);
off += 4;
}
let len = u32::from_ne_bytes(len_buf) as usize;
// 4-byte length header
let mut len_buf = [0u8; 4];
let thislen = PAGE_SZ - off;
if thislen < 4 {
// it is split across two pages
len_buf[..thislen].copy_from_slice(&buf[off..PAGE_SZ]);
blknum += 1;
buf = self.read_blk(blknum)?;
len_buf[thislen..].copy_from_slice(&buf[0..4 - thislen]);
off = 4 - thislen;
} else {
len_buf.copy_from_slice(&buf[off..off + 4]);
off += 4;
}
len_buf[0] &= 0x7f;
u32::from_be_bytes(len_buf) as usize
};
dstbuf.clear();
@@ -130,10 +147,27 @@ where
{
fn write_blob(&mut self, srcbuf: &[u8]) -> Result<u64, Error> {
let offset = self.offset;
self.inner
.write_all(&((srcbuf.len()) as u32).to_ne_bytes())?;
if srcbuf.len() < 128 {
// Short blob. Write a 1-byte length header
let len_buf = srcbuf.len() as u8;
self.inner.write_all(&[len_buf])?;
self.offset += 1;
} else {
// Write a 4-byte length header
if srcbuf.len() > 0x7fff_ffff {
return Err(Error::new(
ErrorKind::Other,
format!("blob too large ({} bytes)", srcbuf.len()),
));
}
let mut len_buf = ((srcbuf.len()) as u32).to_be_bytes();
len_buf[0] |= 0x80;
self.inner.write_all(&len_buf)?;
self.offset += 4;
}
self.inner.write_all(srcbuf)?;
self.offset += 4 + srcbuf.len() as u64;
self.offset += srcbuf.len() as u64;
Ok(offset)
}
}

View File

@@ -35,11 +35,10 @@ use crate::page_cache::{PageReadGuard, PAGE_SZ};
use crate::repository::{Key, Value, KEY_SIZE};
use crate::virtual_file::VirtualFile;
use crate::walrecord;
use crate::{ZTenantId, ZTimelineId};
use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION};
use anyhow::{bail, ensure, Context, Result};
use log::*;
use serde::{Deserialize, Serialize};
use tracing::*;
// avoid binding to Write (conflicts with std::io::Write)
// while being able to use std::fmt::Write's methods
use std::fmt::Write as _;
@@ -51,8 +50,11 @@ use std::os::unix::fs::FileExt;
use std::path::{Path, PathBuf};
use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
use zenith_utils::bin_ser::BeSer;
use zenith_utils::lsn::Lsn;
use utils::{
bin_ser::BeSer,
lsn::Lsn,
zid::{ZTenantId, ZTimelineId},
};
///
/// Header stored in the beginning of the file
@@ -222,6 +224,7 @@ impl Layer for DeltaLayer {
lsn_range: Range<Lsn>,
reconstruct_state: &mut ValueReconstructState,
) -> anyhow::Result<ValueReconstructResult> {
ensure!(lsn_range.start >= self.lsn_range.start);
let mut need_image = true;
ensure!(self.key_range.contains(&key));
@@ -287,7 +290,10 @@ impl Layer for DeltaLayer {
}
fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = anyhow::Result<(Key, Lsn, Value)>> + 'a> {
let inner = self.load().unwrap();
let inner = match self.load() {
Ok(inner) => inner,
Err(e) => panic!("Failed to load a delta layer: {e:?}"),
};
match DeltaValueIter::new(inner) {
Ok(iter) => Box::new(iter),
@@ -419,7 +425,9 @@ impl DeltaLayer {
drop(inner);
let inner = self.inner.write().unwrap();
if !inner.loaded {
self.load_inner(inner)?;
self.load_inner(inner).with_context(|| {
format!("Failed to load delta layer {}", self.path().display())
})?;
} else {
// Another thread loaded it while we were not holding the lock.
}

View File

@@ -16,8 +16,8 @@ use std::io::{Error, ErrorKind};
use std::ops::DerefMut;
use std::path::PathBuf;
use std::sync::{Arc, RwLock};
use zenith_utils::zid::ZTenantId;
use zenith_utils::zid::ZTimelineId;
use tracing::*;
use utils::zid::{ZTenantId, ZTimelineId};
use std::os::unix::fs::FileExt;
@@ -199,18 +199,24 @@ impl BlobWriter for EphemeralFile {
let mut buf = self.get_buf_for_write(blknum)?;
// Write the length field
let len_buf = u32::to_ne_bytes(srcbuf.len() as u32);
let thislen = PAGE_SZ - off;
if thislen < 4 {
// it needs to be split across pages
buf[off..(off + thislen)].copy_from_slice(&len_buf[..thislen]);
blknum += 1;
buf = self.get_buf_for_write(blknum)?;
buf[0..4 - thislen].copy_from_slice(&len_buf[thislen..]);
off = 4 - thislen;
if srcbuf.len() < 0x80 {
buf[off] = srcbuf.len() as u8;
off += 1;
} else {
buf[off..off + 4].copy_from_slice(&len_buf);
off += 4;
let mut len_buf = u32::to_be_bytes(srcbuf.len() as u32);
len_buf[0] |= 0x80;
let thislen = PAGE_SZ - off;
if thislen < 4 {
// it needs to be split across pages
buf[off..(off + thislen)].copy_from_slice(&len_buf[..thislen]);
blknum += 1;
buf = self.get_buf_for_write(blknum)?;
buf[0..4 - thislen].copy_from_slice(&len_buf[thislen..]);
off = 4 - thislen;
} else {
buf[off..off + 4].copy_from_slice(&len_buf);
off += 4;
}
}
// Write the payload
@@ -229,7 +235,13 @@ impl BlobWriter for EphemeralFile {
buf_remain = &buf_remain[this_blk_len..];
}
drop(buf);
self.size += 4 + srcbuf.len() as u64;
if srcbuf.len() < 0x80 {
self.size += 1;
} else {
self.size += 4;
}
self.size += srcbuf.len() as u64;
Ok(pos)
}
@@ -244,16 +256,31 @@ impl Drop for EphemeralFile {
// remove entry from the hash map
EPHEMERAL_FILES.write().unwrap().files.remove(&self.file_id);
// unlink file
// FIXME: print error
let _ = std::fs::remove_file(&self.file.path);
// unlink the file
let res = std::fs::remove_file(&self.file.path);
if let Err(e) = res {
warn!(
"could not remove ephemeral file '{}': {}",
self.file.path.display(),
e
);
}
}
}
pub fn writeback(file_id: u64, blkno: u32, buf: &[u8]) -> Result<(), std::io::Error> {
if let Some(file) = EPHEMERAL_FILES.read().unwrap().files.get(&file_id) {
file.write_all_at(buf, blkno as u64 * PAGE_SZ as u64)?;
Ok(())
match file.write_all_at(buf, blkno as u64 * PAGE_SZ as u64) {
Ok(_) => Ok(()),
Err(e) => Err(std::io::Error::new(
ErrorKind::Other,
format!(
"failed to write back to ephemeral file at {} error: {}",
file.path.display(),
e
),
)),
}
} else {
Err(std::io::Error::new(
ErrorKind::Other,
@@ -372,6 +399,12 @@ mod tests {
let pos = file.write_blob(&data)?;
blobs.push((pos, data));
}
// also test with large blobs
for i in 0..100 {
let data = format!("blob{}", i).as_bytes().repeat(100);
let pos = file.write_blob(&data)?;
blobs.push((pos, data));
}
let mut cursor = BlockCursor::new(&file);
for (pos, expected) in blobs {

View File

@@ -8,7 +8,7 @@ use std::fmt;
use std::ops::Range;
use std::path::PathBuf;
use zenith_utils::lsn::Lsn;
use utils::lsn::Lsn;
// Note: LayeredTimeline::load_layer_map() relies on this sort order
#[derive(Debug, PartialEq, Eq, Clone)]

View File

@@ -30,12 +30,10 @@ use crate::layered_repository::storage_layer::{
use crate::page_cache::PAGE_SZ;
use crate::repository::{Key, Value, KEY_SIZE};
use crate::virtual_file::VirtualFile;
use crate::{ZTenantId, ZTimelineId};
use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION};
use anyhow::{bail, ensure, Context, Result};
use bytes::Bytes;
use hex;
use log::*;
use serde::{Deserialize, Serialize};
use std::fs;
use std::io::Write;
@@ -43,9 +41,13 @@ use std::io::{Seek, SeekFrom};
use std::ops::Range;
use std::path::{Path, PathBuf};
use std::sync::{RwLock, RwLockReadGuard};
use tracing::*;
use zenith_utils::bin_ser::BeSer;
use zenith_utils::lsn::Lsn;
use utils::{
bin_ser::BeSer,
lsn::Lsn,
zid::{ZTenantId, ZTimelineId},
};
///
/// Header stored in the beginning of the file
@@ -148,6 +150,7 @@ impl Layer for ImageLayer {
reconstruct_state: &mut ValueReconstructState,
) -> anyhow::Result<ValueReconstructResult> {
assert!(self.key_range.contains(&key));
assert!(lsn_range.start >= self.lsn);
assert!(lsn_range.end >= self.lsn);
let inner = self.load()?;
@@ -251,7 +254,9 @@ impl ImageLayer {
drop(inner);
let mut inner = self.inner.write().unwrap();
if !inner.loaded {
self.load_inner(&mut inner)?;
self.load_inner(&mut inner).with_context(|| {
format!("Failed to load image layer {}", self.path().display())
})?
} else {
// Another thread loaded it while we were not holding the lock.
}

View File

@@ -14,19 +14,21 @@ use crate::layered_repository::storage_layer::{
};
use crate::repository::{Key, Value};
use crate::walrecord;
use crate::{ZTenantId, ZTimelineId};
use anyhow::{bail, ensure, Result};
use log::*;
use std::collections::HashMap;
use tracing::*;
use utils::{
bin_ser::BeSer,
lsn::Lsn,
vec_map::VecMap,
zid::{ZTenantId, ZTimelineId},
};
// avoid binding to Write (conflicts with std::io::Write)
// while being able to use std::fmt::Write's methods
use std::fmt::Write as _;
use std::ops::Range;
use std::path::PathBuf;
use std::sync::RwLock;
use zenith_utils::bin_ser::BeSer;
use zenith_utils::lsn::Lsn;
use zenith_utils::vec_map::VecMap;
pub struct InMemoryLayer {
conf: &'static PageServerConf,
@@ -113,7 +115,7 @@ impl Layer for InMemoryLayer {
lsn_range: Range<Lsn>,
reconstruct_state: &mut ValueReconstructState,
) -> anyhow::Result<ValueReconstructResult> {
ensure!(lsn_range.start <= self.start_lsn);
ensure!(lsn_range.start >= self.start_lsn);
let mut need_image = true;
let inner = self.inner.read().unwrap();
@@ -124,13 +126,6 @@ impl Layer for InMemoryLayer {
if let Some(vec_map) = inner.index.get(&key) {
let slice = vec_map.slice_range(lsn_range);
for (entry_lsn, pos) in slice.iter().rev() {
match &reconstruct_state.img {
Some((cached_lsn, _)) if entry_lsn <= cached_lsn => {
return Ok(ValueReconstructResult::Complete)
}
_ => {}
}
let buf = reader.read_blob(*pos)?;
let value = Value::des(&buf)?;
match value {

Some files were not shown because too many files have changed in this diff.