Compare commits


1 Commit

Author SHA1 Message Date
Heikki Linnakangas
fb4b6ce8dc Add debug information to hunt down port collisions.
We've been seeing a lot of sporadic test failures with "Cannot assign
requested address" lately. Add some debug information to help us find
the cause:

- When server startup fails, print "netstat -tnlap" output to the test
  log. If the failure was caused by "Cannot assign requested address",
  this will hopefully tell us which process was occupying the port.
- In pageserver and safekeeper startup, print its PID. This way, we can
  correlate the PID from netstat output with the test that launched it.
- In safekeeper startup, print the HTTP port it's using to the log, in
  addition to the libpq port. The pageserver was already doing it.
2022-11-30 14:36:19 +02:00
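As an illustration of the first point, a minimal shell sketch of the kind of check described (hypothetical helper name and placeholder output, not the actual Python test-harness code):

# Hypothetical helper: when a storage service fails to bind its port,
# dump the listening sockets so the occupying PID shows up in the test log.
dump_listeners_on_failure() {
    echo "startup failed; dumping listening sockets:"
    netstat -tnlap 2>/dev/null || ss -tnlp   # fall back to ss if netstat is unavailable
}

The test harness could call such a helper whenever a startup failure mentions "Cannot assign requested address", then correlate the PIDs in the output with the PIDs printed by the pageserver and safekeeper at startup.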
108 changed files with 7905 additions and 37229 deletions

View File

@@ -5,12 +5,12 @@ inputs:
api_key:
description: 'Neon API key'
required: true
environment:
description: 'dev (aka captest) or staging'
required: true
project_id:
description: 'ID of the Project to create Branch in'
required: true
api_host:
description: 'Neon API host'
default: console.stage.neon.tech
outputs:
dsn:
description: 'Created Branch DSN (for main database)'
@@ -22,6 +22,27 @@ outputs:
runs:
using: "composite"
steps:
- name: Parse Input
id: parse-input
shell: bash -euxo pipefail {0}
run: |
case "${ENVIRONMENT}" in
dev)
API_HOST=console.dev.neon.tech
;;
staging)
API_HOST=console.stage.neon.tech
;;
*)
echo 2>&1 "Unknown environment=${ENVIRONMENT}. Allowed 'dev' or 'staging' only"
exit 1
;;
esac
echo "api_host=${API_HOST}" >> $GITHUB_OUTPUT
env:
ENVIRONMENT: ${{ inputs.environment }}
- name: Create New Branch
id: create-branch
shell: bash -euxo pipefail {0}
@@ -35,12 +56,7 @@ runs:
--data "{
\"branch\": {
\"name\": \"Created by actions/neon-branch-create; GITHUB_RUN_ID=${GITHUB_RUN_ID} at $(date +%s)\"
},
\"endpoints\": [
{
\"type\": \"read_write\"
}
]
}
}")
if [ -z "${branch}" ]; then
@@ -68,8 +84,8 @@ runs:
host=$(echo $branch | jq --raw-output '.endpoints[0].host')
echo "host=${host}" >> $GITHUB_OUTPUT
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
API_HOST: ${{ steps.parse-input.outputs.api_host }}
PROJECT_ID: ${{ inputs.project_id }}
- name: Get Role name
@@ -87,8 +103,8 @@ runs:
role_name=$(echo $roles | jq --raw-output '.roles[] | select(.protected == false) | .name')
echo "role_name=${role_name}" >> $GITHUB_OUTPUT
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
API_HOST: ${{ steps.parse-input.outputs.api_host }}
PROJECT_ID: ${{ inputs.project_id }}
BRANCH_ID: ${{ steps.create-branch.outputs.branch_id }}
@@ -130,8 +146,8 @@ runs:
echo "::add-mask::${dsn}"
echo "dsn=${dsn}" >> $GITHUB_OUTPUT
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
API_HOST: ${{ steps.parse-input.outputs.api_host }}
PROJECT_ID: ${{ inputs.project_id }}
BRANCH_ID: ${{ steps.create-branch.outputs.branch_id }}
ROLE_NAME: ${{ steps.role-name.outputs.role_name }}

View File

@@ -5,19 +5,40 @@ inputs:
api_key:
description: 'Neon API key'
required: true
environment:
description: 'dev (aka captest) or staging'
required: true
project_id:
description: 'ID of the Project which should be deleted'
required: true
branch_id:
description: 'ID of the branch to delete'
required: true
api_host:
description: 'Neon API host'
default: console.stage.neon.tech
runs:
using: "composite"
steps:
- name: Parse Input
id: parse-input
shell: bash -euxo pipefail {0}
run: |
case "${ENVIRONMENT}" in
dev)
API_HOST=console.dev.neon.tech
;;
staging)
API_HOST=console.stage.neon.tech
;;
*)
echo 2>&1 "Unknown environment=${ENVIRONMENT}. Allowed 'dev' or 'staging' only"
exit 1
;;
esac
echo "api_host=${API_HOST}" >> $GITHUB_OUTPUT
env:
ENVIRONMENT: ${{ inputs.environment }}
- name: Delete Branch
# Do not try to delete a branch if .github/actions/neon-project-create
# or .github/actions/neon-branch-create failed before
@@ -52,7 +73,7 @@ runs:
exit 1
fi
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}
BRANCH_ID: ${{ inputs.branch_id }}
API_HOST: ${{ steps.parse-input.outputs.api_host }}

View File

@@ -5,16 +5,12 @@ inputs:
api_key:
description: 'Neon API key'
required: true
environment:
description: 'dev (aka captest) or staging'
required: true
region_id:
description: 'Region ID; if not set, the project will be created in the default region'
default: aws-us-east-2
postgres_version:
description: 'Postgres version; default is 15'
default: 15
api_host:
description: 'Neon API host'
default: console.stage.neon.tech
required: false
outputs:
dsn:
description: 'Created Project DSN (for main database)'
@@ -26,6 +22,31 @@ outputs:
runs:
using: "composite"
steps:
- name: Parse Input
id: parse-input
shell: bash -euxo pipefail {0}
run: |
case "${ENVIRONMENT}" in
dev)
API_HOST=console.dev.neon.tech
REGION_ID=${REGION_ID:-aws-eu-west-1}
;;
staging)
API_HOST=console.stage.neon.tech
REGION_ID=${REGION_ID:-aws-us-east-2}
;;
*)
echo 2>&1 "Unknown environment=${ENVIRONMENT}. Allowed 'dev' or 'staging' only"
exit 1
;;
esac
echo "api_host=${API_HOST}" >> $GITHUB_OUTPUT
echo "region_id=${REGION_ID}" >> $GITHUB_OUTPUT
env:
ENVIRONMENT: ${{ inputs.environment }}
REGION_ID: ${{ inputs.region_id }}
- name: Create Neon Project
id: create-neon-project
# A shell without `set -x`, to not expose the password/dsn in logs
@@ -40,7 +61,6 @@ runs:
--data "{
\"project\": {
\"name\": \"Created by actions/neon-project-create; GITHUB_RUN_ID=${GITHUB_RUN_ID}\",
\"pg_version\": ${POSTGRES_VERSION},
\"region_id\": \"${REGION_ID}\",
\"settings\": { }
}
@@ -56,7 +76,6 @@ runs:
project_id=$(echo $project | jq --raw-output '.project.id')
echo "project_id=${project_id}" >> $GITHUB_OUTPUT
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
REGION_ID: ${{ inputs.region_id }}
POSTGRES_VERSION: ${{ inputs.postgres_version }}
API_HOST: ${{ steps.parse-input.outputs.api_host }}
REGION_ID: ${{ steps.parse-input.outputs.region_id }}

View File

@@ -5,16 +5,37 @@ inputs:
api_key:
description: 'Neon API key'
required: true
environment:
description: 'dev (aka captest) or staging'
required: true
project_id:
description: 'ID of the Project to delete'
required: true
api_host:
description: 'Neon API host'
default: console.stage.neon.tech
runs:
using: "composite"
steps:
- name: Parse Input
id: parse-input
shell: bash -euxo pipefail {0}
run: |
case "${ENVIRONMENT}" in
dev)
API_HOST=console.dev.neon.tech
;;
staging)
API_HOST=console.stage.neon.tech
;;
*)
echo 2>&1 "Unknown environment=${ENVIRONMENT}. Allowed 'dev' or 'staging' only"
exit 1
;;
esac
echo "api_host=${API_HOST}" >> $GITHUB_OUTPUT
env:
ENVIRONMENT: ${{ inputs.environment }}
- name: Delete Neon Project
# Do not try to delete a project if .github/actions/neon-project-create failed before
if: ${{ inputs.project_id != '' }}
@@ -28,6 +49,6 @@ runs:
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}"
env:
API_HOST: ${{ inputs.api_host }}
API_KEY: ${{ inputs.api_key }}
PROJECT_ID: ${{ inputs.project_id }}
API_HOST: ${{ steps.parse-input.outputs.api_host }}

View File

@@ -1,8 +1,7 @@
#!/bin/sh
# fetch params from meta-data service
# get instance id from meta-data service
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
AZ_ID=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone)
# store fqdn hostname in var
HOST=$(hostname -f)
@@ -17,8 +16,7 @@ cat <<EOF | tee /tmp/payload
"instance_id": "${INSTANCE_ID}",
"http_host": "${HOST}",
"http_port": 9898,
"active": false,
"availability_zone_id": "${AZ_ID}"
"active": false
}
EOF

View File

@@ -5,7 +5,7 @@ After=network.target auditd.service
[Service]
Type=simple
User=pageserver
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/v14/lib SENTRY_DSN={{ SENTRY_URL_PAGESERVER }}
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/v14/lib
ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoints=['{{ etcd_endpoints }}']" -D /storage/pageserver/data
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed

View File

@@ -5,7 +5,7 @@ After=network.target auditd.service
[Service]
Type=simple
User=safekeeper
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/v14/lib SENTRY_DSN={{ SENTRY_URL_SAFEKEEPER }}
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/v14/lib
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}{{ hostname_suffix }}:6500 --listen-http {{ inventory_hostname }}{{ hostname_suffix }}:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ safekeeper_s3_prefix }}"}'
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed

View File

@@ -1,53 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: staging
neon_service: storage-broker
ingress:
enabled: true
annotations:
kubernetes.io/ingress.class: nginx-internal
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
hosts:
- host: storage-broker.zeta.eu-west-1.internal.aws.neon.build
paths:
- path: /
pathType: Prefix
tls:
- hosts:
- storage-broker.zeta.eu-west-1.internal.aws.neon.build
secretName: storage-broker-tls
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,53 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: staging
neon_service: storage-broker
ingress:
enabled: true
annotations:
kubernetes.io/ingress.class: nginx-internal
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
hosts:
- host: storage-broker.beta.us-east-2.internal.aws.neon.build
paths:
- path: /
pathType: Prefix
tls:
- hosts:
- storage-broker.beta.us-east-2.internal.aws.neon.build
secretName: storage-broker-tls
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,54 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: neon-stress
neon_service: storage-broker
ingress:
enabled: true
annotations:
kubernetes.io/ingress.class: alb
alb.ingress.kubernetes.io/healthcheck-path: /status
alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]'
alb.ingress.kubernetes.io/scheme: "internal"
alb.ingress.kubernetes.io/target-type: "ip"
alb.ingress.kubernetes.io/ssl-redirect: "443"
alb.ingress.kubernetes.io/backend-protocol-version: "GRPC"
hosts:
- host: storage-broker-stress.stage.neon.tech
paths:
- path: /
pathType: Prefix
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,53 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
ingress:
enabled: true
annotations:
kubernetes.io/ingress.class: nginx-internal
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
hosts:
- host: storage-broker.epsilon.ap-southeast-1.internal.aws.neon.tech
paths:
- path: /
pathType: Prefix
tls:
- hosts:
- storage-broker.epsilon.ap-southeast-1.internal.aws.neon.tech
secretName: storage-broker-tls
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,53 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
ingress:
enabled: true
annotations:
kubernetes.io/ingress.class: nginx-internal
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
hosts:
- host: storage-broker.gamma.eu-central-1.internal.aws.neon.tech
paths:
- path: /
pathType: Prefix
tls:
- hosts:
- storage-broker.gamma.eu-central-1.internal.aws.neon.tech
secretName: storage-broker-tls
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,53 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
ingress:
enabled: true
annotations:
kubernetes.io/ingress.class: nginx-internal
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
hosts:
- host: storage-broker.delta.us-east-2.internal.aws.neon.tech
paths:
- path: /
pathType: Prefix
tls:
- hosts:
- storage-broker.delta.us-east-2.internal.aws.neon.tech
secretName: storage-broker-tls
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,31 +0,0 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://console-release.local/management/api/v2"
domain: "*.us-west-2.aws.neon.tech"
# -- Additional labels for neon-proxy pods
podLabels:
zenith_service: proxy-scram
zenith_env: prod
zenith_region: us-west-2
zenith_region_slug: us-west-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: us-west-2.aws.neon.tech
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack

View File

@@ -1,53 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
ingress:
enabled: true
annotations:
kubernetes.io/ingress.class: nginx-internal
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
hosts:
- host: storage-broker.eta.us-west-2.internal.aws.neon.tech
paths:
- path: /
pathType: Prefix
tls:
- hosts:
- storage-broker.eta.us-west-2.internal.aws.neon.tech
secretName: storage-broker-tls
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,54 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
ingress:
enabled: true
annotations:
kubernetes.io/ingress.class: alb
alb.ingress.kubernetes.io/healthcheck-path: /status
alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]'
alb.ingress.kubernetes.io/scheme: "internal"
alb.ingress.kubernetes.io/target-type: "ip"
alb.ingress.kubernetes.io/ssl-redirect: "443"
alb.ingress.kubernetes.io/backend-protocol-version: "GRPC"
hosts:
- host: storage-broker.neon.tech
paths:
- path: /
pathType: Prefix
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -1,54 +0,0 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: staging
neon_service: storage-broker
ingress:
enabled: true
annotations:
kubernetes.io/ingress.class: alb
alb.ingress.kubernetes.io/healthcheck-path: /status
alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]'
alb.ingress.kubernetes.io/scheme: "internal"
alb.ingress.kubernetes.io/target-type: "ip"
alb.ingress.kubernetes.io/ssl-redirect: "443"
alb.ingress.kubernetes.io/backend-protocol-version: "GRPC"
hosts:
- host: storage-broker.stage.neon.tech
paths:
- path: /
pathType: Prefix
metrics:
enabled: true
serviceMonitor:
enabled: true
selector:
release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -15,6 +15,9 @@ on:
workflow_dispatch: # adds ability to run this manually
inputs:
environment:
description: 'Environment to run remote tests on (dev or staging)'
required: false
region_id:
description: 'Use a particular region. If not set, the default region will be used'
required: false
@@ -34,69 +37,103 @@ concurrency:
jobs:
bench:
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
PLATFORM: "neon-staging"
# This workflow runs on a self-hosted runner.
# Its environment is quite different from the usual GitHub-hosted runner;
# probably the most important difference is that it doesn't start from a clean workspace each time,
# e.g. if you install system packages they are not cleaned up, since you install them directly on the host machine,
# not in a container or similar.
# See the documentation for more info: https://docs.github.com/en/actions/hosting-your-own-runners/about-self-hosted-runners
runs-on: [self-hosted, zenith-benchmarker]
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
env:
POSTGRES_DISTRIB_DIR: /usr/pgsql
DEFAULT_PG_VERSION: 14
steps:
- uses: actions/checkout@v3
- name: Checkout zenith repo
uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
# actions/setup-python@v2 does not work correctly on self-hosted runners,
# see https://github.com/actions/setup-python/issues/162
# and probably https://github.com/actions/setup-python/issues/162#issuecomment-865387976 in particular,
# so the simplest solution is to use the already-installed system Python and spin up virtualenvs for job runs.
# Python 3.7.10 is already installed on the machine, so use it to install poetry and then use poetry's virtualenvs.
- name: Install poetry & deps
run: |
python3 -m pip install --upgrade poetry wheel
# Since pip/poetry caches are reused, there shouldn't be any trouble with installing every time
./scripts/pysync
- name: Show versions
run: |
echo Python
python3 --version
poetry run python3 --version
echo Poetry
poetry --version
echo Pgbench
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
- name: Create Neon Project
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
environment: ${{ github.event.inputs.environment || 'staging' }}
api_key: ${{ ( github.event.inputs.environment || 'staging' ) == 'staging' && secrets.NEON_STAGING_API_KEY || secrets.NEON_CAPTEST_API_KEY }}
- name: Run benchmark
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
# Set the --sparse-ordering option of the pytest-order plugin
# to ensure tests run in the order they appear in the file.
# This is important for the test_perf_pgbench.py::test_pgbench_remote_* tests
extra_params: -m remote_cluster --sparse-ordering --timeout 5400 --ignore test_runner/performance/test_perf_olap.py
# pgbench is installed system wide from official repo
# https://download.postgresql.org/pub/repos/yum/13/redhat/rhel-7-x86_64/
# via
# sudo tee /etc/yum.repos.d/pgdg.repo<<EOF
# [pgdg13]
# name=PostgreSQL 13 for RHEL/CentOS 7 - x86_64
# baseurl=https://download.postgresql.org/pub/repos/yum/13/redhat/rhel-7-x86_64/
# enabled=1
# gpgcheck=0
# EOF
# sudo yum makecache
# sudo yum install postgresql13-contrib
# actual binaries are located in /usr/pgsql-13/bin/
env:
# The pgbench test runs two tests of the given duration against each scale.
# So the total runtime with these parameters is 2 * 2 * 300 = 1200, or 20 minutes.
# Plus time needed to initialize the test databases.
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
PLATFORM: "neon-staging"
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
REMOTE_ENV: "1" # indicate to test harness that we do not have zenith binaries locally
run: |
# Just to be sure that no data was cached on the self-hosted runner,
# since it might generate duplicates when calling ingest_perf_test_result.py
rm -rf perf-report-staging
mkdir -p perf-report-staging
# Set the --sparse-ordering option of the pytest-order plugin to ensure tests run in the order they appear in the file;
# this is important for the test_perf_pgbench.py::test_pgbench_remote_* tests.
# Do not run tests from test_runner/performance/test_perf_olap.py because they require a prepared DB. We run them separately in the `clickbench-compare` job.
./scripts/pytest test_runner/performance/ -v \
-m "remote_cluster" \
--sparse-ordering \
--out-dir perf-report-staging \
--timeout 5400 \
--ignore test_runner/performance/test_perf_olap.py
- name: Submit result
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
run: |
REPORT_FROM=$(realpath perf-report-staging) REPORT_TO=staging scripts/generate_and_push_perf_report.sh
- name: Delete Neon Project
if: ${{ always() }}
uses: ./.github/actions/neon-project-delete
with:
environment: staging
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
if: success() || failure()
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
@@ -115,22 +152,15 @@ jobs:
# neon-captest-prefetch: Same, with prefetching enabled (new project)
# rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
# rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
platform: [ neon-captest-new, neon-captest-prefetch, rds-postgres ]
platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch, rds-postgres ]
db_size: [ 10gb ]
runner: [ us-east-2 ]
include:
- platform: neon-captest-reuse
db_size: 10gb
runner: dev # TODO: Switch to us-east-2 after dry-bonus-223539 migration to staging
- platform: neon-captest-new
db_size: 50gb
runner: us-east-2
- platform: neon-captest-prefetch
db_size: 50gb
runner: us-east-2
- platform: rds-aurora
db_size: 50gb
runner: us-east-2
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
@@ -142,9 +172,9 @@ jobs:
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, "${{ matrix.runner }}", x64 ]
runs-on: [ self-hosted, dev, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:pinned
options: --init
timeout-minutes: 360 # 6h
@@ -169,9 +199,8 @@ jobs:
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
environment: ${{ github.event.inputs.environment || 'dev' }}
api_key: ${{ ( github.event.inputs.environment || 'dev' ) == 'staging' && secrets.NEON_STAGING_API_KEY || secrets.NEON_CAPTEST_API_KEY }}
- name: Set up Connection String
id: set-up-connstr
@@ -202,11 +231,8 @@ jobs:
- name: Set database options
if: matrix.platform == 'neon-captest-prefetch'
run: |
DB_NAME=$(psql ${BENCHMARK_CONNSTR} --no-align --quiet -t -c "SELECT current_database()")
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET enable_seqscan_prefetch=on"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET effective_io_concurrency=32"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET maintenance_io_concurrency=32"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE neondb SET enable_seqscan_prefetch=on"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE neondb SET seqscan_prefetch_buffers=10"
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
@@ -249,13 +275,6 @@ jobs:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Delete Neon Project
if: ${{ steps.create-neon-project.outputs.project_id && always() }}
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
if: success() || failure()
uses: ./.github/actions/allure-report
@@ -263,6 +282,14 @@ jobs:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Delete Neon Project
if: ${{ steps.create-neon-project.outputs.project_id && always() }}
uses: ./.github/actions/neon-project-delete
with:
environment: dev
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_CAPTEST_API_KEY }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
@@ -299,9 +326,9 @@ jobs:
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, us-east-2, x64 ]
runs-on: [ self-hosted, dev, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:pinned
options: --init
timeout-minutes: 360 # 6h
@@ -347,15 +374,12 @@ jobs:
- name: Set database options
if: matrix.platform == 'neon-captest-prefetch'
run: |
DB_NAME=$(psql ${BENCHMARK_CONNSTR} --no-align --quiet -t -c "SELECT current_database()")
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET enable_seqscan_prefetch=on"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET effective_io_concurrency=32"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET maintenance_io_concurrency=32"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE main SET enable_seqscan_prefetch=on"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE main SET seqscan_prefetch_buffers=10"
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: ClickBench benchmark
- name: Benchmark clickbench
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
@@ -383,114 +407,3 @@ jobs:
slack-message: "Periodic OLAP perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
tpch-compare:
# TPC-H DB for rds-aurora and rds-postgres is deployed to the same clusters
# we use for performance testing in pgbench-compare & clickbench-compare.
# Run this job only when clickbench-compare is finished, to avoid overlap.
# We might change it after https://github.com/neondatabase/neon/issues/2900.
#
# *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
if: success() || failure()
needs: [ clickbench-compare ]
strategy:
fail-fast: false
matrix:
# neon-captest-prefetch: We have pre-created projects with prefetch enabled
# rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
# rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
platform: [ neon-captest-prefetch, rds-postgres, rds-aurora ]
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:pinned
options: --init
timeout-minutes: 360 # 6h
steps:
- uses: actions/checkout@v3
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neon-captest-prefetch)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_TPCH_S10_CONNSTR }}
;;
rds-aurora)
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_TPCH_S10_CONNSTR }}
;;
rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_TPCH_S10_CONNSTR }}
;;
*)
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-prefetch', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
psql ${CONNSTR} -c "SELECT version();"
- name: Set database options
if: matrix.platform == 'neon-captest-prefetch'
run: |
DB_NAME=$(psql ${BENCHMARK_CONNSTR} --no-align --quiet -t -c "SELECT current_database()")
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET enable_seqscan_prefetch=on"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET effective_io_concurrency=32"
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET maintenance_io_concurrency=32"
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Run TPC-H benchmark
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_perf_olap.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_tpch
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Create Allure report
if: success() || failure()
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic TPC-H perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

View File

@@ -305,7 +305,7 @@ jobs:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
needs: [ regress-tests, benchmarks ]
if: ${{ !cancelled() }}
if: success() || failure()
strategy:
fail-fast: false
matrix:
@@ -668,11 +668,11 @@ jobs:
- id: set-matrix
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "storage_broker_ns": "neon-storage-broker", "storage_broker_config": "staging.neon-storage-broker", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA", "console_api_key_secret": "NEON_STAGING_API_KEY"}'
NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "storage_broker_ns": "neon-stress-storage-broker", "storage_broker_config": "neon-stress.neon-storage-broker", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA", "console_api_key_secret": "NEON_CAPTEST_API_KEY", storage_broker_config: }'
STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA", "console_api_key_secret": "NEON_STAGING_API_KEY"}'
NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA", "console_api_key_secret": "NEON_CAPTEST_API_KEY"}'
echo "include=[$STAGING, $NEON_STRESS]" >> $GITHUB_OUTPUT
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "storage_broker_ns": "neon-storage-broker", "storage_broker_config": "production.neon-storage-broker", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA", "console_api_key_secret": "NEON_PRODUCTION_API_KEY"}'
PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA", "console_api_key_secret": "NEON_PRODUCTION_API_KEY"}'
echo "include=[$PRODUCTION]" >> $GITHUB_OUTPUT
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
@@ -732,7 +732,7 @@ jobs:
ssh-add ssh-key
rm -f ssh-key ssh-key-cert.pub
ansible-galaxy collection install sivel.toiletwater
ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts.yaml -e CONSOLE_API_TOKEN=${{ secrets[matrix.console_api_key_secret] }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts.yaml -e CONSOLE_API_TOKEN=${{ secrets[matrix.console_api_key_secret] }}
rm -f neon_install.tar.gz .neon_current_version
deploy-new:
@@ -770,7 +770,7 @@ jobs:
exit 1
fi
ansible-galaxy collection install sivel.toiletwater
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
rm -f neon_install.tar.gz .neon_current_version
deploy-pr-test-new:
@@ -803,7 +803,7 @@ jobs:
./get_binaries.sh
ansible-galaxy collection install sivel.toiletwater
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
rm -f neon_install.tar.gz .neon_current_version
deploy-prod-new:
@@ -843,7 +843,7 @@ jobs:
fi
ansible-galaxy collection install sivel.toiletwater
ansible-playbook deploy.yaml -i prod.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_PRODUCTION_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
ansible-playbook deploy.yaml -i prod.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_PRODUCTION_API_KEY}}
rm -f neon_install.tar.gz .neon_current_version
deploy-proxy:
@@ -885,48 +885,8 @@ jobs:
- name: Re-deploy proxy
run: |
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
deploy-storage-broker-staging:
runs-on: [ self-hosted, dev, x64 ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
strategy:
matrix:
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
env:
KUBECONFIG: .kubeconfig
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Add curl
run: apt update && apt install curl -y
- name: Store kubeconfig file
run: |
echo "${{ secrets[matrix.kubeconfig_secret] }}" | base64 --decode > ${KUBECONFIG}
chmod 0600 ${KUBECONFIG}
- name: Setup helm v3
run: |
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
helm repo add neondatabase https://neondatabase.github.io/helm-charts
- name: Deploy storage-broker
run:
helm upgrade neon-storage-broker neondatabase/neon-storage-broker --namespace ${{ matrix.storage_broker_ns }} --create-namespace --install --atomic -f .github/helm-values/${{ matrix.storage_broker_config }}.yaml --set image.tag=${{ needs.tag.outputs.build-tag }} --wait --timeout 5m0s
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
deploy-proxy-new:
runs-on: [ self-hosted, dev, x64 ]
@@ -965,53 +925,19 @@ jobs:
- name: Re-deploy scram proxy
run: |
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
- name: Re-deploy link proxy
if: matrix.deploy_link_proxy
run: |
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
- name: Re-deploy legacy scram proxy
if: matrix.deploy_legacy_scram_proxy
run: |
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
deploy-storage-broker-dev-new:
runs-on: [ self-hosted, dev, x64 ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, tag, regress-tests ]
if: |
(github.ref_name == 'main') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: us-east-2
target_cluster: dev-us-east-2-beta
- target_region: eu-west-1
target_cluster: dev-eu-west-1-zeta
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Deploy storage-broker
run:
helm upgrade neon-storage-broker neondatabase/neon-storage-broker --namespace neon-storage-broker --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ needs.tag.outputs.build-tag }} --wait --timeout 5m0s
helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
deploy-proxy-prod-new:
runs-on: prod
@@ -1029,8 +955,6 @@ jobs:
include:
- target_region: us-east-2
target_cluster: prod-us-east-2-delta
- target_region: us-west-2
target_cluster: prod-us-west-2-eta
- target_region: eu-central-1
target_cluster: prod-eu-central-1-gamma
- target_region: ap-southeast-1
@@ -1050,45 +974,7 @@ jobs:
- name: Re-deploy proxy
run: |
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
deploy-storage-broker-prod-new:
runs-on: prod
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
needs: [ push-docker-hub, tag, regress-tests ]
if: |
(github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: us-east-2
target_cluster: prod-us-east-2-delta
- target_region: us-west-2
target_cluster: prod-us-west-2-eta
- target_region: eu-central-1
target_cluster: prod-eu-central-1-gamma
- target_region: ap-southeast-1
target_cluster: prod-ap-southeast-1-epsilon
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Deploy storage-broker
run:
helm upgrade neon-storage-broker neondatabase/neon-storage-broker --namespace neon-storage-broker --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ needs.tag.outputs.build-tag }} --wait --timeout 5m0s
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
promote-compatibility-data:
runs-on: [ self-hosted, dev, x64 ]

View File

@@ -23,7 +23,6 @@ jobs:
runs-on: [ ubuntu-latest ]
env:
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
steps:
@@ -52,8 +51,8 @@ jobs:
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
environment: staging
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
- name: Run pytest
env:
@@ -64,7 +63,7 @@ jobs:
run: |
# The test framework expects a psql binary to be present,
# but since we don't really need it in this test, let's mock it
mkdir -p "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin" && touch "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin/psql";
mkdir -p "$POSTGRES_DISTRIB_DIR/v14/bin" && touch "$POSTGRES_DISTRIB_DIR/v14/bin/psql";
./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--tb=short \
@@ -76,6 +75,7 @@ jobs:
if: ${{ always() }}
uses: ./.github/actions/neon-project-delete
with:
environment: staging
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}

786 changes: Cargo.lock (generated)

File diff suppressed because it is too large.

View File

@@ -2,20 +2,29 @@
Neon is a serverless open-source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes the PostgreSQL storage layer by redistributing data across a cluster of nodes.
The project used to be called "Zenith". Many of the commands and code comments
still refer to "zenith", but we are in the process of renaming things.
## Quick start
Try the [Neon Free Tier](https://neon.tech/docs/introduction/technical-preview-free-tier/) to create a serverless Postgres instance. Then connect to it with your preferred Postgres client (psql, dbeaver, etc) or use the online [SQL Editor](https://neon.tech/docs/get-started-with-neon/query-with-neon-sql-editor/). See [Connect from any application](https://neon.tech/docs/connect/connect-from-any-app/) for connection instructions.
[Join the waitlist](https://neon.tech/) for our free tier to receive your serverless postgres instance. Then connect to it with your preferred postgres client (psql, dbeaver, etc) or use the online SQL editor.
Alternatively, compile and run the project [locally](#running-local-installation).
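For example, connecting with psql might look like the following (the endpoint, user, password, and database name are placeholders; use the connection string shown for your project in the Neon console):

psql "postgres://<user>:<password>@<your-endpoint>.neon.tech/neondb?sslmode=require"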
## Architecture overview
A Neon installation consists of compute nodes and the Neon storage engine. Compute nodes are stateless PostgreSQL nodes backed by the Neon storage engine.
A Neon installation consists of compute nodes and a Neon storage engine.
Compute nodes are stateless PostgreSQL nodes backed by the Neon storage engine.
The Neon storage engine consists of two major components:
- Pageserver. Scalable storage backend for the compute nodes.
- Safekeepers. The safekeepers form a redundant WAL service that receives WAL from the compute node and stores it durably until it has been processed by the pageserver and uploaded to cloud storage.
- WAL service. The service receives WAL from the compute node and ensures that it is stored durably.
See developer documentation in [/docs/SUMMARY.md](/docs/SUMMARY.md) for more information.
Pageserver consists of:
- Repository - Neon storage implementation.
- WAL receiver - service that receives WAL from WAL service and stores it in the repository.
- Page service - service that communicates with compute nodes and responds with pages from the repository.
- WAL redo - service that builds pages from base images and WAL records at the Page service's request.
## Running local installation
@@ -220,20 +229,12 @@ CARGO_BUILD_FLAGS="--features=testing" make
## Documentation
[/docs/](/docs/) Contains a top-level overview of all available markdown documentation.
Now we use README files to cover design ideas and overall architecture for each module, plus `rustdoc`-style documentation comments. See also [/docs/](/docs/) for a top-level overview of all available markdown documentation.
- [/docs/sourcetree.md](/docs/sourcetree.md) contains an overview of the source tree layout.
To view your `rustdoc` documentation in a browser, try running `cargo doc --no-deps --open`
See also README files in some source directories, and `rustdoc` style documentation comments.
Other resources:
- [SELECT 'Hello, World'](https://neon.tech/blog/hello-world/): Blog post by Nikita Shamgunov on the high level architecture
- [Architecture decisions in Neon](https://neon.tech/blog/architecture-decisions-in-neon/): Blog post by Heikki Linnakangas
- [Neon: Serverless PostgreSQL!](https://www.youtube.com/watch?v=rES0yzeERns): Presentation on storage system by Heikki Linnakangas in the CMU Database Group seminar series
### Postgres-specific terms
Due to Neon's very close relationship with PostgreSQL internals, numerous Postgres-specific terms are used.

View File

@@ -5,7 +5,7 @@ edition = "2021"
[dependencies]
anyhow = "1.0"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
chrono = "0.4"
clap = "4.0"
env_logger = "0.9"
futures = "0.3.13"

View File

@@ -14,19 +14,17 @@
use std::ffi::OsStr;
use std::io::Write;
use std::os::unix::prelude::AsRawFd;
use std::os::unix::process::CommandExt;
use std::path::{Path, PathBuf};
use std::path::Path;
use std::process::{Child, Command};
use std::time::Duration;
use std::{fs, io, thread};
use anyhow::Context;
use anyhow::{anyhow, bail, Context, Result};
use nix::errno::Errno;
use nix::fcntl::{FcntlArg, FdFlag};
use nix::sys::signal::{kill, Signal};
use nix::unistd::Pid;
use utils::pid_file::{self, PidFileRead};
use utils::lock_file;
// These constants control the loop used to poll for process start / stop.
//
@@ -88,14 +86,6 @@ where
let filled_cmd = fill_aws_secrets_vars(fill_rust_env_vars(background_command));
filled_cmd.envs(envs);
let pid_file_to_check = match initial_pid_file {
InitialPidFile::Create(path) => {
pre_exec_create_pidfile(filled_cmd, path);
path
}
InitialPidFile::Expect(path) => path,
};
let mut spawned_process = filled_cmd.spawn().with_context(|| {
format!("Could not spawn {process_name}, see console output and log files for details.")
})?;
@@ -105,8 +95,29 @@ where
.with_context(|| format!("Subprocess {process_name} has invalid pid {pid}"))?,
);
let pid_file_to_check = match initial_pid_file {
InitialPidFile::Create(target_pid_file_path) => {
match lock_file::create_lock_file(target_pid_file_path, pid.to_string()) {
lock_file::LockCreationResult::Created { .. } => {
// We use "lock" file here only to create the pid file. The lock on the pidfile will be dropped as soon
// as this CLI invocation exits, so it's a bit useless, but doesn't any harm either.
}
lock_file::LockCreationResult::AlreadyLocked { .. } => {
anyhow::bail!("Cannot write pid file for {process_name} at path {target_pid_file_path:?}: file is already locked by another process")
}
lock_file::LockCreationResult::CreationFailed(e) => {
return Err(e.context(format!(
"Failed to create pid file for {process_name} at path {target_pid_file_path:?}"
)))
}
}
None
}
InitialPidFile::Expect(pid_file_path) => Some(pid_file_path),
};
for retries in 0..RETRIES {
match process_started(pid, Some(pid_file_to_check), &process_status_check) {
match process_started(pid, pid_file_to_check, &process_status_check) {
Ok(true) => {
println!("\n{process_name} started, pid: {pid}");
return Ok(spawned_process);
@@ -136,45 +147,14 @@ where
anyhow::bail!("{process_name} did not start in {RETRY_UNTIL_SECS} seconds");
}
/// Send SIGTERM to child process
pub fn send_stop_child_process(child: &std::process::Child) -> anyhow::Result<()> {
let pid = child.id();
match kill(
nix::unistd::Pid::from_raw(pid.try_into().unwrap()),
Signal::SIGTERM,
) {
Ok(()) => Ok(()),
Err(Errno::ESRCH) => {
println!("child process with pid {pid} does not exist");
Ok(())
}
Err(e) => anyhow::bail!("Failed to send signal to child process with pid {pid}: {e}"),
}
}
/// Stops the process using the given pid file. Also returns Ok if the process is already not running.
pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> anyhow::Result<()> {
let pid = match pid_file::read(pid_file)
.with_context(|| format!("read pid_file {pid_file:?}"))?
{
PidFileRead::NotExist => {
println!("{process_name} is already stopped: no pid file present at {pid_file:?}");
return Ok(());
}
PidFileRead::NotHeldByAnyProcess(_) => {
// Don't try to kill according to the file contents because the pid might have been re-used by another process.
// Don't delete the file either; that can race with new pid file creation.
// Read the `pid_file` module comment for details.
println!(
"No process is holding the pidfile. The process must have already exited. Leave in place to avoid race conditions: {pid_file:?}"
);
return Ok(());
}
PidFileRead::LockedByOtherProcess(pid) => pid,
};
// XXX the pid could become invalid (and recycled) at any time before the kill() below.
if !pid_file.exists() {
println!("{process_name} is already stopped: no pid file {pid_file:?} is present");
return Ok(());
}
let pid = read_pidfile(pid_file)?;
// send signal
let sig = if immediate {
print!("Stopping {process_name} with pid {pid} immediately..");
Signal::SIGQUIT
@@ -186,9 +166,8 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
match kill(pid, sig) {
Ok(()) => (),
Err(Errno::ESRCH) => {
// Again, don't delete the pid file. The unlink can race with a new pid file being created.
println!(
"{process_name} with pid {pid} does not exist, but a pid file {pid_file:?} was found. Likely the pid got recycled. Lucky we didn't harm anyone."
"{process_name} with pid {pid} does not exist, but a pid file {pid_file:?} was found"
);
return Ok(());
}
@@ -200,6 +179,11 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
match process_has_stopped(pid) {
Ok(true) => {
println!("\n{process_name} stopped");
if let Err(e) = fs::remove_file(pid_file) {
if e.kind() != io::ErrorKind::NotFound {
eprintln!("Failed to remove pid file {pid_file:?} after stopping the process: {e:#}");
}
}
return Ok(());
}
Ok(false) => {
@@ -257,69 +241,6 @@ fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
cmd
}
/// Add a `pre_exec` to the cmd that, in between fork() and exec(),
/// 1. Claims a pidfile with a fcntl lock on it and
/// 2. Sets up the pidfile's file descriptor so that it (and the lock)
/// will remain held until the cmd exits.
fn pre_exec_create_pidfile<P>(cmd: &mut Command, path: P) -> &mut Command
where
P: Into<PathBuf>,
{
let path: PathBuf = path.into();
// SAFETY
// pre_exec is marked unsafe because it runs between fork and exec.
// Why is that dangerous in various ways?
// Long answer: https://github.com/rust-lang/rust/issues/39575
// Short answer: in a multi-threaded program, other threads may have
// been inside of critical sections at the time of fork. In the
// original process, that was all right, assuming they protected
// the critical sections appropriately, e.g., through locks.
// Fork adds another process to the mix that
// 1. Has a single thread T
// 2. In an exact copy of the address space at the time of fork.
// A variety of problems can occur now:
// 1. T tries to grab a lock that was locked at the time of fork.
// It will wait forever since in its address space, the lock
// is in state 'taken' but the thread that would unlock it is
// not there.
// 2. A Rust object that represented some external resource in the
// parent now got implicitly copied by the fork, even though
// the object's type is not `Copy`. The parent program may use
// non-copyability as a way to enforce unique ownership of an
// external resource in the typesystem. The fork breaks that
// assumption, as now both parent and child process have an
// owned instance of the object that represents the same
// underlying resource.
// While these seem like niche problems, (1) in particular is
// highly relevant. For example, `malloc()` may grab a mutex internally,
// and so, if we forked while another thread was malloc'ing and our
// pre_exec closure allocates as well, it will block on the malloc
// mutex forever
//
// The proper solution is to only use C library functions that are marked
// "async-signal-safe": https://man7.org/linux/man-pages/man7/signal-safety.7.html
//
// With this specific pre_exec() closure, the non-error path doesn't allocate.
// The error path uses `anyhow`, and hence does allocate.
// We take our chances there, hoping that any potential disaster is constrained
// to the child process (e.g., malloc has no state outside of the child process).
// Last, `expect` prints to stderr, and stdio is not async-signal-safe.
// Again, we take our chances, making the same assumptions as for malloc.
unsafe {
cmd.pre_exec(move || {
let file = pid_file::claim_for_current_process(&path).expect("claim pid file");
// Remove the FD_CLOEXEC flag on the pidfile descriptor so that the pidfile
// remains locked after exec.
nix::fcntl::fcntl(file.as_raw_fd(), FcntlArg::F_SETFD(FdFlag::empty()))
.expect("remove FD_CLOEXEC");
// Don't run drop(file), it would close the file before we actually exec.
std::mem::forget(file);
Ok(())
});
}
cmd
}
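To make the claim-and-hold idea above concrete, here is a minimal, hypothetical sketch of what a `claim_for_current_process`-style helper could look like using the `nix` crate directly; the real `utils::pid_file` implementation may differ in details such as error handling and fsync.

use std::{fs, io::Write, os::unix::prelude::AsRawFd, path::Path};

use nix::fcntl::{flock, FlockArg};

fn claim_pid_file_sketch(path: &Path) -> anyhow::Result<fs::File> {
    let mut file = fs::OpenOptions::new().create(true).write(true).open(path)?;
    // Non-blocking exclusive flock: fails immediately if another process holds it.
    flock(file.as_raw_fd(), FlockArg::LockExclusiveNonblock)?;
    // Overwrite any stale contents with our own PID.
    file.set_len(0)?;
    write!(file, "{}", std::process::id())?;
    // The lock lives as long as the open file description, so returning the
    // handle (and, after exec, keeping the fd open) keeps the pidfile claimed.
    Ok(file)
}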
fn process_started<F>(
pid: Pid,
pid_file_to_check: Option<&Path>,
@@ -330,11 +251,14 @@ where
{
match status_check() {
Ok(true) => match pid_file_to_check {
Some(pid_file_path) => match pid_file::read(pid_file_path)? {
PidFileRead::NotExist => Ok(false),
PidFileRead::LockedByOtherProcess(pid_in_file) => Ok(pid_in_file == pid),
PidFileRead::NotHeldByAnyProcess(_) => Ok(false),
},
Some(pid_file_path) => {
if pid_file_path.exists() {
let pid_in_file = read_pidfile(pid_file_path)?;
Ok(pid_in_file == pid)
} else {
Ok(false)
}
}
None => Ok(true),
},
Ok(false) => Ok(false),
@@ -342,6 +266,21 @@ where
}
}
/// Read a PID file
///
/// We expect a file that contains a single integer.
fn read_pidfile(pidfile: &Path) -> Result<Pid> {
let pid_str = fs::read_to_string(pidfile)
.with_context(|| format!("failed to read pidfile {pidfile:?}"))?;
let pid: i32 = pid_str
.parse()
.map_err(|_| anyhow!("failed to parse pidfile {pidfile:?}"))?;
if pid < 1 {
bail!("pidfile {pidfile:?} contained bad value '{pid}'");
}
Ok(Pid::from_raw(pid))
}
fn process_has_stopped(pid: Pid) -> anyhow::Result<bool> {
match kill(pid, None) {
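// Passing `None` sends no signal at all (the classic `kill -0` check): it only
// reports whether a process with this pid still exists.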
// Process exists, keep waiting

View File

@@ -324,7 +324,7 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
pg_version,
)
.unwrap_or_else(|e| {
eprintln!("pageserver init failed: {e:?}");
eprintln!("pageserver init failed: {e}");
exit(1);
});

View File

@@ -1,12 +1,12 @@
use std::collections::HashMap;
use std::fs::File;
use std::fs::{self, File};
use std::io::{BufReader, Write};
use std::num::NonZeroU64;
use std::path::{Path, PathBuf};
use std::process::Child;
use std::{io, result};
use anyhow::{bail, ensure, Context};
use anyhow::{bail, Context};
use pageserver_api::models::{
TenantConfigRequest, TenantCreateRequest, TenantInfo, TimelineCreateRequest, TimelineInfo,
};
@@ -168,21 +168,29 @@ impl PageServerNode {
}
Err(e) => eprintln!("{e:#}"),
}
background_process::send_stop_child_process(&pageserver_process)?;
let exit_code = pageserver_process.wait()?;
ensure!(
exit_code.success(),
format!(
"pageserver init failed with exit code {:?}",
exit_code.code()
)
);
println!(
"Stopped pageserver {} process with pid {}",
self.env.pageserver.id,
pageserver_process.id(),
);
match pageserver_process.kill() {
Err(e) => {
eprintln!(
"Failed to stop pageserver {} process with pid {}: {e:#}",
self.env.pageserver.id,
pageserver_process.id(),
)
}
Ok(()) => {
println!(
"Stopped pageserver {} process with pid {}",
self.env.pageserver.id,
pageserver_process.id(),
);
// Clean up after pageserver startup, since we do not call the regular `stop_process` during init
let pid_file = self.pid_file();
if let Err(e) = fs::remove_file(&pid_file) {
if e.kind() != io::ErrorKind::NotFound {
eprintln!("Failed to remove pid file {pid_file:?} after stopping the process: {e:#}");
}
}
}
}
init_result
}

View File

@@ -45,9 +45,9 @@ and create new databases and accounts (control plane API in our case).
Integration tests, written in Python using the `pytest` framework.
`/vendor/postgres-v14` and `/vendor/postgres-v15`:
`/vendor/postgres-v14`:
PostgreSQL source tree per version, with the modifications needed for Neon.
PostgreSQL source tree, with the modifications needed for Neon.
`/pgxn/neon`:

View File

@@ -201,6 +201,8 @@ pub struct TimelineInfo {
pub last_received_msg_ts: Option<u128>,
pub pg_version: u32,
pub awaits_download: bool,
pub state: TimelineState,
// Some of the above fields are duplicated in 'local' and 'remote', for backwards-

View File

@@ -1,6 +1,7 @@
use anyhow::*;
use core::time::Duration;
use log::*;
use once_cell::sync::Lazy;
use postgres::types::PgLsn;
use postgres::Client;
use postgres_ffi::{WAL_SEGMENT_SIZE, XLOG_BLCKSZ};
@@ -25,13 +26,15 @@ pub struct PostgresServer {
client_config: postgres::Config,
}
pub static REQUIRED_POSTGRES_CONFIG: [&str; 4] = [
"wal_keep_size=50MB", // Ensure old WAL is not removed
"shared_preload_libraries=neon", // can only be loaded at startup
// Disable background processes as much as possible
"wal_writer_delay=10s",
"autovacuum=off",
];
pub static REQUIRED_POSTGRES_CONFIG: Lazy<Vec<&'static str>> = Lazy::new(|| {
vec![
"wal_keep_size=50MB", // Ensure old WAL is not removed
"shared_preload_libraries=neon", // can only be loaded at startup
// Disable background processes as much as possible
"wal_writer_delay=10s",
"autovacuum=off",
]
});
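As background (an illustrative sketch, not part of this diff): `once_cell::sync::Lazy` runs its initializer once, on first access, which is what allows the non-const `vec![...]` above to back a `static`.

use once_cell::sync::Lazy;

// The closure runs on the first dereference; later accesses reuse the value.
static DEFAULTS: Lazy<Vec<String>> = Lazy::new(|| {
    vec![format!("wal_writer_delay={}s", 10), "autovacuum=off".to_string()]
});

fn main() {
    assert_eq!(DEFAULTS.len(), 2);
}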
impl Conf {
pub fn pg_distrib_dir(&self) -> anyhow::Result<PathBuf> {

View File

@@ -9,11 +9,8 @@ async-trait = "0.1"
metrics = { version = "0.1", path = "../metrics" }
utils = { version = "0.1", path = "../utils" }
once_cell = "1.13.0"
aws-smithy-http = "0.51.0"
aws-types = "0.51.0"
aws-config = { version = "0.51.0", default-features = false, features=["rustls"] }
aws-sdk-s3 = "0.21.0"
hyper = { version = "0.14", features = ["stream"] }
rusoto_core = "0.48"
rusoto_s3 = "0.48"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
tokio = { version = "1.17", features = ["sync", "macros", "fs", "io-util"] }

View File

@@ -10,7 +10,7 @@ mod s3_bucket;
use std::{
collections::HashMap,
fmt::Debug,
fmt::{Debug, Display},
num::{NonZeroU32, NonZeroUsize},
ops::Deref,
path::{Path, PathBuf},
@@ -41,27 +41,44 @@ pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
const REMOTE_STORAGE_PREFIX_SEPARATOR: char = '/';
/// Path on the remote storage, relative to some inner prefix.
/// The prefix is an implementation detail that allows representing local paths
/// as the remote ones, stripping the local storage prefix away.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RemotePath(PathBuf);
impl RemotePath {
pub fn new(relative_path: &Path) -> anyhow::Result<Self> {
anyhow::ensure!(
relative_path.is_relative(),
"Path {relative_path:?} is not relative"
);
Ok(Self(relative_path.to_path_buf()))
}
pub fn with_base(&self, base_path: &Path) -> PathBuf {
base_path.join(&self.0)
}
#[derive(Clone, PartialEq, Eq)]
pub struct RemoteObjectId(String);
///
/// A key that refers to an object in remote storage. It works much like a Path,
/// but it's a separate datatype so that you don't accidentally mix local paths
/// and remote keys.
///
impl RemoteObjectId {
// Needed to retrieve last component for RemoteObjectId.
// In other words, a file name.
/// Turn a/b/c or a/b/c/ into c
pub fn object_name(&self) -> Option<&str> {
self.0.file_name().and_then(|os_str| os_str.to_str())
// corner case: char::to_string is not const, that's why this is more verbose than it needs to be
// see https://github.com/rust-lang/rust/issues/88674
if self.0.len() == 1 && self.0.chars().next().unwrap() == REMOTE_STORAGE_PREFIX_SEPARATOR {
return None;
}
if self.0.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {
self.0.rsplit(REMOTE_STORAGE_PREFIX_SEPARATOR).nth(1)
} else {
self.0
.rsplit_once(REMOTE_STORAGE_PREFIX_SEPARATOR)
.map(|(_, last)| last)
}
}
}
impl Debug for RemoteObjectId {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
Debug::fmt(&self.0, fmt)
}
}
impl Display for RemoteObjectId {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
Display::fmt(&self.0, fmt)
}
}
@@ -70,40 +87,49 @@ impl RemotePath {
/// providing basic CRUD operations for storage files.
#[async_trait::async_trait]
pub trait RemoteStorage: Send + Sync + 'static {
/// Attempts to derive the storage path out of the local path, if the latter is correct.
fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<RemoteObjectId>;
/// Gets the download path of the given storage file.
fn local_path(&self, remote_object_id: &RemoteObjectId) -> anyhow::Result<PathBuf>;
/// Lists all items the storage has right now.
async fn list(&self) -> anyhow::Result<Vec<RemotePath>>;
async fn list(&self) -> anyhow::Result<Vec<RemoteObjectId>>;
/// Lists all top level subdirectories for a given prefix
/// Note: here we assume that if the prefix is passed it was obtained via remote_object_id
/// which already takes into account any kind of global prefix (prefix_in_bucket for S3 or storage_root for LocalFS)
/// so this method doesn't need to.
async fn list_prefixes(&self, prefix: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>>;
async fn list_prefixes(
&self,
prefix: Option<&RemoteObjectId>,
) -> anyhow::Result<Vec<RemoteObjectId>>;
/// Streams the local file contents into the remote storage entry.
async fn upload(
&self,
data: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
from: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
// S3 PUT request requires the content length to be specified,
// otherwise it starts to fail with the concurrent connection count increasing.
data_size_bytes: usize,
to: &RemotePath,
from_size_bytes: usize,
to: &RemoteObjectId,
metadata: Option<StorageMetadata>,
) -> anyhow::Result<()>;
/// Streams the remote storage entry contents into the buffered writer given, returns the filled writer.
/// Returns the metadata, if any was stored with the file previously.
async fn download(&self, from: &RemotePath) -> Result<Download, DownloadError>;
async fn download(&self, from: &RemoteObjectId) -> Result<Download, DownloadError>;
/// Streams a given byte range of the remote storage entry contents into the buffered writer given, returns the filled writer.
/// Returns the metadata, if any was stored with the file previously.
async fn download_byte_range(
&self,
from: &RemotePath,
from: &RemoteObjectId,
start_inclusive: u64,
end_exclusive: Option<u64>,
) -> Result<Download, DownloadError>;
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()>;
async fn delete(&self, path: &RemoteObjectId) -> anyhow::Result<()>;
/// Downcast to LocalFs implementation. For tests.
fn as_local(&self) -> Option<&LocalFs> {
@@ -152,35 +178,34 @@ impl std::error::Error for DownloadError {}
/// Every storage, currently supported.
/// Serves as a simple way to pass around the [`RemoteStorage`] without dealing with generics.
#[derive(Clone)]
pub enum GenericRemoteStorage {
LocalFs(LocalFs),
AwsS3(Arc<S3Bucket>),
}
pub struct GenericRemoteStorage(Arc<dyn RemoteStorage>);
impl Deref for GenericRemoteStorage {
type Target = dyn RemoteStorage;
fn deref(&self) -> &Self::Target {
match self {
GenericRemoteStorage::LocalFs(local_fs) => local_fs,
GenericRemoteStorage::AwsS3(s3_bucket) => s3_bucket.as_ref(),
}
self.0.as_ref()
}
}
impl GenericRemoteStorage {
pub fn new(storage: impl RemoteStorage) -> Self {
Self(Arc::new(storage))
}
pub fn from_config(
working_directory: PathBuf,
storage_config: &RemoteStorageConfig,
) -> anyhow::Result<GenericRemoteStorage> {
Ok(match &storage_config.storage {
RemoteStorageKind::LocalFs(root) => {
info!("Using fs root '{}' as a remote storage", root.display());
GenericRemoteStorage::LocalFs(LocalFs::new(root.clone())?)
GenericRemoteStorage::new(LocalFs::new(root.clone(), working_directory)?)
}
RemoteStorageKind::AwsS3(s3_config) => {
info!("Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}'",
s3_config.bucket_name, s3_config.bucket_region, s3_config.prefix_in_bucket, s3_config.endpoint);
GenericRemoteStorage::AwsS3(Arc::new(S3Bucket::new(s3_config)?))
GenericRemoteStorage::new(S3Bucket::new(s3_config, working_directory)?)
}
})
}
@@ -194,12 +219,23 @@ impl GenericRemoteStorage {
&self,
from: Box<dyn tokio::io::AsyncRead + Unpin + Send + Sync + 'static>,
from_size_bytes: usize,
to: &RemotePath,
from_path: &Path,
) -> anyhow::Result<()> {
self.upload(from, from_size_bytes, to, None)
let target_storage_path = self.remote_object_id(from_path).with_context(|| {
format!(
"Failed to get the storage path for source local path '{}'",
from_path.display()
)
})?;
self.upload(from, from_size_bytes, &target_storage_path, None)
.await
.with_context(|| {
format!("Failed to upload data of length {from_size_bytes} to storage path {to:?}")
format!(
"Failed to upload from '{}' to storage path '{:?}'",
from_path.display(),
target_storage_path
)
})
}
@@ -208,11 +244,24 @@ impl GenericRemoteStorage {
pub async fn download_storage_object(
&self,
byte_range: Option<(u64, Option<u64>)>,
from: &RemotePath,
to_path: &Path,
) -> Result<Download, DownloadError> {
let remote_object_path = self
.remote_object_id(to_path)
.with_context(|| {
format!(
"Failed to get the storage path for target local path '{}'",
to_path.display()
)
})
.map_err(DownloadError::BadInput)?;
match byte_range {
Some((start, end)) => self.download_byte_range(from, start, end).await,
None => self.download(from).await,
Some((start, end)) => {
self.download_byte_range(&remote_object_path, start, end)
.await
}
None => self.download(&remote_object_path).await,
}
}
}
@@ -222,6 +271,23 @@ impl GenericRemoteStorage {
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StorageMetadata(HashMap<String, String>);
fn strip_path_prefix<'a>(prefix: &'a Path, path: &'a Path) -> anyhow::Result<&'a Path> {
if prefix == path {
anyhow::bail!(
"Prefix and the path are equal, cannot strip: '{}'",
prefix.display()
)
} else {
path.strip_prefix(prefix).with_context(|| {
format!(
"Path '{}' is not prefixed with '{}'",
path.display(),
prefix.display(),
)
})
}
}
/// External backup storage configuration, enough for creating a client for that storage.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RemoteStorageConfig {
@@ -365,24 +431,21 @@ mod tests {
use super::*;
#[test]
fn test_object_name() {
let k = RemotePath::new(Path::new("a/b/c")).unwrap();
fn object_name() {
let k = RemoteObjectId("a/b/c".to_owned());
assert_eq!(k.object_name(), Some("c"));
let k = RemotePath::new(Path::new("a/b/c/")).unwrap();
let k = RemoteObjectId("a/b/c/".to_owned());
assert_eq!(k.object_name(), Some("c"));
let k = RemotePath::new(Path::new("a/")).unwrap();
let k = RemoteObjectId("a/".to_owned());
assert_eq!(k.object_name(), Some("a"));
// XXX is it impossible to have an empty key?
let k = RemotePath::new(Path::new("")).unwrap();
let k = RemoteObjectId("".to_owned());
assert_eq!(k.object_name(), None);
let k = RemoteObjectId("/".to_owned());
assert_eq!(k.object_name(), None);
}
#[test]
fn remote_path_cannot_be_created_from_absolute_ones() {
let err = RemotePath::new(Path::new("/")).expect_err("Should fail on absolute paths");
assert_eq!(err.to_string(), "Path \"/\" is not relative");
}
}

View File

@@ -5,7 +5,6 @@
//! volume is mounted to the local FS.
use std::{
borrow::Cow,
future::Future,
path::{Path, PathBuf},
pin::Pin,
@@ -19,33 +18,60 @@ use tokio::{
use tracing::*;
use utils::crashsafe::path_with_suffix_extension;
use crate::{Download, DownloadError, RemotePath};
use crate::{Download, DownloadError, RemoteObjectId};
use super::{RemoteStorage, StorageMetadata};
use super::{strip_path_prefix, RemoteStorage, StorageMetadata};
const LOCAL_FS_TEMP_FILE_SUFFIX: &str = "___temp";
#[derive(Debug, Clone)]
/// Convert a Path in the remote storage into a RemoteObjectId
fn remote_object_id_from_path(path: &Path) -> anyhow::Result<RemoteObjectId> {
Ok(RemoteObjectId(
path.to_str()
.ok_or_else(|| anyhow::anyhow!("unexpected characters found in path"))?
.to_string(),
))
}
pub struct LocalFs {
working_directory: PathBuf,
storage_root: PathBuf,
}
impl LocalFs {
/// Attempts to create local FS storage, along with its root directory.
/// Storage root will be created (if it does not exist) and transformed into an absolute path (if passed as relative).
pub fn new(mut storage_root: PathBuf) -> anyhow::Result<Self> {
if !storage_root.exists() {
std::fs::create_dir_all(&storage_root).with_context(|| {
format!("Failed to create all directories in the given root path {storage_root:?}")
})?;
}
if !storage_root.is_absolute() {
storage_root = storage_root.canonicalize().with_context(|| {
format!("Failed to represent path {storage_root:?} as an absolute path")
pub fn new(root: PathBuf, working_directory: PathBuf) -> anyhow::Result<Self> {
if !root.exists() {
std::fs::create_dir_all(&root).with_context(|| {
format!(
"Failed to create all directories in the given root path '{}'",
root.display(),
)
})?;
}
Ok(Self {
working_directory,
storage_root: root,
})
}
Ok(Self { storage_root })
///
/// Get the absolute path in the local filesystem to given remote object.
///
/// This is public so that it can be used in tests. Should not be used elsewhere.
///
pub fn resolve_in_storage(&self, remote_object_id: &RemoteObjectId) -> anyhow::Result<PathBuf> {
let path = PathBuf::from(&remote_object_id.0);
if path.is_relative() {
Ok(self.storage_root.join(path))
} else if path.starts_with(&self.storage_root) {
Ok(path)
} else {
bail!(
"Path '{}' does not belong to the current storage",
path.display()
)
}
}
async fn read_storage_metadata(
@@ -77,48 +103,45 @@ impl LocalFs {
#[async_trait::async_trait]
impl RemoteStorage for LocalFs {
async fn list(&self) -> anyhow::Result<Vec<RemotePath>> {
Ok(get_all_files(&self.storage_root, true)
.await?
.into_iter()
.map(|path| {
path.strip_prefix(&self.storage_root)
.context("Failed to strip storage root prefix")
.and_then(RemotePath::new)
.expect(
"We list files for storage root, hence should be able to remote the prefix",
)
})
.collect())
/// Convert a "local" path into a "remote path"
fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<RemoteObjectId> {
let path = self.storage_root.join(
strip_path_prefix(&self.working_directory, local_path)
.context("local path does not belong to this storage")?,
);
remote_object_id_from_path(&path)
}
async fn list_prefixes(&self, prefix: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
fn local_path(&self, remote_object_id: &RemoteObjectId) -> anyhow::Result<PathBuf> {
let storage_path = PathBuf::from(&remote_object_id.0);
let relative_path = strip_path_prefix(&self.storage_root, &storage_path)
.context("local path does not belong to this storage")?;
Ok(self.working_directory.join(relative_path))
}
async fn list(&self) -> anyhow::Result<Vec<RemoteObjectId>> {
get_all_files(&self.storage_root, true).await
}
async fn list_prefixes(
&self,
prefix: Option<&RemoteObjectId>,
) -> anyhow::Result<Vec<RemoteObjectId>> {
let path = match prefix {
Some(prefix) => Cow::Owned(prefix.with_base(&self.storage_root)),
None => Cow::Borrowed(&self.storage_root),
Some(prefix) => Path::new(&prefix.0),
None => &self.storage_root,
};
Ok(get_all_files(path.as_ref(), false)
.await?
.into_iter()
.map(|path| {
path.strip_prefix(&self.storage_root)
.context("Failed to strip preifix")
.and_then(RemotePath::new)
.expect(
"We list files for storage root, hence should be able to remote the prefix",
)
})
.collect())
get_all_files(path, false).await
}
async fn upload(
&self,
data: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
data_size_bytes: usize,
to: &RemotePath,
from: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
from_size_bytes: usize,
to: &RemoteObjectId,
metadata: Option<StorageMetadata>,
) -> anyhow::Result<()> {
let target_file_path = to.with_base(&self.storage_root);
let target_file_path = self.resolve_in_storage(to)?;
create_target_directory(&target_file_path).await?;
// We need this dance with sort of durable rename (without fsyncs)
// to prevent partial uploads. This was really hit when pageserver shutdown
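A minimal sketch (not part of this change) of the durable-rename dance the comment above refers to; the names below are illustrative, and the real code builds the temporary name with `path_with_suffix_extension` and `LOCAL_FS_TEMP_FILE_SUFFIX`.

use std::path::Path;

use tokio::{fs, io::AsyncWriteExt};

// Write the payload to a temporary sibling first, then rename it into place.
// A rename within one filesystem is atomic, so readers never observe a
// partially written file, even if the writer dies mid-upload.
async fn atomic_write_sketch(target: &Path, payload: &[u8]) -> anyhow::Result<()> {
    let temp_path = target.with_extension("___temp");
    let mut temp_file = fs::File::create(&temp_path).await?;
    temp_file.write_all(payload).await?;
    fs::rename(&temp_path, target).await?;
    Ok(())
}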
@@ -139,8 +162,8 @@ impl RemoteStorage for LocalFs {
})?,
);
let from_size_bytes = data_size_bytes as u64;
let mut buffer_to_read = data.take(from_size_bytes);
let from_size_bytes = from_size_bytes as u64;
let mut buffer_to_read = from.take(from_size_bytes);
let bytes_read = io::copy(&mut buffer_to_read, &mut destination)
.await
@@ -197,22 +220,27 @@ impl RemoteStorage for LocalFs {
Ok(())
}
async fn download(&self, from: &RemotePath) -> Result<Download, DownloadError> {
let target_path = from.with_base(&self.storage_root);
if file_exists(&target_path).map_err(DownloadError::BadInput)? {
async fn download(&self, from: &RemoteObjectId) -> Result<Download, DownloadError> {
let file_path = self
.resolve_in_storage(from)
.map_err(DownloadError::BadInput)?;
if file_exists(&file_path).map_err(DownloadError::BadInput)? {
let source = io::BufReader::new(
fs::OpenOptions::new()
.read(true)
.open(&target_path)
.open(&file_path)
.await
.with_context(|| {
format!("Failed to open source file {target_path:?} to use in the download")
format!(
"Failed to open source file '{}' to use in the download",
file_path.display()
)
})
.map_err(DownloadError::Other)?,
);
let metadata = self
.read_storage_metadata(&target_path)
.read_storage_metadata(&file_path)
.await
.map_err(DownloadError::Other)?;
Ok(Download {
@@ -226,7 +254,7 @@ impl RemoteStorage for LocalFs {
async fn download_byte_range(
&self,
from: &RemotePath,
from: &RemoteObjectId,
start_inclusive: u64,
end_exclusive: Option<u64>,
) -> Result<Download, DownloadError> {
@@ -238,15 +266,20 @@ impl RemoteStorage for LocalFs {
return Err(DownloadError::Other(anyhow::anyhow!("Invalid range, start ({start_inclusive}) and end_exclusive ({end_exclusive:?}) difference is zero bytes")));
}
}
let target_path = from.with_base(&self.storage_root);
if file_exists(&target_path).map_err(DownloadError::BadInput)? {
let file_path = self
.resolve_in_storage(from)
.map_err(DownloadError::BadInput)?;
if file_exists(&file_path).map_err(DownloadError::BadInput)? {
let mut source = io::BufReader::new(
fs::OpenOptions::new()
.read(true)
.open(&target_path)
.open(&file_path)
.await
.with_context(|| {
format!("Failed to open source file {target_path:?} to use in the download")
format!(
"Failed to open source file '{}' to use in the download",
file_path.display()
)
})
.map_err(DownloadError::Other)?,
);
@@ -256,7 +289,7 @@ impl RemoteStorage for LocalFs {
.context("Failed to seek to the range start in a local storage file")
.map_err(DownloadError::Other)?;
let metadata = self
.read_storage_metadata(&target_path)
.read_storage_metadata(&file_path)
.await
.map_err(DownloadError::Other)?;
@@ -275,12 +308,15 @@ impl RemoteStorage for LocalFs {
}
}
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {
let file_path = path.with_base(&self.storage_root);
async fn delete(&self, path: &RemoteObjectId) -> anyhow::Result<()> {
let file_path = self.resolve_in_storage(path)?;
if file_path.exists() && file_path.is_file() {
Ok(fs::remove_file(file_path).await?)
} else {
bail!("File {file_path:?} either does not exist or is not a file")
bail!(
"File '{}' either does not exist or is not a file",
file_path.display()
)
}
}
@@ -296,7 +332,7 @@ fn storage_metadata_path(original_path: &Path) -> PathBuf {
fn get_all_files<'a, P>(
directory_path: P,
recursive: bool,
) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<PathBuf>>> + Send + Sync + 'a>>
) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<RemoteObjectId>>> + Send + Sync + 'a>>
where
P: AsRef<Path> + Send + Sync + 'a,
{
@@ -310,20 +346,20 @@ where
let file_type = dir_entry.file_type().await?;
let entry_path = dir_entry.path();
if file_type.is_symlink() {
debug!("{entry_path:?} us a symlink, skipping")
debug!("{:?} us a symlink, skipping", entry_path)
} else if file_type.is_dir() {
if recursive {
paths.extend(get_all_files(&entry_path, true).await?.into_iter())
} else {
paths.push(entry_path)
paths.push(remote_object_id_from_path(&dir_entry.path())?)
}
} else {
paths.push(entry_path);
paths.push(remote_object_id_from_path(&dir_entry.path())?);
}
}
Ok(paths)
} else {
bail!("Path {directory_path:?} is not a directory")
bail!("Path '{}' is not a directory", directory_path.display())
}
} else {
Ok(Vec::new())
@@ -358,6 +394,173 @@ fn file_exists(file_path: &Path) -> anyhow::Result<bool> {
}
}
#[cfg(test)]
mod pure_tests {
use tempfile::tempdir;
use super::*;
#[test]
fn storage_path_positive() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let storage_root = PathBuf::from("somewhere").join("else");
let storage = LocalFs {
working_directory: workdir.clone(),
storage_root: storage_root.clone(),
};
let local_path = workdir
.join("timelines")
.join("some_timeline")
.join("file_name");
let expected_path = storage_root.join(local_path.strip_prefix(&workdir)?);
let actual_path = PathBuf::from(
storage
.remote_object_id(&local_path)
.expect("Matching path should map to storage path normally")
.0,
);
assert_eq!(
expected_path,
actual_path,
"File paths from workdir should be stored in local fs storage with the same path they have relative to the workdir"
);
Ok(())
}
#[test]
fn storage_path_negatives() -> anyhow::Result<()> {
#[track_caller]
fn storage_path_error(storage: &LocalFs, mismatching_path: &Path) -> String {
match storage.remote_object_id(mismatching_path) {
Ok(wrong_path) => panic!(
"Expected path '{}' to error, but got storage path: {:?}",
mismatching_path.display(),
wrong_path,
),
Err(e) => format!("{:?}", e),
}
}
let workdir = tempdir()?.path().to_owned();
let storage_root = PathBuf::from("somewhere").join("else");
let storage = LocalFs {
working_directory: workdir.clone(),
storage_root,
};
let error_string = storage_path_error(&storage, &workdir);
assert!(error_string.contains("does not belong to this storage"));
assert!(error_string.contains(workdir.to_str().unwrap()));
let mismatching_path_str = "/something/else";
let error_message = storage_path_error(&storage, Path::new(mismatching_path_str));
assert!(
error_message.contains(mismatching_path_str),
"Error should mention wrong path"
);
assert!(
error_message.contains(workdir.to_str().unwrap()),
"Error should mention server workdir"
);
assert!(error_message.contains("does not belong to this storage"));
Ok(())
}
#[test]
fn local_path_positive() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let storage_root = PathBuf::from("somewhere").join("else");
let storage = LocalFs {
working_directory: workdir.clone(),
storage_root: storage_root.clone(),
};
let name = "not a metadata";
let local_path = workdir.join("timelines").join("some_timeline").join(name);
assert_eq!(
local_path,
storage
.local_path(&remote_object_id_from_path(
&storage_root.join(local_path.strip_prefix(&workdir)?)
)?)
.expect("For a valid input, valid local path should be parsed"),
"Should be able to parse metadata out of the correctly named remote delta file"
);
let local_metadata_path = workdir
.join("timelines")
.join("some_timeline")
.join("metadata");
let remote_metadata_path = storage.remote_object_id(&local_metadata_path)?;
assert_eq!(
local_metadata_path,
storage
.local_path(&remote_metadata_path)
.expect("For a valid input, valid local path should be parsed"),
"Should be able to parse metadata out of the correctly named remote metadata file"
);
Ok(())
}
#[test]
fn local_path_negatives() -> anyhow::Result<()> {
#[track_caller]
fn local_path_error(storage: &LocalFs, storage_path: &RemoteObjectId) -> String {
match storage.local_path(storage_path) {
Ok(wrong_path) => panic!(
"Expected local path input {:?} to cause an error, but got file path: {:?}",
storage_path, wrong_path,
),
Err(e) => format!("{:?}", e),
}
}
let storage_root = PathBuf::from("somewhere").join("else");
let storage = LocalFs {
working_directory: tempdir()?.path().to_owned(),
storage_root,
};
let totally_wrong_path = "wrong_wrong_wrong";
let error_message =
local_path_error(&storage, &RemoteObjectId(totally_wrong_path.to_string()));
assert!(error_message.contains(totally_wrong_path));
Ok(())
}
#[test]
fn download_destination_matches_original_path() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let original_path = workdir
.join("timelines")
.join("some_timeline")
.join("some name");
let storage_root = PathBuf::from("somewhere").join("else");
let dummy_storage = LocalFs {
working_directory: workdir,
storage_root,
};
let storage_path = dummy_storage.remote_object_id(&original_path)?;
let download_destination = dummy_storage.local_path(&storage_path)?;
assert_eq!(
original_path, download_destination,
"'original path -> storage path -> matching fs path' transformation should produce the same path as the input one for the correct path"
);
Ok(())
}
}
#[cfg(test)]
mod fs_tests {
use super::*;
@@ -369,7 +572,7 @@ mod fs_tests {
storage: &LocalFs,
#[allow(clippy::ptr_arg)]
// have to use &PathBuf due to `storage.local_path` parameter requirements
remote_storage_path: &RemotePath,
remote_storage_path: &RemoteObjectId,
expected_metadata: Option<&StorageMetadata>,
) -> anyhow::Result<String> {
let mut download = storage
@@ -392,16 +595,41 @@ mod fs_tests {
#[tokio::test]
async fn upload_file() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let storage = create_storage()?;
let target_path_1 = upload_dummy_file(&storage, "upload_1", None).await?;
let (file, size) = create_file_for_upload(
&storage.working_directory.join("whatever"),
"whatever_contents",
)
.await?;
let target_path = "/somewhere/else";
match storage
.upload(
Box::new(file),
size,
&RemoteObjectId(target_path.to_string()),
None,
)
.await
{
Ok(()) => panic!("Should not allow storing files with wrong target path"),
Err(e) => {
let message = format!("{:?}", e);
assert!(message.contains(target_path));
assert!(message.contains("does not belong to the current storage"));
}
}
assert!(storage.list().await?.is_empty());
let target_path_1 = upload_dummy_file(&workdir, &storage, "upload_1", None).await?;
assert_eq!(
storage.list().await?,
vec![target_path_1.clone()],
"Should list a single file after first upload"
);
let target_path_2 = upload_dummy_file(&storage, "upload_2", None).await?;
let target_path_2 = upload_dummy_file(&workdir, &storage, "upload_2", None).await?;
assert_eq!(
list_files_sorted(&storage).await?,
vec![target_path_1.clone(), target_path_2.clone()],
@@ -415,7 +643,7 @@ mod fs_tests {
async fn upload_file_negatives() -> anyhow::Result<()> {
let storage = create_storage()?;
let id = RemotePath::new(Path::new("dummy"))?;
let id = storage.remote_object_id(&storage.working_directory.join("dummy"))?;
let content = std::io::Cursor::new(b"12345");
// Check that you get an error if the size parameter doesn't match the actual
@@ -440,14 +668,16 @@ mod fs_tests {
}
fn create_storage() -> anyhow::Result<LocalFs> {
LocalFs::new(tempdir()?.path().to_owned())
LocalFs::new(tempdir()?.path().to_owned(), tempdir()?.path().to_owned())
}
#[tokio::test]
async fn download_file() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let storage = create_storage()?;
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&storage, upload_name, None).await?;
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
let contents = read_and_assert_remote_file_contents(&storage, &upload_target, None).await?;
assert_eq!(
@@ -457,7 +687,7 @@ mod fs_tests {
);
let non_existing_path = "somewhere/else";
match storage.download(&RemotePath::new(Path::new(non_existing_path))?).await {
match storage.download(&RemoteObjectId(non_existing_path.to_string())).await {
Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys
other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"),
}
@@ -466,9 +696,11 @@ mod fs_tests {
#[tokio::test]
async fn download_file_range_positive() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let storage = create_storage()?;
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&storage, upload_name, None).await?;
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
let full_range_download_contents =
read_and_assert_remote_file_contents(&storage, &upload_target, None).await?;
@@ -534,9 +766,11 @@ mod fs_tests {
#[tokio::test]
async fn download_file_range_negative() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let storage = create_storage()?;
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&storage, upload_name, None).await?;
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
let start = 1_000_000_000;
let end = start + 1;
@@ -578,9 +812,11 @@ mod fs_tests {
#[tokio::test]
async fn delete_file() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let storage = create_storage()?;
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&storage, upload_name, None).await?;
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
storage.delete(&upload_target).await?;
assert!(storage.list().await?.is_empty());
@@ -590,8 +826,7 @@ mod fs_tests {
Err(e) => {
let error_string = e.to_string();
assert!(error_string.contains("does not exist"));
let expected_path = upload_target.with_base(&storage.storage_root);
assert!(error_string.contains(expected_path.to_str().unwrap()));
assert!(error_string.contains(&upload_target.0));
}
}
Ok(())
@@ -599,6 +834,8 @@ mod fs_tests {
#[tokio::test]
async fn file_with_metadata() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let storage = create_storage()?;
let upload_name = "upload_1";
let metadata = StorageMetadata(HashMap::from([
@@ -606,7 +843,7 @@ mod fs_tests {
("two".to_string(), "2".to_string()),
]));
let upload_target =
upload_dummy_file(&storage, upload_name, Some(metadata.clone())).await?;
upload_dummy_file(&workdir, &storage, upload_name, Some(metadata.clone())).await?;
let full_range_download_contents =
read_and_assert_remote_file_contents(&storage, &upload_target, Some(&metadata)).await?;
@@ -646,32 +883,23 @@ mod fs_tests {
}
async fn upload_dummy_file(
workdir: &Path,
storage: &LocalFs,
name: &str,
metadata: Option<StorageMetadata>,
) -> anyhow::Result<RemotePath> {
let from_path = storage
.storage_root
.join("timelines")
.join("some_timeline")
.join(name);
) -> anyhow::Result<RemoteObjectId> {
let timeline_path = workdir.join("timelines").join("some_timeline");
let relative_timeline_path = timeline_path.strip_prefix(&workdir)?;
let storage_path = storage.storage_root.join(relative_timeline_path).join(name);
let remote_object_id = RemoteObjectId(storage_path.to_str().unwrap().to_string());
let from_path = storage.working_directory.join(name);
let (file, size) = create_file_for_upload(&from_path, &dummy_contents(name)).await?;
let relative_path = from_path
.strip_prefix(&storage.storage_root)
.context("Failed to strip storage root prefix")
.and_then(RemotePath::new)
.with_context(|| {
format!(
"Failed to resolve remote part of path {:?} for base {:?}",
from_path, storage.storage_root
)
})?;
storage
.upload(Box::new(file), size, &relative_path, metadata)
.upload(Box::new(file), size, &remote_object_id, metadata)
.await?;
Ok(relative_path)
remote_object_id_from_path(&storage_path)
}
async fn create_file_for_upload(
@@ -696,7 +924,7 @@ mod fs_tests {
format!("contents for {name}")
}
async fn list_files_sorted(storage: &LocalFs) -> anyhow::Result<Vec<RemotePath>> {
async fn list_files_sorted(storage: &LocalFs) -> anyhow::Result<Vec<RemoteObjectId>> {
let mut files = storage.list().await?;
files.sort_by(|a, b| a.0.cmp(&b.0));
Ok(files)

View File

@@ -4,34 +4,27 @@
//! allowing multiple api users to independently work with the same S3 bucket, if
//! their bucket prefixes are both specified and different.
use std::env::var;
use std::sync::Arc;
use std::time::Duration;
use std::path::{Path, PathBuf};
use anyhow::Context;
use aws_config::{
environment::credentials::EnvironmentVariableCredentialsProvider, imds,
imds::credentials::ImdsCredentialsProvider, meta::credentials::provide_credentials_fn,
use rusoto_core::{
credential::{InstanceMetadataProvider, StaticProvider},
HttpClient, Region, RusotoError,
};
use aws_sdk_s3::{
config::Config,
error::{GetObjectError, GetObjectErrorKind},
types::{ByteStream, SdkError},
Client, Endpoint, Region,
use rusoto_s3::{
DeleteObjectRequest, GetObjectError, GetObjectRequest, ListObjectsV2Request, PutObjectRequest,
S3Client, StreamingBody, S3,
};
use aws_smithy_http::body::SdkBody;
use aws_types::credentials::{CredentialsError, ProvideCredentials};
use hyper::Body;
use tokio::{io, sync::Semaphore};
use tokio_util::io::ReaderStream;
use tracing::debug;
use super::StorageMetadata;
use crate::{
Download, DownloadError, RemotePath, RemoteStorage, S3Config, REMOTE_STORAGE_PREFIX_SEPARATOR,
strip_path_prefix, Download, DownloadError, RemoteObjectId, RemoteStorage, S3Config,
REMOTE_STORAGE_PREFIX_SEPARATOR,
};
const DEFAULT_IMDS_TIMEOUT: Duration = Duration::from_secs(10);
use super::StorageMetadata;
pub(super) mod metrics {
use metrics::{register_int_counter_vec, IntCounterVec};
@@ -98,9 +91,32 @@ pub(super) mod metrics {
}
}
fn download_destination(
id: &RemoteObjectId,
workdir: &Path,
prefix_to_strip: Option<&str>,
) -> PathBuf {
let path_without_prefix = match prefix_to_strip {
Some(prefix) => id.0.strip_prefix(prefix).unwrap_or_else(|| {
panic!(
"Could not strip prefix '{}' from S3 object key '{}'",
prefix, id.0
)
}),
None => &id.0,
};
workdir.join(
path_without_prefix
.split(REMOTE_STORAGE_PREFIX_SEPARATOR)
.collect::<PathBuf>(),
)
}
/// AWS S3 storage.
pub struct S3Bucket {
client: Client,
workdir: PathBuf,
client: S3Client,
bucket_name: String,
prefix_in_bucket: Option<String>,
// Every request to S3 can be throttled or cancelled, if a certain number of requests per second is exceeded.
@@ -109,53 +125,50 @@ pub struct S3Bucket {
concurrency_limiter: Semaphore,
}
#[derive(Default)]
struct GetObjectRequest {
bucket: String,
key: String,
range: Option<String>,
}
impl S3Bucket {
/// Creates the S3 storage, errors if incorrect AWS S3 configuration provided.
pub fn new(aws_config: &S3Config) -> anyhow::Result<Self> {
pub fn new(aws_config: &S3Config, workdir: PathBuf) -> anyhow::Result<Self> {
debug!(
"Creating s3 remote storage for S3 bucket {}",
aws_config.bucket_name
);
let mut config_builder = Config::builder()
.region(Region::new(aws_config.bucket_region.clone()))
.credentials_provider(provide_credentials_fn(|| async {
match var("AWS_ACCESS_KEY_ID").is_ok() && var("AWS_SECRET_ACCESS_KEY").is_ok() {
true => {
EnvironmentVariableCredentialsProvider::new()
.provide_credentials()
.await
}
false => {
let imds_client = imds::Client::builder()
.connect_timeout(DEFAULT_IMDS_TIMEOUT)
.read_timeout(DEFAULT_IMDS_TIMEOUT)
.build()
.await
.map_err(CredentialsError::unhandled)?;
ImdsCredentialsProvider::builder()
.imds_client(imds_client)
.build()
.provide_credentials()
.await
}
}
}));
let region = match aws_config.endpoint.clone() {
Some(custom_endpoint) => Region::Custom {
name: aws_config.bucket_region.clone(),
endpoint: custom_endpoint,
},
None => aws_config
.bucket_region
.parse::<Region>()
.context("Failed to parse the s3 region from config")?,
};
let request_dispatcher = HttpClient::new().context("Failed to create S3 http client")?;
if let Some(custom_endpoint) = aws_config.endpoint.clone() {
let endpoint = Endpoint::immutable(
custom_endpoint
.parse()
.expect("Failed to parse S3 custom endpoint"),
let access_key_id = std::env::var("AWS_ACCESS_KEY_ID").ok();
let secret_access_key = std::env::var("AWS_SECRET_ACCESS_KEY").ok();
// Session token is used when authorizing through SSO,
// which is typically the case when testing locally on a developer machine
let session_token = std::env::var("AWS_SESSION_TOKEN").ok();
let client = if access_key_id.is_none() && secret_access_key.is_none() {
debug!("Using IAM-based AWS access");
S3Client::new_with(request_dispatcher, InstanceMetadataProvider::new(), region)
} else {
debug!(
"Using credentials-based AWS access. Session token is set: {}",
session_token.is_some()
);
config_builder.set_endpoint_resolver(Some(Arc::new(endpoint)));
}
let client = Client::from_conf(config_builder.build());
S3Client::new_with(
request_dispatcher,
StaticProvider::new(
access_key_id.unwrap_or_default(),
secret_access_key.unwrap_or_default(),
session_token,
None,
),
region,
)
};
let prefix_in_bucket = aws_config.prefix_in_bucket.as_deref().map(|prefix| {
let mut prefix = prefix;
@@ -169,41 +182,16 @@ impl S3Bucket {
}
prefix
});
Ok(Self {
client,
workdir,
bucket_name: aws_config.bucket_name.clone(),
prefix_in_bucket,
concurrency_limiter: Semaphore::new(aws_config.concurrency_limit.get()),
})
}
fn s3_object_to_relative_path(&self, key: &str) -> RemotePath {
let relative_path =
match key.strip_prefix(self.prefix_in_bucket.as_deref().unwrap_or_default()) {
Some(stripped) => stripped,
// we rely on AWS to return properly prefixed paths
// for requests with a certain prefix
None => panic!(
"Key {} does not start with bucket prefix {:?}",
key, self.prefix_in_bucket
),
};
RemotePath(
relative_path
.split(REMOTE_STORAGE_PREFIX_SEPARATOR)
.collect(),
)
}
fn relative_path_to_s3_object(&self, path: &RemotePath) -> String {
let mut full_path = self.prefix_in_bucket.clone().unwrap_or_default();
for segment in path.0.iter() {
full_path.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
full_path.push_str(segment.to_str().unwrap_or_default());
}
full_path
}
async fn download_object(&self, request: GetObjectRequest) -> Result<Download, DownloadError> {
let _guard = self
.concurrency_limiter
@@ -214,33 +202,20 @@ impl S3Bucket {
metrics::inc_get_object();
let get_object = self
.client
.get_object()
.bucket(request.bucket)
.key(request.key)
.set_range(request.range)
.send()
.await;
match get_object {
Ok(object_output) => {
let metadata = object_output.metadata().cloned().map(StorageMetadata);
Ok(Download {
metadata,
download_stream: Box::pin(io::BufReader::new(
object_output.body.into_async_read(),
)),
})
}
Err(SdkError::ServiceError {
err:
GetObjectError {
kind: GetObjectErrorKind::NoSuchKey(..),
..
},
..
}) => Err(DownloadError::NotFound),
match self.client.get_object(request).await {
Ok(object_output) => match object_output.body {
None => {
metrics::inc_get_object_fail();
Err(DownloadError::Other(anyhow::anyhow!(
"Got no body for the S3 object given"
)))
}
Some(body) => Ok(Download {
metadata: object_output.metadata.map(StorageMetadata),
download_stream: Box::pin(io::BufReader::new(body.into_async_read())),
}),
},
Err(RusotoError::Service(GetObjectError::NoSuchKey(_))) => Err(DownloadError::NotFound),
Err(e) => {
metrics::inc_get_object_fail();
Err(DownloadError::Other(anyhow::anyhow!(
@@ -253,7 +228,25 @@ impl S3Bucket {
#[async_trait::async_trait]
impl RemoteStorage for S3Bucket {
async fn list(&self) -> anyhow::Result<Vec<RemotePath>> {
fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<RemoteObjectId> {
let relative_path = strip_path_prefix(&self.workdir, local_path)?;
let mut key = self.prefix_in_bucket.clone().unwrap_or_default();
for segment in relative_path {
key.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
key.push_str(&segment.to_string_lossy());
}
Ok(RemoteObjectId(key))
}
fn local_path(&self, storage_path: &RemoteObjectId) -> anyhow::Result<PathBuf> {
Ok(download_destination(
storage_path,
&self.workdir,
self.prefix_in_bucket.as_deref(),
))
}
async fn list(&self) -> anyhow::Result<Vec<RemoteObjectId>> {
let mut document_keys = Vec::new();
let mut continuation_token = None;
@@ -268,11 +261,12 @@ impl RemoteStorage for S3Bucket {
let fetch_response = self
.client
.list_objects_v2()
.bucket(self.bucket_name.clone())
.set_prefix(self.prefix_in_bucket.clone())
.set_continuation_token(continuation_token)
.send()
.list_objects_v2(ListObjectsV2Request {
bucket: self.bucket_name.clone(),
prefix: self.prefix_in_bucket.clone(),
continuation_token,
..ListObjectsV2Request::default()
})
.await
.map_err(|e| {
metrics::inc_list_objects_fail();
@@ -283,7 +277,7 @@ impl RemoteStorage for S3Bucket {
.contents
.unwrap_or_default()
.into_iter()
.filter_map(|o| Some(self.s3_object_to_relative_path(o.key()?))),
.filter_map(|o| Some(RemoteObjectId(o.key?))),
);
match fetch_response.continuation_token {
@@ -297,10 +291,13 @@ impl RemoteStorage for S3Bucket {
/// See the doc for `RemoteStorage::list_prefixes`
/// Note: it won't include empty "directories"
async fn list_prefixes(&self, prefix: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
async fn list_prefixes(
&self,
prefix: Option<&RemoteObjectId>,
) -> anyhow::Result<Vec<RemoteObjectId>> {
// Get the passed prefix or, if it is not set, use the prefix_in_bucket value
let list_prefix = prefix
.map(|p| self.relative_path_to_s3_object(p))
.map(|p| p.0.clone())
.or_else(|| self.prefix_in_bucket.clone())
.map(|mut p| {
// required to end with a separator
@@ -325,12 +322,13 @@ impl RemoteStorage for S3Bucket {
let fetch_response = self
.client
.list_objects_v2()
.bucket(self.bucket_name.clone())
.set_prefix(list_prefix.clone())
.set_continuation_token(continuation_token)
.delimiter(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string())
.send()
.list_objects_v2(ListObjectsV2Request {
bucket: self.bucket_name.clone(),
prefix: list_prefix.clone(),
continuation_token,
delimiter: Some(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string()),
..ListObjectsV2Request::default()
})
.await
.map_err(|e| {
metrics::inc_list_objects_fail();
@@ -342,7 +340,7 @@ impl RemoteStorage for S3Bucket {
.common_prefixes
.unwrap_or_default()
.into_iter()
.filter_map(|o| Some(self.s3_object_to_relative_path(o.prefix()?))),
.filter_map(|o| Some(RemoteObjectId(o.prefix?))),
);
match fetch_response.continuation_token {
@@ -358,7 +356,7 @@ impl RemoteStorage for S3Bucket {
&self,
from: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
from_size_bytes: usize,
to: &RemotePath,
to: &RemoteObjectId,
metadata: Option<StorageMetadata>,
) -> anyhow::Result<()> {
let _guard = self
@@ -368,18 +366,17 @@ impl RemoteStorage for S3Bucket {
.context("Concurrency limiter semaphore got closed during S3 upload")?;
metrics::inc_put_object();
let body = Body::wrap_stream(ReaderStream::new(from));
let bytes_stream = ByteStream::new(SdkBody::from(body));
self.client
.put_object()
.bucket(self.bucket_name.clone())
.key(self.relative_path_to_s3_object(to))
.set_metadata(metadata.map(|m| m.0))
.content_length(from_size_bytes.try_into()?)
.body(bytes_stream)
.send()
.put_object(PutObjectRequest {
body: Some(StreamingBody::new_with_size(
ReaderStream::new(from),
from_size_bytes,
)),
bucket: self.bucket_name.clone(),
key: to.0.to_owned(),
metadata: metadata.map(|m| m.0),
..PutObjectRequest::default()
})
.await
.map_err(|e| {
metrics::inc_put_object_fail();
@@ -388,10 +385,10 @@ impl RemoteStorage for S3Bucket {
Ok(())
}
async fn download(&self, from: &RemotePath) -> Result<Download, DownloadError> {
async fn download(&self, from: &RemoteObjectId) -> Result<Download, DownloadError> {
self.download_object(GetObjectRequest {
bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from),
key: from.0.to_owned(),
..GetObjectRequest::default()
})
.await
@@ -399,7 +396,7 @@ impl RemoteStorage for S3Bucket {
async fn download_byte_range(
&self,
from: &RemotePath,
from: &RemoteObjectId,
start_inclusive: u64,
end_exclusive: Option<u64>,
) -> Result<Download, DownloadError> {
@@ -407,19 +404,20 @@ impl RemoteStorage for S3Bucket {
// and needs both ends to be exclusive
let end_inclusive = end_exclusive.map(|end| end.saturating_sub(1));
let range = Some(match end_inclusive {
Some(end_inclusive) => format!("bytes={start_inclusive}-{end_inclusive}"),
None => format!("bytes={start_inclusive}-"),
Some(end_inclusive) => format!("bytes={}-{}", start_inclusive, end_inclusive),
None => format!("bytes={}-", start_inclusive),
});
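// Worked example (added for illustration): start_inclusive = 0 with
// end_exclusive = Some(10) yields "bytes=0-9", and end_exclusive = None yields
// "bytes=0-". The Range header's end offset is inclusive, hence the
// saturating_sub(1) above.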
self.download_object(GetObjectRequest {
bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from),
key: from.0.to_owned(),
range,
..GetObjectRequest::default()
})
.await
}
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {
async fn delete(&self, remote_object_id: &RemoteObjectId) -> anyhow::Result<()> {
let _guard = self
.concurrency_limiter
.acquire()
@@ -429,10 +427,11 @@ impl RemoteStorage for S3Bucket {
metrics::inc_delete_object();
self.client
.delete_object()
.bucket(self.bucket_name.clone())
.key(self.relative_path_to_s3_object(path))
.send()
.delete_object(DeleteObjectRequest {
bucket: self.bucket_name.clone(),
key: remote_object_id.0.to_owned(),
..DeleteObjectRequest::default()
})
.await
.map_err(|e| {
metrics::inc_delete_object_fail();
@@ -441,3 +440,181 @@ impl RemoteStorage for S3Bucket {
Ok(())
}
}
#[cfg(test)]
mod tests {
use tempfile::tempdir;
use super::*;
#[test]
fn test_download_destination() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let local_path = workdir.join("one").join("two").join("test_name");
let relative_path = local_path.strip_prefix(&workdir)?;
let key = RemoteObjectId(format!(
"{}{}",
REMOTE_STORAGE_PREFIX_SEPARATOR,
relative_path
.iter()
.map(|segment| segment.to_str().unwrap())
.collect::<Vec<_>>()
.join(&REMOTE_STORAGE_PREFIX_SEPARATOR.to_string()),
));
assert_eq!(
local_path,
download_destination(&key, &workdir, None),
"Download destination should consist of s3 path joined with the workdir prefix"
);
Ok(())
}
#[test]
fn storage_path_positive() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let segment_1 = "matching";
let segment_2 = "file";
let local_path = &workdir.join(segment_1).join(segment_2);
let storage = dummy_storage(workdir);
let expected_key = RemoteObjectId(format!(
"{}{REMOTE_STORAGE_PREFIX_SEPARATOR}{segment_1}{REMOTE_STORAGE_PREFIX_SEPARATOR}{segment_2}",
storage.prefix_in_bucket.as_deref().unwrap_or_default(),
));
let actual_key = storage
.remote_object_id(local_path)
.expect("Matching path should map to S3 path normally");
assert_eq!(
expected_key,
actual_key,
"S3 key from the matching path should contain all segments after the workspace prefix, separated with S3 separator"
);
Ok(())
}
#[test]
fn storage_path_negatives() -> anyhow::Result<()> {
#[track_caller]
fn storage_path_error(storage: &S3Bucket, mismatching_path: &Path) -> String {
match storage.remote_object_id(mismatching_path) {
Ok(wrong_key) => panic!(
"Expected path '{}' to error, but got S3 key: {:?}",
mismatching_path.display(),
wrong_key,
),
Err(e) => e.to_string(),
}
}
let workdir = tempdir()?.path().to_owned();
let storage = dummy_storage(workdir.clone());
let error_message = storage_path_error(&storage, &workdir);
assert!(
error_message.contains("Prefix and the path are equal"),
"Message '{}' does not contain the required string",
error_message
);
let mismatching_path = PathBuf::from("somewhere").join("else");
let error_message = storage_path_error(&storage, &mismatching_path);
assert!(
error_message.contains(mismatching_path.to_str().unwrap()),
"Error should mention wrong path"
);
assert!(
error_message.contains(workdir.to_str().unwrap()),
"Error should mention server workdir"
);
assert!(
error_message.contains("is not prefixed with"),
"Message '{}' does not contain a required string",
error_message
);
Ok(())
}
#[test]
fn local_path_positive() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let storage = dummy_storage(workdir.clone());
let timeline_dir = workdir.join("timelines").join("test_timeline");
let relative_timeline_path = timeline_dir.strip_prefix(&workdir)?;
let s3_key = create_s3_key(
&relative_timeline_path.join("not a metadata"),
storage.prefix_in_bucket.as_deref(),
);
assert_eq!(
download_destination(&s3_key, &workdir, storage.prefix_in_bucket.as_deref()),
storage
.local_path(&s3_key)
.expect("For a valid input, valid S3 info should be parsed"),
"Should be able to parse metadata out of the correctly named remote delta file"
);
let s3_key = create_s3_key(
&relative_timeline_path.join("metadata"),
storage.prefix_in_bucket.as_deref(),
);
assert_eq!(
download_destination(&s3_key, &workdir, storage.prefix_in_bucket.as_deref()),
storage
.local_path(&s3_key)
.expect("For a valid input, valid S3 info should be parsed"),
"Should be able to parse metadata out of the correctly named remote metadata file"
);
Ok(())
}
#[test]
fn download_destination_matches_original_path() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
let original_path = workdir
.join("timelines")
.join("some_timeline")
.join("some name");
let dummy_storage = dummy_storage(workdir);
let key = dummy_storage.remote_object_id(&original_path)?;
let download_destination = dummy_storage.local_path(&key)?;
assert_eq!(
original_path, download_destination,
"'original path -> storage key -> matching fs path' transformation should produce the same path as the input one for the correct path"
);
Ok(())
}
fn dummy_storage(workdir: PathBuf) -> S3Bucket {
S3Bucket {
workdir,
client: S3Client::new("us-east-1".parse().unwrap()),
bucket_name: "dummy-bucket".to_string(),
prefix_in_bucket: Some("dummy_prefix/".to_string()),
concurrency_limiter: Semaphore::new(1),
}
}
fn create_s3_key(relative_file_path: &Path, prefix: Option<&str>) -> RemoteObjectId {
RemoteObjectId(relative_file_path.iter().fold(
prefix.unwrap_or_default().to_string(),
|mut path_string, segment| {
path_string.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
path_string.push_str(segment.to_str().unwrap());
path_string
},
))
}
}

View File

@@ -4,7 +4,6 @@ version = "0.1.0"
edition = "2021"
[dependencies]
sentry = "0.29.0"
async-trait = "0.1"
anyhow = "1.0"
bincode = "1.3"

View File

@@ -34,7 +34,6 @@ pub mod sock_split;
pub mod logging;
pub mod lock_file;
pub mod pid_file;
// Misc
pub mod accum;
@@ -47,7 +46,6 @@ pub mod tcp_listener;
pub mod nonblock;
// Default signal handling
pub mod sentry_init;
pub mod signals;
pub mod fs_ext;

View File

@@ -1,133 +1,81 @@
//! A module to create and read lock files.
//! A module to create and read lock files. A lock file ensures that only one
//! process is running at a time, in a particular directory.
//!
//! File locking is done using [`fcntl::flock`] exclusive locks.
//! The only consumer of this module is currently [`pid_file`].
//! See the module-level comment there for potential pitfalls
//! with lock files that are used to store PIDs (pidfiles).
//! File locking is done using [`fcntl::flock`], which means that holding the
//! lock on a file only prevents acquiring another lock on it; all other
//! operations on the file are still possible. Other processes can still open, read,
//! write, or remove the file, for example.
//! If the file is removed while a process is holding a lock on it,
//! the process that holds the lock does not get any error or notification.
//! Furthermore, you can create a new file with the same name and lock the new file,
//! while the old process is still running.
//! Deleting the lock file while the locking process is still running is a bad idea!
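A minimal sketch of the flock semantics described above, assuming only the `nix` and `anyhow` crates this module already uses; the file name is invented and the snippet is an illustration, not part of the change:

// Illustration only: a second non-blocking exclusive flock on an already-locked
// file fails with EAGAIN, while ordinary file operations are unaffected.
use std::{fs, os::unix::prelude::AsRawFd};

use nix::fcntl::{flock, FlockArg};

fn main() -> anyhow::Result<()> {
    let first = fs::OpenOptions::new()
        .create(true)
        .write(true)
        .open("demo.lock")?;
    flock(first.as_raw_fd(), FlockArg::LockExclusiveNonblock)?; // lock acquired

    // Locking a second descriptor for the same file is refused...
    let second = fs::OpenOptions::new().write(true).open("demo.lock")?;
    assert_eq!(
        flock(second.as_raw_fd(), FlockArg::LockExclusiveNonblock),
        Err(nix::errno::Errno::EAGAIN)
    );

    // ...but the lock does not stop anyone from writing to or removing the file,
    // and the lock holder is not notified about either.
    fs::write("demo.lock", b"overwritten by a non-locking writer")?;
    fs::remove_file("demo.lock")?;
    Ok(())
}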
use std::{
fs,
io::{Read, Write},
ops::Deref,
os::unix::prelude::AsRawFd,
path::{Path, PathBuf},
};
use std::{fs, os::unix::prelude::AsRawFd, path::Path};
use anyhow::Context;
use nix::{errno::Errno::EAGAIN, fcntl};
use nix::fcntl;
use crate::crashsafe;
/// A handle to an open and locked, but not-yet-written lock file.
/// Returned by [`create_exclusive`].
#[must_use]
pub struct UnwrittenLockFile {
path: PathBuf,
file: fs::File,
pub enum LockCreationResult {
Created {
new_lock_contents: String,
file: fs::File,
},
AlreadyLocked {
existing_lock_contents: String,
},
CreationFailed(anyhow::Error),
}
/// Returned by [`UnwrittenLockFile::write_content`].
#[must_use]
pub struct LockFileGuard(fs::File);
impl Deref for LockFileGuard {
type Target = fs::File;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl UnwrittenLockFile {
/// Replace the content of this lock file with the byte representation of `contents`.
pub fn write_content(mut self, contents: String) -> anyhow::Result<LockFileGuard> {
self.file
.set_len(0)
.context("Failed to truncate lockfile")?;
self.file
.write_all(contents.as_bytes())
.with_context(|| format!("Failed to write '{contents}' contents into lockfile"))?;
crashsafe::fsync_file_and_parent(&self.path).context("fsync lockfile")?;
Ok(LockFileGuard(self.file))
}
}
/// Creates and opens a lock file in the path, grabs an exclusive flock on it, and returns
/// a handle that allows overwriting the locked file's content.
///
/// The exclusive lock is released when dropping the returned handle.
///
/// It is not an error if the file already exists.
/// It is an error if the file is already locked.
pub fn create_exclusive(lock_file_path: &Path) -> anyhow::Result<UnwrittenLockFile> {
let lock_file = fs::OpenOptions::new()
/// Creates a lock file at the given path and writes the given contents into the file.
/// Note: The lock is automatically released when the file is closed. You might want to use Box::leak to make sure it lives until the end of the program.
pub fn create_lock_file(lock_file_path: &Path, contents: String) -> LockCreationResult {
let lock_file = match fs::OpenOptions::new()
.create(true) // O_CREAT
.write(true)
.open(lock_file_path)
.context("open lock file")?;
let res = fcntl::flock(
lock_file.as_raw_fd(),
fcntl::FlockArg::LockExclusiveNonblock,
);
match res {
Ok(()) => Ok(UnwrittenLockFile {
path: lock_file_path.to_owned(),
file: lock_file,
}),
Err(EAGAIN) => anyhow::bail!("file is already locked"),
Err(e) => Err(e).context("flock error"),
}
}
/// Returned by [`read_and_hold_lock_file`].
/// Check out the [`pid_file`] module for what the variants mean
/// and potential caveats if the lock files that are used to store PIDs.
pub enum LockFileRead {
/// No file exists at the given path.
NotExist,
/// No other process held the lock file, so we grabbed an flock
/// on it and read its contents.
/// Release the flock by dropping the [`LockFileGuard`].
NotHeldByAnyProcess(LockFileGuard, String),
/// The file exists but another process was holding an flock on it.
LockedByOtherProcess {
not_locked_file: fs::File,
content: String,
},
}
/// Open & try to lock the lock file at the given `path`, returning a [handle][`LockFileRead`] to
/// inspect its content. It is not an `Err(...)` if the file does not exist or is already locked.
/// Check the [`LockFileRead`] variants for details.
pub fn read_and_hold_lock_file(path: &Path) -> anyhow::Result<LockFileRead> {
let res = fs::OpenOptions::new().read(true).open(path);
let mut lock_file = match res {
Ok(f) => f,
Err(e) => match e.kind() {
std::io::ErrorKind::NotFound => return Ok(LockFileRead::NotExist),
_ => return Err(e).context("open lock file"),
},
.context("Failed to open lock file")
{
Ok(file) => file,
Err(e) => return LockCreationResult::CreationFailed(e),
};
let res = fcntl::flock(
match fcntl::flock(
lock_file.as_raw_fd(),
fcntl::FlockArg::LockExclusiveNonblock,
);
// We need the content regardless of lock success / failure.
// But, read it after flock so that, if it succeeded, the content is consistent.
let mut content = String::new();
lock_file
.read_to_string(&mut content)
.context("read lock file")?;
match res {
Ok(()) => Ok(LockFileRead::NotHeldByAnyProcess(
LockFileGuard(lock_file),
content,
)),
Err(EAGAIN) => Ok(LockFileRead::LockedByOtherProcess {
not_locked_file: lock_file,
content,
}),
Err(e) => Err(e).context("flock error"),
) {
Ok(()) => {
match lock_file
.set_len(0)
.context("Failed to truncate lockfile")
.and_then(|()| {
fs::write(lock_file_path, &contents).with_context(|| {
format!("Failed to write '{contents}' contents into lockfile")
})
})
.and_then(|()| {
crashsafe::fsync_file_and_parent(lock_file_path)
.context("Failed to fsync lockfile")
}) {
Ok(()) => LockCreationResult::Created {
new_lock_contents: contents,
file: lock_file,
},
Err(e) => LockCreationResult::CreationFailed(e),
}
}
Err(nix::errno::Errno::EAGAIN) => {
match fs::read_to_string(lock_file_path).context("Failed to read lockfile contents") {
Ok(existing_lock_contents) => LockCreationResult::AlreadyLocked {
existing_lock_contents,
},
Err(e) => LockCreationResult::CreationFailed(e),
}
}
Err(e) => {
LockCreationResult::CreationFailed(anyhow::anyhow!("Failed to lock lockfile: {e}"))
}
}
}

View File

@@ -1,165 +0,0 @@
//! Abstraction to create & read pidfiles.
//!
//! A pidfile is a file in the filesystem that stores a process's PID.
//! Its purpose is to implement a singleton behavior where only
//! one process of some "kind" is supposed to be running at a given time.
//! The "kind" is identified by the pidfile.
//!
//! During process startup, the process that is supposed to be a singleton
//! must [claim][`claim_for_current_process`] the pidfile first.
//! If that is unsuccessful, the process must not act as the singleton, i.e.,
//! it must not access any of the resources that only the singleton may access.
//!
//! A common need is to signal a running singleton process, e.g., to make
//! it shut down and exit.
//! For that, we have to [`read`] the pidfile. The result of the `read` operation
//! tells us if there is any singleton process, and if so, what PID it has.
//! We can then proceed to signal it, although some caveats still apply.
//! Read the function-level documentation of [`read`] for that.
//!
//! ## Never Remove Pidfiles
//!
//! It would be natural to assume that the process who claimed the pidfile
//! should remove it upon exit to avoid leaving a stale pidfile in place.
//! However, we already have a reliable way to detect staleness of the pidfile,
//! i.e., the `flock` that [claiming][`claim_for_current_process`] puts on it.
//!
//! And further, removing pidfiles would introduce a **catastrophic race condition**
//! where two processes are running that are supposed to be singletons.
//! Suppose we were to remove our pidfile during process shutdown.
//! Here is how the race plays out:
//! - Suppose we have a service called `myservice` with pidfile `myservice.pidfile`.
//! - Process `A` starts to shut down.
//! - Process `B` is just starting up
//! - It opens the file with `open("myservice.pid", O_WRONLY|O_CREAT)`
//! - It blocks on `flock`
//! - Process `A` removes the pidfile as the last step of its shutdown procedure
//! - `unlink("myservice.pid")`
//! - Process `A` exits
//! - This releases its `flock` and unblocks `B`
//! - Process `B` still has the file descriptor for `myservice.pid` open
//! - Process `B` writes its PID into `myservice.pid`.
//! - But the `myservice.pid` file has been unlinked, so there is no `myservice.pid`
//! in the directory.
//! - Process `C` starts
//! - It opens the file with `open("myservice.pid", O_WRONLY|O_CREAT)`, which creates a new file (new inode)
//! - It `flock`s the file, which, since it's a different file, does not block
//! - It writes its PID into the file
//!
//! At this point, `B` and `C` are running, which is hazardous.
//! Moral of the story: don't unlink pidfiles, ever.
use std::{ops::Deref, path::Path};
use anyhow::Context;
use nix::unistd::Pid;
use crate::lock_file::{self, LockFileRead};
/// Keeps a claim on a pidfile alive until it is dropped.
/// Returned by [`claim_for_current_process`].
#[must_use]
pub struct PidFileGuard(lock_file::LockFileGuard);
impl Deref for PidFileGuard {
type Target = lock_file::LockFileGuard;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// Try to claim `path` as a pidfile for the current process.
///
/// If another process has already claimed the pidfile, and it is still running,
/// this function returns an error.
/// Otherwise, the function `flock`s the file and updates its contents to the
/// current process's PID.
/// If the update fails, the flock is released and an error returned.
/// On success, the function returns a [`PidFileGuard`] to keep the flock alive.
///
/// ### Maintaining A Claim
///
/// It is the caller's responsibility to maintain the claim.
/// The claim ends as soon as the returned guard object is dropped.
/// To maintain the claim for the remaining lifetime of the current process,
/// use [`std::mem::forget`] or similar.
pub fn claim_for_current_process(path: &Path) -> anyhow::Result<PidFileGuard> {
let unwritten_lock_file = lock_file::create_exclusive(path).context("lock file")?;
// if any of the next steps fail, we drop the file descriptor and thereby release the lock
let guard = unwritten_lock_file
.write_content(Pid::this().to_string())
.context("write pid to lock file")?;
Ok(PidFileGuard(guard))
}
/// Returned by [`read`].
pub enum PidFileRead {
/// No file exists at the given path.
NotExist,
/// The given pidfile is currently not claimed by any process.
/// To determine this, the [`read`] operation acquired
/// an exclusive flock on the file. The lock is still held and responsibility
/// to release it is returned through the guard object.
/// Until the guard is dropped, other [`claim_for_current_process`] or [`read`] calls
/// will fail.
///
/// ### Caveats
///
/// Do not unlink the pidfile from the filesystem. See module-comment for why.
NotHeldByAnyProcess(PidFileGuard),
/// The given pidfile is still claimed by another process whose PID is given
/// as part of this variant.
///
/// ### Caveats
///
/// 1. The other process might exit at any time, turning the given PID stale.
/// 2. There is a small window in which `claim_for_current_process` has already
/// locked the file but not yet updated its contents. [`read`] will return
/// this variant here, but with the old file contents, i.e., a stale PID.
///
/// The kernel is free to recycle a PID once it has been `wait(2)`ed upon by
/// its creator. Thus, acting upon a stale PID, e.g., by issuing a `kill`
/// system call on it, bears the risk of killing an unrelated process.
/// This is an inherent limitation of using pidfiles.
/// The only race-free solution is to have a supervisor-process with a lifetime
/// that exceeds that of all of its child-processes (e.g., `runit`, `supervisord`).
LockedByOtherProcess(Pid),
}
/// Try to read the file at the given path as a pidfile that was previously created
/// through [`claim_for_current_process`].
///
/// On success, this function returns a [`PidFileRead`].
/// Check its docs for a description of the meaning of its different variants.
pub fn read(pidfile: &Path) -> anyhow::Result<PidFileRead> {
let res = lock_file::read_and_hold_lock_file(pidfile).context("read and hold pid file")?;
let ret = match res {
LockFileRead::NotExist => PidFileRead::NotExist,
LockFileRead::NotHeldByAnyProcess(guard, _) => {
PidFileRead::NotHeldByAnyProcess(PidFileGuard(guard))
}
LockFileRead::LockedByOtherProcess {
not_locked_file: _not_locked_file,
content,
} => {
// XXX the read races with the write in claim_pid_file_for_pid().
// But pids are smaller than a page, so the kernel page cache will lock for us.
// The only problem is that we might get the old contents here.
// Can only fix that by implementing some scheme that downgrades the
// exclusive lock to shared lock in claim_pid_file_for_pid().
PidFileRead::LockedByOtherProcess(parse_pidfile_content(&content)?)
}
};
Ok(ret)
}
fn parse_pidfile_content(content: &str) -> anyhow::Result<Pid> {
let pid: i32 = content
.parse()
.map_err(|_| anyhow::anyhow!("parse pidfile content to PID"))?;
if pid < 1 {
anyhow::bail!("bad value in pidfile '{pid}'");
}
Ok(Pid::from_raw(pid))
}
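To make the workflow from the module comment concrete, here is a minimal, hypothetical usage sketch of this (now removed) API; it assumes only the functions and variants documented above:

// Illustration only: claim a pidfile at startup, and inspect it from another tool.
use std::path::Path;

use utils::pid_file::{self, PidFileRead};

fn become_singleton(pidfile: &Path) -> anyhow::Result<()> {
    // Errors out if another live process already holds the pidfile.
    let guard = pid_file::claim_for_current_process(pidfile)?;
    // Keep the claim for the rest of the process lifetime (see the function docs).
    std::mem::forget(guard);
    Ok(())
}

fn report_singleton(pidfile: &Path) -> anyhow::Result<()> {
    match pid_file::read(pidfile)? {
        PidFileRead::NotExist => println!("no singleton has run here"),
        PidFileRead::NotHeldByAnyProcess(_guard) => println!("stale pidfile, no live process"),
        // The PID may be stale by the time it is used; see the caveats above.
        PidFileRead::LockedByOtherProcess(pid) => println!("singleton running with PID {pid}"),
    }
    Ok(())
}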

View File

@@ -1,27 +0,0 @@
use sentry::ClientInitGuard;
use std::borrow::Cow;
use std::env;
pub use sentry::release_name;
#[must_use]
pub fn init_sentry(
release_name: Option<Cow<'static, str>>,
extra_options: &[(&str, &str)],
) -> Option<ClientInitGuard> {
let dsn = env::var("SENTRY_DSN").ok()?;
let guard = sentry::init((
dsn,
sentry::ClientOptions {
release: release_name,
..Default::default()
},
));
sentry::configure_scope(|scope| {
for &(key, value) in extra_options {
scope.set_extra(key, value.into());
}
});
Some(guard)
}

View File

@@ -18,7 +18,7 @@ async-stream = "0.3"
async-trait = "0.1"
byteorder = "1.4.3"
bytes = "1.0.1"
chrono = { version = "0.4.23", default-features = false, features = ["clock"] }
chrono = "0.4.19"
clap = { version = "4.0", features = ["string"] }
close_fds = "0.3.2"
const_format = "0.2.21"

View File

@@ -1,12 +0,0 @@
## Pageserver Benchmarks
# How to run
To run all benchmarks:
`cargo bench`
To run a specific file:
`cargo bench --bench bench_layer_map`
To run a specific function:
`cargo bench --bench bench_layer_map -- real_map_uniform_queries`

File diff suppressed because it is too large

View File

@@ -431,7 +431,7 @@ fn pg_record(will_init: bool, bytes: &'static [u8]) -> NeonWalRecord {
struct Request {
key: Key,
lsn: Lsn,
base_img: Option<(Lsn, Bytes)>,
base_img: Option<Bytes>,
records: Vec<(Lsn, NeonWalRecord)>,
pg_version: u32,
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -11,8 +11,8 @@
//!
//! Example use:
//! ```
//! $ ls test_output/test_pgbench\[neon-45-684\]/repo/tenants/$TENANT/timelines/$TIMELINE | \
//! $ grep "__" | cargo run --release --bin draw_timeline_dir > out.svg
//! $ cd test_output/test_pgbench\[neon-45-684\]/repo/tenants/$TENANT/timelines/$TIMELINE
//! $ ls | grep "__" | cargo run --release --bin draw_timeline_dir > out.svg
//! $ firefox out.svg
//! ```
//!
@@ -25,8 +25,6 @@ use anyhow::Result;
use pageserver::repository::Key;
use std::cmp::Ordering;
use std::io::{self, BufRead};
use std::path::PathBuf;
use std::str::FromStr;
use std::{
collections::{BTreeMap, BTreeSet},
ops::Range,
@@ -67,11 +65,7 @@ fn main() -> Result<()> {
let mut ranges: Vec<(Range<Key>, Range<Lsn>)> = vec![];
let stdin = io::stdin();
for line in stdin.lock().lines() {
let line = line.unwrap();
let line = PathBuf::from_str(&line).unwrap();
let filename = line.file_name().unwrap();
let filename = filename.to_str().unwrap();
let range = parse_filename(filename);
let range = parse_filename(&line.unwrap());
ranges.push(range);
}

View File

@@ -7,6 +7,7 @@ use std::{env, ops::ControlFlow, path::Path, str::FromStr};
use anyhow::{anyhow, Context};
use clap::{Arg, ArgAction, Command};
use fail::FailScenario;
use nix::unistd::Pid;
use tracing::*;
use metrics::set_build_info_metric;
@@ -22,10 +23,9 @@ use pageserver::{
use remote_storage::GenericRemoteStorage;
use utils::{
auth::JwtAuth,
logging,
lock_file, logging,
postgres_backend::AuthType,
project_git_version,
sentry_init::{init_sentry, release_name},
signals::{self, Signal},
tcp_listener,
};
@@ -85,9 +85,6 @@ fn main() -> anyhow::Result<()> {
}
};
// initialize sentry if SENTRY_DSN is provided
let _sentry_guard = init_sentry(release_name!(), &[("node_id", &conf.id.to_string())]);
let tenants_path = conf.tenants_path();
if !tenants_path.exists() {
utils::crashsafe::create_dir_all(conf.tenants_path()).with_context(|| {
@@ -219,13 +216,30 @@ fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
}
let lock_file_path = conf.workdir.join(PID_FILE_NAME);
let lock_file =
utils::pid_file::claim_for_current_process(&lock_file_path).context("claim pid file")?;
info!("Claimed pid file at {lock_file_path:?}");
let lock_file = match lock_file::create_lock_file(&lock_file_path, Pid::this().to_string()) {
lock_file::LockCreationResult::Created {
new_lock_contents,
file,
} => {
info!("Created lock file at {lock_file_path:?} with contenst {new_lock_contents}");
file
}
lock_file::LockCreationResult::AlreadyLocked {
existing_lock_contents,
} => anyhow::bail!(
"Could not lock pid file; pageserver is already running in {:?} with PID {}",
conf.workdir,
existing_lock_contents
),
lock_file::LockCreationResult::CreationFailed(e) => {
return Err(e.context(format!("Failed to create lock file at {lock_file_path:?}")))
}
};
// ensure that the lock file is held even if the main thread of the process panics
// we need to release the lock file only when the current process is gone
std::mem::forget(lock_file);
let _ = Box::leak(Box::new(lock_file));
info!("Created PID file with PID {}", Pid::this().to_string());
// TODO: Check that it looks like a valid repository before going further
@@ -280,23 +294,15 @@ fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
let remote_storage = conf
.remote_storage_config
.as_ref()
.map(GenericRemoteStorage::from_config)
.map(|storage_config| {
GenericRemoteStorage::from_config(conf.workdir.clone(), storage_config)
})
.transpose()
.context("Failed to init generic remote storage")?;
let (init_result_sender, init_result_receiver) =
std::sync::mpsc::channel::<anyhow::Result<()>>();
let storage_for_spawn = remote_storage.clone();
let _handler = BACKGROUND_RUNTIME.spawn(async move {
let result = tenant_mgr::init_tenant_mgr(conf, storage_for_spawn).await;
init_result_sender.send(result)
});
match init_result_receiver.recv() {
Ok(init_result) => init_result.context("Failed to init tenant_mgr")?,
Err(_sender_dropped_err) => {
anyhow::bail!("Failed to init tenant_mgr: no init status was returned");
}
}
{
let _rt_guard = BACKGROUND_RUNTIME.enter();
tenant_mgr::init_tenant_mgr(conf, remote_storage.clone())?
};
// Spawn all HTTP related tasks in the MGMT_REQUEST_RUNTIME.
// bind before launching separate thread so the error reported before startup exits

View File

@@ -5,7 +5,7 @@
//! See also `settings.md` for a better description of every parameter.
use anyhow::{anyhow, bail, ensure, Context, Result};
use remote_storage::{RemotePath, RemoteStorageConfig};
use remote_storage::RemoteStorageConfig;
use std::env;
use utils::crashsafe::path_with_suffix_extension;
use utils::id::ConnectionId;
@@ -27,9 +27,7 @@ use utils::{
use crate::tenant::{TENANT_ATTACHING_MARKER_FILENAME, TIMELINES_SEGMENT_NAME};
use crate::tenant_config::{TenantConf, TenantConfOpt};
use crate::{
IGNORED_TENANT_FILE_NAME, METADATA_FILE_NAME, TENANT_CONFIG_NAME, TIMELINE_UNINIT_MARK_SUFFIX,
};
use crate::{METADATA_FILE_NAME, TENANT_CONFIG_NAME, TIMELINE_UNINIT_MARK_SUFFIX};
pub mod defaults {
use crate::tenant_config::defaults::*;
@@ -333,6 +331,10 @@ impl PageServerConfigBuilder {
}
pub fn build(self) -> anyhow::Result<PageServerConf> {
let broker_endpoints = self
.broker_endpoints
.ok_or(anyhow!("No broker endpoints provided"))?;
Ok(PageServerConf {
listen_pg_addr: self
.listen_pg_addr
@@ -368,9 +370,7 @@ impl PageServerConfigBuilder {
profiling: self.profiling.ok_or(anyhow!("missing profiling"))?,
// TenantConf is handled separately
default_tenant_conf: TenantConf::default(),
broker_endpoints: self
.broker_endpoints
.ok_or(anyhow!("No broker endpoints provided"))?,
broker_endpoints,
broker_etcd_prefix: self
.broker_etcd_prefix
.ok_or(anyhow!("missing broker_etcd_prefix"))?,
@@ -402,10 +402,6 @@ impl PageServerConf {
.join(TENANT_ATTACHING_MARKER_FILENAME)
}
pub fn tenant_ignore_mark_file_path(&self, tenant_id: TenantId) -> PathBuf {
self.tenant_path(&tenant_id).join(IGNORED_TENANT_FILE_NAME)
}
/// Points to a place in pageserver's local directory,
/// where certain tenant's tenantconf file should be located.
pub fn tenant_config_path(&self, tenant_id: TenantId) -> PathBuf {
@@ -454,28 +450,6 @@ impl PageServerConf {
.join(METADATA_FILE_NAME)
}
/// Files on the remote storage are stored with paths relative to the workdir.
/// That path includes both the tenant and timeline ids, which makes the remote storage path unique.
///
/// Errors if the provided path does not start with pageserver's workdir.
pub fn remote_path(&self, local_path: &Path) -> anyhow::Result<RemotePath> {
local_path
.strip_prefix(&self.workdir)
.context("Failed to strip workdir prefix")
.and_then(RemotePath::new)
.with_context(|| {
format!(
"Failed to resolve remote part of path {:?} for base {:?}",
local_path, self.workdir
)
})
}
/// Turns storage remote path of a file into its local path.
pub fn local_path(&self, remote_path: &RemotePath) -> PathBuf {
remote_path.with_base(&self.workdir)
}
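For illustration, a hypothetical example of the mapping these removed helpers implemented; the workdir and layer name are invented:

// Illustration only: remote paths are just local paths with the workdir stripped,
// and local paths are recovered by joining the workdir back on.
use std::path::Path;

fn main() {
    let workdir = Path::new("/data/pageserver");
    let local = workdir.join("tenants/1234/timelines/abcd/some_layer_file");

    // remote_path(): strip the workdir prefix, keeping tenant and timeline ids.
    let relative = local.strip_prefix(workdir).unwrap();
    assert_eq!(relative, Path::new("tenants/1234/timelines/abcd/some_layer_file"));

    // local_path(): join the relative remote path back onto the workdir.
    assert_eq!(workdir.join(relative), local);
}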
//
// Postgres distribution paths
//
@@ -512,7 +486,7 @@ impl PageServerConf {
let mut builder = PageServerConfigBuilder::default();
builder.workdir(workdir.to_owned());
let mut t_conf = TenantConfOpt::default();
let mut t_conf: TenantConfOpt = Default::default();
for (key, item) in toml.iter() {
match key {
@@ -643,12 +617,6 @@ impl PageServerConf {
if let Some(max_lsn_wal_lag) = item.get("max_lsn_wal_lag") {
t_conf.max_lsn_wal_lag = Some(parse_toml_from_str("max_lsn_wal_lag", max_lsn_wal_lag)?);
}
if let Some(trace_read_requests) = item.get("trace_read_requests") {
t_conf.trace_read_requests =
Some(trace_read_requests.as_bool().with_context(|| {
"configure option trace_read_requests is not a bool".to_string()
})?);
}
Ok(t_conf)
}
@@ -1048,35 +1016,6 @@ broker_endpoints = ['{broker_endpoint}']
Ok(())
}
#[test]
fn parse_tenant_config() -> anyhow::Result<()> {
let tempdir = tempdir()?;
let (workdir, pg_distrib_dir) = prepare_fs(&tempdir)?;
let broker_endpoint = "http://127.0.0.1:7777";
let trace_read_requests = true;
let config_string = format!(
r#"{ALL_BASE_VALUES_TOML}
pg_distrib_dir='{}'
broker_endpoints = ['{broker_endpoint}']
[tenant_config]
trace_read_requests = {trace_read_requests}"#,
pg_distrib_dir.display(),
);
let toml = config_string.parse()?;
let conf = PageServerConf::parse_and_validate(&toml, &workdir)?;
assert_eq!(
conf.default_tenant_conf.trace_read_requests, trace_read_requests,
"Tenant config from pageserver config file should be parsed and udpated values used as defaults for all tenants",
);
Ok(())
}
fn prepare_fs(tempdir: &TempDir) -> anyhow::Result<(PathBuf, PathBuf)> {
let tempdir_path = tempdir.path();

View File

@@ -274,7 +274,6 @@ paths:
schema:
type: string
format: hex
post:
description: Schedules attach operation to happen in the background for given tenant
responses:
@@ -326,9 +325,7 @@ paths:
type: string
format: hex
post:
description: |
Remove tenant data (including all corresponding timelines) from pageserver's memory and file system.
Files on the remote storage are not affected.
description: Detach local tenant
responses:
"200":
description: Tenant detached
@@ -357,92 +354,6 @@ paths:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/ignore:
parameters:
- name: tenant_id
in: path
required: true
schema:
type: string
format: hex
post:
description: |
Remove tenant data (including all corresponding timelines) from pageserver's memory.
Files on local disk and remote storage are not affected.
Future pageserver restarts won't load the data back until `load` is called on such a tenant.
responses:
"200":
description: Tenant ignored
"400":
description: Error when no tenant id found in path parameters
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"401":
description: Unauthorized Error
content:
application/json:
schema:
$ref: "#/components/schemas/UnauthorizedError"
"403":
description: Forbidden Error
content:
application/json:
schema:
$ref: "#/components/schemas/ForbiddenError"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/load:
parameters:
- name: tenant_id
in: path
required: true
schema:
type: string
format: hex
post:
description: |
Schedules an operation that attempts to load a tenant from the local disk and
synchronise it with the remote storage (if enabled), repeating pageserver's restart logic for tenant load.
If the tenant was ignored before, removes the ignore mark and continues with load scheduling.
Errors if the tenant is absent on disk, already present in memory, or fails to schedule its load.
Scheduling a load does not mean that the tenant will load successfully; check the tenant status to verify that the load succeeded.
responses:
"202":
description: Tenant scheduled to load successfully
"400":
description: Error when no tenant id found in path parameters
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"401":
description: Unauthorized Error
content:
application/json:
schema:
$ref: "#/components/schemas/UnauthorizedError"
"403":
description: Forbidden Error
content:
application/json:
schema:
$ref: "#/components/schemas/ForbiddenError"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/size:
parameters:
- name: tenant_id
@@ -748,6 +659,7 @@ components:
- tenant_id
- last_record_lsn
- disk_consistent_lsn
- awaits_download
- state
- latest_gc_cutoff_lsn
properties:
@@ -790,6 +702,8 @@ components:
format: hex
last_received_msg_ts:
type: integer
awaits_download:
type: boolean
state:
type: string
latest_gc_cutoff_lsn:

View File

@@ -3,7 +3,9 @@ use std::sync::Arc;
use anyhow::{anyhow, Context, Result};
use hyper::StatusCode;
use hyper::{Body, Request, Response, Uri};
use pageserver_api::models::TenantState;
use remote_storage::GenericRemoteStorage;
use tokio::task::JoinError;
use tracing::*;
use super::models::{
@@ -80,11 +82,12 @@ fn check_permission(request: &Request<Body>, tenant_id: Option<TenantId>) -> Res
// Helper function to construct a TimelineInfo struct for a timeline
fn build_timeline_info(
tenant_state: TenantState,
timeline: &Arc<Timeline>,
include_non_incremental_logical_size: bool,
include_non_incremental_physical_size: bool,
) -> anyhow::Result<TimelineInfo> {
let mut info = build_timeline_info_common(timeline)?;
let mut info = build_timeline_info_common(tenant_state, timeline)?;
if include_non_incremental_logical_size {
info.current_logical_size_non_incremental =
Some(timeline.get_current_logical_size_non_incremental(info.last_record_lsn)?);
@@ -96,7 +99,10 @@ fn build_timeline_info(
Ok(info)
}
fn build_timeline_info_common(timeline: &Arc<Timeline>) -> anyhow::Result<TimelineInfo> {
fn build_timeline_info_common(
tenant_state: TenantState,
timeline: &Arc<Timeline>,
) -> anyhow::Result<TimelineInfo> {
let last_record_lsn = timeline.get_last_record_lsn();
let (wal_source_connstr, last_received_msg_lsn, last_received_msg_ts) = {
let guard = timeline.last_received_wal.lock().unwrap();
@@ -148,6 +154,10 @@ fn build_timeline_info_common(timeline: &Arc<Timeline>) -> anyhow::Result<Timeli
state,
// XXX bring back tracking of downloads per timeline, or, introduce
// an 'Attaching' state for the timeline and get rid of this field.
awaits_download: tenant_state == TenantState::Attaching,
// Duplicate some fields in 'local' and 'remote' fields, for backwards-compatibility
// with the control plane.
local: LocalTimelineInfo {
@@ -179,9 +189,7 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
.new_timeline_id
.unwrap_or_else(TimelineId::generate);
let tenant = tenant_mgr::get_tenant(tenant_id, true)
.await
.map_err(ApiError::NotFound)?;
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
match tenant.create_timeline(
new_timeline_id,
request_data.ancestor_timeline_id.map(TimelineId::from),
@@ -192,7 +200,7 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
.await {
Ok(Some(new_timeline)) => {
// Created. Construct a TimelineInfo for it.
let timeline_info = build_timeline_info_common(&new_timeline)
let timeline_info = build_timeline_info_common(tenant.current_state(), &new_timeline)
.map_err(ApiError::InternalServerError)?;
json_response(StatusCode::CREATED, timeline_info)
}
@@ -209,29 +217,26 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,
query_param_present(&request, "include-non-incremental-physical-size");
check_permission(&request, Some(tenant_id))?;
let response_data = async {
let tenant = tenant_mgr::get_tenant(tenant_id, true)
.await
.map_err(ApiError::NotFound)?;
let timelines = tenant.list_timelines();
let _entered = info_span!("timeline_list", tenant = %tenant_id).entered();
let mut response_data = Vec::with_capacity(timelines.len());
for timeline in timelines {
let timeline_info = build_timeline_info(
&timeline,
include_non_incremental_logical_size,
include_non_incremental_physical_size,
)
.context("Failed to convert tenant timeline {timeline_id} into the local one: {e:?}")
.map_err(ApiError::InternalServerError)?;
let (tenant_state, timelines) = {
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
(tenant.current_state(), tenant.list_timelines())
};
response_data.push(timeline_info);
}
let mut response_data = Vec::with_capacity(timelines.len());
for timeline in timelines {
let timeline_info = build_timeline_info(
tenant_state,
&timeline,
include_non_incremental_logical_size,
include_non_incremental_physical_size,
)
.context("Failed to convert tenant timeline {timeline_id} into the local one: {e:?}")
.map_err(ApiError::InternalServerError)?;
Ok(response_data)
response_data.push(timeline_info);
}
.instrument(info_span!("timeline_list", tenant = %tenant_id))
.await?;
json_response(StatusCode::OK, response_data)
}
@@ -276,15 +281,20 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
check_permission(&request, Some(tenant_id))?;
let timeline_info = async {
let tenant = tenant_mgr::get_tenant(tenant_id, true)
.await
.map_err(ApiError::NotFound)?;
let (tenant_state, timeline) = tokio::task::spawn_blocking(move || {
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
Ok((
tenant.current_state(),
tenant.get_timeline(timeline_id, false),
))
})
.await
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))??;
let timeline = tenant
.get_timeline(timeline_id, false)
.map_err(ApiError::NotFound)?;
let timeline = timeline.map_err(ApiError::NotFound)?;
let timeline_info = build_timeline_info(
tenant_state,
&timeline,
include_non_incremental_logical_size,
include_non_incremental_physical_size,
@@ -312,7 +322,6 @@ async fn get_lsn_by_timestamp_handler(request: Request<Body>) -> Result<Response
let timestamp_pg = postgres_ffi::to_pg_timestamp(timestamp);
let timeline = tenant_mgr::get_tenant(tenant_id, true)
.await
.and_then(|tenant| tenant.get_timeline(timeline_id, true))
.map_err(ApiError::NotFound)?;
let result = match timeline
@@ -338,13 +347,13 @@ async fn tenant_attach_handler(request: Request<Body>) -> Result<Response<Body>,
if let Some(remote_storage) = &state.remote_storage {
// FIXME: distinguish between "Tenant already exists" and other errors
tenant_mgr::attach_tenant(state.conf, tenant_id, remote_storage.clone())
tenant_mgr::attach_tenant(state.conf, tenant_id, remote_storage)
.instrument(info_span!("tenant_attach", tenant = %tenant_id))
.await
.map_err(ApiError::InternalServerError)?;
} else {
return Err(ApiError::BadRequest(anyhow!(
"attach_tenant is not possible because pageserver was configured without remote storage"
"attach_tenant is possible because pageserver was configured without remote storage"
)));
}
@@ -383,49 +392,23 @@ async fn tenant_detach_handler(request: Request<Body>) -> Result<Response<Body>,
json_response(StatusCode::OK, ())
}
async fn tenant_load_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let state = get_state(&request);
tenant_mgr::load_tenant(state.conf, tenant_id, state.remote_storage.clone())
.instrument(info_span!("load", tenant = %tenant_id))
.await
.map_err(ApiError::InternalServerError)?;
json_response(StatusCode::ACCEPTED, ())
}
async fn tenant_ignore_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let state = get_state(&request);
let conf = state.conf;
tenant_mgr::ignore_tenant(conf, tenant_id)
.instrument(info_span!("ignore_tenant", tenant = %tenant_id))
.await
// FIXME: Errors from `ignore_tenant` can be caused by both both user and internal errors.
// Replace this with better handling once the error type permits it.
.map_err(ApiError::InternalServerError)?;
json_response(StatusCode::OK, ())
}
async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permission(&request, None)?;
let response_data = tenant_mgr::list_tenants()
.instrument(info_span!("tenant_list"))
.await
.iter()
.map(|(id, state)| TenantInfo {
id: *id,
state: *state,
current_physical_size: None,
has_in_progress_downloads: Some(state.has_in_progress_downloads()),
})
.collect::<Vec<TenantInfo>>();
let response_data = tokio::task::spawn_blocking(move || {
let _enter = info_span!("tenant_list").entered();
tenant_mgr::list_tenants()
.iter()
.map(|(id, state)| TenantInfo {
id: *id,
state: *state,
current_physical_size: None,
has_in_progress_downloads: Some(state.has_in_progress_downloads()),
})
.collect::<Vec<TenantInfo>>()
})
.await
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))?;
json_response(StatusCode::OK, response_data)
}
@@ -434,8 +417,9 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let tenant_info = async {
let tenant = tenant_mgr::get_tenant(tenant_id, false).await?;
let tenant_info = tokio::task::spawn_blocking(move || {
let _enter = info_span!("tenant_status_handler", tenant = %tenant_id).entered();
let tenant = tenant_mgr::get_tenant(tenant_id, false)?;
// Calculate total physical size of all timelines
let mut current_physical_size = 0;
@@ -444,15 +428,17 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
}
let state = tenant.current_state();
Ok(TenantInfo {
let tenant_info = TenantInfo {
id: tenant_id,
state,
current_physical_size: Some(current_physical_size),
has_in_progress_downloads: Some(state.has_in_progress_downloads()),
})
}
.instrument(info_span!("tenant_status_handler", tenant = %tenant_id))
};
Ok::<_, anyhow::Error>(tenant_info)
})
.await
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))?
.map_err(ApiError::InternalServerError)?;
json_response(StatusCode::OK, tenant_info)
@@ -462,9 +448,7 @@ async fn tenant_size_handler(request: Request<Body>) -> Result<Response<Body>, A
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let tenant = tenant_mgr::get_tenant(tenant_id, true)
.await
.map_err(ApiError::InternalServerError)?;
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::InternalServerError)?;
// this can be long operation, it currently is not backed by any request coalescing or similar
let inputs = tenant
@@ -581,19 +565,22 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
.map(TenantId::from)
.unwrap_or_else(TenantId::generate);
let state = get_state(&request);
let new_tenant = tokio::task::spawn_blocking(move || {
let _enter = info_span!("tenant_create", tenant = ?target_tenant_id).entered();
let state = get_state(&request);
let new_tenant = tenant_mgr::create_tenant(
state.conf,
tenant_conf,
target_tenant_id,
state.remote_storage.clone(),
)
.instrument(info_span!("tenant_create", tenant = ?target_tenant_id))
tenant_mgr::create_tenant(
state.conf,
tenant_conf,
target_tenant_id,
state.remote_storage.clone(),
)
// FIXME: `create_tenant` can fail from both user and internal errors. Replace this
// with better error handling once the type permits it
.map_err(ApiError::InternalServerError)
})
.await
// FIXME: `create_tenant` can fail from both user and internal errors. Replace this
// with better error handling once the type permits it
.map_err(ApiError::InternalServerError)?;
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))??;
Ok(match new_tenant {
Some(tenant) => {
@@ -684,13 +671,17 @@ async fn tenant_config_handler(mut request: Request<Body>) -> Result<Response<Bo
);
}
let state = get_state(&request);
tenant_mgr::update_tenant_config(state.conf, tenant_conf, tenant_id)
.instrument(info_span!("tenant_config", tenant = ?tenant_id))
.await
// FIXME: `update_tenant_config` can fail because of both user and internal errors.
// Replace this `map_err` with better error handling once the type permits it
.map_err(ApiError::InternalServerError)?;
tokio::task::spawn_blocking(move || {
let _enter = info_span!("tenant_config", tenant = ?tenant_id).entered();
let state = get_state(&request);
tenant_mgr::update_tenant_config(state.conf, tenant_conf, tenant_id)
// FIXME: `update_tenant_config` can fail because of both user and internal errors.
// Replace this `map_err` with better error handling once the type permits it
.map_err(ApiError::InternalServerError)
})
.await
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))??;
json_response(StatusCode::OK, ())
}
@@ -737,7 +728,7 @@ async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body
let gc_req: TimelineGcRequest = json_request(&mut request).await?;
let wait_task_done = tenant_mgr::immediate_gc(tenant_id, timeline_id, gc_req).await?;
let wait_task_done = tenant_mgr::immediate_gc(tenant_id, timeline_id, gc_req)?;
let gc_result = wait_task_done
.await
.context("wait for gc task")
@@ -754,9 +745,7 @@ async fn timeline_compact_handler(request: Request<Body>) -> Result<Response<Bod
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_id))?;
let tenant = tenant_mgr::get_tenant(tenant_id, true)
.await
.map_err(ApiError::NotFound)?;
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
let timeline = tenant
.get_timeline(timeline_id, true)
.map_err(ApiError::NotFound)?;
@@ -775,9 +764,7 @@ async fn timeline_checkpoint_handler(request: Request<Body>) -> Result<Response<
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_id))?;
let tenant = tenant_mgr::get_tenant(tenant_id, true)
.await
.map_err(ApiError::NotFound)?;
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
let timeline = tenant
.get_timeline(timeline_id, true)
.map_err(ApiError::NotFound)?;
@@ -851,8 +838,6 @@ pub fn make_router(
.post("/v1/tenant/:tenant_id/timeline", timeline_create_handler)
.post("/v1/tenant/:tenant_id/attach", tenant_attach_handler)
.post("/v1/tenant/:tenant_id/detach", tenant_detach_handler)
.post("/v1/tenant/:tenant_id/load", tenant_load_handler)
.post("/v1/tenant/:tenant_id/ignore", tenant_ignore_handler)
.get(
"/v1/tenant/:tenant_id/timeline/:timeline_id",
timeline_detail_handler,

View File

@@ -125,13 +125,6 @@ pub const TEMP_FILE_SUFFIX: &str = "___temp";
/// Full path: `tenants/<tenant_id>/timelines/<timeline_id>___uninit`.
pub const TIMELINE_UNINIT_MARK_SUFFIX: &str = "___uninit";
/// A marker file to prevent pageserver from loading a certain tenant on restart.
/// Different from [`TIMELINE_UNINIT_MARK_SUFFIX`] due to semantics of the corresponding
/// `ignore` management API command, which expects the ignored tenant to be properly loaded
/// into pageserver's memory before being ignored.
/// Full path: `tenants/<tenant_id>/___ignored_tenant`.
pub const IGNORED_TENANT_FILE_NAME: &str = "___ignored_tenant";
pub fn is_temporary(path: &Path) -> bool {
match path.file_name() {
Some(name) => name.to_string_lossy().ends_with(TEMP_FILE_SUFFIX),

View File

@@ -941,7 +941,7 @@ impl postgres_backend_async::Handler for PageServerHandler {
/// ensures that queries don't fail immediately after pageserver startup, because
/// all tenants are still loading.
async fn get_active_tenant_with_timeout(tenant_id: TenantId) -> Result<Arc<Tenant>> {
let tenant = tenant_mgr::get_tenant(tenant_id, false).await?;
let tenant = tenant_mgr::get_tenant(tenant_id, false)?;
match tokio::time::timeout(Duration::from_secs(30), tenant.wait_to_become_active()).await {
Ok(wait_result) => wait_result
// no .context(), the error message is good enough and some tests depend on it

View File

@@ -202,9 +202,9 @@ use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, Mutex};
use anyhow::ensure;
use remote_storage::{DownloadError, GenericRemoteStorage, RemotePath};
use remote_storage::{DownloadError, GenericRemoteStorage};
use tokio::runtime::Runtime;
use tracing::{info, warn};
use tracing::{error, info, warn};
use tracing::{info_span, Instrument};
use utils::lsn::Lsn;
@@ -217,7 +217,7 @@ use crate::metrics::RemoteOpKind;
use crate::metrics::REMOTE_UPLOAD_QUEUE_UNFINISHED_TASKS;
use crate::{
config::PageServerConf,
storage_sync::index::LayerFileMetadata,
storage_sync::index::{LayerFileMetadata, RelativePath},
task_mgr,
task_mgr::TaskKind,
task_mgr::BACKGROUND_RUNTIME,
@@ -287,7 +287,7 @@ struct UploadQueueInitialized {
/// All layer files stored in the remote storage, taking into account all
/// in-progress and queued operations
latest_files: HashMap<RemotePath, LayerFileMetadata>,
latest_files: HashMap<RelativePath, LayerFileMetadata>,
/// Metadata stored in the remote storage, taking into account all
/// in-progress and queued operations.
@@ -337,18 +337,18 @@ impl UploadQueue {
let state = UploadQueueInitialized {
// As described in the doc comment, it's ok for `latest_files` and `latest_metadata` to be ahead.
latest_files: HashMap::new(),
latest_files: Default::default(),
latest_metadata: metadata.clone(),
// We haven't uploaded anything yet, so, `last_uploaded_consistent_lsn` must be 0 to prevent
// safekeepers from garbage-collecting anything.
last_uploaded_consistent_lsn: Lsn(0),
// what follows are boring default initializations
task_counter: 0,
task_counter: Default::default(),
num_inprogress_layer_uploads: 0,
num_inprogress_metadata_uploads: 0,
num_inprogress_deletions: 0,
inprogress_tasks: HashMap::new(),
queued_operations: VecDeque::new(),
inprogress_tasks: Default::default(),
queued_operations: Default::default(),
};
*self = UploadQueue::Initialized(state);
@@ -357,10 +357,6 @@ impl UploadQueue {
fn initialize_with_current_remote_index_part(
&mut self,
conf: &'static PageServerConf,
tenant_id: TenantId,
timeline_id: TimelineId,
index_part: &IndexPart,
) -> anyhow::Result<&mut UploadQueueInitialized> {
match self {
@@ -370,19 +366,14 @@ impl UploadQueue {
}
}
let mut files = HashMap::with_capacity(index_part.timeline_layers.len());
let timeline_path = conf.timeline_path(&timeline_id, &tenant_id);
for timeline_name in &index_part.timeline_layers {
let local_path = timeline_path.join(timeline_name);
let remote_timeline_path = conf.remote_path(&local_path).expect(
"Remote timeline path and local timeline path were constructed form the same conf",
);
let mut files = HashMap::new();
for path in &index_part.timeline_layers {
let layer_metadata = index_part
.layer_metadata
.get(timeline_name)
.get(path)
.map(LayerFileMetadata::from)
.unwrap_or(LayerFileMetadata::MISSING);
files.insert(remote_timeline_path, layer_metadata);
files.insert(path.clone(), layer_metadata);
}
let index_part_metadata = index_part.parse_metadata()?;
@@ -400,8 +391,8 @@ impl UploadQueue {
num_inprogress_layer_uploads: 0,
num_inprogress_metadata_uploads: 0,
num_inprogress_deletions: 0,
inprogress_tasks: HashMap::new(),
queued_operations: VecDeque::new(),
inprogress_tasks: Default::default(),
queued_operations: Default::default(),
};
*self = UploadQueue::Initialized(state);
@@ -465,12 +456,7 @@ impl RemoteTimelineClient {
/// The given `index_part` must be the one on the remote.
pub fn init_upload_queue(&self, index_part: &IndexPart) -> anyhow::Result<()> {
let mut upload_queue = self.upload_queue.lock().unwrap();
upload_queue.initialize_with_current_remote_index_part(
self.conf,
self.tenant_id,
self.timeline_id,
index_part,
)?;
upload_queue.initialize_with_current_remote_index_part(index_part)?;
Ok(())
}
@@ -524,13 +510,15 @@ impl RemoteTimelineClient {
/// On success, returns the size of the downloaded file.
pub async fn download_layer_file(
&self,
remote_path: &RemotePath,
path: &RelativePath,
layer_metadata: &LayerFileMetadata,
) -> anyhow::Result<u64> {
let downloaded_size = download::download_layer_file(
self.conf,
&self.storage_impl,
remote_path,
self.tenant_id,
self.timeline_id,
path,
layer_metadata,
)
.measure_remote_op(
@@ -548,13 +536,13 @@ impl RemoteTimelineClient {
let new_metadata = LayerFileMetadata::new(downloaded_size);
let mut guard = self.upload_queue.lock().unwrap();
let upload_queue = guard.initialized_mut()?;
if let Some(upgraded) = upload_queue.latest_files.get_mut(remote_path) {
if let Some(upgraded) = upload_queue.latest_files.get_mut(path) {
upgraded.merge(&new_metadata);
} else {
// The file should exist, since we just downloaded it.
warn!(
"downloaded file {:?} not found in local copy of the index file",
remote_path
path
);
}
}
@@ -624,9 +612,14 @@ impl RemoteTimelineClient {
"file size not initialized in metadata"
);
let relative_path = RelativePath::from_local_path(
&self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
path,
)?;
upload_queue
.latest_files
.insert(self.conf.remote_path(path)?, layer_metadata.clone());
.insert(relative_path, layer_metadata.clone());
let op = UploadOp::UploadLayer(PathBuf::from(path), layer_metadata.clone());
self.update_upload_queue_unfinished_metric(1, &op);
@@ -648,10 +641,13 @@ impl RemoteTimelineClient {
let mut guard = self.upload_queue.lock().unwrap();
let upload_queue = guard.initialized_mut()?;
// Convert the paths into RemotePaths, and gather other information we need.
let mut remote_paths = Vec::with_capacity(paths.len());
// Convert the paths into RelativePaths, and gather other information we need.
let mut relative_paths = Vec::with_capacity(paths.len());
for path in paths {
remote_paths.push(self.conf.remote_path(path)?);
relative_paths.push(RelativePath::from_local_path(
&self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
path,
)?);
}
// Deleting layers doesn't affect the values stored in TimelineMetadata,
@@ -667,8 +663,8 @@ impl RemoteTimelineClient {
// from latest_files, but not yet scheduled for deletion. Use a closure
// to syntactically forbid ? or bail! calls here.
let no_bail_here = || {
for remote_path in remote_paths {
upload_queue.latest_files.remove(&remote_path);
for relative_path in relative_paths {
upload_queue.latest_files.remove(&relative_path);
}
let index_part = IndexPart::new(
@@ -842,19 +838,14 @@ impl RemoteTimelineClient {
let upload_result: anyhow::Result<()> = match &task.op {
UploadOp::UploadLayer(ref path, ref layer_metadata) => {
upload::upload_timeline_layer(
self.conf,
&self.storage_impl,
path,
layer_metadata,
)
.measure_remote_op(
self.tenant_id,
self.timeline_id,
RemoteOpFileKind::Layer,
RemoteOpKind::Upload,
)
.await
upload::upload_timeline_layer(&self.storage_impl, path, layer_metadata)
.measure_remote_op(
self.tenant_id,
self.timeline_id,
RemoteOpFileKind::Layer,
RemoteOpKind::Upload,
)
.await
}
UploadOp::UploadMetadata(ref index_part, _lsn) => {
upload::upload_index_part(
@@ -873,7 +864,7 @@ impl RemoteTimelineClient {
.await
}
UploadOp::Delete(metric_file_kind, ref path) => {
delete::delete_layer(self.conf, &self.storage_impl, path)
delete::delete_layer(&self.storage_impl, path)
.measure_remote_op(
self.tenant_id,
self.timeline_id,
@@ -897,20 +888,10 @@ impl RemoteTimelineClient {
Err(e) => {
let retries = task.retries.fetch_add(1, Ordering::SeqCst);
// uploads may fail due to rate limits (IAM, S3) or spurious network and external errors
// such issues are relatively regular, so don't use WARN or ERROR to avoid alerting
// people and tests until the retries are definitely causing delays.
if retries < 3 {
info!(
"failed to perform remote task {}, will retry (attempt {}): {:?}",
task.op, retries, e
);
} else {
warn!(
"failed to perform remote task {}, will retry (attempt {}): {:?}",
task.op, retries, e
);
}
error!(
"failed to perform remote task {}, will retry (attempt {}): {:?}",
task.op, retries, e
);
// sleep until it's time to retry, or we're cancelled
tokio::select! {
@@ -1102,11 +1083,15 @@ mod tests {
TimelineMetadata::from_bytes(&metadata.to_bytes().unwrap()).unwrap()
}
fn assert_file_list(a: &HashSet<String>, b: &[&str]) {
let mut avec: Vec<&str> = a.iter().map(|a| a.as_str()).collect();
fn assert_file_list(a: &HashSet<RelativePath>, b: &[&str]) {
let xx = PathBuf::from("");
let mut avec: Vec<String> = a
.iter()
.map(|x| x.to_local_path(&xx).to_string_lossy().into())
.collect();
avec.sort();
let mut bvec = b.to_vec();
let mut bvec = b.to_owned();
bvec.sort_unstable();
assert_eq!(avec, bvec);
@@ -1174,7 +1159,8 @@ mod tests {
println!("workdir: {}", harness.conf.workdir.display());
let storage_impl = GenericRemoteStorage::from_config(&storage_config)?;
let storage_impl =
GenericRemoteStorage::from_config(harness.conf.workdir.clone(), &storage_config)?;
let client = Arc::new(RemoteTimelineClient {
conf: harness.conf,
runtime,

View File

@@ -5,24 +5,34 @@ use tracing::debug;
use remote_storage::GenericRemoteStorage;
use crate::config::PageServerConf;
pub(super) async fn delete_layer<'a>(
conf: &'static PageServerConf,
storage: &'a GenericRemoteStorage,
local_layer_path: &'a Path,
pub(super) async fn delete_layer(
storage: &GenericRemoteStorage,
local_layer_path: &Path,
) -> anyhow::Result<()> {
fail::fail_point!("before-delete-layer", |_| {
anyhow::bail!("failpoint before-delete-layer")
});
debug!("Deleting layer from remote storage: {local_layer_path:?}",);
debug!(
"Deleting layer from remote storage: {:?}",
local_layer_path.display()
);
let path_to_delete = conf.remote_path(local_layer_path)?;
let storage_path = storage
.remote_object_id(local_layer_path)
.with_context(|| {
format!(
"Failed to get the layer storage path for local path '{}'",
local_layer_path.display()
)
})?;
// XXX: If the deletion fails because the object already didn't exist,
// it would be good to just issue a warning but consider it success.
// https://github.com/neondatabase/neon/issues/2934
storage.delete(&path_to_delete).await.with_context(|| {
format!("Failed to delete remote layer from storage at {path_to_delete:?}")
storage.delete(&storage_path).await.with_context(|| {
format!(
"Failed to delete remote layer from storage at '{:?}'",
storage_path
)
})
}

View File

@@ -10,11 +10,12 @@ use tracing::debug;
use crate::config::PageServerConf;
use crate::storage_sync::index::LayerFileMetadata;
use remote_storage::{DownloadError, GenericRemoteStorage, RemotePath};
use remote_storage::{DownloadError, GenericRemoteStorage};
use utils::crashsafe::path_with_suffix_extension;
use utils::id::{TenantId, TimelineId};
use super::index::IndexPart;
use super::RelativePath;
async fn fsync_path(path: impl AsRef<std::path::Path>) -> Result<(), std::io::Error> {
fs::File::open(path).await?.sync_all().await
@@ -28,10 +29,21 @@ async fn fsync_path(path: impl AsRef<std::path::Path>) -> Result<(), std::io::Er
pub async fn download_layer_file<'a>(
conf: &'static PageServerConf,
storage: &'a GenericRemoteStorage,
remote_path: &'a RemotePath,
tenant_id: TenantId,
timeline_id: TimelineId,
path: &'a RelativePath,
layer_metadata: &'a LayerFileMetadata,
) -> anyhow::Result<u64> {
let local_path = conf.local_path(remote_path);
let timeline_path = conf.timeline_path(&timeline_id, &tenant_id);
let local_path = path.to_local_path(&timeline_path);
let layer_storage_path = storage.remote_object_id(&local_path).with_context(|| {
format!(
"Failed to get the layer storage path for local path '{}'",
local_path.display()
)
})?;
// Perform a rename inspired by durable_rename from file_utils.c.
// The sequence:
@@ -52,13 +64,18 @@ pub async fn download_layer_file<'a>(
temp_file_path.display()
)
})?;
let mut download = storage.download(remote_path).await.with_context(|| {
format!(
"Failed to open a download stream for layer with remote storage path '{remote_path:?}'"
)
})?;
let mut download = storage
.download(&layer_storage_path)
.await
.with_context(|| {
format!(
"Failed to open a download stream for layer with remote storage path '{layer_storage_path:?}'"
)
})?;
let bytes_amount = tokio::io::copy(&mut download.download_stream, &mut destination_file).await.with_context(|| {
format!("Failed to download layer with remote storage path '{remote_path:?}' into file {temp_file_path:?}")
format!(
"Failed to download layer with remote storage path '{layer_storage_path:?}' into file '{}'", temp_file_path.display()
)
})?;
// Tokio doc here: https://docs.rs/tokio/1.17.0/tokio/fs/struct.File.html states that:
@@ -134,7 +151,12 @@ pub async fn list_remote_timelines<'a>(
tenant_id: TenantId,
) -> anyhow::Result<Vec<(TimelineId, IndexPart)>> {
let tenant_path = conf.timelines_path(&tenant_id);
let tenant_storage_path = conf.remote_path(&tenant_path)?;
let tenant_storage_path = storage.remote_object_id(&tenant_path).with_context(|| {
format!(
"Failed to get tenant storage path for local path '{}'",
tenant_path.display()
)
})?;
let timelines = storage
.list_prefixes(Some(&tenant_storage_path))
@@ -196,8 +218,14 @@ pub async fn download_index_part(
let index_part_path = conf
.metadata_path(timeline_id, tenant_id)
.with_file_name(IndexPart::FILE_NAME);
let part_storage_path = conf
.remote_path(&index_part_path)
let part_storage_path = storage
.remote_object_id(&index_part_path)
.with_context(|| {
format!(
"Failed to get the index part storage path for local path '{}'",
index_part_path.display()
)
})
.map_err(DownloadError::BadInput)?;
let mut index_part_download = storage.download(&part_storage_path).await?;
@@ -208,12 +236,20 @@ pub async fn download_index_part(
&mut index_part_bytes,
)
.await
.with_context(|| format!("Failed to download an index part into file {index_part_path:?}"))
.with_context(|| {
format!(
"Failed to download an index part into file '{}'",
index_part_path.display()
)
})
.map_err(DownloadError::Other)?;
let index_part: IndexPart = serde_json::from_slice(&index_part_bytes)
.with_context(|| {
format!("Failed to deserialize index part file into file {index_part_path:?}")
format!(
"Failed to deserialize index part file into file '{}'",
index_part_path.display()
)
})
.map_err(DownloadError::Other)?;

View File

@@ -2,9 +2,12 @@
//! Able to restore itself from the storage index parts, which are located in every timeline's remote directory and contain all data about
//! remote timeline layers and their metadata.
use std::collections::{HashMap, HashSet};
use std::{
collections::{HashMap, HashSet},
path::{Path, PathBuf},
};
use remote_storage::RemotePath;
use anyhow::{Context, Ok};
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
@@ -12,6 +15,33 @@ use crate::tenant::metadata::TimelineMetadata;
use utils::lsn::Lsn;
/// A part of the filesystem path, that needs a root to become a path again.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(transparent)]
pub struct RelativePath(String);
impl RelativePath {
/// Attempts to strip off the base from path, producing a relative path or an error.
pub fn from_local_path(timeline_path: &Path, path: &Path) -> anyhow::Result<RelativePath> {
let relative = path.strip_prefix(timeline_path).with_context(|| {
format!(
"path '{}' is not relative to base '{}'",
path.display(),
timeline_path.display()
)
})?;
Ok(Self::from_filename(relative))
}
pub fn from_filename(path: &Path) -> RelativePath {
RelativePath(path.to_string_lossy().to_string())
}
pub fn to_local_path(&self, timeline_path: &Path) -> PathBuf {
timeline_path.join(&self.0)
}
}
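As a quick illustration of the round trip these two helpers are meant to preserve (the concrete paths below are made up), stripping the timeline directory off a layer path and joining it back on reproduces the original path:

use std::path::Path;

fn main() {
    let timeline_path = Path::new("/data/tenants/t1/timelines/tl1");
    let layer_path = timeline_path.join("000000000000-FFFFFFFF__0000000001696070");

    // from_local_path: keep only the part relative to the timeline directory.
    let relative = layer_path.strip_prefix(timeline_path).unwrap().to_path_buf();

    // to_local_path: joining the base back on yields the original path.
    assert_eq!(timeline_path.join(&relative), layer_path);
}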
/// Metadata gathered for each of the layer files.
///
/// Fields have to be `Option`s because remote [`IndexPart`]'s can be from different version, which
@@ -67,22 +97,21 @@ pub struct IndexPart {
#[serde(default)]
version: usize,
/// Layer names, which are stored on the remote storage.
/// Each of the layers present on remote storage.
///
/// Additional metadata might exist in `layer_metadata`.
pub timeline_layers: HashSet<String>,
pub timeline_layers: HashSet<RelativePath>,
/// FIXME: unused field. This should be removed, but that changes the on-disk format,
/// so we need to make sure we're backwards-` (and maybe forwards-) compatible
/// First pass is to move it to Optional and the next would be its removal
missing_layers: Option<HashSet<String>>,
/// so we need to make sure we're backwards- (and maybe forwards-) compatible
missing_layers: HashSet<RelativePath>,
/// Per layer file name metadata, which can be present for a present or missing layer file.
/// Per layer file metadata, which can be present for a present or missing layer file.
///
/// Older versions of `IndexPart` will not have this property or have only a part of metadata
/// that the latest version stores.
#[serde(default)]
pub layer_metadata: HashMap<String, IndexLayerMetadata>,
pub layer_metadata: HashMap<RelativePath, IndexLayerMetadata>,
// 'disk_consistent_lsn' is a copy of the 'disk_consistent_lsn' in the metadata.
// It's duplicated here for convenience.
@@ -100,29 +129,23 @@ impl IndexPart {
pub const FILE_NAME: &'static str = "index_part.json";
pub fn new(
layers_and_metadata: HashMap<RemotePath, LayerFileMetadata>,
layers_and_metadata: HashMap<RelativePath, LayerFileMetadata>,
disk_consistent_lsn: Lsn,
metadata_bytes: Vec<u8>,
) -> Self {
let mut timeline_layers = HashSet::with_capacity(layers_and_metadata.len());
let mut layer_metadata = HashMap::with_capacity(layers_and_metadata.len());
let mut timeline_layers = HashSet::new();
let mut layer_metadata = HashMap::new();
for (remote_path, metadata) in &layers_and_metadata {
let metadata = IndexLayerMetadata::from(metadata);
match remote_path.object_name() {
Some(layer_name) => {
timeline_layers.insert(layer_name.to_owned());
layer_metadata.insert(layer_name.to_owned(), metadata);
}
// TODO move this on a type level: we know, that every layer entry does have a name
None => panic!("Layer {remote_path:?} has no file name, skipping"),
}
}
separate_paths_and_metadata(
&layers_and_metadata,
&mut timeline_layers,
&mut layer_metadata,
);
Self {
version: Self::LATEST_VERSION,
timeline_layers,
missing_layers: Some(HashSet::new()),
missing_layers: HashSet::new(),
layer_metadata,
disk_consistent_lsn,
metadata_bytes,
@@ -148,6 +171,18 @@ impl From<&'_ LayerFileMetadata> for IndexLayerMetadata {
}
}
fn separate_paths_and_metadata(
input: &HashMap<RelativePath, LayerFileMetadata>,
output: &mut HashSet<RelativePath>,
layer_metadata: &mut HashMap<RelativePath, IndexLayerMetadata>,
) {
for (path, metadata) in input {
let metadata = IndexLayerMetadata::from(metadata);
layer_metadata.insert(path.clone(), metadata);
output.insert(path.clone());
}
}
#[cfg(test)]
mod tests {
use super::*;
@@ -163,8 +198,8 @@ mod tests {
let expected = IndexPart {
version: 0,
timeline_layers: HashSet::from([String::from("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9")]),
missing_layers: Some(HashSet::from([String::from("not_a_real_layer_but_adding_coverage")])),
timeline_layers: [RelativePath("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_owned())].into_iter().collect(),
missing_layers: [RelativePath("not_a_real_layer_but_adding_coverage".to_owned())].into_iter().collect(),
layer_metadata: HashMap::default(),
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
metadata_bytes: [113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].to_vec(),
@@ -191,13 +226,13 @@ mod tests {
let expected = IndexPart {
// note this is not verified, could be anything, but exists for humans debugging.. could be the git version instead?
version: 1,
timeline_layers: HashSet::from([String::from("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9")]),
missing_layers: Some(HashSet::from([String::from("not_a_real_layer_but_adding_coverage")])),
timeline_layers: [RelativePath("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_owned())].into_iter().collect(),
missing_layers: [RelativePath("not_a_real_layer_but_adding_coverage".to_owned())].into_iter().collect(),
layer_metadata: HashMap::from([
(String::from("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9"), IndexLayerMetadata {
(RelativePath("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_owned()), IndexLayerMetadata {
file_size: Some(25600000),
}),
(String::from("not_a_real_layer_but_adding_coverage"), IndexLayerMetadata {
(RelativePath("not_a_real_layer_but_adding_coverage".to_owned()), IndexLayerMetadata {
// serde_json should always parse this but this might be a double with jq for
// example.
file_size: Some(9007199254741001),
@@ -210,46 +245,4 @@ mod tests {
let part = serde_json::from_str::<IndexPart>(example).unwrap();
assert_eq!(part, expected);
}
#[test]
fn v1_indexpart_is_parsed_with_optional_missing_layers() {
let example = r#"{
"version":1,
"timeline_layers":["000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9"],
"layer_metadata":{
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
"not_a_real_layer_but_adding_coverage": { "file_size": 9007199254741001 }
},
"disk_consistent_lsn":"0/16960E8",
"metadata_bytes":[112,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
}"#;
let expected = IndexPart {
// note this is not verified, could be anything, but exists for humans debugging.. could be the git version instead?
version: 1,
timeline_layers: HashSet::from(["000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_string()]),
layer_metadata: HashMap::from([
(
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_string(),
IndexLayerMetadata {
file_size: Some(25600000),
}
),
(
"not_a_real_layer_but_adding_coverage".to_string(),
IndexLayerMetadata {
// serde_json should always parse this but this might be a double with jq for
// example.
file_size: Some(9007199254741001),
}
)
]),
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
metadata_bytes: [112,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].to_vec(),
missing_layers: None,
};
let part = serde_json::from_str::<IndexPart>(example).unwrap();
assert_eq!(part, expected);
}
}
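One reason the fixtures above keep parsing after the switch from `String` keys to `RelativePath` is the `#[serde(transparent)]` attribute: the newtype serializes as a bare JSON string, so the on-disk `index_part.json` format does not change. A small self-contained sketch of that behaviour (assumes the serde derive feature and serde_json as dependencies):

use serde::{Deserialize, Serialize};

#[derive(Debug, PartialEq, Serialize, Deserialize)]
#[serde(transparent)]
struct RelativePath(String);

fn main() {
    // Round-trips as a plain string, exactly like the old String-based fields.
    let parsed: RelativePath = serde_json::from_str("\"layer_file_name\"").unwrap();
    assert_eq!(parsed, RelativePath("layer_file_name".to_owned()));
    assert_eq!(serde_json::to_string(&parsed).unwrap(), "\"layer_file_name\"");
}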

View File

@@ -30,9 +30,12 @@ pub(super) async fn upload_index_part<'a>(
let index_part_path = conf
.metadata_path(timeline_id, tenant_id)
.with_file_name(IndexPart::FILE_NAME);
let storage_path = conf.remote_path(&index_part_path)?;
storage
.upload_storage_object(Box::new(index_part_bytes), index_part_size, &storage_path)
.upload_storage_object(
Box::new(index_part_bytes),
index_part_size,
&index_part_path,
)
.await
.with_context(|| format!("Failed to upload index part for '{tenant_id} / {timeline_id}'"))
}
@@ -41,26 +44,36 @@ pub(super) async fn upload_index_part<'a>(
/// No extra checks for overlapping files is made and any files that are already present remotely will be overwritten, if submitted during the upload.
///
/// On an error, bumps the retries count and reschedules the entire task.
pub(super) async fn upload_timeline_layer<'a>(
conf: &'static PageServerConf,
storage: &'a GenericRemoteStorage,
source_path: &'a Path,
known_metadata: &'a LayerFileMetadata,
pub(super) async fn upload_timeline_layer(
storage: &GenericRemoteStorage,
source_path: &Path,
known_metadata: &LayerFileMetadata,
) -> anyhow::Result<()> {
fail_point!("before-upload-layer", |_| {
bail!("failpoint before-upload-layer")
});
let storage_path = conf.remote_path(source_path)?;
let storage_path = storage.remote_object_id(source_path).with_context(|| {
format!(
"Failed to get the layer storage path for local path '{}'",
source_path.display()
)
})?;
let source_file = fs::File::open(&source_path)
.await
.with_context(|| format!("Failed to open a source file for layer {source_path:?}"))?;
let source_file = fs::File::open(&source_path).await.with_context(|| {
format!(
"Failed to open a source file for layer '{}'",
source_path.display()
)
})?;
let fs_size = source_file
.metadata()
.await
.with_context(|| {
format!("Failed to get the source file metadata for layer {source_path:?}")
format!(
"Failed to get the source file metadata for layer '{}'",
source_path.display()
)
})?
.len();

View File

@@ -139,7 +139,7 @@ pub struct PageserverTaskId(u64);
/// Each task that we track is associated with a "task ID". It's just an
/// increasing number that we assign. Note that it is different from tokio::task::Id.
static NEXT_TASK_ID: AtomicU64 = AtomicU64::new(1);
static NEXT_TASK_ID: Lazy<AtomicU64> = Lazy::new(|| AtomicU64::new(1));
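Either form of the static behaves the same at runtime: ids are handed out with a relaxed fetch_add on the counter. A tiny sketch of that allocation pattern (the helper name is hypothetical):

use std::sync::atomic::{AtomicU64, Ordering};

static NEXT_ID: AtomicU64 = AtomicU64::new(1);

/// Returns a process-wide unique, monotonically increasing id.
fn next_task_id() -> u64 {
    NEXT_ID.fetch_add(1, Ordering::Relaxed)
}

fn main() {
    assert_eq!(next_task_id(), 1);
    assert_eq!(next_task_id(), 2);
}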
/// Global registry of tasks
static TASKS: Lazy<Mutex<HashMap<u64, Arc<PageServerTask>>>> =

View File

@@ -571,7 +571,7 @@ impl Tenant {
pub fn spawn_attach(
conf: &'static PageServerConf,
tenant_id: TenantId,
remote_storage: GenericRemoteStorage,
remote_storage: &GenericRemoteStorage,
) -> Arc<Tenant> {
// XXX: Attach should provide the config, especially during tenant migration.
// See https://github.com/neondatabase/neon/issues/1555
@@ -584,7 +584,7 @@ impl Tenant {
tenant_conf,
wal_redo_manager,
tenant_id,
Some(remote_storage),
Some(remote_storage.clone()),
));
// Do all the hard work in the background
@@ -782,7 +782,7 @@ impl Tenant {
let tenant_conf = match Self::load_tenant_config(conf, tenant_id) {
Ok(conf) => conf,
Err(e) => {
error!("load tenant config failed: {:?}", e);
error!("load tenant config failed: {}", e);
return Tenant::create_broken_tenant(conf, tenant_id);
}
};
@@ -2669,7 +2669,7 @@ pub mod harness {
&self,
key: Key,
lsn: Lsn,
base_img: Option<(Lsn, Bytes)>,
base_img: Option<Bytes>,
records: Vec<(Lsn, NeonWalRecord)>,
_pg_version: u32,
) -> Result<Bytes, WalRedoError> {

View File

@@ -5,6 +5,7 @@
use crate::page_cache;
use crate::page_cache::{ReadBufResult, PAGE_SZ};
use bytes::Bytes;
use once_cell::sync::Lazy;
use std::ops::{Deref, DerefMut};
use std::os::unix::fs::FileExt;
use std::sync::atomic::AtomicU64;
@@ -116,7 +117,7 @@ where
}
}
static NEXT_ID: AtomicU64 = AtomicU64::new(1);
static NEXT_ID: Lazy<AtomicU64> = Lazy::new(|| AtomicU64::new(1));
/// An adapter for reading a (virtual) file using the page cache.
///

View File

@@ -19,7 +19,7 @@ use std::sync::atomic::{AtomicBool, AtomicI64, Ordering as AtomicOrdering};
use std::sync::{Arc, Mutex, MutexGuard, RwLock};
use std::time::{Duration, Instant, SystemTime};
use crate::storage_sync::index::IndexPart;
use crate::storage_sync::index::{IndexPart, RelativePath};
use crate::storage_sync::RemoteTimelineClient;
use crate::tenant::{
delta_layer::{DeltaLayer, DeltaLayerWriter},
@@ -999,9 +999,55 @@ impl Timeline {
&self,
index_part: &IndexPart,
remote_client: &RemoteTimelineClient,
local_layers: HashSet<PathBuf>,
mut local_filenames: HashSet<PathBuf>,
up_to_date_disk_consistent_lsn: Lsn,
) -> anyhow::Result<HashSet<PathBuf>> {
let mut remote_filenames: HashSet<PathBuf> = HashSet::new();
for fname in index_part.timeline_layers.iter() {
remote_filenames.insert(fname.to_local_path(&PathBuf::from("")));
}
// Are there any local files that exist, with a size that doesn't match
// with the size stored in the remote index file?
// If so, rename_to_backup those files so that we re-download them later.
local_filenames.retain(|path| {
let layer_metadata = index_part
.layer_metadata
.get(&RelativePath::from_filename(path))
.map(LayerFileMetadata::from)
.unwrap_or(LayerFileMetadata::MISSING);
if let Some(remote_size) = layer_metadata.file_size() {
let local_path = self.conf.timeline_path(&self.timeline_id, &self.tenant_id).join(&path);
match local_path.metadata() {
Ok(metadata) => {
let local_size = metadata.len();
if local_size != remote_size {
warn!("removing local file \"{}\" because it has unexpected length {}; length in remote index is {}",
path.display(),
local_size,
remote_size);
if let Err(err) = rename_to_backup(&local_path) {
error!("could not rename file \"{}\": {:?}",
local_path.display(), err);
}
self.metrics.current_physical_size_gauge.sub(local_size);
false
} else {
true
}
}
Err(err) => {
error!("could not get size of local file \"{}\": {:?}", path.display(), err);
true
}
}
} else {
true
}
});
// Are we missing some files that are present in remote storage?
// Download them now.
// TODO Downloading many files this way is not efficient.
@@ -1010,63 +1056,17 @@ impl Timeline {
// b) typical case now is that there is nothing to sync, this downloads a lot
// 1) if there was another pageserver that came and generated new files
// 2) during attach of a timeline with big history which we currently do not do
let mut local_only_layers = local_layers;
let timeline_dir = self.conf.timeline_path(&self.timeline_id, &self.tenant_id);
for remote_layer_name in &index_part.timeline_layers {
let local_layer_path = timeline_dir.join(remote_layer_name);
local_only_layers.remove(&local_layer_path);
for path in remote_filenames.difference(&local_filenames) {
let fname = path.to_str().unwrap();
info!("remote layer file {fname} does not exist locally");
let remote_layer_metadata = index_part
let layer_metadata = index_part
.layer_metadata
.get(remote_layer_name)
.get(&RelativePath::from_filename(path))
.map(LayerFileMetadata::from)
.unwrap_or(LayerFileMetadata::MISSING);
let remote_layer_path = self
.conf
.remote_path(&local_layer_path)
.expect("local_layer_path received from the same conf that provided a workdir");
if local_layer_path.exists() {
let mut already_downloaded = true;
// Are there any local files that exist, with a size that doesn't match
// with the size stored in the remote index file?
// If so, rename_to_backup those files so that we re-download them later.
if let Some(remote_size) = remote_layer_metadata.file_size() {
match local_layer_path.metadata() {
Ok(metadata) => {
let local_size = metadata.len();
if local_size != remote_size {
warn!("removing local file {local_layer_path:?} because it has unexpected length {local_size}; length in remote index is {remote_size}");
if let Err(err) = rename_to_backup(&local_layer_path) {
error!("could not rename file {local_layer_path:?}: {err:?}");
} else {
self.metrics.current_physical_size_gauge.sub(local_size);
already_downloaded = false;
}
}
}
Err(err) => {
error!("could not get size of local file {local_layer_path:?}: {err:?}")
}
}
}
if already_downloaded {
continue;
}
} else {
info!("remote layer {remote_layer_path:?} does not exist locally");
}
let layer_name = local_layer_path
.file_name()
.and_then(|os_str| os_str.to_str())
.with_context(|| {
format!("Layer file {local_layer_path:?} has no name in unicode")
})?;
if let Some(imgfilename) = ImageFileName::parse_str(layer_name) {
if let Some(imgfilename) = ImageFileName::parse_str(fname) {
if imgfilename.lsn > up_to_date_disk_consistent_lsn {
warn!(
"found future image layer {} on timeline {} remote_consistent_lsn is {}",
@@ -1075,13 +1075,11 @@ impl Timeline {
continue;
}
trace!("downloading image file: {remote_layer_path:?}");
let downloaded_size = remote_client
.download_layer_file(&remote_layer_path, &remote_layer_metadata)
trace!("downloading image file: {}", file = path.display());
let sz = remote_client
.download_layer_file(&RelativePath::from_filename(path), &layer_metadata)
.await
.with_context(|| {
format!("failed to download image layer from path {remote_layer_path:?}")
})?;
.context("download image layer")?;
trace!("done");
let image_layer =
@@ -1091,10 +1089,8 @@ impl Timeline {
.write()
.unwrap()
.insert_historic(Arc::new(image_layer));
self.metrics
.current_physical_size_gauge
.add(downloaded_size);
} else if let Some(deltafilename) = DeltaFileName::parse_str(layer_name) {
self.metrics.current_physical_size_gauge.add(sz);
} else if let Some(deltafilename) = DeltaFileName::parse_str(fname) {
// Create a DeltaLayer struct for each delta file.
// The end-LSN is exclusive, while disk_consistent_lsn is
// inclusive. For example, if disk_consistent_lsn is 100, it is
@@ -1109,13 +1105,11 @@ impl Timeline {
continue;
}
trace!("downloading delta file: {remote_layer_path:?}");
trace!("downloading delta file: {}", file = path.display());
let sz = remote_client
.download_layer_file(&remote_layer_path, &remote_layer_metadata)
.download_layer_file(&RelativePath::from_filename(path), &layer_metadata)
.await
.with_context(|| {
format!("failed to download delta layer from path {remote_layer_path:?}")
})?;
.context("download delta layer")?;
trace!("done");
let delta_layer =
@@ -1126,18 +1120,17 @@ impl Timeline {
.unwrap()
.insert_historic(Arc::new(delta_layer));
self.metrics.current_physical_size_gauge.add(sz);
} else if layer_name.ends_with(".old") {
// For details see https://github.com/neondatabase/neon/issues/3024
warn!(
"got backup file on the remote storage, ignoring it {file}",
file = layer_name
)
} else {
bail!("unexpected layer filename {layer_name} in remote storage path: {remote_layer_path:?}");
bail!("unexpected layer filename in remote storage: {}", fname);
}
}
Ok(local_only_layers)
// now these are local only filenames
let local_only_filenames = local_filenames
.difference(&remote_filenames)
.cloned()
.collect();
Ok(local_only_filenames)
}
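Stripped of the size checks and LSN filtering, the reconciliation in download_missing comes down to two set differences over layer file names: entries only present in the remote index get downloaded, and entries only present locally are returned so they can later be scheduled for upload. A simplified sketch with made-up file names:

use std::collections::HashSet;
use std::path::PathBuf;

fn main() {
    let local: HashSet<PathBuf> = ["layer_a", "layer_b"].into_iter().map(PathBuf::from).collect();
    let remote: HashSet<PathBuf> = ["layer_b", "layer_c"].into_iter().map(PathBuf::from).collect();

    // Present in the remote index but not on local disk: download these.
    let to_download: HashSet<PathBuf> = remote.difference(&local).cloned().collect();
    // Present locally but unknown to the remote index: upload these later.
    let local_only: HashSet<PathBuf> = local.difference(&remote).cloned().collect();

    assert_eq!(to_download, HashSet::from([PathBuf::from("layer_c")]));
    assert_eq!(local_only, HashSet::from([PathBuf::from("layer_a")]));
}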
///
@@ -1171,46 +1164,47 @@ impl Timeline {
let disk_consistent_lsn = up_to_date_metadata.disk_consistent_lsn();
// Build a map of local layers for quick lookups
let local_layers = self
.layers
.read()
.unwrap()
.iter_historic_layers()
.map(|historic_layer| {
historic_layer
.local_path()
.expect("Historic layers should have a path")
})
.collect::<HashSet<_>>();
let mut local_filenames: HashSet<PathBuf> = HashSet::new();
for layer in self.layers.read().unwrap().iter_historic_layers() {
local_filenames.insert(layer.filename());
}
let local_only_layers = match index_part {
let local_only_filenames = match index_part {
Some(index_part) => {
info!(
"initializing upload queue from remote index with {} layer files",
index_part.timeline_layers.len()
);
remote_client.init_upload_queue(index_part)?;
self.download_missing(index_part, remote_client, local_layers, disk_consistent_lsn)
.await?
let local_only_filenames = self
.download_missing(
index_part,
remote_client,
local_filenames,
disk_consistent_lsn,
)
.await?;
local_only_filenames
}
None => {
info!("initializing upload queue as empty");
remote_client.init_upload_queue_for_empty_remote(up_to_date_metadata)?;
local_layers
local_filenames
}
};
// Are there local files that don't exist remotely? Schedule uploads for them
for layer_path in &local_only_layers {
let layer_size = layer_path
let timeline_path = self.conf.timeline_path(&self.timeline_id, &self.tenant_id);
for fname in &local_only_filenames {
let absolute = timeline_path.join(fname);
let sz = absolute
.metadata()
.with_context(|| format!("failed to get file {layer_path:?} metadata"))?
.with_context(|| format!("failed to get file {} metadata", fname.display()))?
.len();
info!("scheduling {layer_path:?} for upload");
remote_client
.schedule_layer_file_upload(layer_path, &LayerFileMetadata::new(layer_size))?;
info!("scheduling {} for upload", fname.display());
remote_client.schedule_layer_file_upload(&absolute, &LayerFileMetadata::new(sz))?;
}
if !local_only_layers.is_empty() {
if !local_only_filenames.is_empty() {
remote_client.schedule_index_upload(up_to_date_metadata)?;
}
@@ -2648,22 +2642,24 @@ impl Timeline {
data.records.len()
);
} else {
if data.img.is_some() {
let base_img = if let Some((_lsn, img)) = data.img {
trace!(
"found {} WAL records and a base image for {} at {}, performing WAL redo",
data.records.len(),
key,
request_lsn
);
Some(img)
} else {
trace!("found {} WAL records that will init the page for {} at {}, performing WAL redo", data.records.len(), key, request_lsn);
None
};
let last_rec_lsn = data.records.last().unwrap().0;
let img = self
.walredo_mgr
.request_redo(key, request_lsn, data.img, data.records, self.pg_version)
.request_redo(key, request_lsn, base_img, data.records, self.pg_version)
.context("Failed to reconstruct a page image:")?;
if img.len() == page_cache::PAGE_SZ {

View File

@@ -185,9 +185,6 @@ impl TenantConfOpt {
if let Some(max_lsn_wal_lag) = other.max_lsn_wal_lag {
self.max_lsn_wal_lag = Some(max_lsn_wal_lag);
}
if let Some(trace_read_requests) = other.trace_read_requests {
self.trace_read_requests = Some(trace_read_requests);
}
}
}

View File

@@ -1,59 +1,75 @@
//! This module acts as a switchboard to access different repositories managed by this
//! page server.
use std::collections::{hash_map, HashMap};
use std::collections::hash_map;
use std::ffi::OsStr;
use std::fs;
use std::path::Path;
use std::sync::Arc;
use tokio::fs;
use anyhow::Context;
use once_cell::sync::Lazy;
use tokio::sync::RwLock;
use tracing::*;
use remote_storage::GenericRemoteStorage;
use utils::crashsafe;
use crate::config::PageServerConf;
use crate::task_mgr::{self, TaskKind};
use crate::tenant::{Tenant, TenantState};
use crate::tenant_config::TenantConfOpt;
use crate::IGNORED_TENANT_FILE_NAME;
use utils::fs_ext::PathExt;
use utils::id::{TenantId, TimelineId};
static TENANTS: Lazy<RwLock<HashMap<TenantId, Arc<Tenant>>>> =
Lazy::new(|| RwLock::new(HashMap::new()));
mod tenants_state {
use once_cell::sync::Lazy;
use std::{
collections::HashMap,
sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard},
};
use utils::id::TenantId;
use crate::tenant::Tenant;
static TENANTS: Lazy<RwLock<HashMap<TenantId, Arc<Tenant>>>> =
Lazy::new(|| RwLock::new(HashMap::new()));
pub(super) fn read_tenants() -> RwLockReadGuard<'static, HashMap<TenantId, Arc<Tenant>>> {
TENANTS
.read()
.expect("Failed to read() tenants lock, it got poisoned")
}
pub(super) fn write_tenants() -> RwLockWriteGuard<'static, HashMap<TenantId, Arc<Tenant>>> {
TENANTS
.write()
.expect("Failed to write() tenants lock, it got poisoned")
}
}
/// Initialize repositories with locally available timelines.
/// Timelines that are only partially available locally (remote storage has more data than this pageserver)
/// are scheduled for download and added to the tenant once download is completed.
#[instrument(skip(conf, remote_storage))]
pub async fn init_tenant_mgr(
pub fn init_tenant_mgr(
conf: &'static PageServerConf,
remote_storage: Option<GenericRemoteStorage>,
) -> anyhow::Result<()> {
let _entered = info_span!("init_tenant_mgr").entered();
// Scan local filesystem for attached tenants
let mut number_of_tenants = 0;
let tenants_dir = conf.tenants_path();
let mut dir_entries = fs::read_dir(&tenants_dir)
.await
.with_context(|| format!("Failed to list tenants dir {tenants_dir:?}"))?;
loop {
match dir_entries.next_entry().await {
Ok(None) => break,
Ok(Some(dir_entry)) => {
for dir_entry in std::fs::read_dir(&tenants_dir)
.with_context(|| format!("Failed to list tenants dir {}", tenants_dir.display()))?
{
match &dir_entry {
Ok(dir_entry) => {
let tenant_dir_path = dir_entry.path();
if crate::is_temporary(&tenant_dir_path) {
info!(
"Found temporary tenant directory, removing: {}",
tenant_dir_path.display()
);
if let Err(e) = fs::remove_dir_all(&tenant_dir_path).await {
if let Err(e) = std::fs::remove_dir_all(&tenant_dir_path) {
error!(
"Failed to remove temporary directory '{}': {:?}",
tenant_dir_path.display(),
@@ -61,38 +77,27 @@ pub async fn init_tenant_mgr(
);
}
} else {
// This case happens if we crash during attach before creating the attach marker file
let is_empty = tenant_dir_path.is_empty_dir().with_context(|| {
format!("Failed to check whether {tenant_dir_path:?} is an empty dir")
})?;
if is_empty {
info!("removing empty tenant directory {tenant_dir_path:?}");
if let Err(e) = fs::remove_dir(&tenant_dir_path).await {
error!(
"Failed to remove empty tenant directory '{}': {e:#}",
tenant_dir_path.display()
)
}
continue;
}
let tenant_ignore_mark_file = tenant_dir_path.join(IGNORED_TENANT_FILE_NAME);
if tenant_ignore_mark_file.exists() {
info!("Found an ignore mark file {tenant_ignore_mark_file:?}, skipping the tenant");
continue;
}
match schedule_local_tenant_processing(
conf,
&tenant_dir_path,
remote_storage.clone(),
) {
Ok(tenant) => {
TENANTS.write().await.insert(tenant.tenant_id(), tenant);
match load_local_tenant(conf, &tenant_dir_path, remote_storage.clone()) {
Ok(Some(tenant)) => {
tenants_state::write_tenants().insert(tenant.tenant_id(), tenant);
number_of_tenants += 1;
}
Ok(None) => {
// This case happens if we crash during attach before creating the attach marker file
if let Err(e) = std::fs::remove_dir(&tenant_dir_path) {
error!(
"Failed to remove empty tenant directory '{}': {e:#}",
tenant_dir_path.display()
)
}
}
Err(e) => {
error!("Failed to collect tenant files from dir {tenants_dir:?} for entry {dir_entry:?}, reason: {e:#}");
error!(
"Failed to collect tenant files from dir '{}' for entry {:?}, reason: {:#}",
tenants_dir.display(),
dir_entry,
e
);
}
}
}
@@ -102,7 +107,10 @@ pub async fn init_tenant_mgr(
// here, the pageserver startup fails altogether, causing outage for *all*
// tenants. That seems worse.
error!(
"Failed to list tenants dir entry in directory {tenants_dir:?}, reason: {e:?}"
"Failed to list tenants dir entry {:?} in directory {}, reason: {:?}",
dir_entry,
tenants_dir.display(),
e,
);
}
}
@@ -112,45 +120,34 @@ pub async fn init_tenant_mgr(
Ok(())
}
pub fn schedule_local_tenant_processing(
fn load_local_tenant(
conf: &'static PageServerConf,
tenant_path: &Path,
remote_storage: Option<GenericRemoteStorage>,
) -> anyhow::Result<Arc<Tenant>> {
anyhow::ensure!(
tenant_path.is_dir(),
"Cannot load tenant from path {tenant_path:?}, it either does not exist or is not a directory"
);
anyhow::ensure!(
!crate::is_temporary(tenant_path),
"Cannot load tenant from temporary path {tenant_path:?}"
);
anyhow::ensure!(
!tenant_path.is_empty_dir().with_context(|| {
format!("Failed to check whether {tenant_path:?} is an empty dir")
})?,
"Cannot load tenant from empty directory {tenant_path:?}"
);
) -> anyhow::Result<Option<Arc<Tenant>>> {
if !tenant_path.is_dir() {
anyhow::bail!("tenant_path is not a directory: {tenant_path:?}")
}
let is_empty = tenant_path
.is_empty_dir()
.context("check whether tenant_path is an empty dir")?;
if is_empty {
info!("skipping empty tenant directory {tenant_path:?}");
return Ok(None);
}
let tenant_id = tenant_path
.file_name()
.and_then(OsStr::to_str)
.unwrap_or_default()
.parse::<TenantId>()
.with_context(|| {
format!("Could not parse tenant id out of the tenant dir name in path {tenant_path:?}")
})?;
let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(tenant_id);
anyhow::ensure!(
!conf.tenant_ignore_mark_file_path(tenant_id).exists(),
"Cannot load tenant, ignore mark found at {tenant_ignore_mark:?}"
);
.context("Could not parse tenant id out of the tenant dir name")?;
let tenant = if conf.tenant_attaching_mark_file_path(&tenant_id).exists() {
info!("tenant {tenant_id} has attaching mark file, resuming its attach operation");
if let Some(remote_storage) = remote_storage {
Tenant::spawn_attach(conf, tenant_id, remote_storage)
Tenant::spawn_attach(conf, tenant_id, &remote_storage)
} else {
warn!("tenant {tenant_id} has attaching mark file, but pageserver has no remote storage configured");
Tenant::create_broken_tenant(conf, tenant_id)
@@ -160,7 +157,7 @@ pub fn schedule_local_tenant_processing(
// Start loading the tenant into memory. It will initially be in Loading state.
Tenant::spawn_load(conf, tenant_id, remote_storage)
};
Ok(tenant)
Ok(Some(tenant))
}
///
@@ -168,7 +165,7 @@ pub fn schedule_local_tenant_processing(
///
pub async fn shutdown_all_tenants() {
let tenants_to_shut_down = {
let mut m = TENANTS.write().await;
let mut m = tenants_state::write_tenants();
let mut tenants_to_shut_down = Vec::with_capacity(m.len());
for (_, tenant) in m.drain() {
if tenant.is_active() {
@@ -202,13 +199,13 @@ pub async fn shutdown_all_tenants() {
}
}
pub async fn create_tenant(
pub fn create_tenant(
conf: &'static PageServerConf,
tenant_conf: TenantConfOpt,
tenant_id: TenantId,
remote_storage: Option<GenericRemoteStorage>,
) -> anyhow::Result<Option<Arc<Tenant>>> {
match TENANTS.write().await.entry(tenant_id) {
match tenants_state::write_tenants().entry(tenant_id) {
hash_map::Entry::Occupied(_) => {
debug!("tenant {tenant_id} already exists");
Ok(None)
@@ -218,36 +215,44 @@ pub async fn create_tenant(
// If this section ever becomes contentious, introduce a new `TenantState::Creating`.
let tenant_directory =
super::tenant::create_tenant_files(conf, tenant_conf, tenant_id)?;
let created_tenant =
schedule_local_tenant_processing(conf, &tenant_directory, remote_storage)?;
let crated_tenant_id = created_tenant.tenant_id();
anyhow::ensure!(
tenant_id == crated_tenant_id,
"loaded created tenant has unexpected tenant id (expect {tenant_id} != actual {crated_tenant_id})",
);
v.insert(Arc::clone(&created_tenant));
Ok(Some(created_tenant))
let created_tenant = load_local_tenant(conf, &tenant_directory, remote_storage)?;
match created_tenant {
None => {
// We get None in case the directory is empty.
// This shouldn't happen here, because we just created the directory.
// So, skip any cleanup work for now, we don't know how we reached this state.
anyhow::bail!("we just created the tenant directory, it can't be empty");
}
Some(tenant) => {
anyhow::ensure!(
tenant_id == tenant.tenant_id(),
"loaded created tenant has unexpected tenant id (expect {} != actual {})",
tenant_id,
tenant.tenant_id()
);
v.insert(Arc::clone(&tenant));
Ok(Some(tenant))
}
}
}
}
}
pub async fn update_tenant_config(
pub fn update_tenant_config(
conf: &'static PageServerConf,
tenant_conf: TenantConfOpt,
tenant_id: TenantId,
) -> anyhow::Result<()> {
info!("configuring tenant {tenant_id}");
get_tenant(tenant_id, true)
.await?
.update_tenant_config(tenant_conf);
get_tenant(tenant_id, true)?.update_tenant_config(tenant_conf);
Tenant::persist_tenant_config(&conf.tenant_config_path(tenant_id), tenant_conf, false)?;
Ok(())
}
/// Gets the tenant from the in-memory data, erroring if it's absent or is not fitting to the query.
/// `active_only = true` allows querying only tenants that are ready for operations, erroring on other kinds of tenants.
pub async fn get_tenant(tenant_id: TenantId, active_only: bool) -> anyhow::Result<Arc<Tenant>> {
let m = TENANTS.read().await;
pub fn get_tenant(tenant_id: TenantId, active_only: bool) -> anyhow::Result<Arc<Tenant>> {
let m = tenants_state::read_tenants();
let tenant = m
.get(&tenant_id)
.with_context(|| format!("Tenant {tenant_id} not found in the local state"))?;
@@ -283,7 +288,7 @@ pub async fn delete_timeline(tenant_id: TenantId, timeline_id: TimelineId) -> an
info!("waiting for timeline tasks to shutdown");
task_mgr::shutdown_tasks(None, Some(tenant_id), Some(timeline_id)).await;
info!("timeline task shutdown completed");
match get_tenant(tenant_id, true).await {
match get_tenant(tenant_id, true) {
Ok(tenant) => {
tenant.delete_timeline(timeline_id).await?;
}
@@ -297,67 +302,40 @@ pub async fn detach_tenant(
conf: &'static PageServerConf,
tenant_id: TenantId,
) -> anyhow::Result<()> {
remove_tenant_from_memory(tenant_id, async {
let local_tenant_directory = conf.tenant_path(&tenant_id);
fs::remove_dir_all(&local_tenant_directory)
.await
.with_context(|| {
format!("Failed to remove local tenant directory {local_tenant_directory:?}")
})?;
Ok(())
})
.await
}
let tenant = match {
let mut tenants_accessor = tenants_state::write_tenants();
tenants_accessor.remove(&tenant_id)
} {
Some(tenant) => tenant,
None => anyhow::bail!("Tenant not found for id {tenant_id}"),
};
pub async fn load_tenant(
conf: &'static PageServerConf,
tenant_id: TenantId,
remote_storage: Option<GenericRemoteStorage>,
) -> anyhow::Result<()> {
run_if_no_tenant_in_memory(tenant_id, |vacant_entry| {
let tenant_path = conf.tenant_path(&tenant_id);
let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(tenant_id);
if tenant_ignore_mark.exists() {
std::fs::remove_file(&tenant_ignore_mark)
.with_context(|| format!("Failed to remove tenant ignore mark {tenant_ignore_mark:?} during tenant loading"))?;
}
tenant.set_stopping();
// shutdown all tenant and timeline tasks (gc, compaction, page service)
task_mgr::shutdown_tasks(None, Some(tenant_id), None).await;
let new_tenant = schedule_local_tenant_processing(conf, &tenant_path, remote_storage)
.with_context(|| {
format!("Failed to schedule tenant processing in path {tenant_path:?}")
})?;
// If removal fails there will be no way to successfully retry detach,
// because the tenant no longer exists in the in-memory map. And it needs to be removed from it
// before we remove files, because it contains references to tenant
// which references ephemeral files which are deleted on drop. So if we keep these references,
// we will attempt to remove files which no longer exist. This can be fixed by having shutdown
// mechanism for tenant that will clean temporary data to avoid any references to ephemeral files
let local_tenant_directory = conf.tenant_path(&tenant_id);
fs::remove_dir_all(&local_tenant_directory).with_context(|| {
format!(
"Failed to remove local tenant directory '{}'",
local_tenant_directory.display()
)
})?;
vacant_entry.insert(new_tenant);
Ok(())
}).await
}
pub async fn ignore_tenant(
conf: &'static PageServerConf,
tenant_id: TenantId,
) -> anyhow::Result<()> {
remove_tenant_from_memory(tenant_id, async {
let ignore_mark_file = conf.tenant_ignore_mark_file_path(tenant_id);
fs::File::create(&ignore_mark_file)
.await
.context("Failed to create ignore mark file")
.and_then(|_| {
crashsafe::fsync_file_and_parent(&ignore_mark_file)
.context("Failed to fsync ignore mark file")
})
.with_context(|| format!("Failed to create ignore mark for tenant {tenant_id}"))?;
Ok(())
})
.await
Ok(())
}
///
/// Get list of tenants, for the mgmt API
///
pub async fn list_tenants() -> Vec<(TenantId, TenantState)> {
TENANTS
.read()
.await
pub fn list_tenants() -> Vec<(TenantId, TenantState)> {
tenants_state::read_tenants()
.iter()
.map(|(id, tenant)| (*id, tenant.current_state()))
.collect()
@@ -370,92 +348,25 @@ pub async fn list_tenants() -> Vec<(TenantId, TenantState)> {
pub async fn attach_tenant(
conf: &'static PageServerConf,
tenant_id: TenantId,
remote_storage: GenericRemoteStorage,
remote_storage: &GenericRemoteStorage,
) -> anyhow::Result<()> {
run_if_no_tenant_in_memory(tenant_id, |vacant_entry| {
let tenant_path = conf.tenant_path(&tenant_id);
anyhow::ensure!(
!tenant_path.exists(),
"Cannot attach tenant {tenant_id}, local tenant directory already exists"
);
let tenant = Tenant::spawn_attach(conf, tenant_id, remote_storage);
vacant_entry.insert(tenant);
Ok(())
})
.await
}
async fn run_if_no_tenant_in_memory<F, V>(tenant_id: TenantId, run: F) -> anyhow::Result<V>
where
F: FnOnce(hash_map::VacantEntry<TenantId, Arc<Tenant>>) -> anyhow::Result<V>,
{
match TENANTS.write().await.entry(tenant_id) {
match tenants_state::write_tenants().entry(tenant_id) {
hash_map::Entry::Occupied(e) => {
anyhow::bail!(
"tenant {tenant_id} already exists, state: {:?}",
e.get().current_state()
)
}
hash_map::Entry::Vacant(v) => run(v),
}
}
/// Stops and removes the tenant from memory, if it's not [`TenantState::Stopping`] already, bails otherwise.
/// Allows removing other tenant resources manually via `tenant_cleanup`.
/// If the cleanup fails, tenant will stay in memory in [`TenantState::Broken`] state, and another removal
/// operation would be needed to remove it.
async fn remove_tenant_from_memory<V, F>(
tenant_id: TenantId,
tenant_cleanup: F,
) -> anyhow::Result<V>
where
F: std::future::Future<Output = anyhow::Result<V>>,
{
// It's important to keep the tenant in memory after the final cleanup, to avoid cleanup races.
// The exclusive lock here ensures we don't miss the tenant state updates before trying another removal.
// tenant-wide cleanup operations may take some time (removing the entire tenant directory), we want to
// avoid holding the lock for the entire process.
{
let tenants_accessor = TENANTS.write().await;
match tenants_accessor.get(&tenant_id) {
Some(tenant) => match tenant.current_state() {
TenantState::Attaching
| TenantState::Loading
| TenantState::Broken
| TenantState::Active => tenant.set_stopping(),
TenantState::Stopping => {
anyhow::bail!("Tenant {tenant_id} is stopping already")
// Cannot attach a tenant that already exists. The error message depends on
// the state it's in.
match e.get().current_state() {
TenantState::Attaching => {
anyhow::bail!("tenant {tenant_id} attach is already in progress")
}
current_state => {
anyhow::bail!("tenant already exists, current state: {current_state:?}")
}
},
None => anyhow::bail!("Tenant not found for id {tenant_id}"),
}
}
// shutdown all tenant and timeline tasks (gc, compaction, page service)
// No new tasks will be started for this tenant because it's in `Stopping` state.
// Hence, once we're done here, the `tenant_cleanup` callback can mutate tenant on-disk state freely.
task_mgr::shutdown_tasks(None, Some(tenant_id), None).await;
match tenant_cleanup
.await
.with_context(|| format!("Failed to run cleanup for tenant {tenant_id}"))
{
Ok(hook_value) => {
let mut tenants_accessor = TENANTS.write().await;
if tenants_accessor.remove(&tenant_id).is_none() {
warn!("Tenant {tenant_id} got removed from memory before operation finished");
}
Ok(hook_value)
}
Err(e) => {
let tenants_accessor = TENANTS.read().await;
match tenants_accessor.get(&tenant_id) {
Some(tenant) => tenant.set_broken(),
None => warn!("Tenant {tenant_id} got removed from memory"),
}
Err(e)
hash_map::Entry::Vacant(v) => {
let tenant = Tenant::spawn_attach(conf, tenant_id, remote_storage);
v.insert(tenant);
Ok(())
}
}
}
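The remove_tenant_from_memory helper deleted above follows a pattern worth spelling out: mark the entry Stopping while holding the map lock, release the lock to run the (possibly slow) cleanup, and only afterwards either drop the entry or mark it Broken so removal can be retried. A much simplified, synchronous sketch of that shape; the types and names here are invented for illustration and are not the pageserver's API:

use std::collections::HashMap;
use std::sync::RwLock;

#[derive(Clone, Copy, Debug, PartialEq)]
enum State { Active, Stopping, Broken }

/// Illustrative only: two-phase removal of an entry from a shared registry.
fn remove_from_memory(
    registry: &RwLock<HashMap<u64, State>>,
    id: u64,
    cleanup: impl FnOnce() -> Result<(), String>,
) -> Result<(), String> {
    // Phase 1: flip the entry to Stopping under the lock, then drop the lock
    // so the cleanup below does not block other users of the registry.
    {
        let mut map = registry.write().unwrap();
        match map.get_mut(&id) {
            Some(State::Stopping) => return Err(format!("{id} is already stopping")),
            Some(state) => *state = State::Stopping,
            None => return Err(format!("{id} not found")),
        }
    }

    // Phase 2: run the cleanup without holding the lock, then finish up.
    match cleanup() {
        Ok(()) => {
            registry.write().unwrap().remove(&id);
            Ok(())
        }
        Err(e) => {
            // Keep the entry around, but mark it Broken so a retry is possible.
            if let Some(state) = registry.write().unwrap().get_mut(&id) {
                *state = State::Broken;
            }
            Err(e)
        }
    }
}

fn main() {
    let registry = RwLock::new(HashMap::from([(1u64, State::Active)]));
    assert!(remove_from_memory(&registry, 1, || Ok(())).is_ok());
    assert!(registry.read().unwrap().is_empty());
}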
@@ -467,12 +378,12 @@ use {
};
#[cfg(feature = "testing")]
pub async fn immediate_gc(
pub fn immediate_gc(
tenant_id: TenantId,
timeline_id: TimelineId,
gc_req: TimelineGcRequest,
) -> Result<tokio::sync::oneshot::Receiver<Result<GcResult, anyhow::Error>>, ApiError> {
let guard = TENANTS.read().await;
let guard = tenants_state::read_tenants();
let tenant = guard
.get(&tenant_id)

View File

@@ -155,7 +155,7 @@ async fn wait_for_active_tenant(
wait: Duration,
) -> ControlFlow<(), Arc<Tenant>> {
let tenant = loop {
match tenant_mgr::get_tenant(tenant_id, false).await {
match tenant_mgr::get_tenant(tenant_id, false) {
Ok(tenant) => break tenant,
Err(e) => {
error!("Failed to get a tenant {tenant_id}: {e:#}");

View File

@@ -84,7 +84,7 @@ pub trait WalRedoManager: Send + Sync {
&self,
key: Key,
lsn: Lsn,
base_img: Option<(Lsn, Bytes)>,
base_img: Option<Bytes>,
records: Vec<(Lsn, NeonWalRecord)>,
pg_version: u32,
) -> Result<Bytes, WalRedoError>;
@@ -147,7 +147,7 @@ impl WalRedoManager for PostgresRedoManager {
&self,
key: Key,
lsn: Lsn,
base_img: Option<(Lsn, Bytes)>,
base_img: Option<Bytes>,
records: Vec<(Lsn, NeonWalRecord)>,
pg_version: u32,
) -> Result<Bytes, WalRedoError> {
@@ -156,8 +156,7 @@ impl WalRedoManager for PostgresRedoManager {
return Err(WalRedoError::InvalidRequest);
}
let base_img_lsn = base_img.as_ref().map(|p| p.0).unwrap_or(Lsn::INVALID);
let mut img = base_img.map(|p| p.1);
let mut img: Option<Bytes> = base_img;
let mut batch_neon = can_apply_in_neon(&records[0].1);
let mut batch_start = 0;
for i in 1..records.len() {
@@ -171,7 +170,6 @@ impl WalRedoManager for PostgresRedoManager {
key,
lsn,
img,
base_img_lsn,
&records[batch_start..i],
self.conf.wal_redo_timeout,
pg_version,
@@ -191,7 +189,6 @@ impl WalRedoManager for PostgresRedoManager {
key,
lsn,
img,
base_img_lsn,
&records[batch_start..],
self.conf.wal_redo_timeout,
pg_version,
@@ -226,13 +223,11 @@ impl PostgresRedoManager {
///
/// Process one request for WAL redo using wal-redo postgres
///
#[allow(clippy::too_many_arguments)]
fn apply_batch_postgres(
&self,
key: Key,
lsn: Lsn,
base_img: Option<Bytes>,
base_img_lsn: Lsn,
records: &[(Lsn, NeonWalRecord)],
wal_redo_timeout: Duration,
pg_version: u32,
@@ -287,12 +282,9 @@ impl PostgresRedoManager {
// next request will launch a new one.
if result.is_err() {
error!(
"error applying {} WAL records {}..{} ({} bytes) to base image with LSN {} to reconstruct page image at LSN {}",
"error applying {} WAL records ({} bytes) to reconstruct page image at LSN {}",
records.len(),
records.first().map(|p| p.0).unwrap_or(Lsn(0)),
records.last().map(|p| p.0).unwrap_or(Lsn(0)),
nbytes,
base_img_lsn,
lsn
);
let process = process_guard.take().unwrap();
@@ -930,7 +922,8 @@ impl NoLeakChild {
match child.wait() {
Ok(exit_status) => {
info!(exit_status = %exit_status, "wait successful");
// log at error level since .kill() is something we only do on errors ATM
error!(exit_status = %exit_status, "wait successful");
}
Err(e) => {
error!(error = %e, "wait error; might leak the child process; it will show as zombie (defunct)");

View File

@@ -464,12 +464,12 @@ pg_init_libpagestore(void)
NULL, NULL, NULL);
DefineCustomIntVariable("neon.readahead_buffer_size",
"number of prefetches to buffer",
"This buffer is used to hold and manage prefetched "
"data; so it is important that this buffer is at "
"least as large as the configured value of all "
"tablespaces' effective_io_concurrency and "
"maintenance_io_concurrency, and your sessions' "
"values for these settings.",
"This buffer is used to store prefetched data; so "
"it is important that this buffer is at least as "
"large as the configured value of all tablespaces' "
"effective_io_concurrency and maintenance_io_concurrency, "
"your sessions' values of these, and the value for "
"seqscan_prefetch_buffers.",
&readahead_buffer_size,
128, 16, 1024,
PGC_USERSET,

View File

@@ -242,14 +242,6 @@ PrefetchState *MyPState;
) \
)
#define ReceiveBufferNeedsCompaction() (\
(MyPState->n_responses_buffered / 8) < ( \
MyPState->ring_receive - \
MyPState->ring_last - \
MyPState->n_responses_buffered \
) \
)
int n_prefetch_hits = 0;
int n_prefetch_misses = 0;
int n_prefetch_missed_caches = 0;
@@ -257,99 +249,17 @@ int n_prefetch_dupes = 0;
XLogRecPtr prefetch_lsn = 0;
static bool compact_prefetch_buffers(void);
static void consume_prefetch_responses(void);
static uint64 prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_lsn);
static bool prefetch_read(PrefetchRequest *slot);
static void prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force_lsn);
static bool prefetch_wait_for(uint64 ring_index);
static void prefetch_cleanup_trailing_unused(void);
static void prefetch_cleanup(void);
static inline void prefetch_set_unused(uint64 ring_index);
static XLogRecPtr neon_get_request_lsn(bool *latest, RelFileNode rnode,
ForkNumber forknum, BlockNumber blkno);
static bool
compact_prefetch_buffers(void)
{
uint64 empty_ring_index = MyPState->ring_last;
uint64 search_ring_index = MyPState->ring_receive;
int n_moved = 0;
if (MyPState->ring_receive == MyPState->ring_last)
return false;
while (search_ring_index > MyPState->ring_last)
{
search_ring_index--;
if (GetPrfSlot(search_ring_index)->status == PRFS_UNUSED)
{
empty_ring_index = search_ring_index;
break;
}
}
/*
* Here we have established:
* slots < search_ring_index may be unused (not scanned)
* slots >= search_ring_index and <= empty_ring_index are unused
* slots > empty_ring_index are in use, or outside our buffer's range.
*
* Therefore, there is a gap of at least one unused item between
* search_ring_index and empty_ring_index, which grows as we hit
* more unused items while moving backwards through the array.
*/
while (search_ring_index > MyPState->ring_last)
{
PrefetchRequest *source_slot;
PrefetchRequest *target_slot;
bool found;
search_ring_index--;
source_slot = GetPrfSlot(search_ring_index);
if (source_slot->status == PRFS_UNUSED)
continue;
target_slot = GetPrfSlot(empty_ring_index);
Assert(source_slot->status == PRFS_RECEIVED);
Assert(target_slot->status == PRFS_UNUSED);
target_slot->buftag = source_slot->buftag;
target_slot->status = source_slot->status;
target_slot->response = source_slot->response;
target_slot->effective_request_lsn = source_slot->effective_request_lsn;
target_slot->my_ring_index = empty_ring_index;
prfh_delete(MyPState->prf_hash, source_slot);
prfh_insert(MyPState->prf_hash, target_slot, &found);
Assert(!found);
/* Adjust the location of our known-empty slot */
empty_ring_index--;
source_slot->status = PRFS_UNUSED;
source_slot->buftag = (BufferTag) {0};
source_slot->response = NULL;
source_slot->my_ring_index = 0;
source_slot->effective_request_lsn = 0;
n_moved++;
}
if (MyPState->ring_last != empty_ring_index)
{
prefetch_cleanup_trailing_unused();
return true;
}
return false;
}
void
readahead_buffer_resize(int newsize, void *extra)
{
@@ -413,7 +323,7 @@ readahead_buffer_resize(int newsize, void *extra)
prfh_insert(newPState->prf_hash, newslot, &found);
Assert(!found);
switch (newslot->status)
{
case PRFS_UNUSED:
@@ -460,7 +370,7 @@ consume_prefetch_responses(void)
}
static void
prefetch_cleanup_trailing_unused(void)
prefetch_cleanup(void)
{
uint64 ring_index;
PrefetchRequest *slot;
@@ -621,10 +531,7 @@ prefetch_set_unused(uint64 ring_index)
/* run cleanup if we're holding back ring_last */
if (MyPState->ring_last == ring_index)
prefetch_cleanup_trailing_unused();
/* ... and try to store the buffered responses more compactly if > 12.5% of the buffer is gaps */
else if (ReceiveBufferNeedsCompaction())
compact_prefetch_buffers();
prefetch_cleanup();
}
static void
@@ -675,33 +582,6 @@ prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force
request.req.lsn = lsn;
prefetch_lsn = Max(prefetch_lsn, lsn);
slot->effective_request_lsn = prefetch_lsn;
/*
* Remember request LSN in the last-written LSN cache to avoid false
* prefetch invalidations.
*
* Imagine what would happen without this, when you perform a large
* sequential scan with UPDATE. The sequential scan issues a prefetch
* request for each page in order, and every page is also dirtied. On
* each page, the oldest page in the last-written LSN cache is evicted,
* which advances the global last-written LSN. The pages being scanned are
* not in the last-written cache, so each prefetch request will use the
* global last-written LSN in the request and memorize that in the
* slot. However, when we receive the response to the prefetch request,
* the global last-written LSN has already moved forwards, and the
* cross-check we make that the last-written LSN matches will fail, and we
* discard the prefetched response unnecessarily.
*
* Inserting the LSN we use in the prefetch request to the last-written LSN
* cache avoids that problem. With that, we will use the cached value in
* the cross-check, instead of the more recent global last-written LSN value.
*/
SetLastWrittenLSNForBlock(
request.req.lsn,
slot->buftag.rnode,
slot->buftag.forkNum,
slot->buftag.blockNum
);
}
Assert(slot->response == NULL);
@@ -822,31 +702,20 @@ prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_ls
Assert(slot->status != PRFS_UNUSED);
/*
* If there is good reason to run compaction on the prefetch buffers,
* try to do that.
*/
if (ReceiveBufferNeedsCompaction() && compact_prefetch_buffers())
/* We have the slot for ring_last, so that must still be in progress */
switch (slot->status)
{
Assert(slot->status == PRFS_UNUSED);
}
else
{
/* We have the slot for ring_last, so that must still be in progress */
switch (slot->status)
{
case PRFS_REQUESTED:
Assert(MyPState->ring_receive == cleanup_index);
prefetch_wait_for(cleanup_index);
prefetch_set_unused(cleanup_index);
break;
case PRFS_RECEIVED:
case PRFS_TAG_REMAINS:
prefetch_set_unused(cleanup_index);
break;
default:
pg_unreachable();
}
case PRFS_REQUESTED:
Assert(MyPState->ring_receive == cleanup_index);
prefetch_wait_for(cleanup_index);
prefetch_set_unused(cleanup_index);
break;
case PRFS_RECEIVED:
case PRFS_TAG_REMAINS:
prefetch_set_unused(cleanup_index);
break;
default:
pg_unreachable();
}
}
@@ -1233,7 +1102,7 @@ PageIsEmptyHeapPage(char *buffer)
}
static void
neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool force)
neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer)
{
XLogRecPtr lsn = PageGetLSN(buffer);
@@ -1247,7 +1116,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, ch
* correctness, the non-logged updates are not critical. But we want to
* have a reasonably up-to-date VM and FSM in the page server.
*/
if ((force || forknum == FSM_FORKNUM || forknum == VISIBILITYMAP_FORKNUM) && !RecoveryInProgress())
if (forknum == FSM_FORKNUM && !RecoveryInProgress())
{
/* FSM is never WAL-logged and we don't care. */
XLogRecPtr recptr;
@@ -1256,7 +1125,30 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, ch
XLogFlush(recptr);
lsn = recptr;
ereport(SmgrTrace,
(errmsg("Page %u of relation %u/%u/%u.%u was force logged. Evicted at lsn=%X/%X",
(errmsg("FSM page %u of relation %u/%u/%u.%u was force logged. Evicted at lsn=%X/%X",
blocknum,
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode,
forknum, LSN_FORMAT_ARGS(lsn))));
}
else if (forknum == VISIBILITYMAP_FORKNUM && !RecoveryInProgress())
{
/*
* Always WAL-log vm. We should never miss clearing visibility map
* bits.
*
* TODO Is it too bad for performance? Hopefully we do not evict
* actively used vm too often.
*/
XLogRecPtr recptr;
recptr = log_newpage_copy(&reln->smgr_rnode.node, forknum, blocknum, buffer, false);
XLogFlush(recptr);
lsn = recptr;
ereport(SmgrTrace,
(errmsg("Visibilitymap page %u of relation %u/%u/%u.%u was force logged at lsn=%X/%X",
blocknum,
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
@@ -1651,7 +1543,6 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
char *buffer, bool skipFsync)
{
XLogRecPtr lsn;
BlockNumber n_blocks = 0;
switch (reln->smgr_relpersistence)
{
@@ -1691,16 +1582,7 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
errhint("This limit is defined by neon.max_cluster_size GUC")));
}
/*
* Usually Postgres doesn't extend relation on more than one page
* (leaving holes). But this rule is violated in PG-15 where CreateAndCopyRelationData
* call smgrextend for destination relation n using size of source relation
*/
get_cached_relsize(reln->smgr_rnode.node, forkNum, &n_blocks);
while (n_blocks < blkno)
neon_wallog_page(reln, forkNum, n_blocks++, buffer, true);
neon_wallog_page(reln, forkNum, blkno, buffer, false);
neon_wallog_page(reln, forkNum, blkno, buffer);
set_cached_relsize(reln->smgr_rnode.node, forkNum, blkno + 1);
lsn = PageGetLSN(buffer);
@@ -1898,17 +1780,6 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
&request_lsn);
slot = GetPrfSlot(ring_index);
}
else
{
/*
* Empty our reference to the prefetch buffer's hash entry.
* When we wait for prefetches, the entry reference is invalidated by
* potential updates to the hash, and when we reconnect to the
* pageserver the prefetch we're waiting for may be dropped,
* in which case we need to retry and take the branch above.
*/
entry = NULL;
}
Assert(slot->my_ring_index == ring_index);
Assert(MyPState->ring_last <= ring_index &&
@@ -1947,7 +1818,7 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
/* buffer was used, clean up for later reuse */
prefetch_set_unused(ring_index);
prefetch_cleanup_trailing_unused();
prefetch_cleanup();
}
/*
@@ -2128,7 +1999,7 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
neon_wallog_page(reln, forknum, blocknum, buffer, false);
neon_wallog_page(reln, forknum, blocknum, buffer);
lsn = PageGetLSN(buffer);
elog(SmgrTrace, "smgrwrite called for %u/%u/%u.%u blk %u, page LSN: %X/%08X",

29
poetry.lock generated
View File

@@ -525,7 +525,7 @@ typing-extensions = ">=4.1.0"
[[package]]
name = "certifi"
version = "2022.12.7"
version = "2022.9.24"
description = "Python package for providing Mozilla's CA Bundle."
category = "main"
optional = false
@@ -1248,8 +1248,8 @@ python-versions = ">=3.6"
[package.dependencies]
pytest = [
{version = ">=5.0", markers = "python_version < \"3.10\""},
{version = ">=6.2.4", markers = "python_version >= \"3.10\""},
{version = ">=5.0", markers = "python_version < \"3.10\""},
]
[[package]]
@@ -1702,8 +1702,8 @@ botocore-stubs = [
{file = "botocore_stubs-1.27.38-py3-none-any.whl", hash = "sha256:7add7641e9a479a9c8366893bb522fd9ca3d58714201e43662a200a148a1bc38"},
]
certifi = [
{file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"},
{file = "certifi-2022.12.7.tar.gz", hash = "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"},
{file = "certifi-2022.9.24-py3-none-any.whl", hash = "sha256:90c1a32f1d68f940488354e36370f6cca89f0f106db09518524c88d6ed83f382"},
{file = "certifi-2022.9.24.tar.gz", hash = "sha256:0d9c601124e5a6ba9712dbc60d9c53c21e34f5f641fe83002317394311bdce14"},
]
cffi = [
{file = "cffi-1.15.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2"},
@@ -2036,7 +2036,6 @@ psutil = [
psycopg2-binary = [
{file = "psycopg2-binary-2.9.3.tar.gz", hash = "sha256:761df5313dc15da1502b21453642d7599d26be88bff659382f8f9747c7ebea4e"},
{file = "psycopg2_binary-2.9.3-cp310-cp310-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:539b28661b71da7c0e428692438efbcd048ca21ea81af618d845e06ebfd29478"},
{file = "psycopg2_binary-2.9.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2f2534ab7dc7e776a263b463a16e189eb30e85ec9bbe1bff9e78dae802608932"},
{file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e82d38390a03da28c7985b394ec3f56873174e2c88130e6966cb1c946508e65"},
{file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57804fc02ca3ce0dbfbef35c4b3a4a774da66d66ea20f4bda601294ad2ea6092"},
{file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_24_aarch64.whl", hash = "sha256:083a55275f09a62b8ca4902dd11f4b33075b743cf0d360419e2051a8a5d5ff76"},
@@ -2070,7 +2069,6 @@ psycopg2-binary = [
{file = "psycopg2_binary-2.9.3-cp37-cp37m-win32.whl", hash = "sha256:adf20d9a67e0b6393eac162eb81fb10bc9130a80540f4df7e7355c2dd4af9fba"},
{file = "psycopg2_binary-2.9.3-cp37-cp37m-win_amd64.whl", hash = "sha256:2f9ffd643bc7349eeb664eba8864d9e01f057880f510e4681ba40a6532f93c71"},
{file = "psycopg2_binary-2.9.3-cp38-cp38-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:def68d7c21984b0f8218e8a15d514f714d96904265164f75f8d3a70f9c295667"},
{file = "psycopg2_binary-2.9.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e6aa71ae45f952a2205377773e76f4e3f27951df38e69a4c95440c779e013560"},
{file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dffc08ca91c9ac09008870c9eb77b00a46b3378719584059c034b8945e26b272"},
{file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:280b0bb5cbfe8039205c7981cceb006156a675362a00fe29b16fbc264e242834"},
{file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_24_aarch64.whl", hash = "sha256:af9813db73395fb1fc211bac696faea4ca9ef53f32dc0cfa27e4e7cf766dcf24"},
@@ -2082,7 +2080,6 @@ psycopg2-binary = [
{file = "psycopg2_binary-2.9.3-cp38-cp38-win32.whl", hash = "sha256:6472a178e291b59e7f16ab49ec8b4f3bdada0a879c68d3817ff0963e722a82ce"},
{file = "psycopg2_binary-2.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:35168209c9d51b145e459e05c31a9eaeffa9a6b0fd61689b48e07464ffd1a83e"},
{file = "psycopg2_binary-2.9.3-cp39-cp39-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:47133f3f872faf28c1e87d4357220e809dfd3fa7c64295a4a148bcd1e6e34ec9"},
{file = "psycopg2_binary-2.9.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b3a24a1982ae56461cc24f6680604fffa2c1b818e9dc55680da038792e004d18"},
{file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91920527dea30175cc02a1099f331aa8c1ba39bf8b7762b7b56cbf54bc5cce42"},
{file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:887dd9aac71765ac0d0bac1d0d4b4f2c99d5f5c1382d8b770404f0f3d0ce8a39"},
{file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_24_aarch64.whl", hash = "sha256:1f14c8b0942714eb3c74e1e71700cbbcb415acbc311c730370e70c578a44a25c"},
@@ -2099,7 +2096,18 @@ py = [
{file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
]
pyasn1 = [
{file = "pyasn1-0.4.8-py2.4.egg", hash = "sha256:fec3e9d8e36808a28efb59b489e4528c10ad0f480e57dcc32b4de5c9d8c9fdf3"},
{file = "pyasn1-0.4.8-py2.5.egg", hash = "sha256:0458773cfe65b153891ac249bcf1b5f8f320b7c2ce462151f8fa74de8934becf"},
{file = "pyasn1-0.4.8-py2.6.egg", hash = "sha256:5c9414dcfede6e441f7e8f81b43b34e834731003427e5b09e4e00e3172a10f00"},
{file = "pyasn1-0.4.8-py2.7.egg", hash = "sha256:6e7545f1a61025a4e58bb336952c5061697da694db1cae97b116e9c46abcf7c8"},
{file = "pyasn1-0.4.8-py2.py3-none-any.whl", hash = "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d"},
{file = "pyasn1-0.4.8-py3.1.egg", hash = "sha256:78fa6da68ed2727915c4767bb386ab32cdba863caa7dbe473eaae45f9959da86"},
{file = "pyasn1-0.4.8-py3.2.egg", hash = "sha256:08c3c53b75eaa48d71cf8c710312316392ed40899cb34710d092e96745a358b7"},
{file = "pyasn1-0.4.8-py3.3.egg", hash = "sha256:03840c999ba71680a131cfaee6fab142e1ed9bbd9c693e285cc6aca0d555e576"},
{file = "pyasn1-0.4.8-py3.4.egg", hash = "sha256:7ab8a544af125fb704feadb008c99a88805126fb525280b2270bb25cc1d78a12"},
{file = "pyasn1-0.4.8-py3.5.egg", hash = "sha256:e89bf84b5437b532b0803ba5c9a5e054d21fec423a89952a74f87fa2c9b7bce2"},
{file = "pyasn1-0.4.8-py3.6.egg", hash = "sha256:014c0e9976956a08139dc0712ae195324a75e142284d5f87f1a87ee1b068a359"},
{file = "pyasn1-0.4.8-py3.7.egg", hash = "sha256:99fcc3c8d804d1bc6d9a099921e39d827026409a58f2a720dcdb89374ea0c776"},
{file = "pyasn1-0.4.8.tar.gz", hash = "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba"},
]
pycodestyle = [
@@ -2205,13 +2213,6 @@ pyyaml = [
{file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"},
{file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"},
{file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"},
{file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"},
{file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"},
{file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"},
{file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"},
{file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"},
{file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"},
{file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"},
{file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"},
{file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"},
{file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"},

View File

@@ -28,7 +28,6 @@ use std::{borrow::Cow, future::Future, net::SocketAddr};
use tokio::{net::TcpListener, task::JoinError};
use tracing::info;
use utils::project_git_version;
use utils::sentry_init::{init_sentry, release_name};
project_git_version!(GIT_VERSION);
@@ -46,9 +45,6 @@ async fn main() -> anyhow::Result<()> {
.with_target(false)
.init();
// initialize sentry if SENTRY_DSN is provided
let _sentry_guard = init_sentry(release_name!(), &[]);
let arg_matches = cli().get_matches();
let tls_config = match (

View File

@@ -4,6 +4,7 @@
use anyhow::{bail, Context, Result};
use clap::{value_parser, Arg, ArgAction, Command};
use const_format::formatcp;
use nix::unistd::Pid;
use remote_storage::RemoteStorageConfig;
use std::fs::{self, File};
use std::io::{ErrorKind, Write};
@@ -14,7 +15,7 @@ use tokio::sync::mpsc;
use toml_edit::Document;
use tracing::*;
use url::{ParseError, Url};
use utils::pid_file;
use utils::lock_file;
use metrics::set_build_info_metric;
use safekeeper::broker;
@@ -34,14 +35,11 @@ use utils::{
http::endpoint,
id::NodeId,
logging::{self, LogFormat},
project_git_version,
sentry_init::{init_sentry, release_name},
signals, tcp_listener,
project_git_version, signals, tcp_listener,
};
const PID_FILE_NAME: &str = "safekeeper.pid";
const ID_FILE_NAME: &str = "safekeeper.id";
project_git_version!(GIT_VERSION);
fn main() -> anyhow::Result<()> {
@@ -135,8 +133,6 @@ fn main() -> anyhow::Result<()> {
conf.log_format = LogFormat::from_config(log_format)?;
}
// initialize sentry if SENTRY_DSN is provided
let _sentry_guard = init_sentry(release_name!(), &[("node_id", &conf.my_id.to_string())]);
start_safekeeper(conf, given_id, arg_matches.get_flag("init"))
}
@@ -146,13 +142,30 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
// Prevent running multiple safekeepers on the same directory
let lock_file_path = conf.workdir.join(PID_FILE_NAME);
let lock_file =
pid_file::claim_for_current_process(&lock_file_path).context("claim pid file")?;
info!("Claimed pid file at {lock_file_path:?}");
let lock_file = match lock_file::create_lock_file(&lock_file_path, Pid::this().to_string()) {
lock_file::LockCreationResult::Created {
new_lock_contents,
file,
} => {
info!("Created lock file at {lock_file_path:?} with contenst {new_lock_contents}");
file
}
lock_file::LockCreationResult::AlreadyLocked {
existing_lock_contents,
} => anyhow::bail!(
"Could not lock pid file; safekeeper is already running in {:?} with PID {}",
conf.workdir,
existing_lock_contents
),
lock_file::LockCreationResult::CreationFailed(e) => {
return Err(e.context(format!("Failed to create lock file at {lock_file_path:?}")))
}
};
// ensure that the lock file is held even if the main thread of the process panics
// we need to release the lock file only when the current process is gone
std::mem::forget(lock_file);
let _ = Box::leak(Box::new(lock_file));
info!("Created PID file with PID {}", Pid::this().to_string());
// Set or read our ID.
set_id(&mut conf, given_id)?;
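Both sides of the lock-file change above keep the open handle alive for the whole process lifetime (std::mem::forget on one side, Box::leak on the other), because dropping the handle would release the lock. A minimal Python sketch of the same pattern using fcntl on Unix (the module-level global and error wording are assumptions for illustration, not the safekeeper's actual mechanism):

import fcntl
import os

_LOCK_FILE = None  # module-level reference keeps the descriptor, and thus the lock, alive


def claim_pid_file(path: str) -> None:
    """Take an exclusive lock on `path` and write our PID into it.

    The open file object is stashed in a global on purpose: as long as the
    process lives, the descriptor stays open and the lock stays held, which
    mirrors leaking the lock-file handle in the Rust code above.
    """
    global _LOCK_FILE
    f = os.fdopen(os.open(path, os.O_RDWR | os.O_CREAT, 0o644), "r+")
    try:
        fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except BlockingIOError:
        f.seek(0)
        existing = f.read().strip()
        f.close()
        raise RuntimeError(f"pid file {path!r} already locked by PID {existing or 'unknown'}")
    f.seek(0)
    f.truncate()
    f.write(str(os.getpid()))
    f.flush()
    _LOCK_FILE = f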
@@ -160,12 +173,19 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
return Ok(());
}
info!(
"Starting safekeeper http handler on {}",
conf.listen_http_addr
);
let http_listener = tcp_listener::bind(conf.listen_http_addr.clone()).map_err(|e| {
error!("failed to bind to address {}: {}", conf.listen_http_addr, e);
e
})?;
info!("Starting safekeeper on {}", conf.listen_pg_addr);
info!(
"Starting safekeeper pg protocol handler on {}",
conf.listen_pg_addr
);
let pg_listener = tcp_listener::bind(conf.listen_pg_addr.clone()).map_err(|e| {
error!("failed to bind to address {}: {}", conf.listen_pg_addr, e);
e
@@ -264,10 +284,10 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
signals.handle(|signal| {
// TODO: implement graceful shutdown with joining threads etc
info!(
"received {}, terminating in immediate shutdown mode",
"Got {}. Terminating in immediate shutdown mode",
signal.name()
);
std::process::exit(0);
std::process::exit(111);
})
}

View File

@@ -226,7 +226,6 @@ impl ReplicationConn {
let mut end_pos = stop_pos.unwrap_or(inmem_state.commit_lsn);
let mut wal_reader = WalReader::new(
spg.conf.workdir.clone(),
spg.conf.timeline_dir(&tli.ttid),
&persisted_state,
start_pos,

View File

@@ -13,7 +13,7 @@ use std::time::Duration;
use postgres_ffi::v14::xlog_utils::XLogSegNoOffsetToRecPtr;
use postgres_ffi::XLogFileName;
use postgres_ffi::{XLogSegNo, PG_TLI};
use remote_storage::{GenericRemoteStorage, RemotePath};
use remote_storage::GenericRemoteStorage;
use tokio::fs::File;
use tokio::runtime::Builder;
@@ -151,7 +151,7 @@ async fn update_task(
let timeline_dir = conf.timeline_dir(&ttid);
let handle = tokio::spawn(
backup_task_main(ttid, timeline_dir, conf.workdir.clone(), shutdown_rx)
backup_task_main(ttid, timeline_dir, shutdown_rx)
.instrument(info_span!("WAL backup task", ttid = %ttid)),
);
@@ -182,10 +182,10 @@ async fn wal_backup_launcher_main_loop(
let conf_ = conf.clone();
REMOTE_STORAGE.get_or_init(|| {
conf_
.remote_storage
.as_ref()
.map(|c| GenericRemoteStorage::from_config(c).expect("failed to create remote storage"))
conf_.remote_storage.as_ref().map(|c| {
GenericRemoteStorage::from_config(conf_.workdir, c)
.expect("failed to create remote storage")
})
});
// Presence in this map means the launcher is aware that S3 offloading is needed for
@@ -234,7 +234,6 @@ async fn wal_backup_launcher_main_loop(
struct WalBackupTask {
timeline: Arc<Timeline>,
timeline_dir: PathBuf,
workspace_dir: PathBuf,
wal_seg_size: usize,
commit_lsn_watch_rx: watch::Receiver<Lsn>,
}
@@ -243,7 +242,6 @@ struct WalBackupTask {
async fn backup_task_main(
ttid: TenantTimelineId,
timeline_dir: PathBuf,
workspace_dir: PathBuf,
mut shutdown_rx: Receiver<()>,
) {
info!("started");
@@ -259,7 +257,6 @@ async fn backup_task_main(
commit_lsn_watch_rx: tli.get_commit_lsn_watch_rx(),
timeline: tli,
timeline_dir,
workspace_dir,
};
// task is spun up only when wal_seg_size is already initialized
@@ -324,7 +321,6 @@ impl WalBackupTask {
commit_lsn,
self.wal_seg_size,
&self.timeline_dir,
&self.workspace_dir,
)
.await
{
@@ -357,12 +353,11 @@ pub async fn backup_lsn_range(
end_lsn: Lsn,
wal_seg_size: usize,
timeline_dir: &Path,
workspace_dir: &Path,
) -> Result<Lsn> {
let mut res = start_lsn;
let segments = get_segments(start_lsn, end_lsn, wal_seg_size);
for s in &segments {
backup_single_segment(s, timeline_dir, workspace_dir)
backup_single_segment(s, timeline_dir)
.await
.with_context(|| format!("offloading segno {}", s.seg_no))?;
@@ -377,24 +372,11 @@ pub async fn backup_lsn_range(
Ok(res)
}
async fn backup_single_segment(
seg: &Segment,
timeline_dir: &Path,
workspace_dir: &Path,
) -> Result<()> {
let segment_file_path = seg.file_path(timeline_dir)?;
let remote_segment_path = segment_file_path
.strip_prefix(&workspace_dir)
.context("Failed to strip workspace dir prefix")
.and_then(RemotePath::new)
.with_context(|| {
format!(
"Failed to resolve remote part of path {segment_file_path:?} for base {workspace_dir:?}",
)
})?;
async fn backup_single_segment(seg: &Segment, timeline_dir: &Path) -> Result<()> {
let segment_file_name = seg.file_path(timeline_dir)?;
backup_object(&segment_file_path, &remote_segment_path, seg.size()).await?;
debug!("Backup of {} done", segment_file_path.display());
backup_object(&segment_file_name, seg.size()).await?;
debug!("Backup of {} done", segment_file_name.display());
Ok(())
}
@@ -444,7 +426,7 @@ fn get_segments(start: Lsn, end: Lsn, seg_size: usize) -> Vec<Segment> {
static REMOTE_STORAGE: OnceCell<Option<GenericRemoteStorage>> = OnceCell::new();
async fn backup_object(source_file: &Path, target_file: &RemotePath, size: usize) -> Result<()> {
async fn backup_object(source_file: &Path, size: usize) -> Result<()> {
let storage = REMOTE_STORAGE
.get()
.expect("failed to get remote storage")
@@ -459,12 +441,12 @@ async fn backup_object(source_file: &Path, target_file: &RemotePath, size: usize
})?);
storage
.upload_storage_object(Box::new(file), size, target_file)
.upload_storage_object(Box::new(file), size, source_file)
.await
}
pub async fn read_object(
file_path: &RemotePath,
file_path: PathBuf,
offset: u64,
) -> anyhow::Result<Pin<Box<dyn tokio::io::AsyncRead>>> {
let storage = REMOTE_STORAGE
@@ -473,13 +455,19 @@ pub async fn read_object(
.as_ref()
.context("No remote storage configured")?;
info!("segment download about to start from remote path {file_path:?} at offset {offset}");
info!(
"segment download about to start for local path {} at offset {}",
file_path.display(),
offset
);
let download = storage
.download_storage_object(Some((offset, None)), file_path)
.download_storage_object(Some((offset, None)), &file_path)
.await
.with_context(|| {
format!("Failed to open WAL segment download stream for remote path {file_path:?}")
format!(
"Failed to open WAL segment download stream for local path {}",
file_path.display()
)
})?;
Ok(download.download_stream)

View File

@@ -8,7 +8,6 @@
//! Note that last file has `.partial` suffix, that's different from postgres.
use anyhow::{bail, Context, Result};
use remote_storage::RemotePath;
use std::io::{self, Seek, SeekFrom};
use std::pin::Pin;
@@ -446,7 +445,6 @@ fn remove_segments_from_disk(
}
pub struct WalReader {
workdir: PathBuf,
timeline_dir: PathBuf,
wal_seg_size: usize,
pos: Lsn,
@@ -461,7 +459,6 @@ pub struct WalReader {
impl WalReader {
pub fn new(
workdir: PathBuf,
timeline_dir: PathBuf,
state: &SafeKeeperState,
start_pos: Lsn,
@@ -481,7 +478,6 @@ impl WalReader {
}
Ok(Self {
workdir,
timeline_dir,
wal_seg_size: state.server.wal_seg_size as usize,
pos: start_pos,
@@ -549,17 +545,7 @@ impl WalReader {
// Try to open remote file, if remote reads are enabled
if self.enable_remote_read {
let remote_wal_file_path = wal_file_path
.strip_prefix(&self.workdir)
.context("Failed to strip workdir prefix")
.and_then(RemotePath::new)
.with_context(|| {
format!(
"Failed to resolve remote part of path {:?} for base {:?}",
wal_file_path, self.workdir,
)
})?;
return read_object(&remote_wal_file_path, xlogoff as u64).await;
return read_object(wal_file_path, xlogoff as u64).await;
}
bail!("WAL segment is not found")

View File

@@ -1119,14 +1119,6 @@ class PageserverHttpClient(requests.Session):
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/detach")
self.verbose_error(res)
def tenant_load(self, tenant_id: TenantId):
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/load")
self.verbose_error(res)
def tenant_ignore(self, tenant_id: TenantId):
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/ignore")
self.verbose_error(res)
def tenant_status(self, tenant_id: TenantId) -> Dict[Any, Any]:
res = self.get(f"http://localhost:{self.port}/v1/tenant/{tenant_id}")
self.verbose_error(res)
@@ -1588,7 +1580,17 @@ class NeonCli(AbstractNeonCli):
s3_env_vars = self.env.remote_storage.access_env_vars()
extra_env_vars = (extra_env_vars or {}) | s3_env_vars
return self.raw_cli(start_args, extra_env_vars=extra_env_vars)
try:
return self.raw_cli(start_args, extra_env_vars=extra_env_vars)
except Exception:
# A common reason for startup failure is that the port is already in use. We
# coordinate port assignment with PortDistributor, but it's a common mistake
# when writing a new test to use a hardcoded port, or assign the port without
# using the distributor, causing races where two tests running concurrently
# sometimes choose the same port. To help debug such cases, get a listing
# of all inuse ports and the processes holding them.
list_inuse_ports()
raise
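The comment above names the usual cause of these "Cannot assign requested address" failures: a test hardcodes a port or picks one without going through the PortDistributor, so two tests running in parallel can race for the same port. A minimal sketch of the distributor idea, handing out each port from a fixed range at most once and probing that it is currently free (the class name, range, and probing strategy are illustrative assumptions, not the fixture's actual implementation):

import socket

class SimplePortDistributor:
    """Hand out TCP ports from a fixed range, each one at most once per run."""

    def __init__(self, base: int = 15000, count: int = 1000):
        self._ports = iter(range(base, base + count))

    def get_port(self) -> int:
        for port in self._ports:
            # Probe that the port is free right now; this narrows, but does not
            # fully eliminate, the race with unrelated processes on the host.
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                try:
                    s.bind(("127.0.0.1", port))
                except OSError:
                    continue
                return port
        raise RuntimeError("port range exhausted")

# Usage: every component a test starts asks the distributor for its port instead
# of hardcoding one, so two concurrent tests can never be handed the same port.
distributor = SimplePortDistributor()
pg_port = distributor.get_port()
http_port = distributor.get_port()
assert pg_port != http_port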
def pageserver_stop(self, immediate=False) -> "subprocess.CompletedProcess[str]":
cmd = ["pageserver", "stop"]
@@ -1603,7 +1605,11 @@ class NeonCli(AbstractNeonCli):
if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage):
s3_env_vars = self.env.remote_storage.access_env_vars()
return self.raw_cli(["safekeeper", "start", str(id)], extra_env_vars=s3_env_vars)
try:
return self.raw_cli(["safekeeper", "start", str(id)], extra_env_vars=s3_env_vars)
except Exception:
list_inuse_ports() # see comment in pageserver_start
raise
def safekeeper_stop(
self, id: Optional[int] = None, immediate=False
@@ -1751,7 +1757,6 @@ class NeonPageserver(PgProtocol):
".*Connection aborted: error communicating with the server: Connection reset by peer.*",
".*kill_and_wait_impl.*: wait successful.*",
".*end streaming to Some.*",
".*query handler for 'pagestream.*failed: Broken pipe.*", # pageserver notices compute shut down
# safekeeper connection can fail with this, in the window between timeline creation
# and streaming start
".*Failed to process query for timeline .*: state uninitialized, no data to read.*",
@@ -1777,7 +1782,6 @@ class NeonPageserver(PgProtocol):
".*gc_loop.*Gc failed, retrying in.*timeline is Stopping", # When gc checks timeline state after acquiring layer_removal_cs
".*compaction_loop.*Compaction failed, retrying in.*timeline is Stopping", # When compaction checks timeline state after acquiring layer_removal_cs
".*query handler for 'pagestream.*failed: Timeline .* was not found", # postgres reconnects while timeline_delete doesn't hold the tenant's timelines.lock()
".*query handler for 'pagestream.*failed: Timeline .* is not active", # timeline delete in progress
]
def start(
@@ -2896,7 +2900,6 @@ def assert_no_in_progress_downloads_for_tenant(
):
tenant_status = pageserver_http_client.tenant_status(tenant)
assert tenant_status["has_in_progress_downloads"] is False, tenant_status
assert tenant_status["state"] == "Active"
def remote_consistent_lsn(
@@ -2938,27 +2941,6 @@ def wait_for_upload(
)
# Does not use `wait_until` for debugging purposes
def wait_until_tenant_state(
pageserver_http: PageserverHttpClient,
tenant_id: TenantId,
expected_state: str,
iterations: int,
) -> bool:
for _ in range(iterations):
try:
tenant = pageserver_http.tenant_status(tenant_id=tenant_id)
log.debug(f"Tenant {tenant_id} data: {tenant}")
if tenant["state"] == expected_state:
return True
except Exception as e:
log.debug(f"Tenant {tenant_id} state retrieval failure: {e}")
time.sleep(1)
raise Exception(f"Tenant {tenant_id} did not become {expected_state} in {iterations} seconds")
def last_record_lsn(
pageserver_http_client: PageserverHttpClient, tenant: TenantId, timeline: TimelineId
) -> Lsn:
@@ -3013,3 +2995,24 @@ def fork_at_current_lsn(
"""
current_lsn = pg.safe_psql("SELECT pg_current_wal_lsn()")[0][0]
return env.neon_cli.create_branch(new_branch_name, ancestor_branch_name, tenant_id, current_lsn)
def list_inuse_ports():
"""
Print "netstat -tnlap" output to the test log. This is useful for debugging
port collisions in tests.
"""
# This won't work on all platforms, because not all platforms have 'netstat',
# and the CLI arguments vary across platforms, too. macOS's netstat doesn't have
# the -p option, for example. So this is just best-effort.
res = subprocess.run(
["netstat", "-tnlap"],
check=False,
universal_newlines=True,
capture_output=True,
)
if res.returncode:
log.info(f"netstat -tnlap failed with return code {res.returncode}")
log.info(f"netstat -tnlap stdout: \n{res.stdout}\n")
log.info(f"netstat -tnlap stderr: \n{res.stderr}\n")

View File

@@ -42,8 +42,7 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor)
cur.execute("drop table t")
cur.execute("set enable_seqscan_prefetch=on")
cur.execute("set effective_io_concurrency=32")
cur.execute("set maintenance_io_concurrency=32")
cur.execute("set seqscan_prefetch_buffers=100")
cur.execute(f"create table t2(x integer) WITH (fillfactor={fillfactor})")

View File

@@ -2,10 +2,8 @@ from dataclasses import dataclass
from typing import Dict, Tuple
import pytest
from _pytest.mark import ParameterSet
from fixtures.compare_fixtures import RemoteCompare
from fixtures.log_helper import log
from fixtures.utils import get_self_dir
@dataclass
@@ -111,36 +109,3 @@ def test_clickbench(query: LabelledQuery, remote_compare: RemoteCompare):
"""
run_psql(remote_compare, query, times=3)
def tpch_queuies() -> Tuple[ParameterSet, ...]:
"""
A list of queries to run for the TPC-H benchmark.
- queries in the returned tuple are ordered by query number
- pytest parameter ids are adjusted to match the query id (the numbering starts from 1)
"""
queries_dir = get_self_dir().parent / "performance" / "tpc-h" / "queries"
assert queries_dir.exists(), f"TPC-H queries dir not found: {queries_dir}"
return tuple(
pytest.param(LabelledQuery(f"Q{f.stem}", f.read_text()), id=f"query{f.stem}")
for f in sorted(queries_dir.glob("*.sql"), key=lambda f: int(f.stem))
)
@pytest.mark.parametrize("query", tpch_queuies())
@pytest.mark.remote_cluster
def test_tpch(query: LabelledQuery, remote_compare: RemoteCompare):
"""
TPC-H Benchmark
The DB is prepared manually in advance:
- schema: test_runner/performance/tpc-h/create-schema.sql
- indexes: test_runner/performance/tpc-h/create-indexes.sql
- data generated by `dbgen` program of the official TPC-H benchmark
- `VACUUM (FREEZE, PARALLEL 0);`
For query generation, `1669822882` is used as the seed for the RNG
"""
run_psql(remote_compare, query, times=1)

View File

@@ -1,14 +1,10 @@
from contextlib import closing
from fixtures.neon_fixtures import NeonEnvBuilder, wait_for_last_record_lsn
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import query_scalar
from fixtures.neon_fixtures import NeonEnvBuilder
# This test demonstrates how to collect a read trace. It's useful until
# it gets replaced by a test that actually does stuff with the trace.
#
# Additionally, tests that pageserver is able to create tenants with custom configs.
def test_read_request_tracing(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 1
env = neon_env_builder.init_start()
@@ -27,12 +23,6 @@ def test_read_request_tracing(neon_env_builder: NeonEnvBuilder):
cur.execute("create table t (i integer);")
cur.execute(f"insert into t values (generate_series(1,{10000}));")
cur.execute("select count(*) from t;")
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
# wait until pageserver receives that data
pageserver_http = env.pageserver.http_client()
wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id, current_lsn)
# Stop pg so we drop the connection and flush the traces
pg.stop()

View File

@@ -1,43 +0,0 @@
-- Section 1.4.2.2
ALTER TABLE part ADD PRIMARY KEY (p_partkey);
ALTER TABLE supplier ADD PRIMARY KEY (s_suppkey);
ALTER TABLE partsupp ADD PRIMARY KEY (ps_partkey, ps_suppkey);
ALTER TABLE customer ADD PRIMARY KEY (c_custkey);
ALTER TABLE orders ADD PRIMARY KEY (o_orderkey);
ALTER TABLE lineitem ADD PRIMARY KEY (l_orderkey, l_linenumber);
ALTER TABLE nation ADD PRIMARY KEY (n_nationkey);
ALTER TABLE region ADD PRIMARY KEY (r_regionkey);
-- Section 1.4.2.3
CREATE INDEX ON supplier USING btree (s_nationkey);
ALTER TABLE supplier ADD FOREIGN KEY (s_nationkey) REFERENCES nation (n_nationkey);
/* IGNORE: implied by primary key */
-- CREATE INDEX ON partsupp USING btree (ps_partkey);
CREATE INDEX ON partsupp USING btree (ps_suppkey);
ALTER TABLE partsupp ADD FOREIGN KEY (ps_partkey) REFERENCES part (p_partkey);
ALTER TABLE partsupp ADD FOREIGN KEY (ps_suppkey) REFERENCES supplier (s_suppkey);
CREATE INDEX ON customer USING btree (c_nationkey);
ALTER TABLE customer ADD FOREIGN KEY (c_nationkey) REFERENCES nation (n_nationkey);
CREATE INDEX ON orders USING btree (o_custkey);
ALTER TABLE orders ADD FOREIGN KEY (o_custkey) REFERENCES customer (c_custkey);
/* IGNORE: implied by primary key */
-- CREATE INDEX ON lineitem USING btree (l_orderkey);
CREATE INDEX ON lineitem USING btree (l_partkey, l_suppkey);
CREATE INDEX ON lineitem USING btree (l_suppkey);
ALTER TABLE lineitem ADD FOREIGN KEY (l_orderkey) REFERENCES orders (o_orderkey);
ALTER TABLE lineitem ADD FOREIGN KEY (l_partkey) REFERENCES part (p_partkey);
ALTER TABLE lineitem ADD FOREIGN KEY (l_suppkey) REFERENCES supplier (s_suppkey);
ALTER TABLE lineitem ADD FOREIGN KEY (l_partkey, l_suppkey) REFERENCES partsupp (ps_partkey, ps_suppkey);
CREATE INDEX ON nation USING btree (n_regionkey);
ALTER TABLE nation ADD FOREIGN KEY (n_regionkey) REFERENCES region (r_regionkey);
-- Section 1.4.2.4
ALTER TABLE lineitem ADD CHECK (l_shipdate <= l_receiptdate);

View File

@@ -1,69 +0,0 @@
-- Sccsid: @(#)dss.ddl 2.1.8.1
CREATE TABLE NATION ( N_NATIONKEY INTEGER NOT NULL,
N_NAME CHAR(25) NOT NULL,
N_REGIONKEY INTEGER NOT NULL,
N_COMMENT VARCHAR(152));
CREATE TABLE REGION ( R_REGIONKEY INTEGER NOT NULL,
R_NAME CHAR(25) NOT NULL,
R_COMMENT VARCHAR(152));
CREATE TABLE PART ( P_PARTKEY INTEGER NOT NULL,
P_NAME VARCHAR(55) NOT NULL,
P_MFGR CHAR(25) NOT NULL,
P_BRAND CHAR(10) NOT NULL,
P_TYPE VARCHAR(25) NOT NULL,
P_SIZE INTEGER NOT NULL,
P_CONTAINER CHAR(10) NOT NULL,
P_RETAILPRICE DECIMAL(15,2) NOT NULL,
P_COMMENT VARCHAR(23) NOT NULL );
CREATE TABLE SUPPLIER ( S_SUPPKEY INTEGER NOT NULL,
S_NAME CHAR(25) NOT NULL,
S_ADDRESS VARCHAR(40) NOT NULL,
S_NATIONKEY INTEGER NOT NULL,
S_PHONE CHAR(15) NOT NULL,
S_ACCTBAL DECIMAL(15,2) NOT NULL,
S_COMMENT VARCHAR(101) NOT NULL);
CREATE TABLE PARTSUPP ( PS_PARTKEY INTEGER NOT NULL,
PS_SUPPKEY INTEGER NOT NULL,
PS_AVAILQTY INTEGER NOT NULL,
PS_SUPPLYCOST DECIMAL(15,2) NOT NULL,
PS_COMMENT VARCHAR(199) NOT NULL );
CREATE TABLE CUSTOMER ( C_CUSTKEY INTEGER NOT NULL,
C_NAME VARCHAR(25) NOT NULL,
C_ADDRESS VARCHAR(40) NOT NULL,
C_NATIONKEY INTEGER NOT NULL,
C_PHONE CHAR(15) NOT NULL,
C_ACCTBAL DECIMAL(15,2) NOT NULL,
C_MKTSEGMENT CHAR(10) NOT NULL,
C_COMMENT VARCHAR(117) NOT NULL);
CREATE TABLE ORDERS ( O_ORDERKEY INTEGER NOT NULL,
O_CUSTKEY INTEGER NOT NULL,
O_ORDERSTATUS CHAR(1) NOT NULL,
O_TOTALPRICE DECIMAL(15,2) NOT NULL,
O_ORDERDATE DATE NOT NULL,
O_ORDERPRIORITY CHAR(15) NOT NULL,
O_CLERK CHAR(15) NOT NULL,
O_SHIPPRIORITY INTEGER NOT NULL,
O_COMMENT VARCHAR(79) NOT NULL);
CREATE TABLE LINEITEM ( L_ORDERKEY INTEGER NOT NULL,
L_PARTKEY INTEGER NOT NULL,
L_SUPPKEY INTEGER NOT NULL,
L_LINENUMBER INTEGER NOT NULL,
L_QUANTITY DECIMAL(15,2) NOT NULL,
L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL,
L_DISCOUNT DECIMAL(15,2) NOT NULL,
L_TAX DECIMAL(15,2) NOT NULL,
L_RETURNFLAG CHAR(1) NOT NULL,
L_LINESTATUS CHAR(1) NOT NULL,
L_SHIPDATE DATE NOT NULL,
L_COMMITDATE DATE NOT NULL,
L_RECEIPTDATE DATE NOT NULL,
L_SHIPINSTRUCT CHAR(25) NOT NULL,
L_SHIPMODE CHAR(10) NOT NULL,
L_COMMENT VARCHAR(44) NOT NULL);

View File

@@ -1,27 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Pricing Summary Report Query (Q1)
-- Functional Query Definition
-- Approved February 1998
select
l_returnflag,
l_linestatus,
sum(l_quantity) as sum_qty,
sum(l_extendedprice) as sum_base_price,
sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
avg(l_quantity) as avg_qty,
avg(l_extendedprice) as avg_price,
avg(l_discount) as avg_disc,
count(*) as count_order
from
lineitem
where
l_shipdate <= date '1998-12-01' - interval '89' day
group by
l_returnflag,
l_linestatus
order by
l_returnflag,
l_linestatus;

View File

@@ -1,38 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Returned Item Reporting Query (Q10)
-- Functional Query Definition
-- Approved February 1998
select
c_custkey,
c_name,
sum(l_extendedprice * (1 - l_discount)) as revenue,
c_acctbal,
n_name,
c_address,
c_phone,
c_comment
from
customer,
orders,
lineitem,
nation
where
c_custkey = o_custkey
and l_orderkey = o_orderkey
and o_orderdate >= date '1993-08-01'
and o_orderdate < date '1993-08-01' + interval '3' month
and l_returnflag = 'R'
and c_nationkey = n_nationkey
group by
c_custkey,
c_name,
c_acctbal,
c_phone,
n_name,
c_address,
c_comment
order by
revenue desc
limit 20;

View File

@@ -1,34 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Important Stock Identification Query (Q11)
-- Functional Query Definition
-- Approved February 1998
select
ps_partkey,
sum(ps_supplycost * ps_availqty) as value
from
partsupp,
supplier,
nation
where
ps_suppkey = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'INDONESIA'
group by
ps_partkey having
sum(ps_supplycost * ps_availqty) > (
select
sum(ps_supplycost * ps_availqty) * 0.0001000000
from
partsupp,
supplier,
nation
where
ps_suppkey = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'INDONESIA'
)
order by
value desc
;

View File

@@ -1,35 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Shipping Modes and Order Priority Query (Q12)
-- Functional Query Definition
-- Approved February 1998
select
l_shipmode,
sum(case
when o_orderpriority = '1-URGENT'
or o_orderpriority = '2-HIGH'
then 1
else 0
end) as high_line_count,
sum(case
when o_orderpriority <> '1-URGENT'
and o_orderpriority <> '2-HIGH'
then 1
else 0
end) as low_line_count
from
orders,
lineitem
where
o_orderkey = l_orderkey
and l_shipmode in ('REG AIR', 'AIR')
and l_commitdate < l_receiptdate
and l_shipdate < l_commitdate
and l_receiptdate >= date '1995-01-01'
and l_receiptdate < date '1995-01-01' + interval '1' year
group by
l_shipmode
order by
l_shipmode
;

View File

@@ -1,27 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Customer Distribution Query (Q13)
-- Functional Query Definition
-- Approved February 1998
select
c_count,
count(*) as custdist
from
(
select
c_custkey,
count(o_orderkey)
from
customer left outer join orders on
c_custkey = o_custkey
and o_comment not like '%special%accounts%'
group by
c_custkey
) as c_orders (c_custkey, c_count)
group by
c_count
order by
custdist desc,
c_count desc
;

View File

@@ -1,20 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Promotion Effect Query (Q14)
-- Functional Query Definition
-- Approved February 1998
select
100.00 * sum(case
when p_type like 'PROMO%'
then l_extendedprice * (1 - l_discount)
else 0
end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
from
lineitem,
part
where
l_partkey = p_partkey
and l_shipdate >= date '1995-07-01'
and l_shipdate < date '1995-07-01' + interval '1' month
;

View File

@@ -1,40 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Top Supplier Query (Q15)
-- Functional Query Definition
-- Approved February 1998
create view revenue0 (supplier_no, total_revenue) as
select
l_suppkey,
sum(l_extendedprice * (1 - l_discount))
from
lineitem
where
l_shipdate >= date '1995-01-01'
and l_shipdate < date '1995-01-01' + interval '3' month
group by
l_suppkey;
select
s_suppkey,
s_name,
s_address,
s_phone,
total_revenue
from
supplier,
revenue0
where
s_suppkey = supplier_no
and total_revenue = (
select
max(total_revenue)
from
revenue0
)
order by
s_suppkey;
drop view revenue0
;

View File

@@ -1,37 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Parts/Supplier Relationship Query (Q16)
-- Functional Query Definition
-- Approved February 1998
select
p_brand,
p_type,
p_size,
count(distinct ps_suppkey) as supplier_cnt
from
partsupp,
part
where
p_partkey = ps_partkey
and p_brand <> 'Brand#43'
and p_type not like 'PROMO POLISHED%'
and p_size in (35, 5, 42, 13, 11, 40, 50, 47)
and ps_suppkey not in (
select
s_suppkey
from
supplier
where
s_comment like '%Customer%Complaints%'
)
group by
p_brand,
p_type,
p_size
order by
supplier_cnt desc,
p_brand,
p_type,
p_size
;

View File

@@ -1,25 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Small-Quantity-Order Revenue Query (Q17)
-- Functional Query Definition
-- Approved February 1998
select
sum(l_extendedprice) / 7.0 as avg_yearly
from
lineitem,
part
where
p_partkey = l_partkey
and p_brand = 'Brand#35'
and p_container = 'JUMBO JAR'
and l_quantity < (
select
0.2 * avg(l_quantity)
from
lineitem
where
l_partkey = p_partkey
)
;

View File

@@ -1,39 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Large Volume Customer Query (Q18)
-- Function Query Definition
-- Approved February 1998
select
c_name,
c_custkey,
o_orderkey,
o_orderdate,
o_totalprice,
sum(l_quantity)
from
customer,
orders,
lineitem
where
o_orderkey in (
select
l_orderkey
from
lineitem
group by
l_orderkey having
sum(l_quantity) > 315
)
and c_custkey = o_custkey
and o_orderkey = l_orderkey
group by
c_name,
c_custkey,
o_orderkey,
o_orderdate,
o_totalprice
order by
o_totalprice desc,
o_orderdate
limit 100;

View File

@@ -1,42 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Discounted Revenue Query (Q19)
-- Functional Query Definition
-- Approved February 1998
select
sum(l_extendedprice* (1 - l_discount)) as revenue
from
lineitem,
part
where
(
p_partkey = l_partkey
and p_brand = 'Brand#41'
and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
and l_quantity >= 10 and l_quantity <= 10 + 10
and p_size between 1 and 5
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
)
or
(
p_partkey = l_partkey
and p_brand = 'Brand#52'
and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
and l_quantity >= 20 and l_quantity <= 20 + 10
and p_size between 1 and 10
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
)
or
(
p_partkey = l_partkey
and p_brand = 'Brand#14'
and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
and l_quantity >= 22 and l_quantity <= 22 + 10
and p_size between 1 and 15
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
)
;

View File

@@ -1,50 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Minimum Cost Supplier Query (Q2)
-- Functional Query Definition
-- Approved February 1998
select
s_acctbal,
s_name,
n_name,
p_partkey,
p_mfgr,
s_address,
s_phone,
s_comment
from
part,
supplier,
partsupp,
nation,
region
where
p_partkey = ps_partkey
and s_suppkey = ps_suppkey
and p_size = 39
and p_type like '%BRASS'
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = 'MIDDLE EAST'
and ps_supplycost = (
select
min(ps_supplycost)
from
partsupp,
supplier,
nation,
region
where
p_partkey = ps_partkey
and s_suppkey = ps_suppkey
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = 'MIDDLE EAST'
)
order by
s_acctbal desc,
n_name,
s_name,
p_partkey
limit 100;

View File

@@ -1,44 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Potential Part Promotion Query (Q20)
-- Function Query Definition
-- Approved February 1998
select
s_name,
s_address
from
supplier,
nation
where
s_suppkey in (
select
ps_suppkey
from
partsupp
where
ps_partkey in (
select
p_partkey
from
part
where
p_name like 'bisque%'
)
and ps_availqty > (
select
0.5 * sum(l_quantity)
from
lineitem
where
l_partkey = ps_partkey
and l_suppkey = ps_suppkey
and l_shipdate >= date '1997-01-01'
and l_shipdate < date '1997-01-01' + interval '1' year
)
)
and s_nationkey = n_nationkey
and n_name = 'ETHIOPIA'
order by
s_name
;

View File

@@ -1,46 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Suppliers Who Kept Orders Waiting Query (Q21)
-- Functional Query Definition
-- Approved February 1998
select
s_name,
count(*) as numwait
from
supplier,
lineitem l1,
orders,
nation
where
s_suppkey = l1.l_suppkey
and o_orderkey = l1.l_orderkey
and o_orderstatus = 'F'
and l1.l_receiptdate > l1.l_commitdate
and exists (
select
*
from
lineitem l2
where
l2.l_orderkey = l1.l_orderkey
and l2.l_suppkey <> l1.l_suppkey
)
and not exists (
select
*
from
lineitem l3
where
l3.l_orderkey = l1.l_orderkey
and l3.l_suppkey <> l1.l_suppkey
and l3.l_receiptdate > l3.l_commitdate
)
and s_nationkey = n_nationkey
and n_name = 'SAUDI ARABIA'
group by
s_name
order by
numwait desc,
s_name
limit 100;

View File

@@ -1,44 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Global Sales Opportunity Query (Q22)
-- Functional Query Definition
-- Approved February 1998
select
cntrycode,
count(*) as numcust,
sum(c_acctbal) as totacctbal
from
(
select
substring(c_phone from 1 for 2) as cntrycode,
c_acctbal
from
customer
where
substring(c_phone from 1 for 2) in
('15', '14', '29', '34', '33', '19', '13')
and c_acctbal > (
select
avg(c_acctbal)
from
customer
where
c_acctbal > 0.00
and substring(c_phone from 1 for 2) in
('15', '14', '29', '34', '33', '19', '13')
)
and not exists (
select
*
from
orders
where
o_custkey = c_custkey
)
) as custsale
group by
cntrycode
order by
cntrycode
;

View File

@@ -1,29 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Shipping Priority Query (Q3)
-- Functional Query Definition
-- Approved February 1998
select
l_orderkey,
sum(l_extendedprice * (1 - l_discount)) as revenue,
o_orderdate,
o_shippriority
from
customer,
orders,
lineitem
where
c_mktsegment = 'AUTOMOBILE'
and c_custkey = o_custkey
and l_orderkey = o_orderkey
and o_orderdate < date '1995-03-26'
and l_shipdate > date '1995-03-26'
group by
l_orderkey,
o_orderdate,
o_shippriority
order by
revenue desc,
o_orderdate
limit 10;

View File

@@ -1,28 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Order Priority Checking Query (Q4)
-- Functional Query Definition
-- Approved February 1998
select
o_orderpriority,
count(*) as order_count
from
orders
where
o_orderdate >= date '1996-12-01'
and o_orderdate < date '1996-12-01' + interval '3' month
and exists (
select
*
from
lineitem
where
l_orderkey = o_orderkey
and l_commitdate < l_receiptdate
)
group by
o_orderpriority
order by
o_orderpriority
;

View File

@@ -1,31 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Local Supplier Volume Query (Q5)
-- Functional Query Definition
-- Approved February 1998
select
n_name,
sum(l_extendedprice * (1 - l_discount)) as revenue
from
customer,
orders,
lineitem,
supplier,
nation,
region
where
c_custkey = o_custkey
and l_orderkey = o_orderkey
and l_suppkey = s_suppkey
and c_nationkey = s_nationkey
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = 'ASIA'
and o_orderdate >= date '1996-01-01'
and o_orderdate < date '1996-01-01' + interval '1' year
group by
n_name
order by
revenue desc
;

View File

@@ -1,16 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Forecasting Revenue Change Query (Q6)
-- Functional Query Definition
-- Approved February 1998
select
sum(l_extendedprice * l_discount) as revenue
from
lineitem
where
l_shipdate >= date '1996-01-01'
and l_shipdate < date '1996-01-01' + interval '1' year
and l_discount between 0.02 - 0.01 and 0.02 + 0.01
and l_quantity < 24
;

View File

@@ -1,46 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Volume Shipping Query (Q7)
-- Functional Query Definition
-- Approved February 1998
select
supp_nation,
cust_nation,
l_year,
sum(volume) as revenue
from
(
select
n1.n_name as supp_nation,
n2.n_name as cust_nation,
extract(year from l_shipdate) as l_year,
l_extendedprice * (1 - l_discount) as volume
from
supplier,
lineitem,
orders,
customer,
nation n1,
nation n2
where
s_suppkey = l_suppkey
and o_orderkey = l_orderkey
and c_custkey = o_custkey
and s_nationkey = n1.n_nationkey
and c_nationkey = n2.n_nationkey
and (
(n1.n_name = 'ALGERIA' and n2.n_name = 'CANADA')
or (n1.n_name = 'CANADA' and n2.n_name = 'ALGERIA')
)
and l_shipdate between date '1995-01-01' and date '1996-12-31'
) as shipping
group by
supp_nation,
cust_nation,
l_year
order by
supp_nation,
cust_nation,
l_year
;

View File

@@ -1,44 +0,0 @@
-- $ID$
-- TPC-H/TPC-R National Market Share Query (Q8)
-- Functional Query Definition
-- Approved February 1998
select
o_year,
sum(case
when nation = 'CANADA' then volume
else 0
end) / sum(volume) as mkt_share
from
(
select
extract(year from o_orderdate) as o_year,
l_extendedprice * (1 - l_discount) as volume,
n2.n_name as nation
from
part,
supplier,
lineitem,
orders,
customer,
nation n1,
nation n2,
region
where
p_partkey = l_partkey
and s_suppkey = l_suppkey
and l_orderkey = o_orderkey
and o_custkey = c_custkey
and c_nationkey = n1.n_nationkey
and n1.n_regionkey = r_regionkey
and r_name = 'AMERICA'
and s_nationkey = n2.n_nationkey
and o_orderdate between date '1995-01-01' and date '1996-12-31'
and p_type = 'SMALL POLISHED BRASS'
) as all_nations
group by
o_year
order by
o_year
;

View File

@@ -1,39 +0,0 @@
-- $ID$
-- TPC-H/TPC-R Product Type Profit Measure Query (Q9)
-- Functional Query Definition
-- Approved February 1998
select
nation,
o_year,
sum(amount) as sum_profit
from
(
select
n_name as nation,
extract(year from o_orderdate) as o_year,
l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
from
part,
supplier,
lineitem,
partsupp,
orders,
nation
where
s_suppkey = l_suppkey
and ps_suppkey = l_suppkey
and ps_partkey = l_partkey
and p_partkey = l_partkey
and o_orderkey = l_orderkey
and s_nationkey = n_nationkey
and p_name like '%firebrick%'
) as profit
group by
nation,
o_year
order by
nation,
o_year desc
;

Some files were not shown because too many files have changed in this diff.