Use B-Tree instead of R-Tree

Fix indentation
Make clippy happy
2026-03-09 03:10:37 +00:00 · 2022-10-07 14:57:25 +03:00 · 2022-10-06 22:31:11 +03:00 · 2022-10-06 21:47:26 +03:00 · 2022-10-06 20:16:13 +03:00 · 2022-10-06 16:06:39 +03:00
47 changed files with 347 additions and 1191 deletions
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -564,7 +564,7 @@ jobs:

  promote-images:
    runs-on: dev
-    needs: [ neon-image, compute-node-image, compute-node-image-v14, compute-node-image-v15, compute-tools-image ]
+    needs: [ neon-image, compute-node-image, compute-node-image-v14, compute-tools-image ]
    if: github.event_name != 'workflow_dispatch'
    container: amazon/aws-cli
    strategy:
@@ -573,7 +573,7 @@ jobs:
        # compute-node uses postgres 14, which is default now
        # cloud repo depends on this image name, thus duplicating it
        # remove compute-node when cloud repo is updated
-        name: [ neon, compute-node, compute-node-v14, compute-node-v15, compute-tools ]
+        name: [ neon, compute-node, compute-node-v14, compute-tools ]

    steps:
      - name: Promote image to latest
@@ -608,9 +608,6 @@ jobs:
      - name: Pull compute node v14 image from ECR
        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:latest compute-node-v14

-      - name: Pull compute node v15 image from ECR
-        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:latest compute-node-v15
-
      - name: Pull rust image from ECR
        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned rust

@@ -641,9 +638,6 @@ jobs:
      - name: Push compute node v14 image to Docker Hub
        run: crane push compute-node-v14 neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}}

-      - name: Push compute node v15 image to Docker Hub
-        run: crane push compute-node-v15 neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}}
-
      - name: Push rust image to Docker Hub
        run: crane push rust neondatabase/rust:pinned

@@ -656,7 +650,6 @@ jobs:
          crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest
          crane tag neondatabase/compute-node:${{needs.tag.outputs.build-tag}} latest
          crane tag neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}} latest
-          crane tag neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}} latest

  calculate-deploy-targets:
    runs-on: [ self-hosted, Linux, k8s-runner ]
@@ -775,5 +768,5 @@ jobs:
      - name: Re-deploy proxy
        run: |
          DOCKER_TAG=${{needs.tag.outputs.build-tag}}
-          helm upgrade ${{ matrix.proxy_job }}       neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
-          helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
+          helm upgrade ${{ matrix.proxy_job }}       neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
+          helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -497,10 +497,8 @@ dependencies = [
 "chrono",
 "clap 3.2.16",
 "env_logger",
- "futures",
 "hyper",
 "log",
- "notify",
 "postgres",
 "regex",
 "serde",
@@ -542,11 +540,11 @@ dependencies = [
 "git-version",
 "nix",
 "once_cell",
- "pageserver_api",
+ "pageserver",
 "postgres",
 "regex",
 "reqwest",
- "safekeeper_api",
+ "safekeeper",
 "serde",
 "serde_with",
 "tar",
@@ -1074,15 +1072,6 @@ dependencies = [
 "winapi",
 ]

-[[package]]
-name = "fsevent-sys"
-version = "4.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "76ee7a02da4d231650c7cea31349b889be2f45ddb3ef3032d2ec8185f6313fd2"
-dependencies = [
- "libc",
-]
-
 [[package]]
 name = "futures"
 version = "0.3.21"
@@ -1504,26 +1493,6 @@ dependencies = [
 "str_stack",
 ]

-[[package]]
-name = "inotify"
-version = "0.9.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f8069d3ec154eb856955c1c0fbffefbf5f3c40a104ec912d4797314c1801abff"
-dependencies = [
- "bitflags",
- "inotify-sys",
- "libc",
-]
-
-[[package]]
-name = "inotify-sys"
-version = "0.1.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb"
-dependencies = [
- "libc",
-]
-
 [[package]]
 name = "instant"
 version = "0.1.12"
@@ -1583,26 +1552,6 @@ dependencies = [
 "simple_asn1",
 ]

-[[package]]
-name = "kqueue"
-version = "1.0.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4d6112e8f37b59803ac47a42d14f1f3a59bbf72fc6857ffc5be455e28a691f8e"
-dependencies = [
- "kqueue-sys",
- "libc",
-]
-
-[[package]]
-name = "kqueue-sys"
-version = "1.0.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8367585489f01bc55dd27404dcf56b95e6da061a256a666ab23be9ba96a2e587"
-dependencies = [
- "bitflags",
- "libc",
-]
-
 [[package]]
 name = "kstring"
 version = "1.0.6"
@@ -1848,24 +1797,6 @@ dependencies = [
 "minimal-lexical",
 ]

-[[package]]
-name = "notify"
-version = "5.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ed2c66da08abae1c024c01d635253e402341b4060a12e99b31c7594063bf490a"
-dependencies = [
- "bitflags",
- "crossbeam-channel",
- "filetime",
- "fsevent-sys",
- "inotify",
- "kqueue",
- "libc",
- "mio",
- "walkdir",
- "winapi",
-]
-
 [[package]]
 name = "num-bigint"
 version = "0.4.3"
@@ -2044,7 +1975,6 @@ dependencies = [
 "nix",
 "num-traits",
 "once_cell",
- "pageserver_api",
 "postgres",
 "postgres-protocol",
 "postgres-types",
@@ -2073,17 +2003,6 @@ dependencies = [
 "workspace_hack",
 ]

-[[package]]
-name = "pageserver_api"
-version = "0.1.0"
-dependencies = [
- "const_format",
- "serde",
- "serde_with",
- "utils",
- "workspace_hack",
-]
-
 [[package]]
 name = "parking_lot"
 version = "0.11.2"
@@ -2972,7 +2891,6 @@ dependencies = [
 "postgres_ffi",
 "regex",
 "remote_storage",
- "safekeeper_api",
 "serde",
 "serde_json",
 "serde_with",
@@ -2988,17 +2906,6 @@ dependencies = [
 "workspace_hack",
 ]

-[[package]]
-name = "safekeeper_api"
-version = "0.1.0"
-dependencies = [
- "const_format",
- "serde",
- "serde_with",
- "utils",
- "workspace_hack",
-]
-
 [[package]]
 name = "same-file"
 version = "1.0.6"
@@ -4235,7 +4142,6 @@ dependencies = [
 "bstr",
 "bytes",
 "chrono",
- "crossbeam-utils",
 "either",
 "fail",
 "hashbrown",
--- a/Dockerfile.compute-node-v15
+++ b/Dockerfile.compute-node-v15
@@ -5,7 +5,7 @@

 ARG TAG=pinned
 # apparently, ARGs don't get replaced in RUN commands in kaniko
-# ARG POSTGIS_VERSION=3.3.1
+# ARG POSTGIS_VERSION=3.3.0
 # ARG PLV8_VERSION=3.1.4
 # ARG PG_VERSION=v15

@@ -13,12 +13,9 @@ ARG TAG=pinned
 # Layer "build-deps"
 #
 FROM debian:bullseye-slim AS build-deps
-RUN echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
-    echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
-    apt update
 RUN apt update &&  \
    apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \
-    libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libglib2.0-dev
+    libcurl4-openssl-dev libossp-uuid-dev

 #
 # Layer "pg-build"
@@ -45,11 +42,11 @@ RUN cd postgres && \
 FROM build-deps AS postgis-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 RUN apt update && \
-    apt install -y gdal-bin libgdal-dev libprotobuf-c-dev protobuf-c-compiler xsltproc
+    apt install -y gdal-bin libgdal-dev libprotobuf-c-dev protobuf-c-compiler xsltproc wget

-RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
-    tar xvzf postgis-3.3.1.tar.gz && \
-    cd postgis-3.3.1 && \
+RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.0.tar.gz && \
+    tar xvzf postgis-3.3.0.tar.gz && \
+    cd postgis-3.3.0 && \
    ./autogen.sh && \
    export PATH="/usr/local/pgsql/bin:$PATH" && \
    ./configure && \
@@ -67,13 +64,15 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
 # Build plv8
 #
 FROM build-deps AS plv8-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
 RUN apt update && \
-    apt install -y ninja-build python3-dev libc++-dev libc++abi-dev libncurses5
+    apt install -y git curl wget make ninja-build build-essential libncurses5 python3-dev pkg-config libc++-dev libc++abi-dev libglib2.0-dev

 # https://github.com/plv8/plv8/issues/475
 # Debian bullseye provides binutils 2.35 when >= 2.38 is necessary
-RUN apt update && \
+RUN echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \
+    echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \
+    apt update && \
    apt install -y --no-install-recommends -t testing binutils

 RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.4.tar.gz && \
@@ -85,46 +84,12 @@ RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.4.tar.gz && \
    rm -rf /plv8-* && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control

-#
-# Layer "h3-pg-build"
-# Build h3_pg
-#
-FROM build-deps AS h3-pg-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-
-# packaged cmake is too old
-RUN apt update && \
-    apt install -y --no-install-recommends -t testing cmake
-
-RUN wget https://github.com/uber/h3/archive/refs/tags/v4.0.1.tar.gz -O h3.tgz && \
-    tar xvzf h3.tgz  && \
-    cd h3-4.0.1 && \
-    mkdir build && \
-    cd build && \
-    cmake .. -DCMAKE_BUILD_TYPE=Release && \
-    make -j $(getconf _NPROCESSORS_ONLN) && \
-    DESTDIR=/h3 make install && \
-    cp -R /h3/usr / && \
-    rm -rf build
-
-RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.0.1.tar.gz -O h3-pg.tgz && \
-    tar xvzf h3-pg.tgz && \
-    cd h3-pg-4.0.1 && \
-    export PATH="/usr/local/pgsql/bin:$PATH" && \
-    make -j $(getconf _NPROCESSORS_ONLN) && \
-    make -j $(getconf _NPROCESSORS_ONLN) install && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/h3.control
-
 #
 # Layer "neon-pg-ext-build"
 # compile neon extensions
 #
 FROM build-deps AS neon-pg-ext-build
 COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
-# plv8 still sometimes crashes during the creation
-# COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=h3-pg-build /h3/usr /
 COPY pgxn/ pgxn/

 RUN make -j $(getconf _NPROCESSORS_ONLN) \
@@ -172,6 +137,8 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
    chmod 0750 /var/db/postgres/compute && \
    echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig

+# TODO: Check if we can make the extension setup more modular versus a linear build
+# currently plv8-build copies the output /usr/local/pgsql from postgis-build, etc.
 COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
 COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl

--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -8,10 +8,8 @@ anyhow = "1.0"
 chrono = "0.4"
 clap = "3.0"
 env_logger = "0.9"
-futures = "0.3.13"
 hyper = { version = "0.14", features = ["full"] }
 log = { version = "0.4", features = ["std", "serde"] }
-notify = "5.0.0"
 postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
 regex = "1"
 serde = { version = "1.0", features = ["derive"] }
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -178,6 +178,7 @@ impl ComputeNode {
            .args(&["--sync-safekeepers"])
            .env("PGDATA", &self.pgdata) // we cannot use -D in this mode
            .stdout(Stdio::piped())
+            .stderr(Stdio::piped())
            .spawn()
            .expect("postgres --sync-safekeepers failed to start");

@@ -190,10 +191,10 @@ impl ComputeNode {

        if !sync_output.status.success() {
            anyhow::bail!(
-                "postgres --sync-safekeepers exited with non-zero status: {}. stdout: {}",
+                "postgres --sync-safekeepers exited with non-zero status: {}. stdout: {}, stderr: {}",
                sync_output.status,
-                String::from_utf8(sync_output.stdout)
-                    .expect("postgres --sync-safekeepers exited, and stdout is not utf-8"),
+                String::from_utf8(sync_output.stdout).expect("postgres --sync-safekeepers exited, and stdout is not utf-8"),
+                String::from_utf8(sync_output.stderr).expect("postgres --sync-safekeepers exited, and stderr is not utf-8"),
            );
        }

@@ -257,7 +258,14 @@ impl ComputeNode {
            .spawn()
            .expect("cannot start postgres process");

-        wait_for_postgres(&mut pg, pgdata_path)?;
+        // Try default Postgres port if it is not provided
+        let port = self
+            .spec
+            .cluster
+            .settings
+            .find("port")
+            .unwrap_or_else(|| "5432".to_string());
+        wait_for_postgres(&mut pg, &port, pgdata_path)?;

        // If connection fails,
        // it may be the old node with `zenith_admin` superuser.
--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -1,19 +1,18 @@
 use std::fmt::Write;
-use std::fs;
 use std::fs::File;
 use std::io::{BufRead, BufReader};
+use std::net::{SocketAddr, TcpStream};
 use std::os::unix::fs::PermissionsExt;
 use std::path::Path;
 use std::process::Child;
-use std::time::{Duration, Instant};
+use std::str::FromStr;
+use std::{fs, thread, time};

 use anyhow::{bail, Result};
 use postgres::{Client, Transaction};
 use serde::Deserialize;

-use notify::{RecursiveMode, Watcher};
-
-const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds
+const POSTGRES_WAIT_TIMEOUT: u64 = 60 * 1000; // milliseconds

 /// Rust representation of Postgres role info with only those fields
 /// that matter for us.
@@ -231,112 +230,52 @@ pub fn get_existing_dbs(client: &mut Client) -> Result<Vec<Database>> {
    Ok(postgres_dbs)
 }

-/// Wait for Postgres to become ready to accept connections. It's ready to
-/// accept connections when the state-field in `pgdata/postmaster.pid` says
-/// 'ready'.
-pub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> {
+/// Wait for Postgres to become ready to accept connections:
+/// - state should be `ready` in the `pgdata/postmaster.pid`
+/// - and we should be able to connect to 127.0.0.1 on the given `port`
+pub fn wait_for_postgres(pg: &mut Child, port: &str, pgdata: &Path) -> Result<()> {
    let pid_path = pgdata.join("postmaster.pid");
+    let mut slept: u64 = 0; // ms
+    let pause = time::Duration::from_millis(100);

-    // PostgreSQL writes line "ready" to the postmaster.pid file, when it has
-    // completed initialization and is ready to accept connections. We want to
-    // react quickly and perform the rest of our initialization as soon as
-    // PostgreSQL starts accepting connections. Use 'notify' to be notified
-    // whenever the PID file is changed, and whenever it changes, read it to
-    // check if it's now "ready".
-    //
-    // You cannot actually watch a file before it exists, so we first watch the
-    // data directory, and once the postmaster.pid file appears, we switch to
-    // watch the file instead. We also wake up every 100 ms to poll, just in
-    // case we miss some events for some reason. Not strictly necessary, but
-    // better safe than sorry.
-    let (tx, rx) = std::sync::mpsc::channel();
-    let (mut watcher, rx): (Box<dyn Watcher>, _) = match notify::recommended_watcher(move |res| {
-        let _ = tx.send(res);
-    }) {
-        Ok(watcher) => (Box::new(watcher), rx),
-        Err(e) => {
-            match e.kind {
-                notify::ErrorKind::Io(os) if os.raw_os_error() == Some(38) => {
-                    // docker on m1 macs does not support recommended_watcher
-                    // but return "Function not implemented (os error 38)"
-                    // see https://github.com/notify-rs/notify/issues/423
-                    let (tx, rx) = std::sync::mpsc::channel();
+    let timeout = time::Duration::from_millis(10);
+    let addr = SocketAddr::from_str(&format!("127.0.0.1:{}", port)).unwrap();

-                    // let's poll it faster than what we check the results for (100ms)
-                    let config =
-                        notify::Config::default().with_poll_interval(Duration::from_millis(50));
-
-                    let watcher = notify::PollWatcher::new(
-                        move |res| {
-                            let _ = tx.send(res);
-                        },
-                        config,
-                    )?;
-
-                    (Box::new(watcher), rx)
-                }
-                _ => return Err(e.into()),
-            }
-        }
-    };
-
-    watcher.watch(pgdata, RecursiveMode::NonRecursive)?;
-
-    let started_at = Instant::now();
-    let mut postmaster_pid_seen = false;
    loop {
+        // Sleep for at most POSTGRES_WAIT_TIMEOUT (slightly longer in practice, since the
+        // TCP connect timeout is not counted, but postgres starts listening almost
+        // immediately, even if it is not really ready to accept connections).
+        if slept >= POSTGRES_WAIT_TIMEOUT {
+            bail!("timed out while waiting for Postgres to start");
+        }
+
        if let Ok(Some(status)) = pg.try_wait() {
            // Postgres exited, that is not what we expected, bail out earlier.
            let code = status.code().unwrap_or(-1);
            bail!("Postgres exited unexpectedly with code {}", code);
        }

-        let res = rx.recv_timeout(Duration::from_millis(100));
-        log::debug!("woken up by notify: {res:?}");
-        // If there are multiple events in the channel already, we only need to be
-        // check once. Swallow the extra events before we go ahead to check the
-        // pid file.
-        while let Ok(res) = rx.try_recv() {
-            log::debug!("swallowing extra event: {res:?}");
-        }
-
        // Check that we can open pid file first.
        if let Ok(file) = File::open(&pid_path) {
-            if !postmaster_pid_seen {
-                log::debug!("postmaster.pid appeared");
-                watcher
-                    .unwatch(pgdata)
-                    .expect("Failed to remove pgdata dir watch");
-                watcher
-                    .watch(&pid_path, RecursiveMode::NonRecursive)
-                    .expect("Failed to add postmaster.pid file watch");
-                postmaster_pid_seen = true;
-            }
-
            let file = BufReader::new(file);
            let last_line = file.lines().last();

            // Pid file could be there and we could read it, but it could be empty, for example.
            if let Some(Ok(line)) = last_line {
                let status = line.trim();
-                log::debug!("last line of postmaster.pid: {status:?}");
+                let can_connect = TcpStream::connect_timeout(&addr, timeout).is_ok();

                // Now Postgres is ready to accept connections
-                if status == "ready" {
+                if status == "ready" && can_connect {
                    break;
                }
            }
        }

-        // Give up after POSTGRES_WAIT_TIMEOUT.
-        let duration = started_at.elapsed();
-        if duration >= POSTGRES_WAIT_TIMEOUT {
-            bail!("timed out while waiting for Postgres to start");
-        }
+        thread::sleep(pause);
+        slept += 100;
    }

-    log::info!("PostgreSQL is now running, continuing to configure it");
-
    Ok(())
 }

--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -19,9 +19,7 @@ thiserror = "1"
 nix = "0.23"
 reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }

-# Note: Do not directly depend on pageserver or safekeeper; use pageserver_api or safekeeper_api
-# instead, so that recompile times are better.
-pageserver_api = { path = "../libs/pageserver_api" }
-safekeeper_api = { path = "../libs/safekeeper_api" }
+pageserver = { path = "../pageserver" }
+safekeeper = { path = "../safekeeper" }
 utils = { path = "../libs/utils" }
 workspace_hack = { version = "0.1", path = "../workspace_hack" }
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -12,12 +12,12 @@ use control_plane::local_env::{EtcdBroker, LocalEnv};
 use control_plane::safekeeper::SafekeeperNode;
 use control_plane::storage::PageServerNode;
 use control_plane::{etcd, local_env};
-use pageserver_api::models::TimelineInfo;
-use pageserver_api::{
+use pageserver::config::defaults::{
    DEFAULT_HTTP_LISTEN_ADDR as DEFAULT_PAGESERVER_HTTP_ADDR,
    DEFAULT_PG_LISTEN_ADDR as DEFAULT_PAGESERVER_PG_ADDR,
 };
-use safekeeper_api::{
+use pageserver::http::models::TimelineInfo;
+use safekeeper::defaults::{
    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
    DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
 };
--- a/control_plane/src/compute.rs
+++ b/control_plane/src/compute.rs
@@ -284,7 +284,7 @@ impl PostgresNode {
        conf.append("max_wal_senders", "10");
        // wal_log_hints is mandatory when running against pageserver (see gh issue#192)
        // TODO: is it possible to check wal_log_hints at pageserver side via XLOG_PARAMETER_CHANGE?
-        conf.append("wal_log_hints", "on");
+        conf.append("wal_log_hints", "off");
        conf.append("max_replication_slots", "10");
        conf.append("hot_standby", "on");
        conf.append("shared_buffers", "1MB");
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -12,7 +12,7 @@ use nix::unistd::Pid;
 use postgres::Config;
 use reqwest::blocking::{Client, RequestBuilder, Response};
 use reqwest::{IntoUrl, Method};
-use safekeeper_api::models::TimelineCreateRequest;
+use safekeeper::http::models::TimelineCreateRequest;
 use thiserror::Error;
 use utils::{
    connstring::connection_address,
--- a/control_plane/src/storage.rs
+++ b/control_plane/src/storage.rs
@@ -11,7 +11,7 @@ use anyhow::{bail, Context};
 use nix::errno::Errno;
 use nix::sys::signal::{kill, Signal};
 use nix::unistd::Pid;
-use pageserver_api::models::{
+use pageserver::http::models::{
    TenantConfigRequest, TenantCreateRequest, TenantInfo, TimelineCreateRequest, TimelineInfo,
 };
 use postgres::{Config, NoTls};
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -3,7 +3,7 @@
 //! Otherwise, we might not see all metrics registered via
 //! a default registry.
 use once_cell::sync::Lazy;
-use prometheus::core::{AtomicU64, Collector, GenericGauge, GenericGaugeVec};
+use prometheus::core::{AtomicU64, GenericGauge, GenericGaugeVec};
 pub use prometheus::opts;
 pub use prometheus::register;
 pub use prometheus::{core, default_registry, proto};
@@ -17,7 +17,6 @@ pub use prometheus::{register_int_counter_vec, IntCounterVec};
 pub use prometheus::{register_int_gauge, IntGauge};
 pub use prometheus::{register_int_gauge_vec, IntGaugeVec};
 pub use prometheus::{Encoder, TextEncoder};
-use prometheus::{Registry, Result};

 mod wrappers;
 pub use wrappers::{CountedReader, CountedWriter};
@@ -33,27 +32,13 @@ macro_rules! register_uint_gauge_vec {
    }};
 }

-/// Special internal registry, to collect metrics independently from the default registry.
-/// Was introduced to fix deadlock with lazy registration of metrics in the default registry.
-static INTERNAL_REGISTRY: Lazy<Registry> = Lazy::new(Registry::new);
-
-/// Register a collector in the internal registry. MUST be called before the first call to `gather()`.
-/// Otherwise, we can have a deadlock in the `gather()` call, trying to register a new collector
-/// while holding the lock.
-pub fn register_internal(c: Box<dyn Collector>) -> Result<()> {
-    INTERNAL_REGISTRY.register(c)
-}
-
 /// Gathers all Prometheus metrics and records the I/O stats just before that.
 ///
 /// Metrics gathering is a relatively simple and standalone operation, so
 /// it might be fine to do it this way to keep things simple.
 pub fn gather() -> Vec<prometheus::proto::MetricFamily> {
    update_rusage_metrics();
-    let mut mfs = prometheus::gather();
-    let mut internal_mfs = INTERNAL_REGISTRY.gather();
-    mfs.append(&mut internal_mfs);
-    mfs
+    prometheus::gather()
 }

 static DISK_IO_BYTES: Lazy<IntGaugeVec> = Lazy::new(|| {
--- a/libs/pageserver_api/Cargo.toml
+++ b/libs/pageserver_api/Cargo.toml
@@ -1,12 +0,0 @@
-[package]
-name = "pageserver_api"
-version = "0.1.0"
-edition = "2021"
-
-[dependencies]
-serde = { version = "1.0", features = ["derive"] }
-serde_with = "1.12.0"
-const_format = "0.2.21"
-
-utils = { path = "../utils" }
-workspace_hack = { version = "0.1", path = "../../workspace_hack" }
--- a/libs/pageserver_api/src/lib.rs
+++ b/libs/pageserver_api/src/lib.rs
@@ -1,9 +0,0 @@
-use const_format::formatcp;
-
-/// Public API types
-pub mod models;
-
-pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
-pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
-pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
-pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
--- a/libs/safekeeper_api/Cargo.toml
+++ b/libs/safekeeper_api/Cargo.toml
@@ -1,12 +0,0 @@
-[package]
-name = "safekeeper_api"
-version = "0.1.0"
-edition = "2021"
-
-[dependencies]
-serde = { version = "1.0", features = ["derive"] }
-serde_with = "1.12.0"
-const_format = "0.2.21"
-
-utils = { path = "../utils" }
-workspace_hack = { version = "0.1", path = "../../workspace_hack" }
--- a/libs/safekeeper_api/src/lib.rs
+++ b/libs/safekeeper_api/src/lib.rs
@@ -1,10 +0,0 @@
-use const_format::formatcp;
-
-/// Public API types
-pub mod models;
-
-pub const DEFAULT_PG_LISTEN_PORT: u16 = 5454;
-pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
-
-pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 7676;
-pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
--- a/libs/utils/src/http/endpoint.rs
+++ b/libs/utils/src/http/endpoint.rs
@@ -9,7 +9,6 @@ use once_cell::sync::Lazy;
 use routerify::ext::RequestExt;
 use routerify::RequestInfo;
 use routerify::{Middleware, Router, RouterBuilder, RouterService};
-use tokio::task::JoinError;
 use tracing::info;

 use std::future::Future;
@@ -36,13 +35,7 @@ async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<Body
    let mut buffer = vec![];
    let encoder = TextEncoder::new();

-    let metrics = tokio::task::spawn_blocking(move || {
-        // Currently we take a lot of mutexes while collecting metrics, so it's
-        // better to spawn a blocking task to avoid blocking the event loop.
-        metrics::gather()
-    })
-    .await
-    .map_err(|e: JoinError| ApiError::InternalServerError(e.into()))?;
+    let metrics = metrics::gather();
    encoder.encode(&metrics, &mut buffer).unwrap();

    let response = Response::builder()
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -58,7 +58,6 @@ rstar = "0.9.3"
 num-traits = "0.2.15"
 amplify_num = "0.4.1"

-pageserver_api = { path = "../libs/pageserver_api" }
 postgres_ffi = { path = "../libs/postgres_ffi" }
 etcd_broker = { path = "../libs/etcd_broker" }
 metrics = { path = "../libs/metrics" }
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -30,10 +30,10 @@ pub mod defaults {
    use crate::tenant_config::defaults::*;
    use const_format::formatcp;

-    pub use pageserver_api::{
-        DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_HTTP_LISTEN_PORT, DEFAULT_PG_LISTEN_ADDR,
-        DEFAULT_PG_LISTEN_PORT,
-    };
+    pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
+    pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
+    pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
+    pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");

    pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "60 s";
    pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";
--- a/pageserver/src/http/mod.rs
+++ b/pageserver/src/http/mod.rs
@@ -1,4 +1,3 @@
+pub mod models;
 pub mod routes;
 pub use routes::make_router;
-
-pub use pageserver_api::models;
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -7,17 +7,7 @@ use utils::{
    lsn::Lsn,
 };

-/// A state of a tenant in pageserver's memory.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
-pub enum TenantState {
-    /// Tenant is fully operational, its background jobs might be running or not.
-    Active { background_jobs_running: bool },
-    /// A tenant is recognized by pageserver, but not yet ready to operate:
-    /// e.g. not present locally and being downloaded or being read into memory from the file system.
-    Paused,
-    /// A tenant is recognized by the pageserver, but no longer used for any operations, as failed to get activated.
-    Broken,
-}
+use crate::tenant::TenantState;

 #[serde_as]
 #[derive(Serialize, Deserialize)]
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -207,62 +207,6 @@ paths:
              schema:
                $ref: "#/components/schemas/Error"

- 
-  /v1/tenant/{tenant_id}/timeline/{timeline_id}/get_lsn_by_timestamp:
-    parameters:
-      - name: tenant_id
-        in: path
-        required: true
-        schema:
-          type: string
-          format: hex
-      - name: timeline_id
-        in: path
-        required: true
-        schema:
-          type: string
-          format: hex
-    get:
-      description: Get LSN by a timestamp
-      parameters:
-        - name: timestamp
-          in: query
-          required: true
-          schema:
-            type: string
-            format: date-time
-          description: A timestamp to get the LSN
-      responses:
-        "200":
-          description: OK
-          content:
-            application/json:
-              schema:
-                type: string
-        "400":
-          description: Error when no tenant id found in path, no timeline id or invalid timestamp
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/Error"
-        "401":
-          description: Unauthorized Error
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/UnauthorizedError"
-        "403":
-          description: Forbidden Error
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/ForbiddenError"
-        "500":
-          description: Generic operation error
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/Error"
  /v1/tenant/{tenant_id}/attach:
    parameters:
      - name: tenant_id
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -12,7 +12,6 @@ use super::models::{
    StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse, TenantInfo,
    TimelineCreateRequest,
 };
-use crate::pgdatadir_mapping::LsnForTimestamp;
 use crate::storage_sync;
 use crate::storage_sync::index::{RemoteIndex, RemoteTimeline};
 use crate::tenant::{TenantState, Timeline};
@@ -266,23 +265,6 @@ fn query_param_present(request: &Request<Body>, param: &str) -> bool {
        .unwrap_or(false)
 }

-fn get_query_param(request: &Request<Body>, param_name: &str) -> Result<String, ApiError> {
-    request.uri().query().map_or(
-        Err(ApiError::BadRequest(anyhow!("empty query in request"))),
-        |v| {
-            url::form_urlencoded::parse(v.as_bytes())
-                .into_owned()
-                .find(|(k, _)| k == param_name)
-                .map_or(
-                    Err(ApiError::BadRequest(anyhow!(
-                        "no {param_name} specified in query parameters"
-                    ))),
-                    |(_, v)| Ok(v),
-                )
-        },
-    )
-}
-
 async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
@@ -347,33 +329,6 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
    }
 }

-async fn get_lsn_by_timestamp_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
-
-    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
-    let timestamp_raw = get_query_param(&request, "timestamp")?;
-    let timestamp = humantime::parse_rfc3339(timestamp_raw.as_str())
-        .with_context(|| format!("Invalid time: {:?}", timestamp_raw))
-        .map_err(ApiError::BadRequest)?;
-    let timestamp_pg = postgres_ffi::to_pg_timestamp(timestamp);
-
-    let timeline = tenant_mgr::get_tenant(tenant_id, true)
-        .and_then(|tenant| tenant.get_timeline(timeline_id))
-        .with_context(|| format!("No timeline {timeline_id} in repository for tenant {tenant_id}"))
-        .map_err(ApiError::NotFound)?;
-    let result = match timeline
-        .find_lsn_for_timestamp(timestamp_pg)
-        .map_err(ApiError::InternalServerError)?
-    {
-        LsnForTimestamp::Present(lsn) => format!("{}", lsn),
-        LsnForTimestamp::Future(_lsn) => "future".into(),
-        LsnForTimestamp::Past(_lsn) => "past".into(),
-        LsnForTimestamp::NoData(_lsn) => "nodata".into(),
-    };
-    json_response(StatusCode::OK, result)
-}
-
 // TODO makes sense to provide tenant config right away the same way as it handled in tenant_create
 async fn tenant_attach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
@@ -382,16 +337,9 @@ async fn tenant_attach_handler(request: Request<Body>) -> Result<Response<Body>,
    info!("Handling tenant attach {tenant_id}");

    tokio::task::spawn_blocking(move || match tenant_mgr::get_tenant(tenant_id, false) {
-        Ok(tenant) => {
-            if tenant.list_timelines().is_empty() {
-                info!("Attaching to tenant {tenant_id} with zero timelines");
-                Ok(())
-            } else {
-                Err(ApiError::Conflict(
-                    "Tenant is already present locally".to_owned(),
-                ))
-            }
-        }
+        Ok(_) => Err(ApiError::Conflict(
+            "Tenant is already present locally".to_owned(),
+        )),
        Err(_) => Ok(()),
    })
    .await
@@ -862,7 +810,9 @@ async fn timeline_compact_handler(request: Request<Body>) -> Result<Response<Bod
        .get_timeline(timeline_id)
        .with_context(|| format!("No timeline {timeline_id} in repository for tenant {tenant_id}"))
        .map_err(ApiError::NotFound)?;
-    timeline.compact().map_err(ApiError::InternalServerError)?;
+    timeline
+        .reconstruct()
+        .map_err(ApiError::InternalServerError)?;

    json_response(StatusCode::OK, ())
 }
@@ -953,10 +903,6 @@ pub fn make_router(
            "/v1/tenant/:tenant_id/timeline/:timeline_id",
            timeline_detail_handler,
        )
-        .get(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/get_lsn_by_timestamp",
-            get_lsn_by_timestamp_handler,
-        )
        .put(
            "/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc",
            testing_api!("run timeline GC", timeline_gc_handler),
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -12,6 +12,7 @@
 use anyhow::{bail, ensure, Context, Result};
 use bytes::{Buf, BufMut, Bytes, BytesMut};
 use futures::{Stream, StreamExt};
+use regex::Regex;
 use std::io;
 use std::net::TcpListener;
 use std::str;
@@ -34,6 +35,7 @@ use crate::basebackup;
 use crate::config::{PageServerConf, ProfilingConfig};
 use crate::import_datadir::{import_basebackup_from_tar, import_wal_from_tar};
 use crate::metrics::{LIVE_CONNECTIONS_COUNT, SMGR_QUERY_TIME};
+use crate::pgdatadir_mapping::LsnForTimestamp;
 use crate::profiling::profpoint_start;
 use crate::reltag::RelTag;
 use crate::task_mgr;
@@ -43,6 +45,7 @@ use crate::tenant_mgr;
 use crate::CheckpointConfig;

 use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
+use postgres_ffi::to_pg_timestamp;
 use postgres_ffi::BLCKSZ;

 // Wrapped in libpq CopyData
@@ -1059,6 +1062,33 @@ impl postgres_backend_async::Handler for PageServerHandler {
                Some(tenant.get_pitr_interval().as_secs().to_string().as_bytes()),
            ]))?
            .write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
+        } else if query_string.starts_with("get_lsn_by_timestamp ") {
+            // Locate the LSN of the last transaction with a timestamp less than or equal to the specified one
+            // TODO lazy static
+            let re = Regex::new(r"^get_lsn_by_timestamp ([[:xdigit:]]+) ([[:xdigit:]]+) '(.*)'$")
+                .unwrap();
+            let caps = re
+                .captures(query_string)
+                .with_context(|| format!("invalid get_lsn_by_timestamp: '{}'", query_string))?;
+            let tenant_id = TenantId::from_str(caps.get(1).unwrap().as_str())?;
+            let timeline_id = TimelineId::from_str(caps.get(2).unwrap().as_str())?;
+            let timestamp = humantime::parse_rfc3339(caps.get(3).unwrap().as_str())?;
+            let timestamp_pg = to_pg_timestamp(timestamp);
+
+            self.check_permission(Some(tenant_id))?;
+
+            let timeline = get_local_timeline(tenant_id, timeline_id)?;
+            pgb.write_message(&BeMessage::RowDescription(&[RowDescriptor::text_col(
+                b"lsn",
+            )]))?;
+            let result = match timeline.find_lsn_for_timestamp(timestamp_pg)? {
+                LsnForTimestamp::Present(lsn) => format!("{}", lsn),
+                LsnForTimestamp::Future(_lsn) => "future".into(),
+                LsnForTimestamp::Past(_lsn) => "past".into(),
+                LsnForTimestamp::NoData(_lsn) => "nodata".into(),
+            };
+            pgb.write_message(&BeMessage::DataRow(&[Some(result.as_bytes())]))?;
+            pgb.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
        } else {
            bail!("unknown command");
        }
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -45,7 +45,6 @@ use crate::tenant_config::TenantConfOpt;
 use crate::virtual_file::VirtualFile;
 use crate::walredo::WalRedoManager;
 use crate::{CheckpointConfig, TEMP_FILE_SUFFIX};
-pub use pageserver_api::models::TenantState;

 use toml_edit;
 use utils::{
@@ -119,6 +118,18 @@ pub struct Tenant {
    upload_layers: bool,
 }

+/// A state of a tenant in pageserver's memory.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+pub enum TenantState {
+    /// Tenant is fully operational, its background jobs might be running or not.
+    Active { background_jobs_running: bool },
+    /// A tenant is recognized by pageserver, but not yet ready to operate:
+    /// e.g. not present locally and being downloaded or being read into memory from the file system.
+    Paused,
+    /// A tenant is recognized by the pageserver, but no longer used for any operations, because it failed to get activated.
+    Broken,
+}
+
 /// A repository corresponds to one .neon directory. One repository holds multiple
 /// timelines, forked off from the same initial call to 'initdb'.
 impl Tenant {
@@ -299,7 +310,7 @@ impl Tenant {

        for (timeline_id, timeline) in &timelines_to_compact {
            let _entered = info_span!("compact_timeline", timeline = %timeline_id).entered();
-            timeline.compact()?;
+            timeline.reconstruct()?;
        }

        Ok(())
@@ -1730,7 +1741,7 @@ mod tests {
        drop(writer);

        tline.checkpoint(CheckpointConfig::Forced)?;
-        tline.compact()?;
+        tline.reconstruct()?;

        let writer = tline.writer();
        writer.put(*TEST_KEY, Lsn(0x20), &Value::Image(TEST_IMG("foo at 0x20")))?;
@@ -1738,7 +1749,7 @@ mod tests {
        drop(writer);

        tline.checkpoint(CheckpointConfig::Forced)?;
-        tline.compact()?;
+        tline.reconstruct()?;

        let writer = tline.writer();
        writer.put(*TEST_KEY, Lsn(0x30), &Value::Image(TEST_IMG("foo at 0x30")))?;
@@ -1746,7 +1757,7 @@ mod tests {
        drop(writer);

        tline.checkpoint(CheckpointConfig::Forced)?;
-        tline.compact()?;
+        tline.reconstruct()?;

        let writer = tline.writer();
        writer.put(*TEST_KEY, Lsn(0x40), &Value::Image(TEST_IMG("foo at 0x40")))?;
@@ -1754,7 +1765,7 @@ mod tests {
        drop(writer);

        tline.checkpoint(CheckpointConfig::Forced)?;
-        tline.compact()?;
+        tline.reconstruct()?;

        assert_eq!(tline.get(*TEST_KEY, Lsn(0x10))?, TEST_IMG("foo at 0x10"));
        assert_eq!(tline.get(*TEST_KEY, Lsn(0x1f))?, TEST_IMG("foo at 0x10"));
@@ -1802,7 +1813,7 @@ mod tests {

            tline.update_gc_info(Vec::new(), cutoff, Duration::ZERO)?;
            tline.checkpoint(CheckpointConfig::Forced)?;
-            tline.compact()?;
+            tline.reconstruct()?;
            tline.gc()?;
        }

@@ -1872,7 +1883,7 @@ mod tests {
            let cutoff = tline.get_last_record_lsn();
            tline.update_gc_info(Vec::new(), cutoff, Duration::ZERO)?;
            tline.checkpoint(CheckpointConfig::Forced)?;
-            tline.compact()?;
+            tline.reconstruct()?;
            tline.gc()?;
        }

@@ -1951,7 +1962,7 @@ mod tests {
            let cutoff = tline.get_last_record_lsn();
            tline.update_gc_info(Vec::new(), cutoff, Duration::ZERO)?;
            tline.checkpoint(CheckpointConfig::Forced)?;
-            tline.compact()?;
+            tline.reconstruct()?;
            tline.gc()?;
        }

--- a/pageserver/src/tenant/delta_layer.rs
+++ b/pageserver/src/tenant/delta_layer.rs
@@ -95,6 +95,9 @@ impl From<&DeltaLayer> for Summary {
 // Flag indicating that this version initialize the page
 const WILL_INIT: u64 = 1;

+// Flag indicating page image
+const IS_IMAGE: u64 = 2;
+
 ///
 /// Struct representing reference to BLOB in layers. Reference contains BLOB
 /// offset, and for WAL records it also contains `will_init` flag. The flag
@@ -109,15 +112,22 @@ impl BlobRef {
        (self.0 & WILL_INIT) != 0
    }

-    pub fn pos(&self) -> u64 {
-        self.0 >> 1
+    pub fn is_image(&self) -> bool {
+        (self.0 & IS_IMAGE) != 0
    }

-    pub fn new(pos: u64, will_init: bool) -> BlobRef {
-        let mut blob_ref = pos << 1;
+    pub fn pos(&self) -> u64 {
+        self.0 >> 2
+    }
+
+    pub fn new(pos: u64, will_init: bool, is_image: bool) -> BlobRef {
+        let mut blob_ref = pos << 2;
        if will_init {
            blob_ref |= WILL_INIT;
        }
+        if is_image {
+            blob_ref |= IS_IMAGE;
+        }
        BlobRef(blob_ref)
    }
 }
@@ -314,13 +324,13 @@ impl Layer for DeltaLayer {
        }
    }

-    fn key_iter<'a>(&'a self) -> Box<dyn Iterator<Item = (Key, Lsn, u64)> + 'a> {
+    fn key_iter<'a>(&'a self, skip_images: bool) -> Box<dyn Iterator<Item = (Key, Lsn, u64)> + 'a> {
        let inner = match self.load() {
            Ok(inner) => inner,
            Err(e) => panic!("Failed to load a delta layer: {e:?}"),
        };

-        match DeltaKeyIter::new(inner) {
+        match DeltaKeyIter::new(inner, skip_images) {
            Ok(iter) => Box::new(iter),
            Err(e) => panic!("Layer index is corrupted: {e:?}"),
        }
@@ -414,6 +424,30 @@ impl Layer for DeltaLayer {

        Ok(())
    }
+
+    fn contains(&self, key: &Key) -> Result<bool> {
+        // Open the file and lock the metadata in memory
+        let inner = self.load()?;
+
+        // Visit the index forwards from (`key`, Lsn(0)) and check whether the first entry visited belongs to `key`.
+        let file = inner.file.as_ref().unwrap();
+        let reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
+            inner.index_start_blk,
+            inner.index_root_blk,
+            file,
+        );
+        let search_key = DeltaKey::from_key_lsn(key, Lsn(0));
+        let mut found = false;
+        reader.visit(
+            &search_key.0,
+            VisitDirection::Forwards,
+            |delta_key, _val| {
+                found = DeltaKey::extract_key_from_buf(delta_key) == *key;
+                false
+            },
+        )?;
+        Ok(found)
+    }
 }

 impl DeltaLayer {
@@ -671,7 +705,13 @@ impl DeltaLayerWriter {
    /// The values must be appended in key, lsn order.
    ///
    pub fn put_value(&mut self, key: Key, lsn: Lsn, val: Value) -> Result<()> {
-        self.put_value_bytes(key, lsn, &Value::ser(&val)?, val.will_init())
+        self.put_value_bytes(
+            key,
+            lsn,
+            &Value::ser(&val)?,
+            val.will_init(),
+            val.is_image(),
+        )
    }

    pub fn put_value_bytes(
@@ -680,12 +720,12 @@ impl DeltaLayerWriter {
        lsn: Lsn,
        val: &[u8],
        will_init: bool,
+        is_image: bool,
    ) -> Result<()> {
        assert!(self.lsn_range.start <= lsn);

        let off = self.blob_writer.write_blob(val)?;
-
-        let blob_ref = BlobRef::new(off, will_init);
+        let blob_ref = BlobRef::new(off, will_init, is_image);

        let delta_key = DeltaKey::from_key_lsn(&key, lsn);
        self.tree.append(&delta_key.0, blob_ref.0)?;
@@ -874,7 +914,7 @@ impl Iterator for DeltaKeyIter {
 }

 impl<'a> DeltaKeyIter {
-    fn new(inner: RwLockReadGuard<'a, DeltaLayerInner>) -> Result<Self> {
+    fn new(inner: RwLockReadGuard<'a, DeltaLayerInner>, skip_images: bool) -> Result<Self> {
        let file = inner.file.as_ref().unwrap();
        let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
            inner.index_start_blk,
@@ -883,29 +923,33 @@ impl<'a> DeltaKeyIter {
        );

        let mut all_keys: Vec<(DeltaKey, u64)> = Vec::new();
+        let mut last_pos = 0u64;
+        let mut last_delta: Option<DeltaKey> = None;
        tree_reader.visit(
            &[0u8; DELTA_KEY_SIZE],
            VisitDirection::Forwards,
            |key, value| {
-                let delta_key = DeltaKey::from_slice(key);
-                let pos = BlobRef(value).pos();
-                if let Some(last) = all_keys.last_mut() {
-                    if last.0.key() == delta_key.key() {
-                        return true;
-                    } else {
-                        // subtract offset of new key BLOB and first blob of this key
-                        // to get total size if values associated with this key
-                        let first_pos = last.1;
-                        last.1 = pos - first_pos;
+                let blob_ref = BlobRef(value);
+                if !blob_ref.is_image() || !skip_images {
+                    let next_delta = DeltaKey::from_slice(key);
+                    let pos = blob_ref.pos();
+                    if let Some(prev_delta) = last_delta.take() {
+                        if prev_delta.key() == next_delta.key() {
+                            last_delta = Some(next_delta);
+                            return true;
+                        }
+                        all_keys.push((prev_delta, pos - last_pos));
                    }
+                    last_delta = Some(next_delta);
+                    last_pos = pos;
                }
-                all_keys.push((delta_key, pos));
                true
            },
        )?;
-        if let Some(last) = all_keys.last_mut() {
+        if let Some(prev_delta) = last_delta.take() {
            // Last key occupies all space till end of layer
-            last.1 = std::fs::metadata(&file.file.path)?.len() - last.1;
+            let file_size = std::fs::metadata(&file.file.path)?.len();
+            all_keys.push((prev_delta, file_size - last_pos));
        }
        let iter = DeltaKeyIter {
            all_keys,
--- a/pageserver/src/tenant/image_layer.rs
+++ b/pageserver/src/tenant/image_layer.rs
@@ -223,6 +223,10 @@ impl Layer for ImageLayer {

        Ok(())
    }
+
+    fn contains(&self, key: &Key) -> Result<bool> {
+        Ok(self.get_key_range().contains(key))
+    }
 }

 impl ImageLayer {
--- a/pageserver/src/tenant/inmemory_layer.rs
+++ b/pageserver/src/tenant/inmemory_layer.rs
@@ -235,6 +235,11 @@ impl Layer for InMemoryLayer {

        Ok(())
    }
+
+    fn contains(&self, key: &Key) -> Result<bool> {
+        let inner = self.inner.read().unwrap();
+        Ok(inner.index.get(key).is_some())
+    }
 }

 impl InMemoryLayer {
@@ -358,8 +363,14 @@ impl InMemoryLayer {
            // Write all page versions
            for (lsn, pos) in vec_map.as_slice() {
                cursor.read_blob_into_buf(*pos, &mut buf)?;
-                let will_init = Value::des(&buf)?.will_init();
-                delta_layer_writer.put_value_bytes(key, *lsn, &buf, will_init)?;
+                let value = Value::des(&buf)?;
+                delta_layer_writer.put_value_bytes(
+                    key,
+                    *lsn,
+                    &buf,
+                    value.will_init(),
+                    value.is_image(),
+                )?;
            }
        }

--- a/pageserver/src/tenant/layer_map.rs
+++ b/pageserver/src/tenant/layer_map.rs
@@ -62,9 +62,8 @@ pub struct LayerMap {
    historic_layers: BTreeMap<BTreeKey, Arc<dyn Layer>>,
    layers_seqno: usize,

-    /// L0 layers have key range Key::MIN..Key::MAX, and locating them using R-Tree search is very inefficient.
-    /// So L0 layers are held in l0_delta_layers vector, in addition to the R-tree.
-    l0_delta_layers: Vec<Arc<dyn Layer>>,
+    /// Latest stored delta layer
+    latest_delta_layer: Option<Arc<dyn Layer>>,
 }

 /// Return value of LayerMap::search
@@ -185,7 +184,19 @@ impl LayerMap {
    ///
    pub fn insert_historic(&mut self, layer: Arc<dyn Layer>) {
        if layer.get_key_range() == (Key::MIN..Key::MAX) {
-            self.l0_delta_layers.push(layer.clone());
+            self.latest_delta_layer = Some(layer.clone());
+        } else if !layer.is_incremental() {
+            // If latest delta layer is followed by image layers
+            // then reset it, preventing generation of partial image layer
+            if let Some(latest_delta) = &self.latest_delta_layer {
+                // Maybe it would be more correct to use contains() rather than intersects,
+                // but one delta layer can be covered by several image layers.
+                let kr1 = layer.get_key_range();
+                let kr2 = latest_delta.get_key_range();
+                if range_overlaps(&kr1, &kr2) {
+                    self.latest_delta_layer = None;
+                }
+            }
        }
        self.historic_layers.insert(
            BTreeKey {
@@ -205,16 +216,12 @@ impl LayerMap {
    ///
    pub fn remove_historic(&mut self, layer: Arc<dyn Layer>) {
        if layer.get_key_range() == (Key::MIN..Key::MAX) {
-            let len_before = self.l0_delta_layers.len();
-
-            // FIXME: ptr_eq might fail to return true for 'dyn'
-            // references.  Clippy complains about this. In practice it
-            // seems to work, the assertion below would be triggered
-            // otherwise but this ought to be fixed.
-            #[allow(clippy::vtable_address_comparisons)]
-            self.l0_delta_layers
-                .retain(|other| !Arc::ptr_eq(other, &layer));
-            assert_eq!(self.l0_delta_layers.len(), len_before - 1);
+            if let Some(latest_layer) = &self.latest_delta_layer {
+                #[allow(clippy::vtable_address_comparisons)]
+                if Arc::ptr_eq(&layer, latest_layer) {
+                    self.latest_delta_layer = None;
+                }
+            }
        }
        let len_before = self.historic_layers.len();
        #[allow(clippy::vtable_address_comparisons)]
@@ -384,8 +391,8 @@ impl LayerMap {
    }

    /// Return all L0 delta layers
-    pub fn get_level0_deltas(&self) -> Result<Vec<Arc<dyn Layer>>> {
-        Ok(self.l0_delta_layers.clone())
+    pub fn get_latest_delta_layer(&mut self) -> Option<Arc<dyn Layer>> {
+        self.latest_delta_layer.take()
    }

    /// debugging function to print out the contents of the layer map
--- a/pageserver/src/tenant/storage_layer.rs
+++ b/pageserver/src/tenant/storage_layer.rs
@@ -139,9 +139,9 @@ pub trait Layer: Send + Sync {
    /// Iterate through all keys and values stored in the layer
    fn iter(&self) -> Box<dyn Iterator<Item = Result<(Key, Lsn, Value)>> + '_>;

-    /// Iterate through all keys stored in the layer. Returns key, lsn and value size
-    /// It is used only for compaction and so is currently implemented only for DeltaLayer
-    fn key_iter(&self) -> Box<dyn Iterator<Item = (Key, Lsn, u64)> + '_> {
+    /// Iterate through all keys stored in the layer. Returns key, lsn and value size.
+    /// It is used only for reconstruction and so is currently implemented only for DeltaLayer
+    fn key_iter(&self, _skip_images: bool) -> Box<dyn Iterator<Item = (Key, Lsn, u64)> + '_> {
        panic!("Not implemented")
    }

@@ -150,4 +150,7 @@ pub trait Layer: Send + Sync {

    /// Dump summary of the contents of the layer to stdout
    fn dump(&self, verbose: bool) -> Result<()>;
+
+    // Check if the layer contains a particular key
+    fn contains(&self, key: &Key) -> Result<bool>;
 }
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -3,7 +3,6 @@
 use anyhow::{anyhow, bail, ensure, Context, Result};
 use bytes::Bytes;
 use fail::fail_point;
-use itertools::Itertools;
 use once_cell::sync::OnceCell;
 use tokio::task::spawn_blocking;
 use tracing::*;
@@ -119,7 +118,7 @@ pub struct Timeline {

    /// Layer removal lock.
    /// A lock to ensure that no layer of the timeline is removed concurrently by other tasks.
-    /// This lock is acquired in [`Timeline::gc`], [`Timeline::compact`],
+    /// This lock is acquired in [`Timeline::gc`], [`Timeline::reconstruct`],
    /// and [`Tenant::delete_timeline`].
    layer_removal_cs: Mutex<()>,

@@ -469,7 +468,7 @@ impl Timeline {
            CheckpointConfig::Forced => {
                self.freeze_inmem_layer(false);
                self.flush_frozen_layers(true)?;
-                self.compact()
+                self.reconstruct()
            }
        }
    }
@@ -510,13 +509,6 @@ impl Timeline {
            .unwrap_or(self.conf.default_tenant_conf.compaction_target_size)
    }

-    fn get_compaction_threshold(&self) -> usize {
-        let tenant_conf = self.tenant_conf.read().unwrap();
-        tenant_conf
-            .compaction_threshold
-            .unwrap_or(self.conf.default_tenant_conf.compaction_threshold)
-    }
-
    fn get_image_creation_threshold(&self) -> usize {
        let tenant_conf = self.tenant_conf.read().unwrap();
        tenant_conf
@@ -597,7 +589,7 @@ impl Timeline {
            last_received_wal: Mutex::new(None),
            rel_size_cache: RwLock::new(HashMap::new()),
        };
-        result.repartition_threshold = result.get_checkpoint_distance() / 10;
+        result.repartition_threshold = result.get_checkpoint_distance() * 3;
        result
    }

@@ -731,7 +723,7 @@ impl Timeline {
    pub fn layer_removal_guard(&self) -> anyhow::Result<MutexGuard<()>> {
        self.layer_removal_cs
            .try_lock()
-            .map_err(|e| anyhow!("cannot lock compaction critical section {e}"))
+            .map_err(|e| anyhow!("cannot lock reconstruction critical section {e}"))
    }

    /// Retrieve current logical size of the timeline.
@@ -1333,17 +1325,17 @@ impl Timeline {
        Ok(new_delta_path)
    }

-    pub fn compact(&self) -> anyhow::Result<()> {
+    pub fn reconstruct(&self) -> anyhow::Result<()> {
        let last_record_lsn = self.get_last_record_lsn();

        // Last record Lsn could be zero in case the timelie was just created
        if !last_record_lsn.is_valid() {
-            warn!("Skipping compaction for potentially just initialized timeline, it has invalid last record lsn: {last_record_lsn}");
+            warn!("Skipping reconstruction for potentially just initialized timeline, it has invalid last record lsn: {last_record_lsn}");
            return Ok(());
        }

        //
-        // High level strategy for compaction / image creation:
+        // High level strategy for reconstruction / image creation:
        //
        // 1. First, calculate the desired "partitioning" of the
        // currently in-use key space. The goal is to partition the
@@ -1367,13 +1359,13 @@ impl Timeline {
        // total in the delta file. Or perhaps: if creating an image
        // file would allow to delete some older files.
        //
-        // 3. After that, we compact all level0 delta files if there
-        // are too many of them.  While compacting, we also garbage
+        // 3. After that, we reconstruct all level0 delta files if there
+        // are too many of them.  While reconstructing, we also garbage
        // collect any page versions that are no longer needed because
        // of the new image layers we created in step 2.
        //
        // TODO: This high level strategy hasn't been implemented yet.
-        // Below are functions compact_level0() and create_image_layers()
+        // Below are functions reconstruct_level0() and create_image_layers()
        // but they are a bit ad hoc and don't quite work like it's explained
        // above. Rewrite it.
        let _layer_removal_cs = self.layer_removal_cs.lock().unwrap();
@@ -1400,21 +1392,21 @@ impl Timeline {
                        None,
                    );
                }
-
-                // 3. Compact
-                let timer = self.metrics.compact_time_histo.start_timer();
-                self.compact_level0(target_file_size)?;
-                timer.stop_and_record();
            }
            Err(err) => {
                // no partitioning? This is normal, if the timeline was just created
                // as an empty timeline. Also in unit tests, when we use the timeline
                // as a simple key-value store, ignoring the datadir layout. Log the
                // error but continue.
-                error!("could not compact, repartitioning keyspace failed: {err:?}");
+                error!("could not reconstruct, repartitioning keyspace failed: {err:?}");
            }
        };

+        // 3. Reconstruct
+        let timer = self.metrics.reconstruct_time_histo.start_timer();
+        self.reconstruct_level0(target_file_size)?;
+        timer.stop_and_record();
+
        Ok(())
    }

@@ -1514,7 +1506,7 @@ impl Timeline {
        // We must also fsync the timeline dir to ensure the directory entries for
        // new layer files are durable
        //
-        // Compaction creates multiple image layers. It would be better to create them all
+        // Reconstruction creates multiple image layers. It would be better to create them all
        // and fsync them all in parallel.
        let mut all_paths = Vec::from_iter(layer_paths_to_upload.clone());
        all_paths.push(self.conf.timeline_path(&self.timeline_id, &self.tenant_id));
@@ -1534,230 +1526,46 @@ impl Timeline {
    }

    ///
-    /// Collect a bunch of Level 0 layer files, and compact and reshuffle them as
+    /// Collect a bunch of Level 0 layer files, and reconstruct and reshuffle them
    /// as Level 1 files.
    ///
-    fn compact_level0(&self, target_file_size: u64) -> Result<()> {
-        let layers = self.layers.read().unwrap();
-        let mut level0_deltas = layers.get_level0_deltas()?;
+    fn reconstruct_level0(&self, target_file_size: u64) -> Result<()> {
+        let mut layers = self.layers.write().unwrap();
+        let latest_delta_layer = layers.get_latest_delta_layer();
        drop(layers);
-
-        // Only compact if enough layers have accumulated.
-        if level0_deltas.is_empty() || level0_deltas.len() < self.get_compaction_threshold() {
-            return Ok(());
-        }
-
-        // Gather the files to compact in this iteration.
-        //
-        // Start with the oldest Level 0 delta file, and collect any other
-        // level 0 files that form a contiguous sequence, such that the end
-        // LSN of previous file matches the start LSN of the next file.
-        //
-        // Note that if the files don't form such a sequence, we might
-        // "compact" just a single file. That's a bit pointless, but it allows
-        // us to get rid of the level 0 file, and compact the other files on
-        // the next iteration. This could probably made smarter, but such
-        // "gaps" in the sequence of level 0 files should only happen in case
-        // of a crash, partial download from cloud storage, or something like
-        // that, so it's not a big deal in practice.
-        level0_deltas.sort_by_key(|l| l.get_lsn_range().start);
-        let mut level0_deltas_iter = level0_deltas.iter();
-
-        let first_level0_delta = level0_deltas_iter.next().unwrap();
-        let mut prev_lsn_end = first_level0_delta.get_lsn_range().end;
-        let mut deltas_to_compact = vec![Arc::clone(first_level0_delta)];
-        for l in level0_deltas_iter {
-            let lsn_range = l.get_lsn_range();
-
-            if lsn_range.start != prev_lsn_end {
-                break;
-            }
-            deltas_to_compact.push(Arc::clone(l));
-            prev_lsn_end = lsn_range.end;
-        }
-        let lsn_range = Range {
-            start: deltas_to_compact.first().unwrap().get_lsn_range().start,
-            end: deltas_to_compact.last().unwrap().get_lsn_range().end,
-        };
-
-        info!(
-            "Starting Level0 compaction in LSN range {}-{} for {} layers ({} deltas in total)",
-            lsn_range.start,
-            lsn_range.end,
-            deltas_to_compact.len(),
-            level0_deltas.len()
-        );
-        for l in deltas_to_compact.iter() {
-            info!("compact includes {}", l.filename().display());
-        }
-        // We don't need the original list of layers anymore. Drop it so that
-        // we don't accidentally use it later in the function.
-        drop(level0_deltas);
-
-        // This iterator walks through all key-value pairs from all the layers
-        // we're compacting, in key, LSN order.
-        let all_values_iter = deltas_to_compact
-            .iter()
-            .map(|l| l.iter())
-            .kmerge_by(|a, b| {
-                if let Ok((a_key, a_lsn, _)) = a {
-                    if let Ok((b_key, b_lsn, _)) = b {
-                        match a_key.cmp(b_key) {
-                            Ordering::Less => true,
-                            Ordering::Equal => a_lsn <= b_lsn,
-                            Ordering::Greater => false,
-                        }
-                    } else {
-                        false
-                    }
-                } else {
-                    true
-                }
-            });
-
-        // This iterator walks through all keys and is needed to calculate size used by each key
-        let mut all_keys_iter = deltas_to_compact
-            .iter()
-            .map(|l| l.key_iter())
-            .kmerge_by(|a, b| {
-                let (a_key, a_lsn, _) = a;
-                let (b_key, b_lsn, _) = b;
-                match a_key.cmp(b_key) {
-                    Ordering::Less => true,
-                    Ordering::Equal => a_lsn <= b_lsn,
-                    Ordering::Greater => false,
-                }
-            });
-
-        // Merge the contents of all the input delta layers into a new set
-        // of delta layers, based on the current partitioning.
-        //
-        // We split the new delta layers on the key dimension. We iterate through the key space, and for each key, check if including the next key to the current output layer we're building would cause the layer to become too large. If so, dump the current output layer and start new one.
-        // It's possible that there is a single key with so many page versions that storing all of them in a single layer file
-        // would be too large. In that case, we also split on the LSN dimension.
-        //
-        // LSN
-        //  ^
-        //  |
-        //  | +-----------+            +--+--+--+--+
-        //  | |           |            |  |  |  |  |
-        //  | +-----------+            |  |  |  |  |
-        //  | |           |            |  |  |  |  |
-        //  | +-----------+     ==>    |  |  |  |  |
-        //  | |           |            |  |  |  |  |
-        //  | +-----------+            |  |  |  |  |
-        //  | |           |            |  |  |  |  |
-        //  | +-----------+            +--+--+--+--+
-        //  |
-        //  +--------------> key
-        //
-        //
-        // If one key (X) has a lot of page versions:
-        //
-        // LSN
-        //  ^
-        //  |                                 (X)
-        //  | +-----------+            +--+--+--+--+
-        //  | |           |            |  |  |  |  |
-        //  | +-----------+            |  |  +--+  |
-        //  | |           |            |  |  |  |  |
-        //  | +-----------+     ==>    |  |  |  |  |
-        //  | |           |            |  |  +--+  |
-        //  | +-----------+            |  |  |  |  |
-        //  | |           |            |  |  |  |  |
-        //  | +-----------+            +--+--+--+--+
-        //  |
-        //  +--------------> key
-        // TODO: this actually divides the layers into fixed-size chunks, not
-        // based on the partitioning.
-        //
-        // TODO: we should also opportunistically materialize and
-        // garbage collect what we can.
-        let mut new_layers = Vec::new();
-        let mut prev_key: Option<Key> = None;
        let mut writer: Option<DeltaLayerWriter> = None;
-        let mut key_values_total_size = 0u64;
-        let mut dup_start_lsn: Lsn = Lsn::INVALID; // start LSN of layer containing values of the single key
-        let mut dup_end_lsn: Lsn = Lsn::INVALID; // end LSN of layer containing values of the single key
-        for x in all_values_iter {
-            let (key, lsn, value) = x?;
-            let same_key = prev_key.map_or(false, |prev_key| prev_key == key);
-            // We need to check key boundaries once we reach next key or end of layer with the same key
-            if !same_key || lsn == dup_end_lsn {
-                let mut next_key_size = 0u64;
-                let is_dup_layer = dup_end_lsn.is_valid();
-                dup_start_lsn = Lsn::INVALID;
-                if !same_key {
-                    dup_end_lsn = Lsn::INVALID;
-                }
-                // Determine size occupied by this key. We stop at next key or when size becomes larger than target_file_size
-                for (next_key, next_lsn, next_size) in all_keys_iter.by_ref() {
-                    next_key_size = next_size;
-                    if key != next_key {
-                        if dup_end_lsn.is_valid() {
-                            // We are writting segment with duplicates:
-                            // place all remaining values of this key in separate segment
-                            dup_start_lsn = dup_end_lsn; // new segments starts where old stops
-                            dup_end_lsn = lsn_range.end; // there are no more values of this key till end of LSN range
-                        }
-                        break;
-                    }
-                    key_values_total_size += next_size;
-                    // Check if it is time to split segment: if total keys size is larger than target file size.
-                    // We need to avoid generation of empty segments if next_size > target_file_size.
-                    if key_values_total_size > target_file_size && lsn != next_lsn {
-                        // Split key between multiple layers: such layer can contain only single key
-                        dup_start_lsn = if dup_end_lsn.is_valid() {
-                            dup_end_lsn // new segment with duplicates starts where old one stops
-                        } else {
-                            lsn // start with the first LSN for this key
-                        };
-                        dup_end_lsn = next_lsn; // upper LSN boundary is exclusive
-                        break;
-                    }
-                }
-                // handle case when loop reaches last key: in this case dup_end is non-zero but dup_start is not set.
-                if dup_end_lsn.is_valid() && !dup_start_lsn.is_valid() {
-                    dup_start_lsn = dup_end_lsn;
-                    dup_end_lsn = lsn_range.end;
-                }
-                if writer.is_some() {
-                    let written_size = writer.as_mut().unwrap().size();
-                    // check if key cause layer overflow...
-                    if is_dup_layer
-                        || dup_end_lsn.is_valid()
-                        || written_size + key_values_total_size > target_file_size
-                    {
-                        // ... if so, flush previous layer and prepare to write new one
-                        new_layers.push(writer.take().unwrap().finish(prev_key.unwrap().next())?);
+        let mut new_layers = Vec::new();
+        let mut last_key: Option<Key> = None;
+        if let Some(last_delta_layer) = latest_delta_layer {
+            let end_lsn = last_delta_layer.get_lsn_range().end;
+            let lsn_range = end_lsn..end_lsn + 1;
+            for (key, lsn, _) in last_delta_layer.key_iter(true) {
+                let value = self.get(key, lsn)?;
+                if let Some(curr_writer) = &writer {
+                    if curr_writer.size() > target_file_size {
+                        new_layers.push(writer.take().unwrap().finish(key)?);
                        writer = None;
                    }
                }
-                // Remember size of key value because at next iteration we will access next item
-                key_values_total_size = next_key_size;
-            }
-            if writer.is_none() {
                // Create writer if not initialized yet
-                writer = Some(DeltaLayerWriter::new(
-                    self.conf,
-                    self.timeline_id,
-                    self.tenant_id,
-                    key,
-                    if dup_end_lsn.is_valid() {
-                        // this is a layer containing slice of values of the same key
-                        debug!("Create new dup layer {}..{}", dup_start_lsn, dup_end_lsn);
-                        dup_start_lsn..dup_end_lsn
-                    } else {
-                        debug!("Create new layer {}..{}", lsn_range.start, lsn_range.end);
-                        lsn_range.clone()
-                    },
-                )?);
+                if writer.is_none() {
+                    writer = Some(DeltaLayerWriter::new(
+                        self.conf,
+                        self.timeline_id,
+                        self.tenant_id,
+                        key,
+                        lsn_range.clone(),
+                    )?);
+                }
+                writer
+                    .as_mut()
+                    .unwrap()
+                    .put_value(key, end_lsn, Value::Image(value))?;
+                last_key = Some(key);
            }
-            writer.as_mut().unwrap().put_value(key, lsn, value)?;
-            prev_key = Some(key);
        }
        if let Some(writer) = writer {
-            new_layers.push(writer.finish(prev_key.unwrap().next())?);
+            new_layers.push(writer.finish(last_key.unwrap().next())?);
        }

        // Sync layers
@@ -1787,23 +1595,6 @@ impl Timeline {
            new_layer_paths.insert(new_delta_path);
            layers.insert_historic(Arc::new(l));
        }
-
-        // Now that we have reshuffled the data to set of new delta layers, we can
-        // delete the old ones
-        let mut layer_paths_do_delete = HashSet::with_capacity(deltas_to_compact.len());
-        drop(all_keys_iter);
-        for l in deltas_to_compact {
-            if let Some(path) = l.local_path() {
-                self.metrics
-                    .current_physical_size_gauge
-                    .sub(path.metadata()?.len());
-                layer_paths_do_delete.insert(path);
-            }
-            l.delete()?;
-            layers.remove_historic(l);
-        }
-        drop(layers);
-
        if self.upload_layers.load(atomic::Ordering::Relaxed) {
            storage_sync::schedule_layer_upload(
                self.tenant_id,
@@ -1811,11 +1602,6 @@ impl Timeline {
                new_layer_paths,
                None,
            );
-            storage_sync::schedule_layer_delete(
-                self.tenant_id,
-                self.timeline_id,
-                layer_paths_do_delete,
-            );
        }

        Ok(())
@@ -1823,10 +1609,10 @@ impl Timeline {

    /// Update information about which layer files need to be retained on
    /// garbage collection. This is separate from actually performing the GC,
-    /// and is updated more frequently, so that compaction can remove obsolete
+    /// and is updated more frequently, so that reconstruction can remove obsolete
    /// page versions more aggressively.
    ///
-    /// TODO: that's wishful thinking, compaction doesn't actually do that
+    /// TODO: that's wishful thinking, reconstruction doesn't actually do that
    /// currently.
    ///
    /// The caller specifies how much history is needed with the 3 arguments:
--- a/pageserver/src/tenant_mgr.rs
+++ b/pageserver/src/tenant_mgr.rs
@@ -108,10 +108,6 @@ pub fn init_tenant_mgr(
 /// Attempts to load as many entities as possible: if a certain timeline fails during the load, the tenant is marked as "Broken",
 /// and the load continues.
 ///
-/// For successful tenant attach, it first has to have a `timelines/` subdirectory and a tenant config file that's loaded into memory successfully.
-/// If either of the conditions fails, the tenant will be added to memory with [`TenantState::Broken`] state, otherwise we start to load its timelines.
-/// Alternatively, tenant is considered loaded successfully, if it's already in pageserver's memory (i.e. was loaded already before).
-///
 /// Attach happens on startup and successful timeline downloads
 /// (some subset of timeline files, always including its metadata, after which the new one needs to be registered).
 pub fn attach_local_tenants(
@@ -177,28 +173,16 @@ fn load_local_tenant(
        remote_index.clone(),
        conf.remote_storage_config.is_some(),
    ));
-
-    let tenant_timelines_dir = conf.timelines_path(&tenant_id);
-    if !tenant_timelines_dir.is_dir() {
-        error!(
-            "Tenant {} has no timelines directory at {}",
-            tenant_id,
-            tenant_timelines_dir.display()
-        );
-        tenant.set_state(TenantState::Broken);
-    } else {
-        match Tenant::load_tenant_config(conf, tenant_id) {
-            Ok(tenant_conf) => {
-                tenant.update_tenant_config(tenant_conf);
-                tenant.activate(false);
-            }
-            Err(e) => {
-                error!("Failed to read config for tenant {tenant_id}, disabling tenant: {e:?}");
-                tenant.set_state(TenantState::Broken);
-            }
+    match Tenant::load_tenant_config(conf, tenant_id) {
+        Ok(tenant_conf) => {
+            tenant.update_tenant_config(tenant_conf);
+            tenant.activate(false);
+        }
+        Err(e) => {
+            error!("Failed to read config for tenant {tenant_id}, disabling tenant: {e:?}");
+            tenant.set_state(TenantState::Broken);
        }
    }
-
    tenant
 }

@@ -646,10 +630,14 @@ fn collect_timelines_for_tenant(
    }

    if tenant_timelines.is_empty() {
-        // this is normal, we've removed all broken, empty and temporary timeline dirs
-        // but should allow the tenant to stay functional and allow creating new timelines
-        // on a restart, we require tenants to have the timelines dir, so leave it on disk
-        debug!("Tenant {tenant_id} has no timelines loaded");
+        match remove_if_empty(&timelines_dir) {
+            Ok(true) => info!(
+                "Removed empty tenant timelines directory {}",
+                timelines_dir.display()
+            ),
+            Ok(false) => (),
+            Err(e) => error!("Failed to remove empty tenant timelines directory: {e:?}"),
+        }
    }

    Ok((tenant_id, tenant_timelines))
--- a/pageserver/src/walreceiver/walreceiver_connection.rs
+++ b/pageserver/src/walreceiver/walreceiver_connection.rs
@@ -12,8 +12,6 @@ use chrono::{NaiveDateTime, Utc};
 use fail::fail_point;
 use futures::StreamExt;
 use postgres::{SimpleQueryMessage, SimpleQueryRow};
-use postgres_ffi::v14::xlog_utils::normalize_lsn;
-use postgres_ffi::WAL_SEGMENT_SIZE;
 use postgres_protocol::message::backend::ReplicationMessage;
 use postgres_types::PgLsn;
 use tokio::{pin, select, sync::watch, time};
@@ -158,14 +156,6 @@ pub async fn handle_walreceiver_connection(
    // There might be some padding after the last full record, skip it.
    startpoint += startpoint.calc_padding(8u32);

-    // If the starting point is at a WAL page boundary, skip past the page header. We don't need the page headers
-    // for anything, and in some corner cases, the compute node might have never generated the WAL for page headers
-    //. That happens if you create a branch at page boundary: the start point of the branch is at the page boundary,
-    // but when the compute node first starts on the branch, we normalize the first REDO position to just after the page
-    // header (see generate_pg_control()), so the WAL for the page header is never streamed from the compute node
-    //  to the safekeepers.
-    startpoint = normalize_lsn(startpoint, WAL_SEGMENT_SIZE);
-
    info!("last_record_lsn {last_rec_lsn} starting replication from {startpoint}, safekeeper is at {end_of_wal}...");

    let query = format!("START_REPLICATION PHYSICAL {startpoint}");
--- a/pytest.ini
+++ b/pytest.ini
@@ -5,7 +5,6 @@ filterwarnings =
    ignore:record_property is incompatible with junit_family:pytest.PytestWarning
 addopts =
    -m 'not remote_cluster'
-    --ignore=test_runner/performance
 markers =
    remote_cluster
 testpaths =
--- a/safekeeper/Cargo.toml
+++ b/safekeeper/Cargo.toml
@@ -33,7 +33,6 @@ toml_edit = { version = "0.13", features = ["easy"] }
 thiserror = "1"
 parking_lot = "0.12.1"

-safekeeper_api = { path = "../libs/safekeeper_api" }
 postgres_ffi = { path = "../libs/postgres_ffi" }
 metrics = { path = "../libs/metrics" }
 utils = { path = "../libs/utils" }
--- a/safekeeper/src/bin/safekeeper.rs
+++ b/safekeeper/src/bin/safekeeper.rs
@@ -291,8 +291,9 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo

    // Register metrics collector for active timelines. It's important to do this
    // after daemonizing, otherwise process collector will be upset.
+    let registry = metrics::default_registry();
    let timeline_collector = safekeeper::metrics::TimelineCollector::new();
-    metrics::register_internal(Box::new(timeline_collector))?;
+    registry.register(Box::new(timeline_collector))?;

    let signals = signals::install_shutdown_handlers()?;
    let mut threads = vec![];
--- a/safekeeper/src/http/mod.rs
+++ b/safekeeper/src/http/mod.rs
@@ -1,4 +1,3 @@
+pub mod models;
 pub mod routes;
 pub use routes::make_router;
-
-pub use safekeeper_api::models;
--- a/libs/safekeeper_api/src/models.rs
+++ b/libs/safekeeper_api/src/models.rs
--- a/safekeeper/src/lib.rs
+++ b/safekeeper/src/lib.rs
@@ -27,13 +27,14 @@ mod timelines_global_map;
 pub use timelines_global_map::GlobalTimelines;

 pub mod defaults {
+    use const_format::formatcp;
    use std::time::Duration;

-    pub use safekeeper_api::{
-        DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_HTTP_LISTEN_PORT, DEFAULT_PG_LISTEN_ADDR,
-        DEFAULT_PG_LISTEN_PORT,
-    };
+    pub const DEFAULT_PG_LISTEN_PORT: u16 = 5454;
+    pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");

+    pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 7676;
+    pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
    pub const DEFAULT_RECALL_PERIOD: Duration = Duration::from_secs(10);
    pub const DEFAULT_WAL_BACKUP_RUNTIME_THREADS: usize = 8;
 }
--- a/test_runner/README.md
+++ b/test_runner/README.md
@@ -56,14 +56,6 @@ If you want to run all tests that have the string "bench" in their names:

 `./scripts/pytest -k bench`

-To run tests in parellel we utilize `pytest-xdist` plugin. By default everything runs single threaded. Number of workers can be specified with `-n` argument:
-
-`./scripts/pytest -n4`
-
-By default performance tests are excluded. To run them explicitly pass performance tests selection to the script:
-
-`./scripts/pytest test_runner/performance`
-
 Useful environment variables:

 `NEON_BIN`: The directory where neon binaries can be found.
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -455,9 +455,6 @@ class RemoteStorageKind(enum.Enum):
    LOCAL_FS = "local_fs"
    MOCK_S3 = "mock_s3"
    REAL_S3 = "real_s3"
-    # Pass to tests that are generic to remote storage
-    # to ensure the test pass with or without the remote storage
-    NOOP = "noop"


 def available_remote_storages() -> List[RemoteStorageKind]:
@@ -586,9 +583,7 @@ class NeonEnvBuilder:
        test_name: str,
        force_enable: bool = True,
    ):
-        if remote_storage_kind == RemoteStorageKind.NOOP:
-            return
-        elif remote_storage_kind == RemoteStorageKind.LOCAL_FS:
+        if remote_storage_kind == RemoteStorageKind.LOCAL_FS:
            self.enable_local_fs_remote_storage(force_enable=force_enable)
        elif remote_storage_kind == RemoteStorageKind.MOCK_S3:
            self.enable_mock_s3_remote_storage(bucket_name=test_name, force_enable=force_enable)
@@ -1136,19 +1131,6 @@ class NeonPageserverHttpClient(requests.Session):
        assert res_json is None
        return res_json

-    def timeline_get_lsn_by_timestamp(
-        self, tenant_id: TenantId, timeline_id: TimelineId, timestamp
-    ):
-        log.info(
-            f"Requesting lsn by timestamp {timestamp}, tenant {tenant_id}, timeline {timeline_id}"
-        )
-        res = self.get(
-            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/get_lsn_by_timestamp?timestamp={timestamp}",
-        )
-        self.verbose_error(res)
-        res_json = res.json()
-        return res_json
-
    def timeline_checkpoint(self, tenant_id: TenantId, timeline_id: TimelineId):
        log.info(f"Requesting checkpoint: tenant {tenant_id}, timeline {timeline_id}")
        res = self.put(
@@ -1200,7 +1182,6 @@ class AbstractNeonCli(abc.ABC):
        arguments: List[str],
        extra_env_vars: Optional[Dict[str, str]] = None,
        check_return_code=True,
-        timeout=None,
    ) -> "subprocess.CompletedProcess[str]":
        """
        Run the command with the specified arguments.
@@ -1247,7 +1228,6 @@ class AbstractNeonCli(abc.ABC):
            universal_newlines=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
-            timeout=timeout,
        )
        if not res.returncode:
            log.info(f"Run success: {res.stdout}")
@@ -1621,14 +1601,6 @@ class WalCraft(AbstractNeonCli):
        res.check_returncode()


-class ComputeCtl(AbstractNeonCli):
-    """
-    A typed wrapper around the `compute_ctl` CLI tool.
-    """
-
-    COMMAND = "compute_ctl"
-
-
 class NeonPageserver(PgProtocol):
    """
    An object representing a running pageserver.
--- a/test_runner/regress/test_branching.py
+++ b/test_runner/regress/test_branching.py
@@ -6,8 +6,6 @@ from typing import List
 import pytest
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres
-from fixtures.types import Lsn
-from fixtures.utils import query_scalar
 from performance.test_perf_pgbench import get_scales_matrix


@@ -90,39 +88,3 @@ def test_branching_with_pgbench(
    for pg in pgs:
        res = pg.safe_psql("SELECT count(*) from pgbench_accounts")
        assert res[0] == (100000 * scale,)
-
-
-# Test branching from an "unnormalized" LSN.
-#
-# Context:
-# When doing basebackup for a newly created branch, pageserver generates
-# 'pg_control' file to bootstrap WAL segment by specifying the redo position
-# a "normalized" LSN based on the timeline's starting LSN:
-#
-# checkpoint.redo = normalize_lsn(self.lsn, pg_constants::WAL_SEGMENT_SIZE).0;
-#
-# This test checks if the pageserver is able to handle a "unnormalized" starting LSN.
-#
-# Related: see discussion in https://github.com/neondatabase/neon/pull/2143#issuecomment-1209092186
-def test_branching_unnormalized_start_lsn(neon_simple_env: NeonEnv, pg_bin: PgBin):
-    XLOG_BLCKSZ = 8192
-
-    env = neon_simple_env
-
-    env.neon_cli.create_branch("b0")
-    pg0 = env.postgres.create_start("b0")
-
-    pg_bin.run_capture(["pgbench", "-i", pg0.connstr()])
-
-    with pg0.cursor() as cur:
-        curr_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
-
-    # Specify the `start_lsn` as a number that is divided by `XLOG_BLCKSZ`
-    # and is smaller than `curr_lsn`.
-    start_lsn = Lsn((int(curr_lsn) - XLOG_BLCKSZ) // XLOG_BLCKSZ * XLOG_BLCKSZ)
-
-    log.info(f"Branching b1 from b0 starting at lsn {start_lsn}...")
-    env.neon_cli.create_branch("b1", "b0", ancestor_start_lsn=start_lsn)
-    pg1 = env.postgres.create_start("b1")
-
-    pg_bin.run_capture(["pgbench", "-i", pg1.connstr()])
--- a/test_runner/regress/test_compute_ctl.py
+++ b/test_runner/regress/test_compute_ctl.py
@@ -1,203 +0,0 @@
-import os
-from subprocess import TimeoutExpired
-
-from fixtures.log_helper import log
-from fixtures.neon_fixtures import ComputeCtl, NeonEnvBuilder, PgBin
-
-
-# Test that compute_ctl works and prints "--sync-safekeepers" logs.
-def test_sync_safekeepers_logs(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
-    neon_env_builder.num_safekeepers = 3
-    env = neon_env_builder.init_start()
-    ctl = ComputeCtl(env)
-
-    env.neon_cli.create_branch("test_compute_ctl", "main")
-    pg = env.postgres.create_start("test_compute_ctl")
-    pg.safe_psql("CREATE TABLE t(key int primary key, value text)")
-
-    with open(pg.config_file_path(), "r") as f:
-        cfg_lines = f.readlines()
-    cfg_map = {}
-    for line in cfg_lines:
-        if "=" in line:
-            k, v = line.split("=")
-            cfg_map[k] = v.strip("\n '\"")
-    log.info(f"postgres config: {cfg_map}")
-    pgdata = pg.pg_data_dir_path()
-    pg_bin_path = os.path.join(pg_bin.pg_bin_path, "postgres")
-
-    pg.stop_and_destroy()
-
-    spec = (
-        """
-{
-    "format_version": 1.0,
-
-    "timestamp": "2021-05-23T18:25:43.511Z",
-    "operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8b",
-
-    "cluster": {
-        "cluster_id": "test-cluster-42",
-        "name": "Neon Test",
-        "state": "restarted",
-        "roles": [
-        ],
-        "databases": [
-        ],
-        "settings": [
-            {
-                "name": "fsync",
-                "value": "off",
-                "vartype": "bool"
-            },
-            {
-                "name": "wal_level",
-                "value": "replica",
-                "vartype": "enum"
-            },
-            {
-                "name": "hot_standby",
-                "value": "on",
-                "vartype": "bool"
-            },
-            {
-                "name": "neon.safekeepers",
-                "value": """
-        + f'"{cfg_map["neon.safekeepers"]}"'
-        + """,
-                "vartype": "string"
-            },
-            {
-                "name": "wal_log_hints",
-                "value": "on",
-                "vartype": "bool"
-            },
-            {
-                "name": "log_connections",
-                "value": "on",
-                "vartype": "bool"
-            },
-            {
-                "name": "shared_buffers",
-                "value": "32768",
-                "vartype": "integer"
-            },
-            {
-                "name": "port",
-                "value": """
-        + f'"{cfg_map["port"]}"'
-        + """,
-                "vartype": "integer"
-            },
-            {
-                "name": "max_connections",
-                "value": "100",
-                "vartype": "integer"
-            },
-            {
-                "name": "max_wal_senders",
-                "value": "10",
-                "vartype": "integer"
-            },
-            {
-                "name": "listen_addresses",
-                "value": "0.0.0.0",
-                "vartype": "string"
-            },
-            {
-                "name": "wal_sender_timeout",
-                "value": "0",
-                "vartype": "integer"
-            },
-            {
-                "name": "password_encryption",
-                "value": "md5",
-                "vartype": "enum"
-            },
-            {
-                "name": "maintenance_work_mem",
-                "value": "65536",
-                "vartype": "integer"
-            },
-            {
-                "name": "max_parallel_workers",
-                "value": "8",
-                "vartype": "integer"
-            },
-            {
-                "name": "max_worker_processes",
-                "value": "8",
-                "vartype": "integer"
-            },
-            {
-                "name": "neon.tenant_id",
-                "value": """
-        + f'"{cfg_map["neon.tenant_id"]}"'
-        + """,
-                "vartype": "string"
-            },
-            {
-                "name": "max_replication_slots",
-                "value": "10",
-                "vartype": "integer"
-            },
-            {
-                "name": "neon.timeline_id",
-                "value": """
-        + f'"{cfg_map["neon.timeline_id"]}"'
-        + """,
-                "vartype": "string"
-            },
-            {
-                "name": "shared_preload_libraries",
-                "value": "neon",
-                "vartype": "string"
-            },
-            {
-                "name": "synchronous_standby_names",
-                "value": "walproposer",
-                "vartype": "string"
-            },
-            {
-                "name": "neon.pageserver_connstring",
-                "value": """
-        + f'"{cfg_map["neon.pageserver_connstring"]}"'
-        + """,
-                "vartype": "string"
-            }
-        ]
-    },
-    "delta_operations": [
-    ]
-}
-"""
-    )
-
-    ps_connstr = cfg_map["neon.pageserver_connstring"]
-    log.info(f"ps_connstr: {ps_connstr}, pgdata: {pgdata}")
-
-    # run compute_ctl and wait for 10s
-    try:
-        ctl.raw_cli(
-            ["--connstr", ps_connstr, "--pgdata", pgdata, "--spec", spec, "--pgbin", pg_bin_path],
-            timeout=10,
-        )
-    except TimeoutExpired as exc:
-        ctl_logs = exc.stderr.decode("utf-8")
-        log.info("compute_ctl output:\n" + ctl_logs)
-
-    start = "starting safekeepers syncing"
-    end = "safekeepers synced at LSN"
-    start_pos = ctl_logs.index(start)
-    assert start_pos != -1
-    end_pos = ctl_logs.index(end, start_pos)
-    assert end_pos != -1
-    sync_safekeepers_logs = ctl_logs[start_pos : end_pos + len(end)]
-    log.info("sync_safekeepers_logs:\n" + sync_safekeepers_logs)
-
-    # assert that --sync-safekeepers logs are present in the output
-    assert "connecting with node" in sync_safekeepers_logs
-    assert "connected with node" in sync_safekeepers_logs
-    assert "proposer connected to quorum (2)" in sync_safekeepers_logs
-    assert "got votes from majority (2)" in sync_safekeepers_logs
-    assert "sending elected msg to node" in sync_safekeepers_logs
--- a/test_runner/regress/test_lsn_mapping.py
+++ b/test_runner/regress/test_lsn_mapping.py
@@ -15,6 +15,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
    pgmain = env.postgres.create_start("test_lsn_mapping")
    log.info("postgres is running on 'test_lsn_mapping' branch")

+    ps_cur = env.pageserver.connect().cursor()
    cur = pgmain.connect().cursor()
    # Create table, and insert rows, each in a separate transaction
    # Disable synchronous_commit to make this initialization go faster.
@@ -37,33 +38,37 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
    # Wait until WAL is received by pageserver
    wait_for_last_flush_lsn(env, pgmain, env.initial_tenant, new_timeline_id)

-    with env.pageserver.http_client() as client:
-        # Check edge cases: timestamp in the future
-        probe_timestamp = tbl[-1][1] + timedelta(hours=1)
-        result = client.timeline_get_lsn_by_timestamp(
-            env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z"
+    # Check edge cases: timestamp in the future
+    probe_timestamp = tbl[-1][1] + timedelta(hours=1)
+    result = query_scalar(
+        ps_cur,
+        f"get_lsn_by_timestamp {env.initial_tenant} {new_timeline_id} '{probe_timestamp.isoformat()}Z'",
+    )
+    assert result == "future"
+
+    # timestamp too the far history
+    probe_timestamp = tbl[0][1] - timedelta(hours=10)
+    result = query_scalar(
+        ps_cur,
+        f"get_lsn_by_timestamp {env.initial_tenant} {new_timeline_id} '{probe_timestamp.isoformat()}Z'",
+    )
+    assert result == "past"
+
+    # Probe a bunch of timestamps in the valid range
+    for i in range(1, len(tbl), 100):
+        probe_timestamp = tbl[i][1]
+
+        # Call get_lsn_by_timestamp to get the LSN
+        lsn = query_scalar(
+            ps_cur,
+            f"get_lsn_by_timestamp {env.initial_tenant} {new_timeline_id} '{probe_timestamp.isoformat()}Z'",
        )
-        assert result == "future"

-        # timestamp too the far history
-        probe_timestamp = tbl[0][1] - timedelta(hours=10)
-        result = client.timeline_get_lsn_by_timestamp(
-            env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z"
+        # Launch a new read-only node at that LSN, and check that only the rows
+        # that were supposed to be committed at that point in time are visible.
+        pg_here = env.postgres.create_start(
+            branch_name="test_lsn_mapping", node_name="test_lsn_mapping_read", lsn=lsn
        )
-        assert result == "past"
+        assert pg_here.safe_psql("SELECT max(x) FROM foo")[0][0] == i

-        # Probe a bunch of timestamps in the valid range
-        for i in range(1, len(tbl), 100):
-            probe_timestamp = tbl[i][1]
-            lsn = client.timeline_get_lsn_by_timestamp(
-                env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z"
-            )
-            # Call get_lsn_by_timestamp to get the LSN
-            # Launch a new read-only node at that LSN, and check that only the rows
-            # that were supposed to be committed at that point in time are visible.
-            pg_here = env.postgres.create_start(
-                branch_name="test_lsn_mapping", node_name="test_lsn_mapping_read", lsn=lsn
-            )
-            assert pg_here.safe_psql("SELECT max(x) FROM foo")[0][0] == i
-
-            pg_here.stop_and_destroy()
+        pg_here.stop_and_destroy()
--- a/test_runner/regress/test_tenants.py
+++ b/test_runner/regress/test_tenants.py
@@ -1,5 +1,4 @@
 import os
-import shutil
 from contextlib import closing
 from datetime import datetime
 from pathlib import Path
@@ -8,13 +7,8 @@ from typing import List
 import pytest
 from fixtures.log_helper import log
 from fixtures.metrics import PAGESERVER_PER_TENANT_METRICS, parse_metrics
-from fixtures.neon_fixtures import (
-    NeonEnv,
-    NeonEnvBuilder,
-    RemoteStorageKind,
-    available_remote_storages,
-)
-from fixtures.types import Lsn, TenantId, TimelineId
+from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder
+from fixtures.types import Lsn, TenantId
 from prometheus_client.samples import Sample


@@ -207,63 +201,3 @@ def test_pageserver_metrics_removed_after_detach(neon_env_builder: NeonEnvBuilde

        post_detach_samples = set([x.name for x in get_ps_metric_samples_for_tenant(tenant)])
        assert post_detach_samples == set()
-
-
-# Check that empty tenants work with or without the remote storage
-@pytest.mark.parametrize(
-    "remote_storage_kind", available_remote_storages() + [RemoteStorageKind.NOOP]
-)
-def test_pageserver_with_empty_tenants(
-    neon_env_builder: NeonEnvBuilder, remote_storage_kind: RemoteStorageKind
-):
-    neon_env_builder.enable_remote_storage(
-        remote_storage_kind=remote_storage_kind,
-        test_name="test_pageserver_with_empty_tenants",
-    )
-
-    env = neon_env_builder.init_start()
-    client = env.pageserver.http_client()
-
-    tenant_without_timelines_dir = env.initial_tenant
-    log.info(
-        f"Tenant {tenant_without_timelines_dir} becomes broken: it abnormally looses tenants/ directory and is expected to be completely ignored when pageserver restarts"
-    )
-    shutil.rmtree(Path(env.repo_dir) / "tenants" / str(tenant_without_timelines_dir) / "timelines")
-
-    tenant_with_empty_timelines_dir = client.tenant_create()
-    log.info(
-        f"Tenant {tenant_with_empty_timelines_dir} gets all of its timelines deleted: still should be functional"
-    )
-    temp_timelines = client.timeline_list(tenant_with_empty_timelines_dir)
-    for temp_timeline in temp_timelines:
-        client.timeline_delete(
-            tenant_with_empty_timelines_dir, TimelineId(temp_timeline["timeline_id"])
-        )
-    files_in_timelines_dir = sum(
-        1
-        for _p in Path.iterdir(
-            Path(env.repo_dir) / "tenants" / str(tenant_with_empty_timelines_dir) / "timelines"
-        )
-    )
-    assert (
-        files_in_timelines_dir == 0
-    ), f"Tenant {tenant_with_empty_timelines_dir} should have an empty timelines/ directory"
-
-    # Trigger timeline reinitialization after pageserver restart
-    env.postgres.stop_all()
-    env.pageserver.stop()
-    env.pageserver.start()
-
-    client = env.pageserver.http_client()
-    tenants = client.tenant_list()
-
-    assert (
-        len(tenants) == 1
-    ), "Pageserver should attach only tenants with empty timelines/ dir on restart"
-    loaded_tenant = tenants[0]
-    assert loaded_tenant["id"] == str(
-        tenant_with_empty_timelines_dir
-    ), f"Tenant {tenant_with_empty_timelines_dir} should be loaded as the only one with tenants/ directory"
-    assert loaded_tenant["state"] == {
-        "Active": {"background_jobs_running": False}
-    }, "Empty tenant should be loaded and ready for timeline creation"
--- a/vendor/postgres-v15
+++ b/vendor/postgres-v15
--- a/workspace_hack/Cargo.toml
+++ b/workspace_hack/Cargo.toml
@@ -19,7 +19,6 @@ anyhow = { version = "1", features = ["backtrace", "std"] }
 bstr = { version = "0.2", features = ["lazy_static", "regex-automata", "serde", "serde1", "serde1-nostd", "std", "unicode"] }
 bytes = { version = "1", features = ["serde", "std"] }
 chrono = { version = "0.4", features = ["clock", "libc", "oldtime", "serde", "std", "time", "winapi"] }
-crossbeam-utils = { version = "0.8", features = ["once_cell", "std"] }
 either = { version = "1", features = ["use_std"] }
 fail = { version = "0.5", default-features = false, features = ["failpoints"] }
 hashbrown = { version = "0.12", features = ["ahash", "inline-more", "raw"] }
Author	SHA1	Message	Date
Konstantin Knizhnik	10b90506a0	Use B-Tree instead of R-Tree	2022-10-07 14:57:25 +03:00
Konstantin Knizhnik	5ee4524caa	Fix indentation	2022-10-06 22:31:11 +03:00
Konstantin Knizhnik	c5245a9e4f	Make clippy happy	2022-10-06 21:47:26 +03:00
Konstantin Knizhnik	9f10195d7b	Change LSN range assignment rule for partial image layers	2022-10-06 20:16:13 +03:00
Konstantin Knizhnik	51aa53ab90	Check that image contains search key in LayerMap::search	2022-10-06 16:06:39 +03:00
Konstantin Knizhnik	2359106a9d	Fix writer creation in reconstruct_level0	2022-10-06 10:05:35 +03:00
Konstantin Knizhnik	885033ad42	Make reconstruction more intensive	2022-10-06 09:37:37 +03:00
Konstantin Knizhnik	487ec20085	Fix indentation	2022-10-05 15:30:24 +03:00
Konstantin Knizhnik	898937d500	Store partial image layers instead of compaction	2022-10-04 20:42:34 +03:00