Remove rocksdb implementation.

The layered storage format is now good enough that we no longer need the RocksDB
implementation. The layered format still has a number of known issues, but we'll
keep working on them.
Heikki Linnakangas
2021-08-25 18:06:34 +03:00
parent 250ae643a8
commit 5998744bcc
17 changed files with 21 additions and 2171 deletions

Cargo.lock (generated)

@@ -152,25 +152,6 @@ dependencies = [
"serde",
]
[[package]]
name = "bindgen"
version = "0.57.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd4865004a46a0aafb2a0a5eb19d3c9fc46ee5f063a6cfc605c69ac9ecf5263d"
dependencies = [
"bitflags",
"cexpr 0.4.0",
"clang-sys",
"lazy_static",
"lazycell",
"peeking_take_while",
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"shlex 0.1.1",
]
[[package]]
name = "bindgen"
version = "0.59.1"
@@ -178,7 +159,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "453c49e5950bb0eb63bb3df640e31618846c89d5b7faa54040d76e98e0134375"
dependencies = [
"bitflags",
"cexpr 0.5.0",
"cexpr",
"clang-sys",
"clap",
"env_logger",
@@ -190,7 +171,7 @@ dependencies = [
"quote",
"regex",
"rustc-hash",
"shlex 1.0.0",
"shlex",
"which",
]
@@ -265,18 +246,6 @@ name = "cc"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e70cc2f62c6ce1868963827bd677764c62d07c3d9a3e1fb1177ee1a9ab199eb2"
dependencies = [
"jobserver",
]
[[package]]
name = "cexpr"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4aedb84272dbe89af497cf81375129abda4fc0a9e7c5d317498c15cc30c0d27"
dependencies = [
"nom 5.1.2",
]
[[package]]
name = "cexpr"
@@ -284,7 +253,7 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db507a7679252d2276ed0dd8113c6875ec56d3089f9225b2b42c30cc1f8e5c89"
dependencies = [
"nom 6.1.2",
"nom",
]
[[package]]
@@ -903,15 +872,6 @@ version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"
[[package]]
name = "jobserver"
version = "0.1.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "972f5ae5d1cb9c6ae417789196c803205313edde988685da5e3aae0827b9e7fd"
dependencies = [
"libc",
]
[[package]]
name = "js-sys"
version = "0.3.51"
@@ -963,18 +923,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "librocksdb-sys"
version = "6.17.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5da125e1c0f22c7cae785982115523a0738728498547f415c9054cb17c7e89f9"
dependencies = [
"bindgen 0.57.0",
"cc",
"glob",
"libc",
]
[[package]]
name = "lock_api"
version = "0.4.4"
@@ -1109,16 +1057,6 @@ dependencies = [
"libc",
]
[[package]]
name = "nom"
version = "5.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af"
dependencies = [
"memchr",
"version_check",
]
[[package]]
name = "nom"
version = "6.1.2"
@@ -1260,7 +1198,6 @@ dependencies = [
"postgres_ffi",
"rand",
"regex",
"rocksdb",
"routerify",
"rust-s3",
"scopeguard",
@@ -1442,7 +1379,7 @@ name = "postgres_ffi"
version = "0.1.0"
dependencies = [
"anyhow",
"bindgen 0.59.1",
"bindgen",
"byteorder",
"bytes",
"chrono",
@@ -1683,16 +1620,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "rocksdb"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c749134fda8bfc90d0de643d59bfc841dcb3ac8a1062e12b6754bd60235c48b3"
dependencies = [
"libc",
"librocksdb-sys",
]
[[package]]
name = "routerify"
version = "2.2.0"
@@ -1916,12 +1843,6 @@ dependencies = [
"opaque-debug",
]
[[package]]
name = "shlex"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2"
[[package]]
name = "shlex"
version = "1.0.0"


@@ -42,7 +42,7 @@ pub struct LocalEnv {
#[serde(with = "hex")]
pub tenantid: ZTenantId,
// Repository format, 'rocksdb' or 'layered' or None for default
// Repository format, only 'layered' supported currently, or None for default
pub repository_format: Option<String>,
// jwt auth token used for communication with pageserver


@@ -30,8 +30,6 @@ tokio-stream = { version = "0.1.5" }
postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
# by default rust-rocksdb tries to build a lot of compression algos. Use lz4 only for now as it is simplest dependency.
rocksdb = { version = "0.16.0", features = ["lz4"], default-features = false }
routerify = "2"
anyhow = "1.0"
crc32c = "0.6.0"


@@ -74,9 +74,8 @@ Repository corresponds to one .zenith directory.
Repository is needed to manage Timelines.
Each repository has associated WAL redo service.
Now we have two implementations of Repository:
- ObjectRepository uses RocksDB as a storage
- LayeredRepository uses custom storage format, described in layered_repository/README.md
There is currently only one implementation of the Repository trait: LayeredRepository.
It uses a custom storage format, described in layered_repository/README.md
#### Timeline
Timeline is a page cache workhorse that accepts page changes
@@ -89,13 +88,6 @@ Each Branch lives in a corresponding timeline and has an ancestor.
To get full snapshot of data at certain moment we need to traverse timeline and its ancestors.
#### ObjectRepository
ObjectRepository implements Repository and has associated ObjectStore and WAL redo service.
#### ObjectStore
ObjectStore is an interface for key-value store for page images and wal records.
Currently it has one implementation - RocksDB.
#### WAL redo service
WAL redo service - service that runs PostgreSQL in a special wal_redo mode
to apply given WAL records over an old page image and return new page image.

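For orientation alongside the README changes above, here is a rough sketch of how the remaining Repository implementation is exercised, pieced together from calls that appear verbatim elsewhere in this diff (create_empty_timeline, gc_iteration, the GcResult counters). It is an illustration only: parameter types and the page I/O methods (put_page_image, get_page_at_lsn) are assumptions, since their signatures are not shown here.

    use std::str::FromStr;
    use crate::repository::Repository;
    use zenith_utils::lsn::Lsn;
    use zenith_utils::zid::ZTimelineId;

    // Hedged sketch, not part of this commit.
    fn exercise_repository(repo: &dyn Repository) -> anyhow::Result<()> {
        // Timelines are created on the repository...
        let timelineid = ZTimelineId::from_str("11223344556677881122334455667788").unwrap();
        let _timeline = repo.create_empty_timeline(timelineid, Lsn(0))?;

        // ...pages are then written and read through the Timeline via put_page_image(),
        // put_wal_record() and get_page_at_lsn() (signatures not visible in this diff).

        // GC runs per timeline and reports the layer-file counters kept in GcResult.
        let result = repo.gc_iteration(Some(timelineid), 0, true)?;
        println!("layer files needed by branches: {}", result.snapshot_relfiles_needed_by_branches);
        Ok(())
    }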

@@ -138,10 +138,9 @@ impl CfgFileParams {
}
let repository_format = match self.repository_format.as_ref() {
Some(repo_format_str) if repo_format_str == "rocksdb" => RepositoryFormat::RocksDb,
Some(repo_format_str) if repo_format_str == "layered" => RepositoryFormat::Layered,
Some(repo_format_str) => anyhow::bail!(
"invalid --repository-format '{}', must be 'rocksdb' or 'layered'",
"invalid --repository-format '{}', only 'layered' supported",
repo_format_str
),
None => RepositoryFormat::Layered, // default
@@ -240,7 +239,7 @@ fn main() -> Result<()> {
Arg::with_name("repository-format")
.long("repository-format")
.takes_value(true)
.help("Which repository implementation to use, 'rocksdb' or 'layered'"),
.help("Which repository implementation to use, only 'layered' supported currently"),
)
.get_matches();


@@ -20,7 +20,6 @@ use log::*;
use zenith_utils::lsn::Lsn;
use crate::logger;
use crate::object_repository::ObjectRepository;
use crate::page_cache;
use crate::restore_local_repo;
use crate::walredo::WalRedoManager;
@@ -102,16 +101,6 @@ pub fn create_repo(
RepositoryFormat::Layered => Arc::new(
crate::layered_repository::LayeredRepository::new(conf, wal_redo_manager, tenantid),
),
RepositoryFormat::RocksDb => {
let obj_store = crate::rocksdb_storage::RocksObjectStore::create(conf, &tenantid)?;
Arc::new(ObjectRepository::new(
conf,
Arc::new(obj_store),
wal_redo_manager,
tenantid,
))
}
};
// Load data into pageserver


@@ -12,15 +12,11 @@ pub mod branches;
pub mod http;
pub mod layered_repository;
pub mod logger;
pub mod object_key;
pub mod object_repository;
pub mod object_store;
pub mod page_cache;
pub mod page_service;
pub mod relish;
pub mod repository;
pub mod restore_local_repo;
pub mod rocksdb_storage;
pub mod waldecoder;
pub mod walreceiver;
pub mod walredo;
@@ -63,7 +59,6 @@ pub struct PageServerConf {
#[derive(Debug, Clone, PartialEq)]
pub enum RepositoryFormat {
Layered,
RocksDb,
}
impl PageServerConf {


@@ -1,49 +0,0 @@
//!
//! Common structs shared by object_repository.rs and object_store.rs.
//!
use crate::relish::RelishTag;
use serde::{Deserialize, Serialize};
use zenith_utils::zid::ZTimelineId;
///
/// ObjectKey is the key type used to identify objects stored in an object
/// repository. It is shared between object_repository.rs and object_store.rs.
/// It is mostly opaque to the ObjectStore; it just stores and retrieves objects
/// using the key given by the caller.
///
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ObjectKey {
pub timeline: ZTimelineId,
pub tag: ObjectTag,
}
///
/// ObjectTag is a part of ObjectKey that is specific to the type of
/// the stored object.
///
/// NB: the order of the enum values is significant! In particular,
/// rocksdb_storage.rs assumes that TimelineMetadataTag is first
///
/// Buffer is the kind of object that is accessible by the public
/// get_page_at_lsn() / put_page_image() / put_wal_record() functions in
/// the repository.rs interface. The rest are internal objects stored in
/// the key-value store, to store various metadata. They're not directly
/// accessible outside object_repository.rs
///
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
pub enum ObjectTag {
// dummy tag preceding all other keys
FirstTag,
// Metadata about a timeline. Not versioned.
TimelineMetadataTag,
// These objects store metadata about one relish. Currently it's used
// just to track the relish's size. It's not used for non-blocky relishes
// at all.
RelationMetadata(RelishTag),
// These are the pages exposed in the public Repository/Timeline interface.
Buffer(RelishTag, u32),
}

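The "order of the enum values is significant" note above deserves a concrete illustration: the derived Ord (and the byte-ordered key encoding built from it in rocksdb_storage.rs) sorts variants in declaration order, which is what the RocksDB key layout appears to rely on when seeking to the start of a timeline (see StorageKey::timeline_start and RocksObjectIter further down). A standalone sketch with a simplified stand-in enum, not the real ObjectTag, which carries RelishTag payloads and is serialized with BeSer:

    // Simplified stand-in for ObjectTag; payloads reduced to plain integers.
    #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
    enum DemoTag {
        FirstTag,            // dummy tag sorting before everything else
        TimelineMetadataTag, // the seek target for "start of timeline"
        RelationMetadata(u32),
        Buffer(u32, u32),
    }

    fn main() {
        // Derived Ord compares the variant (declaration order) before any payload.
        assert!(DemoTag::FirstTag < DemoTag::TimelineMetadataTag);
        assert!(DemoTag::TimelineMetadataTag < DemoTag::RelationMetadata(0));
        assert!(DemoTag::RelationMetadata(u32::MAX) < DemoTag::Buffer(0, 0));
        println!("variants sort in declaration order");
    }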
File diff suppressed because it is too large.


@@ -1,92 +0,0 @@
//! Low-level key-value storage abstraction.
//!
use crate::object_key::*;
use crate::relish::*;
use anyhow::Result;
use std::collections::HashSet;
use std::iter::Iterator;
use zenith_utils::lsn::Lsn;
use zenith_utils::zid::ZTimelineId;
///
/// Low-level storage abstraction.
///
/// All the data in the repository is stored in a key-value store. This trait
/// abstracts the details of the key-value store.
///
/// A simple key-value store would support just GET and PUT operations with
/// a key, but the upper layer needs slightly more complicated read operations
///
/// The most frequently used function is 'object_versions'. It is used
/// to look up a page version. It is LSN aware, in that the caller
/// specifies an LSN, and the function returns all values for that
/// block with the same or older LSN.
///
pub trait ObjectStore: Send + Sync {
///
/// Store a value with given key.
///
fn put(&self, key: &ObjectKey, lsn: Lsn, value: &[u8]) -> Result<()>;
/// Read entry with the exact given key.
///
/// This is used for retrieving metadata with special key that doesn't
/// correspond to any real relation.
fn get(&self, key: &ObjectKey, lsn: Lsn) -> Result<Vec<u8>>;
/// Read key greater or equal than specified
fn get_next_key(&self, key: &ObjectKey) -> Result<Option<ObjectKey>>;
/// Iterate through all page versions of one object.
///
/// Returns all page versions in descending LSN order, along with the LSN
/// of each page version.
fn object_versions<'a>(
&'a self,
key: &ObjectKey,
lsn: Lsn,
) -> Result<Box<dyn Iterator<Item = (Lsn, Vec<u8>)> + 'a>>;
/// Iterate through versions of all objects in a timeline.
///
/// Returns objects in increasing key-version order.
/// Returns all versions up to and including the specified LSN.
fn objects<'a>(
&'a self,
timeline: ZTimelineId,
lsn: Lsn,
) -> Result<Box<dyn Iterator<Item = Result<(ObjectTag, Lsn, Vec<u8>)>> + 'a>>;
/// Iterate through all keys with given tablespace and database ID, and LSN <= 'lsn'.
/// Both dbnode and spcnode can be InvalidId (0) which means get all relations in tablespace/cluster
///
/// This is used to implement 'create database'
fn list_rels(
&self,
timelineid: ZTimelineId,
spcnode: u32,
dbnode: u32,
lsn: Lsn,
) -> Result<HashSet<RelTag>>;
/// Iterate through non-rel relishes
///
/// This is used to prepare tarball for new node startup.
fn list_nonrels<'a>(&'a self, timelineid: ZTimelineId, lsn: Lsn) -> Result<HashSet<RelishTag>>;
/// Iterate through object tags. If nonrel_only, then only non-relational data is iterated.
///
/// This is used to implement GC and preparing tarball for new node startup
/// Returns objects in increasing key-version order.
fn list_objects<'a>(
&'a self,
timelineid: ZTimelineId,
lsn: Lsn,
) -> Result<Box<dyn Iterator<Item = ObjectTag> + 'a>>;
/// Unlink object (used by GC). This method may actually delete the object or just mark it for deletion.
fn unlink(&self, key: &ObjectKey, lsn: Lsn) -> Result<()>;
// Compact storage and remove versions marked for deletion
fn compact(&self);
}

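To make the role of object_versions concrete, here is a hedged sketch of how a caller could use this (now removed) interface to assemble the history of one page: walk versions downward from the requested LSN, collecting WAL records until a full page image is found, then hand both to the WAL redo service. The value encoding lives in object_repository.rs, which is not shown in this diff, so the is_page_image predicate is passed in rather than invented here.

    use crate::object_key::ObjectKey;
    use crate::object_store::ObjectStore;
    use zenith_utils::lsn::Lsn;

    // Hedged sketch, not part of this commit.
    fn collect_page_history(
        store: &dyn ObjectStore,
        key: &ObjectKey,
        lsn: Lsn,
        is_page_image: impl Fn(&[u8]) -> bool,
    ) -> anyhow::Result<(Vec<u8>, Vec<Vec<u8>>)> {
        let mut wal_records = Vec::new();
        // object_versions() yields versions with LSN <= `lsn`, newest first.
        for (_version_lsn, value) in store.object_versions(key, lsn)? {
            if is_page_image(value.as_slice()) {
                // Base image found; the WAL redo service would replay `wal_records` over it.
                return Ok((value, wal_records));
            }
            wal_records.push(value);
        }
        anyhow::bail!("no base page image found for {:?} at or before LSN {:#x}", key, lsn.0)
    }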

@@ -3,9 +3,7 @@
use crate::branches;
use crate::layered_repository::LayeredRepository;
use crate::object_repository::ObjectRepository;
use crate::repository::Repository;
use crate::rocksdb_storage::RocksObjectStore;
use crate::walredo::PostgresRedoManager;
use crate::{PageServerConf, RepositoryFormat};
use anyhow::{anyhow, bail, Result};
@@ -43,16 +41,6 @@ pub fn init(conf: &'static PageServerConf) {
LayeredRepository::launch_checkpointer_thread(conf, repo.clone());
repo
}
RepositoryFormat::RocksDb => {
let obj_store = RocksObjectStore::open(conf, &tenantid).unwrap();
Arc::new(ObjectRepository::new(
conf,
Arc::new(obj_store),
Arc::new(walredo_mgr),
tenantid,
))
}
};
info!("initialized storage for tenant: {}", &tenantid);


@@ -24,13 +24,13 @@ use std::{io, net::TcpStream};
use zenith_metrics::{register_histogram_vec, HistogramVec};
use zenith_utils::auth::{self, JwtAuth};
use zenith_utils::auth::{Claims, Scope};
use zenith_utils::lsn::Lsn;
use zenith_utils::postgres_backend::PostgresBackend;
use zenith_utils::postgres_backend::{self, AuthType};
use zenith_utils::pq_proto::{
BeMessage, FeMessage, RowDescriptor, HELLO_WORLD_ROW, SINGLE_COL_ROWDESC,
};
use zenith_utils::zid::{ZTenantId, ZTimelineId};
use zenith_utils::lsn::Lsn;
use crate::basebackup;
use crate::branches;
@@ -596,15 +596,6 @@ impl postgres_backend::Handler for PageServerHandler {
let result = repo.gc_iteration(Some(timelineid), gc_horizon, true)?;
pgb.write_message_noflush(&BeMessage::RowDescription(&[
RowDescriptor::int8_col(b"n_relations"),
RowDescriptor::int8_col(b"truncated"),
RowDescriptor::int8_col(b"deleted"),
RowDescriptor::int8_col(b"prep_deleted"),
RowDescriptor::int8_col(b"slru_deleted"),
RowDescriptor::int8_col(b"chkp_deleted"),
RowDescriptor::int8_col(b"control_deleted"),
RowDescriptor::int8_col(b"filenodemap_deleted"),
RowDescriptor::int8_col(b"dropped"),
RowDescriptor::int8_col(b"snapshot_relfiles_total"),
RowDescriptor::int8_col(b"snapshot_relfiles_needed_by_cutoff"),
RowDescriptor::int8_col(b"snapshot_relfiles_needed_by_branches"),
@@ -620,15 +611,6 @@ impl postgres_backend::Handler for PageServerHandler {
RowDescriptor::int8_col(b"elapsed"),
]))?
.write_message_noflush(&BeMessage::DataRow(&[
Some(&result.n_relations.to_string().as_bytes()),
Some(&result.truncated.to_string().as_bytes()),
Some(&result.deleted.to_string().as_bytes()),
Some(&result.prep_deleted.to_string().as_bytes()),
Some(&result.slru_deleted.to_string().as_bytes()),
Some(&result.chkp_deleted.to_string().as_bytes()),
Some(&result.control_deleted.to_string().as_bytes()),
Some(&result.filenodemap_deleted.to_string().as_bytes()),
Some(&result.dropped.to_string().as_bytes()),
Some(&result.snapshot_relfiles_total.to_string().as_bytes()),
Some(
&result


@@ -55,20 +55,6 @@ pub trait Repository: Send + Sync {
///
#[derive(Default)]
pub struct GcResult {
// FIXME: These counters make sense for the ObjectRepository. They are not used
// by the LayeredRepository.
pub n_relations: u64,
pub inspected: u64,
pub truncated: u64,
pub deleted: u64,
pub prep_deleted: u64, // RelishTag::Twophase
pub slru_deleted: u64, // RelishTag::Slru
pub chkp_deleted: u64, // RelishTag::Checkpoint
pub control_deleted: u64, // RelishTag::ControlFile
pub filenodemap_deleted: u64, // RelishTag::FileNodeMap
pub dropped: u64,
// These are used for the LayeredRepository instead
pub snapshot_relfiles_total: u64,
pub snapshot_relfiles_needed_by_cutoff: u64,
pub snapshot_relfiles_needed_by_branches: u64,
@@ -88,11 +74,6 @@ pub struct GcResult {
impl AddAssign for GcResult {
fn add_assign(&mut self, other: Self) {
self.n_relations += other.n_relations;
self.truncated += other.truncated;
self.deleted += other.deleted;
self.dropped += other.dropped;
self.snapshot_relfiles_total += other.snapshot_relfiles_total;
self.snapshot_relfiles_needed_by_cutoff += other.snapshot_relfiles_needed_by_cutoff;
self.snapshot_relfiles_needed_by_branches += other.snapshot_relfiles_needed_by_branches;
@@ -241,8 +222,6 @@ impl WALRecord {
mod tests {
use super::*;
use crate::layered_repository::LayeredRepository;
use crate::object_repository::ObjectRepository;
use crate::rocksdb_storage::RocksObjectStore;
use crate::walredo::{WalRedoError, WalRedoManager};
use crate::{PageServerConf, RepositoryFormat};
use postgres_ffi::pg_constants;
@@ -279,10 +258,7 @@ mod tests {
static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; 8192]);
fn get_test_repo(
test_name: &str,
repository_format: RepositoryFormat,
) -> Result<Box<dyn Repository>> {
fn get_test_repo(test_name: &str) -> Result<Box<dyn Repository>> {
let repo_dir = PathBuf::from(format!("../tmp_check/test_{}", test_name));
let _ = fs::remove_dir_all(&repo_dir);
fs::create_dir_all(&repo_dir)?;
@@ -299,7 +275,7 @@ mod tests {
pg_distrib_dir: "".into(),
auth_type: AuthType::Trust,
auth_validation_public_key_path: None,
repository_format,
repository_format: RepositoryFormat::Layered,
};
// Make a static copy of the config. This can never be free'd, but that's
// OK in a test.
@@ -315,35 +291,14 @@ mod tests {
Arc::new(walredo_mgr),
tenantid,
)),
RepositoryFormat::RocksDb => {
let obj_store = RocksObjectStore::create(conf, &tenantid)?;
Box::new(ObjectRepository::new(
conf,
Arc::new(obj_store),
Arc::new(walredo_mgr),
tenantid,
))
}
};
Ok(repo)
}
/// Test get_relsize() and truncation.
#[test]
fn test_relsize_rocksdb() -> Result<()> {
let repo = get_test_repo("test_relsize_rocksdb", RepositoryFormat::RocksDb)?;
test_relsize(&*repo)
}
#[test]
fn test_relsize_layered() -> Result<()> {
let repo = get_test_repo("test_relsize_layered", RepositoryFormat::Layered)?;
test_relsize(&*repo)
}
fn test_relsize(repo: &dyn Repository) -> Result<()> {
fn test_relsize() -> Result<()> {
let repo = get_test_repo("test_relsize")?;
// get_timeline() with non-existent timeline id should fail
//repo.get_timeline("11223344556677881122334455667788");
@@ -428,22 +383,9 @@ mod tests {
/// Test get_relsize() and truncation with a file larger than 1 GB, so that it's
/// split into multiple 1 GB segments in Postgres.
///
/// This isn't very interesting with the RocksDb implementation, as we don't pay
/// any attention to Postgres segment boundaries there.
#[test]
fn test_large_rel_rocksdb() -> Result<()> {
let repo = get_test_repo("test_large_rel_rocksdb", RepositoryFormat::RocksDb)?;
test_large_rel(&*repo)
}
#[test]
fn test_large_rel_layered() -> Result<()> {
let repo = get_test_repo("test_large_rel_layered", RepositoryFormat::Layered)?;
test_large_rel(&*repo)
}
fn test_large_rel(repo: &dyn Repository) -> Result<()> {
fn test_large_rel() -> Result<()> {
let repo = get_test_repo("test_large_rel")?;
let timelineid = ZTimelineId::from_str("11223344556677881122334455667788").unwrap();
let tline = repo.create_empty_timeline(timelineid, Lsn(0))?;
@@ -498,22 +440,12 @@ mod tests {
Ok(())
}
#[test]
fn test_branch_rocksdb() -> Result<()> {
let repo = get_test_repo("test_branch_rocksdb", RepositoryFormat::RocksDb)?;
test_branch(&*repo)
}
#[test]
fn test_branch_layered() -> Result<()> {
let repo = get_test_repo("test_branch_layered", RepositoryFormat::Layered)?;
test_branch(&*repo)
}
///
/// Test branch creation
///
fn test_branch(repo: &dyn Repository) -> Result<()> {
#[test]
fn test_branch() -> Result<()> {
let repo = get_test_repo("test_branch")?;
let timelineid = ZTimelineId::from_str("11223344556677881122334455667788").unwrap();
let tline = repo.create_empty_timeline(timelineid, Lsn(0))?;


@@ -1,475 +0,0 @@
//!
//! An implementation of the ObjectStore interface, backed by RocksDB
//!
use crate::object_key::*;
use crate::object_store::ObjectStore;
use crate::relish::*;
use crate::PageServerConf;
use anyhow::{bail, Result};
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::sync::{Arc, Mutex};
use zenith_utils::bin_ser::BeSer;
use zenith_utils::lsn::Lsn;
use zenith_utils::zid::ZTenantId;
use zenith_utils::zid::ZTimelineId;
#[derive(Debug, Clone, Serialize, Deserialize)]
struct StorageKey {
obj_key: ObjectKey,
lsn: Lsn,
}
impl StorageKey {
/// The first key for a given timeline
fn timeline_start(timeline: ZTimelineId) -> Self {
Self {
obj_key: ObjectKey {
timeline,
tag: ObjectTag::TimelineMetadataTag,
},
lsn: Lsn(0),
}
}
}
///
/// RocksDB deletes individual records very inefficiently. Instead, we use a compaction
/// filter, which lets us throw records away during the LSM merge phase.
/// Unfortunately, it is hard (if possible at all) to determine at merge time whether a
/// version can be removed. A version can be removed if:
/// 1. It is beyond the PITR horizon (we need the current LSN and gc_horizon from the config)
/// 2. The page has been reconstructed at the horizon (all WAL records above the horizon have been applied and can be removed)
///
/// So we have a GC process which reconstructs pages at the horizon and marks the obsolete WAL
/// records for deletion. To mark an object for deletion we could set a flag in the object itself,
/// but that is complicated with the new object value format, because the RocksDB storage knows
/// nothing about this format. Also, updating a whole record just to set one bit would be
/// inefficient in any case. This is why we keep the keys of versions marked for deletion in an
/// in-memory HashSet. When the LSM compaction filter finds a key in this set, it drops the record
/// and removes the key from the set, preventing the set from growing without bound.
///
struct GarbageCollector {
garbage: Mutex<HashSet<Vec<u8>>>,
}
impl GarbageCollector {
fn new() -> GarbageCollector {
GarbageCollector {
garbage: Mutex::new(HashSet::new()),
}
}
/// Called by GC to mark a version for deletion
fn mark_for_deletion(&self, key: &[u8]) {
let mut garbage = self.garbage.lock().unwrap();
garbage.insert(key.to_vec());
}
/// Called by the LSM compaction filter. If the key is found in the set, it is removed from
/// the set and `true` is returned, so that the filter drops the record.
fn was_deleted(&self, key: &[u8]) -> bool {
let key = key.to_vec();
let mut garbage = self.garbage.lock().unwrap();
garbage.remove(&key)
}
}
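// (Editor's note, not in the original file.) The deferred-deletion protocol in a nutshell:
// GC serializes a StorageKey and calls mark_for_deletion(); later, when compaction reaches
// that record, the filter installed in RocksObjectStore::new() calls was_deleted(), which
// consumes the mark exactly once and makes the filter drop the record:
//
//     let gc = GarbageCollector::new();
//     gc.mark_for_deletion(b"serialized StorageKey bytes");
//     assert!(gc.was_deleted(b"serialized StorageKey bytes"));   // record is dropped
//     assert!(!gc.was_deleted(b"serialized StorageKey bytes"));  // mark already consumed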
pub struct RocksObjectStore {
_conf: &'static PageServerConf,
// RocksDB handle
db: rocksdb::DB,
gc: Arc<GarbageCollector>,
}
impl ObjectStore for RocksObjectStore {
fn get(&self, key: &ObjectKey, lsn: Lsn) -> Result<Vec<u8>> {
let val = self.db.get(StorageKey::ser(&StorageKey {
obj_key: key.clone(),
lsn,
})?)?;
if let Some(val) = val {
Ok(val)
} else {
bail!("could not find page {:?}", key);
}
}
fn get_next_key(&self, key: &ObjectKey) -> Result<Option<ObjectKey>> {
let mut iter = self.db.raw_iterator();
let search_key = StorageKey {
obj_key: key.clone(),
lsn: Lsn(0),
};
iter.seek(search_key.ser()?);
if !iter.valid() {
Ok(None)
} else {
let key = StorageKey::des(iter.key().unwrap())?;
Ok(Some(key.obj_key.clone()))
}
}
fn put(&self, key: &ObjectKey, lsn: Lsn, value: &[u8]) -> Result<()> {
self.db.put(
StorageKey::ser(&StorageKey {
obj_key: key.clone(),
lsn,
})?,
value,
)?;
Ok(())
}
fn unlink(&self, key: &ObjectKey, lsn: Lsn) -> Result<()> {
self.gc.mark_for_deletion(&StorageKey::ser(&StorageKey {
obj_key: key.clone(),
lsn,
})?);
Ok(())
}
/// Iterate through page versions of given page, starting from the given LSN.
/// The versions are walked in descending LSN order.
fn object_versions<'a>(
&'a self,
key: &ObjectKey,
lsn: Lsn,
) -> Result<Box<dyn Iterator<Item = (Lsn, Vec<u8>)> + 'a>> {
let iter = RocksObjectVersionIter::new(&self.db, key, lsn)?;
Ok(Box::new(iter))
}
/// Iterate through all timeline objects
fn list_objects<'a>(
&'a self,
timeline: ZTimelineId,
lsn: Lsn,
) -> Result<Box<dyn Iterator<Item = ObjectTag> + 'a>> {
let iter = RocksObjectIter::new(&self.db, timeline, lsn)?;
Ok(Box::new(iter))
}
/// Get a list of all distinct relations in given tablespace and database.
///
/// TODO: This implementation is very inefficient, it scans
/// through all entries in the given database. In practice, this
/// is used for CREATE DATABASE, and usually the template database is small.
/// But if it's not, this will be slow.
fn list_rels(
&self,
timelineid: ZTimelineId,
spcnode: u32,
dbnode: u32,
lsn: Lsn,
) -> Result<HashSet<RelTag>> {
// FIXME: This scans everything. Very slow
let mut rels: HashSet<RelTag> = HashSet::new();
let mut search_rel_tag = RelTag {
spcnode,
dbnode,
relnode: 0,
forknum: 0u8,
};
let mut iter = self.db.raw_iterator();
loop {
let search_key = StorageKey {
obj_key: ObjectKey {
timeline: timelineid,
tag: ObjectTag::RelationMetadata(RelishTag::Relation(search_rel_tag)),
},
lsn: Lsn(0),
};
iter.seek(search_key.ser()?);
if !iter.valid() {
break;
}
let key = StorageKey::des(iter.key().unwrap())?;
if let ObjectTag::RelationMetadata(RelishTag::Relation(rel_tag)) = key.obj_key.tag {
if spcnode != 0 && rel_tag.spcnode != spcnode
|| dbnode != 0 && rel_tag.dbnode != dbnode
{
break;
}
if key.lsn <= lsn {
// visible in this snapshot
rels.insert(rel_tag);
}
search_rel_tag = rel_tag;
// skip to next relation
// FIXME: What if relnode is u32::MAX ?
search_rel_tag.relnode += 1;
} else {
// no more relation metadata entries
break;
}
}
Ok(rels)
}
/// Get a list of all distinct NON-relations in timeline
/// that are visible at given lsn.
///
/// TODO: This implementation is very inefficient, it scans
/// through all non-rel page versions in the system. In practice, this
/// is used when initializing a new compute node, and the non-rel files
/// are never very large nor change very frequently, so this will do for now.
fn list_nonrels(&self, timelineid: ZTimelineId, lsn: Lsn) -> Result<HashSet<RelishTag>> {
let mut rels: HashSet<RelishTag> = HashSet::new();
let search_key = StorageKey {
obj_key: ObjectKey {
timeline: timelineid,
tag: ObjectTag::Buffer(FIRST_NONREL_RELISH_TAG, 0),
},
lsn: Lsn(0),
};
let mut iter = self.db.raw_iterator();
iter.seek(search_key.ser()?);
while iter.valid() {
let key = StorageKey::des(iter.key().unwrap())?;
if key.obj_key.timeline != timelineid {
// reached end of this timeline in the store
break;
}
if let ObjectTag::Buffer(rel_tag, _blknum) = key.obj_key.tag {
if key.lsn <= lsn {
// visible in this snapshot
rels.insert(rel_tag);
}
}
// TODO: we could skip to next relation here like we do in list_rels(),
// but hopefully there are not that many SLRU segments or other non-rel
// entries for it to matter.
iter.next();
}
Ok(rels)
}
/// Iterate through versions of all objects in a timeline.
///
/// Returns objects in increasing key-version order.
/// Returns all versions up to and including the specified LSN.
fn objects<'a>(
&'a self,
timeline: ZTimelineId,
lsn: Lsn,
) -> Result<Box<dyn Iterator<Item = Result<(ObjectTag, Lsn, Vec<u8>)>> + 'a>> {
let start_key = StorageKey::timeline_start(timeline);
let start_key_bytes = StorageKey::ser(&start_key)?;
let iter = self.db.iterator(rocksdb::IteratorMode::From(
&start_key_bytes,
rocksdb::Direction::Forward,
));
Ok(Box::new(RocksObjects {
iter,
timeline,
lsn,
}))
}
fn compact(&self) {
self.db.compact_range::<&[u8], &[u8]>(None, None);
}
}
impl RocksObjectStore {
/// Open a RocksDB database.
pub fn open(conf: &'static PageServerConf, tenantid: &ZTenantId) -> Result<RocksObjectStore> {
let opts = Self::get_rocksdb_opts();
let obj_store = Self::new(conf, opts, tenantid)?;
Ok(obj_store)
}
/// Create a new, empty RocksDB database.
pub fn create(conf: &'static PageServerConf, tenantid: &ZTenantId) -> Result<RocksObjectStore> {
let path = conf.tenant_path(&tenantid).join("rocksdb-storage");
std::fs::create_dir(&path)?;
let mut opts = Self::get_rocksdb_opts();
opts.create_if_missing(true);
opts.set_error_if_exists(true);
let obj_store = Self::new(conf, opts, tenantid)?;
Ok(obj_store)
}
fn new(
conf: &'static PageServerConf,
mut opts: rocksdb::Options,
tenantid: &ZTenantId,
) -> Result<RocksObjectStore> {
let path = conf.tenant_path(&tenantid).join("rocksdb-storage");
let gc = Arc::new(GarbageCollector::new());
let gc_ref = gc.clone();
opts.set_compaction_filter("ttl", move |_level: u32, key: &[u8], _val: &[u8]| {
if gc_ref.was_deleted(key) {
rocksdb::compaction_filter::Decision::Remove
} else {
rocksdb::compaction_filter::Decision::Keep
}
});
let db = rocksdb::DB::open(&opts, &path)?;
let obj_store = RocksObjectStore {
_conf: conf,
db,
gc,
};
Ok(obj_store)
}
/// common options used by `open` and `create`
fn get_rocksdb_opts() -> rocksdb::Options {
let mut opts = rocksdb::Options::default();
opts.set_use_fsync(true);
opts.set_compression_type(rocksdb::DBCompressionType::Lz4);
opts
}
}
///
/// Iterator for `object_versions`. Returns all page versions of a given block, in
/// reverse LSN order.
///
struct RocksObjectVersionIter<'a> {
obj_key: ObjectKey,
dbiter: rocksdb::DBRawIterator<'a>,
first_call: bool,
}
impl<'a> RocksObjectVersionIter<'a> {
fn new(
db: &'a rocksdb::DB,
obj_key: &ObjectKey,
lsn: Lsn,
) -> Result<RocksObjectVersionIter<'a>> {
let key = StorageKey {
obj_key: obj_key.clone(),
lsn,
};
let mut dbiter = db.raw_iterator();
dbiter.seek_for_prev(StorageKey::ser(&key)?); // locate last entry
Ok(RocksObjectVersionIter {
first_call: true,
obj_key: obj_key.clone(),
dbiter,
})
}
}
impl<'a> Iterator for RocksObjectVersionIter<'a> {
type Item = (Lsn, Vec<u8>);
fn next(&mut self) -> std::option::Option<Self::Item> {
if self.first_call {
self.first_call = false;
} else {
self.dbiter.prev(); // walk backwards
}
if !self.dbiter.valid() {
return None;
}
let key = StorageKey::des(self.dbiter.key().unwrap()).unwrap();
if key.obj_key.tag != self.obj_key.tag {
return None;
}
let val = self.dbiter.value().unwrap();
let result = val.to_vec();
Some((key.lsn, result))
}
}
struct RocksObjects<'r> {
iter: rocksdb::DBIterator<'r>,
timeline: ZTimelineId,
lsn: Lsn,
}
impl<'r> Iterator for RocksObjects<'r> {
// TODO consider returning Box<[u8]>
type Item = Result<(ObjectTag, Lsn, Vec<u8>)>;
fn next(&mut self) -> Option<Self::Item> {
self.next_result().transpose()
}
}
impl<'r> RocksObjects<'r> {
fn next_result(&mut self) -> Result<Option<(ObjectTag, Lsn, Vec<u8>)>> {
for (key_bytes, v) in &mut self.iter {
let key = StorageKey::des(&key_bytes)?;
if key.obj_key.timeline != self.timeline {
return Ok(None);
}
if key.lsn > self.lsn {
// TODO can speed up by seeking iterator
continue;
}
return Ok(Some((key.obj_key.tag, key.lsn, v.to_vec())));
}
Ok(None)
}
}
///
/// Iterator for `list_objects`. Returns all objects with versions at or before the specified LSN
///
struct RocksObjectIter<'a> {
timeline: ZTimelineId,
key: StorageKey,
lsn: Lsn,
dbiter: rocksdb::DBRawIterator<'a>,
}
impl<'a> RocksObjectIter<'a> {
fn new(db: &'a rocksdb::DB, timeline: ZTimelineId, lsn: Lsn) -> Result<RocksObjectIter<'a>> {
let key = StorageKey {
obj_key: ObjectKey {
timeline,
tag: ObjectTag::FirstTag,
},
lsn: Lsn(0),
};
let dbiter = db.raw_iterator();
Ok(RocksObjectIter {
key,
timeline,
lsn,
dbiter,
})
}
}
impl<'a> Iterator for RocksObjectIter<'a> {
type Item = ObjectTag;
fn next(&mut self) -> std::option::Option<Self::Item> {
loop {
self.dbiter.seek(StorageKey::ser(&self.key).unwrap());
if !self.dbiter.valid() {
return None;
}
let key = StorageKey::des(self.dbiter.key().unwrap()).unwrap();
if key.obj_key.timeline != self.timeline {
// End of this timeline
return None;
}
self.key = key.clone();
self.key.lsn = Lsn(u64::MAX); // next seek should skip all versions
if key.lsn <= self.lsn {
// visible in this snapshot
return Some(key.obj_key.tag);
}
}
}
}

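The whole file hinges on one ordering trick: every version is stored under a serialized (ObjectKey, Lsn) pair, so keys for the same object sort together and by LSN within the object, and "find the versions visible at LSN X" becomes "seek to (key, X) and walk backwards". A standalone illustration of the same idea, with a plain BTreeMap standing in for RocksDB and simplified keys rather than the real StorageKey encoding:

    use std::collections::BTreeMap;

    fn main() {
        // Keys sort by object id first, then LSN, mirroring StorageKey { obj_key, lsn }.
        let mut store: BTreeMap<(u32, u64), &str> = BTreeMap::new();
        store.insert((7, 0x10), "base page image");
        store.insert((7, 0x20), "wal record");
        store.insert((7, 0x30), "wal record (newer than the request)");
        store.insert((8, 0x05), "a different object");

        // Equivalent of RocksObjectVersionIter: everything for object 7 up to LSN 0x25,
        // walked newest-first.
        let request_lsn: u64 = 0x25;
        for ((_, lsn), value) in store.range((7u32, 0u64)..=(7u32, request_lsn)).rev() {
            println!("LSN {:#x}: {}", lsn, value);
        }
        // Prints the version at 0x20, then 0x10 -- descending LSN order, all <= 0x25.
    }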

@@ -8,7 +8,7 @@ use crate::page_cache;
use crate::relish::*;
use crate::restore_local_repo;
use crate::waldecoder::*;
use crate::{PageServerConf, RepositoryFormat};
use crate::PageServerConf;
use anyhow::{Error, Result};
use lazy_static::lazy_static;
use log::*;
@@ -264,12 +264,6 @@ fn walreceiver_main(
)?;
if newest_segno - oldest_segno >= 10 {
// FIXME: The layered repository performs checkpointing in a separate thread, so this
// isn't needed anymore. Remove 'checkpoint' from the Timeline trait altogether?
if conf.repository_format == RepositoryFormat::RocksDb {
timeline.checkpoint()?;
}
// TODO: This is where we could remove WAL older than last_rec_lsn.
//remove_wal_files(timelineid, pg_constants::WAL_SEGMENT_SIZE, last_rec_lsn)?;
}


@@ -1,105 +0,0 @@
import pytest
from contextlib import closing
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
import psycopg2.extras
pytest_plugins = ("fixtures.zenith_fixtures")
#
# Test Garbage Collection of old page versions.
#
# This test is pretty tightly coupled with the current implementation of page version storage
# and garbage collection in object_repository.rs.
#
@pytest.mark.skip(reason=""""
The current GC test is flaky and overly strict. Since we are migrating to the layered repo
format with a different GC implementation, let's just silence this test for now. This test
only works with the RocksDB implementation.
""")
def test_gc(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin):
zenith_cli.run(["branch", "test_gc", "empty"])
pg = postgres.create_start('test_gc')
with closing(pg.connect()) as conn:
with conn.cursor() as cur:
with closing(pageserver.connect()) as psconn:
with psconn.cursor(cursor_factory = psycopg2.extras.DictCursor) as pscur:
# Get the timeline ID of our branch. We need it for the 'do_gc' command
cur.execute("SHOW zenith.zenith_timeline")
timeline = cur.fetchone()[0]
# Create a test table
cur.execute("CREATE TABLE foo(x integer)")
# Run GC, to clear out any old page versions left behind in the catalogs by
# the CREATE TABLE command. We want to have a clean slate with no garbage
# before running the actual tests below, otherwise the counts won't match
# what we expect.
print("Running GC before test")
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
row = pscur.fetchone()
print("GC duration {elapsed} ms, relations: {n_relations}, dropped {dropped}, truncated: {truncated}, deleted: {deleted}".format_map(row))
# remember the number of relations
n_relations = row['n_relations']
assert n_relations > 0
# Insert a row. The first insert will also create a metadata entry for the
# relation, with size == 1 block. Hence, bump up the expected relation count.
n_relations += 1
print("Inserting one row and running GC")
cur.execute("INSERT INTO foo VALUES (1)")
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
row = pscur.fetchone()
print("GC duration {elapsed} ms, relations: {n_relations}, dropped {dropped}, truncated: {truncated}, deleted: {deleted}".format_map(row))
assert row['n_relations'] == n_relations
assert row['dropped'] == 0
assert row['truncated'] == 31
assert row['deleted'] == 4
# Insert two more rows and run GC.
print("Inserting two more rows and running GC")
cur.execute("INSERT INTO foo VALUES (2)")
cur.execute("INSERT INTO foo VALUES (3)")
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
row = pscur.fetchone()
print("GC duration {elapsed} ms, relations: {n_relations}, dropped {dropped}, truncated: {truncated}, deleted: {deleted}".format_map(row))
assert row['n_relations'] == n_relations
assert row['dropped'] == 0
assert row['truncated'] == 31
assert row['deleted'] == 4
# Insert one more row. It creates one more page version, but doesn't affect the
# relation size.
print("Inserting one more row")
cur.execute("INSERT INTO foo VALUES (3)")
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
row = pscur.fetchone()
print("GC duration {elapsed} ms, relations: {n_relations}, dropped {dropped}, truncated: {truncated}, deleted: {deleted}".format_map(row))
assert row['n_relations'] == n_relations
assert row['dropped'] == 0
assert row['truncated'] == 31
assert row['deleted'] == 2
# Run GC again, with no changes in the database. Should not remove anything.
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
row = pscur.fetchone()
print("GC duration {elapsed} ms, relations: {n_relations}, dropped {dropped}, truncated: {truncated}, deleted: {deleted}".format_map(row))
assert row['n_relations'] == n_relations
assert row['dropped'] == 0
assert row['truncated'] == 31
assert row['deleted'] == 0
#
# Test DROP TABLE checks that relation data and metadata was deleted by GC from object storage
#
cur.execute("DROP TABLE foo")
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
row = pscur.fetchone()
print("GC duration {elapsed} ms, relations: {n_relations}, dropped {dropped}, truncated: {truncated}, deleted: {deleted}".format_map(row))
# Each relation fork is counted separately, hence 3.
assert row['dropped'] == 3


@@ -67,7 +67,7 @@ fn main() -> Result<()> {
.long("repository-format")
.takes_value(false)
.value_name("repository-format")
.help("Choose repository format, 'layered' or 'rocksdb'")
.help("Choose repository format, only 'layered' supported currently")
),
)
.subcommand(