From 04fb0a034202256daa5b8aebe6853e0b5309e1f4 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Fri, 8 Oct 2021 11:50:55 +0300 Subject: [PATCH] Add core relish backup and restore functionality --- Cargo.lock | 426 ++--- pageserver/Cargo.toml | 3 +- pageserver/README | 48 +- pageserver/src/layered_repository.rs | 101 +- .../src/layered_repository/delta_layer.rs | 7 +- pageserver/src/lib.rs | 5 +- pageserver/src/relish_storage.rs | 302 +++- pageserver/src/relish_storage/README.md | 82 + pageserver/src/relish_storage/local_fs.rs | 425 ++++- pageserver/src/relish_storage/rust_s3.rs | 278 ++- pageserver/src/relish_storage/storage_sync.rs | 1559 +++++++++++++++++ .../src/relish_storage/synced_storage.rs | 57 - pageserver/src/repository.rs | 193 +- pageserver/src/tenant_mgr.rs | 12 +- 14 files changed, 3005 insertions(+), 493 deletions(-) create mode 100644 pageserver/src/relish_storage/README.md create mode 100644 pageserver/src/relish_storage/storage_sync.rs delete mode 100644 pageserver/src/relish_storage/synced_storage.rs diff --git a/Cargo.lock b/Cargo.lock index 5f36f48966..8f7967ad0c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -37,15 +37,15 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.42" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "595d3cfa7a60d4555cb5067b99f07142a08ea778de5cf993f7b75c7d8fabc486" +checksum = "61604a8f862e1d5c3229fdd78f8b02c68dcf73a4c4b05fd636d12240aaa242c1" [[package]] name = "async-trait" -version = "0.1.50" +version = "0.1.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b98e84bbb4cbcdd97da190ba0c58a1bb0de2c1fdf67d159e192ed766aeca722" +checksum = "44318e776df68115a881de9a8fd1b9e53368d7a4a5ce4cc48517da3393233a5e" dependencies = [ "proc-macro2", "quote", @@ -54,9 +54,9 @@ dependencies = [ [[package]] name = "attohttpc" -version = "0.16.3" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb8867f378f33f78a811a8eb9bf108ad99430d7aad43315dd9319c827ef6247" +checksum = "9a8bda305457262b339322106c776e3fd21df860018e566eb6a5b1aa4b6ae02d" dependencies = [ "http", "log", @@ -111,9 +111,9 @@ dependencies = [ [[package]] name = "aws-creds" -version = "0.26.0" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1331d069460a674d42bd27c12b47ce578f789954c7bd7f239fd030771eca6616" +checksum = "a5e1c8f64305d3f3096cb247983a3cae16f8c2960129699bcb70639e31180794" dependencies = [ "anyhow", "attohttpc", @@ -225,9 +225,9 @@ checksum = "5988cb1d626264ac94100be357308f29ff7cbdd3b36bda27f450a4ee3f713426" [[package]] name = "bumpalo" -version = "3.7.0" +version = "3.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c59e7af012c713f529e7a3ee57ce9b31ddd858d4b512923602f74608b009631" +checksum = "d9df67f7bf9ef8498769f994239c45613ef0c5899415fb58e9add412d2c1a538" [[package]] name = "byteorder" @@ -237,18 +237,18 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "bytes" -version = "1.0.1" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b700ce4376041dcd0a327fd0097c41095743c4c8af8887265942faf1100bd040" +checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" dependencies = [ "serde", ] [[package]] name = "cc" -version = "1.0.69" +version = "1.0.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e70cc2f62c6ce1868963827bd677764c62d07c3d9a3e1fb1177ee1a9ab199eb2" +checksum = "79c2681d6594606957bbb8631c4b90a7fcaaa72cdb714743a437b156d6a7eedd" [[package]] name = "cexpr" @@ -259,12 +259,6 @@ dependencies = [ "nom", ] -[[package]] -name = "cfg-if" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" - [[package]] name = "cfg-if" version = "1.0.0" @@ -286,9 +280,9 @@ dependencies = [ [[package]] name = "clang-sys" -version = "1.2.0" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "853eda514c284c2287f4bf20ae614f8781f40a81d32ecda6e91449304dfe077c" +checksum = "10612c0ec0e0a1ff0e97980647cb058a6e7aedb913d01d009c406b8b7d0b26ee" dependencies = [ "glob", "libc", @@ -312,18 +306,18 @@ dependencies = [ [[package]] name = "const_format" -version = "0.2.21" +version = "0.2.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4556f63e28a78fa5e6f310cfea5647a25636def49a338ab69e33b34a3382057b" +checksum = "22bc6cd49b0ec407b680c3e380182b6ac63b73991cb7602de350352fc309b614" dependencies = [ "const_format_proc_macros", ] [[package]] name = "const_format_proc_macros" -version = "0.2.21" +version = "0.2.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "552782506c398da94466b364973b563887e0ca078bf33a76d4163736165e3594" +checksum = "ef196d5d972878a48da7decb7686eded338b4858fbabeed513d63a7c98b2b82d" dependencies = [ "proc-macro2", "quote", @@ -358,9 +352,9 @@ dependencies = [ [[package]] name = "core-foundation" -version = "0.9.1" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a89e2ae426ea83155dccf10c0fa6b1463ef6d5fcb44cee0b224a408fa640a62" +checksum = "6888e10551bb93e424d8df1d07f1a8b4fceb0001a3a4b048bfc47554946f47b3" dependencies = [ "core-foundation-sys", "libc", @@ -368,15 +362,15 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea221b5284a47e40033bf9b66f35f984ec0ea2931eb03505246cd27a963f981b" +checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" [[package]] name = "cpufeatures" -version = "0.1.5" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66c99696f6c9dd7f35d486b9d04d7e6e202aa3e8c40d553f2fdf5e7e0c6a71ef" +checksum = "95059428f66df56b63431fdb4e1947ed2190586af5c5a8a8b71122bdf5a7f469" dependencies = [ "libc", ] @@ -392,9 +386,19 @@ dependencies = [ [[package]] name = "crypto-mac" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4857fd85a0c34b3c3297875b747c1e02e06b6a0ea32dd892d8192b9ce0813ea6" +checksum = "bff07008ec701e8028e2ceb8f83f0e4274ee62bd2dbdc4fefff2e9a91824081a" +dependencies = [ + "generic-array", + "subtle", +] + +[[package]] +name = "crypto-mac" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1d1a86f49236c215f271d40892d5fc950490551400b02ef360692c29815c714" dependencies = [ "generic-array", "subtle", @@ -421,9 +425,9 @@ dependencies = [ [[package]] name = "dirs" -version = "3.0.2" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30baa043103c9d0c2a57cf537cc2f35623889dc0d405e6c3cccfadbc81c71309" +checksum = 
"ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" dependencies = [ "dirs-sys", ] @@ -454,7 +458,7 @@ version = "0.8.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80df024fbc5ac80f87dfef0d9f5209a252f2a497f7f42944cff24d8253cac065" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", ] [[package]] @@ -478,11 +482,11 @@ checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" [[package]] name = "filetime" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d34cfa13a63ae058bfa601fe9e313bbdb3746427c1459185464ce0fcf62e1e8" +checksum = "975ccf83d8d9d0d84682850a38c8169027be83368805971cc4f238c2b245bc98" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "libc", "redox_syscall", "winapi", @@ -537,9 +541,9 @@ checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7" [[package]] name = "futures" -version = "0.3.15" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e7e43a803dae2fa37c1f6a8fe121e1f7bf9548b4dfc0522a42f34145dadfc27" +checksum = "a12aa0eb539080d55c3f2d45a67c3b58b6b0773c1a3ca2dfec66d58c97fd66ca" dependencies = [ "futures-channel", "futures-core", @@ -552,9 +556,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.15" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e682a68b29a882df0545c143dc3646daefe80ba479bcdede94d5a703de2871e2" +checksum = "5da6ba8c3bb3c165d3c7319fc1cc8304facf1fb8db99c5de877183c08a273888" dependencies = [ "futures-core", "futures-sink", @@ -562,15 +566,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.15" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0402f765d8a89a26043b889b26ce3c4679d268fa6bb22cd7c6aad98340e179d1" +checksum = "88d1c26957f23603395cd326b0ffe64124b818f4449552f960d815cfba83a53d" [[package]] name = "futures-executor" -version = "0.3.15" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "badaa6a909fac9e7236d0620a2f57f7664640c56575b71a7552fbd68deafab79" +checksum = "45025be030969d763025784f7f355043dc6bc74093e4ecc5000ca4dc50d8745c" dependencies = [ "futures-core", "futures-task", @@ -579,15 +583,15 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.15" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acc499defb3b348f8d8f3f66415835a9131856ff7714bf10dadfc4ec4bdb29a1" +checksum = "522de2a0fe3e380f1bc577ba0474108faf3f6b18321dbf60b3b9c39a75073377" [[package]] name = "futures-macro" -version = "0.3.15" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4c40298486cdf52cc00cd6d6987892ba502c7656a16a4192a9992b1ccedd121" +checksum = "18e4a4b95cea4b4ccbcf1c5675ca7c4ee4e9e75eb79944d07defde18068f79bb" dependencies = [ "autocfg", "proc-macro-hack", @@ -598,21 +602,21 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.15" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a57bead0ceff0d6dde8f465ecd96c9338121bb7717d3e7b108059531870c4282" +checksum = "36ea153c13024fe480590b3e3d4cad89a0cfacecc24577b68f86c6ced9c2bc11" [[package]] name = "futures-task" -version = "0.3.15" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8a16bef9fc1a4dddb5bee51c989e3fbba26569cbb0e31f5b303c184e3dd33dae" +checksum = "1d3d00f4eddb73e498a54394f228cd55853bdf059259e8e7bc6e69d408892e99" [[package]] name = "futures-util" -version = "0.3.15" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "feb5c238d27e2bf94ffdfd27b2c29e3df4a68c4193bb6427384259e2bf191967" +checksum = "36568465210a3a6ee45e1f165136d68671471a501e632e9a98d96872222b5481" dependencies = [ "autocfg", "futures-channel", @@ -645,7 +649,7 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "libc", "wasi", ] @@ -658,9 +662,9 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" [[package]] name = "h2" -version = "0.3.3" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "825343c4eef0b63f541f8903f395dc5beb362a979b5799a84062527ef1e37726" +checksum = "6c06815895acec637cd6ed6e9662c935b866d20a106f8361892893a7d9234964" dependencies = [ "bytes", "fnv", @@ -677,9 +681,9 @@ dependencies = [ [[package]] name = "half" -version = "1.7.1" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62aca2aba2d62b4a7f5b33f3712cb1b0692779a56fb510499d5c0aa594daeaf3" +checksum = "ac5956d4e63858efaec57e0d6c1c2f6a41e1487f830314a324ccd7e2223a7ca0" [[package]] name = "hashbrown" @@ -716,9 +720,9 @@ dependencies = [ [[package]] name = "hex-literal" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76505e26b6ca3bbdbbb360b68472abbb80998c5fa5dc43672eca34f28258e138" +checksum = "21e4590e13640f19f249fe3e4eca5113bc4289f2497710378190e7f4bd96f45b" [[package]] name = "hmac" @@ -726,15 +730,25 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1441c6b1e930e2817404b5046f1f989899143a12bf92de603b69f4e0aee1e15" dependencies = [ - "crypto-mac", + "crypto-mac 0.10.1", + "digest", +] + +[[package]] +name = "hmac" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a2a2320eb7ec0ebe8da8f744d7812d9fc4cb4d09344ac01898dbcb6a20ae69b" +dependencies = [ + "crypto-mac 0.11.1", "digest", ] [[package]] name = "http" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "527e8c9ac747e28542699a951517aa9a6945af506cd1f2e1b53a576c17b6cc11" +checksum = "1323096b05d41827dadeaee54c9981958c0f94e670bc94ed80037d1a7b8b186b" dependencies = [ "bytes", "fnv", @@ -743,9 +757,9 @@ dependencies = [ [[package]] name = "http-body" -version = "0.4.2" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60daa14be0e0786db0f03a9e57cb404c9d756eed2b6c62b9ea98ec5743ec75a9" +checksum = "399c583b2979440c60be0821a6199eca73bc3c8dcd9d070d75ac726e2c6186e5" dependencies = [ "bytes", "http", @@ -754,9 +768,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.4.1" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3a87b616e37e93c22fb19bcd386f02f3af5ea98a25670ad0fce773de23c5e68" +checksum = "acd94fdbe1d4ff688b67b04eee2e17bd50995534a61539e45adfefb45e5e5503" [[package]] name = "httpdate" @@ -772,9 +786,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.10" 
+version = "0.14.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7728a72c4c7d72665fde02204bcbd93b247721025b222ef78606f14513e0fd03" +checksum = "15d1cfb9e4f68655fa04c01f59edb405b6074a0f7118ea881e5026e4a1cd8593" dependencies = [ "bytes", "futures-channel", @@ -830,11 +844,11 @@ dependencies = [ [[package]] name = "instant" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bee0328b1209d157ef001c94dd85b4f8f64139adb0eac2659f4b08382b2f474d" +checksum = "716d3d89f35ac6a34fd0eed635395f4c3b76fa889338a4632e5231a8684216bd" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", ] [[package]] @@ -845,15 +859,15 @@ checksum = "68f2d64f2edebec4ce84ad108148e67e1064789bee435edc5b60ad398714a3a9" [[package]] name = "itoa" -version = "0.4.7" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" [[package]] name = "js-sys" -version = "0.3.51" +version = "0.3.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83bdfbace3a0e81a4253f73b49e960b053e396a11012cbd49b9b74d6a2b67062" +checksum = "7cc9ffccd38c451a86bf13657df244e9c3f37493cce8e5e21e940963777acc84" dependencies = [ "wasm-bindgen", ] @@ -886,25 +900,25 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.101" +version = "0.2.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cb00336871be5ed2c8ed44b60ae9959dc5b9f08539422ed43f09e34ecaeba21" +checksum = "dd8f7255a17a627354f321ef0055d63b898c6fb27eff628af4d1b66b7331edf6" [[package]] name = "libloading" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f84d96438c15fcd6c3f244c8fce01d1e2b9c6b5623e9c711dc9286d8fc92d6a" +checksum = "c0cf036d15402bea3c5d4de17b3fce76b3e4a56ebc1f577be0e7a72f7c607cf0" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "winapi", ] [[package]] name = "lock_api" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0382880606dff6d15c9476c416d18690b72742aa7b605bb6dd6ec9030fbf07eb" +checksum = "712a4d093c9976e24e7dbca41db895dabcbac38eb5f4045393d17a95bdfb1109" dependencies = [ "scopeguard", ] @@ -915,7 +929,7 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", ] [[package]] @@ -929,9 +943,9 @@ dependencies = [ [[package]] name = "matches" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" +checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" [[package]] name = "maybe-async" @@ -963,9 +977,9 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "memchr" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" [[package]] name = "memoffset" @@ -1015,9 +1029,9 @@ dependencies = [ [[package]] name = "native-tls" 
-version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8d96b2e1c8da3957d58100b09f102c6d9cfdfced01b7ec5a8974044bb09dbd4" +checksum = "48ba9f7719b5a0f42f338907614285fb5fd70e53858141f69898a1fb7203b24d" dependencies = [ "lazy_static", "libc", @@ -1033,14 +1047,15 @@ dependencies = [ [[package]] name = "nix" -version = "0.20.0" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa9b4819da1bc61c0ea48b63b7bc8604064dd43013e7cc325df098d49cd7c18a" +checksum = "f5e06129fb611568ef4e868c14b326274959aa70ff7776e9d55323531c374945" dependencies = [ "bitflags", "cc", - "cfg-if 1.0.0", + "cfg-if", "libc", + "memoffset", ] [[package]] @@ -1118,12 +1133,12 @@ checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" [[package]] name = "openssl" -version = "0.10.35" +version = "0.10.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "549430950c79ae24e6d02e0b7404534ecf311d94cc9f861e9e4020187d13d885" +checksum = "8d9facdb76fec0b73c406f125d44d86fdad818d66fef0531eec9233ca425ff4a" dependencies = [ "bitflags", - "cfg-if 1.0.0", + "cfg-if", "foreign-types", "libc", "once_cell", @@ -1138,9 +1153,9 @@ checksum = "28988d872ab76095a6e6ac88d99b54fd267702734fd7ffe610ca27f533ddb95a" [[package]] name = "openssl-sys" -version = "0.9.65" +version = "0.9.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a7907e3bfa08bb85105209cdfcb6c63d109f8f6c1ed6ca318fff5c1853fbc1d" +checksum = "69df2d8dfc6ce3aaf44b40dec6f487d5a886516cf6879c49e98e0710f310a058" dependencies = [ "autocfg", "cc", @@ -1193,6 +1208,7 @@ dependencies = [ "serde_json", "signal-hook", "tar", + "tempfile", "thiserror", "tokio", "toml", @@ -1204,9 +1220,9 @@ dependencies = [ [[package]] name = "parking_lot" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d7744ac029df22dca6284efe4e898991d28e3085c706c972bcd7da4a27a15eb" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" dependencies = [ "instant", "lock_api", @@ -1215,11 +1231,11 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018" +checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "instant", "libc", "redox_syscall", @@ -1282,9 +1298,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.19" +version = "0.3.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c" +checksum = "7c9b1041b4387893b91ee6746cddfc28516aff326a3519fb2adf820932c5e6cb" [[package]] name = "postgres" @@ -1309,7 +1325,7 @@ dependencies = [ "byteorder", "bytes", "fallible-iterator", - "hmac", + "hmac 0.10.1", "lazy_static", "md-5", "memchr", @@ -1352,9 +1368,9 @@ dependencies = [ [[package]] name = "ppv-lite86" -version = "0.2.10" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" +checksum = "c3ca011bd0129ff4ae15cd04c4eef202cadf6c51c21e47aba319b4e0501db741" [[package]] name = "proc-macro-hack" @@ -1370,9 +1386,9 @@ checksum = 
"bc881b2c22681370c6a780e47af9840ef841837bc98118431d4e1868bd0c1086" [[package]] name = "proc-macro2" -version = "1.0.27" +version = "1.0.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0d8caf72986c1a598726adc988bb5984792ef84f5ee5aa50209145ee8077038" +checksum = "edc3358ebc67bc8b7fa0c007f945b0b18226f78437d61bec735a9eb96b61ee70" dependencies = [ "unicode-xid", ] @@ -1383,7 +1399,7 @@ version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5986aa8d62380092d2f50f8b1cdba9cb9b6731ffd4b25b51fd126b6c3e05b99c" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "fnv", "lazy_static", "memchr", @@ -1421,9 +1437,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.9" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05" dependencies = [ "proc-macro2", ] @@ -1476,9 +1492,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.2.9" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ab49abadf3f9e1c4bc499e8845e152ad87d2ad2d30371841171169e9d75feee" +checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff" dependencies = [ "bitflags", ] @@ -1495,9 +1511,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.4.6" +version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a26af418b574bd56588335b3a3659a65725d4e636eb1016c2f9e3b38c7cc759" +checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" dependencies = [ "aho-corasick", "memchr", @@ -1530,9 +1546,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.4" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "246e9f61b9bb77df069a947682be06e31ac43ea37862e244a69f177694ea6d22" +checksum = "51c732d463dd300362ffb44b7b125f299c23d2990411a4253824630ebc7467fb" dependencies = [ "base64 0.13.0", "bytes", @@ -1593,30 +1609,30 @@ dependencies = [ [[package]] name = "rust-ini" -version = "0.16.1" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55b134767a87e0b086f73a4ce569ac9ce7d202f39c8eab6caa266e2617e73ac6" +checksum = "63471c4aa97a1cf8332a5f97709a79a4234698de6a1f5087faf66f2dae810e22" dependencies = [ - "cfg-if 0.1.10", + "cfg-if", "ordered-multimap", ] [[package]] name = "rust-s3" -version = "0.27.0-rc4" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c93272c1d654d492f8ab30b94cd43d98f2700b1db55b2576aff7712ce40e3ef" +checksum = "b2f26775d15f43dc848ef0ec65f83de8775549e486c7a3a576652049a7122d32" dependencies = [ "anyhow", "async-trait", "aws-creds", "aws-region", "base64 0.13.0", - "cfg-if 1.0.0", + "cfg-if", "chrono", "futures", "hex", - "hmac", + "hmac 0.11.0", "http", "log", "maybe-async", @@ -1663,12 +1679,11 @@ dependencies = [ [[package]] name = "rustls-split" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9d63f11490b4d8d45a362e171d7fe4a9ef154770a339e696a05eb354bc36837" +checksum = "7fb079b52cfdb005752b7c3c646048e702003576a8321058e4c8b38227c11aa6" dependencies = [ "rustls", - "webpki", ] [[package]] @@ -1727,9 +1742,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.3.0" +version = "2.4.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e4effb91b4b8b6fb7732e670b6cee160278ff8e6bf485c7805d9e319d76e284" +checksum = "a9dd14d83160b528b7bfd66439110573efcfbe281b17fc2ca9f39f550d619c7e" dependencies = [ "core-foundation-sys", "libc", @@ -1752,18 +1767,18 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" -version = "1.0.126" +version = "1.0.130" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03" +checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913" dependencies = [ "serde_derive", ] [[package]] name = "serde-xml-rs" -version = "0.4.1" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0bf1ba0696ccf0872866277143ff1fd14d22eec235d2b23702f95e6660f7dfa" +checksum = "65162e9059be2f6a3421ebbb4fef3e74b7d9e7c60c50a0e292c6239f19f1edfa" dependencies = [ "log", "serde", @@ -1773,9 +1788,9 @@ dependencies = [ [[package]] name = "serde_cbor" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e18acfa2f90e8b735b2836ab8d538de304cbb6729a7360729ea5a895d15a622" +checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" dependencies = [ "half", "serde", @@ -1783,9 +1798,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.126" +version = "1.0.130" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "963a7dbc9895aeac7ac90e74f34a5d5261828f79df35cbed41e10189d3804d43" +checksum = "d7bc1a1ab1961464eae040d96713baa5a724a8152c1222492465b54322ec508b" dependencies = [ "proc-macro2", "quote", @@ -1794,9 +1809,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.64" +version = "1.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799e97dc9fdae36a5c8b8f2cae9ce2ee9fdce2058c57a93e6099d919fd982f79" +checksum = "0f690853975602e1bfe1ccbf50504d67174e3bcf340f23b5ea9992e0587a52d8" dependencies = [ "itoa", "ryu", @@ -1817,12 +1832,12 @@ dependencies = [ [[package]] name = "sha2" -version = "0.9.5" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b362ae5752fd2137731f9fa25fd4d9058af34666ca1966fb969119cc35719f12" +checksum = "b69f9a4c9740d74c5baa3fd2e547f9525fa8088a8a958e0ca2409a514e33f5fa" dependencies = [ "block-buffer", - "cfg-if 1.0.0", + "cfg-if", "cpufeatures", "digest", "opaque-debug", @@ -1830,18 +1845,18 @@ dependencies = [ [[package]] name = "sharded-slab" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "740223c51853f3145fe7c90360d2d4232f2b62e3449489c207eccde818979982" +checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31" dependencies = [ "lazy_static", ] [[package]] name = "shlex" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42a568c8f2cd051a4d283bd6eb0343ac214c1b0f1ac19f93e1175b2dee38c73d" +checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" [[package]] name = "signal-hook" @@ -1876,27 +1891,27 @@ dependencies = [ [[package]] name = "siphasher" -version = "0.3.5" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbce6d4507c7e4a3962091436e56e95290cb71fa302d0d270e32130b75fbff27" +checksum = 
"533494a8f9b724d33625ab53c6c4800f7cc445895924a8ef649222dcb76e938b" [[package]] name = "slab" -version = "0.4.3" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f173ac3d1a7e3b28003f40de0b5ce7fe2710f9b9dc3fc38664cebee46b3b6527" +checksum = "9def91fd1e018fe007022791f865d0ccc9b3a0d5001e01aabb8b40e46000afb5" [[package]] name = "smallvec" -version = "1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" +checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309" [[package]] name = "socket2" -version = "0.4.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e3dfc207c526015c632472a77be09cf1b6e46866581aecae5cc38fb4235dea2" +checksum = "5dc90fe6c7be1a323296982db1836d1ea9e47b6839496dde9a541bc496df3516" dependencies = [ "libc", "winapi", @@ -1932,9 +1947,9 @@ checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" [[package]] name = "syn" -version = "1.0.73" +version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f71489ff30030d2ae598524f61326b902466f72a0fb1a8564c001cc63425bcc7" +checksum = "d010a1623fbd906d51d650a9916aaefc05ffa0e4053ff7fe601167f3e715d194" dependencies = [ "proc-macro2", "quote", @@ -1949,9 +1964,9 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] name = "tar" -version = "0.4.35" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d779dc6aeff029314570f666ec83f19df7280bb36ef338442cfa8c604021b80" +checksum = "d6f5515d3add52e0bbdcad7b83c388bb36ba7b754dda3b5f5bc2d38640cdba5c" dependencies = [ "filetime", "libc", @@ -1964,7 +1979,7 @@ version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "libc", "rand", "redox_syscall", @@ -1992,18 +2007,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.26" +version = "1.0.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93119e4feac1cbe6c798c34d3a53ea0026b0b1de6a120deef895137c0529bfe2" +checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.26" +version = "1.0.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "060d69a0afe7796bf42e9e2ff91f5ee691fb15c53d38b4b62a9a53eb23164745" +checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" dependencies = [ "proc-macro2", "quote", @@ -2032,9 +2047,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.2.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b5220f05bb7de7f3f53c7c065e1199b3172696fe2db9f9c4d8ad9b4ee74c342" +checksum = "f83b2a3d4d9091d0abd7eba4dc2710b1718583bd4d8992e2190720ea38f391f7" dependencies = [ "tinyvec_macros", ] @@ -2047,9 +2062,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tokio" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4efe6fc2395938c8155973d7be49fe8d03a843726e285e100a8a383cc0154ce" +checksum = "c2c2416fdedca8443ae44b4527de1ea633af61d8f7169ffa6e72c5b53d24efcc" 
dependencies = [ "autocfg", "bytes", @@ -2066,9 +2081,9 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "1.3.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54473be61f4ebe4efd09cec9bd5d16fa51d70ea0192213d754d2d500457db110" +checksum = "b2dd85aeaba7b68df939bd357c6afb36c87951be9e80bf9c859f2fc3e9fca0fd" dependencies = [ "proc-macro2", "quote", @@ -2120,9 +2135,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.6.7" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1caa0b0c8d94a049db56b5acf8cba99dc0623aab1b26d5b5f5e2d945846b3592" +checksum = "08d3725d3efa29485e87311c5b699de63cde14b00ed4d256b8318aa30ca452cd" dependencies = [ "bytes", "futures-core", @@ -2153,7 +2168,7 @@ version = "0.1.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "375a639232caf30edfc78e8d89b2d4c375515393e7af7e16f01cd96917fb2105" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -2230,18 +2245,15 @@ checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" [[package]] name = "typenum" -version = "1.13.0" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "879f6906492a7cd215bfa4cf595b600146ccfac0c79bcbd1f3000162af5e8b06" +checksum = "b63708a265f51345575b27fe43f9500ad611579e764c79edbc2037b1121959ec" [[package]] name = "unicode-bidi" -version = "0.3.5" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eeb8be209bb1c96b7c177c7420d26e04eccacb0eeae6b980e35fcb74678107e0" -dependencies = [ - "matches", -] +checksum = "1a01404663e3db436ed2746d9fefef640d868edae3cceb81c3b8d5732fda678f" [[package]] name = "unicode-normalization" @@ -2254,9 +2266,9 @@ dependencies = [ [[package]] name = "unicode-width" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" +checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" [[package]] name = "unicode-xid" @@ -2364,21 +2376,19 @@ checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" [[package]] name = "wasm-bindgen" -version = "0.2.74" +version = "0.2.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54ee1d4ed486f78874278e63e4069fc1ab9f6a18ca492076ffb90c5eb2997fd" +checksum = "632f73e236b219150ea279196e54e610f5dbafa5d61786303d4da54f84e47fce" dependencies = [ - "cfg-if 1.0.0", - "serde", - "serde_json", + "cfg-if", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.74" +version = "0.2.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b33f6a0694ccfea53d94db8b2ed1c3a8a4c86dd936b13b9f0a15ec4a451b900" +checksum = "a317bf8f9fba2476b4b2c85ef4c4af8ff39c3c7f0cdfeed4f82c34a880aa837b" dependencies = [ "bumpalo", "lazy_static", @@ -2391,11 +2401,11 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.24" +version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fba7978c679d53ce2d0ac80c8c175840feb849a161664365d1287b41f2e67f1" +checksum = "8e8d7523cb1f2a4c96c1317ca690031b714a51cc14e05f712446691f413f5d39" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "js-sys", "wasm-bindgen", "web-sys", @@ -2403,9 +2413,9 @@ dependencies = [ [[package]] name = 
"wasm-bindgen-macro" -version = "0.2.74" +version = "0.2.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "088169ca61430fe1e58b8096c24975251700e7b1f6fd91cc9d59b04fb9b18bd4" +checksum = "d56146e7c495528bf6587663bea13a8eb588d39b36b679d83972e1a2dbbdacf9" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2413,9 +2423,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.74" +version = "0.2.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be2241542ff3d9f241f5e2cb6dd09b37efe786df8851c54957683a49f0987a97" +checksum = "7803e0eea25835f8abdc585cd3021b3deb11543c6fe226dcd30b228857c5c5ab" dependencies = [ "proc-macro2", "quote", @@ -2426,15 +2436,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.74" +version = "0.2.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7cff876b8f18eed75a66cf49b65e7f967cb354a7aa16003fb55dbfd25b44b4f" +checksum = "0237232789cf037d5480773fe568aac745bfe2afbc11a863e97901780a6b47cc" [[package]] name = "web-sys" -version = "0.3.51" +version = "0.3.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e828417b379f3df7111d3a2a9e5753706cae29c41f7c4029ee9fd77f3e09e582" +checksum = "38eb105f1c59d9eaa6b5cdc92b859d85b926e82cb2e0945cd0c9259faa6fe9fb" dependencies = [ "js-sys", "wasm-bindgen", @@ -2538,9 +2548,9 @@ dependencies = [ [[package]] name = "xml-rs" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b07db065a5cf61a7e4ba64f29e67db906fb1787316516c4e6e5ff0fea1efcd8a" +checksum = "d2d7d3948613f75c98fd9328cfdcc45acc4d360655289d0a7d4ec931392200a3" [[package]] name = "zenith" diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index 33c911c840..d696f72285 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -17,7 +17,7 @@ lazy_static = "1.4.0" log = "0.4.14" clap = "2.33.0" daemonize = "0.4.1" -tokio = { version = "1.11", features = ["process", "macros", "fs", "rt"] } +tokio = { version = "1.11", features = ["process", "macros", "fs", "rt", "io-util"] } postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" } postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" } postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" } @@ -45,3 +45,4 @@ workspace_hack = { path = "../workspace_hack" } [dev-dependencies] hex-literal = "0.3" +tempfile = "3.2" diff --git a/pageserver/README b/pageserver/README index 0f858bb4ed..76ace04362 100644 --- a/pageserver/README +++ b/pageserver/README @@ -41,7 +41,7 @@ Legend: +--+ .... -. . Component that we will need, but doesn't exist at the moment. A TODO. +. . Component at its early development phase. .... ---> Data flow @@ -116,13 +116,49 @@ Remove old on-disk layer files that are no longer needed according to the PITR retention policy -TODO: Backup service --------------------- +### Backup service -The backup service is responsible for periodically pushing the chunks to S3. +The backup service, responsible for storing pageserver recovery data externally. -TODO: How/when do restore from S3? Whenever we get a GetPage@LSN request for -a chunk we don't currently have? Or when an external Control Plane tells us? +Currently, pageserver stores its files in a filesystem directory it's pointed to. 
+That working directory could be rather ephemeral in cases such as "a pageserver pod running in k8s with no persistent volumes attached".
+Therefore, the server interacts with an external, more reliable storage to back up and restore its state.
+
+The storage support code is extensible and can accommodate arbitrary storages, as long as they implement a certain Rust trait.
+The following implementations are present:
+* local filesystem - to use mainly in tests
+* AWS S3 - to use in production
+
+Implementation details are covered in the [storage readme](./src/relish_storage/README.md) and the corresponding Rust file docs.
+
+The backup service is disabled by default and can be enabled to interact with a single remote storage.
+
+CLI examples:
+* Local FS: `${PAGESERVER_BIN} --relish-storage-local-path="/some/local/path/"`
+* AWS S3 : `${PAGESERVER_BIN} --relish-storage-s3-bucket="some-sample-bucket" --relish-storage-region="eu-north-1" --relish-storage-access-key="SOMEKEYAAAAASADSAH*#" --relish-storage-secret-access-key="SOMEsEcReTsd292v"`
+
+For Amazon AWS S3, the key id and secret access key can be found in `~/.aws/credentials` (if awscli was ever configured to work with the desired bucket) or on the AWS Settings page for a given user. Also note that bucket names do not contain any protocol prefix when used on AWS.
+For local S3 installations, refer to their documentation for the name format and credentials.
+
+Similar to other pageserver settings, a toml config file can be used to configure either of the storages as the backup target.
+Required sections are:
+
+```toml
+[relish_storage]
+local_path = '/Users/someonetoignore/Downloads/tmp_dir/'
+```
+
+or
+
+```toml
+[relish_storage]
+bucket_name = 'some-sample-bucket'
+bucket_region = 'eu-north-1'
+access_key_id = 'SOMEKEYAAAAASADSAH*#'
+secret_access_key = 'SOMEsEcReTsd292v'
+```
+
+Alternatively, the `AWS_SECRET_ACCESS_KEY` and `AWS_ACCESS_KEY_ID` environment variables can be used to specify the credentials instead of any of the ways above.

 TODO: Sharding
 --------------------
diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs
index 4e47009326..6f201d83a7 100644
--- a/pageserver/src/layered_repository.rs
+++ b/pageserver/src/layered_repository.rs
@@ -113,6 +113,9 @@ lazy_static! {
 /// The name of the metadata file pageserver creates per timeline.
 pub const METADATA_FILE_NAME: &str = "metadata";
+/// Parts of the `.zenith/tenants/<tenant_id>/timelines/<timeline_id>` directory prefix.
+pub const TENANTS_SEGMENT_NAME: &str = "tenants";
+pub const TIMELINES_SEGMENT_NAME: &str = "timelines";

 ///
 /// Repository consists of multiple timelines. Keep them in a hash table.
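The metadata hunks below move the on-disk (de)serialization into `TimelineMetadata::{from_bytes, to_bytes}` (see further down). A minimal sketch of the round-trip this enables, assuming the patch's types (`TimelineMetadata`, `Lsn`) are in scope and the code runs inside a function returning `anyhow::Result`; the concrete LSN values are made up:

```rust
// Illustrative only: serialize timeline metadata into its fixed-size,
// checksummed on-disk representation, then read it back.
let metadata = TimelineMetadata {
    disk_consistent_lsn: Lsn(0x1_0000_0000), // must be aligned, per the assert in from_bytes
    prev_record_lsn: None,
    ancestor_timeline: None,
    ancestor_lsn: Lsn(0),
};
let bytes = metadata.to_bytes()?; // always METADATA_MAX_SAFE_SIZE bytes, checksum in the tail
let restored = TimelineMetadata::from_bytes(&bytes)?; // verifies both size and checksum
assert_eq!(metadata, restored); // PartialEq is derived by this patch
```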
@@ -266,7 +269,7 @@

         let mut timeline = LayeredTimeline::new(
             self.conf,
-            metadata,
+            metadata.clone(),
             ancestor,
             timelineid,
             self.tenantid,
@@ -276,15 +279,9 @@
         )?;

         // List the layers on disk, and load them into the layer map
-        let _loaded_layers = timeline.load_layer_map(disk_consistent_lsn)?;
+        let loaded_layers = timeline.load_layer_map(disk_consistent_lsn)?;
         if self.upload_relishes {
-            schedule_timeline_upload(());
-            // schedule_timeline_upload(
-            //     self.tenantid,
-            //     timelineid,
-            //     loaded_layers,
-            //     disk_consistent_lsn,
-            // );
+            schedule_timeline_upload(self.tenantid, timelineid, loaded_layers, metadata);
         }

         // needs to be after load_layer_map
@@ -412,13 +409,7 @@
             .create_new(first_save)
             .open(&path)?;

-        let mut metadata_bytes = TimelineMetadata::ser(data)?;
-
-        assert!(metadata_bytes.len() <= METADATA_MAX_DATA_SIZE);
-        metadata_bytes.resize(METADATA_MAX_SAFE_SIZE, 0u8);
-
-        let checksum = crc32c::crc32c(&metadata_bytes[..METADATA_MAX_DATA_SIZE]);
-        metadata_bytes[METADATA_MAX_DATA_SIZE..].copy_from_slice(&u32::to_le_bytes(checksum));
+        let metadata_bytes = data.to_bytes().context("Failed to get metadata bytes")?;

         if file.write(&metadata_bytes)? != metadata_bytes.len() {
             bail!("Could not write all the metadata bytes in a single call");
@@ -445,20 +436,7 @@
     ) -> Result<TimelineMetadata> {
         let path = metadata_path(conf, timelineid, tenantid);
         let metadata_bytes = std::fs::read(&path)?;
-        ensure!(metadata_bytes.len() == METADATA_MAX_SAFE_SIZE);
-
-        let data = &metadata_bytes[..METADATA_MAX_DATA_SIZE];
-        let calculated_checksum = crc32c::crc32c(data);
-
-        let checksum_bytes: &[u8; METADATA_CHECKSUM_SIZE] =
-            metadata_bytes[METADATA_MAX_DATA_SIZE..].try_into()?;
-        let expected_checksum = u32::from_le_bytes(*checksum_bytes);
-        ensure!(calculated_checksum == expected_checksum);
-
-        let data = TimelineMetadata::des_prefix(data)?;
-        assert!(data.disk_consistent_lsn.is_aligned());
-
-        Ok(data)
+        TimelineMetadata::from_bytes(&metadata_bytes)
     }

     //
@@ -586,9 +564,11 @@ impl LayeredRepository {
 /// Metadata stored on disk for each timeline
 ///
 /// The fields correspond to the values we hold in memory, in LayeredTimeline.
-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
 pub struct TimelineMetadata {
-    disk_consistent_lsn: Lsn,
+    /// [`Lsn`] that corresponds to the timeline directory
+    /// contents, stored locally in the pageserver workdir.
+    pub disk_consistent_lsn: Lsn,

     // This is only set if we know it. We track it in memory when the page
     // server is running, but we only track the value corresponding to
@@ -600,10 +580,45 @@ pub struct TimelineMetadata {
     // 'prev_record_lsn' value in memory again. This is only really needed when
     // doing a clean shutdown, so that there is no more WAL beyond
     // 'disk_consistent_lsn'
-    prev_record_lsn: Option<Lsn>,
+    pub prev_record_lsn: Option<Lsn>,

-    ancestor_timeline: Option<ZTimelineId>,
-    ancestor_lsn: Lsn,
+    pub ancestor_timeline: Option<ZTimelineId>,
+    pub ancestor_lsn: Lsn,
+}
+
+impl TimelineMetadata {
+    pub fn from_bytes(metadata_bytes: &[u8]) -> anyhow::Result<Self> {
+        ensure!(
+            metadata_bytes.len() == METADATA_MAX_SAFE_SIZE,
+            "metadata bytes size is wrong"
+        );
+
+        let data = &metadata_bytes[..METADATA_MAX_DATA_SIZE];
+        let calculated_checksum = crc32c::crc32c(data);
+
+        let checksum_bytes: &[u8; METADATA_CHECKSUM_SIZE] =
+            metadata_bytes[METADATA_MAX_DATA_SIZE..].try_into()?;
+        let expected_checksum = u32::from_le_bytes(*checksum_bytes);
+        ensure!(
+            calculated_checksum == expected_checksum,
+            "metadata checksum mismatch"
+        );
+
+        let data = TimelineMetadata::des_prefix(data)?;
+        assert!(data.disk_consistent_lsn.is_aligned());
+
+        Ok(data)
+    }
+
+    pub fn to_bytes(&self) -> anyhow::Result<Vec<u8>> {
+        let mut metadata_bytes = TimelineMetadata::ser(self)?;
+        assert!(metadata_bytes.len() <= METADATA_MAX_DATA_SIZE);
+        metadata_bytes.resize(METADATA_MAX_SAFE_SIZE, 0u8);
+
+        let checksum = crc32c::crc32c(&metadata_bytes[..METADATA_MAX_DATA_SIZE]);
+        metadata_bytes[METADATA_MAX_DATA_SIZE..].copy_from_slice(&u32::to_le_bytes(checksum));
+        Ok(metadata_bytes)
+    }
 }

 pub struct LayeredTimeline {
@@ -1374,6 +1389,7 @@ impl LayeredTimeline {
             ancestor_timeline: ancestor_timelineid,
             ancestor_lsn: self.ancestor_lsn,
         };
+
         LayeredRepository::save_metadata(
             self.conf,
             self.timelineid,
@@ -1381,19 +1397,12 @@ impl LayeredTimeline {
             &metadata,
             false,
         )?;
+        if self.upload_relishes {
+            schedule_timeline_upload(self.tenantid, self.timelineid, layer_uploads, metadata);
+        }

         // Also update the in-memory copy
         self.disk_consistent_lsn.store(disk_consistent_lsn);
-
-        if self.upload_relishes {
-            schedule_timeline_upload(())
-            // schedule_timeline_upload(
-            //     self.tenantid,
-            //     self.timelineid,
-            //     layer_uploads,
-            //     disk_consistent_lsn,
-            // });
-        }
     }

     Ok(())
 }

@@ -1947,7 +1956,7 @@ pub fn dump_layerfile_from_path(path: &Path) -> Result<()> {
     Ok(())
 }

-fn metadata_path(
+pub fn metadata_path(
     conf: &'static PageServerConf,
     timelineid: ZTimelineId,
     tenantid: ZTenantId,
diff --git a/pageserver/src/layered_repository/delta_layer.rs b/pageserver/src/layered_repository/delta_layer.rs
index 24ed9d6e69..736a2694bf 100644
--- a/pageserver/src/layered_repository/delta_layer.rs
+++ b/pageserver/src/layered_repository/delta_layer.rs
@@ -442,12 +442,7 @@ impl DeltaLayer {
     }

     fn open_book(&self) -> Result<(PathBuf, Book)> {
-        let path = Self::path_for(
-            &self.path_or_conf,
-            self.timelineid,
-            self.tenantid,
-            &self.layer_name(),
-        );
+        let path = self.path();

         let file = File::open(&path)?;
         let book = Book::new(file)?;
diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs
index 9db7ceb1b4..6bb5d5f7f6 100644
--- a/pageserver/src/lib.rs
+++ b/pageserver/src/lib.rs
@@ -1,3 +1,4 @@
+use layered_repository::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
 use zenith_utils::postgres_backend::AuthType;
 use zenith_utils::zid::{ZTenantId, ZTimelineId};

@@ -91,7 +92,7 @@ impl PageServerConf {
     //
     fn tenants_path(&self) -> PathBuf {
-        self.workdir.join("tenants")
+        self.workdir.join(TENANTS_SEGMENT_NAME)
     }

     fn tenant_path(&self, tenantid: &ZTenantId) -> PathBuf {
@@ -115,7 +116,7 @@ impl PageServerConf {
     }

     fn timelines_path(&self, tenantid: &ZTenantId) -> PathBuf {
-        self.tenant_path(tenantid).join("timelines")
+        self.tenant_path(tenantid).join(TIMELINES_SEGMENT_NAME)
     }

     fn timeline_path(&self, timelineid: &ZTimelineId, tenantid: &ZTenantId) -> PathBuf {
diff --git a/pageserver/src/relish_storage.rs b/pageserver/src/relish_storage.rs
index 885ca9581f..185b666611 100644
--- a/pageserver/src/relish_storage.rs
+++ b/pageserver/src/relish_storage.rs
@@ -1,60 +1,138 @@
-//! Abstractions for the page server to store its relish layer data in the external storage.
+//! A set of generic storage abstractions for the page server to use when backing up and restoring its state from the external storage.
+//! This particular module serves as a public API border between the pageserver and the internal storage machinery.
+//! No other modules from this tree are supposed to be used directly by the external code.
 //!
-//! Main purpose of this module subtree is to provide a set of abstractions to manage the storage state
-//! in a way, optimal for page server.
+//! There are a few components the storage machinery consists of:
+//! * the [`RelishStorage`] trait, a CRUD-like generic abstraction used to adapt external storages, with a few implementations:
+//!   * [`local_fs`] allows using the local file system as an external storage
+//!   * [`rust_s3`] uses an AWS S3 bucket as an external storage
 //!
-//! The abstractions hide multiple custom external storage API implementations,
-//! such as AWS S3, local filesystem, etc., located in the submodules.
+//! * the synchronization logic in the [`storage_sync`] module, which keeps the pageserver state (both the runtime state and the workdir files) and the storage state in sync
+//!
+//! * a public API to interact with the external world: [`run_storage_sync_thread`] and [`schedule_timeline_upload`]
+//!
+//! Here's a schematic overview of all interactions the relish storage and the rest of the pageserver perform:
+//!
+//! +------------------------+                                  +--------->-------+
+//! |                        | - - - (init async loop) - - - -> |                 |
+//! |                        |                                  |                 |
+//! |                        | -------------------------------> |      async      |
+//! |       pageserver       |  (schedule frozen layer upload)  | upload/download |
+//! |                        |                                  |       loop      |
+//! |                        | <------------------------------- |                 |
+//! |                        |  (register downloaded layers)    |                 |
+//! +------------------------+                                  +---------<-------+
+//!              |
+//!              |
+//!   CRUD layer file operations
+//! (upload/download/delete/list, etc.)
+//!              |
+//!              V
+//! +------------------------+
+//! |                        |
+//! | [`RelishStorage`] impl |
+//! |                        |
+//! | pageserver assumes it  |
+//! | owns exclusive write   |
+//! | access to this storage |
+//! +------------------------+
+//!
+//! First, during startup, the pageserver initializes the storage sync thread with the async loop, or leaves the loop uninitialized, if configured so.
+//! Some time later, during pageserver checkpoints, in-memory data is flushed onto disk along with its metadata.
+//! If the storage sync loop was successfully started before, the pageserver schedules the new image uploads after every checkpoint.
+//! See [`crate::layered_repository`] for the upload calls and the adjacent logic.
+//!
+//! The storage logic considers an `image` to be a set of local files that fully represent a certain timeline at a given moment (identified by its `disk_consistent_lsn`).
+//! A timeline can change its state by adding more files on disk and advancing its `disk_consistent_lsn`: this happens after pageserver checkpointing and is followed
+//! by a storage upload, if enabled.
+//! When a certain image gets uploaded, the sync loop remembers the fact, preventing further reuploads of the same image state.
+//! No files are deleted from either the local or the remote storage: only the files missing locally/remotely get downloaded/uploaded; the local metadata file is overwritten
+//! when a newer timeline is downloaded.
+//!
+//! Meanwhile, the loop initializes the storage connection and inspects the files stored remotely.
+//! This is done once at startup only, relying on the fact that the pageserver uses the storage alone (ergo, nobody else uploads files to the storage but this server).
+//! Based on the remote image data, the storage sync logic queues image downloads, while accepting any potential upload tasks from the pageserver and managing the tasks by their priority.
+//! On image download, the [`crate::tenant_mgr::register_relish_download`] function is called to register the new image in the pageserver, initializing all related threads and internal state.
+//!
+//! When the pageserver terminates, the upload loop finishes the current image sync task (if any) and exits.
+//!
+//! NOTES:
+//! * the pageserver assumes it has exclusive write access to the relish storage. Multiple pageservers may be separated within the same storage
+//! (e.g. by using different directories in the local filesystem external storage), but that is entirely up to the storage implementation and not covered by the trait API.
+//!
+//! * the uploads do not happen right after pageserver startup; they are registered when
+//! 1. the pageserver does a checkpoint, which happens some time after the server start
+//! 2. the pageserver loads a timeline from disk for the first time
+//!
+//! * the uploads do not happen right after the upload registration: the sync loop might be occupied with other tasks, or higher-priority tasks could already be waiting
+//!
+//! * all synchronization tasks (including the public API to register uploads and downloads, and the sync queue management) happen on the image scale: a big set of relish files,
+//! enough to represent (and recover, if needed) a certain timeline state. In contrast, all internal storage CRUD calls are made per relish file from those images.
+//! This way, the synchronization is able to download an image partially, if some state was synced before, but exposes only correctly synced images.

 mod local_fs;
 mod rust_s3;
-/// A queue-based storage with the background machinery behind it to synchronize
-/// local page server layer files with external storage.
-mod synced_storage;
+mod storage_sync;

-use std::{path::Path, thread};
+use std::{
+    path::{Path, PathBuf},
+    thread,
+};

-use anyhow::Context;
+use anyhow::{anyhow, ensure, Context};
+use zenith_utils::zid::{ZTenantId, ZTimelineId};

-pub use self::synced_storage::schedule_timeline_upload;
-use self::{local_fs::LocalFs, rust_s3::RustS3};
-use crate::{PageServerConf, RelishStorageKind};
+pub use self::storage_sync::schedule_timeline_upload;
+use self::{local_fs::LocalFs, rust_s3::S3};
+use crate::{
+    layered_repository::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME},
+    PageServerConf, RelishStorageKind,
+};

+/// Based on the config, initiates the remote storage connection and starts a separate thread
+/// that ensures that the pageserver and the remote storage are in sync with each other.
+/// If no external storage is configured, no thread or storage initialization is done.
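// A hedged usage sketch (editorial illustration, not part of the original patch):
// the pageserver binary is expected to call the function below once during startup
// and keep the handle around so the sync thread can be joined on shutdown, e.g.:
//
//     let sync_handle = relish_storage::run_storage_sync_thread(conf)?;
//     // ... run the server ...
//     if let Some(handle) = sync_handle {
//         handle.join().expect("storage sync thread panicked")?;
//     }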
pub fn run_storage_sync_thread(
    config: &'static PageServerConf,
) -> anyhow::Result<Option<thread::JoinHandle<anyhow::Result<()>>>> {
    match &config.relish_storage_config {
        Some(relish_storage_config) => {
            let max_concurrent_sync = relish_storage_config.max_concurrent_sync;
-            match &relish_storage_config.storage {
-                RelishStorageKind::LocalFs(root) => synced_storage::run_storage_sync_thread(
+            let handle = match &relish_storage_config.storage {
+                RelishStorageKind::LocalFs(root) => storage_sync::spawn_storage_sync_thread(
                     config,
-                    LocalFs::new(root.clone())?,
+                    LocalFs::new(root.clone(), &config.workdir)?,
                     max_concurrent_sync,
                 ),
-                RelishStorageKind::AwsS3(s3_config) => synced_storage::run_storage_sync_thread(
+                RelishStorageKind::AwsS3(s3_config) => storage_sync::spawn_storage_sync_thread(
                     config,
-                    RustS3::new(s3_config)?,
+                    S3::new(s3_config, &config.workdir)?,
                     max_concurrent_sync,
                 ),
-            }
+            };
+            handle.map(Some)
        }
        None => Ok(None),
    }
}

 /// Storage (potentially remote) API to manage its state.
+/// This storage tries to be unaware of any layered repository context,
+/// providing basic CRUD operations with storage files.
 #[async_trait::async_trait]
-pub trait RelishStorage: Send + Sync {
+trait RelishStorage: Send + Sync {
+    /// A way to uniquely reference a relish in the remote storage.
     type RelishStoragePath;

-    fn derive_destination(
-        page_server_workdir: &Path,
-        relish_local_path: &Path,
-    ) -> anyhow::Result<Self::RelishStoragePath>;
+    /// Attempts to derive the storage path out of the local path, if the latter is correct.
+    fn storage_path(&self, local_path: &Path) -> anyhow::Result<Self::RelishStoragePath>;

+    /// Gets the layered storage information about the given entry.
+    fn info(&self, storage_path: &Self::RelishStoragePath) -> anyhow::Result<RemoteRelishInfo>;
+
+    /// Lists all items the storage has right now.
     async fn list_relishes(&self) -> anyhow::Result<Vec<Self::RelishStoragePath>>;

+    /// Streams the remote storage entry contents into the given buffered writer, returns the filled writer.
     async fn download_relish(
         &self,
         from: &Self::RelishStoragePath,
@@ -65,6 +143,7 @@ pub trait RelishStorage: Send + Sync {

     async fn delete_relish(&self, path: &Self::RelishStoragePath) -> anyhow::Result<()>;

+    /// Streams the local file contents into the remote storage entry.
     async fn upload_relish(
         &self,
         from: &mut tokio::io::BufReader<tokio::fs::File>,
@@ -72,16 +151,173 @@ pub trait RelishStorage: Send + Sync {
         to: &Self::RelishStoragePath,
     ) -> anyhow::Result<()>;
 }

-fn strip_workspace_prefix<'a>(
-    page_server_workdir: &'a Path,
-    relish_local_path: &'a Path,
-) -> anyhow::Result<&'a Path> {
-    relish_local_path
-        .strip_prefix(page_server_workdir)
-        .with_context(|| {
+/// Information about a certain remote storage entry.
+#[derive(Debug, PartialEq, Eq)]
+struct RemoteRelishInfo {
+    tenant_id: ZTenantId,
+    timeline_id: ZTimelineId,
+    /// Path in the pageserver workdir where the file should go.
+    download_destination: PathBuf,
+    is_metadata: bool,
+}
+
+fn strip_path_prefix<'a>(prefix: &'a Path, path: &'a Path) -> anyhow::Result<&'a Path> {
+    if prefix == path {
+        anyhow::bail!(
+            "Prefix and the path are equal, cannot strip: '{}'",
+            prefix.display()
+        )
+    } else {
+        path.strip_prefix(prefix).with_context(|| {
             format!(
-                "Unexpected: relish local path '{}' is not relevant to server workdir",
-                relish_local_path.display(),
+                "Path '{}' is not prefixed with '{}'",
+                path.display(),
+                prefix.display(),
             )
         })
+    }
+}
+
+fn parse_ids_from_path<'a, R: std::fmt::Display>(
+    path_segments: impl Iterator<Item = &'a str>,
+    path_log_representation: &R,
+) -> anyhow::Result<(ZTenantId, ZTimelineId)> {
+    let mut segments = path_segments.skip_while(|&segment| segment != TENANTS_SEGMENT_NAME);
+    let tenants_segment = segments.next().ok_or_else(|| {
+        anyhow!(
+            "Found no '{}' segment in the storage path '{}'",
+            TENANTS_SEGMENT_NAME,
+            path_log_representation
+        )
+    })?;
+    ensure!(
+        tenants_segment == TENANTS_SEGMENT_NAME,
+        "Failed to extract '{}' segment from storage path '{}'",
+        TENANTS_SEGMENT_NAME,
+        path_log_representation
+    );
+    let tenant_id = segments
+        .next()
+        .ok_or_else(|| {
+            anyhow!(
+                "Found no tenant id in the storage path '{}'",
+                path_log_representation
+            )
+        })?
+        .parse::<ZTenantId>()
+        .with_context(|| {
+            format!(
+                "Failed to parse tenant id from storage path '{}'",
+                path_log_representation
+            )
+        })?;
+
+    let timelines_segment = segments.next().ok_or_else(|| {
+        anyhow!(
+            "Found no '{}' segment in the storage path '{}'",
+            TIMELINES_SEGMENT_NAME,
+            path_log_representation
+        )
+    })?;
+    ensure!(
+        timelines_segment == TIMELINES_SEGMENT_NAME,
+        "Failed to extract '{}' segment from storage path '{}'",
+        TIMELINES_SEGMENT_NAME,
+        path_log_representation
+    );
+    let timeline_id = segments
+        .next()
+        .ok_or_else(|| {
+            anyhow!(
+                "Found no timeline id in the storage path '{}'",
+                path_log_representation
+            )
+        })?
+        .parse::<ZTimelineId>()
+        .with_context(|| {
+            format!(
+                "Failed to parse timeline id from storage path '{}'",
+                path_log_representation
+            )
+        })?;
+
+    Ok((tenant_id, timeline_id))
+}
+
+/// A set of common test utils to share in unit tests inside the module tree.
+#[cfg(test)]
+mod test_utils {
+    use std::path::{Path, PathBuf};
+
+    use anyhow::ensure;
+
+    use crate::{
+        layered_repository::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME},
+        repository::repo_harness::{RepoHarness, TIMELINE_ID},
+    };
+
+    /// Gives a timeline path with pageserver workdir stripped off.
+    pub fn relative_timeline_path(harness: &RepoHarness) -> anyhow::Result<PathBuf> {
+        let timeline_path = harness.timeline_path(&TIMELINE_ID);
+        Ok(timeline_path
+            .strip_prefix(&harness.conf.workdir)?
+            .to_path_buf())
+    }
+
+    /// Creates a path with custom tenant id in one of its segments.
+    /// Useful for emulating paths with wrong ids.
+    pub fn custom_tenant_id_path(
+        path_with_tenant_id: &Path,
+        new_tenant_id: &str,
+    ) -> anyhow::Result<PathBuf> {
+        let mut new_path = PathBuf::new();
+        let mut is_tenant_id = false;
+        let mut tenant_id_replaced = false;
+        for segment in path_with_tenant_id {
+            match segment.to_str() {
+                Some(TENANTS_SEGMENT_NAME) => is_tenant_id = true,
+                Some(_tenant_id_str) if is_tenant_id => {
+                    is_tenant_id = false;
+                    new_path.push(new_tenant_id);
+                    tenant_id_replaced = true;
+                    continue;
+                }
+                _ => {}
+            }
+            new_path.push(segment)
+        }
+
+        ensure!(tenant_id_replaced, "Found no tenant id segment to replace");
+        Ok(new_path)
+    }
+
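A quick usage sketch (not part of the patch) of `custom_tenant_id_path` above, assuming `TENANTS_SEGMENT_NAME` is `"tenants"`; the ids below are made up:

```rust
#[test]
fn custom_tenant_id_path_sketch() -> anyhow::Result<()> {
    use std::path::Path;

    // Made-up ids; only the "tenants" segment position matters here.
    let original = Path::new("tenants")
        .join("1234567890abcdef1234567890abcdef")
        .join("timelines")
        .join("fedcba0987654321fedcba0987654321");
    let patched = custom_tenant_id_path(&original, "wrong_tenant_id")?;

    // The segment right after "tenants" is replaced, everything else is kept.
    let expected = Path::new("tenants")
        .join("wrong_tenant_id")
        .join("timelines")
        .join("fedcba0987654321fedcba0987654321");
    assert_eq!(expected, patched);
    Ok(())
}
```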
+    /// Creates a path with custom timeline id in one of its segments.
+    /// Useful for emulating paths with wrong ids.
+    pub fn custom_timeline_id_path(
+        path_with_timeline_id: &Path,
+        new_timeline_id: &str,
+    ) -> anyhow::Result<PathBuf> {
+        let mut new_path = PathBuf::new();
+        let mut is_timeline_id = false;
+        let mut timeline_id_replaced = false;
+        for segment in path_with_timeline_id {
+            match segment.to_str() {
+                Some(TIMELINES_SEGMENT_NAME) => is_timeline_id = true,
+                Some(_timeline_id_str) if is_timeline_id => {
+                    is_timeline_id = false;
+                    new_path.push(new_timeline_id);
+                    timeline_id_replaced = true;
+                    continue;
+                }
+                _ => {}
+            }
+            new_path.push(segment)
+        }
+
+        ensure!(
+            timeline_id_replaced,
+            "Found no timeline id segment to replace"
+        );
+        Ok(new_path)
+    }
+}
diff --git a/pageserver/src/relish_storage/README.md b/pageserver/src/relish_storage/README.md
new file mode 100644
index 0000000000..6da00fcd68
--- /dev/null
+++ b/pageserver/src/relish_storage/README.md
@@ -0,0 +1,82 @@
+# Non-implementation details
+
+This document describes the current state of the backup system in pageserver, its existing limitations and concerns, why some things are done the way they are, and the future development plans.
+A detailed description of how the synchronization works and how it fits into the rest of the pageserver can be found in the [storage module](./../relish_storage.rs) and its submodules.
+Ideally, this document should disappear after the current implementation concerns are mitigated, with the remaining useful knowledge bits moved into rustdocs.
+
+## Approach
+
+Backup functionality is a new component that appeared well after the core DB functionality was implemented.
+Pageserver layer functionality is also quite volatile at the moment, so there's a risk its local file management changes over time.
+
+To avoid adding more chaos to that, backup functionality is currently designed as a relatively standalone component, with the majority of its logic placed in a standalone async loop.
+This way, the backups are managed in the background without directly affecting other pageserver parts: the backup and restoration process may lag behind, but eventually catches up with reality. To track that, a set of Prometheus metrics is exposed from pageserver.
+
+## What's done
+
+Current implementation
+* provides remote storage wrappers for AWS S3 and local FS
+* uploads layers frozen by the pageserver checkpoint thread
+* downloads and registers layers found on the remote storage but missing locally
+
+No serious optimisation or performance testing has been done yet; the feature is disabled by default and gets polished over time.
+The plan is to deal with all currently open questions and prepare the feature to be enabled by default in cloud environments.
+
+### Peculiarities
+
+As mentioned, the backup component is rather new and currently under development, so not all things are done properly from the start.
+Here's the list of known compromises, with comments:
+
+* Remote storage model is the same as the `tenants/` directory contents of the pageserver's local workdir storage.
+This is relatively simple to implement, but may be costly to use in AWS S3: an initial data image contains ~782 relish files and a metadata file, ~31 MB combined.
+AWS charges both per API call and for traffic; layers are expected to be updated frequently, so this model is most probably inefficient.
+Additionally, pageservers might need to migrate images between tenants, which does not improve the situation.
+
+The storage sync API operates on whole images when backing up or restoring, so we're free to repack the layer contents the way we want, which most probably will be done later.
+
+* no proper file comparison
+
+Currently, every layer file contains an `Lsn` in its name, to map the data it holds to a certain DB state.
+Images with the same ids and different `Lsn`s are then compared; files are considered equal if their local file paths are equal (for remote files, the "local file path" is their download destination).
+No file contents check is done currently, but there should be one.
+AWS S3 returns file checksums during the `list` operation, so those could be used to ensure backup consistency, but that needs further research, since the current pageserver implementation also needs to deal with layer file checksums.
+
+For now, due to this, we consider local workdir files the source of truth: they are never removed, and the remote files are adjusted instead if image files mismatch.
+
+* no proper retry management
+
+Currently, the storage sync attempts to redo the upload/download operation for the image files that failed.
+No proper task eviction or backpressure is implemented yet: the tasks will stay in the queue forever, reattempting the downloads.
+
+This will be fixed when more details on the file consistency model are agreed on.
+
+* sad rust-s3 api
+
+rust-s3 is not very pleasant to use:
+1. it returns `anyhow::Result` and it's hard to distinguish, for instance, "missing file" cases from "no connection" ones
+2. at least one function in its API that we need (`get_object_stream`) is marked `async` yet blocks (!), see details [here](https://github.com/zenithdb/zenith/pull/752#discussion_r728373091)
+3. it's a prerelease library with unclear maintenance status
+4. noisy on debug level
+
+But it's already used in the project, so for now it's reused to avoid bloating the dependency tree.
+Based on a previous evaluation, even `rusoto-s3` could be a better choice over this library, but that needs further benchmarking.
+
+
+* gc and branches are ignored
+
+So far, we don't consider non-main images and don't adjust the remote storage based on GC thread loop results.
+Only the checkpointer loop affects the remote storage.
+
+* more layers should be downloaded on demand
+
+Since we download and load remote layers into pageserver, a need for those layers' ancestors may arise.
+Most probably, a downloaded image's ancestors are not present locally either, but currently there's no logic for downloading such ancestors and their metadata,
+so the pageserver is unable to respond properly to requests touching such ancestors.
+
+To implement the downloading, more `tenant_mgr` refactoring is needed to properly handle web requests for layers and handle the state changes.
+[Here](https://github.com/zenithdb/zenith/pull/689#issuecomment-931216193) are the details about the initial state management updates needed.
+
+* no integration tests
+
+Automated S3 testing is currently lacking, due to there being no convenient way to enable backups during the tests.
+After that's fixed, benchmark runs should also be carried out to find bottlenecks.
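To make the path-equality rule above concrete, a small self-contained sketch (not part of the patch); the names are illustrative, not the actual sync-loop internals:

```rust
use std::collections::HashSet;
use std::path::PathBuf;

// Only files whose local paths are absent on the other side get scheduled for
// sync; contents and checksums are never compared, exactly as described above.
fn files_missing_remotely(local: &[PathBuf], remote: &[PathBuf]) -> Vec<PathBuf> {
    let remote_set: HashSet<&PathBuf> = remote.iter().collect();
    local
        .iter()
        .filter(|path| !remote_set.contains(path))
        .cloned()
        .collect()
}
```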
diff --git a/pageserver/src/relish_storage/local_fs.rs b/pageserver/src/relish_storage/local_fs.rs
index 49d656d5a6..1fe02b0f5f 100644
--- a/pageserver/src/relish_storage/local_fs.rs
+++ b/pageserver/src/relish_storage/local_fs.rs
@@ -1,13 +1,11 @@
 //! Local filesystem relish storage.
+//! Multiple pageservers can use the same "storage" of this kind by using different storage roots.
 //!
-//! Page server already stores layer data on the server, when freezing it.
-//! This storage serves a way to
-//!
-//! * test things locally simply
-//! * allow to compabre both binary sets
-//! * help validating the relish storage API
+//! This storage is used in pageserver tests, but can also be used in cases when a certain persistent
+//! volume is mounted to the local FS.

 use std::{
+    ffi::OsStr,
     future::Future,
     io::Write,
     path::{Path, PathBuf},
@@ -16,25 +14,31 @@ use std::{

 use anyhow::{bail, Context};
 use tokio::{fs, io};
+use tracing::*;

-use super::{strip_workspace_prefix, RelishStorage};
+use super::{parse_ids_from_path, strip_path_prefix, RelishStorage, RemoteRelishInfo};
+use crate::layered_repository::METADATA_FILE_NAME;

 pub struct LocalFs {
+    pageserver_workdir: &'static Path,
     root: PathBuf,
 }

 impl LocalFs {
-    /// Atetmpts to create local FS relish storage, also creates the directory provided, if not exists.
-    pub fn new(root: PathBuf) -> anyhow::Result<Self> {
+    /// Attempts to create local FS relish storage, along with the storage root directory.
+    pub fn new(root: PathBuf, pageserver_workdir: &'static Path) -> anyhow::Result<Self> {
         if !root.exists() {
             std::fs::create_dir_all(&root).with_context(|| {
                 format!(
-                    "Failed to create all directories in the given root path {}",
+                    "Failed to create all directories in the given root path '{}'",
                     root.display(),
                 )
             })?;
         }
-        Ok(Self { root })
+        Ok(Self {
+            pageserver_workdir,
+            root,
+        })
     }

     fn resolve_in_storage(&self, path: &Path) -> anyhow::Result<PathBuf> {
@@ -55,11 +59,29 @@ impl LocalFs {
 impl RelishStorage for LocalFs {
     type RelishStoragePath = PathBuf;

-    fn derive_destination(
-        page_server_workdir: &Path,
-        relish_local_path: &Path,
-    ) -> anyhow::Result<Self::RelishStoragePath> {
-        Ok(strip_workspace_prefix(page_server_workdir, relish_local_path)?.to_path_buf())
+    fn storage_path(&self, local_path: &Path) -> anyhow::Result<Self::RelishStoragePath> {
+        Ok(self.root.join(
+            strip_path_prefix(self.pageserver_workdir, local_path)
+                .context("local path does not belong to this storage")?,
+        ))
+    }
+
+    fn info(&self, storage_path: &Self::RelishStoragePath) -> anyhow::Result<RemoteRelishInfo> {
+        let is_metadata =
+            storage_path.file_name().and_then(OsStr::to_str) == Some(METADATA_FILE_NAME);
+        let relative_path = strip_path_prefix(&self.root, storage_path)
+            .context("local path does not belong to this storage")?;
+        let download_destination = self.pageserver_workdir.join(relative_path);
+        let (tenant_id, timeline_id) = parse_ids_from_path(
+            relative_path.iter().filter_map(|segment| segment.to_str()),
+            &relative_path.display(),
+        )?;
+        Ok(RemoteRelishInfo {
+            tenant_id,
+            timeline_id,
+            download_destination,
+            is_metadata,
+        })
     }

     async fn list_relishes(&self) -> anyhow::Result<Vec<PathBuf>> {
@@ -72,6 +94,7 @@ impl RelishStorage for LocalFs {
         mut to: std::io::BufWriter,
     ) -> anyhow::Result> {
         let file_path = self.resolve_in_storage(from)?;
+
         if file_path.exists() && file_path.is_file() {
             let updated_buffer = tokio::task::spawn_blocking(move || {
                 let mut source = std::io::BufReader::new(
@@ -104,7 +127,7 @@ impl RelishStorage for LocalFs {
     async fn delete_relish(&self, path: &Self::RelishStoragePath) -> anyhow::Result<()> {
         let file_path = self.resolve_in_storage(path)?;
         if file_path.exists() && file_path.is_file() {
-            Ok(tokio::fs::remove_file(file_path).await?)
+            Ok(fs::remove_file(file_path).await?)
         } else {
             bail!(
                 "File '{}' either does not exist or is not a file",
@@ -152,12 +175,12 @@ where
     if directory_path.exists() {
         if directory_path.is_dir() {
             let mut paths = Vec::new();
-            let mut dir_contents = tokio::fs::read_dir(directory_path).await?;
+            let mut dir_contents = fs::read_dir(directory_path).await?;
             while let Some(dir_entry) = dir_contents.next_entry().await? {
                 let file_type = dir_entry.file_type().await?;
                 let entry_path = dir_entry.path();
                 if file_type.is_symlink() {
-                    log::debug!("{:?} us a symlink, skipping", entry_path)
+                    debug!("{:?} is a symlink, skipping", entry_path)
                 } else if file_type.is_dir() {
                     paths.extend(get_all_files(entry_path).await?.into_iter())
                 } else {
@@ -183,7 +206,369 @@ async fn create_target_directory(target_file_path: &Path) -> anyhow::Result<()>
         ),
     };
     if !target_dir.exists() {
-        tokio::fs::create_dir_all(target_dir).await?;
+        fs::create_dir_all(target_dir).await?;
     }
     Ok(())
 }
+
+#[cfg(test)]
+mod pure_tests {
+    use crate::{
+        relish_storage::test_utils::{
+            custom_tenant_id_path, custom_timeline_id_path, relative_timeline_path,
+        },
+        repository::repo_harness::{RepoHarness, TIMELINE_ID},
+    };
+
+    use super::*;
+
+    #[test]
+    fn storage_path_positive() -> anyhow::Result<()> {
+        let repo_harness = RepoHarness::create("storage_path_positive")?;
+        let storage_root = PathBuf::from("somewhere").join("else");
+        let storage = LocalFs {
+            pageserver_workdir: &repo_harness.conf.workdir,
+            root: storage_root.clone(),
+        };
+
+        let local_path = repo_harness.timeline_path(&TIMELINE_ID).join("relish_name");
+        let expected_path = storage_root.join(local_path.strip_prefix(&repo_harness.conf.workdir)?);
+
+        assert_eq!(
+            expected_path,
+            storage.storage_path(&local_path).expect("Matching path should map to storage path normally"),
+            "Relish paths from the pageserver workdir should be stored in local fs storage with the same path they have relative to the workdir"
+        );
+
+        Ok(())
+    }
+
+    #[test]
+    fn storage_path_negatives() -> anyhow::Result<()> {
+        #[track_caller]
+        fn storage_path_error(storage: &LocalFs, mismatching_path: &Path) -> String {
+            match storage.storage_path(mismatching_path) {
+                Ok(wrong_path) => panic!(
+                    "Expected path '{}' to error, but got storage path: {:?}",
+                    mismatching_path.display(),
+                    wrong_path,
+                ),
+                Err(e) => format!("{:?}", e),
+            }
+        }
+
+        let repo_harness = RepoHarness::create("storage_path_negatives")?;
+        let storage_root = PathBuf::from("somewhere").join("else");
+        let storage = LocalFs {
+            pageserver_workdir: &repo_harness.conf.workdir,
+            root: storage_root,
+        };
+
+        let error_string = storage_path_error(&storage, &repo_harness.conf.workdir);
+        assert!(error_string.contains("does not belong to this storage"));
+        assert!(error_string.contains(repo_harness.conf.workdir.to_str().unwrap()));
+
+        let mismatching_path_str = "/something/else";
+        let error_message = storage_path_error(&storage, Path::new(mismatching_path_str));
+        assert!(
+            error_message.contains(mismatching_path_str),
+            "Error should mention wrong path"
+        );
+        assert!(
+            error_message.contains(repo_harness.conf.workdir.to_str().unwrap()),
+            "Error should mention server workdir"
+        );
+        assert!(error_message.contains("does not belong to this storage"));
+
+        Ok(())
+    }
+
+    #[test]
+    fn info_positive() -> anyhow::Result<()> {
+        let repo_harness = RepoHarness::create("info_positive")?;
+        let storage_root = PathBuf::from("somewhere").join("else");
+        let storage = LocalFs {
+            pageserver_workdir: &repo_harness.conf.workdir,
+            root: storage_root.clone(),
+        };
+
+        let name = "not a metadata";
metadata"; + let local_path = repo_harness.timeline_path(&TIMELINE_ID).join(name); + assert_eq!( + RemoteRelishInfo { + tenant_id: repo_harness.tenant_id, + timeline_id: TIMELINE_ID, + download_destination: local_path.clone(), + is_metadata: false, + }, + storage + .info(&storage_root.join(local_path.strip_prefix(&repo_harness.conf.workdir)?)) + .expect("For a valid input, valid S3 info should be parsed"), + "Should be able to parse metadata out of the correctly named remote delta relish" + ); + + let local_metadata_path = repo_harness + .timeline_path(&TIMELINE_ID) + .join(METADATA_FILE_NAME); + let remote_metadata_path = storage.storage_path(&local_metadata_path)?; + assert_eq!( + RemoteRelishInfo { + tenant_id: repo_harness.tenant_id, + timeline_id: TIMELINE_ID, + download_destination: local_metadata_path, + is_metadata: true, + }, + storage + .info(&remote_metadata_path) + .expect("For a valid input, valid S3 info should be parsed"), + "Should be able to parse metadata out of the correctly named remote metadata file" + ); + + Ok(()) + } + + #[test] + fn info_negatives() -> anyhow::Result<()> { + #[track_caller] + #[allow(clippy::ptr_arg)] // have to use &PathBuf due to `storage.info` parameter requirements + fn storage_info_error(storage: &LocalFs, storage_path: &PathBuf) -> String { + match storage.info(storage_path) { + Ok(wrong_info) => panic!( + "Expected storage path input {:?} to cause an error, but got relish info: {:?}", + storage_path, wrong_info, + ), + Err(e) => format!("{:?}", e), + } + } + + let repo_harness = RepoHarness::create("info_negatives")?; + let storage_root = PathBuf::from("somewhere").join("else"); + let storage = LocalFs { + pageserver_workdir: &repo_harness.conf.workdir, + root: storage_root.clone(), + }; + + let totally_wrong_path = "wrong_wrong_wrong"; + let error_message = storage_info_error(&storage, &PathBuf::from(totally_wrong_path)); + assert!(error_message.contains(totally_wrong_path)); + + let relative_timeline_path = relative_timeline_path(&repo_harness)?; + + let relative_relish_path = + custom_tenant_id_path(&relative_timeline_path, "wrong_tenant_id")? + .join("wrong_tenant_id_name"); + let wrong_tenant_id_path = storage_root.join(&relative_relish_path); + let error_message = storage_info_error(&storage, &wrong_tenant_id_path); + assert!( + error_message.contains(relative_relish_path.to_str().unwrap()), + "Error message '{}' does not contain the expected substring", + error_message + ); + + let relative_relish_path = + custom_timeline_id_path(&relative_timeline_path, "wrong_timeline_id")? 
+ .join("wrong_timeline_id_name"); + let wrong_timeline_id_path = storage_root.join(&relative_relish_path); + let error_message = storage_info_error(&storage, &wrong_timeline_id_path); + assert!( + error_message.contains(relative_relish_path.to_str().unwrap()), + "Error message '{}' does not contain the expected substring", + error_message + ); + + Ok(()) + } + + #[test] + fn download_destination_matches_original_path() -> anyhow::Result<()> { + let repo_harness = RepoHarness::create("download_destination_matches_original_path")?; + let original_path = repo_harness.timeline_path(&TIMELINE_ID).join("some name"); + + let storage_root = PathBuf::from("somewhere").join("else"); + let dummy_storage = LocalFs { + pageserver_workdir: &repo_harness.conf.workdir, + root: storage_root, + }; + + let storage_path = dummy_storage.storage_path(&original_path)?; + let download_destination = dummy_storage.info(&storage_path)?.download_destination; + + assert_eq!( + original_path, download_destination, + "'original path -> storage path -> matching fs path' transformation should produce the same path as the input one for the correct path" + ); + + Ok(()) + } +} + +#[cfg(test)] +mod fs_tests { + use crate::{ + relish_storage::test_utils::relative_timeline_path, repository::repo_harness::RepoHarness, + }; + + use super::*; + + use tempfile::tempdir; + + #[tokio::test] + async fn upload_relish() -> anyhow::Result<()> { + let repo_harness = RepoHarness::create("upload_relish")?; + let storage = create_storage()?; + + let mut source = create_file_for_upload( + &storage.pageserver_workdir.join("whatever"), + "whatever_contents", + ) + .await?; + let target_path = PathBuf::from("/").join("somewhere").join("else"); + match storage.upload_relish(&mut source, &target_path).await { + Ok(()) => panic!("Should not allow storing files with wrong target path"), + Err(e) => { + let message = format!("{:?}", e); + assert!(message.contains(&target_path.display().to_string())); + assert!(message.contains("does not belong to the current storage")); + } + } + assert!(storage.list_relishes().await?.is_empty()); + + let target_path_1 = upload_dummy_file(&repo_harness, &storage, "upload_1").await?; + assert_eq!( + storage.list_relishes().await?, + vec![target_path_1.clone()], + "Should list a single file after first upload" + ); + + let target_path_2 = upload_dummy_file(&repo_harness, &storage, "upload_2").await?; + assert_eq!( + list_relishes_sorted(&storage).await?, + vec![target_path_1.clone(), target_path_2.clone()], + "Should list a two different files after second upload" + ); + + // match storage.upload_relish(&mut source, &target_path_1).await { + // Ok(()) => panic!("Should not allow reuploading storage files"), + // Err(e) => { + // let message = format!("{:?}", e); + // assert!(message.contains(&target_path_1.display().to_string())); + // assert!(message.contains("File exists")); + // } + // } + assert_eq!( + list_relishes_sorted(&storage).await?, + vec![target_path_1, target_path_2], + "Should list a two different files after all upload attempts" + ); + + Ok(()) + } + + fn create_storage() -> anyhow::Result { + let pageserver_workdir = Box::leak(Box::new(tempdir()?.path().to_owned())); + let storage = LocalFs::new(tempdir()?.path().to_owned(), pageserver_workdir)?; + Ok(storage) + } + + #[tokio::test] + async fn download_relish() -> anyhow::Result<()> { + let repo_harness = RepoHarness::create("download_relish")?; + let storage = create_storage()?; + let upload_name = "upload_1"; + let upload_target = 
upload_dummy_file(&repo_harness, &storage, upload_name).await?; + + let contents_bytes = storage + .download_relish(&upload_target, std::io::BufWriter::new(Vec::new())) + .await? + .into_inner()?; + let contents = String::from_utf8(contents_bytes)?; + assert_eq!( + dummy_contents(upload_name), + contents, + "We should upload and download the same contents" + ); + + let non_existing_path = PathBuf::from("somewhere").join("else"); + match storage + .download_relish(&non_existing_path, std::io::BufWriter::new(Vec::new())) + .await + { + Ok(_) => panic!("Should not allow downloading non-existing storage files"), + Err(e) => { + let error_string = e.to_string(); + assert!(error_string.contains("does not exist")); + assert!(error_string.contains(&non_existing_path.display().to_string())); + } + } + Ok(()) + } + + #[tokio::test] + async fn delete_relish() -> anyhow::Result<()> { + let repo_harness = RepoHarness::create("delete_relish")?; + let storage = create_storage()?; + let upload_name = "upload_1"; + let upload_target = upload_dummy_file(&repo_harness, &storage, upload_name).await?; + + storage.delete_relish(&upload_target).await?; + assert!(storage.list_relishes().await?.is_empty()); + + match storage.delete_relish(&upload_target).await { + Ok(()) => panic!("Should not allow deleting non-existing storage files"), + Err(e) => { + let error_string = e.to_string(); + assert!(error_string.contains("does not exist")); + assert!(error_string.contains(&upload_target.display().to_string())); + } + } + Ok(()) + } + + async fn upload_dummy_file( + harness: &RepoHarness, + storage: &LocalFs, + name: &str, + ) -> anyhow::Result { + let storage_path = storage + .root + .join(relative_timeline_path(harness)?) + .join(name); + storage + .upload_relish( + &mut create_file_for_upload( + &storage.pageserver_workdir.join(name), + &dummy_contents(name), + ) + .await?, + &storage_path, + ) + .await?; + Ok(storage_path) + } + + async fn create_file_for_upload( + path: &Path, + contents: &str, + ) -> anyhow::Result> { + std::fs::create_dir_all(path.parent().unwrap())?; + let mut file_for_writing = std::fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(path)?; + write!(file_for_writing, "{}", contents)?; + drop(file_for_writing); + Ok(io::BufReader::new( + fs::OpenOptions::new().read(true).open(&path).await?, + )) + } + + fn dummy_contents(name: &str) -> String { + format!("contents for {}", name) + } + + async fn list_relishes_sorted(storage: &LocalFs) -> anyhow::Result> { + let mut relishes = storage.list_relishes().await?; + relishes.sort(); + Ok(relishes) + } +} diff --git a/pageserver/src/relish_storage/rust_s3.rs b/pageserver/src/relish_storage/rust_s3.rs index 5dddaa36ca..34091688e0 100644 --- a/pageserver/src/relish_storage/rust_s3.rs +++ b/pageserver/src/relish_storage/rust_s3.rs @@ -1,35 +1,45 @@ -//! A wrapper around AWS S3 client library `rust_s3` to be used a relish storage. +//! AWS S3 relish storage wrapper around `rust_s3` library. +//! Currently does not allow multiple pageservers to use the same bucket concurrently: relishes are +//! placed in the root of the bucket. 
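For reference, a standalone sketch (not part of the patch) of the key convention described above; the real implementation is `storage_path` below, and `to_s3_key` here is a hypothetical mirror of it:

```rust
use std::path::Path;

// Mirrors the convention used below: strip the workdir prefix, then join the
// remaining segments with '/' (leading separator included). For example, with
// a workdir of "/data", "/data/tenants/t/timelines/tl/layer" becomes the
// object key "/tenants/t/timelines/tl/layer".
fn to_s3_key(pageserver_workdir: &Path, local_path: &Path) -> Option<String> {
    let relative = local_path.strip_prefix(pageserver_workdir).ok()?;
    let mut key = String::new();
    for segment in relative {
        key.push('/');
        key.push_str(segment.to_str()?);
    }
    Some(key)
}
```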
-use std::io::Write; -use std::path::Path; +use std::{ + io::Write, + path::{Path, PathBuf}, +}; use anyhow::Context; use s3::{bucket::Bucket, creds::Credentials, region::Region}; use crate::{ - relish_storage::{strip_workspace_prefix, RelishStorage}, + layered_repository::METADATA_FILE_NAME, + relish_storage::{parse_ids_from_path, strip_path_prefix, RelishStorage, RemoteRelishInfo}, S3Config, }; const S3_FILE_SEPARATOR: char = '/'; -#[derive(Debug)] +#[derive(Debug, Eq, PartialEq)] pub struct S3ObjectKey(String); impl S3ObjectKey { fn key(&self) -> &str { &self.0 } + + fn download_destination(&self, pageserver_workdir: &Path) -> PathBuf { + pageserver_workdir.join(self.0.split(S3_FILE_SEPARATOR).collect::()) + } } /// AWS S3 relish storage. -pub struct RustS3 { +pub struct S3 { + pageserver_workdir: &'static Path, bucket: Bucket, } -impl RustS3 { +impl S3 { /// Creates the relish storage, errors if incorrect AWS S3 configuration provided. - pub fn new(aws_config: &S3Config) -> anyhow::Result { + pub fn new(aws_config: &S3Config, pageserver_workdir: &'static Path) -> anyhow::Result { let region = aws_config .bucket_region .parse::() @@ -49,19 +59,17 @@ impl RustS3 { credentials, ) .context("Failed to create the s3 bucket")?, + pageserver_workdir, }) } } #[async_trait::async_trait] -impl RelishStorage for RustS3 { +impl RelishStorage for S3 { type RelishStoragePath = S3ObjectKey; - fn derive_destination( - page_server_workdir: &Path, - relish_local_path: &Path, - ) -> anyhow::Result { - let relative_path = strip_workspace_prefix(page_server_workdir, relish_local_path)?; + fn storage_path(&self, local_path: &Path) -> anyhow::Result { + let relative_path = strip_path_prefix(self.pageserver_workdir, local_path)?; let mut key = String::new(); for segment in relative_path { key.push(S3_FILE_SEPARATOR); @@ -70,6 +78,21 @@ impl RelishStorage for RustS3 { Ok(S3ObjectKey(key)) } + fn info(&self, storage_path: &Self::RelishStoragePath) -> anyhow::Result { + let storage_path_key = &storage_path.0; + let is_metadata = + storage_path_key.ends_with(&format!("{}{}", S3_FILE_SEPARATOR, METADATA_FILE_NAME)); + let download_destination = storage_path.download_destination(self.pageserver_workdir); + let (tenant_id, timeline_id) = + parse_ids_from_path(storage_path_key.split(S3_FILE_SEPARATOR), storage_path_key)?; + Ok(RemoteRelishInfo { + tenant_id, + timeline_id, + download_destination, + is_metadata, + }) + } + async fn list_relishes(&self) -> anyhow::Result> { let list_response = self .bucket @@ -101,11 +124,11 @@ impl RelishStorage for RustS3 { )) } else { tokio::task::spawn_blocking(move || { - to.flush().context("Failed to fluch the downoad buffer")?; + to.flush().context("Failed to flush the download buffer")?; Ok::<_, anyhow::Error>(to) }) .await - .context("Failed to joim the download buffer flush task")? + .context("Failed to join the download buffer flush task")? 
} } @@ -147,3 +170,226 @@ impl RelishStorage for RustS3 { } } } + +#[cfg(test)] +mod tests { + use crate::{ + relish_storage::test_utils::{ + custom_tenant_id_path, custom_timeline_id_path, relative_timeline_path, + }, + repository::repo_harness::{RepoHarness, TIMELINE_ID}, + }; + + use super::*; + + #[test] + fn download_destination() -> anyhow::Result<()> { + let repo_harness = RepoHarness::create("download_destination")?; + + let local_path = repo_harness.timeline_path(&TIMELINE_ID).join("test_name"); + let relative_path = local_path.strip_prefix(&repo_harness.conf.workdir)?; + + let key = S3ObjectKey(format!( + "{}{}", + S3_FILE_SEPARATOR, + relative_path + .iter() + .map(|segment| segment.to_str().unwrap()) + .collect::>() + .join(&S3_FILE_SEPARATOR.to_string()), + )); + + assert_eq!( + local_path, + key.download_destination(&repo_harness.conf.workdir), + "Download destination should consist of s3 path joined with the pageserver workdir prefix" + ); + + Ok(()) + } + + #[test] + fn storage_path_positive() -> anyhow::Result<()> { + let repo_harness = RepoHarness::create("storage_path_positive")?; + + let segment_1 = "matching"; + let segment_2 = "relish"; + let local_path = &repo_harness.conf.workdir.join(segment_1).join(segment_2); + let expected_key = S3ObjectKey(format!( + "{SEPARATOR}{}{SEPARATOR}{}", + segment_1, + segment_2, + SEPARATOR = S3_FILE_SEPARATOR, + )); + + let actual_key = dummy_storage(&repo_harness.conf.workdir) + .storage_path(local_path) + .expect("Matching path should map to S3 path normally"); + assert_eq!( + expected_key, + actual_key, + "S3 key from the matching path should contain all segments after the workspace prefix, separated with S3 separator" + ); + + Ok(()) + } + + #[test] + fn storage_path_negatives() -> anyhow::Result<()> { + #[track_caller] + fn storage_path_error(storage: &S3, mismatching_path: &Path) -> String { + match storage.storage_path(mismatching_path) { + Ok(wrong_key) => panic!( + "Expected path '{}' to error, but got S3 key: {:?}", + mismatching_path.display(), + wrong_key, + ), + Err(e) => e.to_string(), + } + } + + let repo_harness = RepoHarness::create("storage_path_negatives")?; + let storage = dummy_storage(&repo_harness.conf.workdir); + + let error_message = storage_path_error(&storage, &repo_harness.conf.workdir); + assert!( + error_message.contains("Prefix and the path are equal"), + "Message '{}' does not contain the required string", + error_message + ); + + let mismatching_path = PathBuf::from("somewhere").join("else"); + let error_message = storage_path_error(&storage, &mismatching_path); + assert!( + error_message.contains(mismatching_path.to_str().unwrap()), + "Error should mention wrong path" + ); + assert!( + error_message.contains(repo_harness.conf.workdir.to_str().unwrap()), + "Error should mention server workdir" + ); + assert!( + error_message.contains("is not prefixed with"), + "Message '{}' does not contain a required string", + error_message + ); + + Ok(()) + } + + #[test] + fn info_positive() -> anyhow::Result<()> { + let repo_harness = RepoHarness::create("info_positive")?; + let storage = dummy_storage(&repo_harness.conf.workdir); + let relative_timeline_path = relative_timeline_path(&repo_harness)?; + + let s3_key = create_s3_key(&relative_timeline_path.join("not a metadata")); + assert_eq!( + RemoteRelishInfo { + tenant_id: repo_harness.tenant_id, + timeline_id: TIMELINE_ID, + download_destination: s3_key.download_destination(&repo_harness.conf.workdir), + is_metadata: false, + }, + storage + .info(&s3_key) + 
.expect("For a valid input, valid S3 info should be parsed"), + "Should be able to parse metadata out of the correctly named remote delta relish" + ); + + let s3_key = create_s3_key(&relative_timeline_path.join(METADATA_FILE_NAME)); + assert_eq!( + RemoteRelishInfo { + tenant_id: repo_harness.tenant_id, + timeline_id: TIMELINE_ID, + download_destination: s3_key.download_destination(&repo_harness.conf.workdir), + is_metadata: true, + }, + storage + .info(&s3_key) + .expect("For a valid input, valid S3 info should be parsed"), + "Should be able to parse metadata out of the correctly named remote metadata file" + ); + + Ok(()) + } + + #[test] + fn info_negatives() -> anyhow::Result<()> { + #[track_caller] + fn storage_info_error(storage: &S3, s3_key: &S3ObjectKey) -> String { + match storage.info(s3_key) { + Ok(wrong_info) => panic!( + "Expected key {:?} to error, but got relish info: {:?}", + s3_key, wrong_info, + ), + Err(e) => e.to_string(), + } + } + + let repo_harness = RepoHarness::create("info_negatives")?; + let storage = dummy_storage(&repo_harness.conf.workdir); + let relative_timeline_path = relative_timeline_path(&repo_harness)?; + + let totally_wrong_path = "wrong_wrong_wrong"; + let error_message = + storage_info_error(&storage, &S3ObjectKey(totally_wrong_path.to_string())); + assert!(error_message.contains(totally_wrong_path)); + + let wrong_tenant_id = create_s3_key( + &custom_tenant_id_path(&relative_timeline_path, "wrong_tenant_id")?.join("name"), + ); + let error_message = storage_info_error(&storage, &wrong_tenant_id); + assert!(error_message.contains(&wrong_tenant_id.0)); + + let wrong_timeline_id = create_s3_key( + &custom_timeline_id_path(&relative_timeline_path, "wrong_timeline_id")?.join("name"), + ); + let error_message = storage_info_error(&storage, &wrong_timeline_id); + assert!(error_message.contains(&wrong_timeline_id.0)); + + Ok(()) + } + + #[test] + fn download_destination_matches_original_path() -> anyhow::Result<()> { + let repo_harness = RepoHarness::create("download_destination_matches_original_path")?; + let original_path = repo_harness.timeline_path(&TIMELINE_ID).join("some name"); + + let dummy_storage = dummy_storage(&repo_harness.conf.workdir); + + let key = dummy_storage.storage_path(&original_path)?; + let download_destination = dummy_storage.info(&key)?.download_destination; + + assert_eq!( + original_path, download_destination, + "'original path -> storage key -> matching fs path' transformation should produce the same path as the input one for the correct path" + ); + + Ok(()) + } + + fn dummy_storage(pageserver_workdir: &'static Path) -> S3 { + S3 { + pageserver_workdir, + bucket: Bucket::new( + "dummy-bucket", + "us-east-1".parse().unwrap(), + Credentials::anonymous().unwrap(), + ) + .unwrap(), + } + } + + fn create_s3_key(relative_relish_path: &Path) -> S3ObjectKey { + S3ObjectKey( + relative_relish_path + .iter() + .fold(String::new(), |mut path_string, segment| { + path_string.push(S3_FILE_SEPARATOR); + path_string.push_str(segment.to_str().unwrap()); + path_string + }), + ) + } +} diff --git a/pageserver/src/relish_storage/storage_sync.rs b/pageserver/src/relish_storage/storage_sync.rs new file mode 100644 index 0000000000..94611b2f4d --- /dev/null +++ b/pageserver/src/relish_storage/storage_sync.rs @@ -0,0 +1,1559 @@ +//! A synchronization logic for the [`RelishStorage`] and the state to ensure the correct synchronizations. +//! +//! The synchronization does not aim to be immediate, instead +//! 
doing all the work in a separate thread asynchronously, attempting to fully replicate the
+//! pageserver timeline workdir data on the remote storage.
+//!
+//! [`SYNC_QUEUE`] is a priority queue holding [`SyncTask`]s for image uploads/downloads.
+//! The queue is emptied by a single thread running the loop, polling the tasks one by one.
+//!
+//! During the loop startup, an initial loop state is constructed from all remote storage entries.
+//! It's enough to poll the remote state once on startup only, due to the agreement that the pageserver has
+//! exclusive write access to the relish storage: new files appear in the storage only after the same
+//! pageserver writes them.
+//!
+//! The list construction is currently the only place where the storage sync can return an [`Err`] to the user.
+//! New upload tasks are accepted via the [`schedule_timeline_upload`] function regardless of whether the loop has started;
+//! it's up to the caller to avoid scheduling uploads of new relishes if that caller did not enable the loop.
+//! After the initial state is loaded into memory and the loop starts, any further [`Err`] results do not stop the loop, but rather
+//! reschedule the same task, possibly with fewer files left to sync in it.
+//!
+//! The synchronization unit is an image: a set of layer files (or relishes) and a special metadata file.
+//! Both upload and download tasks consider an image in a similar way ([`LocalTimeline`] and [`RemoteTimeline`]):
+//! * a set of relishes (both upload and download tasks store the files as local pageserver paths, ergo [`PathBuf`] is used)
+//! * a set of ids to distinguish the images ([`ZTenantId`] and [`ZTimelineId`])
+//! * `disk_consistent_lsn`, which indicates the last [`Lsn`] applicable to the data stored in this image
+//!
+//! The same relish has identical layer paths in both structs, since both represent the relish path in pageserver's workdir.
+//! This way, the sync can compare remote and local images seamlessly, downloading/uploading missing files if needed.
+//!
+//! After pageserver performs a successful image checkpoint and detects that the image state has updated, it registers an upload with
+//! the list of the image's new files and its incremented `disk_consistent_lsn` (which also gets stored in the image metadata file).
+//! Both the file list and `disk_consistent_lsn` are mandatory for the upload; that's why the uploads happen after checkpointing.
+//! Timelines with no such [`Lsn`] cannot guarantee their local file consistency and are not considered for backups.
+//! Not every upload of the same timeline gets processed: if `disk_consistent_lsn` is unchanged, the remote timeline is not updated.
+//!
+//! Remote timelines may lack `disk_consistent_lsn` if their metadata file is corrupt or missing.
+//! Such timelines are not downloaded, and their layer paths are entirely replaced with the ones from a newer upload for the same timeline.
+//! Intact remote timelines are stored in the sync loop memory to avoid duplicate reuploads, and then get queried for downloading if no
+//! timeline with the same id is found in the local workdir already.
+//!
+//! While all sync tasks operate on images, internally every image is split into its underlying relish files, which are synced independently.
+//! The sync logic does not distinguish relishes from each other, uploading/downloading them all via [`FuturesUnordered`] and registering all failures.
+//! A single special exception is the metadata file, which is always uploaded/downloaded last (source images with no metadata are ignored), only after the rest
+//! of the relishes are successfully synced.
+//! If there are relish or metadata sync errors, the task gets resubmitted with the failed layers only, with all the successful layers stored in the loop state.
+//! NOTE: No backpressure or eviction is implemented for tasks that always fail; this will be improved later.
+//!
+//! Synchronization never removes local files from the pageserver workdir or remote files from the remote storage: files from previous
+//! uploads that are not mentioned in the new upload lists are still considered part of the corresponding image.
+//! When determining which files to upload/download, the local file paths are compared (for a remote file, that is the same as its download destination),
+//! and two files are considered "equal" if their paths match. Such files are considered already synced and are skipped; no real contents checks are done.
+//! NOTE: No real contents or checksum check happens right now; this is a subject to improve later.
+//!
+//! After the whole image is downloaded, the [`crate::tenant_mgr::register_relish_download`] function is used to register the image in pageserver.
+//!
+//! When pageserver signals shutdown, the current sync task gets finished and the loop exits.
+//!
+//! Currently there's no other way to download a remote relish if it was not downloaded during the initial remote storage files check.
+//! This is subject to change in the near future, but requires more changes to [`crate::tenant_mgr`] before it can happen.
+
+use std::{
+    cmp::Ordering,
+    collections::{hash_map, BinaryHeap, HashMap, HashSet},
+    path::{Path, PathBuf},
+    sync::Mutex,
+    thread,
+    time::Duration,
+};
+
+use anyhow::{ensure, Context};
+use futures::stream::{FuturesUnordered, StreamExt};
+use lazy_static::lazy_static;
+use tokio::{sync::Semaphore, time::Instant};
+use tracing::*;
+
+use super::{RelishStorage, RemoteRelishInfo};
+use crate::{
+    layered_repository::{metadata_path, TimelineMetadata},
+    tenant_mgr::register_relish_download,
+    PageServerConf,
+};
+use zenith_metrics::{register_histogram_vec, register_int_gauge, HistogramVec, IntGauge};
+use zenith_utils::{
+    lsn::Lsn,
+    zid::{ZTenantId, ZTimelineId},
+};
+
+lazy_static! {
+    static ref REMAINING_SYNC_ITEMS: IntGauge = register_int_gauge!(
+        "pageserver_backup_remaining_sync_items",
+        "Number of storage sync items left in the queue"
+    )
+    .expect("failed to register pageserver backup remaining sync items int gauge");
+    static ref IMAGE_SYNC_TIME: HistogramVec = register_histogram_vec!(
+        "pageserver_backup_image_sync_time",
+        "Time taken to synchronize (download or upload) a whole pageserver image. \
+        Grouped by `operation_kind` (upload|download) and `status` (success|failure)",
+        &["operation_kind", "status"],
+        vec![
+            0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0, 6.0, 7.0,
+            8.0, 9.0, 10.0, 12.5, 15.0, 17.5, 20.0
+        ]
+    )
+    .expect("failed to register pageserver image sync time histogram vec");
+}
+
+lazy_static! {
+    static ref SYNC_QUEUE: Mutex<BinaryHeap<SyncTask>> = Mutex::new(BinaryHeap::new());
+}
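The queue above is a max-heap, so task priority follows the derived `Ord` of the `SyncTask` enum defined right below: later-declared variants compare greater and pop first. A standalone demonstration (not part of the patch) with a simplified stand-in enum:

```rust
use std::collections::BinaryHeap;

// Stand-in for SyncTask: with #[derive(Ord)], later variants compare greater.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
enum DemoTask {
    Download(u32),
    Upload(u32),
    UrgentDownload(u32),
}

fn main() {
    let mut queue = BinaryHeap::new();
    queue.push(DemoTask::Download(1));
    queue.push(DemoTask::UrgentDownload(3));
    queue.push(DemoTask::Upload(2));
    // Max-heap: UrgentDownload pops first, a regular Download pops last.
    assert_eq!(queue.pop(), Some(DemoTask::UrgentDownload(3)));
    assert_eq!(queue.pop(), Some(DemoTask::Upload(2)));
    assert_eq!(queue.pop(), Some(DemoTask::Download(1)));
}
```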
+/// An image sync task to store in the priority queue.
+/// The task priority is defined by the [`Ord`] derive combined with the max-heap queue:
+/// * enum variants declared later take priority over the ones declared earlier ([`SyncTask::UrgentDownload`] is processed first)
+/// * for the same enum variant, the "natural" comparison happens for its data
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
+enum SyncTask {
+    /// A regular image download, not critical for running, but still needed.
+    Download(RemoteTimeline),
+    /// A checkpoint outcome with possible local file updates that need actualization in the remote storage.
+    /// Not necessarily fresher than the one already uploaded.
+    Upload(LocalTimeline),
+    /// An image that's not present locally but was found remotely during the sync loop start.
+    /// Treated as "lost state" that pageserver needs to recover fully before it's ready to work.
+    UrgentDownload(RemoteTimeline),
+}
+
+/// Local timeline files for upload.
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
+struct LocalTimeline {
+    tenant_id: ZTenantId,
+    timeline_id: ZTimelineId,
+    /// Relish file paths in the pageserver workdir.
+    layers: Vec<PathBuf>,
+    metadata: TimelineMetadata,
+}
+
+/// Info about the remote image files.
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
+struct RemoteTimeline {
+    tenant_id: ZTenantId,
+    timeline_id: ZTimelineId,
+    /// Same paths as in [`LocalTimeline`], pointing at the download
+    /// destination of every one of the remote timeline layers.
+    layers: Vec<PathBuf>,
+    /// If the metadata file is uploaded, the corresponding field from that file.
+    /// In contrast with [`LocalTimeline`], a remote timeline's metadata may be missing
+    /// due to various upload errors or abrupt pageserver shutdowns that obstructed
+    /// storing the file.
+    metadata: Option<TimelineMetadata>,
+}
+
+impl RemoteTimeline {
+    fn disk_consistent_lsn(&self) -> Option<Lsn> {
+        self.metadata.as_ref().map(|meta| meta.disk_consistent_lsn)
+    }
+}
+
+/// Adds the new image as an upload sync task to the queue.
+/// Ensure that the loop is started, otherwise the task will never be processed.
+pub fn schedule_timeline_upload(
+    tenant_id: ZTenantId,
+    timeline_id: ZTimelineId,
+    layers: Vec<PathBuf>,
+    metadata: TimelineMetadata,
+) {
+    SYNC_QUEUE
+        .lock()
+        .unwrap()
+        .push(SyncTask::Upload(LocalTimeline {
+            tenant_id,
+            timeline_id,
+            layers,
+            metadata,
+        }))
+}
+
+/// Uses the relish storage given to start the storage sync loop.
+/// See module docs for the loop step description.
+pub(super) fn spawn_storage_sync_thread< + P: std::fmt::Debug, + S: 'static + RelishStorage, +>( + config: &'static PageServerConf, + relish_storage: S, + max_concurrent_sync: usize, +) -> anyhow::Result>> { + ensure!( + max_concurrent_sync > 0, + "Got 0 as max concurrent synchronizations allowed, cannot initialize a storage sync thread" + ); + + let handle = thread::Builder::new() + .name("Queue based relish storage sync".to_string()) + .spawn(move || { + let concurrent_sync_limit = Semaphore::new(max_concurrent_sync); + let thread_result = storage_sync_loop(config, relish_storage, &concurrent_sync_limit); + concurrent_sync_limit.close(); + if let Err(e) = &thread_result { + error!("Failed to run storage sync thread: {:#}", e); + } + thread_result + })?; + Ok(handle) +} + +fn storage_sync_loop>( + config: &'static PageServerConf, + relish_storage: S, + concurrent_sync_limit: &Semaphore, +) -> anyhow::Result<()> { + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build()?; + let mut remote_timelines = runtime + .block_on(fetch_existing_uploads(&relish_storage)) + .context("Failed to determine previously uploaded timelines")?; + + let urgent_downloads = latest_timelines(&remote_timelines) + .iter() + .filter_map(|(&tenant_id, &timeline_id)| remote_timelines.get(&(tenant_id, timeline_id))) + .filter(|latest_remote_timeline| { + let tenant_id = latest_remote_timeline.tenant_id; + let timeline_id = latest_remote_timeline.timeline_id; + let exists_locally = config.timeline_path(&timeline_id, &tenant_id).exists(); + if exists_locally { + debug!( + "Timeline with tenant id {}, relish id {} exists locally, not downloading", + tenant_id, timeline_id + ); + false + } else { + true + } + }) + .cloned() + .map(SyncTask::UrgentDownload) + .collect::>(); + info!( + "Will download {} timelines to restore state", + urgent_downloads.len() + ); + let mut accessor = SYNC_QUEUE.lock().unwrap(); + accessor.extend(urgent_downloads.into_iter()); + drop(accessor); + + while !crate::tenant_mgr::shutdown_requested() { + let mut queue_accessor = SYNC_QUEUE.lock().unwrap(); + let next_task = queue_accessor.pop(); + let remaining_queue_length = queue_accessor.len(); + drop(queue_accessor); + + match next_task { + Some(task) => { + debug!( + "Processing a new task, more tasks left to process: {}", + remaining_queue_length + ); + REMAINING_SYNC_ITEMS.set(remaining_queue_length as i64); + + runtime.block_on(async { + let sync_start = Instant::now(); + match task { + SyncTask::Download(download_data) => { + let sync_status = download_timeline( + config, + concurrent_sync_limit, + &relish_storage, + download_data, + false, + ) + .await; + register_sync_status(sync_start, "download", sync_status); + } + SyncTask::UrgentDownload(download_data) => { + let sync_status = download_timeline( + config, + concurrent_sync_limit, + &relish_storage, + download_data, + true, + ) + .await; + register_sync_status(sync_start, "download", sync_status); + } + SyncTask::Upload(layer_upload) => { + let sync_status = upload_timeline( + config, + concurrent_sync_limit, + &mut remote_timelines, + &relish_storage, + layer_upload, + ) + .await; + register_sync_status(sync_start, "upload", sync_status); + } + } + }) + } + None => { + trace!("No storage sync tasks found"); + thread::sleep(Duration::from_secs(1)); + continue; + } + }; + } + log::debug!("Queue based relish storage sync thread shut down"); + Ok(()) +} + +fn add_to_queue(task: SyncTask) { + SYNC_QUEUE.lock().unwrap().push(task) +} + +fn 
register_sync_status(sync_start: Instant, sync_name: &str, sync_status: Option) { + let secs_elapsed = sync_start.elapsed().as_secs_f64(); + log::debug!("Processed a sync task in {} seconds", secs_elapsed); + match sync_status { + Some(true) => IMAGE_SYNC_TIME.with_label_values(&[sync_name, "success"]), + Some(false) => IMAGE_SYNC_TIME.with_label_values(&[sync_name, "failure"]), + None => return, + } + .observe(secs_elapsed) +} + +fn latest_timelines( + remote_timelines: &HashMap<(ZTenantId, ZTimelineId), RemoteTimeline>, +) -> HashMap { + let mut latest_timelines_for_tenants = HashMap::with_capacity(remote_timelines.len()); + for (&(remote_tenant_id, remote_timeline_id), remote_timeline_data) in remote_timelines { + let (latest_timeline_id, timeline_metadata) = latest_timelines_for_tenants + .entry(remote_tenant_id) + .or_insert_with(|| (remote_timeline_id, remote_timeline_data.metadata.clone())); + if latest_timeline_id != &remote_timeline_id + && timeline_metadata + .as_ref() + .map(|metadata| metadata.disk_consistent_lsn) + < remote_timeline_data.disk_consistent_lsn() + { + *latest_timeline_id = remote_timeline_id; + *timeline_metadata = remote_timeline_data.metadata.clone(); + } + } + + latest_timelines_for_tenants + .into_iter() + .map(|(tenant_id, (timeline_id, _))| (tenant_id, timeline_id)) + .collect() +} + +async fn fetch_existing_uploads< + P: std::fmt::Debug, + S: 'static + RelishStorage, +>( + relish_storage: &S, +) -> anyhow::Result> { + let uploaded_relishes = relish_storage + .list_relishes() + .await + .context("Failed to list relish uploads")?; + + let mut relish_data_fetches = uploaded_relishes + .into_iter() + .map(|remote_path| async { + ( + remote_relish_info(relish_storage, &remote_path).await, + remote_path, + ) + }) + .collect::>(); + + let mut fetched = HashMap::new(); + while let Some((fetch_result, remote_path)) = relish_data_fetches.next().await { + match fetch_result { + Ok((relish_info, remote_metadata)) => { + let tenant_id = relish_info.tenant_id; + let timeline_id = relish_info.timeline_id; + let remote_timeline = + fetched + .entry((tenant_id, timeline_id)) + .or_insert_with(|| RemoteTimeline { + tenant_id, + timeline_id, + layers: Vec::new(), + metadata: None, + }); + if remote_metadata.is_some() { + remote_timeline.metadata = remote_metadata; + } else { + remote_timeline + .layers + .push(relish_info.download_destination); + } + } + Err(e) => { + warn!( + "Failed to fetch relish info for path {:?}, reason: {:#}", + remote_path, e + ); + continue; + } + } + } + + Ok(fetched) +} + +async fn remote_relish_info>( + relish_storage: &S, + remote_path: &P, +) -> anyhow::Result<(RemoteRelishInfo, Option)> { + let info = relish_storage.info(remote_path)?; + let metadata = if info.is_metadata { + let metadata_bytes = relish_storage + .download_relish(remote_path, std::io::BufWriter::new(Vec::new())) + .await + .and_then(|buf_writer| Ok(buf_writer.into_inner()?)) + .with_context(|| { + format!( + "Failed to download metadata file contents for tenant {}, timeline {}", + info.tenant_id, info.timeline_id + ) + })?; + Some(TimelineMetadata::from_bytes(&metadata_bytes)?) 
+    } else {
+        None
+    };
+    Ok((info, metadata))
+}
+
+async fn download_timeline<'a, P, S: 'static + RelishStorage<RelishStoragePath = P>>(
+    config: &'static PageServerConf,
+    concurrent_sync_limit: &'a Semaphore,
+    relish_storage: &'a S,
+    remote_timeline: RemoteTimeline,
+    urgent: bool,
+) -> Option<bool> {
+    let timeline_id = remote_timeline.timeline_id;
+    let tenant_id = remote_timeline.tenant_id;
+    debug!("Downloading layers for timeline {}", timeline_id);
+
+    let new_metadata = if let Some(metadata) = remote_timeline.metadata {
+        metadata
+    } else {
+        warn!("Remote timeline incomplete: no metadata found, aborting the download");
+        return None;
+    };
+    debug!("Downloading {} layers", remote_timeline.layers.len());
+
+    let sync_result = synchronize_layers(
+        config,
+        concurrent_sync_limit,
+        relish_storage,
+        remote_timeline.layers.into_iter(),
+        SyncOperation::Download,
+        &new_metadata,
+        tenant_id,
+        timeline_id,
+    )
+    .await;
+
+    match sync_result {
+        SyncResult::Success { .. } => {
+            register_relish_download(config, tenant_id, timeline_id);
+            Some(true)
+        }
+        SyncResult::MetadataSyncError { .. } => {
+            let download = RemoteTimeline {
+                layers: Vec::new(),
+                metadata: Some(new_metadata),
+                tenant_id,
+                timeline_id,
+            };
+            add_to_queue(if urgent {
+                SyncTask::UrgentDownload(download)
+            } else {
+                SyncTask::Download(download)
+            });
+            Some(false)
+        }
+        SyncResult::LayerSyncError { not_synced, .. } => {
+            let download = RemoteTimeline {
+                layers: not_synced,
+                metadata: Some(new_metadata),
+                tenant_id,
+                timeline_id,
+            };
+            add_to_queue(if urgent {
+                SyncTask::UrgentDownload(download)
+            } else {
+                SyncTask::Download(download)
+            });
+            Some(false)
+        }
+    }
+}
+
+#[allow(clippy::unnecessary_filter_map)]
+async fn upload_timeline<'a, P, S: 'static + RelishStorage<RelishStoragePath = P>>(
+    config: &'static PageServerConf,
+    concurrent_sync_limit: &'a Semaphore,
+    remote_timelines: &'a mut HashMap<(ZTenantId, ZTimelineId), RemoteTimeline>,
+    relish_storage: &'a S,
+    mut new_upload: LocalTimeline,
+) -> Option<bool> {
+    let tenant_id = new_upload.tenant_id;
+    let timeline_id = new_upload.timeline_id;
+    debug!("Uploading layers for timeline {}", timeline_id);
+
+    if let hash_map::Entry::Occupied(o) = remote_timelines.entry((tenant_id, timeline_id)) {
+        let uploaded_timeline_files = o.get();
+        let uploaded_layers = uploaded_timeline_files
+            .layers
+            .iter()
+            .collect::<HashSet<_>>();
+        new_upload
+            .layers
+            .retain(|path_to_upload| !uploaded_layers.contains(path_to_upload));
+        match &uploaded_timeline_files.metadata {
+            None => debug!("Partially uploaded timeline found, uploading missing files only"),
+            Some(remote_metadata) => {
+                let new_lsn = new_upload.metadata.disk_consistent_lsn;
+                let remote_lsn = remote_metadata.disk_consistent_lsn;
+                match new_lsn.cmp(&remote_lsn) {
+                    Ordering::Equal | Ordering::Less => {
+                        warn!(
+                            "Received a timeline with LSN {} that's not later than the one from remote storage {}, not uploading",
+                            new_lsn, remote_lsn
+                        );
+                        return None;
+                    }
+                    Ordering::Greater => debug!(
+                        "Received a timeline with newer LSN {} (storage LSN {}), updating the upload",
+                        new_lsn, remote_lsn
+                    ),
+                }
+            }
+        }
+    }
+
+    let LocalTimeline {
+        layers: new_layers,
+        metadata: new_metadata,
+        ..
+    } = new_upload;
+    let sync_result = synchronize_layers(
+        config,
+        concurrent_sync_limit,
+        relish_storage,
+        new_layers.into_iter(),
+        SyncOperation::Upload,
+        &new_metadata,
+        tenant_id,
+        timeline_id,
+    )
+    .await;
+
+    let entry_to_update = remote_timelines
+        .entry((tenant_id, timeline_id))
+        .or_insert_with(|| RemoteTimeline {
+            layers: Vec::new(),
+            metadata: Some(new_metadata.clone()),
+            tenant_id,
+            timeline_id,
+        });
+    match sync_result {
+        SyncResult::Success { synced } => {
+            entry_to_update.layers.extend(synced.into_iter());
+            entry_to_update.metadata = Some(new_metadata);
+            Some(true)
+        }
+        SyncResult::MetadataSyncError { synced } => {
+            entry_to_update.layers.extend(synced.into_iter());
+            add_to_queue(SyncTask::Upload(LocalTimeline {
+                tenant_id,
+                timeline_id,
+                layers: Vec::new(),
+                metadata: new_metadata,
+            }));
+            Some(false)
+        }
+        SyncResult::LayerSyncError { synced, not_synced } => {
+            entry_to_update.layers.extend(synced.into_iter());
+            add_to_queue(SyncTask::Upload(LocalTimeline {
+                tenant_id,
+                timeline_id,
+                layers: not_synced,
+                metadata: new_metadata,
+            }));
+            Some(false)
+        }
+    }
+}
+
+/// Layer sync operation kind.
+///
+/// This enum allows unifying the logic for image relish uploads and downloads.
+/// When an image's layers are synchronized, the only difference
+/// between downloads and uploads is the [`RelishStorage`] method that needs to be called.
+#[derive(Debug, Copy, Clone)]
+enum SyncOperation {
+    Download,
+    Upload,
+}
+
+/// Image sync result.
+#[derive(Debug)]
+enum SyncResult {
+    /// All relish files are synced (their paths are returned).
+    /// The metadata file is synced too (its path is not returned).
+    Success { synced: Vec<PathBuf> },
+    /// All relish files are synced (their paths are returned).
+    /// The metadata file is not synced (its path is not returned).
+    MetadataSyncError { synced: Vec<PathBuf> },
+    /// Some relish files are not synced, some are (their paths are returned).
+    /// The metadata file is not synced (its path is not returned).
+    LayerSyncError {
+        synced: Vec<PathBuf>,
+        not_synced: Vec<PathBuf>,
+    },
+}
+
+/// Synchronizes the given layers and metadata contents of a certain image.
+/// Relishes are always synced before the metadata file, and the latter gets synced only if
+/// the rest of the files are successfully processed.
+#[allow(clippy::too_many_arguments)]
+async fn synchronize_layers<'a, P, S: 'static + RelishStorage<RelishStoragePath = P>>(
+    config: &'static PageServerConf,
+    concurrent_sync_limit: &'a Semaphore,
+    relish_storage: &'a S,
+    layers: impl Iterator<Item = PathBuf>,
+    sync_operation: SyncOperation,
+    new_metadata: &'a TimelineMetadata,
+    tenant_id: ZTenantId,
+    timeline_id: ZTimelineId,
+) -> SyncResult {
+    let mut sync_operations = layers
+        .into_iter()
+        .map(|layer_path| async move {
+            let permit = concurrent_sync_limit
+                .acquire()
+                .await
+                .expect("Semaphore should not be closed yet");
+            let sync_result = match sync_operation {
+                SyncOperation::Download => download(relish_storage, &layer_path).await,
+                SyncOperation::Upload => upload(relish_storage, &layer_path).await,
+            };
+            drop(permit);
+            (layer_path, sync_result)
+        })
+        .collect::<FuturesUnordered<_>>();
+
+    let mut synced = Vec::new();
+    let mut not_synced = Vec::new();
+    while let Some((layer_path, layer_sync_result)) = sync_operations.next().await {
+        match layer_sync_result {
+            Ok(()) => synced.push(layer_path),
+            Err(e) => {
+                error!(
+                    "Failed to sync ({:?}) layer with local path '{}', reason: {:#}",
+                    sync_operation,
+                    layer_path.display(),
+                    e,
+                );
+                not_synced.push(layer_path);
+            }
+        }
+    }
+
+    if not_synced.is_empty() {
+        debug!(
+            "Successfully synced ({:?}) all {} layers",
+            sync_operation,
+            synced.len(),
+        );
+        trace!("Synced layers: {:?}", synced);
+        match sync_metadata(
+            config,
+            relish_storage,
+            sync_operation,
+            new_metadata,
+            tenant_id,
+            timeline_id,
+        )
+        .await
+        {
+            Ok(()) => {
+                debug!("Metadata file synced successfully");
+                SyncResult::Success { synced }
+            }
+            Err(e) => {
+                error!(
+                    "Failed to sync ({:?}) new metadata, reason: {:#}",
+                    sync_operation, e
+                );
+                SyncResult::MetadataSyncError { synced }
+            }
+        }
+    } else {
+        SyncResult::LayerSyncError { synced, not_synced }
+    }
+}
+
+async fn sync_metadata<'a, P, S: 'static + RelishStorage<RelishStoragePath = P>>(
+    config: &'static PageServerConf,
+    relish_storage: &'a S,
+    sync_operation: SyncOperation,
+    new_metadata: &'a TimelineMetadata,
+    tenant_id: ZTenantId,
+    timeline_id: ZTimelineId,
+) -> anyhow::Result<()> {
+    debug!("Synchronizing ({:?}) metadata file", sync_operation);
+
+    let local_metadata_path = metadata_path(config, timeline_id, tenant_id);
+    let new_metadata_bytes = new_metadata.to_bytes()?;
+    match sync_operation {
+        SyncOperation::Download => {
+            tokio::fs::write(&local_metadata_path, new_metadata_bytes).await?;
+            tokio::fs::File::open(
+                local_metadata_path
+                    .parent()
+                    .expect("Metadata should always have a parent"),
+            )
+            .await?
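+            // fsync the parent directory as well, so the new metadata
+            // file's directory entry is durable on disk.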
+            .sync_all()
+            .await?;
+        }
+        SyncOperation::Upload => {
+            let remote_path = relish_storage
+                .storage_path(&local_metadata_path)
+                .with_context(|| {
+                    format!(
+                        "Failed to get remote storage path for local metadata path '{}'",
+                        local_metadata_path.display()
+                    )
+                })?;
+            let mut bytes = tokio::io::BufReader::new(std::io::Cursor::new(new_metadata_bytes));
+            relish_storage
+                .upload_relish(&mut bytes, &remote_path)
+                .await?;
+        }
+    }
+    Ok(())
+}
+
+async fn upload<P, S: RelishStorage<RelishStoragePath = P>>(
+    relish_storage: &S,
+    source: &Path,
+) -> anyhow::Result<()> {
+    let destination = relish_storage.storage_path(source).with_context(|| {
+        format!(
+            "Failed to derive storage destination out of upload path {}",
+            source.display()
+        )
+    })?;
+    let mut source_file = tokio::io::BufReader::new(
+        tokio::fs::OpenOptions::new()
+            .read(true)
+            .open(source)
+            .await
+            .with_context(|| {
+                format!(
+                    "Failed to open the local source file at {} for upload",
+                    source.display()
+                )
+            })?,
+    );
+    relish_storage
+        .upload_relish(&mut source_file, &destination)
+        .await
+}
+
+async fn download<P, S: RelishStorage<RelishStoragePath = P>>(
+    relish_storage: &S,
+    destination: &Path,
+) -> anyhow::Result<()> {
+    if destination.exists() {
+        Ok(())
+    } else {
+        let source = relish_storage.storage_path(destination).with_context(|| {
+            format!(
+                "Failed to derive storage source out of download destination '{}'",
+                destination.display()
+            )
+        })?;
+
+        if let Some(target_parent) = destination.parent() {
+            if !target_parent.exists() {
+                tokio::fs::create_dir_all(target_parent)
+                    .await
+                    .with_context(|| {
+                        format!(
+                            "Failed to create parent directories for destination '{}'",
+                            destination.display()
+                        )
+                    })?;
+            }
+        }
+        let destination_file = std::io::BufWriter::new(
+            std::fs::OpenOptions::new()
+                .write(true)
+                .create_new(true)
+                .open(destination)
+                .with_context(|| {
+                    format!(
+                        "Failed to open download destination file '{}'",
+                        destination.display()
+                    )
+                })?,
+        );
+
+        relish_storage
+            .download_relish(&source, destination_file)
+            .await?;
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::{
+        collections::{BTreeMap, BTreeSet},
+        fs,
+        io::Cursor,
+    };
+
+    use super::*;
+    use crate::{
+        relish_storage::local_fs::LocalFs,
+        repository::repo_harness::{RepoHarness, TIMELINE_ID},
+    };
+    use hex_literal::hex;
+    use tempfile::tempdir;
+    use tokio::io::BufReader;
+
+    const NO_METADATA_TIMELINE_ID: ZTimelineId =
+        ZTimelineId::from_array(hex!("3755461d2259a63a80635d760958efd0"));
+    const CORRUPT_METADATA_TIMELINE_ID: ZTimelineId =
+        ZTimelineId::from_array(hex!("314db9af91fbc02dda586880a3216c61"));
+
+    lazy_static!
{ + static ref LIMIT: Semaphore = Semaphore::new(100); + } + + #[tokio::test] + async fn upload_new_timeline() -> anyhow::Result<()> { + let repo_harness = RepoHarness::create("upload_new_timeline")?; + let storage = LocalFs::new(tempdir()?.path().to_owned(), &repo_harness.conf.workdir)?; + let mut remote_timelines = HashMap::new(); + + assert_timelines_equal( + HashMap::new(), + fetch_existing_uploads(&storage).await.unwrap(), + ); + + let upload_metadata = dummy_metadata(Lsn(0x30)); + let upload = create_local_timeline( + &repo_harness, + TIMELINE_ID, + &["a", "b"], + upload_metadata.clone(), + )?; + let expected_layers = upload.layers.clone(); + ensure_correct_timeline_upload(&repo_harness, &mut remote_timelines, &storage, upload) + .await; + + let mut expected_uploads = HashMap::new(); + expected_uploads.insert( + (repo_harness.tenant_id, TIMELINE_ID), + RemoteTimeline { + tenant_id: repo_harness.tenant_id, + timeline_id: TIMELINE_ID, + layers: expected_layers, + metadata: Some(upload_metadata), + }, + ); + assert_timelines_equal(expected_uploads, fetch_existing_uploads(&storage).await?); + + Ok(()) + } + + #[tokio::test] + async fn reupload_timeline() -> anyhow::Result<()> { + let repo_harness = RepoHarness::create("reupload_timeline")?; + let storage = LocalFs::new(tempdir()?.path().to_owned(), &repo_harness.conf.workdir)?; + let mut remote_timelines = HashMap::new(); + + let first_upload_metadata = dummy_metadata(Lsn(0x30)); + let first_timeline = create_local_timeline( + &repo_harness, + TIMELINE_ID, + &["a", "b"], + first_upload_metadata.clone(), + )?; + let first_paths = first_timeline.layers.clone(); + ensure_correct_timeline_upload( + &repo_harness, + &mut remote_timelines, + &storage, + first_timeline, + ) + .await; + let after_first_uploads = remote_timelines.clone(); + + let new_upload_metadata = dummy_metadata(Lsn(0x20)); + assert!( + new_upload_metadata.disk_consistent_lsn < first_upload_metadata.disk_consistent_lsn + ); + let new_upload = + create_local_timeline(&repo_harness, TIMELINE_ID, &["b", "c"], new_upload_metadata)?; + upload_timeline( + repo_harness.conf, + &LIMIT, + &mut remote_timelines, + &storage, + new_upload.clone(), + ) + .await; + assert_sync_queue_contents(SyncTask::Upload(new_upload), false); + assert_timelines_equal(after_first_uploads, remote_timelines.clone()); + + let second_upload_metadata = dummy_metadata(Lsn(0x40)); + let second_timeline = create_local_timeline( + &repo_harness, + TIMELINE_ID, + &["b", "c"], + second_upload_metadata.clone(), + )?; + let second_paths = second_timeline.layers.clone(); + assert!( + first_upload_metadata.disk_consistent_lsn < second_upload_metadata.disk_consistent_lsn + ); + ensure_correct_timeline_upload( + &repo_harness, + &mut remote_timelines, + &storage, + second_timeline, + ) + .await; + + let mut expected_uploads = HashMap::new(); + let mut expected_layers = first_paths.clone(); + expected_layers.extend(second_paths.clone().into_iter()); + expected_layers.dedup(); + + expected_uploads.insert( + (repo_harness.tenant_id, TIMELINE_ID), + RemoteTimeline { + tenant_id: repo_harness.tenant_id, + timeline_id: TIMELINE_ID, + layers: expected_layers, + metadata: Some(second_upload_metadata.clone()), + }, + ); + assert_timelines_equal(expected_uploads, remote_timelines.clone()); + + let third_upload_metadata = dummy_metadata(Lsn(0x50)); + assert!( + second_upload_metadata.disk_consistent_lsn < third_upload_metadata.disk_consistent_lsn + ); + let third_timeline = create_local_timeline( + &repo_harness, + TIMELINE_ID, + 
&["d", "e"], + third_upload_metadata.clone(), + )?; + let third_paths = third_timeline.layers.clone(); + ensure_correct_timeline_upload( + &repo_harness, + &mut remote_timelines, + &storage, + third_timeline, + ) + .await; + + let mut expected_uploads = HashMap::new(); + let mut expected_layers = first_paths; + expected_layers.extend(second_paths.into_iter()); + expected_layers.extend(third_paths.into_iter()); + expected_layers.dedup(); + + expected_uploads.insert( + (repo_harness.tenant_id, TIMELINE_ID), + RemoteTimeline { + tenant_id: repo_harness.tenant_id, + timeline_id: TIMELINE_ID, + layers: expected_layers, + metadata: Some(third_upload_metadata), + }, + ); + assert_timelines_equal(expected_uploads, remote_timelines); + + Ok(()) + } + + #[tokio::test] + async fn reupload_missing_metadata() -> anyhow::Result<()> { + let repo_harness = RepoHarness::create("reupload_missing_metadata")?; + let storage = LocalFs::new(tempdir()?.path().to_owned(), &repo_harness.conf.workdir)?; + let mut remote_timelines = + store_incorrect_metadata_relishes(&repo_harness, &storage).await?; + assert_timelines_equal( + remote_timelines.clone(), + fetch_existing_uploads(&storage).await?, + ); + + let old_remote_timeline = remote_timelines + .get(&(repo_harness.tenant_id, NO_METADATA_TIMELINE_ID)) + .unwrap() + .clone(); + let updated_metadata = dummy_metadata(Lsn(0x100)); + create_local_metadata(&repo_harness, NO_METADATA_TIMELINE_ID, &updated_metadata)?; + ensure_correct_timeline_upload( + &repo_harness, + &mut remote_timelines, + &storage, + LocalTimeline { + tenant_id: repo_harness.tenant_id, + timeline_id: NO_METADATA_TIMELINE_ID, + layers: old_remote_timeline.layers.clone(), + metadata: updated_metadata.clone(), + }, + ) + .await; + let reuploaded_timelines = fetch_existing_uploads(&storage).await?; + + let mut expected_timeline = RemoteTimeline { + metadata: Some(updated_metadata), + ..old_remote_timeline + }; + expected_timeline.layers.sort(); + let mut updated_timeline = reuploaded_timelines + .get(&(repo_harness.tenant_id, NO_METADATA_TIMELINE_ID)) + .unwrap() + .clone(); + updated_timeline.layers.sort(); + assert_eq!(expected_timeline, updated_timeline); + + Ok(()) + } + + #[tokio::test] + async fn test_upload_with_errors() -> anyhow::Result<()> { + let repo_harness = RepoHarness::create("test_upload_with_errors")?; + let storage = LocalFs::new(tempdir()?.path().to_owned(), &repo_harness.conf.workdir)?; + let mut remote_timelines = HashMap::new(); + + let local_path = repo_harness.timeline_path(&TIMELINE_ID).join("something"); + assert!(!local_path.exists()); + assert!(fetch_existing_uploads(&storage).await?.is_empty()); + + let timeline_without_local_files = LocalTimeline { + tenant_id: repo_harness.tenant_id, + timeline_id: TIMELINE_ID, + layers: vec![local_path], + metadata: dummy_metadata(Lsn(0x30)), + }; + + upload_timeline( + repo_harness.conf, + &LIMIT, + &mut remote_timelines, + &storage, + timeline_without_local_files.clone(), + ) + .await; + + assert!(fetch_existing_uploads(&storage).await?.is_empty()); + assert_sync_queue_contents(SyncTask::Upload(timeline_without_local_files), true); + assert!(!repo_harness.timeline_path(&TIMELINE_ID).exists()); + + Ok(()) + } + + #[tokio::test] + async fn test_download_timeline() -> anyhow::Result<()> { + let repo_harness = RepoHarness::create("test_download_timeline")?; + let storage = LocalFs::new(tempdir()?.path().to_owned(), &repo_harness.conf.workdir)?; + let mut remote_timelines = + store_incorrect_metadata_relishes(&repo_harness, 
&storage).await?;
+        fs::remove_dir_all(repo_harness.timeline_path(&NO_METADATA_TIMELINE_ID))?;
+        fs::remove_dir_all(repo_harness.timeline_path(&CORRUPT_METADATA_TIMELINE_ID))?;
+
+        let regular_timeline_path = repo_harness.timeline_path(&TIMELINE_ID);
+        let regular_timeline = create_local_timeline(
+            &repo_harness,
+            TIMELINE_ID,
+            &["a", "b"],
+            dummy_metadata(Lsn(0x30)),
+        )?;
+        ensure_correct_timeline_upload(
+            &repo_harness,
+            &mut remote_timelines,
+            &storage,
+            regular_timeline,
+        )
+        .await;
+        fs::remove_dir_all(&regular_timeline_path)?;
+        let remote_regular_timeline = remote_timelines
+            .get(&(repo_harness.tenant_id, TIMELINE_ID))
+            .unwrap()
+            .clone();
+
+        download_timeline(
+            repo_harness.conf,
+            &LIMIT,
+            &storage,
+            remote_regular_timeline.clone(),
+            true,
+        )
+        .await;
+        download_timeline(
+            repo_harness.conf,
+            &LIMIT,
+            &storage,
+            remote_regular_timeline.clone(),
+            true,
+        )
+        .await;
+        download_timeline(
+            repo_harness.conf,
+            &LIMIT,
+            &storage,
+            remote_timelines
+                .get(&(repo_harness.tenant_id, NO_METADATA_TIMELINE_ID))
+                .unwrap()
+                .clone(),
+            true,
+        )
+        .await;
+        download_timeline(
+            repo_harness.conf,
+            &LIMIT,
+            &storage,
+            remote_timelines
+                .get(&(repo_harness.tenant_id, CORRUPT_METADATA_TIMELINE_ID))
+                .unwrap()
+                .clone(),
+            true,
+        )
+        .await;
+
+        assert_timelines_equal(remote_timelines, fetch_existing_uploads(&storage).await?);
+        assert!(!repo_harness
+            .timeline_path(&NO_METADATA_TIMELINE_ID)
+            .exists());
+        assert!(!repo_harness
+            .timeline_path(&CORRUPT_METADATA_TIMELINE_ID)
+            .exists());
+        assert_timeline_files_match(&repo_harness, remote_regular_timeline);
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn metadata_file_sync() -> anyhow::Result<()> {
+        let repo_harness = RepoHarness::create("metadata_file_sync")?;
+        let storage = LocalFs::new(tempdir()?.path().to_owned(), &repo_harness.conf.workdir)?;
+        let mut remote_timelines = HashMap::new();
+
+        let uploaded_metadata = dummy_metadata(Lsn(0x30));
+        let metadata_local_path =
+            metadata_path(repo_harness.conf, TIMELINE_ID, repo_harness.tenant_id);
+        let new_upload = create_local_timeline(
+            &repo_harness,
+            TIMELINE_ID,
+            &["a", "b"],
+            uploaded_metadata.clone(),
+        )?;
+        tokio::fs::write(&metadata_local_path, b"incorrect metadata").await?;
+
+        upload_timeline(
+            repo_harness.conf,
+            &LIMIT,
+            &mut remote_timelines,
+            &storage,
+            new_upload.clone(),
+        )
+        .await;
+        assert_timelines_equal(
+            remote_timelines.clone(),
+            fetch_existing_uploads(&storage).await?,
+        );
+
+        let remote_timeline = remote_timelines
+            .get(&(repo_harness.tenant_id, TIMELINE_ID))
+            .unwrap()
+            .clone();
+        assert_eq!(
+            remote_timeline.metadata.as_ref(),
+            Some(&uploaded_metadata),
+            "Locally corrupted metadata should be ignored when uploading an image"
+        );
+
+        download_timeline(
+            repo_harness.conf,
+            &LIMIT,
+            &storage,
+            remote_timeline.clone(),
+            false,
+        )
+        .await;
+        let downloaded_metadata_bytes = tokio::fs::read(&metadata_local_path)
+            .await
+            .expect("Failed to read metadata file contents after redownload");
+        let downloaded_metadata = TimelineMetadata::from_bytes(&downloaded_metadata_bytes)
+            .expect("Failed to parse metadata file contents after redownload");
+        assert_eq!(
+            downloaded_metadata, uploaded_metadata,
+            "Should redownload the same metadata that was uploaded"
+        );
+
+        Ok(())
+    }
+
+    #[test]
+    fn queue_order_test() {
+        let repo_harness = RepoHarness::create("queue_order_test").unwrap();
+
+        let tenant_id = repo_harness.tenant_id;
+        let timeline_id = TIMELINE_ID;
+        let layers = Vec::new();
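+        // Expected pop order: urgent downloads first, then uploads, then plain
+        // downloads; within each kind, the higher disk_consistent_lsn comes first.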
+        let smaller_lsn_metadata = dummy_metadata(Lsn(0x200));
+        let bigger_lsn_metadata = dummy_metadata(Lsn(0x300));
+        assert!(bigger_lsn_metadata > smaller_lsn_metadata);
+
+        for metadata in [bigger_lsn_metadata.clone(), smaller_lsn_metadata.clone()] {
+            add_to_queue(SyncTask::Upload(LocalTimeline {
+                tenant_id,
+                timeline_id,
+                layers: layers.clone(),
+                metadata: metadata.clone(),
+            }));
+            add_to_queue(SyncTask::Download(RemoteTimeline {
+                tenant_id,
+                timeline_id,
+                layers: layers.clone(),
+                metadata: Some(metadata.clone()),
+            }));
+            add_to_queue(SyncTask::UrgentDownload(RemoteTimeline {
+                tenant_id,
+                timeline_id,
+                layers: layers.clone(),
+                metadata: Some(metadata),
+            }));
+        }
+
+        let mut queue_accessor = SYNC_QUEUE.lock().unwrap();
+        let mut ordered_tasks = Vec::with_capacity(queue_accessor.len());
+        while let Some(task) = queue_accessor.pop() {
+            let task_lsn = match &task {
+                SyncTask::Upload(LocalTimeline { metadata, .. }) => {
+                    Some(metadata.disk_consistent_lsn)
+                }
+                SyncTask::UrgentDownload(remote_timeline) | SyncTask::Download(remote_timeline) => {
+                    remote_timeline.disk_consistent_lsn()
+                }
+            };
+
+            if let Some(task_lsn) = task_lsn {
+                if task_lsn == smaller_lsn_metadata.disk_consistent_lsn
+                    || task_lsn == bigger_lsn_metadata.disk_consistent_lsn
+                {
+                    ordered_tasks.push(task);
+                }
+            }
+        }
+        drop(queue_accessor);
+
+        let expected_ordered_tasks = vec![
+            SyncTask::UrgentDownload(RemoteTimeline {
+                tenant_id,
+                timeline_id,
+                layers: layers.clone(),
+                metadata: Some(bigger_lsn_metadata.clone()),
+            }),
+            SyncTask::UrgentDownload(RemoteTimeline {
+                tenant_id,
+                timeline_id,
+                layers: layers.clone(),
+                metadata: Some(smaller_lsn_metadata.clone()),
+            }),
+            SyncTask::Upload(LocalTimeline {
+                tenant_id,
+                timeline_id,
+                layers: layers.clone(),
+                metadata: bigger_lsn_metadata.clone(),
+            }),
+            SyncTask::Upload(LocalTimeline {
+                tenant_id,
+                timeline_id,
+                layers: layers.clone(),
+                metadata: smaller_lsn_metadata.clone(),
+            }),
+            SyncTask::Download(RemoteTimeline {
+                tenant_id,
+                timeline_id,
+                layers: layers.clone(),
+                metadata: Some(bigger_lsn_metadata),
+            }),
+            SyncTask::Download(RemoteTimeline {
+                tenant_id,
+                timeline_id,
+                layers,
+                metadata: Some(smaller_lsn_metadata),
+            }),
+        ];
+        assert_eq!(expected_ordered_tasks, ordered_tasks);
+    }
+
+    async fn ensure_correct_timeline_upload<'a>(
+        harness: &RepoHarness,
+        remote_timelines: &'a mut HashMap<(ZTenantId, ZTimelineId), RemoteTimeline>,
+        relish_storage: &'a LocalFs,
+        new_upload: LocalTimeline,
+    ) {
+        upload_timeline(
+            harness.conf,
+            &LIMIT,
+            remote_timelines,
+            relish_storage,
+            new_upload.clone(),
+        )
+        .await;
+        assert_timelines_equal(
+            remote_timelines.clone(),
+            fetch_existing_uploads(relish_storage).await.unwrap(),
+        );
+
+        let new_remote_files = remote_timelines
+            .get(&(new_upload.tenant_id, new_upload.timeline_id))
+            .unwrap()
+            .clone();
+        assert_eq!(new_remote_files.tenant_id, new_upload.tenant_id);
+        assert_eq!(new_remote_files.timeline_id, new_upload.timeline_id);
+        assert_eq!(
+            new_remote_files.metadata,
+            Some(new_upload.metadata.clone()),
+            "Remote timeline should have updated metadata with a later LSN after a successful reupload"
+        );
+        let remote_files_after_upload = new_remote_files
+            .layers
+            .clone()
+            .into_iter()
+            .collect::<BTreeSet<_>>();
+        for new_uploaded_layer in &new_upload.layers {
+            assert!(
+                remote_files_after_upload.contains(new_uploaded_layer),
+                "Remote files do not contain layer that should be uploaded: '{}'",
+                new_uploaded_layer.display()
+            );
+        }
+
+        assert_timeline_files_match(harness, new_remote_files);
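+        // A fully successful upload must not leave its task re-queued for retry.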
+        assert_sync_queue_contents(SyncTask::Upload(new_upload), false);
+    }
+
+    #[track_caller]
+    fn assert_timelines_equal(
+        mut expected: HashMap<(ZTenantId, ZTimelineId), RemoteTimeline>,
+        mut actual: HashMap<(ZTenantId, ZTimelineId), RemoteTimeline>,
+    ) {
+        let expected_sorted = expected
+            .iter_mut()
+            .map(|(key, remote_timeline)| {
+                remote_timeline.layers.sort();
+                (key, remote_timeline)
+            })
+            .collect::<BTreeMap<_, _>>();
+
+        let actual_sorted = actual
+            .iter_mut()
+            .map(|(key, remote_timeline)| {
+                remote_timeline.layers.sort();
+                (key, remote_timeline)
+            })
+            .collect::<BTreeMap<_, _>>();
+
+        assert_eq!(
+            expected_sorted, actual_sorted,
+            "Different timeline contents"
+        );
+    }
+
+    #[track_caller]
+    fn assert_sync_queue_contents(task: SyncTask, expected_in_queue: bool) {
+        let mut queue_accessor = SYNC_QUEUE.lock().unwrap();
+        let queue_tasks = queue_accessor.drain().collect::<Vec<_>>();
+        drop(queue_accessor);
+
+        if expected_in_queue {
+            assert!(
+                queue_tasks.contains(&task),
+                "Sync queue should contain task {:?}",
+                task
+            );
+        } else {
+            assert!(
+                !queue_tasks.contains(&task),
+                "Sync queue has unexpected task {:?}",
+                task
+            );
+        }
+    }
+
+    fn assert_timeline_files_match(harness: &RepoHarness, remote_files: RemoteTimeline) {
+        let local_timeline_dir = harness.timeline_path(&remote_files.timeline_id);
+        let local_paths = fs::read_dir(&local_timeline_dir)
+            .unwrap()
+            .map(|dir| dir.unwrap().path())
+            .collect::<BTreeSet<_>>();
+        let mut reported_remote_files = remote_files.layers.into_iter().collect::<BTreeSet<_>>();
+        if let Some(remote_metadata) = remote_files.metadata {
+            let local_metadata_path =
+                metadata_path(harness.conf, remote_files.timeline_id, harness.tenant_id);
+            let local_metadata = TimelineMetadata::from_bytes(
+                &fs::read(&local_metadata_path).expect("Failed to read metadata file when comparing remote and local image files")
+            ).expect("Failed to parse metadata file contents when comparing remote and local image files");
+            assert_eq!(
+                local_metadata, remote_metadata,
+                "Timeline remote metadata is different from the local one"
+            );
+            reported_remote_files.insert(local_metadata_path);
+        }
+
+        assert_eq!(
+            local_paths, reported_remote_files,
+            "Remote image files and local image files are different, missing locally: {:?}, missing remotely: {:?}",
+            reported_remote_files.difference(&local_paths).collect::<Vec<_>>(),
+            local_paths.difference(&reported_remote_files).collect::<Vec<_>>(),
+        );
+
+        if let Some(remote_file) = reported_remote_files.iter().next() {
+            let actual_remote_paths = fs::read_dir(
+                remote_file
+                    .parent()
+                    .expect("Remote relishes are expected to have their timeline dir as parent"),
+            )
+            .unwrap()
+            .map(|dir| dir.unwrap().path())
+            .collect::<BTreeSet<_>>();
+
+            let unreported_remote_files = actual_remote_paths
+                .difference(&reported_remote_files)
+                .collect::<Vec<_>>();
+            assert!(
+                unreported_remote_files.is_empty(),
+                "Unexpected extra remote files that were not listed: {:?}",
+                unreported_remote_files
+            )
+        }
+    }
+
+    async fn store_incorrect_metadata_relishes(
+        harness: &RepoHarness,
+        storage: &LocalFs,
+    ) -> anyhow::Result<HashMap<(ZTenantId, ZTimelineId), RemoteTimeline>> {
+        let mut remote_timelines = HashMap::new();
+
+        ensure_correct_timeline_upload(
+            harness,
+            &mut remote_timelines,
+            storage,
+            create_local_timeline(
+                harness,
+                NO_METADATA_TIMELINE_ID,
+                &["a1", "b1"],
+                dummy_metadata(Lsn(0)),
+            )?,
+        )
+        .await;
+        ensure_correct_timeline_upload(
+            harness,
+            &mut remote_timelines,
+            storage,
+            create_local_timeline(
+                harness,
+                CORRUPT_METADATA_TIMELINE_ID,
+                &["a2", "b2"],
+                dummy_metadata(Lsn(0)),
+            )?,
+        )
+        .await;
+
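+        // Now break the remote state on purpose: delete one timeline's uploaded
+        // metadata file and overwrite the other's with garbage bytes.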
+        storage
+            .delete_relish(&storage.storage_path(&metadata_path(
+                harness.conf,
+                NO_METADATA_TIMELINE_ID,
+                harness.tenant_id,
+            ))?)
+            .await?;
+        storage
+            .upload_relish(
+                &mut BufReader::new(Cursor::new("corrupt meta".to_string().into_bytes())),
+                &storage.storage_path(&metadata_path(
+                    harness.conf,
+                    CORRUPT_METADATA_TIMELINE_ID,
+                    harness.tenant_id,
+                ))?,
+            )
+            .await?;
+
+        for remote_relish in remote_timelines.values_mut() {
+            remote_relish.metadata = None;
+        }
+
+        Ok(remote_timelines)
+    }
+
+    fn create_local_timeline(
+        harness: &RepoHarness,
+        timeline_id: ZTimelineId,
+        filenames: &[&str],
+        metadata: TimelineMetadata,
+    ) -> anyhow::Result<LocalTimeline> {
+        let timeline_path = harness.timeline_path(&timeline_id);
+        fs::create_dir_all(&timeline_path)?;
+
+        let mut layers = Vec::with_capacity(filenames.len());
+        for &file in filenames {
+            let file_path = timeline_path.join(file);
+            fs::write(&file_path, dummy_contents(file).into_bytes())?;
+            layers.push(file_path);
+        }
+
+        create_local_metadata(harness, timeline_id, &metadata)?;
+
+        Ok(LocalTimeline {
+            tenant_id: harness.tenant_id,
+            timeline_id,
+            layers,
+            metadata,
+        })
+    }
+
+    fn create_local_metadata(
+        harness: &RepoHarness,
+        timeline_id: ZTimelineId,
+        metadata: &TimelineMetadata,
+    ) -> anyhow::Result<()> {
+        fs::write(
+            metadata_path(harness.conf, timeline_id, harness.tenant_id),
+            metadata.to_bytes()?,
+        )?;
+        Ok(())
+    }
+
+    fn dummy_contents(name: &str) -> String {
+        format!("contents for {}", name)
+    }
+
+    fn dummy_metadata(disk_consistent_lsn: Lsn) -> TimelineMetadata {
+        TimelineMetadata {
+            disk_consistent_lsn,
+            prev_record_lsn: None,
+            ancestor_timeline: None,
+            ancestor_lsn: Lsn(0),
+        }
+    }
+}
diff --git a/pageserver/src/relish_storage/synced_storage.rs b/pageserver/src/relish_storage/synced_storage.rs
deleted file mode 100644
index e9ac20ff8c..0000000000
--- a/pageserver/src/relish_storage/synced_storage.rs
+++ /dev/null
@@ -1,57 +0,0 @@
-use std::time::Duration;
-use std::{collections::BinaryHeap, sync::Mutex, thread};
-
-use crate::tenant_mgr;
-use crate::{relish_storage::RelishStorage, PageServerConf};
-
-lazy_static::lazy_static! {
-    static ref UPLOAD_QUEUE: Mutex<BinaryHeap<SyncTask>> = Mutex::new(BinaryHeap::new());
-}
-
-pub fn schedule_timeline_upload(_local_timeline: ()) {
-    // UPLOAD_QUEUE
-    //     .lock()
-    //     .unwrap()
-    //     .push(SyncTask::Upload(local_timeline))
-}
-
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
-enum SyncTask {}
-
-pub fn run_storage_sync_thread<
-    P: std::fmt::Debug,
-    S: 'static + RelishStorage<RelishStoragePath = P>,
->(
-    config: &'static PageServerConf,
-    relish_storage: S,
-    max_concurrent_sync: usize,
-) -> anyhow::Result<Option<thread::JoinHandle<anyhow::Result<()>>>> {
-    let runtime = tokio::runtime::Builder::new_current_thread()
-        .enable_all()
-        .build()?;
-
-    let handle = thread::Builder::new()
-        .name("Queue based relish storage sync".to_string())
-        .spawn(move || {
-            while !tenant_mgr::shutdown_requested() {
-                let mut queue_accessor = UPLOAD_QUEUE.lock().unwrap();
-                log::debug!("Upload queue length: {}", queue_accessor.len());
-                let next_task = queue_accessor.pop();
-                drop(queue_accessor);
-                match next_task {
-                    Some(task) => runtime.block_on(async {
-                        // suppress warnings
-                        let _ = (config, task, &relish_storage, max_concurrent_sync);
-                        todo!("omitted for brevity")
-                    }),
-                    None => {
-                        thread::sleep(Duration::from_secs(1));
-                        continue;
-                    }
-                }
-            }
-            log::debug!("Queue based relish storage sync thread shut down");
-            Ok(())
-        })?;
-    Ok(Some(handle))
-}
diff --git a/pageserver/src/repository.rs b/pageserver/src/repository.rs
index 73c6f370d6..c1bdb87944 100644
--- a/pageserver/src/repository.rs
+++ b/pageserver/src/repository.rs
@@ -214,27 +214,114 @@ impl WALRecord {
     }
 }
 
+#[cfg(test)]
+pub mod repo_harness {
+    use std::{fs, path::PathBuf};
+
+    use crate::{
+        layered_repository::{LayeredRepository, TIMELINES_SEGMENT_NAME},
+        walredo::{WalRedoError, WalRedoManager},
+        PageServerConf,
+    };
+
+    use super::*;
+    use hex_literal::hex;
+    use zenith_utils::zid::ZTenantId;
+
+    pub const TIMELINE_ID: ZTimelineId =
+        ZTimelineId::from_array(hex!("11223344556677881122334455667788"));
+    pub const NEW_TIMELINE_ID: ZTimelineId =
+        ZTimelineId::from_array(hex!("AA223344556677881122334455667788"));
+
+    /// Convenience function to create a page image with given string as the only content
+    #[allow(non_snake_case)]
+    pub fn TEST_IMG(s: &str) -> Bytes {
+        let mut buf = BytesMut::new();
+        buf.extend_from_slice(s.as_bytes());
+        buf.resize(8192, 0);
+
+        buf.freeze()
+    }
+
+    pub struct RepoHarness {
+        pub conf: &'static PageServerConf,
+        pub tenant_id: ZTenantId,
+    }
+
+    impl RepoHarness {
+        pub fn create(test_name: &'static str) -> Result<Self> {
+            let repo_dir = PageServerConf::test_repo_dir(test_name);
+            let _ = fs::remove_dir_all(&repo_dir);
+            fs::create_dir_all(&repo_dir)?;
+            fs::create_dir_all(&repo_dir.join(TIMELINES_SEGMENT_NAME))?;
+
+            let conf = PageServerConf::dummy_conf(repo_dir);
+            // Make a static copy of the config. This can never be free'd, but that's
+            // OK in a test.
+            let conf: &'static PageServerConf = Box::leak(Box::new(conf));
+
+            let tenant_id = ZTenantId::generate();
+            fs::create_dir_all(conf.tenant_path(&tenant_id))?;
+
+            Ok(Self { conf, tenant_id })
+        }
+
+        pub fn load(&self) -> Box<dyn Repository> {
+            let walredo_mgr = Arc::new(TestRedoManager);
+
+            Box::new(LayeredRepository::new(
+                self.conf,
+                walredo_mgr,
+                self.tenant_id,
+                false,
+            ))
+        }
+
+        pub fn timeline_path(&self, timeline_id: &ZTimelineId) -> PathBuf {
+            self.conf.timeline_path(timeline_id, &self.tenant_id)
+        }
+    }
+
+    // Mock WAL redo manager that doesn't do much
+    struct TestRedoManager;
+
+    impl WalRedoManager for TestRedoManager {
+        fn request_redo(
+            &self,
+            rel: RelishTag,
+            blknum: u32,
+            lsn: Lsn,
+            base_img: Option<Bytes>,
+            records: Vec<(Lsn, WALRecord)>,
+        ) -> Result<Bytes, WalRedoError> {
+            let s = format!(
+                "redo for {} blk {} to get to {}, with {} and {} records",
+                rel,
+                blknum,
+                lsn,
+                if base_img.is_some() {
+                    "base image"
+                } else {
+                    "no base image"
+                },
+                records.len()
+            );
+            println!("{}", s);
+            Ok(TEST_IMG(&s))
+        }
+    }
+}
+
 ///
 /// Tests that should work the same with any Repository/Timeline implementation.
 ///
 #[allow(clippy::bool_assert_comparison)]
 #[cfg(test)]
 mod tests {
+    use super::repo_harness::*;
     use super::*;
-    use crate::layered_repository::{LayeredRepository, METADATA_FILE_NAME};
-    use crate::walredo::{WalRedoError, WalRedoManager};
-    use crate::PageServerConf;
-    use hex_literal::hex;
-    use postgres_ffi::pg_constants;
-    use postgres_ffi::xlog_utils::SIZEOF_CHECKPOINT;
-    use std::fs;
-    use std::path::PathBuf;
-    use zenith_utils::zid::ZTenantId;
-
-    const TIMELINE_ID: ZTimelineId =
-        ZTimelineId::from_array(hex!("11223344556677881122334455667788"));
-    const NEW_TIMELINE_ID: ZTimelineId =
-        ZTimelineId::from_array(hex!("AA223344556677881122334455667788"));
+    use crate::layered_repository::METADATA_FILE_NAME;
+    use postgres_ffi::{pg_constants, xlog_utils::SIZEOF_CHECKPOINT};
 
     /// Arbitrary relation tag, for testing.
     const TESTREL_A: RelishTag = RelishTag::Relation(RelTag {
@@ -250,16 +337,6 @@ mod tests {
         forknum: 0,
     });
 
-    /// Convenience function to create a page image with given string as the only content
-    #[allow(non_snake_case)]
-    fn TEST_IMG(s: &str) -> Bytes {
-        let mut buf = BytesMut::new();
-        buf.extend_from_slice(s.as_bytes());
-        buf.resize(8192, 0);
-
-        buf.freeze()
-    }
-
     fn assert_current_logical_size(timeline: &Arc<dyn Timeline>, lsn: Lsn) {
         let incremental = timeline.get_current_logical_size();
         let non_incremental = timeline
@@ -271,45 +348,6 @@ mod tests {
     static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; 8192]);
     static ZERO_CHECKPOINT: Bytes = Bytes::from_static(&[0u8; SIZEOF_CHECKPOINT]);
 
-    struct RepoHarness {
-        conf: &'static PageServerConf,
-        tenant_id: ZTenantId,
-    }
-
-    impl RepoHarness {
-        fn create(test_name: &'static str) -> Result<Self> {
-            let repo_dir = PageServerConf::test_repo_dir(test_name);
-            let _ = fs::remove_dir_all(&repo_dir);
-            fs::create_dir_all(&repo_dir)?;
-            fs::create_dir_all(&repo_dir.join("timelines"))?;
-
-            let conf = PageServerConf::dummy_conf(repo_dir);
-            // Make a static copy of the config. This can never be free'd, but that's
-            // OK in a test.
-            let conf: &'static PageServerConf = Box::leak(Box::new(conf));
-
-            let tenant_id = ZTenantId::generate();
-            fs::create_dir_all(conf.tenant_path(&tenant_id))?;
-
-            Ok(Self { conf, tenant_id })
-        }
-
-        fn load(&self) -> Box<dyn Repository> {
-            let walredo_mgr = Arc::new(TestRedoManager);
-
-            Box::new(LayeredRepository::new(
-                self.conf,
-                walredo_mgr,
-                self.tenant_id,
-                false,
-            ))
-        }
-
-        fn timeline_path(&self, timeline_id: &ZTimelineId) -> PathBuf {
-            self.conf.timeline_path(timeline_id, &self.tenant_id)
-        }
-    }
-
     #[test]
     fn test_relsize() -> Result<()> {
         let repo = RepoHarness::create("test_relsize")?.load();
@@ -821,33 +859,4 @@ mod tests {
 
         Ok(())
     }
-
-    // Mock WAL redo manager that doesn't do much
-    struct TestRedoManager;
-
-    impl WalRedoManager for TestRedoManager {
-        fn request_redo(
-            &self,
-            rel: RelishTag,
-            blknum: u32,
-            lsn: Lsn,
-            base_img: Option<Bytes>,
-            records: Vec<(Lsn, WALRecord)>,
-        ) -> Result<Bytes, WalRedoError> {
-            let s = format!(
-                "redo for {} blk {} to get to {}, with {} and {} records",
-                rel,
-                blknum,
-                lsn,
-                if base_img.is_some() {
-                    "base image"
-                } else {
-                    "no base image"
-                },
-                records.len()
-            );
-            println!("{}", s);
-            Ok(TEST_IMG(&s))
-        }
-    }
 }
diff --git a/pageserver/src/tenant_mgr.rs b/pageserver/src/tenant_mgr.rs
index 7b9924efae..fbf4362c6b 100644
--- a/pageserver/src/tenant_mgr.rs
+++ b/pageserver/src/tenant_mgr.rs
@@ -123,8 +123,6 @@ fn init_repo(conf: &'static PageServerConf, tenant_id: ZTenantId) {
     tenant.state = TenantState::Active;
 }
 
-// TODO kb Currently unused function, will later be used when the relish storage downloads a new layer.
-// Relevant PR: https://github.com/zenithdb/zenith/pull/686
 pub fn register_relish_download(
     conf: &'static PageServerConf,
     tenant_id: ZTenantId,
@@ -138,14 +136,16 @@ pub fn register_relish_download(
 
     {
         let mut m = access_tenants();
-        let mut tenant = m.get_mut(&tenant_id).unwrap();
+        let tenant = m.entry(tenant_id).or_insert_with(|| Tenant {
+            state: TenantState::Downloading,
+            repo: None,
+        });
         tenant.state = TenantState::Downloading;
         match &tenant.repo {
             Some(repo) => init_timeline(repo.as_ref(), timeline_id),
-            None => {
-                log::info!("Initialize new repo");
-            }
+            None => log::warn!("Initialize new repo"),
         }
+        tenant.state = TenantState::Active;
     }
 
     // init repo updates Tenant state