From 2db675a2f2de7c5057104fd7450f982ca85a8c42 Mon Sep 17 00:00:00 2001 From: Rory de Zoete <33318916+zoete@users.noreply.github.com> Date: Thu, 18 Aug 2022 15:18:59 +0200 Subject: [PATCH 01/63] Re-enable test dependency for deploy (#2300) Co-authored-by: Rory de Zoete --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 6b76b6e5fc..c9b696e409 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -567,7 +567,7 @@ jobs: #container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest # We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version. # If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly - needs: [ push-docker-hub, calculate-deploy-targets, tag ] + needs: [ push-docker-hub, calculate-deploy-targets, tag, other-tests, pg_regress-tests ] if: | (github.ref_name == 'main' || github.ref_name == 'release') && github.event_name != 'workflow_dispatch' @@ -622,7 +622,7 @@ jobs: runs-on: dev container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest # Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently. 
- needs: [ push-docker-hub, calculate-deploy-targets, tag ] + needs: [ push-docker-hub, calculate-deploy-targets, tag, other-tests, pg_regress-tests ] if: | (github.ref_name == 'main' || github.ref_name == 'release') && github.event_name != 'workflow_dispatch' From f99ccb5041eebcdb89efc24f5e0cc501fb1a9039 Mon Sep 17 00:00:00 2001 From: MMeent Date: Thu, 18 Aug 2022 17:12:28 +0200 Subject: [PATCH 02/63] Extract WalProposer into the neon extension (#2217) Including, but not limited to: * Fixes to neon management code to support walproposer-as-an-extension * Fix issue in expected output of pg settings serialization. * Show the logs of a failed --sync-safekeepers process in CI * Add compat layer for renamed GUCs in postgres.conf * Update vendor/postgres to the latest origin/main --- Cargo.lock | 202 +++++++++--------- compute_tools/src/compute.rs | 6 +- compute_tools/src/pg_helpers.rs | 11 +- compute_tools/tests/cluster_spec.json | 3 +- compute_tools/tests/pg_helpers_tests.rs | 2 +- control_plane/src/compute.rs | 4 +- test_runner/batch_others/test_wal_acceptor.py | 2 +- test_runner/fixtures/neon_fixtures.py | 4 +- vendor/postgres | 2 +- 9 files changed, 125 insertions(+), 111 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a70b2b7dc9..505cbb66c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -48,9 +48,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.58" +version = "1.0.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb07d2053ccdbe10e2af2995a2f116c1330396493dc1269f6a91d0ae82e19704" +checksum = "c91f1f46651137be86f3a2b9a8359f9ab421d04d941c62b5982e1ca21113adf9" dependencies = [ "backtrace", ] @@ -77,7 +77,7 @@ dependencies = [ "num-traits", "rusticata-macros", "thiserror", - "time 0.3.11", + "time 0.3.12", ] [[package]] @@ -126,9 +126,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.56" +version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"96cf8829f67d2eab0b2dfa42c5d0ef737e0724e4a82b01b3e292456202b19716" +checksum = "76464446b8bc32758d7e88ee1a804d9914cd9b1cb264c029899680b0be29826f" dependencies = [ "proc-macro2", "quote", @@ -166,7 +166,7 @@ dependencies = [ "http", "http-body", "hyper", - "itoa 1.0.2", + "itoa 1.0.3", "matchit", "memchr", "mime", @@ -298,9 +298,9 @@ checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3" [[package]] name = "bytemuck" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c53dfa917ec274df8ed3c572698f381a24eef2efba9492d797301b72b6db408a" +checksum = "a5377c8865e74a160d21f29c2d40669f53286db6eab59b88540cbb12ffc8b835" [[package]] name = "byteorder" @@ -310,9 +310,9 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "bytes" -version = "1.1.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" +checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db" dependencies = [ "serde", ] @@ -386,9 +386,9 @@ dependencies = [ [[package]] name = "clap" -version = "3.2.12" +version = "3.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab8b79fe3946ceb4a0b1c080b4018992b8d27e9ff363644c1c9b6387c854614d" +checksum = "a3dbbb6653e7c55cc8595ad3e1f7be8f32aba4eb7ff7f0fd1163d4f3d137c0a9" dependencies = [ "atty", "bitflags", @@ -455,7 +455,7 @@ version = "0.1.0" dependencies = [ "anyhow", "chrono", - "clap 3.2.12", + "clap 3.2.16", "env_logger", "hyper", "log", @@ -601,9 +601,9 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c02a4d71819009c192cf4872265391563fd6a84c81ff2c0f2a7026ca4c1d85c" +checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" 
dependencies = [ "cfg-if", "crossbeam-utils", @@ -611,9 +611,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" +checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" dependencies = [ "cfg-if", "crossbeam-epoch", @@ -622,9 +622,9 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.9" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07db9d94cbd326813772c968ccd25999e5f8ae22f4f8d1b11effa37ef6ce281d" +checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1" dependencies = [ "autocfg", "cfg-if", @@ -636,9 +636,9 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.10" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d82ee10ce34d7bc12c2122495e7593a9c41347ecdd64185af4ecf72cb1a7f83" +checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc" dependencies = [ "cfg-if", "once_cell", @@ -917,9 +917,9 @@ checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" [[package]] name = "fastrand" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" +checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499" dependencies = [ "instant", ] @@ -1086,9 +1086,9 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.5" +version = "0.14.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803" +checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" dependencies = [ "typenum", "version_check", @@ -1245,7 +1245,7 
@@ checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399" dependencies = [ "bytes", "fnv", - "itoa 1.0.2", + "itoa 1.0.3", ] [[package]] @@ -1308,7 +1308,7 @@ dependencies = [ "http-body", "httparse", "httpdate", - "itoa 1.0.2", + "itoa 1.0.3", "pin-project-lite", "socket2", "tokio", @@ -1391,7 +1391,7 @@ dependencies = [ "ahash", "atty", "indexmap", - "itoa 1.0.2", + "itoa 1.0.3", "lazy_static", "log", "num-format", @@ -1432,15 +1432,15 @@ checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" [[package]] name = "itoa" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" +checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754" [[package]] name = "js-sys" -version = "0.3.58" +version = "0.3.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3fac17f7123a73ca62df411b1bf727ccc805daa070338fda671c86dac1bdc27" +checksum = "258451ab10b34f8af53416d1fdab72c22e805f0c92a1136d59470ec0b11138b2" dependencies = [ "wasm-bindgen", ] @@ -1482,9 +1482,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.126" +version = "0.2.127" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" +checksum = "505e71a4706fa491e9b1b55f51b95d4037d0821ee40131190475f692b35b009b" [[package]] name = "libloading" @@ -1659,7 +1659,7 @@ name = "neon_local" version = "0.1.0" dependencies = [ "anyhow", - "clap 3.2.12", + "clap 3.2.16", "comfy-table", "control_plane", "git-version", @@ -1854,7 +1854,7 @@ dependencies = [ "byteorder", "bytes", "chrono", - "clap 3.2.12", + "clap 3.2.16", "close_fds", "const_format", "crc32c", @@ -2155,9 +2155,9 @@ checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" 
[[package]] name = "prettyplease" -version = "0.1.16" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da6ffbe862780245013cb1c0a48c4e44b7d665548088f91f6b90876d0625e4c2" +checksum = "697ae720ee02011f439e0701db107ffe2916d83f718342d65d7f8bf7b8a5fee9" dependencies = [ "proc-macro2", "syn", @@ -2171,9 +2171,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" [[package]] name = "proc-macro2" -version = "1.0.40" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7" +checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab" dependencies = [ "unicode-ident", ] @@ -2271,7 +2271,7 @@ dependencies = [ "base64", "bstr", "bytes", - "clap 3.2.12", + "clap 3.2.16", "futures", "git-version", "hashbrown 0.11.2", @@ -2326,9 +2326,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.20" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804" +checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" dependencies = [ "proc-macro2", ] @@ -2411,9 +2411,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.2.13" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" dependencies = [ "bitflags", ] @@ -2508,7 +2508,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "rustls", - "rustls-pemfile 1.0.0", + "rustls-pemfile 1.0.1", "serde", "serde_json", "serde_urlencoded", @@ -2708,9 +2708,9 @@ dependencies = [ [[package]] name = "rustls-pemfile" -version = "1.0.0" +version = "1.0.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7522c9de787ff061458fe9a829dc790a3f5b22dc571694fc5883f448b94d9a9" +checksum = "0864aeff53f8c05aa08d86e5ef839d3dfcf07aeba2db32f12db0ef716e87bd55" dependencies = [ "base64", ] @@ -2726,15 +2726,15 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24c8ad4f0c00e1eb5bc7614d236a7f1300e3dbd76b68cac8e06fb00b015ad8d8" +checksum = "97477e48b4cf8603ad5f7aaf897467cf42ab4218a38ef76fb14c2d6773a6d6a8" [[package]] name = "ryu" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" +checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" [[package]] name = "safekeeper" @@ -2744,7 +2744,7 @@ dependencies = [ "async-trait", "byteorder", "bytes", - "clap 3.2.12", + "clap 3.2.16", "const_format", "crc32c", "daemonize", @@ -2835,15 +2835,15 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2333e6df6d6598f2b1974829f853c2b4c5f4a6e503c10af918081aa6f8564e1" +checksum = "93f6841e709003d68bb2deee8c343572bf446003ec20a583e76f7b15cebf3711" [[package]] name = "serde" -version = "1.0.139" +version = "1.0.142" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0171ebb889e45aa68b44aee0859b3eede84c6f5f5c228e6f140c0b2a0a46cad6" +checksum = "e590c437916fb6b221e1d00df6e3294f3fccd70ca7e92541c475d6ed6ef5fee2" dependencies = [ "serde_derive", ] @@ -2860,9 +2860,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.139" +version = "1.0.142" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc1d3230c1de7932af58ad8ffbe1d784bd55efd5a9d84ac24f69c72d83543dfb" +checksum = 
"34b5b8d809babe02f538c2cfec6f2c1ed10804c0e5a6a041a049a4f5588ccc2e" dependencies = [ "proc-macro2", "quote", @@ -2871,11 +2871,11 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.82" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82c2c1fdcd807d1098552c5b9a36e425e42e9fbd7c6a37a8425f390f781f7fa7" +checksum = "38dd04e3c8279e75b31ef29dbdceebfe5ad89f4d0937213c53f7d49d01b3d5a7" dependencies = [ - "itoa 1.0.2", + "itoa 1.0.3", "ryu", "serde", ] @@ -2887,7 +2887,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" dependencies = [ "form_urlencoded", - "itoa 1.0.2", + "itoa 1.0.3", "ryu", "serde", ] @@ -2992,7 +2992,7 @@ dependencies = [ "num-bigint", "num-traits", "thiserror", - "time 0.3.11", + "time 0.3.12", ] [[package]] @@ -3003,9 +3003,12 @@ checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" [[package]] name = "slab" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32" +checksum = "4614a76b2a8be0058caa9dbbaf66d988527d86d003c11a94fbd335d7661edcef" +dependencies = [ + "autocfg", +] [[package]] name = "smallvec" @@ -3113,9 +3116,9 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.98" +version = "1.0.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd" +checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13" dependencies = [ "proc-macro2", "quote", @@ -3191,18 +3194,18 @@ checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" [[package]] name = "thiserror" -version = "1.0.31" +version = "1.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a" +checksum = "f5f6586b7f764adc0231f4c79be7b920e766bb2f3e51b3661cdb263828f19994" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.31" +version = "1.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" +checksum = "12bafc5b54507e0149cdf1b145a5d80ab80a90bcd9275df43d4fff68460f6c21" dependencies = [ "proc-macro2", "quote", @@ -3231,11 +3234,12 @@ dependencies = [ [[package]] name = "time" -version = "0.3.11" +version = "0.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72c91f41dcb2f096c05f0873d667dceec1087ce5bcf984ec8ffb19acddbb3217" +checksum = "74b7cc93fc23ba97fde84f7eea56c55d1ba183f495c6715defdfc7b9cb8c870f" dependencies = [ - "itoa 1.0.2", + "itoa 1.0.3", + "js-sys", "libc", "num_threads", "quickcheck", @@ -3275,9 +3279,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tokio" -version = "1.20.0" +version = "1.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57aec3cfa4c296db7255446efb4928a6be304b431a806216105542a67b6ca82e" +checksum = "7a8325f63a7d4774dd041e363b2409ed1c5cbbd0f867795e661df066b2b0a581" dependencies = [ "autocfg", "bytes", @@ -3607,9 +3611,9 @@ checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" [[package]] name = "unicode-ident" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7" +checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf" [[package]] name = "unicode-normalization" @@ -3728,7 +3732,7 @@ name = "wal_craft" version = "0.1.0" dependencies = [ "anyhow", - "clap 3.2.12", + "clap 3.2.16", "env_logger", "log", "once_cell", @@ -3772,9 +3776,9 @@ 
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.81" +version = "0.2.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c53b543413a17a202f4be280a7e5c62a1c69345f5de525ee64f8cfdbc954994" +checksum = "fc7652e3f6c4706c8d9cd54832c4a4ccb9b5336e2c3bd154d5cccfbf1c1f5f7d" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -3782,13 +3786,13 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.81" +version = "0.2.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5491a68ab4500fa6b4d726bd67408630c3dbe9c4fe7bda16d5c82a1fd8c7340a" +checksum = "662cd44805586bd52971b9586b1df85cdbbd9112e4ef4d8f41559c334dc6ac3f" dependencies = [ "bumpalo", - "lazy_static", "log", + "once_cell", "proc-macro2", "quote", "syn", @@ -3797,9 +3801,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.31" +version = "0.4.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de9a9cec1733468a8c657e57fa2413d2ae2c0129b95e87c5b72b8ace4d13f31f" +checksum = "fa76fb221a1f8acddf5b54ace85912606980ad661ac7a503b4570ffd3a624dad" dependencies = [ "cfg-if", "js-sys", @@ -3809,9 +3813,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.81" +version = "0.2.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c441e177922bc58f1e12c022624b6216378e5febc2f0533e41ba443d505b80aa" +checksum = "b260f13d3012071dfb1512849c033b1925038373aea48ced3012c09df952c602" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3819,9 +3823,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.81" +version = "0.2.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d94ac45fcf608c1f45ef53e748d35660f168490c10b23704c7779ab8f5c3048" +checksum = "5be8e654bdd9b79216c2929ab90721aa82faf65c48cdf08bdc4e7f51357b80da" 
dependencies = [ "proc-macro2", "quote", @@ -3832,15 +3836,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.81" +version = "0.2.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a89911bd99e5f3659ec4acf9c4d93b0a90fe4a2a11f15328472058edc5261be" +checksum = "6598dd0bd3c7d51095ff6531a5b23e02acdc81804e30d8f07afb77b7215a140a" [[package]] name = "web-sys" -version = "0.3.58" +version = "0.3.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fed94beee57daf8dd7d51f2b15dc2bcde92d7a72304cdf662a4371008b71b90" +checksum = "ed055ab27f941423197eb86b2035720b1a3ce40504df082cac2ecc6ed73335a1" dependencies = [ "js-sys", "wasm-bindgen", @@ -3993,7 +3997,7 @@ dependencies = [ "scopeguard", "serde", "syn", - "time 0.3.11", + "time 0.3.12", "tokio", "tokio-util", "tracing", @@ -4015,7 +4019,7 @@ dependencies = [ "oid-registry", "rusticata-macros", "thiserror", - "time 0.3.11", + "time 0.3.12", ] [[package]] @@ -4044,6 +4048,6 @@ dependencies = [ [[package]] name = "zeroize" -version = "1.5.6" +version = "1.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20b578acffd8516a6c3f2a1bdefc1ec37e547bb4e0fb8b6b01a4cafc886b4442" +checksum = "c394b5bd0c6f669e7275d9c20aa90ae064cb22e75a1cad54e1b34088034b149f" diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 1e812f2aa0..58469b1c97 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -178,6 +178,7 @@ impl ComputeNode { .args(&["--sync-safekeepers"]) .env("PGDATA", &self.pgdata) // we cannot use -D in this mode .stdout(Stdio::piped()) + .stderr(Stdio::piped()) .spawn() .expect("postgres --sync-safekeepers failed to start"); @@ -187,10 +188,13 @@ impl ComputeNode { let sync_output = sync_handle .wait_with_output() .expect("postgres --sync-safekeepers failed"); + if !sync_output.status.success() { anyhow::bail!( - "postgres --sync-safekeepers exited with non-zero status: 
{}", + "postgres --sync-safekeepers exited with non-zero status: {}. stdout: {}, stderr: {}", sync_output.status, + String::from_utf8(sync_output.stdout).expect("postgres --sync-safekeepers exited, and stdout is not utf-8"), + String::from_utf8(sync_output.stderr).expect("postgres --sync-safekeepers exited, and stderr is not utf-8"), ); } diff --git a/compute_tools/src/pg_helpers.rs b/compute_tools/src/pg_helpers.rs index 207d09d76b..ac065fa60c 100644 --- a/compute_tools/src/pg_helpers.rs +++ b/compute_tools/src/pg_helpers.rs @@ -62,9 +62,16 @@ impl GenericOption { /// Represent `GenericOption` as configuration option. pub fn to_pg_setting(&self) -> String { if let Some(val) = &self.value { + let name = match self.name.as_str() { + "safekeepers" => "neon.safekeepers", + "wal_acceptor_reconnect" => "neon.safekeeper_reconnect_timeout", + "wal_acceptor_connect_timeout" => "neon.safekeeper_connect_timeout", + it => it, + }; + match self.vartype.as_ref() { - "string" => format!("{} = '{}'", self.name, val), - _ => format!("{} = {}", self.name, val), + "string" => format!("{} = '{}'", name, val), + _ => format!("{} = {}", name, val), } } else { self.name.to_owned() diff --git a/compute_tools/tests/cluster_spec.json b/compute_tools/tests/cluster_spec.json index bdd6e60a69..c29416d9c4 100644 --- a/compute_tools/tests/cluster_spec.json +++ b/compute_tools/tests/cluster_spec.json @@ -85,7 +85,7 @@ "vartype": "bool" }, { - "name": "safekeepers", + "name": "neon.safekeepers", "value": "127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501", "vartype": "string" }, @@ -181,7 +181,6 @@ } ] }, - "delta_operations": [ { "action": "delete_db", diff --git a/compute_tools/tests/pg_helpers_tests.rs b/compute_tools/tests/pg_helpers_tests.rs index 1f2e188398..bae944440e 100644 --- a/compute_tools/tests/pg_helpers_tests.rs +++ b/compute_tools/tests/pg_helpers_tests.rs @@ -28,7 +28,7 @@ mod pg_helpers_tests { assert_eq!( spec.cluster.settings.as_pg_settings(), - "fsync = off\nwal_level = 
replica\nhot_standby = on\nsafekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nneon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nneon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'neon'\nsynchronous_standby_names = 'walproposer'\nneon.pageserver_connstring = 'host=127.0.0.1 port=6400'" + "fsync = off\nwal_level = replica\nhot_standby = on\nneon.safekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nneon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nneon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'neon'\nsynchronous_standby_names = 'walproposer'\nneon.pageserver_connstring = 'host=127.0.0.1 port=6400'" ); } diff --git a/control_plane/src/compute.rs b/control_plane/src/compute.rs index e78f96074e..57b5e1e10a 100644 --- a/control_plane/src/compute.rs +++ b/control_plane/src/compute.rs @@ -150,7 +150,7 @@ impl PostgresNode { let port: u16 = conf.parse_field("port", &context)?; let timeline_id: ZTimelineId = conf.parse_field("neon.timeline_id", &context)?; let tenant_id: ZTenantId = conf.parse_field("neon.tenant_id", &context)?; - let uses_wal_proposer = conf.get("safekeepers").is_some(); + let uses_wal_proposer = conf.get("neon.safekeepers").is_some(); // parse recovery_target_lsn, if any let recovery_target_lsn: Option = @@ -341,7 +341,7 @@ impl PostgresNode { 
.map(|sk| format!("localhost:{}", sk.pg_port)) .collect::>() .join(","); - conf.append("safekeepers", &safekeepers); + conf.append("neon.safekeepers", &safekeepers); } else { // We only use setup without safekeepers for tests, // and don't care about data durability on pageserver, diff --git a/test_runner/batch_others/test_wal_acceptor.py b/test_runner/batch_others/test_wal_acceptor.py index b6f914858e..d922dd0cb4 100644 --- a/test_runner/batch_others/test_wal_acceptor.py +++ b/test_runner/batch_others/test_wal_acceptor.py @@ -569,7 +569,7 @@ class ProposerPostgres(PgProtocol): f"neon.timeline_id = '{self.timeline_id.hex}'\n", f"neon.tenant_id = '{self.tenant_id.hex}'\n", f"neon.pageserver_connstring = ''\n", - f"safekeepers = '{safekeepers}'\n", + f"neon.safekeepers = '{safekeepers}'\n", f"listen_addresses = '{self.listen_addr}'\n", f"port = '{self.port}'\n", ] diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index fe0a3193c1..5292bc1789 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1855,11 +1855,11 @@ class Postgres(PgProtocol): # walproposer uses different application_name if ("synchronous_standby_names" in cfg_line or # don't repeat safekeepers/wal_acceptors multiple times - "safekeepers" in cfg_line): + "neon.safekeepers" in cfg_line): continue f.write(cfg_line) f.write("synchronous_standby_names = 'walproposer'\n") - f.write("safekeepers = '{}'\n".format(safekeepers)) + f.write("neon.safekeepers = '{}'\n".format(safekeepers)) return self def config(self, lines: List[str]) -> 'Postgres': diff --git a/vendor/postgres b/vendor/postgres index 49015ce98f..7e32bba2aa 160000 --- a/vendor/postgres +++ b/vendor/postgres @@ -1 +1 @@ -Subproject commit 49015ce98f550d4fc08d3c1fe348faa71a15f51b +Subproject commit 7e32bba2aa2a1752996586bfaf35754f1f0a4d53 From a185821d6f47956e39c275995248f47c5987b2ec Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Thu, 18 Aug 2022 22:37:20 
+0300 Subject: [PATCH 03/63] Explicitly error on cache issues during I/O (#2303) --- pageserver/src/layered_repository/block_io.rs | 9 +++- .../src/layered_repository/ephemeral_file.rs | 54 ++++++++++++------- pageserver/src/layered_repository/timeline.rs | 18 ++++--- pageserver/src/page_cache.rs | 43 ++++++++------- 4 files changed, 77 insertions(+), 47 deletions(-) diff --git a/pageserver/src/layered_repository/block_io.rs b/pageserver/src/layered_repository/block_io.rs index bc3bc082a0..5e32b8833a 100644 --- a/pageserver/src/layered_repository/block_io.rs +++ b/pageserver/src/layered_repository/block_io.rs @@ -157,7 +157,14 @@ where // Look up the right page let cache = page_cache::get(); loop { - match cache.read_immutable_buf(self.file_id, blknum) { + match cache + .read_immutable_buf(self.file_id, blknum) + .map_err(|e| { + std::io::Error::new( + std::io::ErrorKind::Other, + format!("Failed to read immutable buf: {e:#}"), + ) + })? { ReadBufResult::Found(guard) => break Ok(guard), ReadBufResult::NotFound(mut write_guard) => { // Read the page from disk into the buffer diff --git a/pageserver/src/layered_repository/ephemeral_file.rs b/pageserver/src/layered_repository/ephemeral_file.rs index 1776946e7a..a1b2d68cd5 100644 --- a/pageserver/src/layered_repository/ephemeral_file.rs +++ b/pageserver/src/layered_repository/ephemeral_file.rs @@ -12,7 +12,7 @@ use once_cell::sync::Lazy; use std::cmp::min; use std::collections::HashMap; use std::fs::OpenOptions; -use std::io::{Error, ErrorKind}; +use std::io::{self, ErrorKind}; use std::ops::DerefMut; use std::path::PathBuf; use std::sync::{Arc, RwLock}; @@ -51,7 +51,7 @@ impl EphemeralFile { conf: &PageServerConf, tenantid: ZTenantId, timelineid: ZTimelineId, - ) -> Result { + ) -> Result { let mut l = EPHEMERAL_FILES.write().unwrap(); let file_id = l.next_file_id; l.next_file_id += 1; @@ -76,7 +76,7 @@ impl EphemeralFile { }) } - fn fill_buffer(&self, buf: &mut [u8], blkno: u32) -> Result<(), Error> { + fn 
fill_buffer(&self, buf: &mut [u8], blkno: u32) -> Result<(), io::Error> { let mut off = 0; while off < PAGE_SZ { let n = self @@ -96,10 +96,13 @@ impl EphemeralFile { Ok(()) } - fn get_buf_for_write(&self, blkno: u32) -> Result { + fn get_buf_for_write(&self, blkno: u32) -> Result { // Look up the right page let cache = page_cache::get(); - let mut write_guard = match cache.write_ephemeral_buf(self.file_id, blkno) { + let mut write_guard = match cache + .write_ephemeral_buf(self.file_id, blkno) + .map_err(|e| to_io_error(e, "Failed to write ephemeral buf"))? + { WriteBufResult::Found(guard) => guard, WriteBufResult::NotFound(mut guard) => { // Read the page from disk into the buffer @@ -127,7 +130,7 @@ pub fn is_ephemeral_file(filename: &str) -> bool { } impl FileExt for EphemeralFile { - fn read_at(&self, dstbuf: &mut [u8], offset: u64) -> Result { + fn read_at(&self, dstbuf: &mut [u8], offset: u64) -> Result { // Look up the right page let blkno = (offset / PAGE_SZ as u64) as u32; let off = offset as usize % PAGE_SZ; @@ -137,7 +140,10 @@ impl FileExt for EphemeralFile { let mut write_guard; let cache = page_cache::get(); - let buf = match cache.read_ephemeral_buf(self.file_id, blkno) { + let buf = match cache + .read_ephemeral_buf(self.file_id, blkno) + .map_err(|e| to_io_error(e, "Failed to read ephemeral buf"))? 
+ { ReadBufResult::Found(guard) => { read_guard = guard; read_guard.as_ref() @@ -158,7 +164,7 @@ impl FileExt for EphemeralFile { Ok(len) } - fn write_at(&self, srcbuf: &[u8], offset: u64) -> Result { + fn write_at(&self, srcbuf: &[u8], offset: u64) -> Result { // Look up the right page let blkno = (offset / PAGE_SZ as u64) as u32; let off = offset as usize % PAGE_SZ; @@ -166,7 +172,10 @@ impl FileExt for EphemeralFile { let mut write_guard; let cache = page_cache::get(); - let buf = match cache.write_ephemeral_buf(self.file_id, blkno) { + let buf = match cache + .write_ephemeral_buf(self.file_id, blkno) + .map_err(|e| to_io_error(e, "Failed to write ephemeral buf"))? + { WriteBufResult::Found(guard) => { write_guard = guard; write_guard.deref_mut() @@ -190,7 +199,7 @@ impl FileExt for EphemeralFile { } impl BlobWriter for EphemeralFile { - fn write_blob(&mut self, srcbuf: &[u8]) -> Result { + fn write_blob(&mut self, srcbuf: &[u8]) -> Result { let pos = self.size; let mut blknum = (self.size / PAGE_SZ as u64) as u32; @@ -268,11 +277,11 @@ impl Drop for EphemeralFile { } } -pub fn writeback(file_id: u64, blkno: u32, buf: &[u8]) -> Result<(), std::io::Error> { +pub fn writeback(file_id: u64, blkno: u32, buf: &[u8]) -> Result<(), io::Error> { if let Some(file) = EPHEMERAL_FILES.read().unwrap().files.get(&file_id) { match file.write_all_at(buf, blkno as u64 * PAGE_SZ as u64) { Ok(_) => Ok(()), - Err(e) => Err(std::io::Error::new( + Err(e) => Err(io::Error::new( ErrorKind::Other, format!( "failed to write back to ephemeral file at {} error: {}", @@ -282,7 +291,7 @@ pub fn writeback(file_id: u64, blkno: u32, buf: &[u8]) -> Result<(), std::io::Er )), } } else { - Err(std::io::Error::new( + Err(io::Error::new( ErrorKind::Other, "could not write back page, not found in ephemeral files hash", )) @@ -292,11 +301,14 @@ pub fn writeback(file_id: u64, blkno: u32, buf: &[u8]) -> Result<(), std::io::Er impl BlockReader for EphemeralFile { type BlockLease = 
page_cache::PageReadGuard<'static>; - fn read_blk(&self, blknum: u32) -> Result { + fn read_blk(&self, blknum: u32) -> Result { // Look up the right page let cache = page_cache::get(); loop { - match cache.read_ephemeral_buf(self.file_id, blknum) { + match cache + .read_ephemeral_buf(self.file_id, blknum) + .map_err(|e| to_io_error(e, "Failed to read ephemeral buf"))? + { ReadBufResult::Found(guard) => return Ok(guard), ReadBufResult::NotFound(mut write_guard) => { // Read the page from disk into the buffer @@ -311,6 +323,10 @@ impl BlockReader for EphemeralFile { } } +fn to_io_error(e: anyhow::Error, context: &str) -> io::Error { + io::Error::new(ErrorKind::Other, format!("{context}: {e:#}")) +} + #[cfg(test)] mod tests { use super::*; @@ -322,7 +338,7 @@ mod tests { fn repo_harness( test_name: &str, - ) -> Result<(&'static PageServerConf, ZTenantId, ZTimelineId), Error> { + ) -> Result<(&'static PageServerConf, ZTenantId, ZTimelineId), io::Error> { let repo_dir = PageServerConf::test_repo_dir(test_name); let _ = fs::remove_dir_all(&repo_dir); let conf = PageServerConf::dummy_conf(repo_dir); @@ -339,7 +355,7 @@ mod tests { // Helper function to slurp contents of a file, starting at the current position, // into a string - fn read_string(efile: &EphemeralFile, offset: u64, len: usize) -> Result { + fn read_string(efile: &EphemeralFile, offset: u64, len: usize) -> Result { let mut buf = Vec::new(); buf.resize(len, 0u8); @@ -351,7 +367,7 @@ mod tests { } #[test] - fn test_ephemeral_files() -> Result<(), Error> { + fn test_ephemeral_files() -> Result<(), io::Error> { let (conf, tenantid, timelineid) = repo_harness("ephemeral_files")?; let file_a = EphemeralFile::create(conf, tenantid, timelineid)?; @@ -382,7 +398,7 @@ mod tests { } #[test] - fn test_ephemeral_blobs() -> Result<(), Error> { + fn test_ephemeral_blobs() -> Result<(), io::Error> { let (conf, tenantid, timelineid) = repo_harness("ephemeral_blobs")?; let mut file = EphemeralFile::create(conf, tenantid, 
timelineid)?; diff --git a/pageserver/src/layered_repository/timeline.rs b/pageserver/src/layered_repository/timeline.rs index 6ef4915bdb..910fc9e9fc 100644 --- a/pageserver/src/layered_repository/timeline.rs +++ b/pageserver/src/layered_repository/timeline.rs @@ -2117,7 +2117,7 @@ impl LayeredTimeline { key: Key, request_lsn: Lsn, mut data: ValueReconstructState, - ) -> Result { + ) -> anyhow::Result { // Perform WAL redo if needed data.records.reverse(); @@ -2167,13 +2167,15 @@ impl LayeredTimeline { if img.len() == page_cache::PAGE_SZ { let cache = page_cache::get(); - cache.memorize_materialized_page( - self.tenant_id, - self.timeline_id, - key, - last_rec_lsn, - &img, - ); + cache + .memorize_materialized_page( + self.tenant_id, + self.timeline_id, + key, + last_rec_lsn, + &img, + ) + .context("Materialized page memoization failed")?; } Ok(img) diff --git a/pageserver/src/page_cache.rs b/pageserver/src/page_cache.rs index 818eaf1b8f..27b1400243 100644 --- a/pageserver/src/page_cache.rs +++ b/pageserver/src/page_cache.rs @@ -45,6 +45,7 @@ use std::{ }, }; +use anyhow::Context; use once_cell::sync::OnceCell; use tracing::error; use utils::{ @@ -342,7 +343,7 @@ impl PageCache { key: Key, lsn: Lsn, img: &[u8], - ) { + ) -> anyhow::Result<()> { let cache_key = CacheKey::MaterializedPage { hash_key: MaterializedPageHashKey { tenant_id, @@ -352,7 +353,7 @@ impl PageCache { lsn, }; - match self.lock_for_write(&cache_key) { + match self.lock_for_write(&cache_key)? { WriteBufResult::Found(write_guard) => { // We already had it in cache. Another thread must've put it there // concurrently. Check that it had the same contents that we @@ -364,17 +365,19 @@ impl PageCache { write_guard.mark_valid(); } } + + Ok(()) } // Section 1.2: Public interface functions for working with Ephemeral pages. 
- pub fn read_ephemeral_buf(&self, file_id: u64, blkno: u32) -> ReadBufResult { + pub fn read_ephemeral_buf(&self, file_id: u64, blkno: u32) -> anyhow::Result { let mut cache_key = CacheKey::EphemeralPage { file_id, blkno }; self.lock_for_read(&mut cache_key) } - pub fn write_ephemeral_buf(&self, file_id: u64, blkno: u32) -> WriteBufResult { + pub fn write_ephemeral_buf(&self, file_id: u64, blkno: u32) -> anyhow::Result { let cache_key = CacheKey::EphemeralPage { file_id, blkno }; self.lock_for_write(&cache_key) @@ -402,7 +405,7 @@ impl PageCache { // Section 1.3: Public interface functions for working with immutable file pages. - pub fn read_immutable_buf(&self, file_id: u64, blkno: u32) -> ReadBufResult { + pub fn read_immutable_buf(&self, file_id: u64, blkno: u32) -> anyhow::Result { let mut cache_key = CacheKey::ImmutableFilePage { file_id, blkno }; self.lock_for_read(&mut cache_key) @@ -495,15 +498,16 @@ impl PageCache { /// } /// ``` /// - fn lock_for_read(&self, cache_key: &mut CacheKey) -> ReadBufResult { + fn lock_for_read(&self, cache_key: &mut CacheKey) -> anyhow::Result { loop { // First check if the key already exists in the cache. if let Some(read_guard) = self.try_lock_for_read(cache_key) { - return ReadBufResult::Found(read_guard); + return Ok(ReadBufResult::Found(read_guard)); } // Not found. Find a victim buffer - let (slot_idx, mut inner) = self.find_victim(); + let (slot_idx, mut inner) = + self.find_victim().context("Failed to find evict victim")?; // Insert mapping for this. At this point, we may find that another // thread did the same thing concurrently. 
In that case, we evicted @@ -526,10 +530,10 @@ impl PageCache { inner.dirty = false; slot.usage_count.store(1, Ordering::Relaxed); - return ReadBufResult::NotFound(PageWriteGuard { + return Ok(ReadBufResult::NotFound(PageWriteGuard { inner, valid: false, - }); + })); } } @@ -556,15 +560,16 @@ impl PageCache { /// /// Similar to lock_for_read(), but the returned buffer is write-locked and /// may be modified by the caller even if it's already found in the cache. - fn lock_for_write(&self, cache_key: &CacheKey) -> WriteBufResult { + fn lock_for_write(&self, cache_key: &CacheKey) -> anyhow::Result { loop { // First check if the key already exists in the cache. if let Some(write_guard) = self.try_lock_for_write(cache_key) { - return WriteBufResult::Found(write_guard); + return Ok(WriteBufResult::Found(write_guard)); } // Not found. Find a victim buffer - let (slot_idx, mut inner) = self.find_victim(); + let (slot_idx, mut inner) = + self.find_victim().context("Failed to find evict victim")?; // Insert mapping for this. At this point, we may find that another // thread did the same thing concurrently. In that case, we evicted @@ -587,10 +592,10 @@ impl PageCache { inner.dirty = false; slot.usage_count.store(1, Ordering::Relaxed); - return WriteBufResult::NotFound(PageWriteGuard { + return Ok(WriteBufResult::NotFound(PageWriteGuard { inner, valid: false, - }); + })); } } @@ -754,7 +759,7 @@ impl PageCache { /// Find a slot to evict. /// /// On return, the slot is empty and write-locked. 
- fn find_victim(&self) -> (usize, RwLockWriteGuard) { + fn find_victim(&self) -> anyhow::Result<(usize, RwLockWriteGuard)> { let iter_limit = self.slots.len() * 10; let mut iters = 0; loop { @@ -767,7 +772,7 @@ impl PageCache { let mut inner = match slot.inner.try_write() { Ok(inner) => inner, Err(TryLockError::Poisoned(err)) => { - panic!("buffer lock was poisoned: {:?}", err) + anyhow::bail!("buffer lock was poisoned: {err:?}") } Err(TryLockError::WouldBlock) => { // If we have looped through the whole buffer pool 10 times @@ -777,7 +782,7 @@ impl PageCache { // there are buffers in the pool. In practice, with a reasonably // large buffer pool it really shouldn't happen. if iters > iter_limit { - panic!("could not find a victim buffer to evict"); + anyhow::bail!("exceeded evict iter limit"); } continue; } @@ -804,7 +809,7 @@ impl PageCache { inner.dirty = false; inner.key = None; } - return (slot_idx, inner); + return Ok((slot_idx, inner)); } } } From 37d90dc3b30d480006f3389baa5248cd47e75137 Mon Sep 17 00:00:00 2001 From: MMeent Date: Thu, 18 Aug 2022 21:51:33 +0200 Subject: [PATCH 04/63] Fix dependencies issue between compute-tools and compute node docker images (#2304) Compute node docker image requires compute-tools to build, but this dependency (and the argument for which image to pick) weren't described in the workflow file. This lead to out-of-date binaries in latest builds, which subsequently broke these images. --- .github/workflows/build_and_test.yml | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index c9b696e409..1e71a53f99 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -446,9 +446,29 @@ jobs: - name: Kaniko build compute tools run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . 
--dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:$GITHUB_RUN_ID + promote-image-compute-tools: + runs-on: dev + needs: [ compute-tools-image ] + if: github.event_name != 'workflow_dispatch' + container: amazon/aws-cli + strategy: + fail-fast: false + matrix: + name: [ compute-tools ] + + steps: + - name: Promote image to latest + run: + MANIFEST=$(aws ecr batch-get-image --repository-name ${{ matrix.name }} --image-ids imageTag=$GITHUB_RUN_ID --query 'images[].imageManifest' --output text) && aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST" + compute-node-image: runs-on: dev container: gcr.io/kaniko-project/executor:v1.9.0-debug + # note: This image depends on neondatabase/compute-tools:latest (or :thisversion), + # which isn't available until after the image is promoted. + # Ergo, we must explicitly build and promote compute-tools separately. + needs: + - promote-image-compute-tools steps: - name: Checkout @@ -462,17 +482,17 @@ jobs: - name: Kaniko build compute node working-directory: ./vendor/postgres/ - run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:$GITHUB_RUN_ID + run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . 
--build-arg=COMPUTE_TOOLS_TAG=$GITHUB_RUN_ID --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:$GITHUB_RUN_ID promote-images: runs-on: dev - needs: [ neon-image, compute-tools-image, compute-node-image ] + needs: [ neon-image, compute-node-image ] if: github.event_name != 'workflow_dispatch' container: amazon/aws-cli strategy: fail-fast: false matrix: - name: [ neon, compute-tools, compute-node ] + name: [ neon, compute-node ] steps: - name: Promote image to latest From 6b9cef02a1a84b2d02671fd4c596d4fd1cd54b19 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Thu, 18 Aug 2022 16:55:48 +0300 Subject: [PATCH 05/63] Use better defaults for pageserver Docker image --- Dockerfile | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 17aa0025e8..dccf7b6c54 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,8 @@ +### Creates a storage Docker image with postgres, pageserver, safekeeper and proxy binaries. +### The image itself is mainly used as a container for the binaries and for starting e2e tests with custom parameters. +### By default, the binaries inside the image have some mock parameters and can start, but are not intended to be used +### inside this image in the real deployments. + # Build Postgres FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned AS pg-build WORKDIR /home/nonroot @@ -58,7 +63,18 @@ COPY --from=build --chown=zenith:zenith /home/nonroot/target/release/proxy COPY --from=pg-build /home/nonroot/tmp_install/ /usr/local/ COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/ +# By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config. +# Now, when `docker run ... pageserver` is run, it can start without errors, yet will have some default dummy values. 
+RUN mkdir -p /data/.neon/ && chown -R zenith:zenith /data/.neon/ \ + && /usr/local/bin/pageserver -D /data/.neon/ --init \ + -c "id=1234" \ + -c "broker_endpoints=['http://etcd:2379']" \ + -c "pg_distrib_dir='/usr/local'" \ + -c "listen_pg_addr='0.0.0.0:6400'" \ + -c "listen_http_addr='0.0.0.0:9898'" + VOLUME ["/data"] USER zenith EXPOSE 6400 -CMD ["pageserver"] +EXPOSE 9898 +CMD ["/bin/bash"] From 12e87f0df3f39dfa85c2346695941f4128612866 Mon Sep 17 00:00:00 2001 From: Rory de Zoete <33318916+zoete@users.noreply.github.com> Date: Fri, 19 Aug 2022 12:07:46 +0200 Subject: [PATCH 06/63] Update workflow to fix dependency issue (#2309) * Update workflow to fix dependency issue * Update workflow * Update workflow and dockerfile * Specify tag * Update main dockerfile as well * Mirror rust image to docker hub * Update submodule ref Co-authored-by: Rory de Zoete --- .github/workflows/build_and_test.yml | 42 +++++++--------------------- Dockerfile | 7 +++-- Dockerfile.compute-tools | 6 +++- vendor/postgres | 2 +- 4 files changed, 21 insertions(+), 36 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 1e71a53f99..3a2e8bad64 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -446,29 +446,13 @@ jobs: - name: Kaniko build compute tools run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . 
--dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:$GITHUB_RUN_ID - promote-image-compute-tools: - runs-on: dev - needs: [ compute-tools-image ] - if: github.event_name != 'workflow_dispatch' - container: amazon/aws-cli - strategy: - fail-fast: false - matrix: - name: [ compute-tools ] - - steps: - - name: Promote image to latest - run: - MANIFEST=$(aws ecr batch-get-image --repository-name ${{ matrix.name }} --image-ids imageTag=$GITHUB_RUN_ID --query 'images[].imageManifest' --output text) && aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST" - compute-node-image: runs-on: dev container: gcr.io/kaniko-project/executor:v1.9.0-debug # note: This image depends on neondatabase/compute-tools:latest (or :thisversion), # which isn't available until after the image is promoted. # Ergo, we must explicitly build and promote compute-tools separately. - needs: - - promote-image-compute-tools + needs: [ compute-tools-image ] steps: - name: Checkout @@ -482,17 +466,17 @@ jobs: - name: Kaniko build compute node working-directory: ./vendor/postgres/ - run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg=COMPUTE_TOOLS_TAG=$GITHUB_RUN_ID --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:$GITHUB_RUN_ID + run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . 
--build-arg=TAG=$GITHUB_RUN_ID --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:$GITHUB_RUN_ID promote-images: runs-on: dev - needs: [ neon-image, compute-node-image ] + needs: [ neon-image, compute-node-image, compute-tools-image ] if: github.event_name != 'workflow_dispatch' container: amazon/aws-cli strategy: fail-fast: false matrix: - name: [ neon, compute-node ] + name: [ neon, compute-node, compute-tools ] steps: - name: Promote image to latest @@ -509,18 +493,6 @@ jobs: run: | go install github.com/google/go-containerregistry/cmd/crane@31786c6cbb82d6ec4fb8eb79cd9387905130534e # v0.11.0 go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0 - -# - name: Get build tag -# run: | -# if [[ "$GITHUB_REF_NAME" == "main" ]]; then -# echo "::set-output name=tag::$(git rev-list --count HEAD)" -# elif [[ "$GITHUB_REF_NAME" == "release" ]]; then -# echo "::set-output name=tag::release-$(git rev-list --count HEAD)" -# else -# echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release' " -# echo "::set-output name=tag::$GITHUB_RUN_ID" -# fi -# id: build-tag - name: Configure ECR login run: | @@ -536,6 +508,9 @@ jobs: - name: Pull compute node image from ECR run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:latest compute-node + - name: Pull rust image from ECR + run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned rust + - name: Configure docker login run: | # ECR Credential Helper & Docker Hub don't work together in config, hence reset @@ -551,6 +526,9 @@ jobs: - name: Push compute node image to Docker Hub run: crane push compute-node neondatabase/compute-node:${{needs.tag.outputs.build-tag}} + - name: Push rust image to Docker Hub + run: crane push rust neondatabase/rust:pinned + - name: Add latest tag to images if: | (github.ref_name == 'main' || github.ref_name == 
'release') && diff --git a/Dockerfile b/Dockerfile index dccf7b6c54..77598fd086 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,9 +2,12 @@ ### The image itself is mainly used as a container for the binaries and for starting e2e tests with custom parameters. ### By default, the binaries inside the image have some mock parameters and can start, but are not intended to be used ### inside this image in the real deployments. +ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com +ARG IMAGE=rust +ARG TAG=pinned # Build Postgres -FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned AS pg-build +FROM $REPOSITORY/$IMAGE:$TAG AS pg-build WORKDIR /home/nonroot COPY vendor/postgres vendor/postgres @@ -17,7 +20,7 @@ RUN set -e \ && tar -C tmp_install -czf /home/nonroot/postgres_install.tar.gz . # Build zenith binaries -FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned AS build +FROM $REPOSITORY/$IMAGE:$TAG AS build WORKDIR /home/nonroot ARG GIT_VERSION=local diff --git a/Dockerfile.compute-tools b/Dockerfile.compute-tools index 05393021c2..47c408bbf2 100644 --- a/Dockerfile.compute-tools +++ b/Dockerfile.compute-tools @@ -1,6 +1,10 @@ # First transient image to build compute_tools binaries # NB: keep in sync with rust image version in .github/workflows/build_and_test.yml -FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned AS rust-build +ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com +ARG IMAGE=rust +ARG TAG=pinned + +FROM $REPOSITORY/$IMAGE:$TAG AS rust-build WORKDIR /home/nonroot # Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds. 
diff --git a/vendor/postgres b/vendor/postgres index 7e32bba2aa..3f315a1ec3 160000 --- a/vendor/postgres +++ b/vendor/postgres @@ -1 +1 @@ -Subproject commit 7e32bba2aa2a1752996586bfaf35754f1f0a4d53 +Subproject commit 3f315a1ec336b3a22a09d2015ce91697def4904e From 80436123349d1020d0193ffc62710c63e588bad2 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Thu, 18 Aug 2022 16:03:57 +0300 Subject: [PATCH 07/63] Remove Timeline trait, rename LayeredTimeline struct into Timeline --- pageserver/src/basebackup.rs | 17 +- pageserver/src/http/routes.rs | 9 +- pageserver/src/import_datadir.rs | 34 ++- pageserver/src/layered_repository.rs | 22 +- pageserver/src/layered_repository/timeline.rs | 254 +++++++++--------- pageserver/src/lib.rs | 2 - pageserver/src/page_service.rs | 24 +- pageserver/src/pgdatadir_mapping.rs | 112 ++++---- pageserver/src/repository.rs | 99 +------ pageserver/src/tenant_mgr.rs | 10 +- pageserver/src/timelines.rs | 6 +- pageserver/src/walingest.rs | 57 ++-- .../src/walreceiver/connection_manager.rs | 8 +- .../src/walreceiver/walreceiver_connection.rs | 6 +- 14 files changed, 290 insertions(+), 370 deletions(-) diff --git a/pageserver/src/basebackup.rs b/pageserver/src/basebackup.rs index 33f072553f..864c5b8ac8 100644 --- a/pageserver/src/basebackup.rs +++ b/pageserver/src/basebackup.rs @@ -22,8 +22,8 @@ use std::time::SystemTime; use tar::{Builder, EntryType, Header}; use tracing::*; +use crate::layered_repository::Timeline; use crate::reltag::{RelTag, SlruKind}; -use crate::DatadirTimeline; use postgres_ffi::v14::pg_constants; use postgres_ffi::v14::xlog_utils::{generate_wal_segment, normalize_lsn, XLogFileName}; @@ -36,13 +36,12 @@ use utils::lsn::Lsn; /// This is short-living object only for the time of tarball creation, /// created mostly to avoid passing a lot of parameters between various functions /// used for constructing tarball. 
-pub struct Basebackup<'a, W, T> +pub struct Basebackup<'a, W> where W: Write, - T: DatadirTimeline, { ar: Builder>, - timeline: &'a Arc, + timeline: &'a Arc, pub lsn: Lsn, prev_record_lsn: Lsn, full_backup: bool, @@ -57,18 +56,17 @@ where // * When working without safekeepers. In this situation it is important to match the lsn // we are taking basebackup on with the lsn that is used in pageserver's walreceiver // to start the replication. -impl<'a, W, T> Basebackup<'a, W, T> +impl<'a, W> Basebackup<'a, W> where W: Write, - T: DatadirTimeline, { pub fn new( write: W, - timeline: &'a Arc, + timeline: &'a Arc, req_lsn: Option, prev_lsn: Option, full_backup: bool, - ) -> Result> { + ) -> Result> { // Compute postgres doesn't have any previous WAL files, but the first // record that it's going to write needs to include the LSN of the // previous record (xl_prev). We include prev_record_lsn in the @@ -404,10 +402,9 @@ where } } -impl<'a, W, T> Drop for Basebackup<'a, W, T> +impl<'a, W> Drop for Basebackup<'a, W> where W: Write, - T: DatadirTimeline, { /// If the basebackup was not finished, prevent the Archive::drop() from /// writing the end-of-archive marker. 
diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 1d0adec63d..8d300e554a 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -11,10 +11,9 @@ use super::models::{ StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse, TenantInfo, TimelineCreateRequest, }; -use crate::layered_repository::{metadata::TimelineMetadata, LayeredTimeline}; -use crate::pgdatadir_mapping::DatadirTimeline; +use crate::layered_repository::{metadata::TimelineMetadata, Timeline}; +use crate::repository::Repository; use crate::repository::{LocalTimelineState, RepositoryTimeline}; -use crate::repository::{Repository, Timeline}; use crate::storage_sync; use crate::storage_sync::index::{RemoteIndex, RemoteTimeline}; use crate::tenant_config::TenantConfOpt; @@ -85,7 +84,7 @@ fn get_config(request: &Request) -> &'static PageServerConf { // Helper functions to construct a LocalTimelineInfo struct for a timeline fn local_timeline_info_from_loaded_timeline( - timeline: &LayeredTimeline, + timeline: &Timeline, include_non_incremental_logical_size: bool, include_non_incremental_physical_size: bool, ) -> anyhow::Result { @@ -160,7 +159,7 @@ fn local_timeline_info_from_unloaded_timeline(metadata: &TimelineMetadata) -> Lo } fn local_timeline_info_from_repo_timeline( - repo_timeline: &RepositoryTimeline, + repo_timeline: &RepositoryTimeline, include_non_incremental_logical_size: bool, include_non_incremental_physical_size: bool, ) -> anyhow::Result { diff --git a/pageserver/src/import_datadir.rs b/pageserver/src/import_datadir.rs index 729829c5e8..54e791e5b5 100644 --- a/pageserver/src/import_datadir.rs +++ b/pageserver/src/import_datadir.rs @@ -11,6 +11,7 @@ use bytes::Bytes; use tracing::*; use walkdir::WalkDir; +use crate::layered_repository::Timeline; use crate::pgdatadir_mapping::*; use crate::reltag::{RelTag, SlruKind}; use crate::walingest::WalIngest; @@ -39,9 +40,9 @@ pub fn get_lsn_from_controlfile(path: &Path) -> 
Result { /// This is currently only used to import a cluster freshly created by initdb. /// The code that deals with the checkpoint would not work right if the /// cluster was not shut down cleanly. -pub fn import_timeline_from_postgres_datadir( +pub fn import_timeline_from_postgres_datadir( path: &Path, - tline: &T, + tline: &Timeline, lsn: Lsn, ) -> Result<()> { let mut pg_control: Option = None; @@ -99,8 +100,8 @@ pub fn import_timeline_from_postgres_datadir( } // subroutine of import_timeline_from_postgres_datadir(), to load one relation file. -fn import_rel( - modification: &mut DatadirModification, +fn import_rel( + modification: &mut DatadirModification, path: &Path, spcoid: Oid, dboid: Oid, @@ -178,8 +179,8 @@ fn import_rel( /// Import an SLRU segment file /// -fn import_slru( - modification: &mut DatadirModification, +fn import_slru( + modification: &mut DatadirModification, slru: SlruKind, path: &Path, mut reader: Reader, @@ -234,12 +235,7 @@ fn import_slru( /// Scan PostgreSQL WAL files in given directory and load all records between /// 'startpoint' and 'endpoint' into the repository. 
-fn import_wal( - walpath: &Path, - tline: &T, - startpoint: Lsn, - endpoint: Lsn, -) -> Result<()> { +fn import_wal(walpath: &Path, tline: &Timeline, startpoint: Lsn, endpoint: Lsn) -> Result<()> { let mut waldecoder = WalStreamDecoder::new(startpoint); let mut segno = startpoint.segment_number(pg_constants::WAL_SEGMENT_SIZE); @@ -305,12 +301,12 @@ fn import_wal( Ok(()) } -pub fn import_basebackup_from_tar( - tline: &T, +pub fn import_basebackup_from_tar( + tline: &Timeline, reader: Reader, base_lsn: Lsn, ) -> Result<()> { - info!("importing base at {}", base_lsn); + info!("importing base at {base_lsn}"); let mut modification = tline.begin_modification(base_lsn); modification.init_empty()?; @@ -347,8 +343,8 @@ pub fn import_basebackup_from_tar( Ok(()) } -pub fn import_wal_from_tar( - tline: &T, +pub fn import_wal_from_tar( + tline: &Timeline, reader: Reader, start_lsn: Lsn, end_lsn: Lsn, @@ -428,8 +424,8 @@ pub fn import_wal_from_tar( Ok(()) } -pub fn import_file( - modification: &mut DatadirModification, +pub fn import_file( + modification: &mut DatadirModification, file_path: &Path, reader: Reader, len: usize, diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs index 6bf2e71852..c0f4aece54 100644 --- a/pageserver/src/layered_repository.rs +++ b/pageserver/src/layered_repository.rs @@ -31,7 +31,7 @@ use crate::config::PageServerConf; use crate::storage_sync::index::RemoteIndex; use crate::tenant_config::{TenantConf, TenantConfOpt}; -use crate::repository::{GcResult, Repository, RepositoryTimeline, Timeline}; +use crate::repository::{GcResult, Repository, RepositoryTimeline}; use crate::thread_mgr; use crate::walredo::WalRedoManager; use crate::CheckpointConfig; @@ -61,7 +61,7 @@ mod timeline; use storage_layer::Layer; use timeline::LayeredTimelineEntry; -pub use timeline::LayeredTimeline; +pub use timeline::Timeline; // re-export this function so that page_cache.rs can use it. 
pub use crate::layered_repository::ephemeral_file::writeback as writeback_ephemeral_file; @@ -121,15 +121,13 @@ pub struct LayeredRepository { /// Public interface impl Repository for LayeredRepository { - type Timeline = LayeredTimeline; - - fn get_timeline(&self, timelineid: ZTimelineId) -> Option> { + fn get_timeline(&self, timelineid: ZTimelineId) -> Option> { let timelines = self.timelines.lock().unwrap(); self.get_timeline_internal(timelineid, &timelines) .map(RepositoryTimeline::from) } - fn get_timeline_load(&self, timelineid: ZTimelineId) -> Result> { + fn get_timeline_load(&self, timelineid: ZTimelineId) -> Result> { let mut timelines = self.timelines.lock().unwrap(); match self.get_timeline_load_internal(timelineid, &mut timelines)? { Some(local_loaded_timeline) => Ok(local_loaded_timeline), @@ -140,7 +138,7 @@ impl Repository for LayeredRepository { } } - fn list_timelines(&self) -> Vec<(ZTimelineId, RepositoryTimeline)> { + fn list_timelines(&self) -> Vec<(ZTimelineId, RepositoryTimeline)> { self.timelines .lock() .unwrap() @@ -158,7 +156,7 @@ impl Repository for LayeredRepository { &self, timeline_id: ZTimelineId, initdb_lsn: Lsn, - ) -> Result> { + ) -> Result> { let mut timelines = self.timelines.lock().unwrap(); let vacant_timeline_entry = match timelines.entry(timeline_id) { Entry::Occupied(_) => bail!("Timeline already exists"), @@ -176,7 +174,7 @@ impl Repository for LayeredRepository { let metadata = TimelineMetadata::new(Lsn(0), None, None, Lsn(0), initdb_lsn, initdb_lsn); timeline::save_metadata(self.conf, timeline_id, self.tenant_id, &metadata, true)?; - let timeline = LayeredTimeline::new( + let timeline = Timeline::new( self.conf, Arc::clone(&self.tenant_conf), metadata, @@ -539,7 +537,7 @@ impl LayeredRepository { &self, timelineid: ZTimelineId, timelines: &mut HashMap, - ) -> anyhow::Result>> { + ) -> anyhow::Result>> { match timelines.get(&timelineid) { Some(entry) => match entry { LayeredTimelineEntry::Loaded(local_timeline) => { @@ 
-574,7 +572,7 @@ impl LayeredRepository { &self, timeline_id: ZTimelineId, timelines: &mut HashMap, - ) -> anyhow::Result> { + ) -> anyhow::Result> { let metadata = load_metadata(self.conf, timeline_id, self.tenant_id) .context("failed to load metadata")?; let disk_consistent_lsn = metadata.disk_consistent_lsn(); @@ -591,7 +589,7 @@ impl LayeredRepository { .map(LayeredTimelineEntry::Loaded); let _enter = info_span!("loading local timeline").entered(); - let timeline = LayeredTimeline::new( + let timeline = Timeline::new( self.conf, Arc::clone(&self.tenant_conf), metadata, diff --git a/pageserver/src/layered_repository/timeline.rs b/pageserver/src/layered_repository/timeline.rs index 910fc9e9fc..da3a6981da 100644 --- a/pageserver/src/layered_repository/timeline.rs +++ b/pageserver/src/layered_repository/timeline.rs @@ -9,7 +9,7 @@ use once_cell::sync::Lazy; use tracing::*; use std::cmp::{max, min, Ordering}; -use std::collections::{hash_map::Entry, HashMap, HashSet}; +use std::collections::{HashMap, HashSet}; use std::fs; use std::fs::{File, OpenOptions}; use std::io::Write; @@ -43,7 +43,6 @@ use crate::pgdatadir_mapping::BlockNumber; use crate::pgdatadir_mapping::LsnForTimestamp; use crate::reltag::RelTag; use crate::tenant_config::TenantConfOpt; -use crate::DatadirTimeline; use postgres_ffi::v14::xlog_utils::to_pg_timestamp; use utils::{ @@ -52,7 +51,7 @@ use utils::{ zid::{ZTenantId, ZTimelineId}, }; -use crate::repository::{GcResult, RepositoryTimeline, Timeline, TimelineWriter}; +use crate::repository::{GcResult, RepositoryTimeline, TimelineWriter}; use crate::repository::{Key, Value}; use crate::thread_mgr; use crate::virtual_file::VirtualFile; @@ -160,7 +159,7 @@ static PERSISTENT_BYTES_WRITTEN: Lazy = Lazy::new(|| { #[derive(Clone)] pub enum LayeredTimelineEntry { - Loaded(Arc), + Loaded(Arc), Unloaded { id: ZTimelineId, metadata: TimelineMetadata, @@ -191,7 +190,7 @@ impl LayeredTimelineEntry { } } - fn ensure_loaded(&self) -> anyhow::Result<&Arc> { + fn 
ensure_loaded(&self) -> anyhow::Result<&Arc> { match self { LayeredTimelineEntry::Loaded(timeline) => Ok(timeline), LayeredTimelineEntry::Unloaded { .. } => { @@ -213,7 +212,7 @@ impl LayeredTimelineEntry { } } -impl From for RepositoryTimeline { +impl From for RepositoryTimeline { fn from(entry: LayeredTimelineEntry) -> Self { match entry { LayeredTimelineEntry::Loaded(timeline) => RepositoryTimeline::Loaded(timeline as _), @@ -288,7 +287,7 @@ impl TimelineMetrics { } } -pub struct LayeredTimeline { +pub struct Timeline { conf: &'static PageServerConf, tenant_conf: Arc>, @@ -385,7 +384,7 @@ pub struct LayeredTimeline { pub last_received_wal: Mutex>, /// Relation size cache - rel_size_cache: RwLock>, + pub rel_size_cache: RwLock>, } pub struct WalReceiverInfo { @@ -394,46 +393,6 @@ pub struct WalReceiverInfo { pub last_received_msg_ts: u128, } -/// Inherit all the functions from DatadirTimeline, to provide the -/// functionality to store PostgreSQL relations, SLRUs, etc. in a -/// LayeredTimeline. 
-impl DatadirTimeline for LayeredTimeline { - fn get_cached_rel_size(&self, tag: &RelTag, lsn: Lsn) -> Option { - let rel_size_cache = self.rel_size_cache.read().unwrap(); - if let Some((cached_lsn, nblocks)) = rel_size_cache.get(tag) { - if lsn >= *cached_lsn { - return Some(*nblocks); - } - } - None - } - - fn update_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber) { - let mut rel_size_cache = self.rel_size_cache.write().unwrap(); - match rel_size_cache.entry(tag) { - Entry::Occupied(mut entry) => { - let cached_lsn = entry.get_mut(); - if lsn >= cached_lsn.0 { - *cached_lsn = (lsn, nblocks); - } - } - Entry::Vacant(entry) => { - entry.insert((lsn, nblocks)); - } - } - } - - fn set_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber) { - let mut rel_size_cache = self.rel_size_cache.write().unwrap(); - rel_size_cache.insert(tag, (lsn, nblocks)); - } - - fn remove_cached_rel_size(&self, tag: &RelTag) { - let mut rel_size_cache = self.rel_size_cache.write().unwrap(); - rel_size_cache.remove(tag); - } -} - /// /// Information about how much history needs to be retained, needed by /// Garbage Collection. @@ -464,45 +423,37 @@ pub struct GcInfo { } /// Public interface functions -impl Timeline for LayeredTimeline { - fn get_ancestor_lsn(&self) -> Lsn { +impl Timeline { + //------------------------------------------------------------------------------ + // Public GET functions + //------------------------------------------------------------------------------ + + /// Get the LSN where this branch was created + pub fn get_ancestor_lsn(&self) -> Lsn { self.ancestor_lsn } - fn get_ancestor_timeline_id(&self) -> Option { + /// Get the ancestor's timeline id + pub fn get_ancestor_timeline_id(&self) -> Option { self.ancestor_timeline .as_ref() .map(LayeredTimelineEntry::timeline_id) } - /// Wait until WAL has been received up to the given LSN. 
- fn wait_lsn(&self, lsn: Lsn) -> anyhow::Result<()> { - // This should never be called from the WAL receiver thread, because that could lead - // to a deadlock. - ensure!( - !IS_WAL_RECEIVER.with(|c| c.get()), - "wait_lsn called by WAL receiver thread" - ); - - self.metrics.wait_lsn_time_histo.observe_closure_duration( - || self.last_record_lsn - .wait_for_timeout(lsn, self.conf.wait_lsn_timeout) - .with_context(|| { - format!( - "Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}", - lsn, self.get_last_record_lsn(), self.get_disk_consistent_lsn() - ) - }))?; - - Ok(()) - } - - fn get_latest_gc_cutoff_lsn(&self) -> RwLockReadGuard { + /// Lock and get timeline's GC cuttof + pub fn get_latest_gc_cutoff_lsn(&self) -> RwLockReadGuard { self.latest_gc_cutoff_lsn.read().unwrap() } - /// Look up the value with the given a key - fn get(&self, key: Key, lsn: Lsn) -> Result { + /// Look up given page version. + /// + /// NOTE: It is considered an error to 'get' a key that doesn't exist. The abstraction + /// above this needs to store suitable metadata to track what data exists with + /// what keys, in separate metadata entries. If a non-existent key is requested, + /// the Repository implementation may incorrectly return a value from an ancestor + /// branch, for example, or waste a lot of cycles chasing the non-existing key. + /// + pub fn get(&self, key: Key, lsn: Lsn) -> Result { // Check the page cache. We will get back the most recent page with lsn <= `lsn`. // The cached image can be returned directly if there is no WAL between the cached image // and requested LSN. The cached image can also be used to reduce the amount of WAL needed @@ -531,68 +482,31 @@ impl Timeline for LayeredTimeline { .observe_closure_duration(|| self.reconstruct_value(key, lsn, reconstruct_state)) } - /// Public entry point for checkpoint(). 
All the logic is in the private - /// checkpoint_internal function, this public facade just wraps it for - /// metrics collection. - fn checkpoint(&self, cconf: CheckpointConfig) -> anyhow::Result<()> { - match cconf { - CheckpointConfig::Flush => { - self.freeze_inmem_layer(false); - self.flush_frozen_layers(true) - } - CheckpointConfig::Forced => { - self.freeze_inmem_layer(false); - self.flush_frozen_layers(true)?; - self.compact() - } - } - } - - /// - /// Validate lsn against initdb_lsn and latest_gc_cutoff_lsn. - /// - fn check_lsn_is_in_scope( - &self, - lsn: Lsn, - latest_gc_cutoff_lsn: &RwLockReadGuard, - ) -> Result<()> { - ensure!( - lsn >= **latest_gc_cutoff_lsn, - "LSN {} is earlier than latest GC horizon {} (we might've already garbage collected needed data)", - lsn, - **latest_gc_cutoff_lsn, - ); - Ok(()) - } - - fn get_last_record_lsn(&self) -> Lsn { + /// Get last or prev record separately. Same as get_last_record_rlsn().last/prev. + pub fn get_last_record_lsn(&self) -> Lsn { self.last_record_lsn.load().last } - fn get_prev_record_lsn(&self) -> Lsn { + pub fn get_prev_record_lsn(&self) -> Lsn { self.last_record_lsn.load().prev } - fn get_last_record_rlsn(&self) -> RecordLsn { + /// Atomically get both last and prev. 
+ pub fn get_last_record_rlsn(&self) -> RecordLsn { self.last_record_lsn.load() } - fn get_disk_consistent_lsn(&self) -> Lsn { + pub fn get_disk_consistent_lsn(&self) -> Lsn { self.disk_consistent_lsn.load() } - fn writer<'a>(&'a self) -> Box { - Box::new(LayeredTimelineWriter { - tl: self, - _write_guard: self.write_lock.lock().unwrap(), - }) - } - - fn get_physical_size(&self) -> u64 { + /// Get the physical size of the timeline at the latest LSN + pub fn get_physical_size(&self) -> u64 { self.metrics.current_physical_size_gauge.get() } - fn get_physical_size_non_incremental(&self) -> anyhow::Result { + /// Get the physical size of the timeline at the latest LSN non incrementally + pub fn get_physical_size_non_incremental(&self) -> anyhow::Result { let timeline_path = self.conf.timeline_path(&self.timeline_id, &self.tenant_id); // total size of layer files in the current timeline directory let mut total_physical_size = 0; @@ -611,9 +525,89 @@ impl Timeline for LayeredTimeline { Ok(total_physical_size) } + + /// + /// Wait until WAL has been received and processed up to this LSN. + /// + /// You should call this before any of the other get_* or list_* functions. Calling + /// those functions with an LSN that has been processed yet is an error. + /// + pub fn wait_lsn(&self, lsn: Lsn) -> anyhow::Result<()> { + // This should never be called from the WAL receiver thread, because that could lead + // to a deadlock. + ensure!( + !IS_WAL_RECEIVER.with(|c| c.get()), + "wait_lsn called by WAL receiver thread" + ); + + self.metrics.wait_lsn_time_histo.observe_closure_duration( + || self.last_record_lsn + .wait_for_timeout(lsn, self.conf.wait_lsn_timeout) + .with_context(|| { + format!( + "Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}", + lsn, self.get_last_record_lsn(), self.get_disk_consistent_lsn() + ) + }))?; + + Ok(()) + } + + /// Check that it is valid to request operations with that lsn. 
+ pub fn check_lsn_is_in_scope( + &self, + lsn: Lsn, + latest_gc_cutoff_lsn: &RwLockReadGuard, + ) -> Result<()> { + ensure!( + lsn >= **latest_gc_cutoff_lsn, + "LSN {} is earlier than latest GC horizon {} (we might've already garbage collected needed data)", + lsn, + **latest_gc_cutoff_lsn, + ); + Ok(()) + } + + //------------------------------------------------------------------------------ + // Public PUT functions, to update the repository with new page versions. + // + // These are called by the WAL receiver to digest WAL records. + //------------------------------------------------------------------------------ + + /// Flush to disk all data that was written with the put_* functions + /// + /// NOTE: This has nothing to do with checkpoint in PostgreSQL. We don't + /// know anything about them here in the repository. + pub fn checkpoint(&self, cconf: CheckpointConfig) -> anyhow::Result<()> { + match cconf { + CheckpointConfig::Flush => { + self.freeze_inmem_layer(false); + self.flush_frozen_layers(true) + } + CheckpointConfig::Forced => { + self.freeze_inmem_layer(false); + self.flush_frozen_layers(true)?; + self.compact() + } + } + } + + /// Mutate the timeline with a [`TimelineWriter`]. + /// + /// FIXME: This ought to return &'a TimelineWriter, where TimelineWriter + /// is a generic type in this trait. 
But that doesn't currently work in + /// Rust: https://rust-lang.github.io/rfcs/1598-generic_associated_types.html + /// TODO kb replace with the concrete type + pub fn writer<'a>(&'a self) -> Box { + Box::new(LayeredTimelineWriter { + tl: self, + _write_guard: self.write_lock.lock().unwrap(), + }) + } } -impl LayeredTimeline { +// Private functions +impl Timeline { fn get_checkpoint_distance(&self) -> u64 { let tenant_conf = self.tenant_conf.read().unwrap(); tenant_conf @@ -662,8 +656,8 @@ impl LayeredTimeline { tenant_id: ZTenantId, walredo_mgr: Arc, upload_layers: bool, - ) -> LayeredTimeline { - let mut result = LayeredTimeline { + ) -> Timeline { + let mut result = Timeline { conf, tenant_conf, timeline_id, @@ -1014,7 +1008,7 @@ impl LayeredTimeline { Some((lsn, img)) } - fn get_ancestor_timeline(&self) -> Result> { + fn get_ancestor_timeline(&self) -> Result> { let ancestor = self .ancestor_timeline .as_ref() @@ -1135,7 +1129,7 @@ impl LayeredTimeline { /// Also flush after a period of time without new data -- it helps /// safekeepers to regard pageserver as caught up and suspend activity. 
/// - pub fn check_checkpoint_distance(self: &Arc) -> Result<()> { + pub fn check_checkpoint_distance(self: &Arc) -> Result<()> { let last_lsn = self.get_last_record_lsn(); let layers = self.layers.read().unwrap(); if let Some(open_layer) = &layers.open_layer { @@ -2211,12 +2205,12 @@ fn layer_traversal_error( } struct LayeredTimelineWriter<'a> { - tl: &'a LayeredTimeline, + tl: &'a Timeline, _write_guard: MutexGuard<'a, ()>, } impl Deref for LayeredTimelineWriter<'_> { - type Target = dyn Timeline; + type Target = Timeline; fn deref(&self) -> &Self::Target { self.tl diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs index 47fd8a84cf..06c5f552a4 100644 --- a/pageserver/src/lib.rs +++ b/pageserver/src/lib.rs @@ -28,8 +28,6 @@ use tracing::info; use crate::thread_mgr::ThreadKind; use metrics::{register_int_gauge_vec, IntGaugeVec}; -use pgdatadir_mapping::DatadirTimeline; - /// Current storage format version /// /// This is embedded in the metadata file, and also in the header of all the diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index b63bb90be1..f5f1e4d7bd 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -30,11 +30,11 @@ use utils::{ use crate::basebackup; use crate::config::{PageServerConf, ProfilingConfig}; use crate::import_datadir::{import_basebackup_from_tar, import_wal_from_tar}; -use crate::pgdatadir_mapping::{DatadirTimeline, LsnForTimestamp}; +use crate::layered_repository::Timeline; +use crate::pgdatadir_mapping::LsnForTimestamp; use crate::profiling::profpoint_start; use crate::reltag::RelTag; use crate::repository::Repository; -use crate::repository::Timeline; use crate::tenant_mgr; use crate::thread_mgr; use crate::thread_mgr::ThreadKind; @@ -636,8 +636,8 @@ impl PageServerHandler { /// In either case, if the page server hasn't received the WAL up to the /// requested LSN yet, we will wait for it to arrive. 
The return value is /// the LSN that should be used to look up the page versions. - fn wait_or_get_last_lsn( - timeline: &T, + fn wait_or_get_last_lsn( + timeline: &Timeline, mut lsn: Lsn, latest: bool, latest_gc_cutoff_lsn: &RwLockReadGuard, @@ -684,9 +684,9 @@ impl PageServerHandler { Ok(lsn) } - fn handle_get_rel_exists_request( + fn handle_get_rel_exists_request( &self, - timeline: &T, + timeline: &Timeline, req: &PagestreamExistsRequest, ) -> Result { let _enter = info_span!("get_rel_exists", rel = %req.rel, req_lsn = %req.lsn).entered(); @@ -701,9 +701,9 @@ impl PageServerHandler { })) } - fn handle_get_nblocks_request( + fn handle_get_nblocks_request( &self, - timeline: &T, + timeline: &Timeline, req: &PagestreamNblocksRequest, ) -> Result { let _enter = info_span!("get_nblocks", rel = %req.rel, req_lsn = %req.lsn).entered(); @@ -717,9 +717,9 @@ impl PageServerHandler { })) } - fn handle_db_size_request( + fn handle_db_size_request( &self, - timeline: &T, + timeline: &Timeline, req: &PagestreamDbSizeRequest, ) -> Result { let _enter = info_span!("get_db_size", dbnode = %req.dbnode, req_lsn = %req.lsn).entered(); @@ -735,9 +735,9 @@ impl PageServerHandler { })) } - fn handle_get_page_at_lsn_request( + fn handle_get_page_at_lsn_request( &self, - timeline: &T, + timeline: &Timeline, req: &PagestreamGetPageRequest, ) -> Result { let _enter = info_span!("get_page", rel = %req.rel, blkno = &req.blkno, req_lsn = %req.lsn) diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 88fac0ad5a..d10e48393c 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -7,8 +7,8 @@ //! Clarify that) //! 
use crate::keyspace::{KeySpace, KeySpaceAccum}; +use crate::layered_repository::Timeline; use crate::reltag::{RelTag, SlruKind}; -use crate::repository::Timeline; use crate::repository::*; use crate::walrecord::ZenithWalRecord; use anyhow::{bail, ensure, Result}; @@ -18,7 +18,7 @@ use postgres_ffi::v14::xlog_utils::TimestampTz; use postgres_ffi::BLCKSZ; use postgres_ffi::{Oid, TransactionId}; use serde::{Deserialize, Serialize}; -use std::collections::{HashMap, HashSet}; +use std::collections::{hash_map, HashMap, HashSet}; use std::ops::Range; use tracing::{debug, trace, warn}; use utils::{bin_ser::BeSer, lsn::Lsn}; @@ -35,23 +35,13 @@ pub enum LsnForTimestamp { } /// -/// This trait provides all the functionality to store PostgreSQL relations, SLRUs, +/// This impl provides all the functionality to store PostgreSQL relations, SLRUs, /// and other special kinds of files, in a versioned key-value store. The -/// Timeline trait provides the key-value store. +/// Timeline struct provides the key-value store. /// -/// This is a trait, so that we can easily include all these functions in a Timeline -/// implementation. You're not expected to have different implementations of this trait, -/// rather, this provides an interface and implementation, over Timeline. -/// -/// If you wanted to store other kinds of data in the Neon repository, e.g. -/// flat files or MySQL, you would create a new trait like this, with all the -/// functions that make sense for the kind of data you're storing. For flat files, -/// for example, you might have a function like "fn read(path, offset, size)". -/// We might also have that situation in the future, to support multiple PostgreSQL -/// versions, if there are big changes in how the data is organized in the data -/// directory, or if new special files are introduced. 
-/// -pub trait DatadirTimeline: Timeline { +/// This is a separate impl, so that we can easily include all these functions in a Timeline +/// implementation, and might be moved into a separate struct later. +impl Timeline { /// Start ingesting a WAL record, or other atomic modification of /// the timeline. /// @@ -75,7 +65,7 @@ pub trait DatadirTimeline: Timeline { /// functions of the timeline until you finish! And if you update the /// same page twice, the last update wins. /// - fn begin_modification(&self, lsn: Lsn) -> DatadirModification + pub fn begin_modification(&self, lsn: Lsn) -> DatadirModification where Self: Sized, { @@ -93,7 +83,7 @@ pub trait DatadirTimeline: Timeline { //------------------------------------------------------------------------------ /// Look up given page version. - fn get_rel_page_at_lsn(&self, tag: RelTag, blknum: BlockNumber, lsn: Lsn) -> Result { + pub fn get_rel_page_at_lsn(&self, tag: RelTag, blknum: BlockNumber, lsn: Lsn) -> Result { ensure!(tag.relnode != 0, "invalid relnode"); let nblocks = self.get_rel_size(tag, lsn)?; @@ -110,7 +100,7 @@ pub trait DatadirTimeline: Timeline { } // Get size of a database in blocks - fn get_db_size(&self, spcnode: Oid, dbnode: Oid, lsn: Lsn) -> Result { + pub fn get_db_size(&self, spcnode: Oid, dbnode: Oid, lsn: Lsn) -> Result { let mut total_blocks = 0; let rels = self.list_rels(spcnode, dbnode, lsn)?; @@ -123,7 +113,7 @@ pub trait DatadirTimeline: Timeline { } /// Get size of a relation file - fn get_rel_size(&self, tag: RelTag, lsn: Lsn) -> Result { + pub fn get_rel_size(&self, tag: RelTag, lsn: Lsn) -> Result { ensure!(tag.relnode != 0, "invalid relnode"); if let Some(nblocks) = self.get_cached_rel_size(&tag, lsn) { @@ -151,7 +141,7 @@ pub trait DatadirTimeline: Timeline { } /// Does relation exist? 
- fn get_rel_exists(&self, tag: RelTag, lsn: Lsn) -> Result { + pub fn get_rel_exists(&self, tag: RelTag, lsn: Lsn) -> Result { ensure!(tag.relnode != 0, "invalid relnode"); // first try to lookup relation in cache @@ -169,7 +159,7 @@ pub trait DatadirTimeline: Timeline { } /// Get a list of all existing relations in given tablespace and database. - fn list_rels(&self, spcnode: Oid, dbnode: Oid, lsn: Lsn) -> Result> { + pub fn list_rels(&self, spcnode: Oid, dbnode: Oid, lsn: Lsn) -> Result> { // fetch directory listing let key = rel_dir_to_key(spcnode, dbnode); let buf = self.get(key, lsn)?; @@ -187,7 +177,7 @@ pub trait DatadirTimeline: Timeline { } /// Look up given SLRU page version. - fn get_slru_page_at_lsn( + pub fn get_slru_page_at_lsn( &self, kind: SlruKind, segno: u32, @@ -199,14 +189,19 @@ pub trait DatadirTimeline: Timeline { } /// Get size of an SLRU segment - fn get_slru_segment_size(&self, kind: SlruKind, segno: u32, lsn: Lsn) -> Result { + pub fn get_slru_segment_size( + &self, + kind: SlruKind, + segno: u32, + lsn: Lsn, + ) -> Result { let key = slru_segment_size_to_key(kind, segno); let mut buf = self.get(key, lsn)?; Ok(buf.get_u32_le()) } /// Get size of an SLRU segment - fn get_slru_segment_exists(&self, kind: SlruKind, segno: u32, lsn: Lsn) -> Result { + pub fn get_slru_segment_exists(&self, kind: SlruKind, segno: u32, lsn: Lsn) -> Result { // fetch directory listing let key = slru_dir_to_key(kind); let buf = self.get(key, lsn)?; @@ -223,7 +218,7 @@ pub trait DatadirTimeline: Timeline { /// so it's not well defined which LSN you get if there were multiple commits /// "in flight" at that point in time. 
/// - fn find_lsn_for_timestamp(&self, search_timestamp: TimestampTz) -> Result { + pub fn find_lsn_for_timestamp(&self, search_timestamp: TimestampTz) -> Result { let gc_cutoff_lsn_guard = self.get_latest_gc_cutoff_lsn(); let min_lsn = *gc_cutoff_lsn_guard; let max_lsn = self.get_last_record_lsn(); @@ -286,7 +281,7 @@ pub trait DatadirTimeline: Timeline { /// Additionally, sets 'found_smaller'/'found_Larger, if encounters any commits /// with a smaller/larger timestamp. /// - fn is_latest_commit_timestamp_ge_than( + pub fn is_latest_commit_timestamp_ge_than( &self, search_timestamp: TimestampTz, probe_lsn: Lsn, @@ -317,7 +312,7 @@ pub trait DatadirTimeline: Timeline { } /// Get a list of SLRU segments - fn list_slru_segments(&self, kind: SlruKind, lsn: Lsn) -> Result> { + pub fn list_slru_segments(&self, kind: SlruKind, lsn: Lsn) -> Result> { // fetch directory entry let key = slru_dir_to_key(kind); @@ -327,14 +322,14 @@ pub trait DatadirTimeline: Timeline { Ok(dir.segments) } - fn get_relmap_file(&self, spcnode: Oid, dbnode: Oid, lsn: Lsn) -> Result { + pub fn get_relmap_file(&self, spcnode: Oid, dbnode: Oid, lsn: Lsn) -> Result { let key = relmap_file_key(spcnode, dbnode); let buf = self.get(key, lsn)?; Ok(buf) } - fn list_dbdirs(&self, lsn: Lsn) -> Result> { + pub fn list_dbdirs(&self, lsn: Lsn) -> Result> { // fetch directory entry let buf = self.get(DBDIR_KEY, lsn)?; let dir = DbDirectory::des(&buf)?; @@ -342,13 +337,13 @@ pub trait DatadirTimeline: Timeline { Ok(dir.dbdirs) } - fn get_twophase_file(&self, xid: TransactionId, lsn: Lsn) -> Result { + pub fn get_twophase_file(&self, xid: TransactionId, lsn: Lsn) -> Result { let key = twophase_file_key(xid); let buf = self.get(key, lsn)?; Ok(buf) } - fn list_twophase_files(&self, lsn: Lsn) -> Result> { + pub fn list_twophase_files(&self, lsn: Lsn) -> Result> { // fetch directory entry let buf = self.get(TWOPHASEDIR_KEY, lsn)?; let dir = TwoPhaseDirectory::des(&buf)?; @@ -356,11 +351,11 @@ pub trait 
DatadirTimeline: Timeline { Ok(dir.xids) } - fn get_control_file(&self, lsn: Lsn) -> Result { + pub fn get_control_file(&self, lsn: Lsn) -> Result { self.get(CONTROLFILE_KEY, lsn) } - fn get_checkpoint(&self, lsn: Lsn) -> Result { + pub fn get_checkpoint(&self, lsn: Lsn) -> Result { self.get(CHECKPOINT_KEY, lsn) } @@ -369,7 +364,7 @@ pub trait DatadirTimeline: Timeline { /// /// Only relation blocks are counted currently. That excludes metadata, /// SLRUs, twophase files etc. - fn get_current_logical_size_non_incremental(&self, lsn: Lsn) -> Result { + pub fn get_current_logical_size_non_incremental(&self, lsn: Lsn) -> Result { // Fetch list of database dirs and iterate them let buf = self.get(DBDIR_KEY, lsn)?; let dbdir = DbDirectory::des(&buf)?; @@ -391,7 +386,7 @@ pub trait DatadirTimeline: Timeline { /// Get a KeySpace that covers all the Keys that are in use at the given LSN. /// Anything that's not listed maybe removed from the underlying storage (from /// that LSN forwards). - fn collect_keyspace(&self, lsn: Lsn) -> Result { + pub fn collect_keyspace(&self, lsn: Lsn) -> Result { // Iterate through key ranges, greedily packing them into partitions let mut result = KeySpaceAccum::new(); @@ -465,27 +460,54 @@ pub trait DatadirTimeline: Timeline { } /// Get cached size of relation if it not updated after specified LSN - fn get_cached_rel_size(&self, tag: &RelTag, lsn: Lsn) -> Option; + pub fn get_cached_rel_size(&self, tag: &RelTag, lsn: Lsn) -> Option { + let rel_size_cache = self.rel_size_cache.read().unwrap(); + if let Some((cached_lsn, nblocks)) = rel_size_cache.get(tag) { + if lsn >= *cached_lsn { + return Some(*nblocks); + } + } + None + } /// Update cached relation size if there is no more recent update - fn update_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber); + pub fn update_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber) { + let mut rel_size_cache = self.rel_size_cache.write().unwrap(); + match 
rel_size_cache.entry(tag) { + hash_map::Entry::Occupied(mut entry) => { + let cached_lsn = entry.get_mut(); + if lsn >= cached_lsn.0 { + *cached_lsn = (lsn, nblocks); + } + } + hash_map::Entry::Vacant(entry) => { + entry.insert((lsn, nblocks)); + } + } + } /// Store cached relation size - fn set_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber); + pub fn set_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber) { + let mut rel_size_cache = self.rel_size_cache.write().unwrap(); + rel_size_cache.insert(tag, (lsn, nblocks)); + } /// Remove cached relation size - fn remove_cached_rel_size(&self, tag: &RelTag); + pub fn remove_cached_rel_size(&self, tag: &RelTag) { + let mut rel_size_cache = self.rel_size_cache.write().unwrap(); + rel_size_cache.remove(tag); + } } /// DatadirModification represents an operation to ingest an atomic set of /// updates to the repository. It is created by the 'begin_record' /// function. It is called for each WAL record, so that all the modifications /// by a one WAL record appear atomic. -pub struct DatadirModification<'a, T: DatadirTimeline> { +pub struct DatadirModification<'a> { /// The timeline this modification applies to. You can access this to /// read the state, but note that any pending updates are *not* reflected /// in the state in 'tline' yet. - pub tline: &'a T, + pub tline: &'a Timeline, /// Lsn assigned by begin_modification pub lsn: Lsn, @@ -498,7 +520,7 @@ pub struct DatadirModification<'a, T: DatadirTimeline> { pending_nblocks: isize, } -impl<'a, T: DatadirTimeline> DatadirModification<'a, T> { +impl<'a> DatadirModification<'a> { /// Initialize a completely new repository. 
/// /// This inserts the directory metadata entries that are assumed to @@ -1371,7 +1393,7 @@ fn is_slru_block_key(key: Key) -> bool { pub fn create_test_timeline( repo: R, timeline_id: utils::zid::ZTimelineId, -) -> Result> { +) -> Result> { let tline = repo.create_empty_timeline(timeline_id, Lsn(8))?; let mut m = tline.begin_modification(Lsn(8)); m.init_empty()?; diff --git a/pageserver/src/repository.rs b/pageserver/src/repository.rs index d09b01437c..5cdc27a846 100644 --- a/pageserver/src/repository.rs +++ b/pageserver/src/repository.rs @@ -1,19 +1,16 @@ use crate::layered_repository::metadata::TimelineMetadata; +use crate::layered_repository::Timeline; use crate::storage_sync::index::RemoteIndex; use crate::walrecord::ZenithWalRecord; -use crate::CheckpointConfig; use anyhow::{bail, Result}; use byteorder::{ByteOrder, BE}; use bytes::Bytes; use serde::{Deserialize, Serialize}; use std::fmt; use std::ops::{AddAssign, Range}; -use std::sync::{Arc, RwLockReadGuard}; +use std::sync::Arc; use std::time::Duration; -use utils::{ - lsn::{Lsn, RecordLsn}, - zid::ZTimelineId, -}; +use utils::{lsn::Lsn, zid::ZTimelineId}; #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)] /// Key used in the Repository kv-store. @@ -185,22 +182,20 @@ impl Value { /// A repository corresponds to one .neon directory. One repository holds multiple /// timelines, forked off from the same initial call to 'initdb'. pub trait Repository: Send + Sync { - type Timeline: crate::DatadirTimeline; - /// Updates timeline based on the `TimelineSyncStatusUpdate`, received from the remote storage synchronization. /// See [`crate::remote_storage`] for more details about the synchronization. fn attach_timeline(&self, timeline_id: ZTimelineId) -> Result<()>; /// Get Timeline handle for given zenith timeline ID. /// This function is idempotent. It doesn't change internal state in any way. 
- fn get_timeline(&self, timelineid: ZTimelineId) -> Option>; + fn get_timeline(&self, timelineid: ZTimelineId) -> Option>; /// Get Timeline handle for locally available timeline. Load it into memory if it is not loaded. - fn get_timeline_load(&self, timelineid: ZTimelineId) -> Result>; + fn get_timeline_load(&self, timelineid: ZTimelineId) -> Result>; /// Lists timelines the repository contains. /// Up to repository's implementation to omit certain timelines that ar not considered ready for use. - fn list_timelines(&self) -> Vec<(ZTimelineId, RepositoryTimeline)>; + fn list_timelines(&self) -> Vec<(ZTimelineId, RepositoryTimeline)>; /// Create a new, empty timeline. The caller is responsible for loading data into it /// Initdb lsn is provided for timeline impl to be able to perform checks for some operations against it. @@ -208,7 +203,7 @@ pub trait Repository: Send + Sync { &self, timeline_id: ZTimelineId, initdb_lsn: Lsn, - ) -> Result>; + ) -> Result>; /// Branch a timeline fn branch_timeline( @@ -305,81 +300,6 @@ impl AddAssign for GcResult { } } -pub trait Timeline: Send + Sync { - //------------------------------------------------------------------------------ - // Public GET functions - //------------------------------------------------------------------------------ - - /// - /// Wait until WAL has been received and processed up to this LSN. - /// - /// You should call this before any of the other get_* or list_* functions. Calling - /// those functions with an LSN that has been processed yet is an error. - /// - fn wait_lsn(&self, lsn: Lsn) -> Result<()>; - - /// Lock and get timeline's GC cuttof - fn get_latest_gc_cutoff_lsn(&self) -> RwLockReadGuard; - - /// Look up given page version. - /// - /// NOTE: It is considered an error to 'get' a key that doesn't exist. The abstraction - /// above this needs to store suitable metadata to track what data exists with - /// what keys, in separate metadata entries. 
If a non-existent key is requested, - /// the Repository implementation may incorrectly return a value from an ancestor - /// branch, for example, or waste a lot of cycles chasing the non-existing key. - /// - fn get(&self, key: Key, lsn: Lsn) -> Result; - - /// Get the ancestor's timeline id - fn get_ancestor_timeline_id(&self) -> Option; - - /// Get the LSN where this branch was created - fn get_ancestor_lsn(&self) -> Lsn; - - //------------------------------------------------------------------------------ - // Public PUT functions, to update the repository with new page versions. - // - // These are called by the WAL receiver to digest WAL records. - //------------------------------------------------------------------------------ - /// Atomically get both last and prev. - fn get_last_record_rlsn(&self) -> RecordLsn; - - /// Get last or prev record separately. Same as get_last_record_rlsn().last/prev. - fn get_last_record_lsn(&self) -> Lsn; - - fn get_prev_record_lsn(&self) -> Lsn; - - fn get_disk_consistent_lsn(&self) -> Lsn; - - /// Mutate the timeline with a [`TimelineWriter`]. - /// - /// FIXME: This ought to return &'a TimelineWriter, where TimelineWriter - /// is a generic type in this trait. But that doesn't currently work in - /// Rust: https://rust-lang.github.io/rfcs/1598-generic_associated_types.html - fn writer<'a>(&'a self) -> Box; - - /// - /// Flush to disk all data that was written with the put_* functions - /// - /// NOTE: This has nothing to do with checkpoint in PostgreSQL. We don't - /// know anything about them here in the repository. - fn checkpoint(&self, cconf: CheckpointConfig) -> Result<()>; - - /// - /// Check that it is valid to request operations with that lsn. 
- fn check_lsn_is_in_scope( - &self, - lsn: Lsn, - latest_gc_cutoff_lsn: &RwLockReadGuard, - ) -> Result<()>; - - /// Get the physical size of the timeline at the latest LSN - fn get_physical_size(&self) -> u64; - /// Get the physical size of the timeline at the latest LSN non incrementally - fn get_physical_size_non_incremental(&self) -> Result; -} - /// Various functions to mutate the timeline. // TODO Currently, Deref is used to allow easy access to read methods from this trait. // This is probably considered a bad practice in Rust and should be fixed eventually, @@ -581,6 +501,9 @@ pub mod repo_harness { #[allow(clippy::bool_assert_comparison)] #[cfg(test)] mod tests { + use crate::layered_repository::Timeline; + use crate::CheckpointConfig; + use super::repo_harness::*; use super::*; //use postgres_ffi::{pg_constants, xlog_utils::SIZEOF_CHECKPOINT}; @@ -689,7 +612,7 @@ mod tests { Ok(()) } - fn make_some_layers(tline: &T, start_lsn: Lsn) -> Result<()> { + fn make_some_layers(tline: &Timeline, start_lsn: Lsn) -> Result<()> { let mut lsn = start_lsn; #[allow(non_snake_case)] { diff --git a/pageserver/src/tenant_mgr.rs b/pageserver/src/tenant_mgr.rs index 64f1caa542..36c3e569a6 100644 --- a/pageserver/src/tenant_mgr.rs +++ b/pageserver/src/tenant_mgr.rs @@ -3,7 +3,7 @@ use crate::config::PageServerConf; use crate::http::models::TenantInfo; -use crate::layered_repository::{load_metadata, LayeredRepository, LayeredTimeline}; +use crate::layered_repository::{load_metadata, LayeredRepository, Timeline}; use crate::repository::Repository; use crate::storage_sync::index::{RemoteIndex, RemoteTimelineIndex}; use crate::storage_sync::{self, LocalTimelineInitStatus, SyncStartupData}; @@ -100,7 +100,7 @@ struct Tenant { /// /// Local timelines have more metadata that's loaded into memory, /// that is located in the `repo.timelines` field, [`crate::layered_repository::LayeredTimelineEntry`]. 
- local_timelines: HashMap>, + local_timelines: HashMap>, } #[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)] @@ -177,7 +177,7 @@ pub enum LocalTimelineUpdate { }, Attach { id: ZTenantTimelineId, - datadir: Arc, + datadir: Arc, }, } @@ -379,7 +379,7 @@ pub fn get_repository_for_tenant(tenant_id: ZTenantId) -> anyhow::Result anyhow::Result> { +) -> anyhow::Result> { let mut m = tenants_state::write_tenants(); let tenant = m .get_mut(&tenant_id) @@ -486,7 +486,7 @@ pub fn detach_tenant(conf: &'static PageServerConf, tenant_id: ZTenantId) -> any fn load_local_timeline( repo: &LayeredRepository, timeline_id: ZTimelineId, -) -> anyhow::Result> { +) -> anyhow::Result> { let inmem_timeline = repo.get_timeline_load(timeline_id).with_context(|| { format!("Inmem timeline {timeline_id} not found in tenant's repository") })?; diff --git a/pageserver/src/timelines.rs b/pageserver/src/timelines.rs index 0d35195691..6a55dd286e 100644 --- a/pageserver/src/timelines.rs +++ b/pageserver/src/timelines.rs @@ -20,15 +20,15 @@ use utils::{ use crate::import_datadir; use crate::tenant_mgr; +use crate::CheckpointConfig; use crate::{ config::PageServerConf, repository::Repository, storage_sync::index::RemoteIndex, tenant_config::TenantConfOpt, }; use crate::{ - layered_repository::{LayeredRepository, LayeredTimeline}, + layered_repository::{LayeredRepository, Timeline}, walredo::WalRedoManager, }; -use crate::{repository::Timeline, CheckpointConfig}; #[derive(Debug, Clone, Copy)] pub struct PointInTime { @@ -160,7 +160,7 @@ pub(crate) fn create_timeline( new_timeline_id: Option, ancestor_timeline_id: Option, mut ancestor_start_lsn: Option, -) -> Result)>> { +) -> Result)>> { let new_timeline_id = new_timeline_id.unwrap_or_else(ZTimelineId::generate); let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?; diff --git a/pageserver/src/walingest.rs b/pageserver/src/walingest.rs index 05afe4ba3e..c24ffc49de 100644 --- a/pageserver/src/walingest.rs +++ 
b/pageserver/src/walingest.rs @@ -30,6 +30,7 @@ use anyhow::Result; use bytes::{Buf, Bytes, BytesMut}; use tracing::*; +use crate::layered_repository::Timeline; use crate::pgdatadir_mapping::*; use crate::reltag::{RelTag, SlruKind}; use crate::walrecord::*; @@ -43,15 +44,15 @@ use utils::lsn::Lsn; static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; 8192]); -pub struct WalIngest<'a, T: DatadirTimeline> { - timeline: &'a T, +pub struct WalIngest<'a> { + timeline: &'a Timeline, checkpoint: CheckPoint, checkpoint_modified: bool, } -impl<'a, T: DatadirTimeline> WalIngest<'a, T> { - pub fn new(timeline: &T, startpoint: Lsn) -> Result> { +impl<'a> WalIngest<'a> { + pub fn new(timeline: &Timeline, startpoint: Lsn) -> Result { // Fetch the latest checkpoint into memory, so that we can compare with it // quickly in `ingest_record` and update it when it changes. let checkpoint_bytes = timeline.get_checkpoint(startpoint)?; @@ -77,7 +78,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> { &mut self, recdata: Bytes, lsn: Lsn, - modification: &mut DatadirModification, + modification: &mut DatadirModification, decoded: &mut DecodedWALRecord, ) -> Result<()> { modification.lsn = lsn; @@ -266,7 +267,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> { fn ingest_decoded_block( &mut self, - modification: &mut DatadirModification, + modification: &mut DatadirModification, lsn: Lsn, decoded: &DecodedWALRecord, blk: &DecodedBkpBlock, @@ -326,7 +327,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> { fn ingest_heapam_record( &mut self, buf: &mut Bytes, - modification: &mut DatadirModification, + modification: &mut DatadirModification, decoded: &mut DecodedWALRecord, ) -> Result<()> { // Handle VM bit updates that are implicitly part of heap records. @@ -470,7 +471,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> { /// Subroutine of ingest_record(), to handle an XLOG_DBASE_CREATE record. 
fn ingest_xlog_dbase_create( &mut self, - modification: &mut DatadirModification, + modification: &mut DatadirModification, rec: &XlCreateDatabase, ) -> Result<()> { let db_id = rec.db_id; @@ -537,7 +538,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> { fn ingest_xlog_smgr_create( &mut self, - modification: &mut DatadirModification, + modification: &mut DatadirModification, rec: &XlSmgrCreate, ) -> Result<()> { let rel = RelTag { @@ -555,7 +556,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> { /// This is the same logic as in PostgreSQL's smgr_redo() function. fn ingest_xlog_smgr_truncate( &mut self, - modification: &mut DatadirModification, + modification: &mut DatadirModification, rec: &XlSmgrTruncate, ) -> Result<()> { let spcnode = rec.rnode.spcnode; @@ -620,7 +621,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> { /// fn ingest_xact_record( &mut self, - modification: &mut DatadirModification, + modification: &mut DatadirModification, parsed: &XlXactParsedRecord, is_commit: bool, ) -> Result<()> { @@ -689,7 +690,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> { fn ingest_clog_truncate_record( &mut self, - modification: &mut DatadirModification, + modification: &mut DatadirModification, xlrec: &XlClogTruncate, ) -> Result<()> { info!( @@ -747,7 +748,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> { fn ingest_multixact_create_record( &mut self, - modification: &mut DatadirModification, + modification: &mut DatadirModification, xlrec: &XlMultiXactCreate, ) -> Result<()> { // Create WAL record for updating the multixact-offsets page @@ -826,7 +827,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> { fn ingest_multixact_truncate_record( &mut self, - modification: &mut DatadirModification, + modification: &mut DatadirModification, xlrec: &XlMultiXactTruncate, ) -> Result<()> { self.checkpoint.oldestMulti = xlrec.end_trunc_off; @@ -860,7 +861,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> { fn ingest_relmap_page( &mut self, - modification: &mut 
DatadirModification, + modification: &mut DatadirModification, xlrec: &XlRelmapUpdate, decoded: &DecodedWALRecord, ) -> Result<()> { @@ -876,7 +877,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> { fn put_rel_creation( &mut self, - modification: &mut DatadirModification, + modification: &mut DatadirModification, rel: RelTag, ) -> Result<()> { modification.put_rel_creation(rel, 0)?; @@ -885,7 +886,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> { fn put_rel_page_image( &mut self, - modification: &mut DatadirModification, + modification: &mut DatadirModification, rel: RelTag, blknum: BlockNumber, img: Bytes, @@ -897,7 +898,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> { fn put_rel_wal_record( &mut self, - modification: &mut DatadirModification, + modification: &mut DatadirModification, rel: RelTag, blknum: BlockNumber, rec: ZenithWalRecord, @@ -909,7 +910,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> { fn put_rel_truncation( &mut self, - modification: &mut DatadirModification, + modification: &mut DatadirModification, rel: RelTag, nblocks: BlockNumber, ) -> Result<()> { @@ -917,11 +918,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> { Ok(()) } - fn put_rel_drop( - &mut self, - modification: &mut DatadirModification, - rel: RelTag, - ) -> Result<()> { + fn put_rel_drop(&mut self, modification: &mut DatadirModification, rel: RelTag) -> Result<()> { modification.put_rel_drop(rel)?; Ok(()) } @@ -937,7 +934,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> { fn handle_rel_extend( &mut self, - modification: &mut DatadirModification, + modification: &mut DatadirModification, rel: RelTag, blknum: BlockNumber, ) -> Result<()> { @@ -968,7 +965,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> { fn put_slru_page_image( &mut self, - modification: &mut DatadirModification, + modification: &mut DatadirModification, kind: SlruKind, segno: u32, blknum: BlockNumber, @@ -981,7 +978,7 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> { fn handle_slru_extend( &mut 
self, - modification: &mut DatadirModification, + modification: &mut DatadirModification, kind: SlruKind, segno: u32, blknum: BlockNumber, @@ -1032,9 +1029,9 @@ impl<'a, T: DatadirTimeline> WalIngest<'a, T> { #[cfg(test)] mod tests { use super::*; + use crate::layered_repository::Timeline; use crate::pgdatadir_mapping::create_test_timeline; use crate::repository::repo_harness::*; - use crate::repository::Timeline; use postgres_ffi::v14::xlog_utils::SIZEOF_CHECKPOINT; use postgres_ffi::RELSEG_SIZE; @@ -1046,13 +1043,13 @@ mod tests { forknum: 0, }; - fn assert_current_logical_size(_timeline: &T, _lsn: Lsn) { + fn assert_current_logical_size(_timeline: &Timeline, _lsn: Lsn) { // TODO } static ZERO_CHECKPOINT: Bytes = Bytes::from_static(&[0u8; SIZEOF_CHECKPOINT]); - fn init_walingest_test(tline: &T) -> Result> { + fn init_walingest_test(tline: &Timeline) -> Result { let mut m = tline.begin_modification(Lsn(0x10)); m.put_checkpoint(ZERO_CHECKPOINT.clone())?; m.put_relmap_file(0, 111, Bytes::from(""))?; // dummy relmapper file diff --git a/pageserver/src/walreceiver/connection_manager.rs b/pageserver/src/walreceiver/connection_manager.rs index e8e0a7c52b..2fc44cb26a 100644 --- a/pageserver/src/walreceiver/connection_manager.rs +++ b/pageserver/src/walreceiver/connection_manager.rs @@ -16,7 +16,7 @@ use std::{ time::Duration, }; -use crate::{layered_repository::LayeredTimeline, repository::Timeline}; +use crate::layered_repository::Timeline; use anyhow::Context; use chrono::{NaiveDateTime, Utc}; use etcd_broker::{ @@ -39,7 +39,7 @@ pub(super) fn spawn_connection_manager_task( id: ZTenantTimelineId, broker_loop_prefix: String, mut client: Client, - local_timeline: Arc, + local_timeline: Arc, wal_connect_timeout: Duration, lagging_wal_timeout: Duration, max_lsn_wal_lag: NonZeroU64, @@ -242,7 +242,7 @@ const WALCONNECTION_RETRY_BACKOFF_MULTIPLIER: f64 = 1.5; struct WalreceiverState { id: ZTenantTimelineId, /// Use pageserver data about the timeline to filter out some of the 
safekeepers. - local_timeline: Arc, + local_timeline: Arc, /// The timeout on the connection to safekeeper for WAL streaming. wal_connect_timeout: Duration, /// The timeout to use to determine when the current connection is "stale" and reconnect to the other one. @@ -300,7 +300,7 @@ struct EtcdSkTimeline { impl WalreceiverState { fn new( id: ZTenantTimelineId, - local_timeline: Arc, + local_timeline: Arc, wal_connect_timeout: Duration, lagging_wal_timeout: Duration, max_lsn_wal_lag: NonZeroU64, diff --git a/pageserver/src/walreceiver/walreceiver_connection.rs b/pageserver/src/walreceiver/walreceiver_connection.rs index 025bfeb506..283cc76e66 100644 --- a/pageserver/src/walreceiver/walreceiver_connection.rs +++ b/pageserver/src/walreceiver/walreceiver_connection.rs @@ -20,11 +20,7 @@ use tracing::{debug, error, info, info_span, trace, warn, Instrument}; use super::TaskEvent; use crate::{ - layered_repository::WalReceiverInfo, - pgdatadir_mapping::DatadirTimeline, - repository::{Repository, Timeline}, - tenant_mgr, - walingest::WalIngest, + layered_repository::WalReceiverInfo, repository::Repository, tenant_mgr, walingest::WalIngest, walrecord::DecodedWALRecord, }; use postgres_ffi::v14::waldecoder::WalStreamDecoder; From c19b4a65f96062e1aeb521e17fde204b27ca2158 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Thu, 18 Aug 2022 16:20:52 +0300 Subject: [PATCH 08/63] Remove Repository trait, rename LayeredRepository struct into Repository --- pageserver/src/http/routes.rs | 1 - pageserver/src/layered_repository.rs | 69 +++++++++------ pageserver/src/page_service.rs | 1 - pageserver/src/pgdatadir_mapping.rs | 4 +- pageserver/src/repository.rs | 85 ++----------------- pageserver/src/tenant_mgr.rs | 17 ++-- pageserver/src/tenant_tasks.rs | 1 - pageserver/src/timelines.rs | 15 ++-- .../src/walreceiver/connection_manager.rs | 5 +- .../src/walreceiver/walreceiver_connection.rs | 2 +- 10 files changed, 71 insertions(+), 129 deletions(-) diff --git 
a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 8d300e554a..da21f6883a 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -12,7 +12,6 @@ use super::models::{ TimelineCreateRequest, }; use crate::layered_repository::{metadata::TimelineMetadata, Timeline}; -use crate::repository::Repository; use crate::repository::{LocalTimelineState, RepositoryTimeline}; use crate::storage_sync; use crate::storage_sync::index::{RemoteIndex, RemoteTimeline}; diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs index c0f4aece54..a5877c8482 100644 --- a/pageserver/src/layered_repository.rs +++ b/pageserver/src/layered_repository.rs @@ -31,7 +31,7 @@ use crate::config::PageServerConf; use crate::storage_sync::index::RemoteIndex; use crate::tenant_config::{TenantConf, TenantConfOpt}; -use crate::repository::{GcResult, Repository, RepositoryTimeline}; +use crate::repository::{GcResult, RepositoryTimeline}; use crate::thread_mgr; use crate::walredo::WalRedoManager; use crate::CheckpointConfig; @@ -78,7 +78,7 @@ pub const TIMELINES_SEGMENT_NAME: &str = "timelines"; /// /// Repository consists of multiple timelines. Keep them in a hash table. /// -pub struct LayeredRepository { +pub struct Repository { // Global pageserver config parameters pub conf: &'static PageServerConf, @@ -119,15 +119,19 @@ pub struct LayeredRepository { upload_layers: bool, } -/// Public interface -impl Repository for LayeredRepository { - fn get_timeline(&self, timelineid: ZTimelineId) -> Option> { +/// A repository corresponds to one .neon directory. One repository holds multiple +/// timelines, forked off from the same initial call to 'initdb'. +impl Repository { + /// Get Timeline handle for given zenith timeline ID. + /// This function is idempotent. It doesn't change internal state in any way. 
+ pub fn get_timeline(&self, timelineid: ZTimelineId) -> Option> { let timelines = self.timelines.lock().unwrap(); self.get_timeline_internal(timelineid, &timelines) .map(RepositoryTimeline::from) } - fn get_timeline_load(&self, timelineid: ZTimelineId) -> Result> { + /// Get Timeline handle for locally available timeline. Load it into memory if it is not loaded. + pub fn get_timeline_load(&self, timelineid: ZTimelineId) -> Result> { let mut timelines = self.timelines.lock().unwrap(); match self.get_timeline_load_internal(timelineid, &mut timelines)? { Some(local_loaded_timeline) => Ok(local_loaded_timeline), @@ -138,7 +142,9 @@ impl Repository for LayeredRepository { } } - fn list_timelines(&self) -> Vec<(ZTimelineId, RepositoryTimeline)> { + /// Lists timelines the repository contains. + /// Up to repository's implementation to omit certain timelines that ar not considered ready for use. + pub fn list_timelines(&self) -> Vec<(ZTimelineId, RepositoryTimeline)> { self.timelines .lock() .unwrap() @@ -152,7 +158,9 @@ impl Repository for LayeredRepository { .collect() } - fn create_empty_timeline( + /// Create a new, empty timeline. The caller is responsible for loading data into it + /// Initdb lsn is provided for timeline impl to be able to perform checks for some operations against it. + pub fn create_empty_timeline( &self, timeline_id: ZTimelineId, initdb_lsn: Lsn, @@ -194,7 +202,7 @@ impl Repository for LayeredRepository { } /// Branch a timeline - fn branch_timeline( + pub fn branch_timeline( &self, src: ZTimelineId, dst: ZTimelineId, @@ -284,10 +292,16 @@ impl Repository for LayeredRepository { Ok(()) } - /// Public entry point to GC. All the logic is in the private - /// gc_iteration_internal function, this public facade just wraps it for - /// metrics collection. - fn gc_iteration( + /// perform one garbage collection iteration, removing old data files from disk. + /// this function is periodically called by gc thread. 
+ /// also it can be explicitly requested through page server api 'do_gc' command. + /// + /// 'timelineid' specifies the timeline to GC, or None for all. + /// `horizon` specifies delta from last lsn to preserve all object versions (pitr interval). + /// `checkpoint_before_gc` parameter is used to force compaction of storage before GC + /// to make tests more deterministic. + /// TODO Do we still need it or we can call checkpoint explicitly in tests where needed? + pub fn gc_iteration( &self, target_timeline_id: Option, horizon: u64, @@ -305,7 +319,11 @@ impl Repository for LayeredRepository { }) } - fn compaction_iteration(&self) -> Result<()> { + /// Perform one compaction iteration. + /// This function is periodically called by compactor thread. + /// Also it can be explicitly requested per timeline through page server + /// api's 'compact' command. + pub fn compaction_iteration(&self) -> Result<()> { // Scan through the hashmap and collect a list of all the timelines, // while holding the lock. Then drop the lock and actually perform the // compactions. We don't want to block everything else while the @@ -333,12 +351,11 @@ impl Repository for LayeredRepository { Ok(()) } - /// /// Flush all in-memory data to disk. /// - /// Used at shutdown. + /// Used at graceful shutdown. /// - fn checkpoint(&self) -> Result<()> { + pub fn checkpoint(&self) -> Result<()> { // Scan through the hashmap and collect a list of all the timelines, // while holding the lock. Then drop the lock and actually perform the // checkpoints. 
We don't want to block everything else while the @@ -368,7 +385,8 @@ impl Repository for LayeredRepository { Ok(()) } - fn delete_timeline(&self, timeline_id: ZTimelineId) -> anyhow::Result<()> { + /// Removes timeline-related in-memory data + pub fn delete_timeline(&self, timeline_id: ZTimelineId) -> anyhow::Result<()> { // in order to be retriable detach needs to be idempotent // (or at least to a point that each time the detach is called it can make progress) let mut timelines = self.timelines.lock().unwrap(); @@ -405,7 +423,9 @@ impl Repository for LayeredRepository { Ok(()) } - fn attach_timeline(&self, timeline_id: ZTimelineId) -> Result<()> { + /// Updates timeline based on the `TimelineSyncStatusUpdate`, received from the remote storage synchronization. + /// See [`crate::remote_storage`] for more details about the synchronization. + pub fn attach_timeline(&self, timeline_id: ZTimelineId) -> Result<()> { debug!("attach timeline_id: {}", timeline_id,); match self.timelines.lock().unwrap().entry(timeline_id) { Entry::Occupied(_) => bail!("We completed a download for a timeline that already exists in repository. This is a bug."), @@ -419,13 +439,14 @@ impl Repository for LayeredRepository { Ok(()) } - fn get_remote_index(&self) -> &RemoteIndex { + /// Allows to retrieve remote timeline index from the tenant. Used in walreceiver to grab remote consistent lsn. 
+ pub fn get_remote_index(&self) -> &RemoteIndex { &self.remote_index } } /// Private functions -impl LayeredRepository { +impl Repository { pub fn get_checkpoint_distance(&self) -> u64 { let tenant_conf = self.tenant_conf.read().unwrap(); tenant_conf @@ -515,7 +536,7 @@ impl LayeredRepository { tenant_conf.update(&new_tenant_conf); - LayeredRepository::persist_tenant_config(self.conf, self.tenant_id, *tenant_conf)?; + Repository::persist_tenant_config(self.conf, self.tenant_id, *tenant_conf)?; Ok(()) } @@ -613,8 +634,8 @@ impl LayeredRepository { tenant_id: ZTenantId, remote_index: RemoteIndex, upload_layers: bool, - ) -> LayeredRepository { - LayeredRepository { + ) -> Repository { + Repository { tenant_id, file_lock: RwLock::new(()), conf, diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index f5f1e4d7bd..e6114c0fc5 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -34,7 +34,6 @@ use crate::layered_repository::Timeline; use crate::pgdatadir_mapping::LsnForTimestamp; use crate::profiling::profpoint_start; use crate::reltag::RelTag; -use crate::repository::Repository; use crate::tenant_mgr; use crate::thread_mgr; use crate::thread_mgr::ThreadKind; diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index d10e48393c..beaac292ec 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -1390,8 +1390,8 @@ fn is_slru_block_key(key: Key) -> bool { // #[cfg(test)] -pub fn create_test_timeline( - repo: R, +pub fn create_test_timeline( + repo: crate::layered_repository::Repository, timeline_id: utils::zid::ZTimelineId, ) -> Result> { let tline = repo.create_empty_timeline(timeline_id, Lsn(8))?; diff --git a/pageserver/src/repository.rs b/pageserver/src/repository.rs index 5cdc27a846..d0e1ed24b6 100644 --- a/pageserver/src/repository.rs +++ b/pageserver/src/repository.rs @@ -1,6 +1,4 @@ use 
crate::layered_repository::metadata::TimelineMetadata; -use crate::layered_repository::Timeline; -use crate::storage_sync::index::RemoteIndex; use crate::walrecord::ZenithWalRecord; use anyhow::{bail, Result}; use byteorder::{ByteOrder, BE}; @@ -10,7 +8,7 @@ use std::fmt; use std::ops::{AddAssign, Range}; use std::sync::Arc; use std::time::Duration; -use utils::{lsn::Lsn, zid::ZTimelineId}; +use utils::lsn::Lsn; #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)] /// Key used in the Repository kv-store. @@ -178,76 +176,6 @@ impl Value { } } -/// -/// A repository corresponds to one .neon directory. One repository holds multiple -/// timelines, forked off from the same initial call to 'initdb'. -pub trait Repository: Send + Sync { - /// Updates timeline based on the `TimelineSyncStatusUpdate`, received from the remote storage synchronization. - /// See [`crate::remote_storage`] for more details about the synchronization. - fn attach_timeline(&self, timeline_id: ZTimelineId) -> Result<()>; - - /// Get Timeline handle for given zenith timeline ID. - /// This function is idempotent. It doesn't change internal state in any way. - fn get_timeline(&self, timelineid: ZTimelineId) -> Option>; - - /// Get Timeline handle for locally available timeline. Load it into memory if it is not loaded. - fn get_timeline_load(&self, timelineid: ZTimelineId) -> Result>; - - /// Lists timelines the repository contains. - /// Up to repository's implementation to omit certain timelines that ar not considered ready for use. - fn list_timelines(&self) -> Vec<(ZTimelineId, RepositoryTimeline)>; - - /// Create a new, empty timeline. The caller is responsible for loading data into it - /// Initdb lsn is provided for timeline impl to be able to perform checks for some operations against it. 
- fn create_empty_timeline( - &self, - timeline_id: ZTimelineId, - initdb_lsn: Lsn, - ) -> Result>; - - /// Branch a timeline - fn branch_timeline( - &self, - src: ZTimelineId, - dst: ZTimelineId, - start_lsn: Option, - ) -> Result<()>; - - /// Flush all data to disk. - /// - /// this is used at graceful shutdown. - fn checkpoint(&self) -> Result<()>; - - /// perform one garbage collection iteration, removing old data files from disk. - /// this function is periodically called by gc thread. - /// also it can be explicitly requested through page server api 'do_gc' command. - /// - /// 'timelineid' specifies the timeline to GC, or None for all. - /// `horizon` specifies delta from last lsn to preserve all object versions (pitr interval). - /// `checkpoint_before_gc` parameter is used to force compaction of storage before GC - /// to make tests more deterministic. - /// TODO Do we still need it or we can call checkpoint explicitly in tests where needed? - fn gc_iteration( - &self, - timelineid: Option, - horizon: u64, - pitr: Duration, - checkpoint_before_gc: bool, - ) -> Result; - - /// Perform one compaction iteration. - /// This function is periodically called by compactor thread. - /// Also it can be explicitly requested per timeline through page server - /// api's 'compact' command. - fn compaction_iteration(&self) -> Result<()>; - - /// removes timeline-related in-memory data - fn delete_timeline(&self, timeline_id: ZTimelineId) -> anyhow::Result<()>; - - /// Allows to retrieve remote timeline index from the repo. Used in walreceiver to grab remote consistent lsn. - fn get_remote_index(&self) -> &RemoteIndex; -} - /// A timeline, that belongs to the current repository. pub enum RepositoryTimeline { /// Timeline, with its files present locally in pageserver's working directory. 
@@ -332,16 +260,17 @@ pub mod repo_harness { use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard}; use std::{fs, path::PathBuf}; + use crate::storage_sync::index::RemoteIndex; use crate::{ config::PageServerConf, - layered_repository::LayeredRepository, + layered_repository::Repository, walredo::{WalRedoError, WalRedoManager}, }; use super::*; use crate::tenant_config::{TenantConf, TenantConfOpt}; use hex_literal::hex; - use utils::zid::ZTenantId; + use utils::zid::{ZTenantId, ZTimelineId}; pub const TIMELINE_ID: ZTimelineId = ZTimelineId::from_array(hex!("11223344556677881122334455667788")); @@ -427,14 +356,14 @@ pub mod repo_harness { }) } - pub fn load(&self) -> LayeredRepository { + pub fn load(&self) -> Repository { self.try_load().expect("failed to load test repo") } - pub fn try_load(&self) -> Result { + pub fn try_load(&self) -> Result { let walredo_mgr = Arc::new(TestRedoManager); - let repo = LayeredRepository::new( + let repo = Repository::new( self.conf, TenantConfOpt::from(self.tenant_conf), walredo_mgr, diff --git a/pageserver/src/tenant_mgr.rs b/pageserver/src/tenant_mgr.rs index 36c3e569a6..5afa38c926 100644 --- a/pageserver/src/tenant_mgr.rs +++ b/pageserver/src/tenant_mgr.rs @@ -3,8 +3,7 @@ use crate::config::PageServerConf; use crate::http::models::TenantInfo; -use crate::layered_repository::{load_metadata, LayeredRepository, Timeline}; -use crate::repository::Repository; +use crate::layered_repository::{load_metadata, Repository, Timeline}; use crate::storage_sync::index::{RemoteIndex, RemoteTimelineIndex}; use crate::storage_sync::{self, LocalTimelineInitStatus, SyncStartupData}; use crate::tenant_config::TenantConfOpt; @@ -94,7 +93,7 @@ mod tenants_state { struct Tenant { state: TenantState, /// Contains in-memory state, including the timeline that might not yet flushed on disk or loaded form disk. - repo: Arc, + repo: Arc, /// Timelines, located locally in the pageserver's datadir. 
/// Timelines can entirely be removed entirely by the `detach` operation only. /// @@ -365,7 +364,7 @@ pub fn set_tenant_state(tenant_id: ZTenantId, new_state: TenantState) -> anyhow: Ok(()) } -pub fn get_repository_for_tenant(tenant_id: ZTenantId) -> anyhow::Result> { +pub fn get_repository_for_tenant(tenant_id: ZTenantId) -> anyhow::Result> { let m = tenants_state::read_tenants(); let tenant = m .get(&tenant_id) @@ -484,7 +483,7 @@ pub fn detach_tenant(conf: &'static PageServerConf, tenant_id: ZTenantId) -> any } fn load_local_timeline( - repo: &LayeredRepository, + repo: &Repository, timeline_id: ZTimelineId, ) -> anyhow::Result> { let inmem_timeline = repo.get_timeline_load(timeline_id).with_context(|| { @@ -588,7 +587,7 @@ fn init_local_repository( } fn attach_downloaded_tenant( - repo: &LayeredRepository, + repo: &Repository, downloaded_timelines: HashSet, ) -> anyhow::Result<()> { let mut registration_queue = Vec::with_capacity(downloaded_timelines.len()); @@ -630,14 +629,14 @@ fn load_local_repo( conf: &'static PageServerConf, tenant_id: ZTenantId, remote_index: &RemoteIndex, -) -> anyhow::Result> { +) -> anyhow::Result> { let mut m = tenants_state::write_tenants(); let tenant = m.entry(tenant_id).or_insert_with(|| { // Set up a WAL redo manager, for applying WAL records. let walredo_mgr = PostgresRedoManager::new(conf, tenant_id); // Set up an object repository, for actual data storage. 
- let repo: Arc = Arc::new(LayeredRepository::new( + let repo: Arc = Arc::new(Repository::new( conf, TenantConfOpt::default(), Arc::new(walredo_mgr), @@ -653,7 +652,7 @@ fn load_local_repo( }); // Restore tenant config - let tenant_conf = LayeredRepository::load_tenant_config(conf, tenant_id)?; + let tenant_conf = Repository::load_tenant_config(conf, tenant_id)?; tenant.repo.update_tenant_config(tenant_conf)?; Ok(Arc::clone(&tenant.repo)) diff --git a/pageserver/src/tenant_tasks.rs b/pageserver/src/tenant_tasks.rs index e51744d3cc..ca239ae254 100644 --- a/pageserver/src/tenant_tasks.rs +++ b/pageserver/src/tenant_tasks.rs @@ -5,7 +5,6 @@ use std::collections::HashMap; use std::ops::ControlFlow; use std::time::Duration; -use crate::repository::Repository; use crate::tenant_mgr::TenantState; use crate::thread_mgr::ThreadKind; use crate::{tenant_mgr, thread_mgr}; diff --git a/pageserver/src/timelines.rs b/pageserver/src/timelines.rs index 6a55dd286e..4f760751db 100644 --- a/pageserver/src/timelines.rs +++ b/pageserver/src/timelines.rs @@ -22,11 +22,10 @@ use crate::import_datadir; use crate::tenant_mgr; use crate::CheckpointConfig; use crate::{ - config::PageServerConf, repository::Repository, storage_sync::index::RemoteIndex, - tenant_config::TenantConfOpt, + config::PageServerConf, storage_sync::index::RemoteIndex, tenant_config::TenantConfOpt, }; use crate::{ - layered_repository::{LayeredRepository, Timeline}, + layered_repository::{Repository, Timeline}, walredo::WalRedoManager, }; @@ -42,7 +41,7 @@ pub fn create_repo( tenant_id: ZTenantId, wal_redo_manager: Arc, remote_index: RemoteIndex, -) -> Result> { +) -> Result> { let repo_dir = conf.tenant_path(&tenant_id); ensure!( !repo_dir.exists(), @@ -57,9 +56,9 @@ pub fn create_repo( info!("created directory structure in {}", repo_dir.display()); // Save tenant's config - LayeredRepository::persist_tenant_config(conf, tenant_id, tenant_conf)?; + Repository::persist_tenant_config(conf, tenant_id, tenant_conf)?; - 
Ok(Arc::new(LayeredRepository::new( + Ok(Arc::new(Repository::new( conf, tenant_conf, wal_redo_manager, @@ -104,11 +103,11 @@ fn run_initdb(conf: &'static PageServerConf, initdbpath: &Path) -> Result<()> { // - run initdb to init temporary instance and get bootstrap data // - after initialization complete, remove the temp dir. // -fn bootstrap_timeline( +fn bootstrap_timeline( conf: &'static PageServerConf, tenantid: ZTenantId, tli: ZTimelineId, - repo: &R, + repo: &Repository, ) -> Result<()> { let initdb_path = conf .tenant_path(&tenantid) diff --git a/pageserver/src/walreceiver/connection_manager.rs b/pageserver/src/walreceiver/connection_manager.rs index 2fc44cb26a..912073a731 100644 --- a/pageserver/src/walreceiver/connection_manager.rs +++ b/pageserver/src/walreceiver/connection_manager.rs @@ -735,10 +735,7 @@ fn wal_stream_connection_string( #[cfg(test)] mod tests { - use crate::repository::{ - repo_harness::{RepoHarness, TIMELINE_ID}, - Repository, - }; + use crate::repository::repo_harness::{RepoHarness, TIMELINE_ID}; use super::*; diff --git a/pageserver/src/walreceiver/walreceiver_connection.rs b/pageserver/src/walreceiver/walreceiver_connection.rs index 283cc76e66..b5f266614e 100644 --- a/pageserver/src/walreceiver/walreceiver_connection.rs +++ b/pageserver/src/walreceiver/walreceiver_connection.rs @@ -20,7 +20,7 @@ use tracing::{debug, error, info, info_span, trace, warn, Instrument}; use super::TaskEvent; use crate::{ - layered_repository::WalReceiverInfo, repository::Repository, tenant_mgr, walingest::WalIngest, + layered_repository::WalReceiverInfo, tenant_mgr, walingest::WalIngest, walrecord::DecodedWALRecord, }; use postgres_ffi::v14::waldecoder::WalStreamDecoder; From c634cb1d36b503cf03a0891c8be7b9e844525b76 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Thu, 18 Aug 2022 16:27:49 +0300 Subject: [PATCH 09/63] Remove TimelineWriter trait, rename LayeredTimelineWriter struct into TimelineWriter --- pageserver/src/layered_repository/timeline.rs 
| 38 ++++++++++++------- pageserver/src/repository.rs | 28 +------------- 2 files changed, 27 insertions(+), 39 deletions(-) diff --git a/pageserver/src/layered_repository/timeline.rs b/pageserver/src/layered_repository/timeline.rs index da3a6981da..1b77f1fab4 100644 --- a/pageserver/src/layered_repository/timeline.rs +++ b/pageserver/src/layered_repository/timeline.rs @@ -51,7 +51,7 @@ use utils::{ zid::{ZTenantId, ZTimelineId}, }; -use crate::repository::{GcResult, RepositoryTimeline, TimelineWriter}; +use crate::repository::{GcResult, RepositoryTimeline}; use crate::repository::{Key, Value}; use crate::thread_mgr; use crate::virtual_file::VirtualFile; @@ -597,12 +597,11 @@ impl Timeline { /// FIXME: This ought to return &'a TimelineWriter, where TimelineWriter /// is a generic type in this trait. But that doesn't currently work in /// Rust: https://rust-lang.github.io/rfcs/1598-generic_associated_types.html - /// TODO kb replace with the concrete type - pub fn writer<'a>(&'a self) -> Box { - Box::new(LayeredTimelineWriter { + pub fn writer(&self) -> TimelineWriter<'_> { + TimelineWriter { tl: self, _write_guard: self.write_lock.lock().unwrap(), - }) + } } } @@ -2204,12 +2203,16 @@ fn layer_traversal_error( Err(msg_iter.fold(err, |err, msg| err.context(msg))) } -struct LayeredTimelineWriter<'a> { +/// Various functions to mutate the timeline. +// TODO Currently, Deref is used to allow easy access to read methods from this trait. +// This is probably considered a bad practice in Rust and should be fixed eventually, +// but will cause large code changes. 
+pub struct TimelineWriter<'a> { tl: &'a Timeline, _write_guard: MutexGuard<'a, ()>, } -impl Deref for LayeredTimelineWriter<'_> { +impl Deref for TimelineWriter<'_> { type Target = Timeline; fn deref(&self) -> &Self::Target { @@ -2217,23 +2220,32 @@ impl Deref for LayeredTimelineWriter<'_> { } } -impl<'a> TimelineWriter<'_> for LayeredTimelineWriter<'a> { - fn put(&self, key: Key, lsn: Lsn, value: &Value) -> Result<()> { +impl<'a> TimelineWriter<'a> { + /// Put a new page version that can be constructed from a WAL record + /// + /// This will implicitly extend the relation, if the page is beyond the + /// current end-of-file. + pub fn put(&self, key: Key, lsn: Lsn, value: &Value) -> Result<()> { self.tl.put_value(key, lsn, value) } - fn delete(&self, key_range: Range, lsn: Lsn) -> Result<()> { + pub fn delete(&self, key_range: Range, lsn: Lsn) -> Result<()> { self.tl.put_tombstone(key_range, lsn) } - /// + /// Track the end of the latest digested WAL record. /// Remember the (end of) last valid WAL record remembered in the timeline. /// - fn finish_write(&self, new_lsn: Lsn) { + /// Call this after you have finished writing all the WAL up to 'lsn'. + /// + /// 'lsn' must be aligned. This wakes up any wait_lsn() callers waiting for + /// the 'lsn' or anything older. The previous last record LSN is stored alongside + /// the latest and can be read. 
+ pub fn finish_write(&self, new_lsn: Lsn) { self.tl.finish_write(new_lsn); } - fn update_current_logical_size(&self, delta: isize) { + pub fn update_current_logical_size(&self, delta: isize) { self.tl .current_logical_size .fetch_add(delta, AtomicOrdering::SeqCst); diff --git a/pageserver/src/repository.rs b/pageserver/src/repository.rs index d0e1ed24b6..dc031c03ee 100644 --- a/pageserver/src/repository.rs +++ b/pageserver/src/repository.rs @@ -8,7 +8,6 @@ use std::fmt; use std::ops::{AddAssign, Range}; use std::sync::Arc; use std::time::Duration; -use utils::lsn::Lsn; #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)] /// Key used in the Repository kv-store. @@ -228,37 +227,13 @@ impl AddAssign for GcResult { } } -/// Various functions to mutate the timeline. -// TODO Currently, Deref is used to allow easy access to read methods from this trait. -// This is probably considered a bad practice in Rust and should be fixed eventually, -// but will cause large code changes. -pub trait TimelineWriter<'a> { - /// Put a new page version that can be constructed from a WAL record - /// - /// This will implicitly extend the relation, if the page is beyond the - /// current end-of-file. - fn put(&self, key: Key, lsn: Lsn, value: &Value) -> Result<()>; - - fn delete(&self, key_range: Range, lsn: Lsn) -> Result<()>; - - /// Track the end of the latest digested WAL record. - /// - /// Call this after you have finished writing all the WAL up to 'lsn'. - /// - /// 'lsn' must be aligned. This wakes up any wait_lsn() callers waiting for - /// the 'lsn' or anything older. The previous last record LSN is stored alongside - /// the latest and can be read. 
- fn finish_write(&self, lsn: Lsn); - - fn update_current_logical_size(&self, delta: isize); -} - #[cfg(test)] pub mod repo_harness { use bytes::BytesMut; use once_cell::sync::Lazy; use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard}; use std::{fs, path::PathBuf}; + use utils::lsn::Lsn; use crate::storage_sync::index::RemoteIndex; use crate::{ @@ -440,6 +415,7 @@ mod tests { use bytes::BytesMut; use hex_literal::hex; use once_cell::sync::Lazy; + use utils::lsn::Lsn; static TEST_KEY: Lazy = Lazy::new(|| Key::from_slice(&hex!("112222222233333333444444445500000001"))); From 187a7604099f5a003b6d8d2b85955ee24bb1bd1b Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Thu, 18 Aug 2022 16:51:56 +0300 Subject: [PATCH 10/63] Reset codestyle cargo cache --- .github/workflows/build_and_test.yml | 6 +++--- .github/workflows/codestyle.yml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 3a2e8bad64..4cabd3d672 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -121,8 +121,8 @@ jobs: target/ # Fall back to older versions of the key, if no cache for current Cargo.lock was found key: | - v6-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }} - v6-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}- + v7-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }} + v7-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}- - name: Cache postgres build id: cache_pg @@ -325,7 +325,7 @@ jobs: !~/.cargo/registry/src ~/.cargo/git/ target/ - key: v5-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }} + key: v7-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }} - name: 
Get Neon artifact uses: ./.github/actions/download diff --git a/.github/workflows/codestyle.yml b/.github/workflows/codestyle.yml index 6f13a38dea..d0685f8fd2 100644 --- a/.github/workflows/codestyle.yml +++ b/.github/workflows/codestyle.yml @@ -65,7 +65,7 @@ jobs: - name: Cache postgres build id: cache_pg - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: | tmp_install/ @@ -94,14 +94,14 @@ jobs: - name: Cache cargo deps id: cache_cargo - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: | ~/.cargo/registry !~/.cargo/registry/src ~/.cargo/git target - key: v2-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust-${{ matrix.rust_toolchain }} + key: v3-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust-${{ matrix.rust_toolchain }} - name: Run cargo clippy run: ./run_clippy.sh From aaa60c92ca18a1d4504a56a1b283a560afdb3af9 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 19 Aug 2022 16:24:47 +0300 Subject: [PATCH 11/63] Use u64/i64 for logical size, comment on why to use signed i64. usize/isize type corresponds to the CPU architecture's pointer width, i.e. 64 bits on a 64-bit platform and 32 bits on a 32-bit platform. The logical size of a database has nothing to do with that, so u64/i64 is more appropriate. It doesn't make any difference in practice as long as you're on a 64-bit platform, and it's hard to imagine anyone wanting to run the pageserver on a 32-bit platform, but let's be tidy. Also add a comment on why we use signed i64 for the logical size variable, even though size should never be negative. I'm not sure the reasons are very good, but at least this documents them, and hints at some possible better solutions.
--- pageserver/src/http/models.rs | 6 ++-- pageserver/src/layered_repository/timeline.rs | 33 ++++++++++++++----- pageserver/src/pgdatadir_mapping.rs | 24 +++++++------- 3 files changed, 39 insertions(+), 24 deletions(-) diff --git a/pageserver/src/http/models.rs b/pageserver/src/http/models.rs index a4f270580f..232c202ed9 100644 --- a/pageserver/src/http/models.rs +++ b/pageserver/src/http/models.rs @@ -129,9 +129,9 @@ pub struct LocalTimelineInfo { pub latest_gc_cutoff_lsn: Lsn, #[serde_as(as = "DisplayFromStr")] pub disk_consistent_lsn: Lsn, - pub current_logical_size: Option, // is None when timeline is Unloaded - pub current_physical_size: Option, // is None when timeline is Unloaded - pub current_logical_size_non_incremental: Option, + pub current_logical_size: Option, // is None when timeline is Unloaded + pub current_physical_size: Option, // is None when timeline is Unloaded + pub current_logical_size_non_incremental: Option, pub current_physical_size_non_incremental: Option, pub timeline_state: LocalTimelineState, diff --git a/pageserver/src/layered_repository/timeline.rs b/pageserver/src/layered_repository/timeline.rs index 1b77f1fab4..8f3004af98 100644 --- a/pageserver/src/layered_repository/timeline.rs +++ b/pageserver/src/layered_repository/timeline.rs @@ -15,7 +15,7 @@ use std::fs::{File, OpenOptions}; use std::io::Write; use std::ops::{Deref, Range}; use std::path::PathBuf; -use std::sync::atomic::{self, AtomicBool, AtomicIsize, Ordering as AtomicOrdering}; +use std::sync::atomic::{self, AtomicBool, AtomicI64, Ordering as AtomicOrdering}; use std::sync::{Arc, Mutex, MutexGuard, RwLock, RwLockReadGuard, TryLockError}; use std::time::{Duration, Instant, SystemTime}; @@ -376,7 +376,22 @@ pub struct Timeline { repartition_threshold: u64, /// Current logical size of the "datadir", at the last LSN. - current_logical_size: AtomicIsize, + /// + /// Size shouldn't ever be negative, but this is signed for two reasons: + /// + /// 1. 
If we initialized the "baseline" size lazily, while we already + /// process incoming WAL, the incoming WAL records could decrement the + /// variable and temporarily make it negative. (This is just future-proofing; + /// the initialization is currently not done lazily.) + /// + /// 2. If there is a bug and we e.g. forget to increment it in some cases + /// when size grows, but remember to decrement it when it shrinks again, the + /// variable could go negative. In that case, it seems better to at least + /// try to keep tracking it, rather than clamp or overflow it. Note that + /// get_current_logical_size() will clamp the returned value to zero if it's + /// negative, and log an error. Could set it permanently to zero or some + /// special value to indicate "broken" instead, but this will do for now. + current_logical_size: AtomicI64, /// Information about the last processed message by the WAL receiver, /// or None if WAL receiver has not received anything for this timeline @@ -695,7 +710,7 @@ impl Timeline { latest_gc_cutoff_lsn: RwLock::new(metadata.latest_gc_cutoff_lsn()), initdb_lsn: metadata.initdb_lsn(), - current_logical_size: AtomicIsize::new(0), + current_logical_size: AtomicI64::new(0), partitioning: Mutex::new((KeyPartitioning::new(), Lsn(0))), repartition_threshold: 0, @@ -813,7 +828,7 @@ impl Timeline { // Logical size 0 means that it was not initialized, so don't believe that. 
if ancestor_logical_size != 0 && ancestor.get_last_record_lsn() == self.ancestor_lsn { self.current_logical_size - .store(ancestor_logical_size as isize, AtomicOrdering::SeqCst); + .store(ancestor_logical_size as i64, AtomicOrdering::SeqCst); debug!( "logical size copied from ancestor: {}", ancestor_logical_size @@ -828,7 +843,7 @@ impl Timeline { let last_lsn = self.get_last_record_lsn(); let logical_size = self.get_current_logical_size_non_incremental(last_lsn)?; self.current_logical_size - .store(logical_size as isize, AtomicOrdering::SeqCst); + .store(logical_size as i64, AtomicOrdering::SeqCst); debug!("calculated logical size the hard way: {}", logical_size); timer.stop_and_record(); @@ -837,10 +852,10 @@ impl Timeline { /// Retrieve current logical size of the timeline /// - /// NOTE: counted incrementally, includes ancestors, - pub fn get_current_logical_size(&self) -> usize { + /// NOTE: counted incrementally, includes ancestors. + pub fn get_current_logical_size(&self) -> u64 { let current_logical_size = self.current_logical_size.load(AtomicOrdering::Acquire); - match usize::try_from(current_logical_size) { + match u64::try_from(current_logical_size) { Ok(sz) => sz, Err(_) => { error!( @@ -2245,7 +2260,7 @@ impl<'a> TimelineWriter<'a> { self.tl.finish_write(new_lsn); } - pub fn update_current_logical_size(&self, delta: isize) { + pub fn update_current_logical_size(&self, delta: i64) { self.tl .current_logical_size .fetch_add(delta, AtomicOrdering::SeqCst); diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index beaac292ec..0ace850a82 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -364,22 +364,22 @@ impl Timeline { /// /// Only relation blocks are counted currently. That excludes metadata, /// SLRUs, twophase files etc. 
- pub fn get_current_logical_size_non_incremental(&self, lsn: Lsn) -> Result { + pub fn get_current_logical_size_non_incremental(&self, lsn: Lsn) -> Result { // Fetch list of database dirs and iterate them let buf = self.get(DBDIR_KEY, lsn)?; let dbdir = DbDirectory::des(&buf)?; - let mut total_size: usize = 0; + let mut total_size: u64 = 0; for (spcnode, dbnode) in dbdir.dbdirs.keys() { for rel in self.list_rels(*spcnode, *dbnode, lsn)? { let relsize_key = rel_size_to_key(rel); let mut buf = self.get(relsize_key, lsn)?; let relsize = buf.get_u32_le(); - total_size += relsize as usize; + total_size += relsize as u64; } } - Ok(total_size * BLCKSZ as usize) + Ok(total_size * BLCKSZ as u64) } /// @@ -517,7 +517,7 @@ pub struct DatadirModification<'a> { // underlying key-value store by the 'finish' function. pending_updates: HashMap, pending_deletions: Vec>, - pending_nblocks: isize, + pending_nblocks: i64, } impl<'a> DatadirModification<'a> { @@ -676,7 +676,7 @@ impl<'a> DatadirModification<'a> { } // Update logical database size. - self.pending_nblocks -= total_blocks as isize; + self.pending_nblocks -= total_blocks as i64; // Delete all relations and metadata files for the spcnode/dnode self.delete(dbdir_key_range(spcnode, dbnode)); @@ -719,7 +719,7 @@ impl<'a> DatadirModification<'a> { let buf = nblocks.to_le_bytes(); self.put(size_key, Value::Image(Bytes::from(buf.to_vec()))); - self.pending_nblocks += nblocks as isize; + self.pending_nblocks += nblocks as i64; // Update relation size cache self.tline.set_cached_rel_size(rel, self.lsn, nblocks); @@ -749,7 +749,7 @@ impl<'a> DatadirModification<'a> { self.tline.set_cached_rel_size(rel, self.lsn, nblocks); // Update logical database size. 
- self.pending_nblocks -= old_size as isize - nblocks as isize; + self.pending_nblocks -= old_size as i64 - nblocks as i64; } Ok(()) } @@ -771,7 +771,7 @@ impl<'a> DatadirModification<'a> { // Update relation size cache self.tline.set_cached_rel_size(rel, self.lsn, nblocks); - self.pending_nblocks += nblocks as isize - old_size as isize; + self.pending_nblocks += nblocks as i64 - old_size as i64; } Ok(()) } @@ -794,7 +794,7 @@ impl<'a> DatadirModification<'a> { // update logical size let size_key = rel_size_to_key(rel); let old_size = self.get(size_key)?.get_u32_le(); - self.pending_nblocks -= old_size as isize; + self.pending_nblocks -= old_size as i64; // Remove enty from relation size cache self.tline.remove_cached_rel_size(&rel); @@ -936,7 +936,7 @@ impl<'a> DatadirModification<'a> { result?; if pending_nblocks != 0 { - writer.update_current_logical_size(pending_nblocks * BLCKSZ as isize); + writer.update_current_logical_size(pending_nblocks * BLCKSZ as i64); self.pending_nblocks = 0; } @@ -964,7 +964,7 @@ impl<'a> DatadirModification<'a> { writer.finish_write(lsn); if pending_nblocks != 0 { - writer.update_current_logical_size(pending_nblocks * BLCKSZ as isize); + writer.update_current_logical_size(pending_nblocks * BLCKSZ as i64); } Ok(()) From 8ac5a285a14d501123b99b5f1dc78d3e3852243a Mon Sep 17 00:00:00 2001 From: MMeent Date: Fri, 19 Aug 2022 20:02:36 +0200 Subject: [PATCH 12/63] Update vendor/postgres to one that is rebased onto REL_14_5 (#2312) This was previously based on REL_14_4 Protected tag of main before rebase is at main-before-rebase-REL_14_5 --- vendor/postgres | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/postgres b/vendor/postgres index 3f315a1ec3..a479855158 160000 --- a/vendor/postgres +++ b/vendor/postgres @@ -1 +1 @@ -Subproject commit 3f315a1ec336b3a22a09d2015ce91697def4904e +Subproject commit a4798551587fb5a52740687a341af83b28733dc6 From daba4c7405b132eb22d753bc727353cf740c9bfa Mon Sep 17 00:00:00 2001 From: 
Heikki Linnakangas Date: Fri, 19 Aug 2022 21:57:00 +0300 Subject: [PATCH 13/63] Add a section in glossary to explain what "logical size" means. (#2306) --- docs/glossary.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/docs/glossary.md b/docs/glossary.md index 665596c68d..25c66828c0 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -92,6 +92,7 @@ The layer map tracks what layers exist in a timeline. ### Layered repository Neon repository implementation that keeps data in layers. + ### LSN The Log Sequence Number (LSN) is a unique identifier of the WAL record[] in the WAL log. @@ -125,6 +126,26 @@ TODO: use this name consistently in remote storage code. Now `disk_consistent_ls * `ancestor_lsn` - LSN of the branch point (the LSN at which this branch was created) TODO: add table that describes mapping between PostgreSQL (compute), safekeeper and pageserver LSNs. + +### Logical size + +The pageserver tracks the "logical size" of a timeline. It is the +total size of all relations in all Postgres databases on the +timeline. It includes all user and system tables, including their FSM +and VM forks. But it does not include SLRUs, twophase files or any +other such data or metadata that lives outside relations. + +The logical size is calculated by the pageserver, and is sent to +PostgreSQL via feedback messages to the safekeepers. PostgreSQL uses +the logical size to enforce the size limit in the free tier. The +logical size is also shown to users in the web console. + +The logical size is not affected by branches or the physical layout of +layer files in the pageserver. If you have a database with 1 GB +logical size and you create a branch of it, both branches will have 1 +GB logical size, even though the branch is copy-on-write and won't +consume any extra physical disk space until you make changes to it. + ### Page (block) The basic structure used to store relation data. All pages are of the same size. 
From 84cd40b4162fc692e359c646da9ec9d74a19c4d8 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 19 Aug 2022 22:21:15 +0300 Subject: [PATCH 14/63] rustfmt fixes. Not sure why these don't show up as CI failures, but on my laptop, rustfmt insists. --- libs/postgres_ffi/src/nonrelfile_utils.rs | 2 +- libs/postgres_ffi/src/waldecoder.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/postgres_ffi/src/nonrelfile_utils.rs b/libs/postgres_ffi/src/nonrelfile_utils.rs index 04ef346d88..1de1d367e0 100644 --- a/libs/postgres_ffi/src/nonrelfile_utils.rs +++ b/libs/postgres_ffi/src/nonrelfile_utils.rs @@ -1,8 +1,8 @@ //! //! Common utilities for dealing with PostgreSQL non-relation files. //! -use crate::transaction_id_precedes; use super::pg_constants; +use crate::transaction_id_precedes; use bytes::BytesMut; use log::*; diff --git a/libs/postgres_ffi/src/waldecoder.rs b/libs/postgres_ffi/src/waldecoder.rs index 0e1c9567cb..768e79621d 100644 --- a/libs/postgres_ffi/src/waldecoder.rs +++ b/libs/postgres_ffi/src/waldecoder.rs @@ -8,9 +8,9 @@ //! to look deeper into the WAL records to also understand which blocks they modify, the code //! for that is in pageserver/src/walrecord.rs //! +use super::bindings::{XLogLongPageHeaderData, XLogPageHeaderData, XLogRecord, XLOG_PAGE_MAGIC}; use super::pg_constants; use super::xlog_utils::*; -use super::bindings::{XLogLongPageHeaderData, XLogPageHeaderData, XLogRecord, XLOG_PAGE_MAGIC}; use bytes::{Buf, BufMut, Bytes, BytesMut}; use crc32c::*; use log::*; From d48177d0d809e11cd43f3f3a13799f63d98e617a Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 19 Aug 2022 22:21:33 +0300 Subject: [PATCH 15/63] Expose timeline logical size as a prometheus metric. Physical size was already exposed, and it'd be nice to show both logical and physical size side by side in our Grafana dashboards.
--- pageserver/src/layered_repository/timeline.rs | 58 ++++++++++++++++--- .../batch_others/test_timeline_size.py | 33 +++++++++-- 2 files changed, 79 insertions(+), 12 deletions(-) diff --git a/pageserver/src/layered_repository/timeline.rs b/pageserver/src/layered_repository/timeline.rs index 8f3004af98..7bbde53dbd 100644 --- a/pageserver/src/layered_repository/timeline.rs +++ b/pageserver/src/layered_repository/timeline.rs @@ -139,6 +139,15 @@ static CURRENT_PHYSICAL_SIZE: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); +static CURRENT_LOGICAL_SIZE: Lazy = Lazy::new(|| { + register_int_gauge_vec!( + "pageserver_current_logical_size", + "Current logical size grouped by timeline", + &["tenant_id", "timeline_id"] + ) + .expect("failed to define a metric") +}); + // Metrics for cloud upload. These metrics reflect data uploaded to cloud storage, // or in testing they estimate how much we would upload if we did. static NUM_PERSISTENT_FILES_CREATED: Lazy = Lazy::new(|| { @@ -234,6 +243,8 @@ struct TimelineMetrics { pub last_record_gauge: IntGauge, pub wait_lsn_time_histo: Histogram, pub current_physical_size_gauge: UIntGauge, + /// copy of LayeredTimeline.current_logical_size + pub current_logical_size_gauge: IntGauge, } impl TimelineMetrics { @@ -271,6 +282,9 @@ impl TimelineMetrics { let current_physical_size_gauge = CURRENT_PHYSICAL_SIZE .get_metric_with_label_values(&[&tenant_id, &timeline_id]) .unwrap(); + let current_logical_size_gauge = CURRENT_LOGICAL_SIZE + .get_metric_with_label_values(&[&tenant_id, &timeline_id]) + .unwrap(); TimelineMetrics { reconstruct_time_histo, @@ -283,6 +297,7 @@ impl TimelineMetrics { last_record_gauge, wait_lsn_time_histo, current_physical_size_gauge, + current_logical_size_gauge, } } } @@ -391,6 +406,11 @@ pub struct Timeline { /// get_current_logical_size() will clamp the returned value to zero if it's /// negative, and log an error. 
Could set it permanently to zero or some /// special value to indicate "broken" instead, but this will do for now. + /// + /// Note that we also expose a copy of this value as a prometheus metric, + /// see `current_logical_size_gauge`. Use the `update_current_logical_size` + /// and `set_current_logical_size` functions to modify this, they will + /// also keep the prometheus metric in sync. current_logical_size: AtomicI64, /// Information about the last processed message by the WAL receiver, @@ -827,8 +847,7 @@ impl Timeline { // // Logical size 0 means that it was not initialized, so don't believe that. if ancestor_logical_size != 0 && ancestor.get_last_record_lsn() == self.ancestor_lsn { - self.current_logical_size - .store(ancestor_logical_size as i64, AtomicOrdering::SeqCst); + self.set_current_logical_size(ancestor_logical_size); debug!( "logical size copied from ancestor: {}", ancestor_logical_size @@ -842,8 +861,7 @@ impl Timeline { // Have to calculate it the hard way let last_lsn = self.get_last_record_lsn(); let logical_size = self.get_current_logical_size_non_incremental(last_lsn)?; - self.current_logical_size - .store(logical_size as i64, AtomicOrdering::SeqCst); + self.set_current_logical_size(logical_size); debug!("calculated logical size the hard way: {}", logical_size); timer.stop_and_record(); @@ -867,6 +885,34 @@ impl Timeline { } } + /// Update current logical size, adding `delta' to the old value. + fn update_current_logical_size(&self, delta: i64) { + let new_size = self + .current_logical_size + .fetch_add(delta, AtomicOrdering::SeqCst); + + // Also set the value in the prometheus gauge. Note that + // there is a race condition here: if this is is called by two + // threads concurrently, the prometheus gauge might be set to + // one value while current_logical_size is set to the + // other. Currently, only initialization and the WAL receiver + // updates the logical size, and they don't run concurrently, + // so it cannot happen. 
And even if it did, it wouldn't be + // very serious, the metrics would just be slightly off until + // the next update. + self.metrics.current_logical_size_gauge.set(new_size); + } + + /// Set current logical size. + fn set_current_logical_size(&self, new_size: u64) { + self.current_logical_size + .store(new_size as i64, AtomicOrdering::SeqCst); + + // Also set the value in the prometheus gauge. Same race condition + // here as in `update_current_logical_size`. + self.metrics.current_logical_size_gauge.set(new_size as i64); + } + /// /// Get a handle to a Layer for reading. /// @@ -2261,9 +2307,7 @@ impl<'a> TimelineWriter<'a> { } pub fn update_current_logical_size(&self, delta: i64) { - self.tl - .current_logical_size - .fetch_add(delta, AtomicOrdering::SeqCst); + self.tl.update_current_logical_size(delta) } } diff --git a/test_runner/batch_others/test_timeline_size.py b/test_runner/batch_others/test_timeline_size.py index 6e1168e38f..4a9359cf43 100644 --- a/test_runner/batch_others/test_timeline_size.py +++ b/test_runner/batch_others/test_timeline_size.py @@ -1,4 +1,5 @@ from contextlib import closing +import math import random from uuid import UUID import re @@ -278,11 +279,13 @@ def test_timeline_physical_size_post_gc(neon_env_builder: NeonEnvBuilder): assert_physical_size(env, env.initial_tenant, new_timeline_id) -def test_timeline_physical_size_metric(neon_simple_env: NeonEnv): +# The timeline logical and physical sizes are also exposed as prometheus metrics. +# Test the metrics. 
+def test_timeline_size_metrics(neon_simple_env: NeonEnv): env = neon_simple_env - new_timeline_id = env.neon_cli.create_branch('test_timeline_physical_size_metric') - pg = env.postgres.create_start("test_timeline_physical_size_metric") + new_timeline_id = env.neon_cli.create_branch('test_timeline_size_metrics') + pg = env.postgres.create_start("test_timeline_size_metrics") pg.safe_psql_many([ "CREATE TABLE foo (t text)", @@ -301,12 +304,32 @@ def test_timeline_physical_size_metric(neon_simple_env: NeonEnv): metrics, re.MULTILINE) assert matches - - # assert that the metric matches the actual physical size on disk tl_physical_size_metric = int(matches.group(1)) + + # assert that the physical size metric matches the actual physical size on disk timeline_path = env.timeline_dir(env.initial_tenant, new_timeline_id) assert tl_physical_size_metric == get_timeline_dir_size(timeline_path) + # Check that the logical size metric is sane, and matches + matches = re.search( + f'^pageserver_current_logical_size{{tenant_id="{env.initial_tenant.hex}",timeline_id="{new_timeline_id.hex}"}} (\\S+)$', + metrics, + re.MULTILINE) + assert matches + tl_logical_size_metric = int(matches.group(1)) + + # An empty database is around 8 MB. There at least 3 databases, 'postgres', + # 'template0', 'template1'. So the total size should be about 32 MB. This isn't + # very accurate and can change with different PostgreSQL versions, so allow a + # couple of MB of slack. + assert math.isclose(tl_logical_size_metric, 32 * 1024 * 1024, abs_tol=2 * 1024 * 1024) + + # The sum of the sizes of all databases, as seen by pg_database_size(), should also + # be close. Again allow some slack, the logical size metric includes some things like + # the SLRUs that are not included in pg_database_size(). 
+ dbsize_sum = pg.safe_psql("select sum(pg_database_size(oid)) from pg_database")[0][0] + assert math.isclose(dbsize_sum, tl_logical_size_metric, abs_tol=2 * 1024 * 1024) + def test_tenant_physical_size(neon_simple_env: NeonEnv): random.seed(100) From 5522fbab25f1cd7cfaa36cf674e462172f24eff8 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Sat, 20 Aug 2022 01:21:18 +0300 Subject: [PATCH 16/63] Move all unit tests related to Repository/Timeline to layered_repository.rs There was a nominal split between the tests in layered_repository.rs and repository.rs, such that tests specific to the layered implementation were supposed to be in layered_repository.rs, and tests that should work with any implementation of the traits were supposed to be in repository.rs. In practice, the line was quite muddled. With minor tweaks, many of the tests in layered_repository.rs should work with other implementations too, and vice versa. And in practice we only have one implementation, so it's more straightforward to gather all unit tests in one place. --- pageserver/src/layered_repository.rs | 540 +++++++++++++++++- pageserver/src/layered_repository/metadata.rs | 3 +- pageserver/src/repository.rs | 524 ----------------- pageserver/src/storage_sync.rs | 4 +- pageserver/src/storage_sync/delete.rs | 2 +- pageserver/src/storage_sync/download.rs | 2 +- pageserver/src/storage_sync/index.rs | 2 +- pageserver/src/storage_sync/upload.rs | 2 +- pageserver/src/walingest.rs | 2 +- .../src/walreceiver/connection_manager.rs | 3 +- 10 files changed, 529 insertions(+), 555 deletions(-) diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs index a5877c8482..42474dac0b 100644 --- a/pageserver/src/layered_repository.rs +++ b/pageserver/src/layered_repository.rs @@ -905,22 +905,525 @@ pub fn load_metadata( }) } -/// -/// Tests that are specific to the layered storage format. 
-/// -/// There are more unit tests in repository.rs that work through the -/// Repository interface and are expected to work regardless of the -/// file format and directory layout. The test here are more low level. -/// +#[cfg(test)] +pub mod repo_harness { + use bytes::{Bytes, BytesMut}; + use once_cell::sync::Lazy; + use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard}; + use std::{fs, path::PathBuf}; + use utils::lsn::Lsn; + + use crate::storage_sync::index::RemoteIndex; + use crate::{ + config::PageServerConf, + layered_repository::Repository, + repository::Key, + walrecord::ZenithWalRecord, + walredo::{WalRedoError, WalRedoManager}, + }; + + use super::*; + use crate::tenant_config::{TenantConf, TenantConfOpt}; + use hex_literal::hex; + use utils::zid::{ZTenantId, ZTimelineId}; + + pub const TIMELINE_ID: ZTimelineId = + ZTimelineId::from_array(hex!("11223344556677881122334455667788")); + pub const NEW_TIMELINE_ID: ZTimelineId = + ZTimelineId::from_array(hex!("AA223344556677881122334455667788")); + + /// Convenience function to create a page image with given string as the only content + #[allow(non_snake_case)] + pub fn TEST_IMG(s: &str) -> Bytes { + let mut buf = BytesMut::new(); + buf.extend_from_slice(s.as_bytes()); + buf.resize(64, 0); + + buf.freeze() + } + + static LOCK: Lazy> = Lazy::new(|| RwLock::new(())); + + impl From for TenantConfOpt { + fn from(tenant_conf: TenantConf) -> Self { + Self { + checkpoint_distance: Some(tenant_conf.checkpoint_distance), + checkpoint_timeout: Some(tenant_conf.checkpoint_timeout), + compaction_target_size: Some(tenant_conf.compaction_target_size), + compaction_period: Some(tenant_conf.compaction_period), + compaction_threshold: Some(tenant_conf.compaction_threshold), + gc_horizon: Some(tenant_conf.gc_horizon), + gc_period: Some(tenant_conf.gc_period), + image_creation_threshold: Some(tenant_conf.image_creation_threshold), + pitr_interval: Some(tenant_conf.pitr_interval), + walreceiver_connect_timeout: 
Some(tenant_conf.walreceiver_connect_timeout), + lagging_wal_timeout: Some(tenant_conf.lagging_wal_timeout), + max_lsn_wal_lag: Some(tenant_conf.max_lsn_wal_lag), + } + } + } + + pub struct RepoHarness<'a> { + pub conf: &'static PageServerConf, + pub tenant_conf: TenantConf, + pub tenant_id: ZTenantId, + + pub lock_guard: ( + Option>, + Option>, + ), + } + + impl<'a> RepoHarness<'a> { + pub fn create(test_name: &'static str) -> Result { + Self::create_internal(test_name, false) + } + pub fn create_exclusive(test_name: &'static str) -> Result { + Self::create_internal(test_name, true) + } + fn create_internal(test_name: &'static str, exclusive: bool) -> Result { + let lock_guard = if exclusive { + (None, Some(LOCK.write().unwrap())) + } else { + (Some(LOCK.read().unwrap()), None) + }; + + let repo_dir = PageServerConf::test_repo_dir(test_name); + let _ = fs::remove_dir_all(&repo_dir); + fs::create_dir_all(&repo_dir)?; + + let conf = PageServerConf::dummy_conf(repo_dir); + // Make a static copy of the config. This can never be free'd, but that's + // OK in a test. 
+ let conf: &'static PageServerConf = Box::leak(Box::new(conf)); + + let tenant_conf = TenantConf::dummy_conf(); + + let tenant_id = ZTenantId::generate(); + fs::create_dir_all(conf.tenant_path(&tenant_id))?; + fs::create_dir_all(conf.timelines_path(&tenant_id))?; + + Ok(Self { + conf, + tenant_conf, + tenant_id, + lock_guard, + }) + } + + pub fn load(&self) -> Repository { + self.try_load().expect("failed to load test repo") + } + + pub fn try_load(&self) -> Result { + let walredo_mgr = Arc::new(TestRedoManager); + + let repo = Repository::new( + self.conf, + TenantConfOpt::from(self.tenant_conf), + walredo_mgr, + self.tenant_id, + RemoteIndex::default(), + false, + ); + // populate repo with locally available timelines + for timeline_dir_entry in fs::read_dir(self.conf.timelines_path(&self.tenant_id)) + .expect("should be able to read timelines dir") + { + let timeline_dir_entry = timeline_dir_entry.unwrap(); + let timeline_id: ZTimelineId = timeline_dir_entry + .path() + .file_name() + .unwrap() + .to_string_lossy() + .parse() + .unwrap(); + + repo.attach_timeline(timeline_id)?; + } + + Ok(repo) + } + + pub fn timeline_path(&self, timeline_id: &ZTimelineId) -> PathBuf { + self.conf.timeline_path(timeline_id, &self.tenant_id) + } + } + + // Mock WAL redo manager that doesn't do much + pub struct TestRedoManager; + + impl WalRedoManager for TestRedoManager { + fn request_redo( + &self, + key: Key, + lsn: Lsn, + base_img: Option, + records: Vec<(Lsn, ZenithWalRecord)>, + ) -> Result { + let s = format!( + "redo for {} to get to {}, with {} and {} records", + key, + lsn, + if base_img.is_some() { + "base image" + } else { + "no base image" + }, + records.len() + ); + println!("{}", s); + + Ok(TEST_IMG(&s)) + } + } +} + #[cfg(test)] pub mod tests { use super::metadata::METADATA_FILE_NAME; use super::*; use crate::keyspace::KeySpaceAccum; - use crate::repository::repo_harness::*; + use crate::layered_repository::repo_harness::*; use crate::repository::{Key, Value}; + 
use bytes::BytesMut; + use hex_literal::hex; + use once_cell::sync::Lazy; use rand::{thread_rng, Rng}; + static TEST_KEY: Lazy = + Lazy::new(|| Key::from_slice(&hex!("112222222233333333444444445500000001"))); + + #[test] + fn test_basic() -> Result<()> { + let repo = RepoHarness::create("test_basic")?.load(); + let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?; + + let writer = tline.writer(); + writer.put(*TEST_KEY, Lsn(0x10), &Value::Image(TEST_IMG("foo at 0x10")))?; + writer.finish_write(Lsn(0x10)); + drop(writer); + + let writer = tline.writer(); + writer.put(*TEST_KEY, Lsn(0x20), &Value::Image(TEST_IMG("foo at 0x20")))?; + writer.finish_write(Lsn(0x20)); + drop(writer); + + assert_eq!(tline.get(*TEST_KEY, Lsn(0x10))?, TEST_IMG("foo at 0x10")); + assert_eq!(tline.get(*TEST_KEY, Lsn(0x1f))?, TEST_IMG("foo at 0x10")); + assert_eq!(tline.get(*TEST_KEY, Lsn(0x20))?, TEST_IMG("foo at 0x20")); + + Ok(()) + } + + #[test] + fn no_duplicate_timelines() -> Result<()> { + let repo = RepoHarness::create("no_duplicate_timelines")?.load(); + let _ = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?; + + match repo.create_empty_timeline(TIMELINE_ID, Lsn(0)) { + Ok(_) => panic!("duplicate timeline creation should fail"), + Err(e) => assert_eq!(e.to_string(), "Timeline already exists"), + } + + Ok(()) + } + + /// Convenience function to create a page image with given string as the only content + pub fn test_value(s: &str) -> Value { + let mut buf = BytesMut::new(); + buf.extend_from_slice(s.as_bytes()); + Value::Image(buf.freeze()) + } + + /// + /// Test branch creation + /// + #[test] + fn test_branch() -> Result<()> { + let repo = RepoHarness::create("test_branch")?.load(); + let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?; + let writer = tline.writer(); + use std::str::from_utf8; + + #[allow(non_snake_case)] + let TEST_KEY_A: Key = Key::from_hex("112222222233333333444444445500000001").unwrap(); + #[allow(non_snake_case)] + let TEST_KEY_B: Key = 
Key::from_hex("112222222233333333444444445500000002").unwrap(); + + // Insert a value on the timeline + writer.put(TEST_KEY_A, Lsn(0x20), &test_value("foo at 0x20"))?; + writer.put(TEST_KEY_B, Lsn(0x20), &test_value("foobar at 0x20"))?; + writer.finish_write(Lsn(0x20)); + + writer.put(TEST_KEY_A, Lsn(0x30), &test_value("foo at 0x30"))?; + writer.finish_write(Lsn(0x30)); + writer.put(TEST_KEY_A, Lsn(0x40), &test_value("foo at 0x40"))?; + writer.finish_write(Lsn(0x40)); + + //assert_current_logical_size(&tline, Lsn(0x40)); + + // Branch the history, modify relation differently on the new timeline + repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x30)))?; + let newtline = repo + .get_timeline_load(NEW_TIMELINE_ID) + .expect("Should have a local timeline"); + let new_writer = newtline.writer(); + new_writer.put(TEST_KEY_A, Lsn(0x40), &test_value("bar at 0x40"))?; + new_writer.finish_write(Lsn(0x40)); + + // Check page contents on both branches + assert_eq!( + from_utf8(&tline.get(TEST_KEY_A, Lsn(0x40))?)?, + "foo at 0x40" + ); + assert_eq!( + from_utf8(&newtline.get(TEST_KEY_A, Lsn(0x40))?)?, + "bar at 0x40" + ); + assert_eq!( + from_utf8(&newtline.get(TEST_KEY_B, Lsn(0x40))?)?, + "foobar at 0x20" + ); + + //assert_current_logical_size(&tline, Lsn(0x40)); + + Ok(()) + } + + fn make_some_layers(tline: &Timeline, start_lsn: Lsn) -> Result<()> { + let mut lsn = start_lsn; + #[allow(non_snake_case)] + { + let writer = tline.writer(); + // Create a relation on the timeline + writer.put( + *TEST_KEY, + lsn, + &Value::Image(TEST_IMG(&format!("foo at {}", lsn))), + )?; + writer.finish_write(lsn); + lsn += 0x10; + writer.put( + *TEST_KEY, + lsn, + &Value::Image(TEST_IMG(&format!("foo at {}", lsn))), + )?; + writer.finish_write(lsn); + lsn += 0x10; + } + tline.checkpoint(CheckpointConfig::Forced)?; + { + let writer = tline.writer(); + writer.put( + *TEST_KEY, + lsn, + &Value::Image(TEST_IMG(&format!("foo at {}", lsn))), + )?; + writer.finish_write(lsn); + lsn += 
0x10; + writer.put( + *TEST_KEY, + lsn, + &Value::Image(TEST_IMG(&format!("foo at {}", lsn))), + )?; + writer.finish_write(lsn); + } + tline.checkpoint(CheckpointConfig::Forced) + } + + #[test] + fn test_prohibit_branch_creation_on_garbage_collected_data() -> Result<()> { + let repo = + RepoHarness::create("test_prohibit_branch_creation_on_garbage_collected_data")?.load(); + let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?; + make_some_layers(tline.as_ref(), Lsn(0x20))?; + + // this removes layers before lsn 40 (50 minus 10), so there are two remaining layers, image and delta for 31-50 + // FIXME: this doesn't actually remove any layer currently, given how the checkpointing + // and compaction works. But it does set the 'cutoff' point so that the cross check + // below should fail. + repo.gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, false)?; + + // try to branch at lsn 25, should fail because we already garbage collected the data + match repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x25))) { + Ok(_) => panic!("branching should have failed"), + Err(err) => { + assert!(err.to_string().contains("invalid branch start lsn")); + assert!(err + .source() + .unwrap() + .to_string() + .contains("we might've already garbage collected needed data")) + } + } + + Ok(()) + } + + #[test] + fn test_prohibit_branch_creation_on_pre_initdb_lsn() -> Result<()> { + let repo = RepoHarness::create("test_prohibit_branch_creation_on_pre_initdb_lsn")?.load(); + + repo.create_empty_timeline(TIMELINE_ID, Lsn(0x50))?; + // try to branch at lsn 0x25, should fail because initdb lsn is 0x50 + match repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x25))) { + Ok(_) => panic!("branching should have failed"), + Err(err) => { + assert!(&err.to_string().contains("invalid branch start lsn")); + assert!(&err + .source() + .unwrap() + .to_string() + .contains("is earlier than latest GC horizon")); + } + } + + Ok(()) + } + + /* + // FIXME: This currently fails 
to error out. Calling GC doesn't currently + // remove the old value, we'd need to work a little harder + #[test] + fn test_prohibit_get_for_garbage_collected_data() -> Result<()> { + let repo = + RepoHarness::create("test_prohibit_get_for_garbage_collected_data")? + .load(); + + let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?; + make_some_layers(tline.as_ref(), Lsn(0x20))?; + + repo.gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, false)?; + let latest_gc_cutoff_lsn = tline.get_latest_gc_cutoff_lsn(); + assert!(*latest_gc_cutoff_lsn > Lsn(0x25)); + match tline.get(*TEST_KEY, Lsn(0x25)) { + Ok(_) => panic!("request for page should have failed"), + Err(err) => assert!(err.to_string().contains("not found at")), + } + Ok(()) + } + */ + + #[test] + fn test_retain_data_in_parent_which_is_needed_for_child() -> Result<()> { + let repo = + RepoHarness::create("test_retain_data_in_parent_which_is_needed_for_child")?.load(); + let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?; + make_some_layers(tline.as_ref(), Lsn(0x20))?; + + repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)))?; + let newtline = repo + .get_timeline_load(NEW_TIMELINE_ID) + .expect("Should have a local timeline"); + // this removes layers before lsn 40 (50 minus 10), so there are two remaining layers, image and delta for 31-50 + repo.gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, false)?; + assert!(newtline.get(*TEST_KEY, Lsn(0x25)).is_ok()); + + Ok(()) + } + #[test] + fn test_parent_keeps_data_forever_after_branching() -> Result<()> { + let repo = RepoHarness::create("test_parent_keeps_data_forever_after_branching")?.load(); + let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?; + make_some_layers(tline.as_ref(), Lsn(0x20))?; + + repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)))?; + let newtline = repo + .get_timeline_load(NEW_TIMELINE_ID) + .expect("Should have a local timeline"); + + make_some_layers(newtline.as_ref(), 
Lsn(0x60))?; + + // run gc on parent + repo.gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, false)?; + + // Check that the data is still accessible on the branch. + assert_eq!( + newtline.get(*TEST_KEY, Lsn(0x50))?, + TEST_IMG(&format!("foo at {}", Lsn(0x40))) + ); + + Ok(()) + } + + #[test] + fn timeline_load() -> Result<()> { + const TEST_NAME: &str = "timeline_load"; + let harness = RepoHarness::create(TEST_NAME)?; + { + let repo = harness.load(); + let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0x8000))?; + make_some_layers(tline.as_ref(), Lsn(0x8000))?; + tline.checkpoint(CheckpointConfig::Forced)?; + } + + let repo = harness.load(); + let tline = repo + .get_timeline(TIMELINE_ID) + .expect("cannot load timeline"); + assert!(matches!(tline, RepositoryTimeline::Unloaded { .. })); + + assert!(repo.get_timeline_load(TIMELINE_ID).is_ok()); + + let tline = repo + .get_timeline(TIMELINE_ID) + .expect("cannot load timeline"); + assert!(matches!(tline, RepositoryTimeline::Loaded(_))); + + Ok(()) + } + + #[test] + fn timeline_load_with_ancestor() -> Result<()> { + const TEST_NAME: &str = "timeline_load_with_ancestor"; + let harness = RepoHarness::create(TEST_NAME)?; + // create two timelines + { + let repo = harness.load(); + let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?; + + make_some_layers(tline.as_ref(), Lsn(0x20))?; + tline.checkpoint(CheckpointConfig::Forced)?; + + repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)))?; + + let newtline = repo + .get_timeline_load(NEW_TIMELINE_ID) + .expect("Should have a local timeline"); + + make_some_layers(newtline.as_ref(), Lsn(0x60))?; + tline.checkpoint(CheckpointConfig::Forced)?; + } + + // check that both of them are initially unloaded + let repo = harness.load(); + { + let tline = repo.get_timeline(TIMELINE_ID).expect("cannot get timeline"); + assert!(matches!(tline, RepositoryTimeline::Unloaded { .. 
})); + + let tline = repo + .get_timeline(NEW_TIMELINE_ID) + .expect("cannot get timeline"); + assert!(matches!(tline, RepositoryTimeline::Unloaded { .. })); + } + // load only child timeline + let _ = repo + .get_timeline_load(NEW_TIMELINE_ID) + .expect("cannot load timeline"); + + // check that both, child and ancestor are loaded + let tline = repo + .get_timeline(NEW_TIMELINE_ID) + .expect("cannot get timeline"); + assert!(matches!(tline, RepositoryTimeline::Loaded(_))); + + let tline = repo.get_timeline(TIMELINE_ID).expect("cannot get timeline"); + assert!(matches!(tline, RepositoryTimeline::Loaded(_))); + + Ok(()) + } + #[test] fn corrupt_metadata() -> Result<()> { const TEST_NAME: &str = "corrupt_metadata"; @@ -970,11 +1473,8 @@ pub mod tests { let repo = RepoHarness::create("test_images")?.load(); let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?; - #[allow(non_snake_case)] - let TEST_KEY: Key = Key::from_hex("112222222233333333444444445500000001").unwrap(); - let writer = tline.writer(); - writer.put(TEST_KEY, Lsn(0x10), &Value::Image(TEST_IMG("foo at 0x10")))?; + writer.put(*TEST_KEY, Lsn(0x10), &Value::Image(TEST_IMG("foo at 0x10")))?; writer.finish_write(Lsn(0x10)); drop(writer); @@ -982,7 +1482,7 @@ pub mod tests { tline.compact()?; let writer = tline.writer(); - writer.put(TEST_KEY, Lsn(0x20), &Value::Image(TEST_IMG("foo at 0x20")))?; + writer.put(*TEST_KEY, Lsn(0x20), &Value::Image(TEST_IMG("foo at 0x20")))?; writer.finish_write(Lsn(0x20)); drop(writer); @@ -990,7 +1490,7 @@ pub mod tests { tline.compact()?; let writer = tline.writer(); - writer.put(TEST_KEY, Lsn(0x30), &Value::Image(TEST_IMG("foo at 0x30")))?; + writer.put(*TEST_KEY, Lsn(0x30), &Value::Image(TEST_IMG("foo at 0x30")))?; writer.finish_write(Lsn(0x30)); drop(writer); @@ -998,18 +1498,18 @@ pub mod tests { tline.compact()?; let writer = tline.writer(); - writer.put(TEST_KEY, Lsn(0x40), &Value::Image(TEST_IMG("foo at 0x40")))?; + writer.put(*TEST_KEY, Lsn(0x40), 
&Value::Image(TEST_IMG("foo at 0x40")))?; writer.finish_write(Lsn(0x40)); drop(writer); tline.checkpoint(CheckpointConfig::Forced)?; tline.compact()?; - assert_eq!(tline.get(TEST_KEY, Lsn(0x10))?, TEST_IMG("foo at 0x10")); - assert_eq!(tline.get(TEST_KEY, Lsn(0x1f))?, TEST_IMG("foo at 0x10")); - assert_eq!(tline.get(TEST_KEY, Lsn(0x20))?, TEST_IMG("foo at 0x20")); - assert_eq!(tline.get(TEST_KEY, Lsn(0x30))?, TEST_IMG("foo at 0x30")); - assert_eq!(tline.get(TEST_KEY, Lsn(0x40))?, TEST_IMG("foo at 0x40")); + assert_eq!(tline.get(*TEST_KEY, Lsn(0x10))?, TEST_IMG("foo at 0x10")); + assert_eq!(tline.get(*TEST_KEY, Lsn(0x1f))?, TEST_IMG("foo at 0x10")); + assert_eq!(tline.get(*TEST_KEY, Lsn(0x20))?, TEST_IMG("foo at 0x20")); + assert_eq!(tline.get(*TEST_KEY, Lsn(0x30))?, TEST_IMG("foo at 0x30")); + assert_eq!(tline.get(*TEST_KEY, Lsn(0x40))?, TEST_IMG("foo at 0x40")); Ok(()) } diff --git a/pageserver/src/layered_repository/metadata.rs b/pageserver/src/layered_repository/metadata.rs index 0b47f8d697..74679cb43a 100644 --- a/pageserver/src/layered_repository/metadata.rs +++ b/pageserver/src/layered_repository/metadata.rs @@ -175,9 +175,8 @@ impl TimelineMetadata { #[cfg(test)] mod tests { - use crate::repository::repo_harness::TIMELINE_ID; - use super::*; + use crate::layered_repository::repo_harness::TIMELINE_ID; #[test] fn metadata_serializes_correctly() { diff --git a/pageserver/src/repository.rs b/pageserver/src/repository.rs index dc031c03ee..e46a39436d 100644 --- a/pageserver/src/repository.rs +++ b/pageserver/src/repository.rs @@ -226,527 +226,3 @@ impl AddAssign for GcResult { self.elapsed += other.elapsed; } } - -#[cfg(test)] -pub mod repo_harness { - use bytes::BytesMut; - use once_cell::sync::Lazy; - use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard}; - use std::{fs, path::PathBuf}; - use utils::lsn::Lsn; - - use crate::storage_sync::index::RemoteIndex; - use crate::{ - config::PageServerConf, - layered_repository::Repository, - 
walredo::{WalRedoError, WalRedoManager}, - }; - - use super::*; - use crate::tenant_config::{TenantConf, TenantConfOpt}; - use hex_literal::hex; - use utils::zid::{ZTenantId, ZTimelineId}; - - pub const TIMELINE_ID: ZTimelineId = - ZTimelineId::from_array(hex!("11223344556677881122334455667788")); - pub const NEW_TIMELINE_ID: ZTimelineId = - ZTimelineId::from_array(hex!("AA223344556677881122334455667788")); - - /// Convenience function to create a page image with given string as the only content - #[allow(non_snake_case)] - pub fn TEST_IMG(s: &str) -> Bytes { - let mut buf = BytesMut::new(); - buf.extend_from_slice(s.as_bytes()); - buf.resize(64, 0); - - buf.freeze() - } - - static LOCK: Lazy> = Lazy::new(|| RwLock::new(())); - - impl From for TenantConfOpt { - fn from(tenant_conf: TenantConf) -> Self { - Self { - checkpoint_distance: Some(tenant_conf.checkpoint_distance), - checkpoint_timeout: Some(tenant_conf.checkpoint_timeout), - compaction_target_size: Some(tenant_conf.compaction_target_size), - compaction_period: Some(tenant_conf.compaction_period), - compaction_threshold: Some(tenant_conf.compaction_threshold), - gc_horizon: Some(tenant_conf.gc_horizon), - gc_period: Some(tenant_conf.gc_period), - image_creation_threshold: Some(tenant_conf.image_creation_threshold), - pitr_interval: Some(tenant_conf.pitr_interval), - walreceiver_connect_timeout: Some(tenant_conf.walreceiver_connect_timeout), - lagging_wal_timeout: Some(tenant_conf.lagging_wal_timeout), - max_lsn_wal_lag: Some(tenant_conf.max_lsn_wal_lag), - } - } - } - - pub struct RepoHarness<'a> { - pub conf: &'static PageServerConf, - pub tenant_conf: TenantConf, - pub tenant_id: ZTenantId, - - pub lock_guard: ( - Option>, - Option>, - ), - } - - impl<'a> RepoHarness<'a> { - pub fn create(test_name: &'static str) -> Result { - Self::create_internal(test_name, false) - } - pub fn create_exclusive(test_name: &'static str) -> Result { - Self::create_internal(test_name, true) - } - fn 
create_internal(test_name: &'static str, exclusive: bool) -> Result { - let lock_guard = if exclusive { - (None, Some(LOCK.write().unwrap())) - } else { - (Some(LOCK.read().unwrap()), None) - }; - - let repo_dir = PageServerConf::test_repo_dir(test_name); - let _ = fs::remove_dir_all(&repo_dir); - fs::create_dir_all(&repo_dir)?; - - let conf = PageServerConf::dummy_conf(repo_dir); - // Make a static copy of the config. This can never be free'd, but that's - // OK in a test. - let conf: &'static PageServerConf = Box::leak(Box::new(conf)); - - let tenant_conf = TenantConf::dummy_conf(); - - let tenant_id = ZTenantId::generate(); - fs::create_dir_all(conf.tenant_path(&tenant_id))?; - fs::create_dir_all(conf.timelines_path(&tenant_id))?; - - Ok(Self { - conf, - tenant_conf, - tenant_id, - lock_guard, - }) - } - - pub fn load(&self) -> Repository { - self.try_load().expect("failed to load test repo") - } - - pub fn try_load(&self) -> Result { - let walredo_mgr = Arc::new(TestRedoManager); - - let repo = Repository::new( - self.conf, - TenantConfOpt::from(self.tenant_conf), - walredo_mgr, - self.tenant_id, - RemoteIndex::default(), - false, - ); - // populate repo with locally available timelines - for timeline_dir_entry in fs::read_dir(self.conf.timelines_path(&self.tenant_id)) - .expect("should be able to read timelines dir") - { - let timeline_dir_entry = timeline_dir_entry.unwrap(); - let timeline_id: ZTimelineId = timeline_dir_entry - .path() - .file_name() - .unwrap() - .to_string_lossy() - .parse() - .unwrap(); - - repo.attach_timeline(timeline_id)?; - } - - Ok(repo) - } - - pub fn timeline_path(&self, timeline_id: &ZTimelineId) -> PathBuf { - self.conf.timeline_path(timeline_id, &self.tenant_id) - } - } - - // Mock WAL redo manager that doesn't do much - pub struct TestRedoManager; - - impl WalRedoManager for TestRedoManager { - fn request_redo( - &self, - key: Key, - lsn: Lsn, - base_img: Option, - records: Vec<(Lsn, ZenithWalRecord)>, - ) -> Result { - let s = 
format!( - "redo for {} to get to {}, with {} and {} records", - key, - lsn, - if base_img.is_some() { - "base image" - } else { - "no base image" - }, - records.len() - ); - println!("{}", s); - - Ok(TEST_IMG(&s)) - } - } -} - -/// -/// Tests that should work the same with any Repository/Timeline implementation. -/// -#[allow(clippy::bool_assert_comparison)] -#[cfg(test)] -mod tests { - use crate::layered_repository::Timeline; - use crate::CheckpointConfig; - - use super::repo_harness::*; - use super::*; - //use postgres_ffi::{pg_constants, xlog_utils::SIZEOF_CHECKPOINT}; - //use std::sync::Arc; - use bytes::BytesMut; - use hex_literal::hex; - use once_cell::sync::Lazy; - use utils::lsn::Lsn; - - static TEST_KEY: Lazy = - Lazy::new(|| Key::from_slice(&hex!("112222222233333333444444445500000001"))); - - #[test] - fn test_basic() -> Result<()> { - let repo = RepoHarness::create("test_basic")?.load(); - let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?; - - let writer = tline.writer(); - writer.put(*TEST_KEY, Lsn(0x10), &Value::Image(TEST_IMG("foo at 0x10")))?; - writer.finish_write(Lsn(0x10)); - drop(writer); - - let writer = tline.writer(); - writer.put(*TEST_KEY, Lsn(0x20), &Value::Image(TEST_IMG("foo at 0x20")))?; - writer.finish_write(Lsn(0x20)); - drop(writer); - - assert_eq!(tline.get(*TEST_KEY, Lsn(0x10))?, TEST_IMG("foo at 0x10")); - assert_eq!(tline.get(*TEST_KEY, Lsn(0x1f))?, TEST_IMG("foo at 0x10")); - assert_eq!(tline.get(*TEST_KEY, Lsn(0x20))?, TEST_IMG("foo at 0x20")); - - Ok(()) - } - - #[test] - fn no_duplicate_timelines() -> Result<()> { - let repo = RepoHarness::create("no_duplicate_timelines")?.load(); - let _ = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?; - - match repo.create_empty_timeline(TIMELINE_ID, Lsn(0)) { - Ok(_) => panic!("duplicate timeline creation should fail"), - Err(e) => assert_eq!(e.to_string(), "Timeline already exists"), - } - - Ok(()) - } - - /// Convenience function to create a page image with given string as 
the only content - pub fn test_value(s: &str) -> Value { - let mut buf = BytesMut::new(); - buf.extend_from_slice(s.as_bytes()); - Value::Image(buf.freeze()) - } - - /// - /// Test branch creation - /// - #[test] - fn test_branch() -> Result<()> { - let repo = RepoHarness::create("test_branch")?.load(); - let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?; - let writer = tline.writer(); - use std::str::from_utf8; - - #[allow(non_snake_case)] - let TEST_KEY_A: Key = Key::from_hex("112222222233333333444444445500000001").unwrap(); - #[allow(non_snake_case)] - let TEST_KEY_B: Key = Key::from_hex("112222222233333333444444445500000002").unwrap(); - - // Insert a value on the timeline - writer.put(TEST_KEY_A, Lsn(0x20), &test_value("foo at 0x20"))?; - writer.put(TEST_KEY_B, Lsn(0x20), &test_value("foobar at 0x20"))?; - writer.finish_write(Lsn(0x20)); - - writer.put(TEST_KEY_A, Lsn(0x30), &test_value("foo at 0x30"))?; - writer.finish_write(Lsn(0x30)); - writer.put(TEST_KEY_A, Lsn(0x40), &test_value("foo at 0x40"))?; - writer.finish_write(Lsn(0x40)); - - //assert_current_logical_size(&tline, Lsn(0x40)); - - // Branch the history, modify relation differently on the new timeline - repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x30)))?; - let newtline = repo - .get_timeline_load(NEW_TIMELINE_ID) - .expect("Should have a local timeline"); - let new_writer = newtline.writer(); - new_writer.put(TEST_KEY_A, Lsn(0x40), &test_value("bar at 0x40"))?; - new_writer.finish_write(Lsn(0x40)); - - // Check page contents on both branches - assert_eq!( - from_utf8(&tline.get(TEST_KEY_A, Lsn(0x40))?)?, - "foo at 0x40" - ); - assert_eq!( - from_utf8(&newtline.get(TEST_KEY_A, Lsn(0x40))?)?, - "bar at 0x40" - ); - assert_eq!( - from_utf8(&newtline.get(TEST_KEY_B, Lsn(0x40))?)?, - "foobar at 0x20" - ); - - //assert_current_logical_size(&tline, Lsn(0x40)); - - Ok(()) - } - - fn make_some_layers(tline: &Timeline, start_lsn: Lsn) -> Result<()> { - let mut lsn = start_lsn; - 
#[allow(non_snake_case)] - { - let writer = tline.writer(); - // Create a relation on the timeline - writer.put( - *TEST_KEY, - lsn, - &Value::Image(TEST_IMG(&format!("foo at {}", lsn))), - )?; - writer.finish_write(lsn); - lsn += 0x10; - writer.put( - *TEST_KEY, - lsn, - &Value::Image(TEST_IMG(&format!("foo at {}", lsn))), - )?; - writer.finish_write(lsn); - lsn += 0x10; - } - tline.checkpoint(CheckpointConfig::Forced)?; - { - let writer = tline.writer(); - writer.put( - *TEST_KEY, - lsn, - &Value::Image(TEST_IMG(&format!("foo at {}", lsn))), - )?; - writer.finish_write(lsn); - lsn += 0x10; - writer.put( - *TEST_KEY, - lsn, - &Value::Image(TEST_IMG(&format!("foo at {}", lsn))), - )?; - writer.finish_write(lsn); - } - tline.checkpoint(CheckpointConfig::Forced) - } - - #[test] - fn test_prohibit_branch_creation_on_garbage_collected_data() -> Result<()> { - let repo = - RepoHarness::create("test_prohibit_branch_creation_on_garbage_collected_data")?.load(); - let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?; - make_some_layers(tline.as_ref(), Lsn(0x20))?; - - // this removes layers before lsn 40 (50 minus 10), so there are two remaining layers, image and delta for 31-50 - // FIXME: this doesn't actually remove any layer currently, given how the checkpointing - // and compaction works. But it does set the 'cutoff' point so that the cross check - // below should fail. 
- repo.gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, false)?; - - // try to branch at lsn 25, should fail because we already garbage collected the data - match repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x25))) { - Ok(_) => panic!("branching should have failed"), - Err(err) => { - assert!(err.to_string().contains("invalid branch start lsn")); - assert!(err - .source() - .unwrap() - .to_string() - .contains("we might've already garbage collected needed data")) - } - } - - Ok(()) - } - - #[test] - fn test_prohibit_branch_creation_on_pre_initdb_lsn() -> Result<()> { - let repo = RepoHarness::create("test_prohibit_branch_creation_on_pre_initdb_lsn")?.load(); - - repo.create_empty_timeline(TIMELINE_ID, Lsn(0x50))?; - // try to branch at lsn 0x25, should fail because initdb lsn is 0x50 - match repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x25))) { - Ok(_) => panic!("branching should have failed"), - Err(err) => { - assert!(&err.to_string().contains("invalid branch start lsn")); - assert!(&err - .source() - .unwrap() - .to_string() - .contains("is earlier than latest GC horizon")); - } - } - - Ok(()) - } - - /* - // FIXME: This currently fails to error out. Calling GC doesn't currently - // remove the old value, we'd need to work a little harder - #[test] - fn test_prohibit_get_for_garbage_collected_data() -> Result<()> { - let repo = - RepoHarness::create("test_prohibit_get_for_garbage_collected_data")? 
- .load(); - - let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?; - make_some_layers(tline.as_ref(), Lsn(0x20))?; - - repo.gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, false)?; - let latest_gc_cutoff_lsn = tline.get_latest_gc_cutoff_lsn(); - assert!(*latest_gc_cutoff_lsn > Lsn(0x25)); - match tline.get(*TEST_KEY, Lsn(0x25)) { - Ok(_) => panic!("request for page should have failed"), - Err(err) => assert!(err.to_string().contains("not found at")), - } - Ok(()) - } - */ - - #[test] - fn test_retain_data_in_parent_which_is_needed_for_child() -> Result<()> { - let repo = - RepoHarness::create("test_retain_data_in_parent_which_is_needed_for_child")?.load(); - let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?; - make_some_layers(tline.as_ref(), Lsn(0x20))?; - - repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)))?; - let newtline = repo - .get_timeline_load(NEW_TIMELINE_ID) - .expect("Should have a local timeline"); - // this removes layers before lsn 40 (50 minus 10), so there are two remaining layers, image and delta for 31-50 - repo.gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, false)?; - assert!(newtline.get(*TEST_KEY, Lsn(0x25)).is_ok()); - - Ok(()) - } - #[test] - fn test_parent_keeps_data_forever_after_branching() -> Result<()> { - let repo = RepoHarness::create("test_parent_keeps_data_forever_after_branching")?.load(); - let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?; - make_some_layers(tline.as_ref(), Lsn(0x20))?; - - repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)))?; - let newtline = repo - .get_timeline_load(NEW_TIMELINE_ID) - .expect("Should have a local timeline"); - - make_some_layers(newtline.as_ref(), Lsn(0x60))?; - - // run gc on parent - repo.gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, false)?; - - // Check that the data is still accessible on the branch. 
- assert_eq!( - newtline.get(*TEST_KEY, Lsn(0x50))?, - TEST_IMG(&format!("foo at {}", Lsn(0x40))) - ); - - Ok(()) - } - - #[test] - fn timeline_load() -> Result<()> { - const TEST_NAME: &str = "timeline_load"; - let harness = RepoHarness::create(TEST_NAME)?; - { - let repo = harness.load(); - let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0x8000))?; - make_some_layers(tline.as_ref(), Lsn(0x8000))?; - tline.checkpoint(CheckpointConfig::Forced)?; - } - - let repo = harness.load(); - let tline = repo - .get_timeline(TIMELINE_ID) - .expect("cannot load timeline"); - assert!(matches!(tline, RepositoryTimeline::Unloaded { .. })); - - assert!(repo.get_timeline_load(TIMELINE_ID).is_ok()); - - let tline = repo - .get_timeline(TIMELINE_ID) - .expect("cannot load timeline"); - assert!(matches!(tline, RepositoryTimeline::Loaded(_))); - - Ok(()) - } - - #[test] - fn timeline_load_with_ancestor() -> Result<()> { - const TEST_NAME: &str = "timeline_load_with_ancestor"; - let harness = RepoHarness::create(TEST_NAME)?; - // create two timelines - { - let repo = harness.load(); - let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?; - - make_some_layers(tline.as_ref(), Lsn(0x20))?; - tline.checkpoint(CheckpointConfig::Forced)?; - - repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)))?; - - let newtline = repo - .get_timeline_load(NEW_TIMELINE_ID) - .expect("Should have a local timeline"); - - make_some_layers(newtline.as_ref(), Lsn(0x60))?; - tline.checkpoint(CheckpointConfig::Forced)?; - } - - // check that both of them are initially unloaded - let repo = harness.load(); - { - let tline = repo.get_timeline(TIMELINE_ID).expect("cannot get timeline"); - assert!(matches!(tline, RepositoryTimeline::Unloaded { .. })); - - let tline = repo - .get_timeline(NEW_TIMELINE_ID) - .expect("cannot get timeline"); - assert!(matches!(tline, RepositoryTimeline::Unloaded { .. 
})); - } - // load only child timeline - let _ = repo - .get_timeline_load(NEW_TIMELINE_ID) - .expect("cannot load timeline"); - - // check that both, child and ancestor are loaded - let tline = repo - .get_timeline(NEW_TIMELINE_ID) - .expect("cannot get timeline"); - assert!(matches!(tline, RepositoryTimeline::Loaded(_))); - - let tline = repo.get_timeline(TIMELINE_ID).expect("cannot get timeline"); - assert!(matches!(tline, RepositoryTimeline::Loaded(_))); - - Ok(()) - } -} diff --git a/pageserver/src/storage_sync.rs b/pageserver/src/storage_sync.rs index 15f24d7e24..52d544b28c 100644 --- a/pageserver/src/storage_sync.rs +++ b/pageserver/src/storage_sync.rs @@ -1642,7 +1642,7 @@ fn register_sync_status( mod test_utils { use utils::lsn::Lsn; - use crate::repository::repo_harness::RepoHarness; + use crate::layered_repository::repo_harness::RepoHarness; use super::*; @@ -1687,7 +1687,7 @@ mod test_utils { #[cfg(test)] mod tests { use super::test_utils::dummy_metadata; - use crate::repository::repo_harness::TIMELINE_ID; + use crate::layered_repository::repo_harness::TIMELINE_ID; use hex_literal::hex; use utils::lsn::Lsn; diff --git a/pageserver/src/storage_sync/delete.rs b/pageserver/src/storage_sync/delete.rs index a1b26ee9a2..2e39ed073f 100644 --- a/pageserver/src/storage_sync/delete.rs +++ b/pageserver/src/storage_sync/delete.rs @@ -111,7 +111,7 @@ mod tests { use utils::lsn::Lsn; use crate::{ - repository::repo_harness::{RepoHarness, TIMELINE_ID}, + layered_repository::repo_harness::{RepoHarness, TIMELINE_ID}, storage_sync::test_utils::{create_local_timeline, dummy_metadata}, }; use remote_storage::LocalFs; diff --git a/pageserver/src/storage_sync/download.rs b/pageserver/src/storage_sync/download.rs index f714888d9a..98c45bf9af 100644 --- a/pageserver/src/storage_sync/download.rs +++ b/pageserver/src/storage_sync/download.rs @@ -445,7 +445,7 @@ mod tests { use utils::lsn::Lsn; use crate::{ - repository::repo_harness::{RepoHarness, TIMELINE_ID}, + 
layered_repository::repo_harness::{RepoHarness, TIMELINE_ID}, storage_sync::{ index::RelativePath, test_utils::{create_local_timeline, dummy_metadata}, diff --git a/pageserver/src/storage_sync/index.rs b/pageserver/src/storage_sync/index.rs index 134ae893bc..3dddda09bf 100644 --- a/pageserver/src/storage_sync/index.rs +++ b/pageserver/src/storage_sync/index.rs @@ -341,7 +341,7 @@ mod tests { use std::collections::BTreeSet; use super::*; - use crate::repository::repo_harness::{RepoHarness, TIMELINE_ID}; + use crate::layered_repository::repo_harness::{RepoHarness, TIMELINE_ID}; #[test] fn index_part_conversion() { diff --git a/pageserver/src/storage_sync/upload.rs b/pageserver/src/storage_sync/upload.rs index 2c41f58721..2acc935537 100644 --- a/pageserver/src/storage_sync/upload.rs +++ b/pageserver/src/storage_sync/upload.rs @@ -248,7 +248,7 @@ mod tests { use utils::lsn::Lsn; use crate::{ - repository::repo_harness::{RepoHarness, TIMELINE_ID}, + layered_repository::repo_harness::{RepoHarness, TIMELINE_ID}, storage_sync::{ index::RelativePath, test_utils::{create_local_timeline, dummy_metadata}, diff --git a/pageserver/src/walingest.rs b/pageserver/src/walingest.rs index c24ffc49de..f3789d43e3 100644 --- a/pageserver/src/walingest.rs +++ b/pageserver/src/walingest.rs @@ -1029,9 +1029,9 @@ impl<'a> WalIngest<'a> { #[cfg(test)] mod tests { use super::*; + use crate::layered_repository::repo_harness::*; use crate::layered_repository::Timeline; use crate::pgdatadir_mapping::create_test_timeline; - use crate::repository::repo_harness::*; use postgres_ffi::v14::xlog_utils::SIZEOF_CHECKPOINT; use postgres_ffi::RELSEG_SIZE; diff --git a/pageserver/src/walreceiver/connection_manager.rs b/pageserver/src/walreceiver/connection_manager.rs index 912073a731..0261203049 100644 --- a/pageserver/src/walreceiver/connection_manager.rs +++ b/pageserver/src/walreceiver/connection_manager.rs @@ -735,9 +735,8 @@ fn wal_stream_connection_string( #[cfg(test)] mod tests { - use 
crate::repository::repo_harness::{RepoHarness, TIMELINE_ID}; - use super::*; + use crate::layered_repository::repo_harness::{RepoHarness, TIMELINE_ID}; #[test] fn no_connection_no_candidate() -> anyhow::Result<()> { From 631cbf5b1ba013ae48cbb463b198837af489efe8 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Fri, 19 Aug 2022 14:42:35 +0300 Subject: [PATCH 17/63] Use single map to manage timeline data --- pageserver/src/tenant_mgr.rs | 91 ++++++++++-------------------------- 1 file changed, 25 insertions(+), 66 deletions(-) diff --git a/pageserver/src/tenant_mgr.rs b/pageserver/src/tenant_mgr.rs index 5afa38c926..4025d6706e 100644 --- a/pageserver/src/tenant_mgr.rs +++ b/pageserver/src/tenant_mgr.rs @@ -4,6 +4,7 @@ use crate::config::PageServerConf; use crate::http::models::TenantInfo; use crate::layered_repository::{load_metadata, Repository, Timeline}; +use crate::repository::RepositoryTimeline; use crate::storage_sync::index::{RemoteIndex, RemoteTimelineIndex}; use crate::storage_sync::{self, LocalTimelineInitStatus, SyncStartupData}; use crate::tenant_config::TenantConfOpt; @@ -94,12 +95,6 @@ struct Tenant { state: TenantState, /// Contains in-memory state, including the timeline that might not yet flushed on disk or loaded form disk. repo: Arc, - /// Timelines, located locally in the pageserver's datadir. - /// Timelines can entirely be removed entirely by the `detach` operation only. - /// - /// Local timelines have more metadata that's loaded into memory, - /// that is located in the `repo.timelines` field, [`crate::layered_repository::LayeredTimelineEntry`]. 
- local_timelines: HashMap>, } #[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)] @@ -288,7 +283,6 @@ pub fn create_tenant_repository( v.insert(Tenant { state: TenantState::Idle, repo, - local_timelines: HashMap::new(), }); Ok(Some(tenant_id)) } @@ -379,20 +373,11 @@ pub fn get_local_timeline_with_load( tenant_id: ZTenantId, timeline_id: ZTimelineId, ) -> anyhow::Result> { - let mut m = tenants_state::write_tenants(); - let tenant = m - .get_mut(&tenant_id) - .with_context(|| format!("Tenant {tenant_id} not found"))?; - - if let Some(page_tline) = tenant.local_timelines.get(&timeline_id) { - Ok(Arc::clone(page_tline)) - } else { - let page_tline = load_local_timeline(&tenant.repo, timeline_id) - .with_context(|| format!("Failed to load local timeline for tenant {tenant_id}"))?; - tenant - .local_timelines - .insert(timeline_id, Arc::clone(&page_tline)); - Ok(page_tline) + let repository = get_repository_for_tenant(tenant_id)?; + match repository.get_timeline(timeline_id) { + Some(RepositoryTimeline::Loaded(loaded_timeline)) => Ok(loaded_timeline), + _ => load_local_timeline(&repository, timeline_id) + .with_context(|| format!("Failed to load local timeline for tenant {tenant_id}")), } } @@ -419,10 +404,7 @@ pub fn delete_timeline(tenant_id: ZTenantId, timeline_id: ZTimelineId) -> anyhow thread_mgr::shutdown_threads(None, None, Some(timeline_id)); debug!("thread shutdown completed"); match tenants_state::write_tenants().get_mut(&tenant_id) { - Some(tenant) => { - tenant.repo.delete_timeline(timeline_id)?; - tenant.local_timelines.remove(&timeline_id); - } + Some(tenant) => tenant.repo.delete_timeline(timeline_id)?, None => anyhow::bail!("Tenant {tenant_id} not found in local tenant state"), } @@ -434,37 +416,31 @@ pub fn detach_tenant(conf: &'static PageServerConf, tenant_id: ZTenantId) -> any // shutdown the tenant and timeline threads: gc, compaction, page service threads) thread_mgr::shutdown_threads(None, Some(tenant_id), None); - // FIXME 
should we protect somehow from starting new threads/walreceivers when tenant is in stopping state? - // send stop signal to wal receiver and collect join handles while holding the lock - let walreceiver_join_handles = { - let tenants = tenants_state::write_tenants(); - let tenant = tenants.get(&tenant_id).context("tenant not found")?; - let mut walreceiver_join_handles = Vec::with_capacity(tenant.local_timelines.len()); - for timeline_id in tenant.local_timelines.keys() { + let mut walreceiver_join_handles = Vec::new(); + let removed_tenant = { + let mut tenants_accessor = tenants_state::write_tenants(); + tenants_accessor.remove(&tenant_id) + }; + if let Some(tenant) = removed_tenant { + for (timeline_id, _) in tenant.repo.list_timelines() { let (sender, receiver) = std::sync::mpsc::channel::<()>(); tenants_state::try_send_timeline_update(LocalTimelineUpdate::Detach { - id: ZTenantTimelineId::new(tenant_id, *timeline_id), + id: ZTenantTimelineId::new(tenant_id, timeline_id), join_confirmation_sender: sender, }); - walreceiver_join_handles.push((*timeline_id, receiver)); + walreceiver_join_handles.push((timeline_id, receiver)); } - // drop the tenants lock - walreceiver_join_handles - }; + } // wait for wal receivers to stop without holding the lock, because walreceiver // will attempt to change tenant state which is protected by the same global tenants lock. - // TODO do we need a timeout here? how to handle it? 
// recv_timeout is broken: https://github.com/rust-lang/rust/issues/94518#issuecomment-1057440631 - // need to use crossbeam-channel for (timeline_id, join_handle) in walreceiver_join_handles { info!("waiting for wal receiver to shutdown timeline_id {timeline_id}"); join_handle.recv().context("failed to join walreceiver")?; info!("wal receiver shutdown confirmed timeline_id {timeline_id}"); } - tenants_state::write_tenants().remove(&tenant_id); - // If removal fails there will be no way to successfully retry detach, // because tenant no longer exists in in memory map. And it needs to be removed from it // before we remove files because it contains references to repository @@ -590,34 +566,18 @@ fn attach_downloaded_tenant( repo: &Repository, downloaded_timelines: HashSet, ) -> anyhow::Result<()> { - let mut registration_queue = Vec::with_capacity(downloaded_timelines.len()); - - // first need to register the in-mem representations, to avoid missing ancestors during the local disk data registration for timeline_id in downloaded_timelines { + // first, register timeline metadata repo.attach_timeline(timeline_id).with_context(|| { format!("Failed to load timeline {timeline_id} into in-memory repository") })?; - registration_queue.push(timeline_id); - } - - for timeline_id in registration_queue { - let tenant_id = repo.tenant_id(); - match tenants_state::write_tenants().get_mut(&tenant_id) { - Some(tenant) => match tenant.local_timelines.entry(timeline_id) { - Entry::Occupied(_) => { - anyhow::bail!("Local timeline {timeline_id} already registered") - } - Entry::Vacant(v) => { - v.insert(load_local_timeline(repo, timeline_id).with_context(|| { - format!("Failed to register add local timeline for tenant {tenant_id}") - })?); - } - }, - None => anyhow::bail!( - "Tenant {} not found in local tenant state", - repo.tenant_id() - ), - } + // and then load its layers in memory + let _ = load_local_timeline(repo, timeline_id).with_context(|| { + format!( + "Failed to register 
add local timeline for tenant {}", + repo.tenant_id(), + ) + })?; } Ok(()) @@ -647,7 +607,6 @@ fn load_local_repo( Tenant { state: TenantState::Idle, repo, - local_timelines: HashMap::new(), } }); From 32be8739b9d3d4d9abb87dbf99a1836d195e283c Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Fri, 19 Aug 2022 16:02:26 +0300 Subject: [PATCH 18/63] Move walreceiver timeline registration into layered_repository --- pageserver/src/layered_repository.rs | 43 +++++++++++++++------------- pageserver/src/tenant_mgr.rs | 9 ++---- 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs index 42474dac0b..d67b1b0130 100644 --- a/pageserver/src/layered_repository.rs +++ b/pageserver/src/layered_repository.rs @@ -13,6 +13,7 @@ use anyhow::{bail, ensure, Context, Result}; use tracing::*; +use utils::zid::ZTenantTimelineId; use std::cmp::min; use std::collections::hash_map::Entry; @@ -32,6 +33,7 @@ use crate::storage_sync::index::RemoteIndex; use crate::tenant_config::{TenantConf, TenantConfOpt}; use crate::repository::{GcResult, RepositoryTimeline}; +use crate::tenant_mgr::LocalTimelineUpdate; use crate::thread_mgr; use crate::walredo::WalRedoManager; use crate::CheckpointConfig; @@ -125,8 +127,11 @@ impl Repository { /// Get Timeline handle for given zenith timeline ID. /// This function is idempotent. It doesn't change internal state in any way. 
pub fn get_timeline(&self, timelineid: ZTimelineId) -> Option> { - let timelines = self.timelines.lock().unwrap(); - self.get_timeline_internal(timelineid, &timelines) + self.timelines + .lock() + .unwrap() + .get(&timelineid) + .cloned() .map(RepositoryTimeline::from) } @@ -198,6 +203,11 @@ impl Repository { let timeline = Arc::new(timeline); vacant_timeline_entry.insert(LayeredTimelineEntry::Loaded(Arc::clone(&timeline))); + crate::tenant_mgr::try_send_timeline_update(LocalTimelineUpdate::Attach { + id: ZTenantTimelineId::new(self.tenant_id(), timeline_id), + datadir: Arc::clone(&timeline), + }); + Ok(timeline) } @@ -540,45 +550,34 @@ impl Repository { Ok(()) } - // Implementation of the public `get_timeline` function. - // Differences from the public: - // * interface in that the caller must already hold the mutex on the 'timelines' hashmap. - fn get_timeline_internal( - &self, - timelineid: ZTimelineId, - timelines: &HashMap, - ) -> Option { - timelines.get(&timelineid).cloned() - } - // Implementation of the public `get_timeline_load` function. // Differences from the public: // * interface in that the caller must already hold the mutex on the 'timelines' hashmap. fn get_timeline_load_internal( &self, - timelineid: ZTimelineId, + timeline_id: ZTimelineId, timelines: &mut HashMap, ) -> anyhow::Result>> { - match timelines.get(&timelineid) { + match timelines.get(&timeline_id) { Some(entry) => match entry { LayeredTimelineEntry::Loaded(local_timeline) => { - debug!("timeline {} found loaded into memory", &timelineid); + debug!("timeline {timeline_id} found loaded into memory"); return Ok(Some(Arc::clone(local_timeline))); } LayeredTimelineEntry::Unloaded { .. 
} => {} }, None => { - debug!("timeline {} not found", &timelineid); + debug!("timeline {timeline_id} not found"); return Ok(None); } }; debug!( "timeline {} found on a local disk, but not loaded into the memory, loading", - &timelineid + &timeline_id ); - let timeline = self.load_local_timeline(timelineid, timelines)?; + let timeline = self.load_local_timeline(timeline_id, timelines)?; let was_loaded = timelines.insert( - timelineid, + timeline_id, LayeredTimelineEntry::Loaded(Arc::clone(&timeline)), ); ensure!( @@ -586,6 +585,10 @@ impl Repository { || matches!(was_loaded, Some(LayeredTimelineEntry::Unloaded { .. })), "assertion failure, inserted wrong timeline in an incorrect state" ); + crate::tenant_mgr::try_send_timeline_update(LocalTimelineUpdate::Attach { + id: ZTenantTimelineId::new(self.tenant_id(), timeline_id), + datadir: Arc::clone(&timeline), + }); Ok(Some(timeline)) } diff --git a/pageserver/src/tenant_mgr.rs b/pageserver/src/tenant_mgr.rs index 4025d6706e..f5b4308067 100644 --- a/pageserver/src/tenant_mgr.rs +++ b/pageserver/src/tenant_mgr.rs @@ -21,6 +21,7 @@ use tokio::sync::mpsc; use tracing::*; use utils::lsn::Lsn; +pub use tenants_state::try_send_timeline_update; use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId}; mod tenants_state { @@ -68,7 +69,7 @@ mod tenants_state { Ok(()) } - pub(super) fn try_send_timeline_update(update: LocalTimelineUpdate) { + pub fn try_send_timeline_update(update: LocalTimelineUpdate) { match TIMELINE_UPDATE_SENDER .read() .expect("Failed to read() timeline_update_sender lock, it got poisoned") @@ -466,12 +467,6 @@ fn load_local_timeline( format!("Inmem timeline {timeline_id} not found in tenant's repository") })?; inmem_timeline.init_logical_size()?; - - tenants_state::try_send_timeline_update(LocalTimelineUpdate::Attach { - id: ZTenantTimelineId::new(repo.tenant_id(), timeline_id), - datadir: Arc::clone(&inmem_timeline), - }); - Ok(inmem_timeline) } From 777930898580110e8800c3e94b41aceea27e6063 Mon Sep 17 
00:00:00 2001 From: Kirill Bulatov Date: Fri, 19 Aug 2022 17:59:06 +0300 Subject: [PATCH 19/63] Ensure timeline logical size is initialized once --- pageserver/src/layered_repository.rs | 18 +++++++------- pageserver/src/layered_repository/timeline.rs | 12 ++++++++++ pageserver/src/tenant_mgr.rs | 24 +++++++++++++------ pageserver/src/walreceiver.rs | 4 ++-- 4 files changed, 41 insertions(+), 17 deletions(-) diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs index d67b1b0130..dd173498b9 100644 --- a/pageserver/src/layered_repository.rs +++ b/pageserver/src/layered_repository.rs @@ -205,7 +205,7 @@ impl Repository { crate::tenant_mgr::try_send_timeline_update(LocalTimelineUpdate::Attach { id: ZTenantTimelineId::new(self.tenant_id(), timeline_id), - datadir: Arc::clone(&timeline), + timeline: Arc::clone(&timeline), }); Ok(timeline) @@ -572,8 +572,7 @@ impl Repository { } }; debug!( - "timeline {} found on a local disk, but not loaded into the memory, loading", - &timeline_id + "timeline {timeline_id} found on a local disk, but not loaded into the memory, loading" ); let timeline = self.load_local_timeline(timeline_id, timelines)?; let was_loaded = timelines.insert( @@ -585,10 +584,6 @@ impl Repository { || matches!(was_loaded, Some(LayeredTimelineEntry::Unloaded { .. 
})), "assertion failure, inserted wrong timeline in an incorrect state" ); - crate::tenant_mgr::try_send_timeline_update(LocalTimelineUpdate::Attach { - id: ZTenantTimelineId::new(self.tenant_id(), timeline_id), - datadir: Arc::clone(&timeline), - }); Ok(Some(timeline)) } @@ -627,7 +622,14 @@ impl Repository { .load_layer_map(disk_consistent_lsn) .context("failed to load layermap")?; - Ok(Arc::new(timeline)) + let timeline = Arc::new(timeline); + + crate::tenant_mgr::try_send_timeline_update(LocalTimelineUpdate::Attach { + id: ZTenantTimelineId::new(self.tenant_id(), timeline_id), + timeline: Arc::clone(&timeline), + }); + + Ok(timeline) } pub fn new( diff --git a/pageserver/src/layered_repository/timeline.rs b/pageserver/src/layered_repository/timeline.rs index 7bbde53dbd..fb5a4d0b83 100644 --- a/pageserver/src/layered_repository/timeline.rs +++ b/pageserver/src/layered_repository/timeline.rs @@ -412,6 +412,11 @@ pub struct Timeline { /// and `set_current_logical_size` functions to modify this, they will /// also keep the prometheus metric in sync. current_logical_size: AtomicI64, + // TODO we don't have a good, API to ensure on a compilation level + // that the timeline passes all initialization. + // Hence we ensure that we init at least once for every timeline + // and keep this flag to avoid potentually long recomputes. + logical_size_initialized: AtomicBool, /// Information about the last processed message by the WAL receiver, /// or None if WAL receiver has not received anything for this timeline @@ -731,6 +736,7 @@ impl Timeline { initdb_lsn: metadata.initdb_lsn(), current_logical_size: AtomicI64::new(0), + logical_size_initialized: AtomicBool::new(false), partitioning: Mutex::new((KeyPartitioning::new(), Lsn(0))), repartition_threshold: 0, @@ -835,6 +841,10 @@ impl Timeline { /// /// This can be a slow operation. 
pub fn init_logical_size(&self) -> Result<()> { + if self.logical_size_initialized.load(AtomicOrdering::Acquire) { + return Ok(()); + } + // Try a fast-path first: // Copy logical size from ancestor timeline if there has been no changes on this // branch, and no changes on the ancestor branch since the branch point. @@ -907,6 +917,8 @@ impl Timeline { fn set_current_logical_size(&self, new_size: u64) { self.current_logical_size .store(new_size as i64, AtomicOrdering::SeqCst); + self.logical_size_initialized + .store(true, AtomicOrdering::SeqCst); // Also set the value in the prometheus gauge. Same race condition // here as in `update_current_logical_size`. diff --git a/pageserver/src/tenant_mgr.rs b/pageserver/src/tenant_mgr.rs index f5b4308067..921d973a41 100644 --- a/pageserver/src/tenant_mgr.rs +++ b/pageserver/src/tenant_mgr.rs @@ -172,15 +172,15 @@ pub enum LocalTimelineUpdate { }, Attach { id: ZTenantTimelineId, - datadir: Arc, + timeline: Arc, }, } impl std::fmt::Debug for LocalTimelineUpdate { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Self::Detach { id, .. } => f.debug_tuple("Remove").field(id).finish(), - Self::Attach { id, .. } => f.debug_tuple("Add").field(id).finish(), + Self::Detach { id, .. } => f.debug_tuple("Detach").field(id).finish(), + Self::Attach { id, .. 
} => f.debug_tuple("Attach").field(id).finish(), } } } @@ -376,7 +376,10 @@ pub fn get_local_timeline_with_load( ) -> anyhow::Result> { let repository = get_repository_for_tenant(tenant_id)?; match repository.get_timeline(timeline_id) { - Some(RepositoryTimeline::Loaded(loaded_timeline)) => Ok(loaded_timeline), + Some(RepositoryTimeline::Loaded(loaded_timeline)) => { + loaded_timeline.init_logical_size()?; + Ok(loaded_timeline) + } _ => load_local_timeline(&repository, timeline_id) .with_context(|| format!("Failed to load local timeline for tenant {tenant_id}")), } @@ -435,13 +438,17 @@ pub fn detach_tenant(conf: &'static PageServerConf, tenant_id: ZTenantId) -> any // wait for wal receivers to stop without holding the lock, because walreceiver // will attempt to change tenant state which is protected by the same global tenants lock. + // TODO do we need a timeout here? how to handle it? // recv_timeout is broken: https://github.com/rust-lang/rust/issues/94518#issuecomment-1057440631 + // need to use crossbeam-channel for (timeline_id, join_handle) in walreceiver_join_handles { info!("waiting for wal receiver to shutdown timeline_id {timeline_id}"); join_handle.recv().context("failed to join walreceiver")?; info!("wal receiver shutdown confirmed timeline_id {timeline_id}"); } + tenants_state::write_tenants().remove(&tenant_id); + // If removal fails there will be no way to successfully retry detach, // because tenant no longer exists in in memory map. 
And it needs to be removed from it // before we remove files because it contains references to repository @@ -561,12 +568,15 @@ fn attach_downloaded_tenant( repo: &Repository, downloaded_timelines: HashSet, ) -> anyhow::Result<()> { - for timeline_id in downloaded_timelines { - // first, register timeline metadata + // first, register timeline metadata to ensure ancestors will be found later during layer load + for &timeline_id in &downloaded_timelines { repo.attach_timeline(timeline_id).with_context(|| { format!("Failed to load timeline {timeline_id} into in-memory repository") })?; - // and then load its layers in memory + } + + // and then load its layers in memory + for timeline_id in downloaded_timelines { let _ = load_local_timeline(repo, timeline_id).with_context(|| { format!( "Failed to register add local timeline for tenant {}", diff --git a/pageserver/src/walreceiver.rs b/pageserver/src/walreceiver.rs index 8a466a8a67..d6420e1d18 100644 --- a/pageserver/src/walreceiver.rs +++ b/pageserver/src/walreceiver.rs @@ -269,7 +269,7 @@ async fn wal_receiver_main_thread_loop_step<'a>( } } // Timeline got attached, retrieve all necessary information to start its broker loop and maintain this loop endlessly. 
- LocalTimelineUpdate::Attach { id, datadir } => { + LocalTimelineUpdate::Attach { id, timeline } => { let timeline_connection_managers = local_timeline_wal_receivers .entry(id.tenant_id) .or_default(); @@ -305,7 +305,7 @@ async fn wal_receiver_main_thread_loop_step<'a>( id, broker_prefix.to_owned(), etcd_client.clone(), - datadir, + timeline, wal_connect_timeout, lagging_wal_timeout, max_lsn_wal_lag, From 277f2d6d3d55f6e6391997a9c163193eb1d1a964 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Mon, 22 Aug 2022 11:21:50 +0100 Subject: [PATCH 20/63] Report test results to Allure (#2229) --- .github/actions/allure-report/action.yml | 219 ++++++ .../actions/run-python-test-set/action.yml | 10 + .github/workflows/build_and_test.yml | 23 + poetry.lock | 718 +++++++++--------- pyproject.toml | 1 + test_runner/fixtures/neon_fixtures.py | 33 +- 6 files changed, 661 insertions(+), 343 deletions(-) create mode 100644 .github/actions/allure-report/action.yml diff --git a/.github/actions/allure-report/action.yml b/.github/actions/allure-report/action.yml new file mode 100644 index 0000000000..2e52bd7695 --- /dev/null +++ b/.github/actions/allure-report/action.yml @@ -0,0 +1,219 @@ +name: 'Create Allure report' +description: 'Create and publish Allure report' + +inputs: + action: + desctiption: 'generate or store' + required: true + build_type: + description: '`build_type` from run-python-test-set action' + required: true + test_selection: + description: '`test_selector` from run-python-test-set action' + required: false + +runs: + using: "composite" + steps: + - name: Validate input parameters + shell: bash -euxo pipefail {0} + run: | + if [ "${{ inputs.action }}" != "store"] && [ "${{ inputs.action }}" != "generate" ]; then + echo 2>&1 "Unknown inputs.action type '${{ inputs.action }}'; allowed 'generate' or 'store' only" + exit 1 + fi + + if [ -z "${{ inputs.test_selection }}" ] && [ "${{ inputs.action }}" == "store" ]; then + echo 2>&1 "inputs.test_selection must be 
set for 'store' action" + exit 2 + fi + + - name: Calculate key + id: calculate-key + shell: bash -euxo pipefail {0} + run: | + # TODO: for manually triggered workflows (via workflow_dispatch) we need to have a separate key + + pr_number=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true) + if [ "${pr_number}" != "null" ]; then + key=pr-${pr_number} + elif [ "${GITHUB_REF}" = "refs/heads/main" ]; then + # Shortcut for a special branch + key=main + else + key=branch-$(echo ${GITHUB_REF#refs/heads/} | tr -cd "[:alnum:]._-") + fi + echo "::set-output name=KEY::${key}" + + - uses: actions/setup-java@v3 + if: ${{ inputs.action == 'generate' }} + with: + distribution: 'temurin' + java-version: '17' + + - name: Install Allure + if: ${{ inputs.action == 'generate' }} + shell: bash -euxo pipefail {0} + run: | + if ! which allure; then + ALLURE_ZIP=allure-${ALLURE_VERSION}.zip + wget -q https://github.com/allure-framework/allure2/releases/download/${ALLURE_VERSION}/${ALLURE_ZIP} + echo "${ALLURE_ZIP_MD5} ${ALLURE_ZIP}" | md5sum -c + unzip -q ${ALLURE_ZIP} + echo "$(pwd)/allure-${ALLURE_VERSION}/bin" >> $GITHUB_PATH + rm -f ${ALLURE_ZIP} + fi + env: + ALLURE_VERSION: 2.19.0 + ALLURE_ZIP_MD5: ced21401a1a8b9dfb68cee9e4c210464 + + - name: Upload Allure results + if: ${{ inputs.action == 'store' }} + env: + REPORT_PREFIX: reports/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }} + RAW_PREFIX: reports-raw/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }} + TEST_OUTPUT: /tmp/test_output + BUCKET: neon-github-public-dev + shell: bash -euxo pipefail {0} + run: | + # Add metadata + cat < $TEST_OUTPUT/allure/results/executor.json + { + "name": "GitHub Actions", + "type": "github", + "url": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/latest/index.html", + "buildOrder": ${GITHUB_RUN_ID}, + "buildName": "GitHub Actions Run #${{ github.run_number }}/${GITHUB_RUN_ATTEMPT}", + "buildUrl": 
"${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/attempts/${GITHUB_RUN_ATTEMPT}", + "reportUrl": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html", + "reportName": "Allure Report" + } + EOF + cat < $TEST_OUTPUT/allure/results/environment.properties + TEST_SELECTION=${{ inputs.test_selection }} + BUILD_TYPE=${{ inputs.build_type }} + EOF + + ARCHIVE="${GITHUB_RUN_ID}-${{ inputs.test_selection }}-${GITHUB_RUN_ATTEMPT}.tar.zst" + ZSTD_NBTHREADS=0 + + tar -C ${TEST_OUTPUT}/allure/results -cf ${ARCHIVE} --zstd . + aws s3 mv --only-show-errors ${ARCHIVE} "s3://${BUCKET}/${RAW_PREFIX}/${ARCHIVE}" + + # Potentially we could have several running build for the same key (for example for the main branch), so we use improvised lock for this + - name: Acquire Allure lock + if: ${{ inputs.action == 'generate' }} + shell: bash -euxo pipefail {0} + env: + LOCK_FILE: reports/${{ steps.calculate-key.outputs.KEY }}/lock.txt + BUCKET: neon-github-public-dev + run: | + LOCK_TIMEOUT=300 # seconds + + for _ in $(seq 1 5); do + for i in $(seq 1 ${LOCK_TIMEOUT}); do + LOCK_ADDED=$(aws s3api head-object --bucket neon-github-public-dev --key ${LOCK_FILE} | jq --raw-output '.LastModified' || true) + # `date --date="..."` is supported only by gnu date (i.e. 
it doesn't work on BSD/macOS) + if [ -z "${LOCK_ADDED}" ] || [ "$(( $(date +%s) - $(date --date="${LOCK_ADDED}" +%s) ))" -gt "${LOCK_TIMEOUT}" ]; then + break + fi + sleep 1 + done + echo "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${{ inputs.test_selection }}" > lock.txt + aws s3 mv --only-show-errors lock.txt "s3://${BUCKET}/${LOCK_FILE}" + + # A double-check that exactly WE have acquired the lock + aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt + if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${{ inputs.test_selection }}" ]; then + break + fi + done + + - name: Generate and publish final Allure report + if: ${{ inputs.action == 'generate' }} + id: generate-report + env: + REPORT_PREFIX: reports/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }} + RAW_PREFIX: reports-raw/${{ steps.calculate-key.outputs.KEY }}/${{ inputs.build_type }} + TEST_OUTPUT: /tmp/test_output + BUCKET: neon-github-public-dev + shell: bash -euxo pipefail {0} + run: | + # Get previously uploaded data for this run + ZSTD_NBTHREADS=0 + + s3_filepaths=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${RAW_PREFIX}/${GITHUB_RUN_ID}- | jq --raw-output '.Contents[].Key') + if [ -z "$s3_filepaths" ]; then + # There's no previously uploaded data for this run + exit 0 + fi + for s3_filepath in ${s3_filepaths}; do + aws s3 cp --only-show-errors "s3://${BUCKET}/${s3_filepath}" "${TEST_OUTPUT}/allure/" + + archive=${TEST_OUTPUT}/allure/$(basename $s3_filepath) + mkdir -p ${archive%.tar.zst} + tar -xf ${archive} -C ${archive%.tar.zst} + rm -f ${archive} + done + + # Get history trend + aws s3 cp --recursive --only-show-errors "s3://${BUCKET}/${REPORT_PREFIX}/latest/history" "${TEST_OUTPUT}/allure/latest/history" || true + + # Generate report + allure generate --clean --output $TEST_OUTPUT/allure/report $TEST_OUTPUT/allure/* + + # Replace a logo link with a redirect to the latest version of the report + sed -i 's| ./index.html + + + + Redirecting to 
${REPORT_URL} + + EOF + aws s3 cp --only-show-errors ./index.html "s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html" + + echo "[Allure Report](${REPORT_URL})" >> ${GITHUB_STEP_SUMMARY} + echo "::set-output name=REPORT_URL::${REPORT_URL}" + + - name: Release Allure lock + if: ${{ inputs.action == 'generate' && always() }} + shell: bash -euxo pipefail {0} + env: + LOCK_FILE: reports/${{ steps.calculate-key.outputs.KEY }}/lock.txt + BUCKET: neon-github-public-dev + run: | + aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt || exit 0 + + if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${{ inputs.test_selection }}" ]; then + aws s3 rm "s3://${BUCKET}/${LOCK_FILE}" + fi + + - uses: actions/github-script@v6 + if: ${{ inputs.action == 'generate' && always() }} + env: + REPORT_URL: ${{ steps.generate-report.outputs.REPORT_URL }} + BUILD_TYPE: ${{ inputs.build_type }} + SHA: ${{ github.event.pull_request.head.sha || github.sha }} + with: + script: | + const { REPORT_URL, BUILD_TYPE, SHA } = process.env + + result = await github.rest.repos.createCommitStatus({ + owner: context.repo.owner, + repo: context.repo.repo, + sha: `${SHA}`, + state: 'success', + target_url: `${REPORT_URL}`, + context: `Allure report / ${BUILD_TYPE}`, + }) + + console.log(result); diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml index 3900f93ee4..22447025cb 100644 --- a/.github/actions/run-python-test-set/action.yml +++ b/.github/actions/run-python-test-set/action.yml @@ -131,8 +131,10 @@ runs: # -n4 uses four processes to run tests via pytest-xdist # -s is not used to prevent pytest from capturing output, because tests are running # in parallel and logs are mixed between different tests + mkdir -p $TEST_OUTPUT/allure/results "${cov_prefix[@]}" ./scripts/pytest \ --junitxml=$TEST_OUTPUT/junit.xml \ + --alluredir=$TEST_OUTPUT/allure/results \ --tb=short \ --verbose \ -m "not remote_cluster" \ @@ -146,6 
+148,14 @@ runs: fi fi + - name: Upload Allure results + if: ${{ always() && (inputs.test_selection == 'batch_others' || inputs.test_selection == 'batch_pg_regress') }} + uses: ./.github/actions/allure-report + with: + action: store + build_type: ${{ inputs.build_type }} + test_selection: ${{ inputs.test_selection }} + - name: Delete all data but logs shell: bash -euxo pipefail {0} if: always() diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 4cabd3d672..dab34c84bc 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -298,6 +298,29 @@ jobs: # XXX: no coverage data handling here, since benchmarks are run on release builds, # while coverage is currently collected for the debug ones + merge-allure-report: + runs-on: dev + container: + image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned + options: --init + needs: [ other-tests, pg_regress-tests ] + if: always() + strategy: + fail-fast: false + matrix: + build_type: [ debug, release ] + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + submodules: false + + - name: Merge and Allure results + uses: ./.github/actions/allure-report + with: + action: generate + build_type: ${{ matrix.build_type }} + coverage-report: runs-on: dev container: diff --git a/poetry.lock b/poetry.lock index 6ab6bb0e20..17b59852f4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -13,6 +13,32 @@ psycopg2-binary = ">=2.8.4" [package.extras] sa = ["sqlalchemy[postgresql_psycopg2binary] (>=1.3,<1.5)"] +[[package]] +name = "allure-pytest" +version = "2.9.45" +description = "Allure pytest integration" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +allure-python-commons = "2.9.45" +pytest = ">=4.5.0" +six = ">=1.9.0" + +[[package]] +name = "allure-python-commons" +version = "2.9.45" +description = "Common module for integrate allure with python-based frameworks" +category = "main" +optional = false 
+python-versions = ">=3.5" + +[package.dependencies] +attrs = ">=16.0.0" +pluggy = ">=0.4.0" +six = ">=1.9.0" + [[package]] name = "async-timeout" version = "4.0.2" @@ -109,8 +135,8 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "boto3-stubs" -version = "1.24.46" -description = "Type annotations for boto3 1.24.46 generated with mypy-boto3-builder 7.11.3" +version = "1.24.51" +description = "Type annotations for boto3 1.24.51 generated with mypy-boto3-builder 7.11.6" category = "main" optional = false python-versions = ">=3.7" @@ -122,319 +148,321 @@ types-s3transfer = "*" typing-extensions = ">=4.1.0" [package.extras] -accessanalyzer = ["mypy-boto3-accessanalyzer (>=1.24.0,<1.25.0)"] -account = ["mypy-boto3-account (>=1.24.0,<1.25.0)"] -acm = ["mypy-boto3-acm (>=1.24.0,<1.25.0)"] -acm-pca = ["mypy-boto3-acm-pca (>=1.24.0,<1.25.0)"] -alexaforbusiness = ["mypy-boto3-alexaforbusiness (>=1.24.0,<1.25.0)"] -all = ["mypy-boto3-accessanalyzer (>=1.24.0,<1.25.0)", "mypy-boto3-account (>=1.24.0,<1.25.0)", "mypy-boto3-acm (>=1.24.0,<1.25.0)", "mypy-boto3-acm-pca (>=1.24.0,<1.25.0)", "mypy-boto3-alexaforbusiness (>=1.24.0,<1.25.0)", "mypy-boto3-amp (>=1.24.0,<1.25.0)", "mypy-boto3-amplify (>=1.24.0,<1.25.0)", "mypy-boto3-amplifybackend (>=1.24.0,<1.25.0)", "mypy-boto3-amplifyuibuilder (>=1.24.0,<1.25.0)", "mypy-boto3-apigateway (>=1.24.0,<1.25.0)", "mypy-boto3-apigatewaymanagementapi (>=1.24.0,<1.25.0)", "mypy-boto3-apigatewayv2 (>=1.24.0,<1.25.0)", "mypy-boto3-appconfig (>=1.24.0,<1.25.0)", "mypy-boto3-appconfigdata (>=1.24.0,<1.25.0)", "mypy-boto3-appflow (>=1.24.0,<1.25.0)", "mypy-boto3-appintegrations (>=1.24.0,<1.25.0)", "mypy-boto3-application-autoscaling (>=1.24.0,<1.25.0)", "mypy-boto3-application-insights (>=1.24.0,<1.25.0)", "mypy-boto3-applicationcostprofiler (>=1.24.0,<1.25.0)", "mypy-boto3-appmesh (>=1.24.0,<1.25.0)", "mypy-boto3-apprunner (>=1.24.0,<1.25.0)", "mypy-boto3-appstream (>=1.24.0,<1.25.0)", "mypy-boto3-appsync (>=1.24.0,<1.25.0)", 
"mypy-boto3-athena (>=1.24.0,<1.25.0)", "mypy-boto3-auditmanager (>=1.24.0,<1.25.0)", "mypy-boto3-autoscaling (>=1.24.0,<1.25.0)", "mypy-boto3-autoscaling-plans (>=1.24.0,<1.25.0)", "mypy-boto3-backup (>=1.24.0,<1.25.0)", "mypy-boto3-backup-gateway (>=1.24.0,<1.25.0)", "mypy-boto3-batch (>=1.24.0,<1.25.0)", "mypy-boto3-billingconductor (>=1.24.0,<1.25.0)", "mypy-boto3-braket (>=1.24.0,<1.25.0)", "mypy-boto3-budgets (>=1.24.0,<1.25.0)", "mypy-boto3-ce (>=1.24.0,<1.25.0)", "mypy-boto3-chime (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-identity (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-media-pipelines (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-meetings (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-messaging (>=1.24.0,<1.25.0)", "mypy-boto3-cloud9 (>=1.24.0,<1.25.0)", "mypy-boto3-cloudcontrol (>=1.24.0,<1.25.0)", "mypy-boto3-clouddirectory (>=1.24.0,<1.25.0)", "mypy-boto3-cloudformation (>=1.24.0,<1.25.0)", "mypy-boto3-cloudfront (>=1.24.0,<1.25.0)", "mypy-boto3-cloudhsm (>=1.24.0,<1.25.0)", "mypy-boto3-cloudhsmv2 (>=1.24.0,<1.25.0)", "mypy-boto3-cloudsearch (>=1.24.0,<1.25.0)", "mypy-boto3-cloudsearchdomain (>=1.24.0,<1.25.0)", "mypy-boto3-cloudtrail (>=1.24.0,<1.25.0)", "mypy-boto3-cloudwatch (>=1.24.0,<1.25.0)", "mypy-boto3-codeartifact (>=1.24.0,<1.25.0)", "mypy-boto3-codebuild (>=1.24.0,<1.25.0)", "mypy-boto3-codecommit (>=1.24.0,<1.25.0)", "mypy-boto3-codedeploy (>=1.24.0,<1.25.0)", "mypy-boto3-codeguru-reviewer (>=1.24.0,<1.25.0)", "mypy-boto3-codeguruprofiler (>=1.24.0,<1.25.0)", "mypy-boto3-codepipeline (>=1.24.0,<1.25.0)", "mypy-boto3-codestar (>=1.24.0,<1.25.0)", "mypy-boto3-codestar-connections (>=1.24.0,<1.25.0)", "mypy-boto3-codestar-notifications (>=1.24.0,<1.25.0)", "mypy-boto3-cognito-identity (>=1.24.0,<1.25.0)", "mypy-boto3-cognito-idp (>=1.24.0,<1.25.0)", "mypy-boto3-cognito-sync (>=1.24.0,<1.25.0)", "mypy-boto3-comprehend (>=1.24.0,<1.25.0)", "mypy-boto3-comprehendmedical (>=1.24.0,<1.25.0)", "mypy-boto3-compute-optimizer (>=1.24.0,<1.25.0)", 
"mypy-boto3-config (>=1.24.0,<1.25.0)", "mypy-boto3-connect (>=1.24.0,<1.25.0)", "mypy-boto3-connect-contact-lens (>=1.24.0,<1.25.0)", "mypy-boto3-connectcampaigns (>=1.24.0,<1.25.0)", "mypy-boto3-connectparticipant (>=1.24.0,<1.25.0)", "mypy-boto3-cur (>=1.24.0,<1.25.0)", "mypy-boto3-customer-profiles (>=1.24.0,<1.25.0)", "mypy-boto3-databrew (>=1.24.0,<1.25.0)", "mypy-boto3-dataexchange (>=1.24.0,<1.25.0)", "mypy-boto3-datapipeline (>=1.24.0,<1.25.0)", "mypy-boto3-datasync (>=1.24.0,<1.25.0)", "mypy-boto3-dax (>=1.24.0,<1.25.0)", "mypy-boto3-detective (>=1.24.0,<1.25.0)", "mypy-boto3-devicefarm (>=1.24.0,<1.25.0)", "mypy-boto3-devops-guru (>=1.24.0,<1.25.0)", "mypy-boto3-directconnect (>=1.24.0,<1.25.0)", "mypy-boto3-discovery (>=1.24.0,<1.25.0)", "mypy-boto3-dlm (>=1.24.0,<1.25.0)", "mypy-boto3-dms (>=1.24.0,<1.25.0)", "mypy-boto3-docdb (>=1.24.0,<1.25.0)", "mypy-boto3-drs (>=1.24.0,<1.25.0)", "mypy-boto3-ds (>=1.24.0,<1.25.0)", "mypy-boto3-dynamodb (>=1.24.0,<1.25.0)", "mypy-boto3-dynamodbstreams (>=1.24.0,<1.25.0)", "mypy-boto3-ebs (>=1.24.0,<1.25.0)", "mypy-boto3-ec2 (>=1.24.0,<1.25.0)", "mypy-boto3-ec2-instance-connect (>=1.24.0,<1.25.0)", "mypy-boto3-ecr (>=1.24.0,<1.25.0)", "mypy-boto3-ecr-public (>=1.24.0,<1.25.0)", "mypy-boto3-ecs (>=1.24.0,<1.25.0)", "mypy-boto3-efs (>=1.24.0,<1.25.0)", "mypy-boto3-eks (>=1.24.0,<1.25.0)", "mypy-boto3-elastic-inference (>=1.24.0,<1.25.0)", "mypy-boto3-elasticache (>=1.24.0,<1.25.0)", "mypy-boto3-elasticbeanstalk (>=1.24.0,<1.25.0)", "mypy-boto3-elastictranscoder (>=1.24.0,<1.25.0)", "mypy-boto3-elb (>=1.24.0,<1.25.0)", "mypy-boto3-elbv2 (>=1.24.0,<1.25.0)", "mypy-boto3-emr (>=1.24.0,<1.25.0)", "mypy-boto3-emr-containers (>=1.24.0,<1.25.0)", "mypy-boto3-emr-serverless (>=1.24.0,<1.25.0)", "mypy-boto3-es (>=1.24.0,<1.25.0)", "mypy-boto3-events (>=1.24.0,<1.25.0)", "mypy-boto3-evidently (>=1.24.0,<1.25.0)", "mypy-boto3-finspace (>=1.24.0,<1.25.0)", "mypy-boto3-finspace-data (>=1.24.0,<1.25.0)", "mypy-boto3-firehose 
(>=1.24.0,<1.25.0)", "mypy-boto3-fis (>=1.24.0,<1.25.0)", "mypy-boto3-fms (>=1.24.0,<1.25.0)", "mypy-boto3-forecast (>=1.24.0,<1.25.0)", "mypy-boto3-forecastquery (>=1.24.0,<1.25.0)", "mypy-boto3-frauddetector (>=1.24.0,<1.25.0)", "mypy-boto3-fsx (>=1.24.0,<1.25.0)", "mypy-boto3-gamelift (>=1.24.0,<1.25.0)", "mypy-boto3-gamesparks (>=1.24.0,<1.25.0)", "mypy-boto3-glacier (>=1.24.0,<1.25.0)", "mypy-boto3-globalaccelerator (>=1.24.0,<1.25.0)", "mypy-boto3-glue (>=1.24.0,<1.25.0)", "mypy-boto3-grafana (>=1.24.0,<1.25.0)", "mypy-boto3-greengrass (>=1.24.0,<1.25.0)", "mypy-boto3-greengrassv2 (>=1.24.0,<1.25.0)", "mypy-boto3-groundstation (>=1.24.0,<1.25.0)", "mypy-boto3-guardduty (>=1.24.0,<1.25.0)", "mypy-boto3-health (>=1.24.0,<1.25.0)", "mypy-boto3-healthlake (>=1.24.0,<1.25.0)", "mypy-boto3-honeycode (>=1.24.0,<1.25.0)", "mypy-boto3-iam (>=1.24.0,<1.25.0)", "mypy-boto3-identitystore (>=1.24.0,<1.25.0)", "mypy-boto3-imagebuilder (>=1.24.0,<1.25.0)", "mypy-boto3-importexport (>=1.24.0,<1.25.0)", "mypy-boto3-inspector (>=1.24.0,<1.25.0)", "mypy-boto3-inspector2 (>=1.24.0,<1.25.0)", "mypy-boto3-iot (>=1.24.0,<1.25.0)", "mypy-boto3-iot-data (>=1.24.0,<1.25.0)", "mypy-boto3-iot-jobs-data (>=1.24.0,<1.25.0)", "mypy-boto3-iot1click-devices (>=1.24.0,<1.25.0)", "mypy-boto3-iot1click-projects (>=1.24.0,<1.25.0)", "mypy-boto3-iotanalytics (>=1.24.0,<1.25.0)", "mypy-boto3-iotdeviceadvisor (>=1.24.0,<1.25.0)", "mypy-boto3-iotevents (>=1.24.0,<1.25.0)", "mypy-boto3-iotevents-data (>=1.24.0,<1.25.0)", "mypy-boto3-iotfleethub (>=1.24.0,<1.25.0)", "mypy-boto3-iotsecuretunneling (>=1.24.0,<1.25.0)", "mypy-boto3-iotsitewise (>=1.24.0,<1.25.0)", "mypy-boto3-iotthingsgraph (>=1.24.0,<1.25.0)", "mypy-boto3-iottwinmaker (>=1.24.0,<1.25.0)", "mypy-boto3-iotwireless (>=1.24.0,<1.25.0)", "mypy-boto3-ivs (>=1.24.0,<1.25.0)", "mypy-boto3-ivschat (>=1.24.0,<1.25.0)", "mypy-boto3-kafka (>=1.24.0,<1.25.0)", "mypy-boto3-kafkaconnect (>=1.24.0,<1.25.0)", "mypy-boto3-kendra (>=1.24.0,<1.25.0)", 
"mypy-boto3-keyspaces (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis-video-archived-media (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis-video-media (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis-video-signaling (>=1.24.0,<1.25.0)", "mypy-boto3-kinesisanalytics (>=1.24.0,<1.25.0)", "mypy-boto3-kinesisanalyticsv2 (>=1.24.0,<1.25.0)", "mypy-boto3-kinesisvideo (>=1.24.0,<1.25.0)", "mypy-boto3-kms (>=1.24.0,<1.25.0)", "mypy-boto3-lakeformation (>=1.24.0,<1.25.0)", "mypy-boto3-lambda (>=1.24.0,<1.25.0)", "mypy-boto3-lex-models (>=1.24.0,<1.25.0)", "mypy-boto3-lex-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-lexv2-models (>=1.24.0,<1.25.0)", "mypy-boto3-lexv2-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-license-manager (>=1.24.0,<1.25.0)", "mypy-boto3-license-manager-user-subscriptions (>=1.24.0,<1.25.0)", "mypy-boto3-lightsail (>=1.24.0,<1.25.0)", "mypy-boto3-location (>=1.24.0,<1.25.0)", "mypy-boto3-logs (>=1.24.0,<1.25.0)", "mypy-boto3-lookoutequipment (>=1.24.0,<1.25.0)", "mypy-boto3-lookoutmetrics (>=1.24.0,<1.25.0)", "mypy-boto3-lookoutvision (>=1.24.0,<1.25.0)", "mypy-boto3-m2 (>=1.24.0,<1.25.0)", "mypy-boto3-machinelearning (>=1.24.0,<1.25.0)", "mypy-boto3-macie (>=1.24.0,<1.25.0)", "mypy-boto3-macie2 (>=1.24.0,<1.25.0)", "mypy-boto3-managedblockchain (>=1.24.0,<1.25.0)", "mypy-boto3-marketplace-catalog (>=1.24.0,<1.25.0)", "mypy-boto3-marketplace-entitlement (>=1.24.0,<1.25.0)", "mypy-boto3-marketplacecommerceanalytics (>=1.24.0,<1.25.0)", "mypy-boto3-mediaconnect (>=1.24.0,<1.25.0)", "mypy-boto3-mediaconvert (>=1.24.0,<1.25.0)", "mypy-boto3-medialive (>=1.24.0,<1.25.0)", "mypy-boto3-mediapackage (>=1.24.0,<1.25.0)", "mypy-boto3-mediapackage-vod (>=1.24.0,<1.25.0)", "mypy-boto3-mediastore (>=1.24.0,<1.25.0)", "mypy-boto3-mediastore-data (>=1.24.0,<1.25.0)", "mypy-boto3-mediatailor (>=1.24.0,<1.25.0)", "mypy-boto3-memorydb (>=1.24.0,<1.25.0)", "mypy-boto3-meteringmarketplace (>=1.24.0,<1.25.0)", "mypy-boto3-mgh (>=1.24.0,<1.25.0)", "mypy-boto3-mgn 
(>=1.24.0,<1.25.0)", "mypy-boto3-migration-hub-refactor-spaces (>=1.24.0,<1.25.0)", "mypy-boto3-migrationhub-config (>=1.24.0,<1.25.0)", "mypy-boto3-migrationhubstrategy (>=1.24.0,<1.25.0)", "mypy-boto3-mobile (>=1.24.0,<1.25.0)", "mypy-boto3-mq (>=1.24.0,<1.25.0)", "mypy-boto3-mturk (>=1.24.0,<1.25.0)", "mypy-boto3-mwaa (>=1.24.0,<1.25.0)", "mypy-boto3-neptune (>=1.24.0,<1.25.0)", "mypy-boto3-network-firewall (>=1.24.0,<1.25.0)", "mypy-boto3-networkmanager (>=1.24.0,<1.25.0)", "mypy-boto3-nimble (>=1.24.0,<1.25.0)", "mypy-boto3-opensearch (>=1.24.0,<1.25.0)", "mypy-boto3-opsworks (>=1.24.0,<1.25.0)", "mypy-boto3-opsworkscm (>=1.24.0,<1.25.0)", "mypy-boto3-organizations (>=1.24.0,<1.25.0)", "mypy-boto3-outposts (>=1.24.0,<1.25.0)", "mypy-boto3-panorama (>=1.24.0,<1.25.0)", "mypy-boto3-personalize (>=1.24.0,<1.25.0)", "mypy-boto3-personalize-events (>=1.24.0,<1.25.0)", "mypy-boto3-personalize-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-pi (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint-email (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint-sms-voice (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint-sms-voice-v2 (>=1.24.0,<1.25.0)", "mypy-boto3-polly (>=1.24.0,<1.25.0)", "mypy-boto3-pricing (>=1.24.0,<1.25.0)", "mypy-boto3-proton (>=1.24.0,<1.25.0)", "mypy-boto3-qldb (>=1.24.0,<1.25.0)", "mypy-boto3-qldb-session (>=1.24.0,<1.25.0)", "mypy-boto3-quicksight (>=1.24.0,<1.25.0)", "mypy-boto3-ram (>=1.24.0,<1.25.0)", "mypy-boto3-rbin (>=1.24.0,<1.25.0)", "mypy-boto3-rds (>=1.24.0,<1.25.0)", "mypy-boto3-rds-data (>=1.24.0,<1.25.0)", "mypy-boto3-redshift (>=1.24.0,<1.25.0)", "mypy-boto3-redshift-data (>=1.24.0,<1.25.0)", "mypy-boto3-redshift-serverless (>=1.24.0,<1.25.0)", "mypy-boto3-rekognition (>=1.24.0,<1.25.0)", "mypy-boto3-resiliencehub (>=1.24.0,<1.25.0)", "mypy-boto3-resource-groups (>=1.24.0,<1.25.0)", "mypy-boto3-resourcegroupstaggingapi (>=1.24.0,<1.25.0)", "mypy-boto3-robomaker (>=1.24.0,<1.25.0)", "mypy-boto3-rolesanywhere (>=1.24.0,<1.25.0)", 
"mypy-boto3-route53 (>=1.24.0,<1.25.0)", "mypy-boto3-route53-recovery-cluster (>=1.24.0,<1.25.0)", "mypy-boto3-route53-recovery-control-config (>=1.24.0,<1.25.0)", "mypy-boto3-route53-recovery-readiness (>=1.24.0,<1.25.0)", "mypy-boto3-route53domains (>=1.24.0,<1.25.0)", "mypy-boto3-route53resolver (>=1.24.0,<1.25.0)", "mypy-boto3-rum (>=1.24.0,<1.25.0)", "mypy-boto3-s3 (>=1.24.0,<1.25.0)", "mypy-boto3-s3control (>=1.24.0,<1.25.0)", "mypy-boto3-s3outposts (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-a2i-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-edge (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-featurestore-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-savingsplans (>=1.24.0,<1.25.0)", "mypy-boto3-schemas (>=1.24.0,<1.25.0)", "mypy-boto3-sdb (>=1.24.0,<1.25.0)", "mypy-boto3-secretsmanager (>=1.24.0,<1.25.0)", "mypy-boto3-securityhub (>=1.24.0,<1.25.0)", "mypy-boto3-serverlessrepo (>=1.24.0,<1.25.0)", "mypy-boto3-service-quotas (>=1.24.0,<1.25.0)", "mypy-boto3-servicecatalog (>=1.24.0,<1.25.0)", "mypy-boto3-servicecatalog-appregistry (>=1.24.0,<1.25.0)", "mypy-boto3-servicediscovery (>=1.24.0,<1.25.0)", "mypy-boto3-ses (>=1.24.0,<1.25.0)", "mypy-boto3-sesv2 (>=1.24.0,<1.25.0)", "mypy-boto3-shield (>=1.24.0,<1.25.0)", "mypy-boto3-signer (>=1.24.0,<1.25.0)", "mypy-boto3-sms (>=1.24.0,<1.25.0)", "mypy-boto3-sms-voice (>=1.24.0,<1.25.0)", "mypy-boto3-snow-device-management (>=1.24.0,<1.25.0)", "mypy-boto3-snowball (>=1.24.0,<1.25.0)", "mypy-boto3-sns (>=1.24.0,<1.25.0)", "mypy-boto3-sqs (>=1.24.0,<1.25.0)", "mypy-boto3-ssm (>=1.24.0,<1.25.0)", "mypy-boto3-ssm-contacts (>=1.24.0,<1.25.0)", "mypy-boto3-ssm-incidents (>=1.24.0,<1.25.0)", "mypy-boto3-sso (>=1.24.0,<1.25.0)", "mypy-boto3-sso-admin (>=1.24.0,<1.25.0)", "mypy-boto3-sso-oidc (>=1.24.0,<1.25.0)", "mypy-boto3-stepfunctions (>=1.24.0,<1.25.0)", "mypy-boto3-storagegateway (>=1.24.0,<1.25.0)", "mypy-boto3-sts 
(>=1.24.0,<1.25.0)", "mypy-boto3-support (>=1.24.0,<1.25.0)", "mypy-boto3-swf (>=1.24.0,<1.25.0)", "mypy-boto3-synthetics (>=1.24.0,<1.25.0)", "mypy-boto3-textract (>=1.24.0,<1.25.0)", "mypy-boto3-timestream-query (>=1.24.0,<1.25.0)", "mypy-boto3-timestream-write (>=1.24.0,<1.25.0)", "mypy-boto3-transcribe (>=1.24.0,<1.25.0)", "mypy-boto3-transfer (>=1.24.0,<1.25.0)", "mypy-boto3-translate (>=1.24.0,<1.25.0)", "mypy-boto3-voice-id (>=1.24.0,<1.25.0)", "mypy-boto3-waf (>=1.24.0,<1.25.0)", "mypy-boto3-waf-regional (>=1.24.0,<1.25.0)", "mypy-boto3-wafv2 (>=1.24.0,<1.25.0)", "mypy-boto3-wellarchitected (>=1.24.0,<1.25.0)", "mypy-boto3-wisdom (>=1.24.0,<1.25.0)", "mypy-boto3-workdocs (>=1.24.0,<1.25.0)", "mypy-boto3-worklink (>=1.24.0,<1.25.0)", "mypy-boto3-workmail (>=1.24.0,<1.25.0)", "mypy-boto3-workmailmessageflow (>=1.24.0,<1.25.0)", "mypy-boto3-workspaces (>=1.24.0,<1.25.0)", "mypy-boto3-workspaces-web (>=1.24.0,<1.25.0)", "mypy-boto3-xray (>=1.24.0,<1.25.0)"] -amp = ["mypy-boto3-amp (>=1.24.0,<1.25.0)"] -amplify = ["mypy-boto3-amplify (>=1.24.0,<1.25.0)"] -amplifybackend = ["mypy-boto3-amplifybackend (>=1.24.0,<1.25.0)"] -amplifyuibuilder = ["mypy-boto3-amplifyuibuilder (>=1.24.0,<1.25.0)"] -apigateway = ["mypy-boto3-apigateway (>=1.24.0,<1.25.0)"] -apigatewaymanagementapi = ["mypy-boto3-apigatewaymanagementapi (>=1.24.0,<1.25.0)"] -apigatewayv2 = ["mypy-boto3-apigatewayv2 (>=1.24.0,<1.25.0)"] -appconfig = ["mypy-boto3-appconfig (>=1.24.0,<1.25.0)"] -appconfigdata = ["mypy-boto3-appconfigdata (>=1.24.0,<1.25.0)"] -appflow = ["mypy-boto3-appflow (>=1.24.0,<1.25.0)"] -appintegrations = ["mypy-boto3-appintegrations (>=1.24.0,<1.25.0)"] -application-autoscaling = ["mypy-boto3-application-autoscaling (>=1.24.0,<1.25.0)"] -application-insights = ["mypy-boto3-application-insights (>=1.24.0,<1.25.0)"] -applicationcostprofiler = ["mypy-boto3-applicationcostprofiler (>=1.24.0,<1.25.0)"] -appmesh = ["mypy-boto3-appmesh (>=1.24.0,<1.25.0)"] -apprunner = 
["mypy-boto3-apprunner (>=1.24.0,<1.25.0)"] -appstream = ["mypy-boto3-appstream (>=1.24.0,<1.25.0)"] -appsync = ["mypy-boto3-appsync (>=1.24.0,<1.25.0)"] -athena = ["mypy-boto3-athena (>=1.24.0,<1.25.0)"] -auditmanager = ["mypy-boto3-auditmanager (>=1.24.0,<1.25.0)"] -autoscaling = ["mypy-boto3-autoscaling (>=1.24.0,<1.25.0)"] -autoscaling-plans = ["mypy-boto3-autoscaling-plans (>=1.24.0,<1.25.0)"] -backup = ["mypy-boto3-backup (>=1.24.0,<1.25.0)"] -backup-gateway = ["mypy-boto3-backup-gateway (>=1.24.0,<1.25.0)"] -batch = ["mypy-boto3-batch (>=1.24.0,<1.25.0)"] -billingconductor = ["mypy-boto3-billingconductor (>=1.24.0,<1.25.0)"] -braket = ["mypy-boto3-braket (>=1.24.0,<1.25.0)"] -budgets = ["mypy-boto3-budgets (>=1.24.0,<1.25.0)"] -ce = ["mypy-boto3-ce (>=1.24.0,<1.25.0)"] -chime = ["mypy-boto3-chime (>=1.24.0,<1.25.0)"] -chime-sdk-identity = ["mypy-boto3-chime-sdk-identity (>=1.24.0,<1.25.0)"] -chime-sdk-media-pipelines = ["mypy-boto3-chime-sdk-media-pipelines (>=1.24.0,<1.25.0)"] -chime-sdk-meetings = ["mypy-boto3-chime-sdk-meetings (>=1.24.0,<1.25.0)"] -chime-sdk-messaging = ["mypy-boto3-chime-sdk-messaging (>=1.24.0,<1.25.0)"] -cloud9 = ["mypy-boto3-cloud9 (>=1.24.0,<1.25.0)"] -cloudcontrol = ["mypy-boto3-cloudcontrol (>=1.24.0,<1.25.0)"] -clouddirectory = ["mypy-boto3-clouddirectory (>=1.24.0,<1.25.0)"] -cloudformation = ["mypy-boto3-cloudformation (>=1.24.0,<1.25.0)"] -cloudfront = ["mypy-boto3-cloudfront (>=1.24.0,<1.25.0)"] -cloudhsm = ["mypy-boto3-cloudhsm (>=1.24.0,<1.25.0)"] -cloudhsmv2 = ["mypy-boto3-cloudhsmv2 (>=1.24.0,<1.25.0)"] -cloudsearch = ["mypy-boto3-cloudsearch (>=1.24.0,<1.25.0)"] -cloudsearchdomain = ["mypy-boto3-cloudsearchdomain (>=1.24.0,<1.25.0)"] -cloudtrail = ["mypy-boto3-cloudtrail (>=1.24.0,<1.25.0)"] -cloudwatch = ["mypy-boto3-cloudwatch (>=1.24.0,<1.25.0)"] -codeartifact = ["mypy-boto3-codeartifact (>=1.24.0,<1.25.0)"] -codebuild = ["mypy-boto3-codebuild (>=1.24.0,<1.25.0)"] -codecommit = ["mypy-boto3-codecommit 
(>=1.24.0,<1.25.0)"] -codedeploy = ["mypy-boto3-codedeploy (>=1.24.0,<1.25.0)"] -codeguru-reviewer = ["mypy-boto3-codeguru-reviewer (>=1.24.0,<1.25.0)"] -codeguruprofiler = ["mypy-boto3-codeguruprofiler (>=1.24.0,<1.25.0)"] -codepipeline = ["mypy-boto3-codepipeline (>=1.24.0,<1.25.0)"] -codestar = ["mypy-boto3-codestar (>=1.24.0,<1.25.0)"] -codestar-connections = ["mypy-boto3-codestar-connections (>=1.24.0,<1.25.0)"] -codestar-notifications = ["mypy-boto3-codestar-notifications (>=1.24.0,<1.25.0)"] -cognito-identity = ["mypy-boto3-cognito-identity (>=1.24.0,<1.25.0)"] -cognito-idp = ["mypy-boto3-cognito-idp (>=1.24.0,<1.25.0)"] -cognito-sync = ["mypy-boto3-cognito-sync (>=1.24.0,<1.25.0)"] -comprehend = ["mypy-boto3-comprehend (>=1.24.0,<1.25.0)"] -comprehendmedical = ["mypy-boto3-comprehendmedical (>=1.24.0,<1.25.0)"] -compute-optimizer = ["mypy-boto3-compute-optimizer (>=1.24.0,<1.25.0)"] -config = ["mypy-boto3-config (>=1.24.0,<1.25.0)"] -connect = ["mypy-boto3-connect (>=1.24.0,<1.25.0)"] -connect-contact-lens = ["mypy-boto3-connect-contact-lens (>=1.24.0,<1.25.0)"] -connectcampaigns = ["mypy-boto3-connectcampaigns (>=1.24.0,<1.25.0)"] -connectparticipant = ["mypy-boto3-connectparticipant (>=1.24.0,<1.25.0)"] -cur = ["mypy-boto3-cur (>=1.24.0,<1.25.0)"] -customer-profiles = ["mypy-boto3-customer-profiles (>=1.24.0,<1.25.0)"] -databrew = ["mypy-boto3-databrew (>=1.24.0,<1.25.0)"] -dataexchange = ["mypy-boto3-dataexchange (>=1.24.0,<1.25.0)"] -datapipeline = ["mypy-boto3-datapipeline (>=1.24.0,<1.25.0)"] -datasync = ["mypy-boto3-datasync (>=1.24.0,<1.25.0)"] -dax = ["mypy-boto3-dax (>=1.24.0,<1.25.0)"] -detective = ["mypy-boto3-detective (>=1.24.0,<1.25.0)"] -devicefarm = ["mypy-boto3-devicefarm (>=1.24.0,<1.25.0)"] -devops-guru = ["mypy-boto3-devops-guru (>=1.24.0,<1.25.0)"] -directconnect = ["mypy-boto3-directconnect (>=1.24.0,<1.25.0)"] -discovery = ["mypy-boto3-discovery (>=1.24.0,<1.25.0)"] -dlm = ["mypy-boto3-dlm (>=1.24.0,<1.25.0)"] -dms = ["mypy-boto3-dms 
(>=1.24.0,<1.25.0)"] -docdb = ["mypy-boto3-docdb (>=1.24.0,<1.25.0)"] -drs = ["mypy-boto3-drs (>=1.24.0,<1.25.0)"] -ds = ["mypy-boto3-ds (>=1.24.0,<1.25.0)"] -dynamodb = ["mypy-boto3-dynamodb (>=1.24.0,<1.25.0)"] -dynamodbstreams = ["mypy-boto3-dynamodbstreams (>=1.24.0,<1.25.0)"] -ebs = ["mypy-boto3-ebs (>=1.24.0,<1.25.0)"] -ec2 = ["mypy-boto3-ec2 (>=1.24.0,<1.25.0)"] -ec2-instance-connect = ["mypy-boto3-ec2-instance-connect (>=1.24.0,<1.25.0)"] -ecr = ["mypy-boto3-ecr (>=1.24.0,<1.25.0)"] -ecr-public = ["mypy-boto3-ecr-public (>=1.24.0,<1.25.0)"] -ecs = ["mypy-boto3-ecs (>=1.24.0,<1.25.0)"] -efs = ["mypy-boto3-efs (>=1.24.0,<1.25.0)"] -eks = ["mypy-boto3-eks (>=1.24.0,<1.25.0)"] -elastic-inference = ["mypy-boto3-elastic-inference (>=1.24.0,<1.25.0)"] -elasticache = ["mypy-boto3-elasticache (>=1.24.0,<1.25.0)"] -elasticbeanstalk = ["mypy-boto3-elasticbeanstalk (>=1.24.0,<1.25.0)"] -elastictranscoder = ["mypy-boto3-elastictranscoder (>=1.24.0,<1.25.0)"] -elb = ["mypy-boto3-elb (>=1.24.0,<1.25.0)"] -elbv2 = ["mypy-boto3-elbv2 (>=1.24.0,<1.25.0)"] -emr = ["mypy-boto3-emr (>=1.24.0,<1.25.0)"] -emr-containers = ["mypy-boto3-emr-containers (>=1.24.0,<1.25.0)"] -emr-serverless = ["mypy-boto3-emr-serverless (>=1.24.0,<1.25.0)"] -es = ["mypy-boto3-es (>=1.24.0,<1.25.0)"] -essential = ["mypy-boto3-cloudformation (>=1.24.0,<1.25.0)", "mypy-boto3-dynamodb (>=1.24.0,<1.25.0)", "mypy-boto3-ec2 (>=1.24.0,<1.25.0)", "mypy-boto3-lambda (>=1.24.0,<1.25.0)", "mypy-boto3-rds (>=1.24.0,<1.25.0)", "mypy-boto3-s3 (>=1.24.0,<1.25.0)", "mypy-boto3-sqs (>=1.24.0,<1.25.0)"] -events = ["mypy-boto3-events (>=1.24.0,<1.25.0)"] -evidently = ["mypy-boto3-evidently (>=1.24.0,<1.25.0)"] -finspace = ["mypy-boto3-finspace (>=1.24.0,<1.25.0)"] -finspace-data = ["mypy-boto3-finspace-data (>=1.24.0,<1.25.0)"] -firehose = ["mypy-boto3-firehose (>=1.24.0,<1.25.0)"] -fis = ["mypy-boto3-fis (>=1.24.0,<1.25.0)"] -fms = ["mypy-boto3-fms (>=1.24.0,<1.25.0)"] -forecast = ["mypy-boto3-forecast 
(>=1.24.0,<1.25.0)"] -forecastquery = ["mypy-boto3-forecastquery (>=1.24.0,<1.25.0)"] -frauddetector = ["mypy-boto3-frauddetector (>=1.24.0,<1.25.0)"] -fsx = ["mypy-boto3-fsx (>=1.24.0,<1.25.0)"] -gamelift = ["mypy-boto3-gamelift (>=1.24.0,<1.25.0)"] -gamesparks = ["mypy-boto3-gamesparks (>=1.24.0,<1.25.0)"] -glacier = ["mypy-boto3-glacier (>=1.24.0,<1.25.0)"] -globalaccelerator = ["mypy-boto3-globalaccelerator (>=1.24.0,<1.25.0)"] -glue = ["mypy-boto3-glue (>=1.24.0,<1.25.0)"] -grafana = ["mypy-boto3-grafana (>=1.24.0,<1.25.0)"] -greengrass = ["mypy-boto3-greengrass (>=1.24.0,<1.25.0)"] -greengrassv2 = ["mypy-boto3-greengrassv2 (>=1.24.0,<1.25.0)"] -groundstation = ["mypy-boto3-groundstation (>=1.24.0,<1.25.0)"] -guardduty = ["mypy-boto3-guardduty (>=1.24.0,<1.25.0)"] -health = ["mypy-boto3-health (>=1.24.0,<1.25.0)"] -healthlake = ["mypy-boto3-healthlake (>=1.24.0,<1.25.0)"] -honeycode = ["mypy-boto3-honeycode (>=1.24.0,<1.25.0)"] -iam = ["mypy-boto3-iam (>=1.24.0,<1.25.0)"] -identitystore = ["mypy-boto3-identitystore (>=1.24.0,<1.25.0)"] -imagebuilder = ["mypy-boto3-imagebuilder (>=1.24.0,<1.25.0)"] -importexport = ["mypy-boto3-importexport (>=1.24.0,<1.25.0)"] -inspector = ["mypy-boto3-inspector (>=1.24.0,<1.25.0)"] -inspector2 = ["mypy-boto3-inspector2 (>=1.24.0,<1.25.0)"] -iot = ["mypy-boto3-iot (>=1.24.0,<1.25.0)"] -iot-data = ["mypy-boto3-iot-data (>=1.24.0,<1.25.0)"] -iot-jobs-data = ["mypy-boto3-iot-jobs-data (>=1.24.0,<1.25.0)"] -iot1click-devices = ["mypy-boto3-iot1click-devices (>=1.24.0,<1.25.0)"] -iot1click-projects = ["mypy-boto3-iot1click-projects (>=1.24.0,<1.25.0)"] -iotanalytics = ["mypy-boto3-iotanalytics (>=1.24.0,<1.25.0)"] -iotdeviceadvisor = ["mypy-boto3-iotdeviceadvisor (>=1.24.0,<1.25.0)"] -iotevents = ["mypy-boto3-iotevents (>=1.24.0,<1.25.0)"] -iotevents-data = ["mypy-boto3-iotevents-data (>=1.24.0,<1.25.0)"] -iotfleethub = ["mypy-boto3-iotfleethub (>=1.24.0,<1.25.0)"] -iotsecuretunneling = ["mypy-boto3-iotsecuretunneling 
(>=1.24.0,<1.25.0)"] -iotsitewise = ["mypy-boto3-iotsitewise (>=1.24.0,<1.25.0)"] -iotthingsgraph = ["mypy-boto3-iotthingsgraph (>=1.24.0,<1.25.0)"] -iottwinmaker = ["mypy-boto3-iottwinmaker (>=1.24.0,<1.25.0)"] -iotwireless = ["mypy-boto3-iotwireless (>=1.24.0,<1.25.0)"] -ivs = ["mypy-boto3-ivs (>=1.24.0,<1.25.0)"] -ivschat = ["mypy-boto3-ivschat (>=1.24.0,<1.25.0)"] -kafka = ["mypy-boto3-kafka (>=1.24.0,<1.25.0)"] -kafkaconnect = ["mypy-boto3-kafkaconnect (>=1.24.0,<1.25.0)"] -kendra = ["mypy-boto3-kendra (>=1.24.0,<1.25.0)"] -keyspaces = ["mypy-boto3-keyspaces (>=1.24.0,<1.25.0)"] -kinesis = ["mypy-boto3-kinesis (>=1.24.0,<1.25.0)"] -kinesis-video-archived-media = ["mypy-boto3-kinesis-video-archived-media (>=1.24.0,<1.25.0)"] -kinesis-video-media = ["mypy-boto3-kinesis-video-media (>=1.24.0,<1.25.0)"] -kinesis-video-signaling = ["mypy-boto3-kinesis-video-signaling (>=1.24.0,<1.25.0)"] -kinesisanalytics = ["mypy-boto3-kinesisanalytics (>=1.24.0,<1.25.0)"] -kinesisanalyticsv2 = ["mypy-boto3-kinesisanalyticsv2 (>=1.24.0,<1.25.0)"] -kinesisvideo = ["mypy-boto3-kinesisvideo (>=1.24.0,<1.25.0)"] -kms = ["mypy-boto3-kms (>=1.24.0,<1.25.0)"] -lakeformation = ["mypy-boto3-lakeformation (>=1.24.0,<1.25.0)"] -lambda = ["mypy-boto3-lambda (>=1.24.0,<1.25.0)"] -lex-models = ["mypy-boto3-lex-models (>=1.24.0,<1.25.0)"] -lex-runtime = ["mypy-boto3-lex-runtime (>=1.24.0,<1.25.0)"] -lexv2-models = ["mypy-boto3-lexv2-models (>=1.24.0,<1.25.0)"] -lexv2-runtime = ["mypy-boto3-lexv2-runtime (>=1.24.0,<1.25.0)"] -license-manager = ["mypy-boto3-license-manager (>=1.24.0,<1.25.0)"] -license-manager-user-subscriptions = ["mypy-boto3-license-manager-user-subscriptions (>=1.24.0,<1.25.0)"] -lightsail = ["mypy-boto3-lightsail (>=1.24.0,<1.25.0)"] -location = ["mypy-boto3-location (>=1.24.0,<1.25.0)"] -logs = ["mypy-boto3-logs (>=1.24.0,<1.25.0)"] -lookoutequipment = ["mypy-boto3-lookoutequipment (>=1.24.0,<1.25.0)"] -lookoutmetrics = ["mypy-boto3-lookoutmetrics (>=1.24.0,<1.25.0)"] 
-lookoutvision = ["mypy-boto3-lookoutvision (>=1.24.0,<1.25.0)"] -m2 = ["mypy-boto3-m2 (>=1.24.0,<1.25.0)"] -machinelearning = ["mypy-boto3-machinelearning (>=1.24.0,<1.25.0)"] -macie = ["mypy-boto3-macie (>=1.24.0,<1.25.0)"] -macie2 = ["mypy-boto3-macie2 (>=1.24.0,<1.25.0)"] -managedblockchain = ["mypy-boto3-managedblockchain (>=1.24.0,<1.25.0)"] -marketplace-catalog = ["mypy-boto3-marketplace-catalog (>=1.24.0,<1.25.0)"] -marketplace-entitlement = ["mypy-boto3-marketplace-entitlement (>=1.24.0,<1.25.0)"] -marketplacecommerceanalytics = ["mypy-boto3-marketplacecommerceanalytics (>=1.24.0,<1.25.0)"] -mediaconnect = ["mypy-boto3-mediaconnect (>=1.24.0,<1.25.0)"] -mediaconvert = ["mypy-boto3-mediaconvert (>=1.24.0,<1.25.0)"] -medialive = ["mypy-boto3-medialive (>=1.24.0,<1.25.0)"] -mediapackage = ["mypy-boto3-mediapackage (>=1.24.0,<1.25.0)"] -mediapackage-vod = ["mypy-boto3-mediapackage-vod (>=1.24.0,<1.25.0)"] -mediastore = ["mypy-boto3-mediastore (>=1.24.0,<1.25.0)"] -mediastore-data = ["mypy-boto3-mediastore-data (>=1.24.0,<1.25.0)"] -mediatailor = ["mypy-boto3-mediatailor (>=1.24.0,<1.25.0)"] -memorydb = ["mypy-boto3-memorydb (>=1.24.0,<1.25.0)"] -meteringmarketplace = ["mypy-boto3-meteringmarketplace (>=1.24.0,<1.25.0)"] -mgh = ["mypy-boto3-mgh (>=1.24.0,<1.25.0)"] -mgn = ["mypy-boto3-mgn (>=1.24.0,<1.25.0)"] -migration-hub-refactor-spaces = ["mypy-boto3-migration-hub-refactor-spaces (>=1.24.0,<1.25.0)"] -migrationhub-config = ["mypy-boto3-migrationhub-config (>=1.24.0,<1.25.0)"] -migrationhubstrategy = ["mypy-boto3-migrationhubstrategy (>=1.24.0,<1.25.0)"] -mobile = ["mypy-boto3-mobile (>=1.24.0,<1.25.0)"] -mq = ["mypy-boto3-mq (>=1.24.0,<1.25.0)"] -mturk = ["mypy-boto3-mturk (>=1.24.0,<1.25.0)"] -mwaa = ["mypy-boto3-mwaa (>=1.24.0,<1.25.0)"] -neptune = ["mypy-boto3-neptune (>=1.24.0,<1.25.0)"] -network-firewall = ["mypy-boto3-network-firewall (>=1.24.0,<1.25.0)"] -networkmanager = ["mypy-boto3-networkmanager (>=1.24.0,<1.25.0)"] -nimble = ["mypy-boto3-nimble 
(>=1.24.0,<1.25.0)"] -opensearch = ["mypy-boto3-opensearch (>=1.24.0,<1.25.0)"] -opsworks = ["mypy-boto3-opsworks (>=1.24.0,<1.25.0)"] -opsworkscm = ["mypy-boto3-opsworkscm (>=1.24.0,<1.25.0)"] -organizations = ["mypy-boto3-organizations (>=1.24.0,<1.25.0)"] -outposts = ["mypy-boto3-outposts (>=1.24.0,<1.25.0)"] -panorama = ["mypy-boto3-panorama (>=1.24.0,<1.25.0)"] -personalize = ["mypy-boto3-personalize (>=1.24.0,<1.25.0)"] -personalize-events = ["mypy-boto3-personalize-events (>=1.24.0,<1.25.0)"] -personalize-runtime = ["mypy-boto3-personalize-runtime (>=1.24.0,<1.25.0)"] -pi = ["mypy-boto3-pi (>=1.24.0,<1.25.0)"] -pinpoint = ["mypy-boto3-pinpoint (>=1.24.0,<1.25.0)"] -pinpoint-email = ["mypy-boto3-pinpoint-email (>=1.24.0,<1.25.0)"] -pinpoint-sms-voice = ["mypy-boto3-pinpoint-sms-voice (>=1.24.0,<1.25.0)"] -pinpoint-sms-voice-v2 = ["mypy-boto3-pinpoint-sms-voice-v2 (>=1.24.0,<1.25.0)"] -polly = ["mypy-boto3-polly (>=1.24.0,<1.25.0)"] -pricing = ["mypy-boto3-pricing (>=1.24.0,<1.25.0)"] -proton = ["mypy-boto3-proton (>=1.24.0,<1.25.0)"] -qldb = ["mypy-boto3-qldb (>=1.24.0,<1.25.0)"] -qldb-session = ["mypy-boto3-qldb-session (>=1.24.0,<1.25.0)"] -quicksight = ["mypy-boto3-quicksight (>=1.24.0,<1.25.0)"] -ram = ["mypy-boto3-ram (>=1.24.0,<1.25.0)"] -rbin = ["mypy-boto3-rbin (>=1.24.0,<1.25.0)"] -rds = ["mypy-boto3-rds (>=1.24.0,<1.25.0)"] -rds-data = ["mypy-boto3-rds-data (>=1.24.0,<1.25.0)"] -redshift = ["mypy-boto3-redshift (>=1.24.0,<1.25.0)"] -redshift-data = ["mypy-boto3-redshift-data (>=1.24.0,<1.25.0)"] -redshift-serverless = ["mypy-boto3-redshift-serverless (>=1.24.0,<1.25.0)"] -rekognition = ["mypy-boto3-rekognition (>=1.24.0,<1.25.0)"] -resiliencehub = ["mypy-boto3-resiliencehub (>=1.24.0,<1.25.0)"] -resource-groups = ["mypy-boto3-resource-groups (>=1.24.0,<1.25.0)"] -resourcegroupstaggingapi = ["mypy-boto3-resourcegroupstaggingapi (>=1.24.0,<1.25.0)"] -robomaker = ["mypy-boto3-robomaker (>=1.24.0,<1.25.0)"] -rolesanywhere = ["mypy-boto3-rolesanywhere 
(>=1.24.0,<1.25.0)"] -route53 = ["mypy-boto3-route53 (>=1.24.0,<1.25.0)"] -route53-recovery-cluster = ["mypy-boto3-route53-recovery-cluster (>=1.24.0,<1.25.0)"] -route53-recovery-control-config = ["mypy-boto3-route53-recovery-control-config (>=1.24.0,<1.25.0)"] -route53-recovery-readiness = ["mypy-boto3-route53-recovery-readiness (>=1.24.0,<1.25.0)"] -route53domains = ["mypy-boto3-route53domains (>=1.24.0,<1.25.0)"] -route53resolver = ["mypy-boto3-route53resolver (>=1.24.0,<1.25.0)"] -rum = ["mypy-boto3-rum (>=1.24.0,<1.25.0)"] -s3 = ["mypy-boto3-s3 (>=1.24.0,<1.25.0)"] -s3control = ["mypy-boto3-s3control (>=1.24.0,<1.25.0)"] -s3outposts = ["mypy-boto3-s3outposts (>=1.24.0,<1.25.0)"] -sagemaker = ["mypy-boto3-sagemaker (>=1.24.0,<1.25.0)"] -sagemaker-a2i-runtime = ["mypy-boto3-sagemaker-a2i-runtime (>=1.24.0,<1.25.0)"] -sagemaker-edge = ["mypy-boto3-sagemaker-edge (>=1.24.0,<1.25.0)"] -sagemaker-featurestore-runtime = ["mypy-boto3-sagemaker-featurestore-runtime (>=1.24.0,<1.25.0)"] -sagemaker-runtime = ["mypy-boto3-sagemaker-runtime (>=1.24.0,<1.25.0)"] -savingsplans = ["mypy-boto3-savingsplans (>=1.24.0,<1.25.0)"] -schemas = ["mypy-boto3-schemas (>=1.24.0,<1.25.0)"] -sdb = ["mypy-boto3-sdb (>=1.24.0,<1.25.0)"] -secretsmanager = ["mypy-boto3-secretsmanager (>=1.24.0,<1.25.0)"] -securityhub = ["mypy-boto3-securityhub (>=1.24.0,<1.25.0)"] -serverlessrepo = ["mypy-boto3-serverlessrepo (>=1.24.0,<1.25.0)"] -service-quotas = ["mypy-boto3-service-quotas (>=1.24.0,<1.25.0)"] -servicecatalog = ["mypy-boto3-servicecatalog (>=1.24.0,<1.25.0)"] -servicecatalog-appregistry = ["mypy-boto3-servicecatalog-appregistry (>=1.24.0,<1.25.0)"] -servicediscovery = ["mypy-boto3-servicediscovery (>=1.24.0,<1.25.0)"] -ses = ["mypy-boto3-ses (>=1.24.0,<1.25.0)"] -sesv2 = ["mypy-boto3-sesv2 (>=1.24.0,<1.25.0)"] -shield = ["mypy-boto3-shield (>=1.24.0,<1.25.0)"] -signer = ["mypy-boto3-signer (>=1.24.0,<1.25.0)"] -sms = ["mypy-boto3-sms (>=1.24.0,<1.25.0)"] -sms-voice = ["mypy-boto3-sms-voice 
(>=1.24.0,<1.25.0)"] -snow-device-management = ["mypy-boto3-snow-device-management (>=1.24.0,<1.25.0)"] -snowball = ["mypy-boto3-snowball (>=1.24.0,<1.25.0)"] -sns = ["mypy-boto3-sns (>=1.24.0,<1.25.0)"] -sqs = ["mypy-boto3-sqs (>=1.24.0,<1.25.0)"] -ssm = ["mypy-boto3-ssm (>=1.24.0,<1.25.0)"] -ssm-contacts = ["mypy-boto3-ssm-contacts (>=1.24.0,<1.25.0)"] -ssm-incidents = ["mypy-boto3-ssm-incidents (>=1.24.0,<1.25.0)"] -sso = ["mypy-boto3-sso (>=1.24.0,<1.25.0)"] -sso-admin = ["mypy-boto3-sso-admin (>=1.24.0,<1.25.0)"] -sso-oidc = ["mypy-boto3-sso-oidc (>=1.24.0,<1.25.0)"] -stepfunctions = ["mypy-boto3-stepfunctions (>=1.24.0,<1.25.0)"] -storagegateway = ["mypy-boto3-storagegateway (>=1.24.0,<1.25.0)"] -sts = ["mypy-boto3-sts (>=1.24.0,<1.25.0)"] -support = ["mypy-boto3-support (>=1.24.0,<1.25.0)"] -swf = ["mypy-boto3-swf (>=1.24.0,<1.25.0)"] -synthetics = ["mypy-boto3-synthetics (>=1.24.0,<1.25.0)"] -textract = ["mypy-boto3-textract (>=1.24.0,<1.25.0)"] -timestream-query = ["mypy-boto3-timestream-query (>=1.24.0,<1.25.0)"] -timestream-write = ["mypy-boto3-timestream-write (>=1.24.0,<1.25.0)"] -transcribe = ["mypy-boto3-transcribe (>=1.24.0,<1.25.0)"] -transfer = ["mypy-boto3-transfer (>=1.24.0,<1.25.0)"] -translate = ["mypy-boto3-translate (>=1.24.0,<1.25.0)"] -voice-id = ["mypy-boto3-voice-id (>=1.24.0,<1.25.0)"] -waf = ["mypy-boto3-waf (>=1.24.0,<1.25.0)"] -waf-regional = ["mypy-boto3-waf-regional (>=1.24.0,<1.25.0)"] -wafv2 = ["mypy-boto3-wafv2 (>=1.24.0,<1.25.0)"] -wellarchitected = ["mypy-boto3-wellarchitected (>=1.24.0,<1.25.0)"] -wisdom = ["mypy-boto3-wisdom (>=1.24.0,<1.25.0)"] -workdocs = ["mypy-boto3-workdocs (>=1.24.0,<1.25.0)"] worklink = ["mypy-boto3-worklink (>=1.24.0,<1.25.0)"] -workmail = ["mypy-boto3-workmail (>=1.24.0,<1.25.0)"] -workmailmessageflow = ["mypy-boto3-workmailmessageflow (>=1.24.0,<1.25.0)"] -workspaces = ["mypy-boto3-workspaces (>=1.24.0,<1.25.0)"] -workspaces-web = ["mypy-boto3-workspaces-web (>=1.24.0,<1.25.0)"] +workdocs = 
["mypy-boto3-workdocs (>=1.24.0,<1.25.0)"] +wisdom = ["mypy-boto3-wisdom (>=1.24.0,<1.25.0)"] +wellarchitected = ["mypy-boto3-wellarchitected (>=1.24.0,<1.25.0)"] +wafv2 = ["mypy-boto3-wafv2 (>=1.24.0,<1.25.0)"] +waf-regional = ["mypy-boto3-waf-regional (>=1.24.0,<1.25.0)"] +waf = ["mypy-boto3-waf (>=1.24.0,<1.25.0)"] +voice-id = ["mypy-boto3-voice-id (>=1.24.0,<1.25.0)"] +translate = ["mypy-boto3-translate (>=1.24.0,<1.25.0)"] +transfer = ["mypy-boto3-transfer (>=1.24.0,<1.25.0)"] +transcribe = ["mypy-boto3-transcribe (>=1.24.0,<1.25.0)"] +timestream-write = ["mypy-boto3-timestream-write (>=1.24.0,<1.25.0)"] +timestream-query = ["mypy-boto3-timestream-query (>=1.24.0,<1.25.0)"] +textract = ["mypy-boto3-textract (>=1.24.0,<1.25.0)"] +synthetics = ["mypy-boto3-synthetics (>=1.24.0,<1.25.0)"] +swf = ["mypy-boto3-swf (>=1.24.0,<1.25.0)"] +support = ["mypy-boto3-support (>=1.24.0,<1.25.0)"] +sts = ["mypy-boto3-sts (>=1.24.0,<1.25.0)"] +storagegateway = ["mypy-boto3-storagegateway (>=1.24.0,<1.25.0)"] +stepfunctions = ["mypy-boto3-stepfunctions (>=1.24.0,<1.25.0)"] +sso-oidc = ["mypy-boto3-sso-oidc (>=1.24.0,<1.25.0)"] +sso-admin = ["mypy-boto3-sso-admin (>=1.24.0,<1.25.0)"] +sso = ["mypy-boto3-sso (>=1.24.0,<1.25.0)"] +ssm-incidents = ["mypy-boto3-ssm-incidents (>=1.24.0,<1.25.0)"] +ssm-contacts = ["mypy-boto3-ssm-contacts (>=1.24.0,<1.25.0)"] +ssm = ["mypy-boto3-ssm (>=1.24.0,<1.25.0)"] +sqs = ["mypy-boto3-sqs (>=1.24.0,<1.25.0)"] +sns = ["mypy-boto3-sns (>=1.24.0,<1.25.0)"] +snowball = ["mypy-boto3-snowball (>=1.24.0,<1.25.0)"] +snow-device-management = ["mypy-boto3-snow-device-management (>=1.24.0,<1.25.0)"] +sms-voice = ["mypy-boto3-sms-voice (>=1.24.0,<1.25.0)"] +sms = ["mypy-boto3-sms (>=1.24.0,<1.25.0)"] +signer = ["mypy-boto3-signer (>=1.24.0,<1.25.0)"] +shield = ["mypy-boto3-shield (>=1.24.0,<1.25.0)"] +sesv2 = ["mypy-boto3-sesv2 (>=1.24.0,<1.25.0)"] +ses = ["mypy-boto3-ses (>=1.24.0,<1.25.0)"] +servicediscovery = ["mypy-boto3-servicediscovery 
(>=1.24.0,<1.25.0)"] +servicecatalog-appregistry = ["mypy-boto3-servicecatalog-appregistry (>=1.24.0,<1.25.0)"] +servicecatalog = ["mypy-boto3-servicecatalog (>=1.24.0,<1.25.0)"] +service-quotas = ["mypy-boto3-service-quotas (>=1.24.0,<1.25.0)"] +serverlessrepo = ["mypy-boto3-serverlessrepo (>=1.24.0,<1.25.0)"] +securityhub = ["mypy-boto3-securityhub (>=1.24.0,<1.25.0)"] +secretsmanager = ["mypy-boto3-secretsmanager (>=1.24.0,<1.25.0)"] +sdb = ["mypy-boto3-sdb (>=1.24.0,<1.25.0)"] +schemas = ["mypy-boto3-schemas (>=1.24.0,<1.25.0)"] +savingsplans = ["mypy-boto3-savingsplans (>=1.24.0,<1.25.0)"] +sagemaker-runtime = ["mypy-boto3-sagemaker-runtime (>=1.24.0,<1.25.0)"] +sagemaker-featurestore-runtime = ["mypy-boto3-sagemaker-featurestore-runtime (>=1.24.0,<1.25.0)"] +sagemaker-edge = ["mypy-boto3-sagemaker-edge (>=1.24.0,<1.25.0)"] +sagemaker-a2i-runtime = ["mypy-boto3-sagemaker-a2i-runtime (>=1.24.0,<1.25.0)"] +sagemaker = ["mypy-boto3-sagemaker (>=1.24.0,<1.25.0)"] +s3outposts = ["mypy-boto3-s3outposts (>=1.24.0,<1.25.0)"] +s3control = ["mypy-boto3-s3control (>=1.24.0,<1.25.0)"] +s3 = ["mypy-boto3-s3 (>=1.24.0,<1.25.0)"] +rum = ["mypy-boto3-rum (>=1.24.0,<1.25.0)"] +route53resolver = ["mypy-boto3-route53resolver (>=1.24.0,<1.25.0)"] +route53domains = ["mypy-boto3-route53domains (>=1.24.0,<1.25.0)"] +route53-recovery-readiness = ["mypy-boto3-route53-recovery-readiness (>=1.24.0,<1.25.0)"] +route53-recovery-control-config = ["mypy-boto3-route53-recovery-control-config (>=1.24.0,<1.25.0)"] +route53-recovery-cluster = ["mypy-boto3-route53-recovery-cluster (>=1.24.0,<1.25.0)"] +route53 = ["mypy-boto3-route53 (>=1.24.0,<1.25.0)"] +rolesanywhere = ["mypy-boto3-rolesanywhere (>=1.24.0,<1.25.0)"] +robomaker = ["mypy-boto3-robomaker (>=1.24.0,<1.25.0)"] +resourcegroupstaggingapi = ["mypy-boto3-resourcegroupstaggingapi (>=1.24.0,<1.25.0)"] +resource-groups = ["mypy-boto3-resource-groups (>=1.24.0,<1.25.0)"] +resiliencehub = ["mypy-boto3-resiliencehub (>=1.24.0,<1.25.0)"] 
+rekognition = ["mypy-boto3-rekognition (>=1.24.0,<1.25.0)"] +redshift-serverless = ["mypy-boto3-redshift-serverless (>=1.24.0,<1.25.0)"] +redshift-data = ["mypy-boto3-redshift-data (>=1.24.0,<1.25.0)"] +redshift = ["mypy-boto3-redshift (>=1.24.0,<1.25.0)"] +rds-data = ["mypy-boto3-rds-data (>=1.24.0,<1.25.0)"] +rds = ["mypy-boto3-rds (>=1.24.0,<1.25.0)"] +rbin = ["mypy-boto3-rbin (>=1.24.0,<1.25.0)"] +ram = ["mypy-boto3-ram (>=1.24.0,<1.25.0)"] +quicksight = ["mypy-boto3-quicksight (>=1.24.0,<1.25.0)"] +qldb-session = ["mypy-boto3-qldb-session (>=1.24.0,<1.25.0)"] +qldb = ["mypy-boto3-qldb (>=1.24.0,<1.25.0)"] +proton = ["mypy-boto3-proton (>=1.24.0,<1.25.0)"] +privatenetworks = ["mypy-boto3-privatenetworks (>=1.24.0,<1.25.0)"] +pricing = ["mypy-boto3-pricing (>=1.24.0,<1.25.0)"] +polly = ["mypy-boto3-polly (>=1.24.0,<1.25.0)"] +pinpoint-sms-voice-v2 = ["mypy-boto3-pinpoint-sms-voice-v2 (>=1.24.0,<1.25.0)"] +pinpoint-sms-voice = ["mypy-boto3-pinpoint-sms-voice (>=1.24.0,<1.25.0)"] +pinpoint-email = ["mypy-boto3-pinpoint-email (>=1.24.0,<1.25.0)"] +pinpoint = ["mypy-boto3-pinpoint (>=1.24.0,<1.25.0)"] +pi = ["mypy-boto3-pi (>=1.24.0,<1.25.0)"] +personalize-runtime = ["mypy-boto3-personalize-runtime (>=1.24.0,<1.25.0)"] +personalize-events = ["mypy-boto3-personalize-events (>=1.24.0,<1.25.0)"] +personalize = ["mypy-boto3-personalize (>=1.24.0,<1.25.0)"] +panorama = ["mypy-boto3-panorama (>=1.24.0,<1.25.0)"] +outposts = ["mypy-boto3-outposts (>=1.24.0,<1.25.0)"] +organizations = ["mypy-boto3-organizations (>=1.24.0,<1.25.0)"] +opsworkscm = ["mypy-boto3-opsworkscm (>=1.24.0,<1.25.0)"] +opsworks = ["mypy-boto3-opsworks (>=1.24.0,<1.25.0)"] +opensearch = ["mypy-boto3-opensearch (>=1.24.0,<1.25.0)"] +nimble = ["mypy-boto3-nimble (>=1.24.0,<1.25.0)"] +networkmanager = ["mypy-boto3-networkmanager (>=1.24.0,<1.25.0)"] +network-firewall = ["mypy-boto3-network-firewall (>=1.24.0,<1.25.0)"] +neptune = ["mypy-boto3-neptune (>=1.24.0,<1.25.0)"] +mwaa = ["mypy-boto3-mwaa 
(>=1.24.0,<1.25.0)"] +mturk = ["mypy-boto3-mturk (>=1.24.0,<1.25.0)"] +mq = ["mypy-boto3-mq (>=1.24.0,<1.25.0)"] +mobile = ["mypy-boto3-mobile (>=1.24.0,<1.25.0)"] +migrationhubstrategy = ["mypy-boto3-migrationhubstrategy (>=1.24.0,<1.25.0)"] +migrationhub-config = ["mypy-boto3-migrationhub-config (>=1.24.0,<1.25.0)"] +migration-hub-refactor-spaces = ["mypy-boto3-migration-hub-refactor-spaces (>=1.24.0,<1.25.0)"] +mgn = ["mypy-boto3-mgn (>=1.24.0,<1.25.0)"] +mgh = ["mypy-boto3-mgh (>=1.24.0,<1.25.0)"] +meteringmarketplace = ["mypy-boto3-meteringmarketplace (>=1.24.0,<1.25.0)"] +memorydb = ["mypy-boto3-memorydb (>=1.24.0,<1.25.0)"] +mediatailor = ["mypy-boto3-mediatailor (>=1.24.0,<1.25.0)"] +mediastore-data = ["mypy-boto3-mediastore-data (>=1.24.0,<1.25.0)"] +mediastore = ["mypy-boto3-mediastore (>=1.24.0,<1.25.0)"] +mediapackage-vod = ["mypy-boto3-mediapackage-vod (>=1.24.0,<1.25.0)"] +mediapackage = ["mypy-boto3-mediapackage (>=1.24.0,<1.25.0)"] xray = ["mypy-boto3-xray (>=1.24.0,<1.25.0)"] +workspaces-web = ["mypy-boto3-workspaces-web (>=1.24.0,<1.25.0)"] +workspaces = ["mypy-boto3-workspaces (>=1.24.0,<1.25.0)"] +workmailmessageflow = ["mypy-boto3-workmailmessageflow (>=1.24.0,<1.25.0)"] +workmail = ["mypy-boto3-workmail (>=1.24.0,<1.25.0)"] +medialive = ["mypy-boto3-medialive (>=1.24.0,<1.25.0)"] +kinesisanalytics = ["mypy-boto3-kinesisanalytics (>=1.24.0,<1.25.0)"] +kinesis-video-signaling = ["mypy-boto3-kinesis-video-signaling (>=1.24.0,<1.25.0)"] +kinesis-video-media = ["mypy-boto3-kinesis-video-media (>=1.24.0,<1.25.0)"] +kinesis-video-archived-media = ["mypy-boto3-kinesis-video-archived-media (>=1.24.0,<1.25.0)"] +kinesis = ["mypy-boto3-kinesis (>=1.24.0,<1.25.0)"] +keyspaces = ["mypy-boto3-keyspaces (>=1.24.0,<1.25.0)"] +kendra = ["mypy-boto3-kendra (>=1.24.0,<1.25.0)"] +kafkaconnect = ["mypy-boto3-kafkaconnect (>=1.24.0,<1.25.0)"] +kafka = ["mypy-boto3-kafka (>=1.24.0,<1.25.0)"] +ivschat = ["mypy-boto3-ivschat (>=1.24.0,<1.25.0)"] +ivs = 
["mypy-boto3-ivs (>=1.24.0,<1.25.0)"] +iotwireless = ["mypy-boto3-iotwireless (>=1.24.0,<1.25.0)"] +iottwinmaker = ["mypy-boto3-iottwinmaker (>=1.24.0,<1.25.0)"] +iotthingsgraph = ["mypy-boto3-iotthingsgraph (>=1.24.0,<1.25.0)"] +iotsitewise = ["mypy-boto3-iotsitewise (>=1.24.0,<1.25.0)"] +iotsecuretunneling = ["mypy-boto3-iotsecuretunneling (>=1.24.0,<1.25.0)"] +iotfleethub = ["mypy-boto3-iotfleethub (>=1.24.0,<1.25.0)"] +iotevents-data = ["mypy-boto3-iotevents-data (>=1.24.0,<1.25.0)"] +iotevents = ["mypy-boto3-iotevents (>=1.24.0,<1.25.0)"] +iotdeviceadvisor = ["mypy-boto3-iotdeviceadvisor (>=1.24.0,<1.25.0)"] +iotanalytics = ["mypy-boto3-iotanalytics (>=1.24.0,<1.25.0)"] +iot1click-projects = ["mypy-boto3-iot1click-projects (>=1.24.0,<1.25.0)"] +iot1click-devices = ["mypy-boto3-iot1click-devices (>=1.24.0,<1.25.0)"] +iot-jobs-data = ["mypy-boto3-iot-jobs-data (>=1.24.0,<1.25.0)"] +iot-data = ["mypy-boto3-iot-data (>=1.24.0,<1.25.0)"] +iot = ["mypy-boto3-iot (>=1.24.0,<1.25.0)"] +inspector2 = ["mypy-boto3-inspector2 (>=1.24.0,<1.25.0)"] +inspector = ["mypy-boto3-inspector (>=1.24.0,<1.25.0)"] +importexport = ["mypy-boto3-importexport (>=1.24.0,<1.25.0)"] +imagebuilder = ["mypy-boto3-imagebuilder (>=1.24.0,<1.25.0)"] +identitystore = ["mypy-boto3-identitystore (>=1.24.0,<1.25.0)"] +iam = ["mypy-boto3-iam (>=1.24.0,<1.25.0)"] +honeycode = ["mypy-boto3-honeycode (>=1.24.0,<1.25.0)"] +healthlake = ["mypy-boto3-healthlake (>=1.24.0,<1.25.0)"] +health = ["mypy-boto3-health (>=1.24.0,<1.25.0)"] +guardduty = ["mypy-boto3-guardduty (>=1.24.0,<1.25.0)"] +groundstation = ["mypy-boto3-groundstation (>=1.24.0,<1.25.0)"] +greengrassv2 = ["mypy-boto3-greengrassv2 (>=1.24.0,<1.25.0)"] +greengrass = ["mypy-boto3-greengrass (>=1.24.0,<1.25.0)"] +grafana = ["mypy-boto3-grafana (>=1.24.0,<1.25.0)"] +glue = ["mypy-boto3-glue (>=1.24.0,<1.25.0)"] +globalaccelerator = ["mypy-boto3-globalaccelerator (>=1.24.0,<1.25.0)"] +glacier = ["mypy-boto3-glacier (>=1.24.0,<1.25.0)"] +gamesparks = 
["mypy-boto3-gamesparks (>=1.24.0,<1.25.0)"] +gamelift = ["mypy-boto3-gamelift (>=1.24.0,<1.25.0)"] +fsx = ["mypy-boto3-fsx (>=1.24.0,<1.25.0)"] +frauddetector = ["mypy-boto3-frauddetector (>=1.24.0,<1.25.0)"] +forecastquery = ["mypy-boto3-forecastquery (>=1.24.0,<1.25.0)"] +forecast = ["mypy-boto3-forecast (>=1.24.0,<1.25.0)"] +fms = ["mypy-boto3-fms (>=1.24.0,<1.25.0)"] +fis = ["mypy-boto3-fis (>=1.24.0,<1.25.0)"] +firehose = ["mypy-boto3-firehose (>=1.24.0,<1.25.0)"] +finspace-data = ["mypy-boto3-finspace-data (>=1.24.0,<1.25.0)"] +finspace = ["mypy-boto3-finspace (>=1.24.0,<1.25.0)"] +evidently = ["mypy-boto3-evidently (>=1.24.0,<1.25.0)"] +events = ["mypy-boto3-events (>=1.24.0,<1.25.0)"] +essential = ["mypy-boto3-sqs (>=1.24.0,<1.25.0)", "mypy-boto3-s3 (>=1.24.0,<1.25.0)", "mypy-boto3-rds (>=1.24.0,<1.25.0)", "mypy-boto3-lambda (>=1.24.0,<1.25.0)", "mypy-boto3-ec2 (>=1.24.0,<1.25.0)", "mypy-boto3-dynamodb (>=1.24.0,<1.25.0)", "mypy-boto3-cloudformation (>=1.24.0,<1.25.0)"] +es = ["mypy-boto3-es (>=1.24.0,<1.25.0)"] +emr-serverless = ["mypy-boto3-emr-serverless (>=1.24.0,<1.25.0)"] +emr-containers = ["mypy-boto3-emr-containers (>=1.24.0,<1.25.0)"] +emr = ["mypy-boto3-emr (>=1.24.0,<1.25.0)"] +elbv2 = ["mypy-boto3-elbv2 (>=1.24.0,<1.25.0)"] +elb = ["mypy-boto3-elb (>=1.24.0,<1.25.0)"] +elastictranscoder = ["mypy-boto3-elastictranscoder (>=1.24.0,<1.25.0)"] +elasticbeanstalk = ["mypy-boto3-elasticbeanstalk (>=1.24.0,<1.25.0)"] +elasticache = ["mypy-boto3-elasticache (>=1.24.0,<1.25.0)"] +elastic-inference = ["mypy-boto3-elastic-inference (>=1.24.0,<1.25.0)"] +eks = ["mypy-boto3-eks (>=1.24.0,<1.25.0)"] +efs = ["mypy-boto3-efs (>=1.24.0,<1.25.0)"] +ecs = ["mypy-boto3-ecs (>=1.24.0,<1.25.0)"] +ecr-public = ["mypy-boto3-ecr-public (>=1.24.0,<1.25.0)"] +ecr = ["mypy-boto3-ecr (>=1.24.0,<1.25.0)"] +ec2-instance-connect = ["mypy-boto3-ec2-instance-connect (>=1.24.0,<1.25.0)"] +ec2 = ["mypy-boto3-ec2 (>=1.24.0,<1.25.0)"] +ebs = ["mypy-boto3-ebs (>=1.24.0,<1.25.0)"] 
+dynamodbstreams = ["mypy-boto3-dynamodbstreams (>=1.24.0,<1.25.0)"] +dynamodb = ["mypy-boto3-dynamodb (>=1.24.0,<1.25.0)"] +ds = ["mypy-boto3-ds (>=1.24.0,<1.25.0)"] +drs = ["mypy-boto3-drs (>=1.24.0,<1.25.0)"] +docdb = ["mypy-boto3-docdb (>=1.24.0,<1.25.0)"] +dms = ["mypy-boto3-dms (>=1.24.0,<1.25.0)"] +dlm = ["mypy-boto3-dlm (>=1.24.0,<1.25.0)"] +discovery = ["mypy-boto3-discovery (>=1.24.0,<1.25.0)"] +directconnect = ["mypy-boto3-directconnect (>=1.24.0,<1.25.0)"] +devops-guru = ["mypy-boto3-devops-guru (>=1.24.0,<1.25.0)"] +devicefarm = ["mypy-boto3-devicefarm (>=1.24.0,<1.25.0)"] +detective = ["mypy-boto3-detective (>=1.24.0,<1.25.0)"] +dax = ["mypy-boto3-dax (>=1.24.0,<1.25.0)"] +datasync = ["mypy-boto3-datasync (>=1.24.0,<1.25.0)"] +datapipeline = ["mypy-boto3-datapipeline (>=1.24.0,<1.25.0)"] +dataexchange = ["mypy-boto3-dataexchange (>=1.24.0,<1.25.0)"] +databrew = ["mypy-boto3-databrew (>=1.24.0,<1.25.0)"] +customer-profiles = ["mypy-boto3-customer-profiles (>=1.24.0,<1.25.0)"] +cur = ["mypy-boto3-cur (>=1.24.0,<1.25.0)"] +connectparticipant = ["mypy-boto3-connectparticipant (>=1.24.0,<1.25.0)"] +connectcampaigns = ["mypy-boto3-connectcampaigns (>=1.24.0,<1.25.0)"] +connect-contact-lens = ["mypy-boto3-connect-contact-lens (>=1.24.0,<1.25.0)"] +connect = ["mypy-boto3-connect (>=1.24.0,<1.25.0)"] +config = ["mypy-boto3-config (>=1.24.0,<1.25.0)"] +compute-optimizer = ["mypy-boto3-compute-optimizer (>=1.24.0,<1.25.0)"] +comprehendmedical = ["mypy-boto3-comprehendmedical (>=1.24.0,<1.25.0)"] +comprehend = ["mypy-boto3-comprehend (>=1.24.0,<1.25.0)"] +cognito-sync = ["mypy-boto3-cognito-sync (>=1.24.0,<1.25.0)"] +cognito-idp = ["mypy-boto3-cognito-idp (>=1.24.0,<1.25.0)"] +cognito-identity = ["mypy-boto3-cognito-identity (>=1.24.0,<1.25.0)"] +codestar-notifications = ["mypy-boto3-codestar-notifications (>=1.24.0,<1.25.0)"] +codestar-connections = ["mypy-boto3-codestar-connections (>=1.24.0,<1.25.0)"] +codestar = ["mypy-boto3-codestar (>=1.24.0,<1.25.0)"] 
+codepipeline = ["mypy-boto3-codepipeline (>=1.24.0,<1.25.0)"] +mediaconvert = ["mypy-boto3-mediaconvert (>=1.24.0,<1.25.0)"] +mediaconnect = ["mypy-boto3-mediaconnect (>=1.24.0,<1.25.0)"] +marketplacecommerceanalytics = ["mypy-boto3-marketplacecommerceanalytics (>=1.24.0,<1.25.0)"] +marketplace-entitlement = ["mypy-boto3-marketplace-entitlement (>=1.24.0,<1.25.0)"] +marketplace-catalog = ["mypy-boto3-marketplace-catalog (>=1.24.0,<1.25.0)"] +managedblockchain = ["mypy-boto3-managedblockchain (>=1.24.0,<1.25.0)"] +macie2 = ["mypy-boto3-macie2 (>=1.24.0,<1.25.0)"] +macie = ["mypy-boto3-macie (>=1.24.0,<1.25.0)"] +machinelearning = ["mypy-boto3-machinelearning (>=1.24.0,<1.25.0)"] +m2 = ["mypy-boto3-m2 (>=1.24.0,<1.25.0)"] +lookoutvision = ["mypy-boto3-lookoutvision (>=1.24.0,<1.25.0)"] +lookoutmetrics = ["mypy-boto3-lookoutmetrics (>=1.24.0,<1.25.0)"] +lookoutequipment = ["mypy-boto3-lookoutequipment (>=1.24.0,<1.25.0)"] +logs = ["mypy-boto3-logs (>=1.24.0,<1.25.0)"] +location = ["mypy-boto3-location (>=1.24.0,<1.25.0)"] +lightsail = ["mypy-boto3-lightsail (>=1.24.0,<1.25.0)"] +license-manager-user-subscriptions = ["mypy-boto3-license-manager-user-subscriptions (>=1.24.0,<1.25.0)"] +license-manager = ["mypy-boto3-license-manager (>=1.24.0,<1.25.0)"] +lexv2-runtime = ["mypy-boto3-lexv2-runtime (>=1.24.0,<1.25.0)"] +lexv2-models = ["mypy-boto3-lexv2-models (>=1.24.0,<1.25.0)"] +lex-runtime = ["mypy-boto3-lex-runtime (>=1.24.0,<1.25.0)"] +lex-models = ["mypy-boto3-lex-models (>=1.24.0,<1.25.0)"] +lambda = ["mypy-boto3-lambda (>=1.24.0,<1.25.0)"] +lakeformation = ["mypy-boto3-lakeformation (>=1.24.0,<1.25.0)"] +kms = ["mypy-boto3-kms (>=1.24.0,<1.25.0)"] +kinesisvideo = ["mypy-boto3-kinesisvideo (>=1.24.0,<1.25.0)"] +kinesisanalyticsv2 = ["mypy-boto3-kinesisanalyticsv2 (>=1.24.0,<1.25.0)"] +codeguruprofiler = ["mypy-boto3-codeguruprofiler (>=1.24.0,<1.25.0)"] +all = ["mypy-boto3-kafkaconnect (>=1.24.0,<1.25.0)", "mypy-boto3-kafka (>=1.24.0,<1.25.0)", "mypy-boto3-ivschat 
(>=1.24.0,<1.25.0)", "mypy-boto3-ivs (>=1.24.0,<1.25.0)", "mypy-boto3-iotwireless (>=1.24.0,<1.25.0)", "mypy-boto3-iottwinmaker (>=1.24.0,<1.25.0)", "mypy-boto3-iotthingsgraph (>=1.24.0,<1.25.0)", "mypy-boto3-iotsitewise (>=1.24.0,<1.25.0)", "mypy-boto3-iotsecuretunneling (>=1.24.0,<1.25.0)", "mypy-boto3-iotfleethub (>=1.24.0,<1.25.0)", "mypy-boto3-iotevents-data (>=1.24.0,<1.25.0)", "mypy-boto3-iotevents (>=1.24.0,<1.25.0)", "mypy-boto3-iotdeviceadvisor (>=1.24.0,<1.25.0)", "mypy-boto3-iotanalytics (>=1.24.0,<1.25.0)", "mypy-boto3-iot1click-projects (>=1.24.0,<1.25.0)", "mypy-boto3-iot1click-devices (>=1.24.0,<1.25.0)", "mypy-boto3-iot-jobs-data (>=1.24.0,<1.25.0)", "mypy-boto3-iot-data (>=1.24.0,<1.25.0)", "mypy-boto3-iot (>=1.24.0,<1.25.0)", "mypy-boto3-inspector2 (>=1.24.0,<1.25.0)", "mypy-boto3-inspector (>=1.24.0,<1.25.0)", "mypy-boto3-importexport (>=1.24.0,<1.25.0)", "mypy-boto3-imagebuilder (>=1.24.0,<1.25.0)", "mypy-boto3-identitystore (>=1.24.0,<1.25.0)", "mypy-boto3-iam (>=1.24.0,<1.25.0)", "mypy-boto3-honeycode (>=1.24.0,<1.25.0)", "mypy-boto3-healthlake (>=1.24.0,<1.25.0)", "mypy-boto3-health (>=1.24.0,<1.25.0)", "mypy-boto3-guardduty (>=1.24.0,<1.25.0)", "mypy-boto3-groundstation (>=1.24.0,<1.25.0)", "mypy-boto3-greengrassv2 (>=1.24.0,<1.25.0)", "mypy-boto3-greengrass (>=1.24.0,<1.25.0)", "mypy-boto3-grafana (>=1.24.0,<1.25.0)", "mypy-boto3-glue (>=1.24.0,<1.25.0)", "mypy-boto3-globalaccelerator (>=1.24.0,<1.25.0)", "mypy-boto3-glacier (>=1.24.0,<1.25.0)", "mypy-boto3-gamesparks (>=1.24.0,<1.25.0)", "mypy-boto3-gamelift (>=1.24.0,<1.25.0)", "mypy-boto3-fsx (>=1.24.0,<1.25.0)", "mypy-boto3-frauddetector (>=1.24.0,<1.25.0)", "mypy-boto3-forecastquery (>=1.24.0,<1.25.0)", "mypy-boto3-forecast (>=1.24.0,<1.25.0)", "mypy-boto3-fms (>=1.24.0,<1.25.0)", "mypy-boto3-fis (>=1.24.0,<1.25.0)", "mypy-boto3-firehose (>=1.24.0,<1.25.0)", "mypy-boto3-finspace-data (>=1.24.0,<1.25.0)", "mypy-boto3-finspace (>=1.24.0,<1.25.0)", "mypy-boto3-evidently 
(>=1.24.0,<1.25.0)", "mypy-boto3-events (>=1.24.0,<1.25.0)", "mypy-boto3-es (>=1.24.0,<1.25.0)", "mypy-boto3-emr-serverless (>=1.24.0,<1.25.0)", "mypy-boto3-emr-containers (>=1.24.0,<1.25.0)", "mypy-boto3-emr (>=1.24.0,<1.25.0)", "mypy-boto3-elbv2 (>=1.24.0,<1.25.0)", "mypy-boto3-elb (>=1.24.0,<1.25.0)", "mypy-boto3-elastictranscoder (>=1.24.0,<1.25.0)", "mypy-boto3-elasticbeanstalk (>=1.24.0,<1.25.0)", "mypy-boto3-elasticache (>=1.24.0,<1.25.0)", "mypy-boto3-elastic-inference (>=1.24.0,<1.25.0)", "mypy-boto3-eks (>=1.24.0,<1.25.0)", "mypy-boto3-efs (>=1.24.0,<1.25.0)", "mypy-boto3-ecs (>=1.24.0,<1.25.0)", "mypy-boto3-ecr-public (>=1.24.0,<1.25.0)", "mypy-boto3-ecr (>=1.24.0,<1.25.0)", "mypy-boto3-ec2-instance-connect (>=1.24.0,<1.25.0)", "mypy-boto3-ec2 (>=1.24.0,<1.25.0)", "mypy-boto3-ebs (>=1.24.0,<1.25.0)", "mypy-boto3-dynamodbstreams (>=1.24.0,<1.25.0)", "mypy-boto3-dynamodb (>=1.24.0,<1.25.0)", "mypy-boto3-ds (>=1.24.0,<1.25.0)", "mypy-boto3-drs (>=1.24.0,<1.25.0)", "mypy-boto3-docdb (>=1.24.0,<1.25.0)", "mypy-boto3-dms (>=1.24.0,<1.25.0)", "mypy-boto3-dlm (>=1.24.0,<1.25.0)", "mypy-boto3-discovery (>=1.24.0,<1.25.0)", "mypy-boto3-directconnect (>=1.24.0,<1.25.0)", "mypy-boto3-devops-guru (>=1.24.0,<1.25.0)", "mypy-boto3-devicefarm (>=1.24.0,<1.25.0)", "mypy-boto3-detective (>=1.24.0,<1.25.0)", "mypy-boto3-dax (>=1.24.0,<1.25.0)", "mypy-boto3-datasync (>=1.24.0,<1.25.0)", "mypy-boto3-datapipeline (>=1.24.0,<1.25.0)", "mypy-boto3-dataexchange (>=1.24.0,<1.25.0)", "mypy-boto3-databrew (>=1.24.0,<1.25.0)", "mypy-boto3-customer-profiles (>=1.24.0,<1.25.0)", "mypy-boto3-cur (>=1.24.0,<1.25.0)", "mypy-boto3-connectparticipant (>=1.24.0,<1.25.0)", "mypy-boto3-connectcampaigns (>=1.24.0,<1.25.0)", "mypy-boto3-connect-contact-lens (>=1.24.0,<1.25.0)", "mypy-boto3-connect (>=1.24.0,<1.25.0)", "mypy-boto3-config (>=1.24.0,<1.25.0)", "mypy-boto3-compute-optimizer (>=1.24.0,<1.25.0)", "mypy-boto3-comprehendmedical (>=1.24.0,<1.25.0)", "mypy-boto3-comprehend 
(>=1.24.0,<1.25.0)", "mypy-boto3-cognito-sync (>=1.24.0,<1.25.0)", "mypy-boto3-cognito-idp (>=1.24.0,<1.25.0)", "mypy-boto3-cognito-identity (>=1.24.0,<1.25.0)", "mypy-boto3-codestar-notifications (>=1.24.0,<1.25.0)", "mypy-boto3-codestar-connections (>=1.24.0,<1.25.0)", "mypy-boto3-codestar (>=1.24.0,<1.25.0)", "mypy-boto3-codepipeline (>=1.24.0,<1.25.0)", "mypy-boto3-codeguruprofiler (>=1.24.0,<1.25.0)", "mypy-boto3-codeguru-reviewer (>=1.24.0,<1.25.0)", "mypy-boto3-codedeploy (>=1.24.0,<1.25.0)", "mypy-boto3-codecommit (>=1.24.0,<1.25.0)", "mypy-boto3-codebuild (>=1.24.0,<1.25.0)", "mypy-boto3-codeartifact (>=1.24.0,<1.25.0)", "mypy-boto3-cloudwatch (>=1.24.0,<1.25.0)", "mypy-boto3-cloudtrail (>=1.24.0,<1.25.0)", "mypy-boto3-cloudsearchdomain (>=1.24.0,<1.25.0)", "mypy-boto3-cloudsearch (>=1.24.0,<1.25.0)", "mypy-boto3-cloudhsmv2 (>=1.24.0,<1.25.0)", "mypy-boto3-cloudhsm (>=1.24.0,<1.25.0)", "mypy-boto3-cloudfront (>=1.24.0,<1.25.0)", "mypy-boto3-cloudformation (>=1.24.0,<1.25.0)", "mypy-boto3-xray (>=1.24.0,<1.25.0)", "mypy-boto3-workspaces-web (>=1.24.0,<1.25.0)", "mypy-boto3-workspaces (>=1.24.0,<1.25.0)", "mypy-boto3-workmailmessageflow (>=1.24.0,<1.25.0)", "mypy-boto3-workmail (>=1.24.0,<1.25.0)", "mypy-boto3-worklink (>=1.24.0,<1.25.0)", "mypy-boto3-workdocs (>=1.24.0,<1.25.0)", "mypy-boto3-wisdom (>=1.24.0,<1.25.0)", "mypy-boto3-wellarchitected (>=1.24.0,<1.25.0)", "mypy-boto3-wafv2 (>=1.24.0,<1.25.0)", "mypy-boto3-waf-regional (>=1.24.0,<1.25.0)", "mypy-boto3-waf (>=1.24.0,<1.25.0)", "mypy-boto3-voice-id (>=1.24.0,<1.25.0)", "mypy-boto3-translate (>=1.24.0,<1.25.0)", "mypy-boto3-transfer (>=1.24.0,<1.25.0)", "mypy-boto3-transcribe (>=1.24.0,<1.25.0)", "mypy-boto3-timestream-write (>=1.24.0,<1.25.0)", "mypy-boto3-timestream-query (>=1.24.0,<1.25.0)", "mypy-boto3-textract (>=1.24.0,<1.25.0)", "mypy-boto3-synthetics (>=1.24.0,<1.25.0)", "mypy-boto3-swf (>=1.24.0,<1.25.0)", "mypy-boto3-support (>=1.24.0,<1.25.0)", "mypy-boto3-sts (>=1.24.0,<1.25.0)", 
"mypy-boto3-storagegateway (>=1.24.0,<1.25.0)", "mypy-boto3-stepfunctions (>=1.24.0,<1.25.0)", "mypy-boto3-sso-oidc (>=1.24.0,<1.25.0)", "mypy-boto3-sso-admin (>=1.24.0,<1.25.0)", "mypy-boto3-sso (>=1.24.0,<1.25.0)", "mypy-boto3-ssm-incidents (>=1.24.0,<1.25.0)", "mypy-boto3-ssm-contacts (>=1.24.0,<1.25.0)", "mypy-boto3-ssm (>=1.24.0,<1.25.0)", "mypy-boto3-sqs (>=1.24.0,<1.25.0)", "mypy-boto3-sns (>=1.24.0,<1.25.0)", "mypy-boto3-snowball (>=1.24.0,<1.25.0)", "mypy-boto3-snow-device-management (>=1.24.0,<1.25.0)", "mypy-boto3-sms-voice (>=1.24.0,<1.25.0)", "mypy-boto3-sms (>=1.24.0,<1.25.0)", "mypy-boto3-signer (>=1.24.0,<1.25.0)", "mypy-boto3-shield (>=1.24.0,<1.25.0)", "mypy-boto3-sesv2 (>=1.24.0,<1.25.0)", "mypy-boto3-ses (>=1.24.0,<1.25.0)", "mypy-boto3-servicediscovery (>=1.24.0,<1.25.0)", "mypy-boto3-servicecatalog-appregistry (>=1.24.0,<1.25.0)", "mypy-boto3-servicecatalog (>=1.24.0,<1.25.0)", "mypy-boto3-service-quotas (>=1.24.0,<1.25.0)", "mypy-boto3-serverlessrepo (>=1.24.0,<1.25.0)", "mypy-boto3-securityhub (>=1.24.0,<1.25.0)", "mypy-boto3-secretsmanager (>=1.24.0,<1.25.0)", "mypy-boto3-sdb (>=1.24.0,<1.25.0)", "mypy-boto3-schemas (>=1.24.0,<1.25.0)", "mypy-boto3-savingsplans (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-featurestore-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-edge (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-a2i-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker (>=1.24.0,<1.25.0)", "mypy-boto3-s3outposts (>=1.24.0,<1.25.0)", "mypy-boto3-s3control (>=1.24.0,<1.25.0)", "mypy-boto3-s3 (>=1.24.0,<1.25.0)", "mypy-boto3-rum (>=1.24.0,<1.25.0)", "mypy-boto3-route53resolver (>=1.24.0,<1.25.0)", "mypy-boto3-route53domains (>=1.24.0,<1.25.0)", "mypy-boto3-route53-recovery-readiness (>=1.24.0,<1.25.0)", "mypy-boto3-route53-recovery-control-config (>=1.24.0,<1.25.0)", "mypy-boto3-route53-recovery-cluster (>=1.24.0,<1.25.0)", "mypy-boto3-route53 (>=1.24.0,<1.25.0)", "mypy-boto3-rolesanywhere 
(>=1.24.0,<1.25.0)", "mypy-boto3-robomaker (>=1.24.0,<1.25.0)", "mypy-boto3-resourcegroupstaggingapi (>=1.24.0,<1.25.0)", "mypy-boto3-resource-groups (>=1.24.0,<1.25.0)", "mypy-boto3-resiliencehub (>=1.24.0,<1.25.0)", "mypy-boto3-rekognition (>=1.24.0,<1.25.0)", "mypy-boto3-redshift-serverless (>=1.24.0,<1.25.0)", "mypy-boto3-redshift-data (>=1.24.0,<1.25.0)", "mypy-boto3-redshift (>=1.24.0,<1.25.0)", "mypy-boto3-rds-data (>=1.24.0,<1.25.0)", "mypy-boto3-rds (>=1.24.0,<1.25.0)", "mypy-boto3-rbin (>=1.24.0,<1.25.0)", "mypy-boto3-ram (>=1.24.0,<1.25.0)", "mypy-boto3-quicksight (>=1.24.0,<1.25.0)", "mypy-boto3-qldb-session (>=1.24.0,<1.25.0)", "mypy-boto3-qldb (>=1.24.0,<1.25.0)", "mypy-boto3-proton (>=1.24.0,<1.25.0)", "mypy-boto3-privatenetworks (>=1.24.0,<1.25.0)", "mypy-boto3-pricing (>=1.24.0,<1.25.0)", "mypy-boto3-polly (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint-sms-voice-v2 (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint-sms-voice (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint-email (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint (>=1.24.0,<1.25.0)", "mypy-boto3-pi (>=1.24.0,<1.25.0)", "mypy-boto3-personalize-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-personalize-events (>=1.24.0,<1.25.0)", "mypy-boto3-personalize (>=1.24.0,<1.25.0)", "mypy-boto3-panorama (>=1.24.0,<1.25.0)", "mypy-boto3-outposts (>=1.24.0,<1.25.0)", "mypy-boto3-organizations (>=1.24.0,<1.25.0)", "mypy-boto3-opsworkscm (>=1.24.0,<1.25.0)", "mypy-boto3-opsworks (>=1.24.0,<1.25.0)", "mypy-boto3-opensearch (>=1.24.0,<1.25.0)", "mypy-boto3-nimble (>=1.24.0,<1.25.0)", "mypy-boto3-networkmanager (>=1.24.0,<1.25.0)", "mypy-boto3-network-firewall (>=1.24.0,<1.25.0)", "mypy-boto3-neptune (>=1.24.0,<1.25.0)", "mypy-boto3-mwaa (>=1.24.0,<1.25.0)", "mypy-boto3-mturk (>=1.24.0,<1.25.0)", "mypy-boto3-mq (>=1.24.0,<1.25.0)", "mypy-boto3-mobile (>=1.24.0,<1.25.0)", "mypy-boto3-migrationhubstrategy (>=1.24.0,<1.25.0)", "mypy-boto3-migrationhub-config (>=1.24.0,<1.25.0)", "mypy-boto3-migration-hub-refactor-spaces 
(>=1.24.0,<1.25.0)", "mypy-boto3-mgn (>=1.24.0,<1.25.0)", "mypy-boto3-mgh (>=1.24.0,<1.25.0)", "mypy-boto3-meteringmarketplace (>=1.24.0,<1.25.0)", "mypy-boto3-memorydb (>=1.24.0,<1.25.0)", "mypy-boto3-mediatailor (>=1.24.0,<1.25.0)", "mypy-boto3-mediastore-data (>=1.24.0,<1.25.0)", "mypy-boto3-mediastore (>=1.24.0,<1.25.0)", "mypy-boto3-mediapackage-vod (>=1.24.0,<1.25.0)", "mypy-boto3-mediapackage (>=1.24.0,<1.25.0)", "mypy-boto3-medialive (>=1.24.0,<1.25.0)", "mypy-boto3-mediaconvert (>=1.24.0,<1.25.0)", "mypy-boto3-mediaconnect (>=1.24.0,<1.25.0)", "mypy-boto3-marketplacecommerceanalytics (>=1.24.0,<1.25.0)", "mypy-boto3-marketplace-entitlement (>=1.24.0,<1.25.0)", "mypy-boto3-marketplace-catalog (>=1.24.0,<1.25.0)", "mypy-boto3-managedblockchain (>=1.24.0,<1.25.0)", "mypy-boto3-macie2 (>=1.24.0,<1.25.0)", "mypy-boto3-macie (>=1.24.0,<1.25.0)", "mypy-boto3-machinelearning (>=1.24.0,<1.25.0)", "mypy-boto3-m2 (>=1.24.0,<1.25.0)", "mypy-boto3-lookoutvision (>=1.24.0,<1.25.0)", "mypy-boto3-lookoutmetrics (>=1.24.0,<1.25.0)", "mypy-boto3-lookoutequipment (>=1.24.0,<1.25.0)", "mypy-boto3-logs (>=1.24.0,<1.25.0)", "mypy-boto3-location (>=1.24.0,<1.25.0)", "mypy-boto3-lightsail (>=1.24.0,<1.25.0)", "mypy-boto3-license-manager-user-subscriptions (>=1.24.0,<1.25.0)", "mypy-boto3-license-manager (>=1.24.0,<1.25.0)", "mypy-boto3-lexv2-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-lexv2-models (>=1.24.0,<1.25.0)", "mypy-boto3-lex-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-lex-models (>=1.24.0,<1.25.0)", "mypy-boto3-lambda (>=1.24.0,<1.25.0)", "mypy-boto3-lakeformation (>=1.24.0,<1.25.0)", "mypy-boto3-kms (>=1.24.0,<1.25.0)", "mypy-boto3-kinesisvideo (>=1.24.0,<1.25.0)", "mypy-boto3-kinesisanalyticsv2 (>=1.24.0,<1.25.0)", "mypy-boto3-kinesisanalytics (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis-video-signaling (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis-video-media (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis-video-archived-media (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis (>=1.24.0,<1.25.0)", 
"mypy-boto3-keyspaces (>=1.24.0,<1.25.0)", "mypy-boto3-kendra (>=1.24.0,<1.25.0)", "mypy-boto3-clouddirectory (>=1.24.0,<1.25.0)", "mypy-boto3-cloudcontrol (>=1.24.0,<1.25.0)", "mypy-boto3-cloud9 (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-messaging (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-meetings (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-media-pipelines (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-identity (>=1.24.0,<1.25.0)", "mypy-boto3-chime (>=1.24.0,<1.25.0)", "mypy-boto3-ce (>=1.24.0,<1.25.0)", "mypy-boto3-budgets (>=1.24.0,<1.25.0)", "mypy-boto3-braket (>=1.24.0,<1.25.0)", "mypy-boto3-billingconductor (>=1.24.0,<1.25.0)", "mypy-boto3-batch (>=1.24.0,<1.25.0)", "mypy-boto3-backupstorage (>=1.24.0,<1.25.0)", "mypy-boto3-backup-gateway (>=1.24.0,<1.25.0)", "mypy-boto3-backup (>=1.24.0,<1.25.0)", "mypy-boto3-autoscaling-plans (>=1.24.0,<1.25.0)", "mypy-boto3-autoscaling (>=1.24.0,<1.25.0)", "mypy-boto3-auditmanager (>=1.24.0,<1.25.0)", "mypy-boto3-athena (>=1.24.0,<1.25.0)", "mypy-boto3-appsync (>=1.24.0,<1.25.0)", "mypy-boto3-appstream (>=1.24.0,<1.25.0)", "mypy-boto3-apprunner (>=1.24.0,<1.25.0)", "mypy-boto3-appmesh (>=1.24.0,<1.25.0)", "mypy-boto3-applicationcostprofiler (>=1.24.0,<1.25.0)", "mypy-boto3-application-insights (>=1.24.0,<1.25.0)", "mypy-boto3-application-autoscaling (>=1.24.0,<1.25.0)", "mypy-boto3-appintegrations (>=1.24.0,<1.25.0)", "mypy-boto3-appflow (>=1.24.0,<1.25.0)", "mypy-boto3-appconfigdata (>=1.24.0,<1.25.0)", "mypy-boto3-appconfig (>=1.24.0,<1.25.0)", "mypy-boto3-apigatewayv2 (>=1.24.0,<1.25.0)", "mypy-boto3-apigatewaymanagementapi (>=1.24.0,<1.25.0)", "mypy-boto3-apigateway (>=1.24.0,<1.25.0)", "mypy-boto3-amplifyuibuilder (>=1.24.0,<1.25.0)", "mypy-boto3-amplifybackend (>=1.24.0,<1.25.0)", "mypy-boto3-amplify (>=1.24.0,<1.25.0)", "mypy-boto3-amp (>=1.24.0,<1.25.0)", "mypy-boto3-alexaforbusiness (>=1.24.0,<1.25.0)", "mypy-boto3-acm-pca (>=1.24.0,<1.25.0)", "mypy-boto3-acm (>=1.24.0,<1.25.0)", "mypy-boto3-account 
(>=1.24.0,<1.25.0)", "mypy-boto3-accessanalyzer (>=1.24.0,<1.25.0)"] +budgets = ["mypy-boto3-budgets (>=1.24.0,<1.25.0)"] +braket = ["mypy-boto3-braket (>=1.24.0,<1.25.0)"] +billingconductor = ["mypy-boto3-billingconductor (>=1.24.0,<1.25.0)"] +batch = ["mypy-boto3-batch (>=1.24.0,<1.25.0)"] +backupstorage = ["mypy-boto3-backupstorage (>=1.24.0,<1.25.0)"] +backup-gateway = ["mypy-boto3-backup-gateway (>=1.24.0,<1.25.0)"] +backup = ["mypy-boto3-backup (>=1.24.0,<1.25.0)"] +autoscaling-plans = ["mypy-boto3-autoscaling-plans (>=1.24.0,<1.25.0)"] +autoscaling = ["mypy-boto3-autoscaling (>=1.24.0,<1.25.0)"] +auditmanager = ["mypy-boto3-auditmanager (>=1.24.0,<1.25.0)"] +athena = ["mypy-boto3-athena (>=1.24.0,<1.25.0)"] +appsync = ["mypy-boto3-appsync (>=1.24.0,<1.25.0)"] +appstream = ["mypy-boto3-appstream (>=1.24.0,<1.25.0)"] +apprunner = ["mypy-boto3-apprunner (>=1.24.0,<1.25.0)"] +appmesh = ["mypy-boto3-appmesh (>=1.24.0,<1.25.0)"] +applicationcostprofiler = ["mypy-boto3-applicationcostprofiler (>=1.24.0,<1.25.0)"] +application-insights = ["mypy-boto3-application-insights (>=1.24.0,<1.25.0)"] +application-autoscaling = ["mypy-boto3-application-autoscaling (>=1.24.0,<1.25.0)"] +appintegrations = ["mypy-boto3-appintegrations (>=1.24.0,<1.25.0)"] +appflow = ["mypy-boto3-appflow (>=1.24.0,<1.25.0)"] +appconfigdata = ["mypy-boto3-appconfigdata (>=1.24.0,<1.25.0)"] +appconfig = ["mypy-boto3-appconfig (>=1.24.0,<1.25.0)"] +apigatewayv2 = ["mypy-boto3-apigatewayv2 (>=1.24.0,<1.25.0)"] +apigatewaymanagementapi = ["mypy-boto3-apigatewaymanagementapi (>=1.24.0,<1.25.0)"] +apigateway = ["mypy-boto3-apigateway (>=1.24.0,<1.25.0)"] +amplifyuibuilder = ["mypy-boto3-amplifyuibuilder (>=1.24.0,<1.25.0)"] +amplifybackend = ["mypy-boto3-amplifybackend (>=1.24.0,<1.25.0)"] +amplify = ["mypy-boto3-amplify (>=1.24.0,<1.25.0)"] +amp = ["mypy-boto3-amp (>=1.24.0,<1.25.0)"] +codeguru-reviewer = ["mypy-boto3-codeguru-reviewer (>=1.24.0,<1.25.0)"] +codedeploy = ["mypy-boto3-codedeploy 
(>=1.24.0,<1.25.0)"] +codecommit = ["mypy-boto3-codecommit (>=1.24.0,<1.25.0)"] +codebuild = ["mypy-boto3-codebuild (>=1.24.0,<1.25.0)"] +codeartifact = ["mypy-boto3-codeartifact (>=1.24.0,<1.25.0)"] +cloudwatch = ["mypy-boto3-cloudwatch (>=1.24.0,<1.25.0)"] +cloudtrail = ["mypy-boto3-cloudtrail (>=1.24.0,<1.25.0)"] +cloudsearchdomain = ["mypy-boto3-cloudsearchdomain (>=1.24.0,<1.25.0)"] +cloudsearch = ["mypy-boto3-cloudsearch (>=1.24.0,<1.25.0)"] +cloudhsmv2 = ["mypy-boto3-cloudhsmv2 (>=1.24.0,<1.25.0)"] +cloudhsm = ["mypy-boto3-cloudhsm (>=1.24.0,<1.25.0)"] +cloudfront = ["mypy-boto3-cloudfront (>=1.24.0,<1.25.0)"] +cloudformation = ["mypy-boto3-cloudformation (>=1.24.0,<1.25.0)"] +clouddirectory = ["mypy-boto3-clouddirectory (>=1.24.0,<1.25.0)"] +cloudcontrol = ["mypy-boto3-cloudcontrol (>=1.24.0,<1.25.0)"] +cloud9 = ["mypy-boto3-cloud9 (>=1.24.0,<1.25.0)"] +chime-sdk-messaging = ["mypy-boto3-chime-sdk-messaging (>=1.24.0,<1.25.0)"] +chime-sdk-meetings = ["mypy-boto3-chime-sdk-meetings (>=1.24.0,<1.25.0)"] +chime-sdk-media-pipelines = ["mypy-boto3-chime-sdk-media-pipelines (>=1.24.0,<1.25.0)"] +chime-sdk-identity = ["mypy-boto3-chime-sdk-identity (>=1.24.0,<1.25.0)"] +chime = ["mypy-boto3-chime (>=1.24.0,<1.25.0)"] +ce = ["mypy-boto3-ce (>=1.24.0,<1.25.0)"] +alexaforbusiness = ["mypy-boto3-alexaforbusiness (>=1.24.0,<1.25.0)"] +acm-pca = ["mypy-boto3-acm-pca (>=1.24.0,<1.25.0)"] +acm = ["mypy-boto3-acm (>=1.24.0,<1.25.0)"] +account = ["mypy-boto3-account (>=1.24.0,<1.25.0)"] +accessanalyzer = ["mypy-boto3-accessanalyzer (>=1.24.0,<1.25.0)"] [[package]] name = "botocore" @@ -814,7 +842,7 @@ python-versions = "*" [[package]] name = "moto" -version = "3.1.17" +version = "3.1.18" description = "A library that allows your python tests to easily mock out the boto library" category = "main" optional = false @@ -828,7 +856,7 @@ cfn-lint = {version = ">=0.4.0", optional = true, markers = "extra == \"server\" cryptography = ">=3.3.1" docker = {version = ">=2.5.1", 
optional = true, markers = "extra == \"server\""} ecdsa = {version = "!=0.15", optional = true, markers = "extra == \"server\""} -flask = {version = "*", optional = true, markers = "extra == \"server\""} +flask = {version = "<2.2.0", optional = true, markers = "extra == \"server\""} flask-cors = {version = "*", optional = true, markers = "extra == \"server\""} graphql-core = {version = "*", optional = true, markers = "extra == \"server\""} idna = {version = ">=2.5,<4", optional = true, markers = "extra == \"server\""} @@ -848,28 +876,28 @@ werkzeug = ">=0.5,<2.2.0" xmltodict = "*" [package.extras] -all = ["PyYAML (>=5.1)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "ecdsa (!=0.15)", "docker (>=2.5.1)", "graphql-core", "jsondiff (>=1.1.2)", "aws-xray-sdk (>=0.93,!=0.96)", "idna (>=2.5,<4)", "cfn-lint (>=0.4.0)", "sshpubkeys (>=3.1.0)", "pyparsing (>=3.0.7)", "openapi-spec-validator (>=0.2.8)", "setuptools"] -apigateway = ["PyYAML (>=5.1)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "ecdsa (!=0.15)", "openapi-spec-validator (>=0.2.8)"] -apigatewayv2 = ["PyYAML (>=5.1)"] -appsync = ["graphql-core"] -awslambda = ["docker (>=2.5.1)"] -batch = ["docker (>=2.5.1)"] -cloudformation = ["PyYAML (>=5.1)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "ecdsa (!=0.15)", "docker (>=2.5.1)", "graphql-core", "jsondiff (>=1.1.2)", "aws-xray-sdk (>=0.93,!=0.96)", "idna (>=2.5,<4)", "cfn-lint (>=0.4.0)", "sshpubkeys (>=3.1.0)", "pyparsing (>=3.0.7)", "openapi-spec-validator (>=0.2.8)", "setuptools"] -cognitoidp = ["python-jose[cryptography] (>=3.1.0,<4.0.0)", "ecdsa (!=0.15)"] -ds = ["sshpubkeys (>=3.1.0)"] -dynamodb = ["docker (>=2.5.1)"] -dynamodb2 = ["docker (>=2.5.1)"] -dynamodbstreams = ["docker (>=2.5.1)"] -ebs = ["sshpubkeys (>=3.1.0)"] -ec2 = ["sshpubkeys (>=3.1.0)"] -efs = ["sshpubkeys (>=3.1.0)"] -glue = ["pyparsing (>=3.0.7)"] -iotdata = ["jsondiff (>=1.1.2)"] -route53resolver = ["sshpubkeys (>=3.1.0)"] +xray = ["setuptools", "aws-xray-sdk (>=0.93,!=0.96)"] +ssm = 
["dataclasses", "PyYAML (>=5.1)"] +server = ["flask-cors", "flask (<2.2.0)", "setuptools", "openapi-spec-validator (>=0.2.8)", "pyparsing (>=3.0.7)", "sshpubkeys (>=3.1.0)", "cfn-lint (>=0.4.0)", "idna (>=2.5,<4)", "aws-xray-sdk (>=0.93,!=0.96)", "jsondiff (>=1.1.2)", "graphql-core", "docker (>=2.5.1)", "ecdsa (!=0.15)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "PyYAML (>=5.1)"] s3 = ["PyYAML (>=5.1)"] -server = ["PyYAML (>=5.1)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "ecdsa (!=0.15)", "docker (>=2.5.1)", "graphql-core", "jsondiff (>=1.1.2)", "aws-xray-sdk (>=0.93,!=0.96)", "idna (>=2.5,<4)", "cfn-lint (>=0.4.0)", "sshpubkeys (>=3.1.0)", "pyparsing (>=3.0.7)", "openapi-spec-validator (>=0.2.8)", "setuptools", "flask", "flask-cors"] -ssm = ["PyYAML (>=5.1)", "dataclasses"] -xray = ["aws-xray-sdk (>=0.93,!=0.96)", "setuptools"] +route53resolver = ["sshpubkeys (>=3.1.0)"] +iotdata = ["jsondiff (>=1.1.2)"] +glue = ["pyparsing (>=3.0.7)"] +efs = ["sshpubkeys (>=3.1.0)"] +ec2 = ["sshpubkeys (>=3.1.0)"] +ebs = ["sshpubkeys (>=3.1.0)"] +dynamodbstreams = ["docker (>=2.5.1)"] +dynamodb2 = ["docker (>=2.5.1)"] +dynamodb = ["docker (>=2.5.1)"] +ds = ["sshpubkeys (>=3.1.0)"] +cognitoidp = ["ecdsa (!=0.15)", "python-jose[cryptography] (>=3.1.0,<4.0.0)"] +cloudformation = ["setuptools", "openapi-spec-validator (>=0.2.8)", "pyparsing (>=3.0.7)", "sshpubkeys (>=3.1.0)", "cfn-lint (>=0.4.0)", "idna (>=2.5,<4)", "aws-xray-sdk (>=0.93,!=0.96)", "jsondiff (>=1.1.2)", "graphql-core", "docker (>=2.5.1)", "ecdsa (!=0.15)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "PyYAML (>=5.1)"] +batch = ["docker (>=2.5.1)"] +awslambda = ["docker (>=2.5.1)"] +appsync = ["graphql-core"] +apigatewayv2 = ["PyYAML (>=5.1)"] +apigateway = ["openapi-spec-validator (>=0.2.8)", "ecdsa (!=0.15)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "PyYAML (>=5.1)"] +all = ["setuptools", "openapi-spec-validator (>=0.2.8)", "pyparsing (>=3.0.7)", "sshpubkeys (>=3.1.0)", "cfn-lint (>=0.4.0)", "idna 
(>=2.5,<4)", "aws-xray-sdk (>=0.93,!=0.96)", "jsondiff (>=1.1.2)", "graphql-core", "docker (>=2.5.1)", "ecdsa (!=0.15)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "PyYAML (>=5.1)"] [[package]] name = "mypy" @@ -1461,13 +1489,21 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest- [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "e58b30774603aa0f31579899a6c78579329c580f2f4bbaec209b0f9d52079fc6" +content-hash = "453b90e40481ca6e4395e84beb73489b58c0983e826e369eb0f412ef633ea5e1" [metadata.files] aiopg = [ {file = "aiopg-1.3.4-py3-none-any.whl", hash = "sha256:b5b74a124831aad71608c3c203479db90bac4a7eb3f8982bc48c3d3e6f1e57bf"}, {file = "aiopg-1.3.4.tar.gz", hash = "sha256:23f9e4cd9f28e9d91a6de3b4fb517e8bed25511cd954acccba9fe3a702d9b7d0"}, ] +allure-pytest = [ + {file = "allure-pytest-2.9.45.tar.gz", hash = "sha256:20620fde08a597578b157a60ff38bdcc300e312d12eaa38cf28e4a62e22bdaa3"}, + {file = "allure_pytest-2.9.45-py3-none-any.whl", hash = "sha256:9b0325e06f8f79cf03289d4f4d741e57607d0fa12d9c094e243cbb042283f083"}, +] +allure-python-commons = [ + {file = "allure-python-commons-2.9.45.tar.gz", hash = "sha256:c238d28aeac35e8c7c517d8a2327e25ae5bbf2c30b5e2313d20ef11d75f5549d"}, + {file = "allure_python_commons-2.9.45-py3-none-any.whl", hash = "sha256:3572f0526db3946fb14470c58b0b41d343483aad91d37d414e4641815e13691a"}, +] async-timeout = [ {file = "async-timeout-4.0.2.tar.gz", hash = "sha256:2163e1640ddb52b7a8c80d0a67a08587e5d245cc9c553a74a847056bc2976b15"}, {file = "async_timeout-4.0.2-py3-none-any.whl", hash = "sha256:8ca1e4fcf50d07413d66d1a5e416e42cfdf5851c981d679a09851a6853383b3c"}, @@ -1512,8 +1548,8 @@ boto3 = [ {file = "boto3-1.24.38.tar.gz", hash = "sha256:f4c6b025f392c934338c7f01badfddbd0d3cf2397ff5df35c31409798dce33f5"}, ] boto3-stubs = [ - {file = "boto3-stubs-1.24.46.tar.gz", hash = "sha256:9482238ed9ea7794e6e66a41376bf75d5950f0328de09fac9d224906dcc624ef"}, - {file = "boto3_stubs-1.24.46-py3-none-any.whl", 
hash = "sha256:3aa84f2925b4b50b7f47ac41a11ac05302e744cdf460cb7bcf6488319393d8a4"}, + {file = "boto3-stubs-1.24.51.tar.gz", hash = "sha256:ea69c707e9ceab7c11cab1f11fb4bbe98fa5ff8da593f888946d297daa083870"}, + {file = "boto3_stubs-1.24.51-py3-none-any.whl", hash = "sha256:432aebdb18e7c26bf2b148e04eb33e145976cb932bfe0f72b2d512e945927e57"}, ] botocore = [ {file = "botocore-1.27.38-py3-none-any.whl", hash = "sha256:46a0264ff3335496bd9cb404f83ec0d8eb7bfdef8f74a830c13e6a6b9612adea"}, @@ -1763,8 +1799,8 @@ mccabe = [ {file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"}, ] moto = [ - {file = "moto-3.1.17-py3-none-any.whl", hash = "sha256:84797321fad9a9e924c1c0385b302c80ec23429724c016b504f4bfca9d40d33a"}, - {file = "moto-3.1.17.tar.gz", hash = "sha256:f2e5b32e8910c51c0b0de5b73f902bc53e06fb1c1d077d2b848d27e0b0cbe65e"}, + {file = "moto-3.1.18-py3-none-any.whl", hash = "sha256:b6eb096e7880c46ac44d6d90988c0043e31462115cfdc913a0ee8f470bd9555c"}, + {file = "moto-3.1.18.tar.gz", hash = "sha256:1e05276a62aa5a4aa821b441647c2cbaa2ea175388980b10d5de88d41b327cf7"}, ] mypy = [ {file = "mypy-0.971-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f2899a3cbd394da157194f913a931edfd4be5f274a88041c9dc2d9cdcb1c315c"}, diff --git a/pyproject.toml b/pyproject.toml index 8a3d22f088..a54dbe9ebd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ prometheus-client = "^0.14.1" pytest-timeout = "^2.1.0" Werkzeug = "2.1.2" pytest-order = "^1.0.1" +allure-pytest = "^2.9.45" [tool.poetry.dev-dependencies] yapf = "==0.31.0" diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 5292bc1789..4483355c4c 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -24,6 +24,7 @@ import subprocess import time import filecmp import tempfile +import tarfile from contextlib import closing from pathlib import Path @@ -35,6 +36,7 @@ from psycopg2.extensions 
import make_dsn, parse_dsn from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast, Union, Tuple from typing_extensions import Literal +import allure # type: ignore import requests import backoff # type: ignore @@ -2237,6 +2239,14 @@ def get_test_output_dir(request: Any) -> pathlib.Path: return test_dir +ATTACHMENT_SUFFIXES = frozenset(( + '.log', + '.stderr', + '.stdout', + '.diffs', +)) + + # This is autouse, so the test output directory always gets created, even # if a test doesn't put anything there. It also solves a problem with the # neon_simple_env fixture: if TEST_SHARED_FIXTURES is not set, it @@ -2247,7 +2257,7 @@ def get_test_output_dir(request: Any) -> pathlib.Path: # this fixture ensures that the directory exists. That works because # 'autouse' fixtures are run before other fixtures. @pytest.fixture(scope='function', autouse=True) -def test_output_dir(request: Any) -> pathlib.Path: +def test_output_dir(request: Any) -> Iterator[pathlib.Path]: """ Create the working directory for an individual test. 
""" # one directory per test @@ -2255,7 +2265,26 @@ def test_output_dir(request: Any) -> pathlib.Path: log.info(f'test_output_dir is {test_dir}') shutil.rmtree(test_dir, ignore_errors=True) test_dir.mkdir() - return test_dir + + yield test_dir + + for attachment in test_dir.glob('**/*'): + if attachment.suffix in ATTACHMENT_SUFFIXES: + source = str(attachment) + name = str(attachment.relative_to(test_dir)) + attachment_type = 'text/plain' + extension = attachment.suffix.removeprefix('.') + + # compress files larger than 1Mb, they're hardly readable in a browser + if attachment.stat().st_size > 1024 * 1024: + source = f'{attachment}.tar.gz' + with tarfile.open(source, 'w:gz') as tar: + tar.add(attachment, arcname=attachment.name) + name = f'{name}.tar.gz' + attachment_type = 'application/gzip' + extension = 'tar.gz' + + allure.attach.file(source, name, attachment_type, extension) SKIP_DIRS = frozenset(('pg_wal', From 6b2e1d9065eb39d6533362bbbba53cec2e77ac7d Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Wed, 17 Aug 2022 15:05:37 +0100 Subject: [PATCH 21/63] test_runner: replace yapf with black and isort --- .github/workflows/codestyle.yml | 7 +- .yapfignore | 10 - docs/sourcetree.md | 7 +- poetry.lock | 743 ++++++++++++++++++-------------- pre-commit.py | 53 ++- pyproject.toml | 42 +- setup.cfg | 43 -- 7 files changed, 496 insertions(+), 409 deletions(-) delete mode 100644 .yapfignore delete mode 100644 setup.cfg diff --git a/.github/workflows/codestyle.yml b/.github/workflows/codestyle.yml index d0685f8fd2..bd0f368499 100644 --- a/.github/workflows/codestyle.yml +++ b/.github/workflows/codestyle.yml @@ -128,8 +128,11 @@ jobs: - name: Install Python deps run: ./scripts/pysync - - name: Run yapf to ensure code format - run: poetry run yapf --recursive --diff . + - name: Run isort to ensure code format + run: poetry run isort --diff --check . + + - name: Run black to ensure code format + run: poetry run black --diff --check . 
- name: Run mypy to check types run: poetry run mypy . diff --git a/.yapfignore b/.yapfignore deleted file mode 100644 index 149428e452..0000000000 --- a/.yapfignore +++ /dev/null @@ -1,10 +0,0 @@ -# This file is only read when `yapf` is run from this directory. -# Hence we only top-level directories here to avoid confusion. -# See source code for the exact file format: https://github.com/google/yapf/blob/c6077954245bc3add82dafd853a1c7305a6ebd20/yapf/yapflib/file_resources.py#L40-L43 -vendor/ -target/ -tmp_install/ -__pycache__/ -test_output/ -.neon/ -.git/ diff --git a/docs/sourcetree.md b/docs/sourcetree.md index 39f7be89a0..f189134865 100644 --- a/docs/sourcetree.md +++ b/docs/sourcetree.md @@ -112,11 +112,12 @@ Run `poetry shell` to activate the virtual environment. Alternatively, use `poetry run` to run a single command in the venv, e.g. `poetry run pytest`. ### Obligatory checks -We force code formatting via `yapf` and type hints via `mypy`. -Run the following commands in the repository's root (next to `setup.cfg`): +We force code formatting via `black`, `isort` and type hints via `mypy`. +Run the following commands in the repository's root (next to `pyproject.toml`): ```bash -poetry run yapf -ri . # All code is reformatted +poetry run isort . # Imports are reformatted +poetry run black . # All code is reformatted poetry run mypy . # Ensure there are no typing errors ``` diff --git a/poetry.lock b/poetry.lock index 17b59852f4..cd24641a4f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -117,6 +117,28 @@ category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +[[package]] +name = "black" +version = "22.6.0" +description = "The uncompromising code formatter." 
+category = "dev" +optional = false +python-versions = ">=3.6.2" + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = ">=0.4.3" +pathspec = ">=0.9.0" +platformdirs = ">=2" +tomli = {version = ">=1.1.0", markers = "python_full_version < \"3.11.0a7\""} +typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.7.4)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + [[package]] name = "boto3" version = "1.24.38" @@ -135,8 +157,8 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "boto3-stubs" -version = "1.24.51" -description = "Type annotations for boto3 1.24.51 generated with mypy-boto3-builder 7.11.6" +version = "1.24.56" +description = "Type annotations for boto3 1.24.56 generated with mypy-boto3-builder 7.11.7" category = "main" optional = false python-versions = ">=3.7" @@ -148,321 +170,321 @@ types-s3transfer = "*" typing-extensions = ">=4.1.0" [package.extras] -worklink = ["mypy-boto3-worklink (>=1.24.0,<1.25.0)"] -workdocs = ["mypy-boto3-workdocs (>=1.24.0,<1.25.0)"] -wisdom = ["mypy-boto3-wisdom (>=1.24.0,<1.25.0)"] -wellarchitected = ["mypy-boto3-wellarchitected (>=1.24.0,<1.25.0)"] -wafv2 = ["mypy-boto3-wafv2 (>=1.24.0,<1.25.0)"] -waf-regional = ["mypy-boto3-waf-regional (>=1.24.0,<1.25.0)"] -waf = ["mypy-boto3-waf (>=1.24.0,<1.25.0)"] -voice-id = ["mypy-boto3-voice-id (>=1.24.0,<1.25.0)"] -translate = ["mypy-boto3-translate (>=1.24.0,<1.25.0)"] -transfer = ["mypy-boto3-transfer (>=1.24.0,<1.25.0)"] -transcribe = ["mypy-boto3-transcribe (>=1.24.0,<1.25.0)"] -timestream-write = ["mypy-boto3-timestream-write (>=1.24.0,<1.25.0)"] -timestream-query = ["mypy-boto3-timestream-query (>=1.24.0,<1.25.0)"] -textract = ["mypy-boto3-textract (>=1.24.0,<1.25.0)"] -synthetics = ["mypy-boto3-synthetics (>=1.24.0,<1.25.0)"] -swf = ["mypy-boto3-swf (>=1.24.0,<1.25.0)"] -support = ["mypy-boto3-support 
(>=1.24.0,<1.25.0)"] -sts = ["mypy-boto3-sts (>=1.24.0,<1.25.0)"] -storagegateway = ["mypy-boto3-storagegateway (>=1.24.0,<1.25.0)"] -stepfunctions = ["mypy-boto3-stepfunctions (>=1.24.0,<1.25.0)"] -sso-oidc = ["mypy-boto3-sso-oidc (>=1.24.0,<1.25.0)"] -sso-admin = ["mypy-boto3-sso-admin (>=1.24.0,<1.25.0)"] -sso = ["mypy-boto3-sso (>=1.24.0,<1.25.0)"] -ssm-incidents = ["mypy-boto3-ssm-incidents (>=1.24.0,<1.25.0)"] -ssm-contacts = ["mypy-boto3-ssm-contacts (>=1.24.0,<1.25.0)"] -ssm = ["mypy-boto3-ssm (>=1.24.0,<1.25.0)"] -sqs = ["mypy-boto3-sqs (>=1.24.0,<1.25.0)"] -sns = ["mypy-boto3-sns (>=1.24.0,<1.25.0)"] -snowball = ["mypy-boto3-snowball (>=1.24.0,<1.25.0)"] -snow-device-management = ["mypy-boto3-snow-device-management (>=1.24.0,<1.25.0)"] -sms-voice = ["mypy-boto3-sms-voice (>=1.24.0,<1.25.0)"] -sms = ["mypy-boto3-sms (>=1.24.0,<1.25.0)"] -signer = ["mypy-boto3-signer (>=1.24.0,<1.25.0)"] -shield = ["mypy-boto3-shield (>=1.24.0,<1.25.0)"] -sesv2 = ["mypy-boto3-sesv2 (>=1.24.0,<1.25.0)"] -ses = ["mypy-boto3-ses (>=1.24.0,<1.25.0)"] -servicediscovery = ["mypy-boto3-servicediscovery (>=1.24.0,<1.25.0)"] -servicecatalog-appregistry = ["mypy-boto3-servicecatalog-appregistry (>=1.24.0,<1.25.0)"] -servicecatalog = ["mypy-boto3-servicecatalog (>=1.24.0,<1.25.0)"] -service-quotas = ["mypy-boto3-service-quotas (>=1.24.0,<1.25.0)"] -serverlessrepo = ["mypy-boto3-serverlessrepo (>=1.24.0,<1.25.0)"] -securityhub = ["mypy-boto3-securityhub (>=1.24.0,<1.25.0)"] -secretsmanager = ["mypy-boto3-secretsmanager (>=1.24.0,<1.25.0)"] -sdb = ["mypy-boto3-sdb (>=1.24.0,<1.25.0)"] -schemas = ["mypy-boto3-schemas (>=1.24.0,<1.25.0)"] -savingsplans = ["mypy-boto3-savingsplans (>=1.24.0,<1.25.0)"] -sagemaker-runtime = ["mypy-boto3-sagemaker-runtime (>=1.24.0,<1.25.0)"] -sagemaker-featurestore-runtime = ["mypy-boto3-sagemaker-featurestore-runtime (>=1.24.0,<1.25.0)"] -sagemaker-edge = ["mypy-boto3-sagemaker-edge (>=1.24.0,<1.25.0)"] -sagemaker-a2i-runtime = 
["mypy-boto3-sagemaker-a2i-runtime (>=1.24.0,<1.25.0)"] -sagemaker = ["mypy-boto3-sagemaker (>=1.24.0,<1.25.0)"] -s3outposts = ["mypy-boto3-s3outposts (>=1.24.0,<1.25.0)"] -s3control = ["mypy-boto3-s3control (>=1.24.0,<1.25.0)"] -s3 = ["mypy-boto3-s3 (>=1.24.0,<1.25.0)"] -rum = ["mypy-boto3-rum (>=1.24.0,<1.25.0)"] -route53resolver = ["mypy-boto3-route53resolver (>=1.24.0,<1.25.0)"] -route53domains = ["mypy-boto3-route53domains (>=1.24.0,<1.25.0)"] -route53-recovery-readiness = ["mypy-boto3-route53-recovery-readiness (>=1.24.0,<1.25.0)"] -route53-recovery-control-config = ["mypy-boto3-route53-recovery-control-config (>=1.24.0,<1.25.0)"] -route53-recovery-cluster = ["mypy-boto3-route53-recovery-cluster (>=1.24.0,<1.25.0)"] -route53 = ["mypy-boto3-route53 (>=1.24.0,<1.25.0)"] -rolesanywhere = ["mypy-boto3-rolesanywhere (>=1.24.0,<1.25.0)"] -robomaker = ["mypy-boto3-robomaker (>=1.24.0,<1.25.0)"] -resourcegroupstaggingapi = ["mypy-boto3-resourcegroupstaggingapi (>=1.24.0,<1.25.0)"] -resource-groups = ["mypy-boto3-resource-groups (>=1.24.0,<1.25.0)"] -resiliencehub = ["mypy-boto3-resiliencehub (>=1.24.0,<1.25.0)"] -rekognition = ["mypy-boto3-rekognition (>=1.24.0,<1.25.0)"] -redshift-serverless = ["mypy-boto3-redshift-serverless (>=1.24.0,<1.25.0)"] -redshift-data = ["mypy-boto3-redshift-data (>=1.24.0,<1.25.0)"] -redshift = ["mypy-boto3-redshift (>=1.24.0,<1.25.0)"] -rds-data = ["mypy-boto3-rds-data (>=1.24.0,<1.25.0)"] -rds = ["mypy-boto3-rds (>=1.24.0,<1.25.0)"] -rbin = ["mypy-boto3-rbin (>=1.24.0,<1.25.0)"] -ram = ["mypy-boto3-ram (>=1.24.0,<1.25.0)"] -quicksight = ["mypy-boto3-quicksight (>=1.24.0,<1.25.0)"] -qldb-session = ["mypy-boto3-qldb-session (>=1.24.0,<1.25.0)"] -qldb = ["mypy-boto3-qldb (>=1.24.0,<1.25.0)"] -proton = ["mypy-boto3-proton (>=1.24.0,<1.25.0)"] -privatenetworks = ["mypy-boto3-privatenetworks (>=1.24.0,<1.25.0)"] -pricing = ["mypy-boto3-pricing (>=1.24.0,<1.25.0)"] -polly = ["mypy-boto3-polly (>=1.24.0,<1.25.0)"] -pinpoint-sms-voice-v2 = 
["mypy-boto3-pinpoint-sms-voice-v2 (>=1.24.0,<1.25.0)"] -pinpoint-sms-voice = ["mypy-boto3-pinpoint-sms-voice (>=1.24.0,<1.25.0)"] -pinpoint-email = ["mypy-boto3-pinpoint-email (>=1.24.0,<1.25.0)"] -pinpoint = ["mypy-boto3-pinpoint (>=1.24.0,<1.25.0)"] -pi = ["mypy-boto3-pi (>=1.24.0,<1.25.0)"] -personalize-runtime = ["mypy-boto3-personalize-runtime (>=1.24.0,<1.25.0)"] -personalize-events = ["mypy-boto3-personalize-events (>=1.24.0,<1.25.0)"] -personalize = ["mypy-boto3-personalize (>=1.24.0,<1.25.0)"] -panorama = ["mypy-boto3-panorama (>=1.24.0,<1.25.0)"] -outposts = ["mypy-boto3-outposts (>=1.24.0,<1.25.0)"] -organizations = ["mypy-boto3-organizations (>=1.24.0,<1.25.0)"] -opsworkscm = ["mypy-boto3-opsworkscm (>=1.24.0,<1.25.0)"] -opsworks = ["mypy-boto3-opsworks (>=1.24.0,<1.25.0)"] -opensearch = ["mypy-boto3-opensearch (>=1.24.0,<1.25.0)"] -nimble = ["mypy-boto3-nimble (>=1.24.0,<1.25.0)"] -networkmanager = ["mypy-boto3-networkmanager (>=1.24.0,<1.25.0)"] -network-firewall = ["mypy-boto3-network-firewall (>=1.24.0,<1.25.0)"] -neptune = ["mypy-boto3-neptune (>=1.24.0,<1.25.0)"] -mwaa = ["mypy-boto3-mwaa (>=1.24.0,<1.25.0)"] -mturk = ["mypy-boto3-mturk (>=1.24.0,<1.25.0)"] -mq = ["mypy-boto3-mq (>=1.24.0,<1.25.0)"] -mobile = ["mypy-boto3-mobile (>=1.24.0,<1.25.0)"] -migrationhubstrategy = ["mypy-boto3-migrationhubstrategy (>=1.24.0,<1.25.0)"] -migrationhub-config = ["mypy-boto3-migrationhub-config (>=1.24.0,<1.25.0)"] -migration-hub-refactor-spaces = ["mypy-boto3-migration-hub-refactor-spaces (>=1.24.0,<1.25.0)"] -mgn = ["mypy-boto3-mgn (>=1.24.0,<1.25.0)"] -mgh = ["mypy-boto3-mgh (>=1.24.0,<1.25.0)"] -meteringmarketplace = ["mypy-boto3-meteringmarketplace (>=1.24.0,<1.25.0)"] -memorydb = ["mypy-boto3-memorydb (>=1.24.0,<1.25.0)"] -mediatailor = ["mypy-boto3-mediatailor (>=1.24.0,<1.25.0)"] -mediastore-data = ["mypy-boto3-mediastore-data (>=1.24.0,<1.25.0)"] -mediastore = ["mypy-boto3-mediastore (>=1.24.0,<1.25.0)"] -mediapackage-vod = 
["mypy-boto3-mediapackage-vod (>=1.24.0,<1.25.0)"] -mediapackage = ["mypy-boto3-mediapackage (>=1.24.0,<1.25.0)"] -xray = ["mypy-boto3-xray (>=1.24.0,<1.25.0)"] -workspaces-web = ["mypy-boto3-workspaces-web (>=1.24.0,<1.25.0)"] -workspaces = ["mypy-boto3-workspaces (>=1.24.0,<1.25.0)"] -workmailmessageflow = ["mypy-boto3-workmailmessageflow (>=1.24.0,<1.25.0)"] -workmail = ["mypy-boto3-workmail (>=1.24.0,<1.25.0)"] -medialive = ["mypy-boto3-medialive (>=1.24.0,<1.25.0)"] -kinesisanalytics = ["mypy-boto3-kinesisanalytics (>=1.24.0,<1.25.0)"] -kinesis-video-signaling = ["mypy-boto3-kinesis-video-signaling (>=1.24.0,<1.25.0)"] -kinesis-video-media = ["mypy-boto3-kinesis-video-media (>=1.24.0,<1.25.0)"] -kinesis-video-archived-media = ["mypy-boto3-kinesis-video-archived-media (>=1.24.0,<1.25.0)"] -kinesis = ["mypy-boto3-kinesis (>=1.24.0,<1.25.0)"] -keyspaces = ["mypy-boto3-keyspaces (>=1.24.0,<1.25.0)"] -kendra = ["mypy-boto3-kendra (>=1.24.0,<1.25.0)"] -kafkaconnect = ["mypy-boto3-kafkaconnect (>=1.24.0,<1.25.0)"] -kafka = ["mypy-boto3-kafka (>=1.24.0,<1.25.0)"] -ivschat = ["mypy-boto3-ivschat (>=1.24.0,<1.25.0)"] -ivs = ["mypy-boto3-ivs (>=1.24.0,<1.25.0)"] -iotwireless = ["mypy-boto3-iotwireless (>=1.24.0,<1.25.0)"] -iottwinmaker = ["mypy-boto3-iottwinmaker (>=1.24.0,<1.25.0)"] -iotthingsgraph = ["mypy-boto3-iotthingsgraph (>=1.24.0,<1.25.0)"] -iotsitewise = ["mypy-boto3-iotsitewise (>=1.24.0,<1.25.0)"] -iotsecuretunneling = ["mypy-boto3-iotsecuretunneling (>=1.24.0,<1.25.0)"] -iotfleethub = ["mypy-boto3-iotfleethub (>=1.24.0,<1.25.0)"] -iotevents-data = ["mypy-boto3-iotevents-data (>=1.24.0,<1.25.0)"] -iotevents = ["mypy-boto3-iotevents (>=1.24.0,<1.25.0)"] -iotdeviceadvisor = ["mypy-boto3-iotdeviceadvisor (>=1.24.0,<1.25.0)"] -iotanalytics = ["mypy-boto3-iotanalytics (>=1.24.0,<1.25.0)"] -iot1click-projects = ["mypy-boto3-iot1click-projects (>=1.24.0,<1.25.0)"] -iot1click-devices = ["mypy-boto3-iot1click-devices (>=1.24.0,<1.25.0)"] -iot-jobs-data = 
["mypy-boto3-iot-jobs-data (>=1.24.0,<1.25.0)"] -iot-data = ["mypy-boto3-iot-data (>=1.24.0,<1.25.0)"] -iot = ["mypy-boto3-iot (>=1.24.0,<1.25.0)"] -inspector2 = ["mypy-boto3-inspector2 (>=1.24.0,<1.25.0)"] -inspector = ["mypy-boto3-inspector (>=1.24.0,<1.25.0)"] -importexport = ["mypy-boto3-importexport (>=1.24.0,<1.25.0)"] -imagebuilder = ["mypy-boto3-imagebuilder (>=1.24.0,<1.25.0)"] -identitystore = ["mypy-boto3-identitystore (>=1.24.0,<1.25.0)"] -iam = ["mypy-boto3-iam (>=1.24.0,<1.25.0)"] -honeycode = ["mypy-boto3-honeycode (>=1.24.0,<1.25.0)"] -healthlake = ["mypy-boto3-healthlake (>=1.24.0,<1.25.0)"] -health = ["mypy-boto3-health (>=1.24.0,<1.25.0)"] -guardduty = ["mypy-boto3-guardduty (>=1.24.0,<1.25.0)"] -groundstation = ["mypy-boto3-groundstation (>=1.24.0,<1.25.0)"] -greengrassv2 = ["mypy-boto3-greengrassv2 (>=1.24.0,<1.25.0)"] -greengrass = ["mypy-boto3-greengrass (>=1.24.0,<1.25.0)"] -grafana = ["mypy-boto3-grafana (>=1.24.0,<1.25.0)"] -glue = ["mypy-boto3-glue (>=1.24.0,<1.25.0)"] -globalaccelerator = ["mypy-boto3-globalaccelerator (>=1.24.0,<1.25.0)"] -glacier = ["mypy-boto3-glacier (>=1.24.0,<1.25.0)"] -gamesparks = ["mypy-boto3-gamesparks (>=1.24.0,<1.25.0)"] -gamelift = ["mypy-boto3-gamelift (>=1.24.0,<1.25.0)"] -fsx = ["mypy-boto3-fsx (>=1.24.0,<1.25.0)"] -frauddetector = ["mypy-boto3-frauddetector (>=1.24.0,<1.25.0)"] -forecastquery = ["mypy-boto3-forecastquery (>=1.24.0,<1.25.0)"] -forecast = ["mypy-boto3-forecast (>=1.24.0,<1.25.0)"] -fms = ["mypy-boto3-fms (>=1.24.0,<1.25.0)"] -fis = ["mypy-boto3-fis (>=1.24.0,<1.25.0)"] -firehose = ["mypy-boto3-firehose (>=1.24.0,<1.25.0)"] -finspace-data = ["mypy-boto3-finspace-data (>=1.24.0,<1.25.0)"] -finspace = ["mypy-boto3-finspace (>=1.24.0,<1.25.0)"] -evidently = ["mypy-boto3-evidently (>=1.24.0,<1.25.0)"] -events = ["mypy-boto3-events (>=1.24.0,<1.25.0)"] -essential = ["mypy-boto3-sqs (>=1.24.0,<1.25.0)", "mypy-boto3-s3 (>=1.24.0,<1.25.0)", "mypy-boto3-rds (>=1.24.0,<1.25.0)", "mypy-boto3-lambda 
(>=1.24.0,<1.25.0)", "mypy-boto3-ec2 (>=1.24.0,<1.25.0)", "mypy-boto3-dynamodb (>=1.24.0,<1.25.0)", "mypy-boto3-cloudformation (>=1.24.0,<1.25.0)"] -es = ["mypy-boto3-es (>=1.24.0,<1.25.0)"] -emr-serverless = ["mypy-boto3-emr-serverless (>=1.24.0,<1.25.0)"] -emr-containers = ["mypy-boto3-emr-containers (>=1.24.0,<1.25.0)"] -emr = ["mypy-boto3-emr (>=1.24.0,<1.25.0)"] -elbv2 = ["mypy-boto3-elbv2 (>=1.24.0,<1.25.0)"] -elb = ["mypy-boto3-elb (>=1.24.0,<1.25.0)"] -elastictranscoder = ["mypy-boto3-elastictranscoder (>=1.24.0,<1.25.0)"] -elasticbeanstalk = ["mypy-boto3-elasticbeanstalk (>=1.24.0,<1.25.0)"] -elasticache = ["mypy-boto3-elasticache (>=1.24.0,<1.25.0)"] -elastic-inference = ["mypy-boto3-elastic-inference (>=1.24.0,<1.25.0)"] -eks = ["mypy-boto3-eks (>=1.24.0,<1.25.0)"] -efs = ["mypy-boto3-efs (>=1.24.0,<1.25.0)"] -ecs = ["mypy-boto3-ecs (>=1.24.0,<1.25.0)"] -ecr-public = ["mypy-boto3-ecr-public (>=1.24.0,<1.25.0)"] -ecr = ["mypy-boto3-ecr (>=1.24.0,<1.25.0)"] -ec2-instance-connect = ["mypy-boto3-ec2-instance-connect (>=1.24.0,<1.25.0)"] -ec2 = ["mypy-boto3-ec2 (>=1.24.0,<1.25.0)"] -ebs = ["mypy-boto3-ebs (>=1.24.0,<1.25.0)"] -dynamodbstreams = ["mypy-boto3-dynamodbstreams (>=1.24.0,<1.25.0)"] -dynamodb = ["mypy-boto3-dynamodb (>=1.24.0,<1.25.0)"] -ds = ["mypy-boto3-ds (>=1.24.0,<1.25.0)"] -drs = ["mypy-boto3-drs (>=1.24.0,<1.25.0)"] -docdb = ["mypy-boto3-docdb (>=1.24.0,<1.25.0)"] -dms = ["mypy-boto3-dms (>=1.24.0,<1.25.0)"] -dlm = ["mypy-boto3-dlm (>=1.24.0,<1.25.0)"] -discovery = ["mypy-boto3-discovery (>=1.24.0,<1.25.0)"] -directconnect = ["mypy-boto3-directconnect (>=1.24.0,<1.25.0)"] -devops-guru = ["mypy-boto3-devops-guru (>=1.24.0,<1.25.0)"] -devicefarm = ["mypy-boto3-devicefarm (>=1.24.0,<1.25.0)"] -detective = ["mypy-boto3-detective (>=1.24.0,<1.25.0)"] -dax = ["mypy-boto3-dax (>=1.24.0,<1.25.0)"] -datasync = ["mypy-boto3-datasync (>=1.24.0,<1.25.0)"] -datapipeline = ["mypy-boto3-datapipeline (>=1.24.0,<1.25.0)"] -dataexchange = 
["mypy-boto3-dataexchange (>=1.24.0,<1.25.0)"] -databrew = ["mypy-boto3-databrew (>=1.24.0,<1.25.0)"] -customer-profiles = ["mypy-boto3-customer-profiles (>=1.24.0,<1.25.0)"] -cur = ["mypy-boto3-cur (>=1.24.0,<1.25.0)"] -connectparticipant = ["mypy-boto3-connectparticipant (>=1.24.0,<1.25.0)"] -connectcampaigns = ["mypy-boto3-connectcampaigns (>=1.24.0,<1.25.0)"] -connect-contact-lens = ["mypy-boto3-connect-contact-lens (>=1.24.0,<1.25.0)"] -connect = ["mypy-boto3-connect (>=1.24.0,<1.25.0)"] -config = ["mypy-boto3-config (>=1.24.0,<1.25.0)"] -compute-optimizer = ["mypy-boto3-compute-optimizer (>=1.24.0,<1.25.0)"] -comprehendmedical = ["mypy-boto3-comprehendmedical (>=1.24.0,<1.25.0)"] -comprehend = ["mypy-boto3-comprehend (>=1.24.0,<1.25.0)"] -cognito-sync = ["mypy-boto3-cognito-sync (>=1.24.0,<1.25.0)"] -cognito-idp = ["mypy-boto3-cognito-idp (>=1.24.0,<1.25.0)"] -cognito-identity = ["mypy-boto3-cognito-identity (>=1.24.0,<1.25.0)"] -codestar-notifications = ["mypy-boto3-codestar-notifications (>=1.24.0,<1.25.0)"] -codestar-connections = ["mypy-boto3-codestar-connections (>=1.24.0,<1.25.0)"] -codestar = ["mypy-boto3-codestar (>=1.24.0,<1.25.0)"] -codepipeline = ["mypy-boto3-codepipeline (>=1.24.0,<1.25.0)"] -mediaconvert = ["mypy-boto3-mediaconvert (>=1.24.0,<1.25.0)"] -mediaconnect = ["mypy-boto3-mediaconnect (>=1.24.0,<1.25.0)"] -marketplacecommerceanalytics = ["mypy-boto3-marketplacecommerceanalytics (>=1.24.0,<1.25.0)"] -marketplace-entitlement = ["mypy-boto3-marketplace-entitlement (>=1.24.0,<1.25.0)"] -marketplace-catalog = ["mypy-boto3-marketplace-catalog (>=1.24.0,<1.25.0)"] -managedblockchain = ["mypy-boto3-managedblockchain (>=1.24.0,<1.25.0)"] -macie2 = ["mypy-boto3-macie2 (>=1.24.0,<1.25.0)"] -macie = ["mypy-boto3-macie (>=1.24.0,<1.25.0)"] -machinelearning = ["mypy-boto3-machinelearning (>=1.24.0,<1.25.0)"] -m2 = ["mypy-boto3-m2 (>=1.24.0,<1.25.0)"] -lookoutvision = ["mypy-boto3-lookoutvision (>=1.24.0,<1.25.0)"] -lookoutmetrics = 
["mypy-boto3-lookoutmetrics (>=1.24.0,<1.25.0)"] -lookoutequipment = ["mypy-boto3-lookoutequipment (>=1.24.0,<1.25.0)"] -logs = ["mypy-boto3-logs (>=1.24.0,<1.25.0)"] -location = ["mypy-boto3-location (>=1.24.0,<1.25.0)"] -lightsail = ["mypy-boto3-lightsail (>=1.24.0,<1.25.0)"] -license-manager-user-subscriptions = ["mypy-boto3-license-manager-user-subscriptions (>=1.24.0,<1.25.0)"] -license-manager = ["mypy-boto3-license-manager (>=1.24.0,<1.25.0)"] -lexv2-runtime = ["mypy-boto3-lexv2-runtime (>=1.24.0,<1.25.0)"] -lexv2-models = ["mypy-boto3-lexv2-models (>=1.24.0,<1.25.0)"] -lex-runtime = ["mypy-boto3-lex-runtime (>=1.24.0,<1.25.0)"] -lex-models = ["mypy-boto3-lex-models (>=1.24.0,<1.25.0)"] -lambda = ["mypy-boto3-lambda (>=1.24.0,<1.25.0)"] -lakeformation = ["mypy-boto3-lakeformation (>=1.24.0,<1.25.0)"] -kms = ["mypy-boto3-kms (>=1.24.0,<1.25.0)"] -kinesisvideo = ["mypy-boto3-kinesisvideo (>=1.24.0,<1.25.0)"] -kinesisanalyticsv2 = ["mypy-boto3-kinesisanalyticsv2 (>=1.24.0,<1.25.0)"] -codeguruprofiler = ["mypy-boto3-codeguruprofiler (>=1.24.0,<1.25.0)"] -all = ["mypy-boto3-kafkaconnect (>=1.24.0,<1.25.0)", "mypy-boto3-kafka (>=1.24.0,<1.25.0)", "mypy-boto3-ivschat (>=1.24.0,<1.25.0)", "mypy-boto3-ivs (>=1.24.0,<1.25.0)", "mypy-boto3-iotwireless (>=1.24.0,<1.25.0)", "mypy-boto3-iottwinmaker (>=1.24.0,<1.25.0)", "mypy-boto3-iotthingsgraph (>=1.24.0,<1.25.0)", "mypy-boto3-iotsitewise (>=1.24.0,<1.25.0)", "mypy-boto3-iotsecuretunneling (>=1.24.0,<1.25.0)", "mypy-boto3-iotfleethub (>=1.24.0,<1.25.0)", "mypy-boto3-iotevents-data (>=1.24.0,<1.25.0)", "mypy-boto3-iotevents (>=1.24.0,<1.25.0)", "mypy-boto3-iotdeviceadvisor (>=1.24.0,<1.25.0)", "mypy-boto3-iotanalytics (>=1.24.0,<1.25.0)", "mypy-boto3-iot1click-projects (>=1.24.0,<1.25.0)", "mypy-boto3-iot1click-devices (>=1.24.0,<1.25.0)", "mypy-boto3-iot-jobs-data (>=1.24.0,<1.25.0)", "mypy-boto3-iot-data (>=1.24.0,<1.25.0)", "mypy-boto3-iot (>=1.24.0,<1.25.0)", "mypy-boto3-inspector2 (>=1.24.0,<1.25.0)", 
"mypy-boto3-inspector (>=1.24.0,<1.25.0)", "mypy-boto3-importexport (>=1.24.0,<1.25.0)", "mypy-boto3-imagebuilder (>=1.24.0,<1.25.0)", "mypy-boto3-identitystore (>=1.24.0,<1.25.0)", "mypy-boto3-iam (>=1.24.0,<1.25.0)", "mypy-boto3-honeycode (>=1.24.0,<1.25.0)", "mypy-boto3-healthlake (>=1.24.0,<1.25.0)", "mypy-boto3-health (>=1.24.0,<1.25.0)", "mypy-boto3-guardduty (>=1.24.0,<1.25.0)", "mypy-boto3-groundstation (>=1.24.0,<1.25.0)", "mypy-boto3-greengrassv2 (>=1.24.0,<1.25.0)", "mypy-boto3-greengrass (>=1.24.0,<1.25.0)", "mypy-boto3-grafana (>=1.24.0,<1.25.0)", "mypy-boto3-glue (>=1.24.0,<1.25.0)", "mypy-boto3-globalaccelerator (>=1.24.0,<1.25.0)", "mypy-boto3-glacier (>=1.24.0,<1.25.0)", "mypy-boto3-gamesparks (>=1.24.0,<1.25.0)", "mypy-boto3-gamelift (>=1.24.0,<1.25.0)", "mypy-boto3-fsx (>=1.24.0,<1.25.0)", "mypy-boto3-frauddetector (>=1.24.0,<1.25.0)", "mypy-boto3-forecastquery (>=1.24.0,<1.25.0)", "mypy-boto3-forecast (>=1.24.0,<1.25.0)", "mypy-boto3-fms (>=1.24.0,<1.25.0)", "mypy-boto3-fis (>=1.24.0,<1.25.0)", "mypy-boto3-firehose (>=1.24.0,<1.25.0)", "mypy-boto3-finspace-data (>=1.24.0,<1.25.0)", "mypy-boto3-finspace (>=1.24.0,<1.25.0)", "mypy-boto3-evidently (>=1.24.0,<1.25.0)", "mypy-boto3-events (>=1.24.0,<1.25.0)", "mypy-boto3-es (>=1.24.0,<1.25.0)", "mypy-boto3-emr-serverless (>=1.24.0,<1.25.0)", "mypy-boto3-emr-containers (>=1.24.0,<1.25.0)", "mypy-boto3-emr (>=1.24.0,<1.25.0)", "mypy-boto3-elbv2 (>=1.24.0,<1.25.0)", "mypy-boto3-elb (>=1.24.0,<1.25.0)", "mypy-boto3-elastictranscoder (>=1.24.0,<1.25.0)", "mypy-boto3-elasticbeanstalk (>=1.24.0,<1.25.0)", "mypy-boto3-elasticache (>=1.24.0,<1.25.0)", "mypy-boto3-elastic-inference (>=1.24.0,<1.25.0)", "mypy-boto3-eks (>=1.24.0,<1.25.0)", "mypy-boto3-efs (>=1.24.0,<1.25.0)", "mypy-boto3-ecs (>=1.24.0,<1.25.0)", "mypy-boto3-ecr-public (>=1.24.0,<1.25.0)", "mypy-boto3-ecr (>=1.24.0,<1.25.0)", "mypy-boto3-ec2-instance-connect (>=1.24.0,<1.25.0)", "mypy-boto3-ec2 (>=1.24.0,<1.25.0)", "mypy-boto3-ebs 
(>=1.24.0,<1.25.0)", "mypy-boto3-dynamodbstreams (>=1.24.0,<1.25.0)", "mypy-boto3-dynamodb (>=1.24.0,<1.25.0)", "mypy-boto3-ds (>=1.24.0,<1.25.0)", "mypy-boto3-drs (>=1.24.0,<1.25.0)", "mypy-boto3-docdb (>=1.24.0,<1.25.0)", "mypy-boto3-dms (>=1.24.0,<1.25.0)", "mypy-boto3-dlm (>=1.24.0,<1.25.0)", "mypy-boto3-discovery (>=1.24.0,<1.25.0)", "mypy-boto3-directconnect (>=1.24.0,<1.25.0)", "mypy-boto3-devops-guru (>=1.24.0,<1.25.0)", "mypy-boto3-devicefarm (>=1.24.0,<1.25.0)", "mypy-boto3-detective (>=1.24.0,<1.25.0)", "mypy-boto3-dax (>=1.24.0,<1.25.0)", "mypy-boto3-datasync (>=1.24.0,<1.25.0)", "mypy-boto3-datapipeline (>=1.24.0,<1.25.0)", "mypy-boto3-dataexchange (>=1.24.0,<1.25.0)", "mypy-boto3-databrew (>=1.24.0,<1.25.0)", "mypy-boto3-customer-profiles (>=1.24.0,<1.25.0)", "mypy-boto3-cur (>=1.24.0,<1.25.0)", "mypy-boto3-connectparticipant (>=1.24.0,<1.25.0)", "mypy-boto3-connectcampaigns (>=1.24.0,<1.25.0)", "mypy-boto3-connect-contact-lens (>=1.24.0,<1.25.0)", "mypy-boto3-connect (>=1.24.0,<1.25.0)", "mypy-boto3-config (>=1.24.0,<1.25.0)", "mypy-boto3-compute-optimizer (>=1.24.0,<1.25.0)", "mypy-boto3-comprehendmedical (>=1.24.0,<1.25.0)", "mypy-boto3-comprehend (>=1.24.0,<1.25.0)", "mypy-boto3-cognito-sync (>=1.24.0,<1.25.0)", "mypy-boto3-cognito-idp (>=1.24.0,<1.25.0)", "mypy-boto3-cognito-identity (>=1.24.0,<1.25.0)", "mypy-boto3-codestar-notifications (>=1.24.0,<1.25.0)", "mypy-boto3-codestar-connections (>=1.24.0,<1.25.0)", "mypy-boto3-codestar (>=1.24.0,<1.25.0)", "mypy-boto3-codepipeline (>=1.24.0,<1.25.0)", "mypy-boto3-codeguruprofiler (>=1.24.0,<1.25.0)", "mypy-boto3-codeguru-reviewer (>=1.24.0,<1.25.0)", "mypy-boto3-codedeploy (>=1.24.0,<1.25.0)", "mypy-boto3-codecommit (>=1.24.0,<1.25.0)", "mypy-boto3-codebuild (>=1.24.0,<1.25.0)", "mypy-boto3-codeartifact (>=1.24.0,<1.25.0)", "mypy-boto3-cloudwatch (>=1.24.0,<1.25.0)", "mypy-boto3-cloudtrail (>=1.24.0,<1.25.0)", "mypy-boto3-cloudsearchdomain (>=1.24.0,<1.25.0)", "mypy-boto3-cloudsearch 
(>=1.24.0,<1.25.0)", "mypy-boto3-cloudhsmv2 (>=1.24.0,<1.25.0)", "mypy-boto3-cloudhsm (>=1.24.0,<1.25.0)", "mypy-boto3-cloudfront (>=1.24.0,<1.25.0)", "mypy-boto3-cloudformation (>=1.24.0,<1.25.0)", "mypy-boto3-xray (>=1.24.0,<1.25.0)", "mypy-boto3-workspaces-web (>=1.24.0,<1.25.0)", "mypy-boto3-workspaces (>=1.24.0,<1.25.0)", "mypy-boto3-workmailmessageflow (>=1.24.0,<1.25.0)", "mypy-boto3-workmail (>=1.24.0,<1.25.0)", "mypy-boto3-worklink (>=1.24.0,<1.25.0)", "mypy-boto3-workdocs (>=1.24.0,<1.25.0)", "mypy-boto3-wisdom (>=1.24.0,<1.25.0)", "mypy-boto3-wellarchitected (>=1.24.0,<1.25.0)", "mypy-boto3-wafv2 (>=1.24.0,<1.25.0)", "mypy-boto3-waf-regional (>=1.24.0,<1.25.0)", "mypy-boto3-waf (>=1.24.0,<1.25.0)", "mypy-boto3-voice-id (>=1.24.0,<1.25.0)", "mypy-boto3-translate (>=1.24.0,<1.25.0)", "mypy-boto3-transfer (>=1.24.0,<1.25.0)", "mypy-boto3-transcribe (>=1.24.0,<1.25.0)", "mypy-boto3-timestream-write (>=1.24.0,<1.25.0)", "mypy-boto3-timestream-query (>=1.24.0,<1.25.0)", "mypy-boto3-textract (>=1.24.0,<1.25.0)", "mypy-boto3-synthetics (>=1.24.0,<1.25.0)", "mypy-boto3-swf (>=1.24.0,<1.25.0)", "mypy-boto3-support (>=1.24.0,<1.25.0)", "mypy-boto3-sts (>=1.24.0,<1.25.0)", "mypy-boto3-storagegateway (>=1.24.0,<1.25.0)", "mypy-boto3-stepfunctions (>=1.24.0,<1.25.0)", "mypy-boto3-sso-oidc (>=1.24.0,<1.25.0)", "mypy-boto3-sso-admin (>=1.24.0,<1.25.0)", "mypy-boto3-sso (>=1.24.0,<1.25.0)", "mypy-boto3-ssm-incidents (>=1.24.0,<1.25.0)", "mypy-boto3-ssm-contacts (>=1.24.0,<1.25.0)", "mypy-boto3-ssm (>=1.24.0,<1.25.0)", "mypy-boto3-sqs (>=1.24.0,<1.25.0)", "mypy-boto3-sns (>=1.24.0,<1.25.0)", "mypy-boto3-snowball (>=1.24.0,<1.25.0)", "mypy-boto3-snow-device-management (>=1.24.0,<1.25.0)", "mypy-boto3-sms-voice (>=1.24.0,<1.25.0)", "mypy-boto3-sms (>=1.24.0,<1.25.0)", "mypy-boto3-signer (>=1.24.0,<1.25.0)", "mypy-boto3-shield (>=1.24.0,<1.25.0)", "mypy-boto3-sesv2 (>=1.24.0,<1.25.0)", "mypy-boto3-ses (>=1.24.0,<1.25.0)", "mypy-boto3-servicediscovery (>=1.24.0,<1.25.0)", 
"mypy-boto3-servicecatalog-appregistry (>=1.24.0,<1.25.0)", "mypy-boto3-servicecatalog (>=1.24.0,<1.25.0)", "mypy-boto3-service-quotas (>=1.24.0,<1.25.0)", "mypy-boto3-serverlessrepo (>=1.24.0,<1.25.0)", "mypy-boto3-securityhub (>=1.24.0,<1.25.0)", "mypy-boto3-secretsmanager (>=1.24.0,<1.25.0)", "mypy-boto3-sdb (>=1.24.0,<1.25.0)", "mypy-boto3-schemas (>=1.24.0,<1.25.0)", "mypy-boto3-savingsplans (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-featurestore-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-edge (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-a2i-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker (>=1.24.0,<1.25.0)", "mypy-boto3-s3outposts (>=1.24.0,<1.25.0)", "mypy-boto3-s3control (>=1.24.0,<1.25.0)", "mypy-boto3-s3 (>=1.24.0,<1.25.0)", "mypy-boto3-rum (>=1.24.0,<1.25.0)", "mypy-boto3-route53resolver (>=1.24.0,<1.25.0)", "mypy-boto3-route53domains (>=1.24.0,<1.25.0)", "mypy-boto3-route53-recovery-readiness (>=1.24.0,<1.25.0)", "mypy-boto3-route53-recovery-control-config (>=1.24.0,<1.25.0)", "mypy-boto3-route53-recovery-cluster (>=1.24.0,<1.25.0)", "mypy-boto3-route53 (>=1.24.0,<1.25.0)", "mypy-boto3-rolesanywhere (>=1.24.0,<1.25.0)", "mypy-boto3-robomaker (>=1.24.0,<1.25.0)", "mypy-boto3-resourcegroupstaggingapi (>=1.24.0,<1.25.0)", "mypy-boto3-resource-groups (>=1.24.0,<1.25.0)", "mypy-boto3-resiliencehub (>=1.24.0,<1.25.0)", "mypy-boto3-rekognition (>=1.24.0,<1.25.0)", "mypy-boto3-redshift-serverless (>=1.24.0,<1.25.0)", "mypy-boto3-redshift-data (>=1.24.0,<1.25.0)", "mypy-boto3-redshift (>=1.24.0,<1.25.0)", "mypy-boto3-rds-data (>=1.24.0,<1.25.0)", "mypy-boto3-rds (>=1.24.0,<1.25.0)", "mypy-boto3-rbin (>=1.24.0,<1.25.0)", "mypy-boto3-ram (>=1.24.0,<1.25.0)", "mypy-boto3-quicksight (>=1.24.0,<1.25.0)", "mypy-boto3-qldb-session (>=1.24.0,<1.25.0)", "mypy-boto3-qldb (>=1.24.0,<1.25.0)", "mypy-boto3-proton (>=1.24.0,<1.25.0)", "mypy-boto3-privatenetworks (>=1.24.0,<1.25.0)", "mypy-boto3-pricing 
(>=1.24.0,<1.25.0)", "mypy-boto3-polly (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint-sms-voice-v2 (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint-sms-voice (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint-email (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint (>=1.24.0,<1.25.0)", "mypy-boto3-pi (>=1.24.0,<1.25.0)", "mypy-boto3-personalize-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-personalize-events (>=1.24.0,<1.25.0)", "mypy-boto3-personalize (>=1.24.0,<1.25.0)", "mypy-boto3-panorama (>=1.24.0,<1.25.0)", "mypy-boto3-outposts (>=1.24.0,<1.25.0)", "mypy-boto3-organizations (>=1.24.0,<1.25.0)", "mypy-boto3-opsworkscm (>=1.24.0,<1.25.0)", "mypy-boto3-opsworks (>=1.24.0,<1.25.0)", "mypy-boto3-opensearch (>=1.24.0,<1.25.0)", "mypy-boto3-nimble (>=1.24.0,<1.25.0)", "mypy-boto3-networkmanager (>=1.24.0,<1.25.0)", "mypy-boto3-network-firewall (>=1.24.0,<1.25.0)", "mypy-boto3-neptune (>=1.24.0,<1.25.0)", "mypy-boto3-mwaa (>=1.24.0,<1.25.0)", "mypy-boto3-mturk (>=1.24.0,<1.25.0)", "mypy-boto3-mq (>=1.24.0,<1.25.0)", "mypy-boto3-mobile (>=1.24.0,<1.25.0)", "mypy-boto3-migrationhubstrategy (>=1.24.0,<1.25.0)", "mypy-boto3-migrationhub-config (>=1.24.0,<1.25.0)", "mypy-boto3-migration-hub-refactor-spaces (>=1.24.0,<1.25.0)", "mypy-boto3-mgn (>=1.24.0,<1.25.0)", "mypy-boto3-mgh (>=1.24.0,<1.25.0)", "mypy-boto3-meteringmarketplace (>=1.24.0,<1.25.0)", "mypy-boto3-memorydb (>=1.24.0,<1.25.0)", "mypy-boto3-mediatailor (>=1.24.0,<1.25.0)", "mypy-boto3-mediastore-data (>=1.24.0,<1.25.0)", "mypy-boto3-mediastore (>=1.24.0,<1.25.0)", "mypy-boto3-mediapackage-vod (>=1.24.0,<1.25.0)", "mypy-boto3-mediapackage (>=1.24.0,<1.25.0)", "mypy-boto3-medialive (>=1.24.0,<1.25.0)", "mypy-boto3-mediaconvert (>=1.24.0,<1.25.0)", "mypy-boto3-mediaconnect (>=1.24.0,<1.25.0)", "mypy-boto3-marketplacecommerceanalytics (>=1.24.0,<1.25.0)", "mypy-boto3-marketplace-entitlement (>=1.24.0,<1.25.0)", "mypy-boto3-marketplace-catalog (>=1.24.0,<1.25.0)", "mypy-boto3-managedblockchain (>=1.24.0,<1.25.0)", "mypy-boto3-macie2 
(>=1.24.0,<1.25.0)", "mypy-boto3-macie (>=1.24.0,<1.25.0)", "mypy-boto3-machinelearning (>=1.24.0,<1.25.0)", "mypy-boto3-m2 (>=1.24.0,<1.25.0)", "mypy-boto3-lookoutvision (>=1.24.0,<1.25.0)", "mypy-boto3-lookoutmetrics (>=1.24.0,<1.25.0)", "mypy-boto3-lookoutequipment (>=1.24.0,<1.25.0)", "mypy-boto3-logs (>=1.24.0,<1.25.0)", "mypy-boto3-location (>=1.24.0,<1.25.0)", "mypy-boto3-lightsail (>=1.24.0,<1.25.0)", "mypy-boto3-license-manager-user-subscriptions (>=1.24.0,<1.25.0)", "mypy-boto3-license-manager (>=1.24.0,<1.25.0)", "mypy-boto3-lexv2-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-lexv2-models (>=1.24.0,<1.25.0)", "mypy-boto3-lex-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-lex-models (>=1.24.0,<1.25.0)", "mypy-boto3-lambda (>=1.24.0,<1.25.0)", "mypy-boto3-lakeformation (>=1.24.0,<1.25.0)", "mypy-boto3-kms (>=1.24.0,<1.25.0)", "mypy-boto3-kinesisvideo (>=1.24.0,<1.25.0)", "mypy-boto3-kinesisanalyticsv2 (>=1.24.0,<1.25.0)", "mypy-boto3-kinesisanalytics (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis-video-signaling (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis-video-media (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis-video-archived-media (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis (>=1.24.0,<1.25.0)", "mypy-boto3-keyspaces (>=1.24.0,<1.25.0)", "mypy-boto3-kendra (>=1.24.0,<1.25.0)", "mypy-boto3-clouddirectory (>=1.24.0,<1.25.0)", "mypy-boto3-cloudcontrol (>=1.24.0,<1.25.0)", "mypy-boto3-cloud9 (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-messaging (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-meetings (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-media-pipelines (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-identity (>=1.24.0,<1.25.0)", "mypy-boto3-chime (>=1.24.0,<1.25.0)", "mypy-boto3-ce (>=1.24.0,<1.25.0)", "mypy-boto3-budgets (>=1.24.0,<1.25.0)", "mypy-boto3-braket (>=1.24.0,<1.25.0)", "mypy-boto3-billingconductor (>=1.24.0,<1.25.0)", "mypy-boto3-batch (>=1.24.0,<1.25.0)", "mypy-boto3-backupstorage (>=1.24.0,<1.25.0)", "mypy-boto3-backup-gateway (>=1.24.0,<1.25.0)", "mypy-boto3-backup 
(>=1.24.0,<1.25.0)", "mypy-boto3-autoscaling-plans (>=1.24.0,<1.25.0)", "mypy-boto3-autoscaling (>=1.24.0,<1.25.0)", "mypy-boto3-auditmanager (>=1.24.0,<1.25.0)", "mypy-boto3-athena (>=1.24.0,<1.25.0)", "mypy-boto3-appsync (>=1.24.0,<1.25.0)", "mypy-boto3-appstream (>=1.24.0,<1.25.0)", "mypy-boto3-apprunner (>=1.24.0,<1.25.0)", "mypy-boto3-appmesh (>=1.24.0,<1.25.0)", "mypy-boto3-applicationcostprofiler (>=1.24.0,<1.25.0)", "mypy-boto3-application-insights (>=1.24.0,<1.25.0)", "mypy-boto3-application-autoscaling (>=1.24.0,<1.25.0)", "mypy-boto3-appintegrations (>=1.24.0,<1.25.0)", "mypy-boto3-appflow (>=1.24.0,<1.25.0)", "mypy-boto3-appconfigdata (>=1.24.0,<1.25.0)", "mypy-boto3-appconfig (>=1.24.0,<1.25.0)", "mypy-boto3-apigatewayv2 (>=1.24.0,<1.25.0)", "mypy-boto3-apigatewaymanagementapi (>=1.24.0,<1.25.0)", "mypy-boto3-apigateway (>=1.24.0,<1.25.0)", "mypy-boto3-amplifyuibuilder (>=1.24.0,<1.25.0)", "mypy-boto3-amplifybackend (>=1.24.0,<1.25.0)", "mypy-boto3-amplify (>=1.24.0,<1.25.0)", "mypy-boto3-amp (>=1.24.0,<1.25.0)", "mypy-boto3-alexaforbusiness (>=1.24.0,<1.25.0)", "mypy-boto3-acm-pca (>=1.24.0,<1.25.0)", "mypy-boto3-acm (>=1.24.0,<1.25.0)", "mypy-boto3-account (>=1.24.0,<1.25.0)", "mypy-boto3-accessanalyzer (>=1.24.0,<1.25.0)"] -budgets = ["mypy-boto3-budgets (>=1.24.0,<1.25.0)"] -braket = ["mypy-boto3-braket (>=1.24.0,<1.25.0)"] -billingconductor = ["mypy-boto3-billingconductor (>=1.24.0,<1.25.0)"] -batch = ["mypy-boto3-batch (>=1.24.0,<1.25.0)"] -backupstorage = ["mypy-boto3-backupstorage (>=1.24.0,<1.25.0)"] -backup-gateway = ["mypy-boto3-backup-gateway (>=1.24.0,<1.25.0)"] -backup = ["mypy-boto3-backup (>=1.24.0,<1.25.0)"] -autoscaling-plans = ["mypy-boto3-autoscaling-plans (>=1.24.0,<1.25.0)"] -autoscaling = ["mypy-boto3-autoscaling (>=1.24.0,<1.25.0)"] -auditmanager = ["mypy-boto3-auditmanager (>=1.24.0,<1.25.0)"] -athena = ["mypy-boto3-athena (>=1.24.0,<1.25.0)"] -appsync = ["mypy-boto3-appsync (>=1.24.0,<1.25.0)"] -appstream = 
["mypy-boto3-appstream (>=1.24.0,<1.25.0)"] -apprunner = ["mypy-boto3-apprunner (>=1.24.0,<1.25.0)"] -appmesh = ["mypy-boto3-appmesh (>=1.24.0,<1.25.0)"] -applicationcostprofiler = ["mypy-boto3-applicationcostprofiler (>=1.24.0,<1.25.0)"] -application-insights = ["mypy-boto3-application-insights (>=1.24.0,<1.25.0)"] -application-autoscaling = ["mypy-boto3-application-autoscaling (>=1.24.0,<1.25.0)"] -appintegrations = ["mypy-boto3-appintegrations (>=1.24.0,<1.25.0)"] -appflow = ["mypy-boto3-appflow (>=1.24.0,<1.25.0)"] -appconfigdata = ["mypy-boto3-appconfigdata (>=1.24.0,<1.25.0)"] -appconfig = ["mypy-boto3-appconfig (>=1.24.0,<1.25.0)"] -apigatewayv2 = ["mypy-boto3-apigatewayv2 (>=1.24.0,<1.25.0)"] -apigatewaymanagementapi = ["mypy-boto3-apigatewaymanagementapi (>=1.24.0,<1.25.0)"] -apigateway = ["mypy-boto3-apigateway (>=1.24.0,<1.25.0)"] -amplifyuibuilder = ["mypy-boto3-amplifyuibuilder (>=1.24.0,<1.25.0)"] -amplifybackend = ["mypy-boto3-amplifybackend (>=1.24.0,<1.25.0)"] -amplify = ["mypy-boto3-amplify (>=1.24.0,<1.25.0)"] -amp = ["mypy-boto3-amp (>=1.24.0,<1.25.0)"] -codeguru-reviewer = ["mypy-boto3-codeguru-reviewer (>=1.24.0,<1.25.0)"] -codedeploy = ["mypy-boto3-codedeploy (>=1.24.0,<1.25.0)"] -codecommit = ["mypy-boto3-codecommit (>=1.24.0,<1.25.0)"] -codebuild = ["mypy-boto3-codebuild (>=1.24.0,<1.25.0)"] -codeartifact = ["mypy-boto3-codeartifact (>=1.24.0,<1.25.0)"] -cloudwatch = ["mypy-boto3-cloudwatch (>=1.24.0,<1.25.0)"] -cloudtrail = ["mypy-boto3-cloudtrail (>=1.24.0,<1.25.0)"] -cloudsearchdomain = ["mypy-boto3-cloudsearchdomain (>=1.24.0,<1.25.0)"] -cloudsearch = ["mypy-boto3-cloudsearch (>=1.24.0,<1.25.0)"] -cloudhsmv2 = ["mypy-boto3-cloudhsmv2 (>=1.24.0,<1.25.0)"] -cloudhsm = ["mypy-boto3-cloudhsm (>=1.24.0,<1.25.0)"] -cloudfront = ["mypy-boto3-cloudfront (>=1.24.0,<1.25.0)"] -cloudformation = ["mypy-boto3-cloudformation (>=1.24.0,<1.25.0)"] -clouddirectory = ["mypy-boto3-clouddirectory (>=1.24.0,<1.25.0)"] -cloudcontrol = 
["mypy-boto3-cloudcontrol (>=1.24.0,<1.25.0)"] -cloud9 = ["mypy-boto3-cloud9 (>=1.24.0,<1.25.0)"] -chime-sdk-messaging = ["mypy-boto3-chime-sdk-messaging (>=1.24.0,<1.25.0)"] -chime-sdk-meetings = ["mypy-boto3-chime-sdk-meetings (>=1.24.0,<1.25.0)"] -chime-sdk-media-pipelines = ["mypy-boto3-chime-sdk-media-pipelines (>=1.24.0,<1.25.0)"] -chime-sdk-identity = ["mypy-boto3-chime-sdk-identity (>=1.24.0,<1.25.0)"] -chime = ["mypy-boto3-chime (>=1.24.0,<1.25.0)"] -ce = ["mypy-boto3-ce (>=1.24.0,<1.25.0)"] -alexaforbusiness = ["mypy-boto3-alexaforbusiness (>=1.24.0,<1.25.0)"] -acm-pca = ["mypy-boto3-acm-pca (>=1.24.0,<1.25.0)"] -acm = ["mypy-boto3-acm (>=1.24.0,<1.25.0)"] -account = ["mypy-boto3-account (>=1.24.0,<1.25.0)"] accessanalyzer = ["mypy-boto3-accessanalyzer (>=1.24.0,<1.25.0)"] +account = ["mypy-boto3-account (>=1.24.0,<1.25.0)"] +acm = ["mypy-boto3-acm (>=1.24.0,<1.25.0)"] +acm-pca = ["mypy-boto3-acm-pca (>=1.24.0,<1.25.0)"] +alexaforbusiness = ["mypy-boto3-alexaforbusiness (>=1.24.0,<1.25.0)"] +all = ["mypy-boto3-accessanalyzer (>=1.24.0,<1.25.0)", "mypy-boto3-account (>=1.24.0,<1.25.0)", "mypy-boto3-acm (>=1.24.0,<1.25.0)", "mypy-boto3-acm-pca (>=1.24.0,<1.25.0)", "mypy-boto3-alexaforbusiness (>=1.24.0,<1.25.0)", "mypy-boto3-amp (>=1.24.0,<1.25.0)", "mypy-boto3-amplify (>=1.24.0,<1.25.0)", "mypy-boto3-amplifybackend (>=1.24.0,<1.25.0)", "mypy-boto3-amplifyuibuilder (>=1.24.0,<1.25.0)", "mypy-boto3-apigateway (>=1.24.0,<1.25.0)", "mypy-boto3-apigatewaymanagementapi (>=1.24.0,<1.25.0)", "mypy-boto3-apigatewayv2 (>=1.24.0,<1.25.0)", "mypy-boto3-appconfig (>=1.24.0,<1.25.0)", "mypy-boto3-appconfigdata (>=1.24.0,<1.25.0)", "mypy-boto3-appflow (>=1.24.0,<1.25.0)", "mypy-boto3-appintegrations (>=1.24.0,<1.25.0)", "mypy-boto3-application-autoscaling (>=1.24.0,<1.25.0)", "mypy-boto3-application-insights (>=1.24.0,<1.25.0)", "mypy-boto3-applicationcostprofiler (>=1.24.0,<1.25.0)", "mypy-boto3-appmesh (>=1.24.0,<1.25.0)", "mypy-boto3-apprunner (>=1.24.0,<1.25.0)", 
"mypy-boto3-appstream (>=1.24.0,<1.25.0)", "mypy-boto3-appsync (>=1.24.0,<1.25.0)", "mypy-boto3-athena (>=1.24.0,<1.25.0)", "mypy-boto3-auditmanager (>=1.24.0,<1.25.0)", "mypy-boto3-autoscaling (>=1.24.0,<1.25.0)", "mypy-boto3-autoscaling-plans (>=1.24.0,<1.25.0)", "mypy-boto3-backup (>=1.24.0,<1.25.0)", "mypy-boto3-backup-gateway (>=1.24.0,<1.25.0)", "mypy-boto3-backupstorage (>=1.24.0,<1.25.0)", "mypy-boto3-batch (>=1.24.0,<1.25.0)", "mypy-boto3-billingconductor (>=1.24.0,<1.25.0)", "mypy-boto3-braket (>=1.24.0,<1.25.0)", "mypy-boto3-budgets (>=1.24.0,<1.25.0)", "mypy-boto3-ce (>=1.24.0,<1.25.0)", "mypy-boto3-chime (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-identity (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-media-pipelines (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-meetings (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-messaging (>=1.24.0,<1.25.0)", "mypy-boto3-cloud9 (>=1.24.0,<1.25.0)", "mypy-boto3-cloudcontrol (>=1.24.0,<1.25.0)", "mypy-boto3-clouddirectory (>=1.24.0,<1.25.0)", "mypy-boto3-cloudformation (>=1.24.0,<1.25.0)", "mypy-boto3-cloudfront (>=1.24.0,<1.25.0)", "mypy-boto3-cloudhsm (>=1.24.0,<1.25.0)", "mypy-boto3-cloudhsmv2 (>=1.24.0,<1.25.0)", "mypy-boto3-cloudsearch (>=1.24.0,<1.25.0)", "mypy-boto3-cloudsearchdomain (>=1.24.0,<1.25.0)", "mypy-boto3-cloudtrail (>=1.24.0,<1.25.0)", "mypy-boto3-cloudwatch (>=1.24.0,<1.25.0)", "mypy-boto3-codeartifact (>=1.24.0,<1.25.0)", "mypy-boto3-codebuild (>=1.24.0,<1.25.0)", "mypy-boto3-codecommit (>=1.24.0,<1.25.0)", "mypy-boto3-codedeploy (>=1.24.0,<1.25.0)", "mypy-boto3-codeguru-reviewer (>=1.24.0,<1.25.0)", "mypy-boto3-codeguruprofiler (>=1.24.0,<1.25.0)", "mypy-boto3-codepipeline (>=1.24.0,<1.25.0)", "mypy-boto3-codestar (>=1.24.0,<1.25.0)", "mypy-boto3-codestar-connections (>=1.24.0,<1.25.0)", "mypy-boto3-codestar-notifications (>=1.24.0,<1.25.0)", "mypy-boto3-cognito-identity (>=1.24.0,<1.25.0)", "mypy-boto3-cognito-idp (>=1.24.0,<1.25.0)", "mypy-boto3-cognito-sync (>=1.24.0,<1.25.0)", "mypy-boto3-comprehend 
(>=1.24.0,<1.25.0)", "mypy-boto3-comprehendmedical (>=1.24.0,<1.25.0)", "mypy-boto3-compute-optimizer (>=1.24.0,<1.25.0)", "mypy-boto3-config (>=1.24.0,<1.25.0)", "mypy-boto3-connect (>=1.24.0,<1.25.0)", "mypy-boto3-connect-contact-lens (>=1.24.0,<1.25.0)", "mypy-boto3-connectcampaigns (>=1.24.0,<1.25.0)", "mypy-boto3-connectparticipant (>=1.24.0,<1.25.0)", "mypy-boto3-cur (>=1.24.0,<1.25.0)", "mypy-boto3-customer-profiles (>=1.24.0,<1.25.0)", "mypy-boto3-databrew (>=1.24.0,<1.25.0)", "mypy-boto3-dataexchange (>=1.24.0,<1.25.0)", "mypy-boto3-datapipeline (>=1.24.0,<1.25.0)", "mypy-boto3-datasync (>=1.24.0,<1.25.0)", "mypy-boto3-dax (>=1.24.0,<1.25.0)", "mypy-boto3-detective (>=1.24.0,<1.25.0)", "mypy-boto3-devicefarm (>=1.24.0,<1.25.0)", "mypy-boto3-devops-guru (>=1.24.0,<1.25.0)", "mypy-boto3-directconnect (>=1.24.0,<1.25.0)", "mypy-boto3-discovery (>=1.24.0,<1.25.0)", "mypy-boto3-dlm (>=1.24.0,<1.25.0)", "mypy-boto3-dms (>=1.24.0,<1.25.0)", "mypy-boto3-docdb (>=1.24.0,<1.25.0)", "mypy-boto3-drs (>=1.24.0,<1.25.0)", "mypy-boto3-ds (>=1.24.0,<1.25.0)", "mypy-boto3-dynamodb (>=1.24.0,<1.25.0)", "mypy-boto3-dynamodbstreams (>=1.24.0,<1.25.0)", "mypy-boto3-ebs (>=1.24.0,<1.25.0)", "mypy-boto3-ec2 (>=1.24.0,<1.25.0)", "mypy-boto3-ec2-instance-connect (>=1.24.0,<1.25.0)", "mypy-boto3-ecr (>=1.24.0,<1.25.0)", "mypy-boto3-ecr-public (>=1.24.0,<1.25.0)", "mypy-boto3-ecs (>=1.24.0,<1.25.0)", "mypy-boto3-efs (>=1.24.0,<1.25.0)", "mypy-boto3-eks (>=1.24.0,<1.25.0)", "mypy-boto3-elastic-inference (>=1.24.0,<1.25.0)", "mypy-boto3-elasticache (>=1.24.0,<1.25.0)", "mypy-boto3-elasticbeanstalk (>=1.24.0,<1.25.0)", "mypy-boto3-elastictranscoder (>=1.24.0,<1.25.0)", "mypy-boto3-elb (>=1.24.0,<1.25.0)", "mypy-boto3-elbv2 (>=1.24.0,<1.25.0)", "mypy-boto3-emr (>=1.24.0,<1.25.0)", "mypy-boto3-emr-containers (>=1.24.0,<1.25.0)", "mypy-boto3-emr-serverless (>=1.24.0,<1.25.0)", "mypy-boto3-es (>=1.24.0,<1.25.0)", "mypy-boto3-events (>=1.24.0,<1.25.0)", "mypy-boto3-evidently 
(>=1.24.0,<1.25.0)", "mypy-boto3-finspace (>=1.24.0,<1.25.0)", "mypy-boto3-finspace-data (>=1.24.0,<1.25.0)", "mypy-boto3-firehose (>=1.24.0,<1.25.0)", "mypy-boto3-fis (>=1.24.0,<1.25.0)", "mypy-boto3-fms (>=1.24.0,<1.25.0)", "mypy-boto3-forecast (>=1.24.0,<1.25.0)", "mypy-boto3-forecastquery (>=1.24.0,<1.25.0)", "mypy-boto3-frauddetector (>=1.24.0,<1.25.0)", "mypy-boto3-fsx (>=1.24.0,<1.25.0)", "mypy-boto3-gamelift (>=1.24.0,<1.25.0)", "mypy-boto3-gamesparks (>=1.24.0,<1.25.0)", "mypy-boto3-glacier (>=1.24.0,<1.25.0)", "mypy-boto3-globalaccelerator (>=1.24.0,<1.25.0)", "mypy-boto3-glue (>=1.24.0,<1.25.0)", "mypy-boto3-grafana (>=1.24.0,<1.25.0)", "mypy-boto3-greengrass (>=1.24.0,<1.25.0)", "mypy-boto3-greengrassv2 (>=1.24.0,<1.25.0)", "mypy-boto3-groundstation (>=1.24.0,<1.25.0)", "mypy-boto3-guardduty (>=1.24.0,<1.25.0)", "mypy-boto3-health (>=1.24.0,<1.25.0)", "mypy-boto3-healthlake (>=1.24.0,<1.25.0)", "mypy-boto3-honeycode (>=1.24.0,<1.25.0)", "mypy-boto3-iam (>=1.24.0,<1.25.0)", "mypy-boto3-identitystore (>=1.24.0,<1.25.0)", "mypy-boto3-imagebuilder (>=1.24.0,<1.25.0)", "mypy-boto3-importexport (>=1.24.0,<1.25.0)", "mypy-boto3-inspector (>=1.24.0,<1.25.0)", "mypy-boto3-inspector2 (>=1.24.0,<1.25.0)", "mypy-boto3-iot (>=1.24.0,<1.25.0)", "mypy-boto3-iot-data (>=1.24.0,<1.25.0)", "mypy-boto3-iot-jobs-data (>=1.24.0,<1.25.0)", "mypy-boto3-iot1click-devices (>=1.24.0,<1.25.0)", "mypy-boto3-iot1click-projects (>=1.24.0,<1.25.0)", "mypy-boto3-iotanalytics (>=1.24.0,<1.25.0)", "mypy-boto3-iotdeviceadvisor (>=1.24.0,<1.25.0)", "mypy-boto3-iotevents (>=1.24.0,<1.25.0)", "mypy-boto3-iotevents-data (>=1.24.0,<1.25.0)", "mypy-boto3-iotfleethub (>=1.24.0,<1.25.0)", "mypy-boto3-iotsecuretunneling (>=1.24.0,<1.25.0)", "mypy-boto3-iotsitewise (>=1.24.0,<1.25.0)", "mypy-boto3-iotthingsgraph (>=1.24.0,<1.25.0)", "mypy-boto3-iottwinmaker (>=1.24.0,<1.25.0)", "mypy-boto3-iotwireless (>=1.24.0,<1.25.0)", "mypy-boto3-ivs (>=1.24.0,<1.25.0)", "mypy-boto3-ivschat 
(>=1.24.0,<1.25.0)", "mypy-boto3-kafka (>=1.24.0,<1.25.0)", "mypy-boto3-kafkaconnect (>=1.24.0,<1.25.0)", "mypy-boto3-kendra (>=1.24.0,<1.25.0)", "mypy-boto3-keyspaces (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis-video-archived-media (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis-video-media (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis-video-signaling (>=1.24.0,<1.25.0)", "mypy-boto3-kinesisanalytics (>=1.24.0,<1.25.0)", "mypy-boto3-kinesisanalyticsv2 (>=1.24.0,<1.25.0)", "mypy-boto3-kinesisvideo (>=1.24.0,<1.25.0)", "mypy-boto3-kms (>=1.24.0,<1.25.0)", "mypy-boto3-lakeformation (>=1.24.0,<1.25.0)", "mypy-boto3-lambda (>=1.24.0,<1.25.0)", "mypy-boto3-lex-models (>=1.24.0,<1.25.0)", "mypy-boto3-lex-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-lexv2-models (>=1.24.0,<1.25.0)", "mypy-boto3-lexv2-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-license-manager (>=1.24.0,<1.25.0)", "mypy-boto3-license-manager-user-subscriptions (>=1.24.0,<1.25.0)", "mypy-boto3-lightsail (>=1.24.0,<1.25.0)", "mypy-boto3-location (>=1.24.0,<1.25.0)", "mypy-boto3-logs (>=1.24.0,<1.25.0)", "mypy-boto3-lookoutequipment (>=1.24.0,<1.25.0)", "mypy-boto3-lookoutmetrics (>=1.24.0,<1.25.0)", "mypy-boto3-lookoutvision (>=1.24.0,<1.25.0)", "mypy-boto3-m2 (>=1.24.0,<1.25.0)", "mypy-boto3-machinelearning (>=1.24.0,<1.25.0)", "mypy-boto3-macie (>=1.24.0,<1.25.0)", "mypy-boto3-macie2 (>=1.24.0,<1.25.0)", "mypy-boto3-managedblockchain (>=1.24.0,<1.25.0)", "mypy-boto3-marketplace-catalog (>=1.24.0,<1.25.0)", "mypy-boto3-marketplace-entitlement (>=1.24.0,<1.25.0)", "mypy-boto3-marketplacecommerceanalytics (>=1.24.0,<1.25.0)", "mypy-boto3-mediaconnect (>=1.24.0,<1.25.0)", "mypy-boto3-mediaconvert (>=1.24.0,<1.25.0)", "mypy-boto3-medialive (>=1.24.0,<1.25.0)", "mypy-boto3-mediapackage (>=1.24.0,<1.25.0)", "mypy-boto3-mediapackage-vod (>=1.24.0,<1.25.0)", "mypy-boto3-mediastore (>=1.24.0,<1.25.0)", "mypy-boto3-mediastore-data (>=1.24.0,<1.25.0)", "mypy-boto3-mediatailor (>=1.24.0,<1.25.0)", 
"mypy-boto3-memorydb (>=1.24.0,<1.25.0)", "mypy-boto3-meteringmarketplace (>=1.24.0,<1.25.0)", "mypy-boto3-mgh (>=1.24.0,<1.25.0)", "mypy-boto3-mgn (>=1.24.0,<1.25.0)", "mypy-boto3-migration-hub-refactor-spaces (>=1.24.0,<1.25.0)", "mypy-boto3-migrationhub-config (>=1.24.0,<1.25.0)", "mypy-boto3-migrationhubstrategy (>=1.24.0,<1.25.0)", "mypy-boto3-mobile (>=1.24.0,<1.25.0)", "mypy-boto3-mq (>=1.24.0,<1.25.0)", "mypy-boto3-mturk (>=1.24.0,<1.25.0)", "mypy-boto3-mwaa (>=1.24.0,<1.25.0)", "mypy-boto3-neptune (>=1.24.0,<1.25.0)", "mypy-boto3-network-firewall (>=1.24.0,<1.25.0)", "mypy-boto3-networkmanager (>=1.24.0,<1.25.0)", "mypy-boto3-nimble (>=1.24.0,<1.25.0)", "mypy-boto3-opensearch (>=1.24.0,<1.25.0)", "mypy-boto3-opsworks (>=1.24.0,<1.25.0)", "mypy-boto3-opsworkscm (>=1.24.0,<1.25.0)", "mypy-boto3-organizations (>=1.24.0,<1.25.0)", "mypy-boto3-outposts (>=1.24.0,<1.25.0)", "mypy-boto3-panorama (>=1.24.0,<1.25.0)", "mypy-boto3-personalize (>=1.24.0,<1.25.0)", "mypy-boto3-personalize-events (>=1.24.0,<1.25.0)", "mypy-boto3-personalize-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-pi (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint-email (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint-sms-voice (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint-sms-voice-v2 (>=1.24.0,<1.25.0)", "mypy-boto3-polly (>=1.24.0,<1.25.0)", "mypy-boto3-pricing (>=1.24.0,<1.25.0)", "mypy-boto3-privatenetworks (>=1.24.0,<1.25.0)", "mypy-boto3-proton (>=1.24.0,<1.25.0)", "mypy-boto3-qldb (>=1.24.0,<1.25.0)", "mypy-boto3-qldb-session (>=1.24.0,<1.25.0)", "mypy-boto3-quicksight (>=1.24.0,<1.25.0)", "mypy-boto3-ram (>=1.24.0,<1.25.0)", "mypy-boto3-rbin (>=1.24.0,<1.25.0)", "mypy-boto3-rds (>=1.24.0,<1.25.0)", "mypy-boto3-rds-data (>=1.24.0,<1.25.0)", "mypy-boto3-redshift (>=1.24.0,<1.25.0)", "mypy-boto3-redshift-data (>=1.24.0,<1.25.0)", "mypy-boto3-redshift-serverless (>=1.24.0,<1.25.0)", "mypy-boto3-rekognition (>=1.24.0,<1.25.0)", "mypy-boto3-resiliencehub (>=1.24.0,<1.25.0)", 
"mypy-boto3-resource-groups (>=1.24.0,<1.25.0)", "mypy-boto3-resourcegroupstaggingapi (>=1.24.0,<1.25.0)", "mypy-boto3-robomaker (>=1.24.0,<1.25.0)", "mypy-boto3-rolesanywhere (>=1.24.0,<1.25.0)", "mypy-boto3-route53 (>=1.24.0,<1.25.0)", "mypy-boto3-route53-recovery-cluster (>=1.24.0,<1.25.0)", "mypy-boto3-route53-recovery-control-config (>=1.24.0,<1.25.0)", "mypy-boto3-route53-recovery-readiness (>=1.24.0,<1.25.0)", "mypy-boto3-route53domains (>=1.24.0,<1.25.0)", "mypy-boto3-route53resolver (>=1.24.0,<1.25.0)", "mypy-boto3-rum (>=1.24.0,<1.25.0)", "mypy-boto3-s3 (>=1.24.0,<1.25.0)", "mypy-boto3-s3control (>=1.24.0,<1.25.0)", "mypy-boto3-s3outposts (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-a2i-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-edge (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-featurestore-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-savingsplans (>=1.24.0,<1.25.0)", "mypy-boto3-schemas (>=1.24.0,<1.25.0)", "mypy-boto3-sdb (>=1.24.0,<1.25.0)", "mypy-boto3-secretsmanager (>=1.24.0,<1.25.0)", "mypy-boto3-securityhub (>=1.24.0,<1.25.0)", "mypy-boto3-serverlessrepo (>=1.24.0,<1.25.0)", "mypy-boto3-service-quotas (>=1.24.0,<1.25.0)", "mypy-boto3-servicecatalog (>=1.24.0,<1.25.0)", "mypy-boto3-servicecatalog-appregistry (>=1.24.0,<1.25.0)", "mypy-boto3-servicediscovery (>=1.24.0,<1.25.0)", "mypy-boto3-ses (>=1.24.0,<1.25.0)", "mypy-boto3-sesv2 (>=1.24.0,<1.25.0)", "mypy-boto3-shield (>=1.24.0,<1.25.0)", "mypy-boto3-signer (>=1.24.0,<1.25.0)", "mypy-boto3-sms (>=1.24.0,<1.25.0)", "mypy-boto3-sms-voice (>=1.24.0,<1.25.0)", "mypy-boto3-snow-device-management (>=1.24.0,<1.25.0)", "mypy-boto3-snowball (>=1.24.0,<1.25.0)", "mypy-boto3-sns (>=1.24.0,<1.25.0)", "mypy-boto3-sqs (>=1.24.0,<1.25.0)", "mypy-boto3-ssm (>=1.24.0,<1.25.0)", "mypy-boto3-ssm-contacts (>=1.24.0,<1.25.0)", "mypy-boto3-ssm-incidents (>=1.24.0,<1.25.0)", "mypy-boto3-sso (>=1.24.0,<1.25.0)", 
"mypy-boto3-sso-admin (>=1.24.0,<1.25.0)", "mypy-boto3-sso-oidc (>=1.24.0,<1.25.0)", "mypy-boto3-stepfunctions (>=1.24.0,<1.25.0)", "mypy-boto3-storagegateway (>=1.24.0,<1.25.0)", "mypy-boto3-sts (>=1.24.0,<1.25.0)", "mypy-boto3-support (>=1.24.0,<1.25.0)", "mypy-boto3-swf (>=1.24.0,<1.25.0)", "mypy-boto3-synthetics (>=1.24.0,<1.25.0)", "mypy-boto3-textract (>=1.24.0,<1.25.0)", "mypy-boto3-timestream-query (>=1.24.0,<1.25.0)", "mypy-boto3-timestream-write (>=1.24.0,<1.25.0)", "mypy-boto3-transcribe (>=1.24.0,<1.25.0)", "mypy-boto3-transfer (>=1.24.0,<1.25.0)", "mypy-boto3-translate (>=1.24.0,<1.25.0)", "mypy-boto3-voice-id (>=1.24.0,<1.25.0)", "mypy-boto3-waf (>=1.24.0,<1.25.0)", "mypy-boto3-waf-regional (>=1.24.0,<1.25.0)", "mypy-boto3-wafv2 (>=1.24.0,<1.25.0)", "mypy-boto3-wellarchitected (>=1.24.0,<1.25.0)", "mypy-boto3-wisdom (>=1.24.0,<1.25.0)", "mypy-boto3-workdocs (>=1.24.0,<1.25.0)", "mypy-boto3-worklink (>=1.24.0,<1.25.0)", "mypy-boto3-workmail (>=1.24.0,<1.25.0)", "mypy-boto3-workmailmessageflow (>=1.24.0,<1.25.0)", "mypy-boto3-workspaces (>=1.24.0,<1.25.0)", "mypy-boto3-workspaces-web (>=1.24.0,<1.25.0)", "mypy-boto3-xray (>=1.24.0,<1.25.0)"] +amp = ["mypy-boto3-amp (>=1.24.0,<1.25.0)"] +amplify = ["mypy-boto3-amplify (>=1.24.0,<1.25.0)"] +amplifybackend = ["mypy-boto3-amplifybackend (>=1.24.0,<1.25.0)"] +amplifyuibuilder = ["mypy-boto3-amplifyuibuilder (>=1.24.0,<1.25.0)"] +apigateway = ["mypy-boto3-apigateway (>=1.24.0,<1.25.0)"] +apigatewaymanagementapi = ["mypy-boto3-apigatewaymanagementapi (>=1.24.0,<1.25.0)"] +apigatewayv2 = ["mypy-boto3-apigatewayv2 (>=1.24.0,<1.25.0)"] +appconfig = ["mypy-boto3-appconfig (>=1.24.0,<1.25.0)"] +appconfigdata = ["mypy-boto3-appconfigdata (>=1.24.0,<1.25.0)"] +appflow = ["mypy-boto3-appflow (>=1.24.0,<1.25.0)"] +appintegrations = ["mypy-boto3-appintegrations (>=1.24.0,<1.25.0)"] +application-autoscaling = ["mypy-boto3-application-autoscaling (>=1.24.0,<1.25.0)"] +application-insights = 
["mypy-boto3-application-insights (>=1.24.0,<1.25.0)"] +applicationcostprofiler = ["mypy-boto3-applicationcostprofiler (>=1.24.0,<1.25.0)"] +appmesh = ["mypy-boto3-appmesh (>=1.24.0,<1.25.0)"] +apprunner = ["mypy-boto3-apprunner (>=1.24.0,<1.25.0)"] +appstream = ["mypy-boto3-appstream (>=1.24.0,<1.25.0)"] +appsync = ["mypy-boto3-appsync (>=1.24.0,<1.25.0)"] +athena = ["mypy-boto3-athena (>=1.24.0,<1.25.0)"] +auditmanager = ["mypy-boto3-auditmanager (>=1.24.0,<1.25.0)"] +autoscaling = ["mypy-boto3-autoscaling (>=1.24.0,<1.25.0)"] +autoscaling-plans = ["mypy-boto3-autoscaling-plans (>=1.24.0,<1.25.0)"] +backup = ["mypy-boto3-backup (>=1.24.0,<1.25.0)"] +backup-gateway = ["mypy-boto3-backup-gateway (>=1.24.0,<1.25.0)"] +backupstorage = ["mypy-boto3-backupstorage (>=1.24.0,<1.25.0)"] +batch = ["mypy-boto3-batch (>=1.24.0,<1.25.0)"] +billingconductor = ["mypy-boto3-billingconductor (>=1.24.0,<1.25.0)"] +braket = ["mypy-boto3-braket (>=1.24.0,<1.25.0)"] +budgets = ["mypy-boto3-budgets (>=1.24.0,<1.25.0)"] +ce = ["mypy-boto3-ce (>=1.24.0,<1.25.0)"] +chime = ["mypy-boto3-chime (>=1.24.0,<1.25.0)"] +chime-sdk-identity = ["mypy-boto3-chime-sdk-identity (>=1.24.0,<1.25.0)"] +chime-sdk-media-pipelines = ["mypy-boto3-chime-sdk-media-pipelines (>=1.24.0,<1.25.0)"] +chime-sdk-meetings = ["mypy-boto3-chime-sdk-meetings (>=1.24.0,<1.25.0)"] +chime-sdk-messaging = ["mypy-boto3-chime-sdk-messaging (>=1.24.0,<1.25.0)"] +cloud9 = ["mypy-boto3-cloud9 (>=1.24.0,<1.25.0)"] +cloudcontrol = ["mypy-boto3-cloudcontrol (>=1.24.0,<1.25.0)"] +clouddirectory = ["mypy-boto3-clouddirectory (>=1.24.0,<1.25.0)"] +cloudformation = ["mypy-boto3-cloudformation (>=1.24.0,<1.25.0)"] +cloudfront = ["mypy-boto3-cloudfront (>=1.24.0,<1.25.0)"] +cloudhsm = ["mypy-boto3-cloudhsm (>=1.24.0,<1.25.0)"] +cloudhsmv2 = ["mypy-boto3-cloudhsmv2 (>=1.24.0,<1.25.0)"] +cloudsearch = ["mypy-boto3-cloudsearch (>=1.24.0,<1.25.0)"] +cloudsearchdomain = ["mypy-boto3-cloudsearchdomain (>=1.24.0,<1.25.0)"] +cloudtrail = 
["mypy-boto3-cloudtrail (>=1.24.0,<1.25.0)"] +cloudwatch = ["mypy-boto3-cloudwatch (>=1.24.0,<1.25.0)"] +codeartifact = ["mypy-boto3-codeartifact (>=1.24.0,<1.25.0)"] +codebuild = ["mypy-boto3-codebuild (>=1.24.0,<1.25.0)"] +codecommit = ["mypy-boto3-codecommit (>=1.24.0,<1.25.0)"] +codedeploy = ["mypy-boto3-codedeploy (>=1.24.0,<1.25.0)"] +codeguru-reviewer = ["mypy-boto3-codeguru-reviewer (>=1.24.0,<1.25.0)"] +codeguruprofiler = ["mypy-boto3-codeguruprofiler (>=1.24.0,<1.25.0)"] +codepipeline = ["mypy-boto3-codepipeline (>=1.24.0,<1.25.0)"] +codestar = ["mypy-boto3-codestar (>=1.24.0,<1.25.0)"] +codestar-connections = ["mypy-boto3-codestar-connections (>=1.24.0,<1.25.0)"] +codestar-notifications = ["mypy-boto3-codestar-notifications (>=1.24.0,<1.25.0)"] +cognito-identity = ["mypy-boto3-cognito-identity (>=1.24.0,<1.25.0)"] +cognito-idp = ["mypy-boto3-cognito-idp (>=1.24.0,<1.25.0)"] +cognito-sync = ["mypy-boto3-cognito-sync (>=1.24.0,<1.25.0)"] +comprehend = ["mypy-boto3-comprehend (>=1.24.0,<1.25.0)"] +comprehendmedical = ["mypy-boto3-comprehendmedical (>=1.24.0,<1.25.0)"] +compute-optimizer = ["mypy-boto3-compute-optimizer (>=1.24.0,<1.25.0)"] +config = ["mypy-boto3-config (>=1.24.0,<1.25.0)"] +connect = ["mypy-boto3-connect (>=1.24.0,<1.25.0)"] +connect-contact-lens = ["mypy-boto3-connect-contact-lens (>=1.24.0,<1.25.0)"] +connectcampaigns = ["mypy-boto3-connectcampaigns (>=1.24.0,<1.25.0)"] +connectparticipant = ["mypy-boto3-connectparticipant (>=1.24.0,<1.25.0)"] +cur = ["mypy-boto3-cur (>=1.24.0,<1.25.0)"] +customer-profiles = ["mypy-boto3-customer-profiles (>=1.24.0,<1.25.0)"] +databrew = ["mypy-boto3-databrew (>=1.24.0,<1.25.0)"] +dataexchange = ["mypy-boto3-dataexchange (>=1.24.0,<1.25.0)"] +datapipeline = ["mypy-boto3-datapipeline (>=1.24.0,<1.25.0)"] +datasync = ["mypy-boto3-datasync (>=1.24.0,<1.25.0)"] +dax = ["mypy-boto3-dax (>=1.24.0,<1.25.0)"] +detective = ["mypy-boto3-detective (>=1.24.0,<1.25.0)"] +devicefarm = ["mypy-boto3-devicefarm 
(>=1.24.0,<1.25.0)"] +devops-guru = ["mypy-boto3-devops-guru (>=1.24.0,<1.25.0)"] +directconnect = ["mypy-boto3-directconnect (>=1.24.0,<1.25.0)"] +discovery = ["mypy-boto3-discovery (>=1.24.0,<1.25.0)"] +dlm = ["mypy-boto3-dlm (>=1.24.0,<1.25.0)"] +dms = ["mypy-boto3-dms (>=1.24.0,<1.25.0)"] +docdb = ["mypy-boto3-docdb (>=1.24.0,<1.25.0)"] +drs = ["mypy-boto3-drs (>=1.24.0,<1.25.0)"] +ds = ["mypy-boto3-ds (>=1.24.0,<1.25.0)"] +dynamodb = ["mypy-boto3-dynamodb (>=1.24.0,<1.25.0)"] +dynamodbstreams = ["mypy-boto3-dynamodbstreams (>=1.24.0,<1.25.0)"] +ebs = ["mypy-boto3-ebs (>=1.24.0,<1.25.0)"] +ec2 = ["mypy-boto3-ec2 (>=1.24.0,<1.25.0)"] +ec2-instance-connect = ["mypy-boto3-ec2-instance-connect (>=1.24.0,<1.25.0)"] +ecr = ["mypy-boto3-ecr (>=1.24.0,<1.25.0)"] +ecr-public = ["mypy-boto3-ecr-public (>=1.24.0,<1.25.0)"] +ecs = ["mypy-boto3-ecs (>=1.24.0,<1.25.0)"] +efs = ["mypy-boto3-efs (>=1.24.0,<1.25.0)"] +eks = ["mypy-boto3-eks (>=1.24.0,<1.25.0)"] +elastic-inference = ["mypy-boto3-elastic-inference (>=1.24.0,<1.25.0)"] +elasticache = ["mypy-boto3-elasticache (>=1.24.0,<1.25.0)"] +elasticbeanstalk = ["mypy-boto3-elasticbeanstalk (>=1.24.0,<1.25.0)"] +elastictranscoder = ["mypy-boto3-elastictranscoder (>=1.24.0,<1.25.0)"] +elb = ["mypy-boto3-elb (>=1.24.0,<1.25.0)"] +elbv2 = ["mypy-boto3-elbv2 (>=1.24.0,<1.25.0)"] +emr = ["mypy-boto3-emr (>=1.24.0,<1.25.0)"] +emr-containers = ["mypy-boto3-emr-containers (>=1.24.0,<1.25.0)"] +emr-serverless = ["mypy-boto3-emr-serverless (>=1.24.0,<1.25.0)"] +es = ["mypy-boto3-es (>=1.24.0,<1.25.0)"] +essential = ["mypy-boto3-cloudformation (>=1.24.0,<1.25.0)", "mypy-boto3-dynamodb (>=1.24.0,<1.25.0)", "mypy-boto3-ec2 (>=1.24.0,<1.25.0)", "mypy-boto3-lambda (>=1.24.0,<1.25.0)", "mypy-boto3-rds (>=1.24.0,<1.25.0)", "mypy-boto3-s3 (>=1.24.0,<1.25.0)", "mypy-boto3-sqs (>=1.24.0,<1.25.0)"] +events = ["mypy-boto3-events (>=1.24.0,<1.25.0)"] +evidently = ["mypy-boto3-evidently (>=1.24.0,<1.25.0)"] +finspace = ["mypy-boto3-finspace 
(>=1.24.0,<1.25.0)"] +finspace-data = ["mypy-boto3-finspace-data (>=1.24.0,<1.25.0)"] +firehose = ["mypy-boto3-firehose (>=1.24.0,<1.25.0)"] +fis = ["mypy-boto3-fis (>=1.24.0,<1.25.0)"] +fms = ["mypy-boto3-fms (>=1.24.0,<1.25.0)"] +forecast = ["mypy-boto3-forecast (>=1.24.0,<1.25.0)"] +forecastquery = ["mypy-boto3-forecastquery (>=1.24.0,<1.25.0)"] +frauddetector = ["mypy-boto3-frauddetector (>=1.24.0,<1.25.0)"] +fsx = ["mypy-boto3-fsx (>=1.24.0,<1.25.0)"] +gamelift = ["mypy-boto3-gamelift (>=1.24.0,<1.25.0)"] +gamesparks = ["mypy-boto3-gamesparks (>=1.24.0,<1.25.0)"] +glacier = ["mypy-boto3-glacier (>=1.24.0,<1.25.0)"] +globalaccelerator = ["mypy-boto3-globalaccelerator (>=1.24.0,<1.25.0)"] +glue = ["mypy-boto3-glue (>=1.24.0,<1.25.0)"] +grafana = ["mypy-boto3-grafana (>=1.24.0,<1.25.0)"] +greengrass = ["mypy-boto3-greengrass (>=1.24.0,<1.25.0)"] +greengrassv2 = ["mypy-boto3-greengrassv2 (>=1.24.0,<1.25.0)"] +groundstation = ["mypy-boto3-groundstation (>=1.24.0,<1.25.0)"] +guardduty = ["mypy-boto3-guardduty (>=1.24.0,<1.25.0)"] +health = ["mypy-boto3-health (>=1.24.0,<1.25.0)"] +healthlake = ["mypy-boto3-healthlake (>=1.24.0,<1.25.0)"] +honeycode = ["mypy-boto3-honeycode (>=1.24.0,<1.25.0)"] +iam = ["mypy-boto3-iam (>=1.24.0,<1.25.0)"] +identitystore = ["mypy-boto3-identitystore (>=1.24.0,<1.25.0)"] +imagebuilder = ["mypy-boto3-imagebuilder (>=1.24.0,<1.25.0)"] +importexport = ["mypy-boto3-importexport (>=1.24.0,<1.25.0)"] +inspector = ["mypy-boto3-inspector (>=1.24.0,<1.25.0)"] +inspector2 = ["mypy-boto3-inspector2 (>=1.24.0,<1.25.0)"] +iot = ["mypy-boto3-iot (>=1.24.0,<1.25.0)"] +iot-data = ["mypy-boto3-iot-data (>=1.24.0,<1.25.0)"] +iot-jobs-data = ["mypy-boto3-iot-jobs-data (>=1.24.0,<1.25.0)"] +iot1click-devices = ["mypy-boto3-iot1click-devices (>=1.24.0,<1.25.0)"] +iot1click-projects = ["mypy-boto3-iot1click-projects (>=1.24.0,<1.25.0)"] +iotanalytics = ["mypy-boto3-iotanalytics (>=1.24.0,<1.25.0)"] +iotdeviceadvisor = ["mypy-boto3-iotdeviceadvisor 
(>=1.24.0,<1.25.0)"] +iotevents = ["mypy-boto3-iotevents (>=1.24.0,<1.25.0)"] +iotevents-data = ["mypy-boto3-iotevents-data (>=1.24.0,<1.25.0)"] +iotfleethub = ["mypy-boto3-iotfleethub (>=1.24.0,<1.25.0)"] +iotsecuretunneling = ["mypy-boto3-iotsecuretunneling (>=1.24.0,<1.25.0)"] +iotsitewise = ["mypy-boto3-iotsitewise (>=1.24.0,<1.25.0)"] +iotthingsgraph = ["mypy-boto3-iotthingsgraph (>=1.24.0,<1.25.0)"] +iottwinmaker = ["mypy-boto3-iottwinmaker (>=1.24.0,<1.25.0)"] +iotwireless = ["mypy-boto3-iotwireless (>=1.24.0,<1.25.0)"] +ivs = ["mypy-boto3-ivs (>=1.24.0,<1.25.0)"] +ivschat = ["mypy-boto3-ivschat (>=1.24.0,<1.25.0)"] +kafka = ["mypy-boto3-kafka (>=1.24.0,<1.25.0)"] +kafkaconnect = ["mypy-boto3-kafkaconnect (>=1.24.0,<1.25.0)"] +kendra = ["mypy-boto3-kendra (>=1.24.0,<1.25.0)"] +keyspaces = ["mypy-boto3-keyspaces (>=1.24.0,<1.25.0)"] +kinesis = ["mypy-boto3-kinesis (>=1.24.0,<1.25.0)"] +kinesis-video-archived-media = ["mypy-boto3-kinesis-video-archived-media (>=1.24.0,<1.25.0)"] +kinesis-video-media = ["mypy-boto3-kinesis-video-media (>=1.24.0,<1.25.0)"] +kinesis-video-signaling = ["mypy-boto3-kinesis-video-signaling (>=1.24.0,<1.25.0)"] +kinesisanalytics = ["mypy-boto3-kinesisanalytics (>=1.24.0,<1.25.0)"] +kinesisanalyticsv2 = ["mypy-boto3-kinesisanalyticsv2 (>=1.24.0,<1.25.0)"] +kinesisvideo = ["mypy-boto3-kinesisvideo (>=1.24.0,<1.25.0)"] +kms = ["mypy-boto3-kms (>=1.24.0,<1.25.0)"] +lakeformation = ["mypy-boto3-lakeformation (>=1.24.0,<1.25.0)"] +lambda = ["mypy-boto3-lambda (>=1.24.0,<1.25.0)"] +lex-models = ["mypy-boto3-lex-models (>=1.24.0,<1.25.0)"] +lex-runtime = ["mypy-boto3-lex-runtime (>=1.24.0,<1.25.0)"] +lexv2-models = ["mypy-boto3-lexv2-models (>=1.24.0,<1.25.0)"] +lexv2-runtime = ["mypy-boto3-lexv2-runtime (>=1.24.0,<1.25.0)"] +license-manager = ["mypy-boto3-license-manager (>=1.24.0,<1.25.0)"] +license-manager-user-subscriptions = ["mypy-boto3-license-manager-user-subscriptions (>=1.24.0,<1.25.0)"] +lightsail = ["mypy-boto3-lightsail 
(>=1.24.0,<1.25.0)"] +location = ["mypy-boto3-location (>=1.24.0,<1.25.0)"] +logs = ["mypy-boto3-logs (>=1.24.0,<1.25.0)"] +lookoutequipment = ["mypy-boto3-lookoutequipment (>=1.24.0,<1.25.0)"] +lookoutmetrics = ["mypy-boto3-lookoutmetrics (>=1.24.0,<1.25.0)"] +lookoutvision = ["mypy-boto3-lookoutvision (>=1.24.0,<1.25.0)"] +m2 = ["mypy-boto3-m2 (>=1.24.0,<1.25.0)"] +machinelearning = ["mypy-boto3-machinelearning (>=1.24.0,<1.25.0)"] +macie = ["mypy-boto3-macie (>=1.24.0,<1.25.0)"] +macie2 = ["mypy-boto3-macie2 (>=1.24.0,<1.25.0)"] +managedblockchain = ["mypy-boto3-managedblockchain (>=1.24.0,<1.25.0)"] +marketplace-catalog = ["mypy-boto3-marketplace-catalog (>=1.24.0,<1.25.0)"] +marketplace-entitlement = ["mypy-boto3-marketplace-entitlement (>=1.24.0,<1.25.0)"] +marketplacecommerceanalytics = ["mypy-boto3-marketplacecommerceanalytics (>=1.24.0,<1.25.0)"] +mediaconnect = ["mypy-boto3-mediaconnect (>=1.24.0,<1.25.0)"] +mediaconvert = ["mypy-boto3-mediaconvert (>=1.24.0,<1.25.0)"] +medialive = ["mypy-boto3-medialive (>=1.24.0,<1.25.0)"] +mediapackage = ["mypy-boto3-mediapackage (>=1.24.0,<1.25.0)"] +mediapackage-vod = ["mypy-boto3-mediapackage-vod (>=1.24.0,<1.25.0)"] +mediastore = ["mypy-boto3-mediastore (>=1.24.0,<1.25.0)"] +mediastore-data = ["mypy-boto3-mediastore-data (>=1.24.0,<1.25.0)"] +mediatailor = ["mypy-boto3-mediatailor (>=1.24.0,<1.25.0)"] +memorydb = ["mypy-boto3-memorydb (>=1.24.0,<1.25.0)"] +meteringmarketplace = ["mypy-boto3-meteringmarketplace (>=1.24.0,<1.25.0)"] +mgh = ["mypy-boto3-mgh (>=1.24.0,<1.25.0)"] +mgn = ["mypy-boto3-mgn (>=1.24.0,<1.25.0)"] +migration-hub-refactor-spaces = ["mypy-boto3-migration-hub-refactor-spaces (>=1.24.0,<1.25.0)"] +migrationhub-config = ["mypy-boto3-migrationhub-config (>=1.24.0,<1.25.0)"] +migrationhubstrategy = ["mypy-boto3-migrationhubstrategy (>=1.24.0,<1.25.0)"] +mobile = ["mypy-boto3-mobile (>=1.24.0,<1.25.0)"] +mq = ["mypy-boto3-mq (>=1.24.0,<1.25.0)"] +mturk = ["mypy-boto3-mturk (>=1.24.0,<1.25.0)"] +mwaa = 
["mypy-boto3-mwaa (>=1.24.0,<1.25.0)"] +neptune = ["mypy-boto3-neptune (>=1.24.0,<1.25.0)"] +network-firewall = ["mypy-boto3-network-firewall (>=1.24.0,<1.25.0)"] +networkmanager = ["mypy-boto3-networkmanager (>=1.24.0,<1.25.0)"] +nimble = ["mypy-boto3-nimble (>=1.24.0,<1.25.0)"] +opensearch = ["mypy-boto3-opensearch (>=1.24.0,<1.25.0)"] +opsworks = ["mypy-boto3-opsworks (>=1.24.0,<1.25.0)"] +opsworkscm = ["mypy-boto3-opsworkscm (>=1.24.0,<1.25.0)"] +organizations = ["mypy-boto3-organizations (>=1.24.0,<1.25.0)"] +outposts = ["mypy-boto3-outposts (>=1.24.0,<1.25.0)"] +panorama = ["mypy-boto3-panorama (>=1.24.0,<1.25.0)"] +personalize = ["mypy-boto3-personalize (>=1.24.0,<1.25.0)"] +personalize-events = ["mypy-boto3-personalize-events (>=1.24.0,<1.25.0)"] +personalize-runtime = ["mypy-boto3-personalize-runtime (>=1.24.0,<1.25.0)"] +pi = ["mypy-boto3-pi (>=1.24.0,<1.25.0)"] +pinpoint = ["mypy-boto3-pinpoint (>=1.24.0,<1.25.0)"] +pinpoint-email = ["mypy-boto3-pinpoint-email (>=1.24.0,<1.25.0)"] +pinpoint-sms-voice = ["mypy-boto3-pinpoint-sms-voice (>=1.24.0,<1.25.0)"] +pinpoint-sms-voice-v2 = ["mypy-boto3-pinpoint-sms-voice-v2 (>=1.24.0,<1.25.0)"] +polly = ["mypy-boto3-polly (>=1.24.0,<1.25.0)"] +pricing = ["mypy-boto3-pricing (>=1.24.0,<1.25.0)"] +privatenetworks = ["mypy-boto3-privatenetworks (>=1.24.0,<1.25.0)"] +proton = ["mypy-boto3-proton (>=1.24.0,<1.25.0)"] +qldb = ["mypy-boto3-qldb (>=1.24.0,<1.25.0)"] +qldb-session = ["mypy-boto3-qldb-session (>=1.24.0,<1.25.0)"] +quicksight = ["mypy-boto3-quicksight (>=1.24.0,<1.25.0)"] +ram = ["mypy-boto3-ram (>=1.24.0,<1.25.0)"] +rbin = ["mypy-boto3-rbin (>=1.24.0,<1.25.0)"] +rds = ["mypy-boto3-rds (>=1.24.0,<1.25.0)"] +rds-data = ["mypy-boto3-rds-data (>=1.24.0,<1.25.0)"] +redshift = ["mypy-boto3-redshift (>=1.24.0,<1.25.0)"] +redshift-data = ["mypy-boto3-redshift-data (>=1.24.0,<1.25.0)"] +redshift-serverless = ["mypy-boto3-redshift-serverless (>=1.24.0,<1.25.0)"] +rekognition = ["mypy-boto3-rekognition 
(>=1.24.0,<1.25.0)"] +resiliencehub = ["mypy-boto3-resiliencehub (>=1.24.0,<1.25.0)"] +resource-groups = ["mypy-boto3-resource-groups (>=1.24.0,<1.25.0)"] +resourcegroupstaggingapi = ["mypy-boto3-resourcegroupstaggingapi (>=1.24.0,<1.25.0)"] +robomaker = ["mypy-boto3-robomaker (>=1.24.0,<1.25.0)"] +rolesanywhere = ["mypy-boto3-rolesanywhere (>=1.24.0,<1.25.0)"] +route53 = ["mypy-boto3-route53 (>=1.24.0,<1.25.0)"] +route53-recovery-cluster = ["mypy-boto3-route53-recovery-cluster (>=1.24.0,<1.25.0)"] +route53-recovery-control-config = ["mypy-boto3-route53-recovery-control-config (>=1.24.0,<1.25.0)"] +route53-recovery-readiness = ["mypy-boto3-route53-recovery-readiness (>=1.24.0,<1.25.0)"] +route53domains = ["mypy-boto3-route53domains (>=1.24.0,<1.25.0)"] +route53resolver = ["mypy-boto3-route53resolver (>=1.24.0,<1.25.0)"] +rum = ["mypy-boto3-rum (>=1.24.0,<1.25.0)"] +s3 = ["mypy-boto3-s3 (>=1.24.0,<1.25.0)"] +s3control = ["mypy-boto3-s3control (>=1.24.0,<1.25.0)"] +s3outposts = ["mypy-boto3-s3outposts (>=1.24.0,<1.25.0)"] +sagemaker = ["mypy-boto3-sagemaker (>=1.24.0,<1.25.0)"] +sagemaker-a2i-runtime = ["mypy-boto3-sagemaker-a2i-runtime (>=1.24.0,<1.25.0)"] +sagemaker-edge = ["mypy-boto3-sagemaker-edge (>=1.24.0,<1.25.0)"] +sagemaker-featurestore-runtime = ["mypy-boto3-sagemaker-featurestore-runtime (>=1.24.0,<1.25.0)"] +sagemaker-runtime = ["mypy-boto3-sagemaker-runtime (>=1.24.0,<1.25.0)"] +savingsplans = ["mypy-boto3-savingsplans (>=1.24.0,<1.25.0)"] +schemas = ["mypy-boto3-schemas (>=1.24.0,<1.25.0)"] +sdb = ["mypy-boto3-sdb (>=1.24.0,<1.25.0)"] +secretsmanager = ["mypy-boto3-secretsmanager (>=1.24.0,<1.25.0)"] +securityhub = ["mypy-boto3-securityhub (>=1.24.0,<1.25.0)"] +serverlessrepo = ["mypy-boto3-serverlessrepo (>=1.24.0,<1.25.0)"] +service-quotas = ["mypy-boto3-service-quotas (>=1.24.0,<1.25.0)"] +servicecatalog = ["mypy-boto3-servicecatalog (>=1.24.0,<1.25.0)"] +servicecatalog-appregistry = ["mypy-boto3-servicecatalog-appregistry (>=1.24.0,<1.25.0)"] 
+servicediscovery = ["mypy-boto3-servicediscovery (>=1.24.0,<1.25.0)"] +ses = ["mypy-boto3-ses (>=1.24.0,<1.25.0)"] +sesv2 = ["mypy-boto3-sesv2 (>=1.24.0,<1.25.0)"] +shield = ["mypy-boto3-shield (>=1.24.0,<1.25.0)"] +signer = ["mypy-boto3-signer (>=1.24.0,<1.25.0)"] +sms = ["mypy-boto3-sms (>=1.24.0,<1.25.0)"] +sms-voice = ["mypy-boto3-sms-voice (>=1.24.0,<1.25.0)"] +snow-device-management = ["mypy-boto3-snow-device-management (>=1.24.0,<1.25.0)"] +snowball = ["mypy-boto3-snowball (>=1.24.0,<1.25.0)"] +sns = ["mypy-boto3-sns (>=1.24.0,<1.25.0)"] +sqs = ["mypy-boto3-sqs (>=1.24.0,<1.25.0)"] +ssm = ["mypy-boto3-ssm (>=1.24.0,<1.25.0)"] +ssm-contacts = ["mypy-boto3-ssm-contacts (>=1.24.0,<1.25.0)"] +ssm-incidents = ["mypy-boto3-ssm-incidents (>=1.24.0,<1.25.0)"] +sso = ["mypy-boto3-sso (>=1.24.0,<1.25.0)"] +sso-admin = ["mypy-boto3-sso-admin (>=1.24.0,<1.25.0)"] +sso-oidc = ["mypy-boto3-sso-oidc (>=1.24.0,<1.25.0)"] +stepfunctions = ["mypy-boto3-stepfunctions (>=1.24.0,<1.25.0)"] +storagegateway = ["mypy-boto3-storagegateway (>=1.24.0,<1.25.0)"] +sts = ["mypy-boto3-sts (>=1.24.0,<1.25.0)"] +support = ["mypy-boto3-support (>=1.24.0,<1.25.0)"] +swf = ["mypy-boto3-swf (>=1.24.0,<1.25.0)"] +synthetics = ["mypy-boto3-synthetics (>=1.24.0,<1.25.0)"] +textract = ["mypy-boto3-textract (>=1.24.0,<1.25.0)"] +timestream-query = ["mypy-boto3-timestream-query (>=1.24.0,<1.25.0)"] +timestream-write = ["mypy-boto3-timestream-write (>=1.24.0,<1.25.0)"] +transcribe = ["mypy-boto3-transcribe (>=1.24.0,<1.25.0)"] +transfer = ["mypy-boto3-transfer (>=1.24.0,<1.25.0)"] +translate = ["mypy-boto3-translate (>=1.24.0,<1.25.0)"] +voice-id = ["mypy-boto3-voice-id (>=1.24.0,<1.25.0)"] +waf = ["mypy-boto3-waf (>=1.24.0,<1.25.0)"] +waf-regional = ["mypy-boto3-waf-regional (>=1.24.0,<1.25.0)"] +wafv2 = ["mypy-boto3-wafv2 (>=1.24.0,<1.25.0)"] +wellarchitected = ["mypy-boto3-wellarchitected (>=1.24.0,<1.25.0)"] +wisdom = ["mypy-boto3-wisdom (>=1.24.0,<1.25.0)"] +workdocs = ["mypy-boto3-workdocs 
(>=1.24.0,<1.25.0)"] +worklink = ["mypy-boto3-worklink (>=1.24.0,<1.25.0)"] +workmail = ["mypy-boto3-workmail (>=1.24.0,<1.25.0)"] +workmailmessageflow = ["mypy-boto3-workmailmessageflow (>=1.24.0,<1.25.0)"] +workspaces = ["mypy-boto3-workspaces (>=1.24.0,<1.25.0)"] +workspaces-web = ["mypy-boto3-workspaces-web (>=1.24.0,<1.25.0)"] +xray = ["mypy-boto3-xray (>=1.24.0,<1.25.0)"] [[package]] name = "botocore" @@ -713,6 +735,20 @@ category = "main" optional = false python-versions = "*" +[[package]] +name = "isort" +version = "5.10.1" +description = "A Python utility / library to sort Python imports." +category = "dev" +optional = false +python-versions = ">=3.6.1,<4.0" + +[package.extras] +pipfile_deprecated_finder = ["pipreqs", "requirementslib"] +requirements_deprecated_finder = ["pipreqs", "pip-api"] +colors = ["colorama (>=0.4.3,<0.5.0)"] +plugins = ["setuptools"] + [[package]] name = "itsdangerous" version = "2.1.2" @@ -994,6 +1030,14 @@ python-versions = ">=3.6" [package.dependencies] pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" +[[package]] +name = "pathspec" +version = "0.9.0" +description = "Utility library for gitignore style pattern matching of file paths." +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" + [[package]] name = "pbr" version = "5.9.0" @@ -1002,6 +1046,18 @@ category = "main" optional = false python-versions = ">=2.6" +[[package]] +name = "platformdirs" +version = "2.5.2" +description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
+category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["furo (>=2021.7.5b38)", "proselint (>=0.10.2)", "sphinx-autodoc-typehints (>=1.12)", "sphinx (>=4)"] +test = ["appdirs (==1.4.4)", "pytest-cov (>=2.7)", "pytest-mock (>=3.6)", "pytest (>=6)"] + [[package]] name = "pluggy" version = "1.0.0" @@ -1466,14 +1522,6 @@ category = "main" optional = false python-versions = ">=3.4" -[[package]] -name = "yapf" -version = "0.31.0" -description = "A formatter for Python code." -category = "dev" -optional = false -python-versions = "*" - [[package]] name = "zipp" version = "3.8.1" @@ -1489,7 +1537,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest- [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "453b90e40481ca6e4395e84beb73489b58c0983e826e369eb0f412ef633ea5e1" +content-hash = "497b963e7a2f80a751ccd201e950cf533caddb6c7c96163c94cea69874840843" [metadata.files] aiopg = [ @@ -1543,13 +1591,38 @@ backoff = [ {file = "backoff-1.11.1-py2.py3-none-any.whl", hash = "sha256:61928f8fa48d52e4faa81875eecf308eccfb1016b018bb6bd21e05b5d90a96c5"}, {file = "backoff-1.11.1.tar.gz", hash = "sha256:ccb962a2378418c667b3c979b504fdeb7d9e0d29c0579e3b13b86467177728cb"}, ] +black = [ + {file = "black-22.6.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f586c26118bc6e714ec58c09df0157fe2d9ee195c764f630eb0d8e7ccce72e69"}, + {file = "black-22.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b270a168d69edb8b7ed32c193ef10fd27844e5c60852039599f9184460ce0807"}, + {file = "black-22.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6797f58943fceb1c461fb572edbe828d811e719c24e03375fd25170ada53825e"}, + {file = "black-22.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c85928b9d5f83b23cee7d0efcb310172412fbf7cb9d9ce963bd67fd141781def"}, + {file = "black-22.6.0-cp310-cp310-win_amd64.whl", hash = 
"sha256:f6fe02afde060bbeef044af7996f335fbe90b039ccf3f5eb8f16df8b20f77666"}, + {file = "black-22.6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:cfaf3895a9634e882bf9d2363fed5af8888802d670f58b279b0bece00e9a872d"}, + {file = "black-22.6.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94783f636bca89f11eb5d50437e8e17fbc6a929a628d82304c80fa9cd945f256"}, + {file = "black-22.6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:2ea29072e954a4d55a2ff58971b83365eba5d3d357352a07a7a4df0d95f51c78"}, + {file = "black-22.6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e439798f819d49ba1c0bd9664427a05aab79bfba777a6db94fd4e56fae0cb849"}, + {file = "black-22.6.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:187d96c5e713f441a5829e77120c269b6514418f4513a390b0499b0987f2ff1c"}, + {file = "black-22.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:074458dc2f6e0d3dab7928d4417bb6957bb834434516f21514138437accdbe90"}, + {file = "black-22.6.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a218d7e5856f91d20f04e931b6f16d15356db1c846ee55f01bac297a705ca24f"}, + {file = "black-22.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:568ac3c465b1c8b34b61cd7a4e349e93f91abf0f9371eda1cf87194663ab684e"}, + {file = "black-22.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6c1734ab264b8f7929cef8ae5f900b85d579e6cbfde09d7387da8f04771b51c6"}, + {file = "black-22.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9a3ac16efe9ec7d7381ddebcc022119794872abce99475345c5a61aa18c45ad"}, + {file = "black-22.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:b9fd45787ba8aa3f5e0a0a98920c1012c884622c6c920dbe98dbd05bc7c70fbf"}, + {file = "black-22.6.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7ba9be198ecca5031cd78745780d65a3f75a34b2ff9be5837045dce55db83d1c"}, + {file = "black-22.6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a3db5b6409b96d9bd543323b23ef32a1a2b06416d525d27e0f67e74f1446c8f2"}, + {file = 
"black-22.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:560558527e52ce8afba936fcce93a7411ab40c7d5fe8c2463e279e843c0328ee"}, + {file = "black-22.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b154e6bbde1e79ea3260c4b40c0b7b3109ffcdf7bc4ebf8859169a6af72cd70b"}, + {file = "black-22.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:4af5bc0e1f96be5ae9bd7aaec219c901a94d6caa2484c21983d043371c733fc4"}, + {file = "black-22.6.0-py3-none-any.whl", hash = "sha256:ac609cf8ef5e7115ddd07d85d988d074ed00e10fbc3445aee393e70164a2219c"}, + {file = "black-22.6.0.tar.gz", hash = "sha256:6c6d39e28aed379aec40da1c65434c77d75e65bb59a1e1c283de545fb4e7c6c9"}, +] boto3 = [ {file = "boto3-1.24.38-py3-none-any.whl", hash = "sha256:bcf97fd7c494f4e2bbbe2511625500654179c0a6b3bea977d46f97af764e85a4"}, {file = "boto3-1.24.38.tar.gz", hash = "sha256:f4c6b025f392c934338c7f01badfddbd0d3cf2397ff5df35c31409798dce33f5"}, ] boto3-stubs = [ - {file = "boto3-stubs-1.24.51.tar.gz", hash = "sha256:ea69c707e9ceab7c11cab1f11fb4bbe98fa5ff8da593f888946d297daa083870"}, - {file = "boto3_stubs-1.24.51-py3-none-any.whl", hash = "sha256:432aebdb18e7c26bf2b148e04eb33e145976cb932bfe0f72b2d512e945927e57"}, + {file = "boto3-stubs-1.24.56.tar.gz", hash = "sha256:02e11b3669481469b45eee53fa5e0b587e5710f86bb95bd40667d1353d1e4bf6"}, + {file = "boto3_stubs-1.24.56-py3-none-any.whl", hash = "sha256:e5df3a68ddb8299404f63d19decc1f706ebdac64f3133c1e1cab747820337a75"}, ] botocore = [ {file = "botocore-1.27.38-py3-none-any.whl", hash = "sha256:46a0264ff3335496bd9cb404f83ec0d8eb7bfdef8f74a830c13e6a6b9612adea"}, @@ -1713,6 +1786,10 @@ iniconfig = [ {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, ] +isort = [ + {file = "isort-5.10.1-py3-none-any.whl", hash = 
"sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7"}, + {file = "isort-5.10.1.tar.gz", hash = "sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951"}, +] itsdangerous = [ {file = "itsdangerous-2.1.2-py3-none-any.whl", hash = "sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44"}, {file = "itsdangerous-2.1.2.tar.gz", hash = "sha256:5dbbc68b317e5e42f327f9021763545dc3fc3bfe22e6deb96aaf1fc38874156a"}, @@ -1851,10 +1928,18 @@ packaging = [ {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, ] +pathspec = [ + {file = "pathspec-0.9.0-py2.py3-none-any.whl", hash = "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a"}, + {file = "pathspec-0.9.0.tar.gz", hash = "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"}, +] pbr = [ {file = "pbr-5.9.0-py2.py3-none-any.whl", hash = "sha256:e547125940bcc052856ded43be8e101f63828c2d94239ffbe2b327ba3d5ccf0a"}, {file = "pbr-5.9.0.tar.gz", hash = "sha256:e8dca2f4b43560edef58813969f52a56cef023146cbb8931626db80e6c1c4308"}, ] +platformdirs = [ + {file = "platformdirs-2.5.2-py3-none-any.whl", hash = "sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788"}, + {file = "platformdirs-2.5.2.tar.gz", hash = "sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19"}, +] pluggy = [ {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, @@ -2208,10 +2293,6 @@ xmltodict = [ {file = "xmltodict-0.13.0-py2.py3-none-any.whl", hash = "sha256:aa89e8fd76320154a40d19a0df04a4695fb9dc5ba977cbb68ab3e4eb225e7852"}, {file = "xmltodict-0.13.0.tar.gz", hash = 
"sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56"}, ] -yapf = [ - {file = "yapf-0.31.0-py2.py3-none-any.whl", hash = "sha256:e3a234ba8455fe201eaa649cdac872d590089a18b661e39bbac7020978dd9c2e"}, - {file = "yapf-0.31.0.tar.gz", hash = "sha256:408fb9a2b254c302f49db83c59f9aa0b4b0fd0ec25be3a5c51181327922ff63d"}, -] zipp = [ {file = "zipp-3.8.1-py3-none-any.whl", hash = "sha256:47c40d7fe183a6f21403a199b3e4192cca5774656965b0a4988ad2f8feb5f009"}, {file = "zipp-3.8.1.tar.gz", hash = "sha256:05b45f1ee8f807d0cc928485ca40a07cb491cf092ff587c0df9cb1fd154848d2"}, diff --git a/pre-commit.py b/pre-commit.py index ea6a22a7fe..45f140d43a 100755 --- a/pre-commit.py +++ b/pre-commit.py @@ -1,11 +1,10 @@ #!/usr/bin/env python3 -from typing import List +import argparse +import enum import subprocess import sys -import enum -import argparse -import os +from typing import List @enum.unique @@ -37,12 +36,17 @@ def rustfmt(fix_inplace: bool = False, no_color: bool = False) -> str: return cmd -def yapf(fix_inplace: bool) -> str: - cmd = "poetry run yapf --recursive" - if fix_inplace: - cmd += " --in-place" - else: - cmd += " --diff" +def black(fix_inplace: bool) -> str: + cmd = "poetry run black" + if not fix_inplace: + cmd += " --diff --check" + return cmd + + +def isort(fix_inplace: bool) -> str: + cmd = "poetry run isort" + if not fix_inplace: + cmd += " --diff --check" return cmd @@ -71,11 +75,13 @@ def check(name: str, suffix: str, cmd: str, changed_files: List[str], no_color: else: print("Please inspect the output below and run make fmt to fix automatically.") if suffix == ".py": - print("If the output is empty, ensure that you've installed Python tooling by\n" - "running './scripts/pysync' in the current directory (no root needed)") + print( + "If the output is empty, ensure that you've installed Python tooling by\n" + "running './scripts/pysync' in the current directory (no root needed)" + ) print() print(res.stdout.decode()) - exit(1) + sys.exit(1) 
print(colorify("[OK]", Color.GREEN, no_color)) @@ -83,10 +89,12 @@ def check(name: str, suffix: str, cmd: str, changed_files: List[str], no_color: if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--fix-inplace", action="store_true", help="apply fixes inplace") - parser.add_argument("--no-color", - action="store_true", - help="disable colored output", - default=not sys.stdout.isatty()) + parser.add_argument( + "--no-color", + action="store_true", + help="disable colored output", + default=not sys.stdout.isatty(), + ) args = parser.parse_args() files = get_commit_files() @@ -101,9 +109,16 @@ if __name__ == "__main__": no_color=args.no_color, ) check( - name="yapf", + name="isort", suffix=".py", - cmd=yapf(fix_inplace=args.fix_inplace), + cmd=isort(fix_inplace=args.fix_inplace), + changed_files=files, + no_color=args.no_color, + ) + check( + name="black", + suffix=".py", + cmd=black(fix_inplace=args.fix_inplace), changed_files=files, no_color=args.no_color, ) diff --git a/pyproject.toml b/pyproject.toml index a54dbe9ebd..4f8a49a024 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,10 +30,50 @@ pytest-order = "^1.0.1" allure-pytest = "^2.9.45" [tool.poetry.dev-dependencies] -yapf = "==0.31.0" flake8 = "^3.9.2" mypy = "==0.971" +black = "^22.6.0" +isort = "^5.10.1" [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" + +[tool.black] +line-length = 100 +extend-exclude = ''' +/( + vendor +)/ +''' + +[tool.isort] +profile = "black" +line_length = 100 +skip_gitignore = true +skip = [ + "vendor", +] + +[tool.mypy] +# mypy uses regex +exclude = "^vendor/" +# some tests don't typecheck when this flag is set +check_untyped_defs = false +# Help mypy find imports when running against list of individual files. +# Without this line it would behave differently when executed on the entire project. 
+mypy_path = "$MYPY_CONFIG_FILE_DIR:$MYPY_CONFIG_FILE_DIR/test_runner" + +disallow_incomplete_defs = false +disallow_untyped_calls = false +disallow_untyped_decorators = false +disallow_untyped_defs = false +strict = true + +[[tool.mypy.overrides]] +module = [ + "asyncpg.*", + "cached_property.*", + "pg8000.*", +] +ignore_missing_imports = true diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 7f8c45c8c3..0000000000 --- a/setup.cfg +++ /dev/null @@ -1,43 +0,0 @@ -# Just trying to gather linter settings in one file. -# I wonder if there's a way to de-duplicate them... - -[flake8] -max-line-length = 100 - -[pycodestyle] -max-line-length = 100 - -[yapf] -based_on_style = pep8 -column_limit = 100 -split_all_top_level_comma_separated_values = true - -[mypy] -# mypy uses regex -exclude = ^vendor/ -# some tests don't typecheck when this flag is set -check_untyped_defs = false - -# Help mypy find imports when running against list of individual files. -# Without this line it would behave differently when executed on the entire project. 
-mypy_path = $MYPY_CONFIG_FILE_DIR:$MYPY_CONFIG_FILE_DIR/test_runner - -disallow_incomplete_defs = false -disallow_untyped_calls = false -disallow_untyped_decorators = false -disallow_untyped_defs = false -strict = true - -[mypy-asyncpg.*] -# There is some work in progress, though: https://github.com/MagicStack/asyncpg/pull/577 -ignore_missing_imports = true - -[mypy-pg8000.*] -# Used only in testing clients -ignore_missing_imports = true - -[mypy-cached_property.*] -ignore_missing_imports = true - -[mypy-pytest.*] -ignore_missing_imports = true From 4c2bb43775947775401cbb9d774823c5723a91f8 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Thu, 18 Aug 2022 13:37:28 +0100 Subject: [PATCH 22/63] Reformat all python files by black & isort --- scripts/coverage | 13 +- scripts/export_import_between_pageservers.py | 347 +++--- scripts/generate_perf_report_page.py | 152 +-- scripts/git-upload | 11 +- scripts/ingest_perf_test_result.py | 82 +- .../batch_others/test_ancestor_branch.py | 71 +- test_runner/batch_others/test_auth.py | 42 +- test_runner/batch_others/test_backpressure.py | 59 +- .../batch_others/test_basebackup_error.py | 3 +- .../batch_others/test_branch_and_gc.py | 101 +- .../batch_others/test_branch_behind.py | 92 +- test_runner/batch_others/test_branching.py | 58 +- .../batch_others/test_broken_timeline.py | 33 +- .../batch_others/test_clog_truncate.py | 55 +- test_runner/batch_others/test_close_fds.py | 28 +- test_runner/batch_others/test_config.py | 14 +- .../batch_others/test_crafted_wal_end.py | 72 +- test_runner/batch_others/test_createdropdb.py | 62 +- test_runner/batch_others/test_createuser.py | 18 +- test_runner/batch_others/test_fsm_truncate.py | 7 +- test_runner/batch_others/test_fullbackup.py | 49 +- .../batch_others/test_gc_aggressive.py | 20 +- test_runner/batch_others/test_import.py | 168 +-- test_runner/batch_others/test_large_schema.py | 19 +- test_runner/batch_others/test_lsn_mapping.py | 32 +- 
test_runner/batch_others/test_multixact.py | 31 +- test_runner/batch_others/test_neon_cli.py | 35 +- test_runner/batch_others/test_next_xid.py | 12 +- test_runner/batch_others/test_normal_work.py | 24 +- .../batch_others/test_old_request_lsn.py | 30 +- .../batch_others/test_pageserver_api.py | 138 ++- .../batch_others/test_pageserver_catchup.py | 27 +- .../batch_others/test_pageserver_restart.py | 22 +- .../batch_others/test_parallel_copy.py | 14 +- test_runner/batch_others/test_pitr_gc.py | 36 +- test_runner/batch_others/test_proxy.py | 23 +- .../batch_others/test_read_validation.py | 57 +- .../batch_others/test_readonly_node.py | 78 +- test_runner/batch_others/test_recovery.py | 15 +- .../batch_others/test_remote_storage.py | 69 +- test_runner/batch_others/test_subxacts.py | 22 +- test_runner/batch_others/test_tenant_conf.py | 65 +- .../batch_others/test_tenant_detach.py | 38 +- .../batch_others/test_tenant_relocation.py | 179 +-- test_runner/batch_others/test_tenant_tasks.py | 7 +- test_runner/batch_others/test_tenants.py | 52 +- .../test_tenants_with_remote_storage.py | 30 +- .../batch_others/test_timeline_delete.py | 47 +- .../batch_others/test_timeline_size.py | 230 ++-- test_runner/batch_others/test_twophase.py | 30 +- test_runner/batch_others/test_vm_bits.py | 54 +- test_runner/batch_others/test_wal_acceptor.py | 418 ++++--- .../batch_others/test_wal_acceptor_async.py | 267 ++-- test_runner/batch_others/test_wal_restore.py | 46 +- .../batch_pg_regress/test_isolation.py | 35 +- .../batch_pg_regress/test_neon_regress.py | 43 +- .../batch_pg_regress/test_pg_regress.py | 39 +- test_runner/conftest.py | 12 +- test_runner/fixtures/benchmark_fixture.py | 161 ++- test_runner/fixtures/compare_fixtures.py | 105 +- test_runner/fixtures/log_helper.py | 15 +- test_runner/fixtures/metrics.py | 8 +- test_runner/fixtures/neon_fixtures.py | 1093 +++++++++-------- test_runner/fixtures/pg_stats.py | 36 +- test_runner/fixtures/slow.py | 1 + test_runner/fixtures/utils.py | 42 
+- .../performance/test_branch_creation.py | 65 +- test_runner/performance/test_bulk_insert.py | 11 +- .../performance/test_bulk_tenant_create.py | 28 +- .../performance/test_compare_pg_stats.py | 79 +- test_runner/performance/test_copy.py | 31 +- test_runner/performance/test_dup_key.py | 26 +- test_runner/performance/test_gist_build.py | 9 +- test_runner/performance/test_hot_page.py | 24 +- test_runner/performance/test_hot_table.py | 20 +- .../performance/test_parallel_copy_to.py | 28 +- test_runner/performance/test_perf_pgbench.py | 83 +- test_runner/performance/test_random_writes.py | 35 +- test_runner/performance/test_seqscans.py | 30 +- test_runner/performance/test_startup.py | 17 +- .../performance/test_wal_backpressure.py | 146 ++- .../performance/test_write_amplification.py | 15 +- test_runner/pg_clients/test_pg_clients.py | 12 +- test_runner/test_broken.py | 16 +- 84 files changed, 3282 insertions(+), 2687 deletions(-) diff --git a/scripts/coverage b/scripts/coverage index f2c46d9ae9..af0d067419 100755 --- a/scripts/coverage +++ b/scripts/coverage @@ -9,13 +9,6 @@ # * https://github.com/taiki-e/cargo-llvm-cov # * https://github.com/llvm/llvm-project/tree/main/llvm/test/tools/llvm-cov -from abc import ABC, abstractmethod -from dataclasses import dataclass -from pathlib import Path -from tempfile import TemporaryDirectory -from textwrap import dedent -from typing import Any, Dict, Iterator, Iterable, List, Optional - import argparse import hashlib import json @@ -24,6 +17,12 @@ import shutil import socket import subprocess import sys +from abc import ABC, abstractmethod +from dataclasses import dataclass +from pathlib import Path +from tempfile import TemporaryDirectory +from textwrap import dedent +from typing import Any, Dict, Iterable, Iterator, List, Optional def file_mtime_or_zero(path: Path) -> int: diff --git a/scripts/export_import_between_pageservers.py b/scripts/export_import_between_pageservers.py index 96f1d36ddb..5b9fc76768 100755 --- 
a/scripts/export_import_between_pageservers.py +++ b/scripts/export_import_between_pageservers.py @@ -20,20 +20,21 @@ # For more context on how to use this, see: # https://github.com/neondatabase/cloud/wiki/Storage-format-migration -import os -from os import path -import shutil -from pathlib import Path -import tempfile -from contextlib import closing -import psycopg2 -import subprocess import argparse +import os +import shutil +import subprocess +import tempfile import time -import requests import uuid +from contextlib import closing +from os import path +from pathlib import Path +from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, TypeVar, Union, cast + +import psycopg2 +import requests from psycopg2.extensions import connection as PgConnection -from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast, Union, Tuple ############################################### ### client-side utils copied from test fixtures @@ -45,7 +46,7 @@ _global_counter = 0 def global_counter() -> int: - """ A really dumb global counter. + """A really dumb global counter. This is useful for giving output files a unique number, so if we run the same command multiple times we can keep their output separate. """ @@ -55,7 +56,7 @@ def global_counter() -> int: def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str: - """ Run a process and capture its output + """Run a process and capture its output Output will go to files named "cmd_NNN.stdout" and "cmd_NNN.stderr" where "cmd" is the name of the program and NNN is an incrementing counter. @@ -63,13 +64,13 @@ def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str: Returns basepath for files with captured output. 
""" assert type(cmd) is list - base = os.path.basename(cmd[0]) + '_{}'.format(global_counter()) + base = os.path.basename(cmd[0]) + "_{}".format(global_counter()) basepath = os.path.join(capture_dir, base) - stdout_filename = basepath + '.stdout' - stderr_filename = basepath + '.stderr' + stdout_filename = basepath + ".stdout" + stderr_filename = basepath + ".stderr" - with open(stdout_filename, 'w') as stdout_f: - with open(stderr_filename, 'w') as stderr_f: + with open(stdout_filename, "w") as stdout_f: + with open(stderr_filename, "w") as stderr_f: print('(capturing output to "{}.stdout")'.format(base)) subprocess.run(cmd, **kwargs, stdout=stdout_f, stderr=stderr_f) @@ -77,15 +78,16 @@ def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str: class PgBin: - """ A helper class for executing postgres binaries """ + """A helper class for executing postgres binaries""" + def __init__(self, log_dir: Path, pg_distrib_dir): self.log_dir = log_dir - self.pg_bin_path = os.path.join(str(pg_distrib_dir), 'bin') + self.pg_bin_path = os.path.join(str(pg_distrib_dir), "bin") self.env = os.environ.copy() - self.env['LD_LIBRARY_PATH'] = os.path.join(str(pg_distrib_dir), 'lib') + self.env["LD_LIBRARY_PATH"] = os.path.join(str(pg_distrib_dir), "lib") def _fixpath(self, command: List[str]): - if '/' not in command[0]: + if "/" not in command[0]: command[0] = os.path.join(self.pg_bin_path, command[0]) def _build_env(self, env_add: Optional[Env]) -> Env: @@ -106,15 +108,17 @@ class PgBin: """ self._fixpath(command) - print('Running command "{}"'.format(' '.join(command))) + print('Running command "{}"'.format(" ".join(command))) env = self._build_env(env) subprocess.run(command, env=env, cwd=cwd, check=True) - def run_capture(self, - command: List[str], - env: Optional[Env] = None, - cwd: Optional[str] = None, - **kwargs: Any) -> str: + def run_capture( + self, + command: List[str], + env: Optional[Env] = None, + cwd: Optional[str] = None, + **kwargs: Any, + ) 
-> str: """ Run one of the postgres binaries, with stderr and stdout redirected to a file. This is just like `run`, but for chatty programs. Returns basepath for files @@ -122,35 +126,33 @@ class PgBin: """ self._fixpath(command) - print('Running command "{}"'.format(' '.join(command))) + print('Running command "{}"'.format(" ".join(command))) env = self._build_env(env) - return subprocess_capture(str(self.log_dir), - command, - env=env, - cwd=cwd, - check=True, - **kwargs) + return subprocess_capture( + str(self.log_dir), command, env=env, cwd=cwd, check=True, **kwargs + ) class PgProtocol: - """ Reusable connection logic """ + """Reusable connection logic""" + def __init__(self, **kwargs): self.default_options = kwargs def conn_options(self, **kwargs): conn_options = self.default_options.copy() - if 'dsn' in kwargs: - conn_options.update(parse_dsn(kwargs['dsn'])) + if "dsn" in kwargs: + conn_options.update(parse_dsn(kwargs["dsn"])) conn_options.update(kwargs) # Individual statement timeout in seconds. 2 minutes should be # enough for our tests, but if you need a longer, you can # change it by calling "SET statement_timeout" after # connecting. 
- if 'options' in conn_options: - conn_options['options'] = f"-cstatement_timeout=120s " + conn_options['options'] + if "options" in conn_options: + conn_options["options"] = f"-cstatement_timeout=120s " + conn_options["options"] else: - conn_options['options'] = "-cstatement_timeout=120s" + conn_options["options"] = "-cstatement_timeout=120s" return conn_options # autocommit=True here by default because that's what we need most of the time @@ -194,18 +196,18 @@ class PgProtocol: class VanillaPostgres(PgProtocol): def __init__(self, pgdatadir: Path, pg_bin: PgBin, port: int, init=True): - super().__init__(host='localhost', port=port, dbname='postgres') + super().__init__(host="localhost", port=port, dbname="postgres") self.pgdatadir = pgdatadir self.pg_bin = pg_bin self.running = False if init: - self.pg_bin.run_capture(['initdb', '-D', str(pgdatadir)]) + self.pg_bin.run_capture(["initdb", "-D", str(pgdatadir)]) self.configure([f"port = {port}\n"]) def configure(self, options: List[str]): """Append lines into postgresql.conf file.""" assert not self.running - with open(os.path.join(self.pgdatadir, 'postgresql.conf'), 'a') as conf_file: + with open(os.path.join(self.pgdatadir, "postgresql.conf"), "a") as conf_file: conf_file.write("\n".join(options)) def start(self, log_path: Optional[str] = None): @@ -216,12 +218,13 @@ class VanillaPostgres(PgProtocol): log_path = os.path.join(self.pgdatadir, "pg.log") self.pg_bin.run_capture( - ['pg_ctl', '-w', '-D', str(self.pgdatadir), '-l', log_path, 'start']) + ["pg_ctl", "-w", "-D", str(self.pgdatadir), "-l", log_path, "start"] + ) def stop(self): assert self.running self.running = False - self.pg_bin.run_capture(['pg_ctl', '-w', '-D', str(self.pgdatadir), 'stop']) + self.pg_bin.run_capture(["pg_ctl", "-w", "-D", str(self.pgdatadir), "stop"]) def __enter__(self): return self @@ -246,9 +249,9 @@ class NeonPageserverHttpClient(requests.Session): res.raise_for_status() except requests.RequestException as e: try: - msg = 
res.json()['msg'] + msg = res.json()["msg"] except: - msg = '' + msg = "" raise NeonPageserverApiException(msg) from e def check_status(self): @@ -265,17 +268,17 @@ class NeonPageserverHttpClient(requests.Session): res = self.post( f"http://{self.host}:{self.port}/v1/tenant", json={ - 'new_tenant_id': new_tenant_id.hex, + "new_tenant_id": new_tenant_id.hex, }, ) if res.status_code == 409: if ok_if_exists: - print(f'could not create tenant: already exists for id {new_tenant_id}') + print(f"could not create tenant: already exists for id {new_tenant_id}") else: res.raise_for_status() elif res.status_code == 201: - print(f'created tenant {new_tenant_id}') + print(f"created tenant {new_tenant_id}") else: self.verbose_error(res) @@ -299,47 +302,55 @@ class NeonPageserverHttpClient(requests.Session): def lsn_to_hex(num: int) -> str: - """ Convert lsn from int to standard hex notation. """ - return "{:X}/{:X}".format(num >> 32, num & 0xffffffff) + """Convert lsn from int to standard hex notation.""" + return "{:X}/{:X}".format(num >> 32, num & 0xFFFFFFFF) def lsn_from_hex(lsn_hex: str) -> int: - """ Convert lsn from hex notation to int. """ - l, r = lsn_hex.split('/') + """Convert lsn from hex notation to int.""" + l, r = lsn_hex.split("/") return (int(l, 16) << 32) + int(r, 16) -def remote_consistent_lsn(pageserver_http_client: NeonPageserverHttpClient, - tenant: uuid.UUID, - timeline: uuid.UUID) -> int: +def remote_consistent_lsn( + pageserver_http_client: NeonPageserverHttpClient, tenant: uuid.UUID, timeline: uuid.UUID +) -> int: detail = pageserver_http_client.timeline_detail(tenant, timeline) - if detail['remote'] is None: + if detail["remote"] is None: # No remote information at all. This happens right after creating # a timeline, before any part of it has been uploaded to remote # storage yet. 
return 0 else: - lsn_str = detail['remote']['remote_consistent_lsn'] + lsn_str = detail["remote"]["remote_consistent_lsn"] assert isinstance(lsn_str, str) return lsn_from_hex(lsn_str) -def wait_for_upload(pageserver_http_client: NeonPageserverHttpClient, - tenant: uuid.UUID, - timeline: uuid.UUID, - lsn: int): +def wait_for_upload( + pageserver_http_client: NeonPageserverHttpClient, + tenant: uuid.UUID, + timeline: uuid.UUID, + lsn: int, +): """waits for local timeline upload up to specified lsn""" for i in range(10): current_lsn = remote_consistent_lsn(pageserver_http_client, tenant, timeline) if current_lsn >= lsn: return - print("waiting for remote_consistent_lsn to reach {}, now {}, iteration {}".format( - lsn_to_hex(lsn), lsn_to_hex(current_lsn), i + 1)) + print( + "waiting for remote_consistent_lsn to reach {}, now {}, iteration {}".format( + lsn_to_hex(lsn), lsn_to_hex(current_lsn), i + 1 + ) + ) time.sleep(1) - raise Exception("timed out while waiting for remote_consistent_lsn to reach {}, was {}".format( - lsn_to_hex(lsn), lsn_to_hex(current_lsn))) + raise Exception( + "timed out while waiting for remote_consistent_lsn to reach {}, was {}".format( + lsn_to_hex(lsn), lsn_to_hex(current_lsn) + ) + ) ############## @@ -399,7 +410,7 @@ def reconstruct_paths(log_dir, pg_bin, base_tar): # Add all template0copy paths to template0 prefix = f"base/{oid}/" if filepath.startswith(prefix): - suffix = filepath[len(prefix):] + suffix = filepath[len(prefix) :] yield f"base/{template0_oid}/{suffix}" elif filepath.startswith("global"): print(f"skipping {database} global file {filepath}") @@ -451,15 +462,17 @@ def get_rlsn(pageserver_connstr, tenant_id, timeline_id): return last_lsn, prev_lsn -def import_timeline(args, - psql_path, - pageserver_connstr, - pageserver_http, - tenant_id, - timeline_id, - last_lsn, - prev_lsn, - tar_filename): +def import_timeline( + args, + psql_path, + pageserver_connstr, + pageserver_http, + tenant_id, + timeline_id, + last_lsn, + prev_lsn, 
+ tar_filename, +): # Import timelines to new pageserver import_cmd = f"import basebackup {tenant_id} {timeline_id} {last_lsn} {last_lsn}" full_cmd = rf"""cat {tar_filename} | {psql_path} {pageserver_connstr} -c '{import_cmd}' """ @@ -469,34 +482,30 @@ def import_timeline(args, print(f"Running: {full_cmd}") - with open(stdout_filename, 'w') as stdout_f: - with open(stderr_filename2, 'w') as stderr_f: + with open(stdout_filename, "w") as stdout_f: + with open(stderr_filename2, "w") as stderr_f: print(f"(capturing output to {stdout_filename})") pg_bin = PgBin(args.work_dir, args.pg_distrib_dir) - subprocess.run(full_cmd, - stdout=stdout_f, - stderr=stderr_f, - env=pg_bin._build_env(None), - shell=True, - check=True) + subprocess.run( + full_cmd, + stdout=stdout_f, + stderr=stderr_f, + env=pg_bin._build_env(None), + shell=True, + check=True, + ) print(f"Done import") # Wait until pageserver persists the files - wait_for_upload(pageserver_http, - uuid.UUID(tenant_id), - uuid.UUID(timeline_id), - lsn_from_hex(last_lsn)) + wait_for_upload( + pageserver_http, uuid.UUID(tenant_id), uuid.UUID(timeline_id), lsn_from_hex(last_lsn) + ) -def export_timeline(args, - psql_path, - pageserver_connstr, - tenant_id, - timeline_id, - last_lsn, - prev_lsn, - tar_filename): +def export_timeline( + args, psql_path, pageserver_connstr, tenant_id, timeline_id, last_lsn, prev_lsn, tar_filename +): # Choose filenames incomplete_filename = tar_filename + ".incomplete" stderr_filename = path.join(args.work_dir, f"{tenant_id}_{timeline_id}.stderr") @@ -507,15 +516,13 @@ def export_timeline(args, # Run export command print(f"Running: {cmd}") - with open(incomplete_filename, 'w') as stdout_f: - with open(stderr_filename, 'w') as stderr_f: + with open(incomplete_filename, "w") as stdout_f: + with open(stderr_filename, "w") as stderr_f: print(f"(capturing output to {incomplete_filename})") pg_bin = PgBin(args.work_dir, args.pg_distrib_dir) - subprocess.run(cmd, - stdout=stdout_f, - stderr=stderr_f, 
- env=pg_bin._build_env(None), - check=True) + subprocess.run( + cmd, stdout=stdout_f, stderr=stderr_f, env=pg_bin._build_env(None), check=True + ) # Add missing rels pg_bin = PgBin(args.work_dir, args.pg_distrib_dir) @@ -551,27 +558,28 @@ def main(args: argparse.Namespace): for timeline in timelines: # Skip timelines we don't need to export - if args.timelines and timeline['timeline_id'] not in args.timelines: + if args.timelines and timeline["timeline_id"] not in args.timelines: print(f"Skipping timeline {timeline['timeline_id']}") continue # Choose filenames - tar_filename = path.join(args.work_dir, - f"{timeline['tenant_id']}_{timeline['timeline_id']}.tar") + tar_filename = path.join( + args.work_dir, f"{timeline['tenant_id']}_{timeline['timeline_id']}.tar" + ) # Export timeline from old pageserver if args.only_import is False: last_lsn, prev_lsn = get_rlsn( old_pageserver_connstr, - timeline['tenant_id'], - timeline['timeline_id'], + timeline["tenant_id"], + timeline["timeline_id"], ) export_timeline( args, psql_path, old_pageserver_connstr, - timeline['tenant_id'], - timeline['timeline_id'], + timeline["tenant_id"], + timeline["timeline_id"], last_lsn, prev_lsn, tar_filename, @@ -583,8 +591,8 @@ def main(args: argparse.Namespace): psql_path, new_pageserver_connstr, new_http_client, - timeline['tenant_id'], - timeline['timeline_id'], + timeline["tenant_id"], + timeline["timeline_id"], last_lsn, prev_lsn, tar_filename, @@ -592,117 +600,118 @@ def main(args: argparse.Namespace): # Re-export and compare re_export_filename = tar_filename + ".reexport" - export_timeline(args, - psql_path, - new_pageserver_connstr, - timeline['tenant_id'], - timeline['timeline_id'], - last_lsn, - prev_lsn, - re_export_filename) + export_timeline( + args, + psql_path, + new_pageserver_connstr, + timeline["tenant_id"], + timeline["timeline_id"], + last_lsn, + prev_lsn, + re_export_filename, + ) # Check the size is the same - old_size = os.path.getsize(tar_filename), - new_size = 
os.path.getsize(re_export_filename), + old_size = (os.path.getsize(tar_filename),) + new_size = (os.path.getsize(re_export_filename),) if old_size != new_size: raise AssertionError(f"Sizes don't match old: {old_size} new: {new_size}") -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( - '--tenant-id', - dest='tenants', + "--tenant-id", + dest="tenants", required=True, - nargs='+', - help='Id of the tenant to migrate. You can pass multiple arguments', + nargs="+", + help="Id of the tenant to migrate. You can pass multiple arguments", ) parser.add_argument( - '--timeline-id', - dest='timelines', + "--timeline-id", + dest="timelines", required=False, - nargs='+', - help='Id of the timeline to migrate. You can pass multiple arguments', + nargs="+", + help="Id of the timeline to migrate. You can pass multiple arguments", ) parser.add_argument( - '--from-host', - dest='old_pageserver_host', + "--from-host", + dest="old_pageserver_host", required=True, - help='Host of the pageserver to migrate data from', + help="Host of the pageserver to migrate data from", ) parser.add_argument( - '--from-http-port', - dest='old_pageserver_http_port', + "--from-http-port", + dest="old_pageserver_http_port", required=False, type=int, default=9898, - help='HTTP port of the pageserver to migrate data from. Default: 9898', + help="HTTP port of the pageserver to migrate data from. Default: 9898", ) parser.add_argument( - '--from-pg-port', - dest='old_pageserver_pg_port', + "--from-pg-port", + dest="old_pageserver_pg_port", required=False, type=int, default=6400, - help='pg port of the pageserver to migrate data from. Default: 6400', + help="pg port of the pageserver to migrate data from. 
Default: 6400", ) parser.add_argument( - '--to-host', - dest='new_pageserver_host', + "--to-host", + dest="new_pageserver_host", required=True, - help='Host of the pageserver to migrate data to', + help="Host of the pageserver to migrate data to", ) parser.add_argument( - '--to-http-port', - dest='new_pageserver_http_port', + "--to-http-port", + dest="new_pageserver_http_port", required=False, default=9898, type=int, - help='HTTP port of the pageserver to migrate data to. Default: 9898', + help="HTTP port of the pageserver to migrate data to. Default: 9898", ) parser.add_argument( - '--to-pg-port', - dest='new_pageserver_pg_port', + "--to-pg-port", + dest="new_pageserver_pg_port", required=False, default=6400, type=int, - help='pg port of the pageserver to migrate data to. Default: 6400', + help="pg port of the pageserver to migrate data to. Default: 6400", ) parser.add_argument( - '--ignore-tenant-exists', - dest='ok_if_exists', + "--ignore-tenant-exists", + dest="ok_if_exists", required=False, - help= - 'Ignore error if we are trying to create the tenant that already exists. It can be dangerous if existing tenant already contains some data.', + help="Ignore error if we are trying to create the tenant that already exists. It can be dangerous if existing tenant already contains some data.", ) parser.add_argument( - '--pg-distrib-dir', - dest='pg_distrib_dir', + "--pg-distrib-dir", + dest="pg_distrib_dir", required=False, - default='/usr/local/', - help='Path where postgres binaries are installed. Default: /usr/local/', + default="/usr/local/", + help="Path where postgres binaries are installed. Default: /usr/local/", ) parser.add_argument( - '--psql-path', - dest='psql_path', + "--psql-path", + dest="psql_path", required=False, - default='/usr/local/bin/psql', - help='Path to the psql binary. Default: /usr/local/bin/psql', + default="/usr/local/bin/psql", + help="Path to the psql binary. 
Default: /usr/local/bin/psql", ) parser.add_argument( - '--only-import', - dest='only_import', + "--only-import", + dest="only_import", required=False, default=False, - action='store_true', - help='Skip export and tenant creation part', + action="store_true", + help="Skip export and tenant creation part", ) parser.add_argument( - '--work-dir', - dest='work_dir', + "--work-dir", + dest="work_dir", required=True, default=False, - help='directory where temporary tar files are stored', + help="directory where temporary tar files are stored", ) args = parser.parse_args() main(args) diff --git a/scripts/generate_perf_report_page.py b/scripts/generate_perf_report_page.py index 23fa4b76a3..b5b49bb600 100755 --- a/scripts/generate_perf_report_page.py +++ b/scripts/generate_perf_report_page.py @@ -1,31 +1,36 @@ #!/usr/bin/env python3 import argparse +import json from dataclasses import dataclass from pathlib import Path -import json from typing import Any, Dict, List, Optional, Tuple, cast + from jinja2 import Template # skip 'input' columns. 
They are included in the header and just blow the table -EXCLUDE_COLUMNS = frozenset({ - 'scale', - 'duration', - 'number_of_clients', - 'number_of_threads', - 'init_start_timestamp', - 'init_end_timestamp', - 'run_start_timestamp', - 'run_end_timestamp', -}) +EXCLUDE_COLUMNS = frozenset( + { + "scale", + "duration", + "number_of_clients", + "number_of_threads", + "init_start_timestamp", + "init_end_timestamp", + "run_start_timestamp", + "run_end_timestamp", + } +) -KEY_EXCLUDE_FIELDS = frozenset({ - 'init_start_timestamp', - 'init_end_timestamp', - 'run_start_timestamp', - 'run_end_timestamp', -}) -NEGATIVE_COLOR = 'negative' -POSITIVE_COLOR = 'positive' +KEY_EXCLUDE_FIELDS = frozenset( + { + "init_start_timestamp", + "init_end_timestamp", + "run_start_timestamp", + "run_end_timestamp", + } +) +NEGATIVE_COLOR = "negative" +POSITIVE_COLOR = "positive" EPS = 1e-6 @@ -55,75 +60,76 @@ def get_columns(values: List[Dict[Any, Any]]) -> Tuple[List[Tuple[str, str]], Li value_columns = [] common_columns = [] for item in values: - if item['name'] in KEY_EXCLUDE_FIELDS: + if item["name"] in KEY_EXCLUDE_FIELDS: continue - if item['report'] != 'test_param': - value_columns.append(cast(str, item['name'])) + if item["report"] != "test_param": + value_columns.append(cast(str, item["name"])) else: - common_columns.append((cast(str, item['name']), cast(str, item['value']))) + common_columns.append((cast(str, item["name"]), cast(str, item["value"]))) value_columns.sort() common_columns.sort(key=lambda x: x[0]) # sort by name return common_columns, value_columns def format_ratio(ratio: float, report: str) -> Tuple[str, str]: - color = '' - sign = '+' if ratio > 0 else '' + color = "" + sign = "+" if ratio > 0 else "" if abs(ratio) < 0.05: - return f' ({sign}{ratio:.2f})', color + return f" ({sign}{ratio:.2f})", color - if report not in {'test_param', 'higher_is_better', 'lower_is_better'}: - raise ValueError(f'Unknown report type: {report}') + if report not in {"test_param", 
"higher_is_better", "lower_is_better"}: + raise ValueError(f"Unknown report type: {report}") - if report == 'test_param': - return f'{ratio:.2f}', color + if report == "test_param": + return f"{ratio:.2f}", color if ratio > 0: - if report == 'higher_is_better': + if report == "higher_is_better": color = POSITIVE_COLOR - elif report == 'lower_is_better': + elif report == "lower_is_better": color = NEGATIVE_COLOR elif ratio < 0: - if report == 'higher_is_better': + if report == "higher_is_better": color = NEGATIVE_COLOR - elif report == 'lower_is_better': + elif report == "lower_is_better": color = POSITIVE_COLOR - return f' ({sign}{ratio:.2f})', color + return f" ({sign}{ratio:.2f})", color def extract_value(name: str, suit_run: SuitRun) -> Optional[Dict[str, Any]]: - for item in suit_run.values['data']: - if item['name'] == name: + for item in suit_run.values["data"]: + if item["name"] == name: return cast(Dict[str, Any], item) return None -def get_row_values(columns: List[str], run_result: SuitRun, - prev_result: Optional[SuitRun]) -> List[RowValue]: +def get_row_values( + columns: List[str], run_result: SuitRun, prev_result: Optional[SuitRun] +) -> List[RowValue]: row_values = [] for column in columns: current_value = extract_value(column, run_result) if current_value is None: # should never happen - raise ValueError(f'{column} not found in {run_result.values}') + raise ValueError(f"{column} not found in {run_result.values}") value = current_value["value"] if isinstance(value, float): - value = f'{value:.2f}' + value = f"{value:.2f}" if prev_result is None: - row_values.append(RowValue(value, '', '')) + row_values.append(RowValue(value, "", "")) continue prev_value = extract_value(column, prev_result) if prev_value is None: # this might happen when new metric is added and there is no value for it in previous run # let this be here, TODO add proper handling when this actually happens - raise ValueError(f'{column} not found in previous result') + raise 
ValueError(f"{column} not found in previous result") # adding `EPS` to each term to avoid ZeroDivisionError when the denominator is zero - ratio = (float(value) + EPS) / (float(prev_value['value']) + EPS) - 1 - ratio_display, color = format_ratio(ratio, current_value['report']) + ratio = (float(value) + EPS) / (float(prev_value["value"]) + EPS) - 1 + ratio_display, color = format_ratio(ratio, current_value["report"]) row_values.append(RowValue(value, color, ratio_display)) return row_values @@ -139,8 +145,10 @@ def prepare_rows_from_runs(value_columns: List[str], runs: List[SuitRun]) -> Lis prev_run = None for run in runs: rows.append( - SuiteRunTableRow(revision=run.revision, - values=get_row_values(value_columns, run, prev_run))) + SuiteRunTableRow( + revision=run.revision, values=get_row_values(value_columns, run, prev_run) + ) + ) prev_run = run return rows @@ -152,27 +160,29 @@ def main(args: argparse.Namespace) -> None: # we have files in form: _.json # fill them in the hashmap so we have grouped items for the # same run configuration (scale, duration etc.) ordered by counter. 
- for item in sorted(input_dir.iterdir(), key=lambda x: int(x.name.split('_')[0])): + for item in sorted(input_dir.iterdir(), key=lambda x: int(x.name.split("_")[0])): run_data = json.loads(item.read_text()) - revision = run_data['revision'] + revision = run_data["revision"] - for suit_result in run_data['result']: - key = "{}{}".format(run_data['platform'], suit_result['suit']) + for suit_result in run_data["result"]: + key = "{}{}".format(run_data["platform"], suit_result["suit"]) # pack total duration as a synthetic value - total_duration = suit_result['total_duration'] - suit_result['data'].append({ - 'name': 'total_duration', - 'value': total_duration, - 'unit': 's', - 'report': 'lower_is_better', - }) - common_columns, value_columns = get_columns(suit_result['data']) + total_duration = suit_result["total_duration"] + suit_result["data"].append( + { + "name": "total_duration", + "value": total_duration, + "unit": "s", + "report": "lower_is_better", + } + ) + common_columns, value_columns = get_columns(suit_result["data"]) grouped_runs.setdefault( key, SuitRuns( - platform=run_data['platform'], - suit=suit_result['suit'], + platform=run_data["platform"], + suit=suit_result["suit"], common_columns=common_columns, value_columns=value_columns, runs=[], @@ -184,26 +194,26 @@ def main(args: argparse.Namespace) -> None: for result in grouped_runs.values(): suit = result.suit context[suit] = { - 'common_columns': result.common_columns, - 'value_columns': result.value_columns, - 'platform': result.platform, + "common_columns": result.common_columns, + "value_columns": result.value_columns, + "platform": result.platform, # reverse the order so newest results are on top of the table - 'rows': reversed(prepare_rows_from_runs(result.value_columns, result.runs)), + "rows": reversed(prepare_rows_from_runs(result.value_columns, result.runs)), } - template = Template((Path(__file__).parent / 'perf_report_template.html').read_text()) + template = Template((Path(__file__).parent 
/ "perf_report_template.html").read_text()) Path(args.out).write_text(template.render(context=context)) -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( - '--input-dir', - dest='input_dir', + "--input-dir", + dest="input_dir", required=True, - help='Directory with jsons generated by the test suite', + help="Directory with jsons generated by the test suite", ) - parser.add_argument('--out', required=True, help='Output html file path') + parser.add_argument("--out", required=True, help="Output html file path") args = parser.parse_args() main(args) diff --git a/scripts/git-upload b/scripts/git-upload index a53987894a..d56c0f8e94 100755 --- a/scripts/git-upload +++ b/scripts/git-upload @@ -1,17 +1,16 @@ #!/usr/bin/env python3 -from contextlib import contextmanager -import shlex -from tempfile import TemporaryDirectory -from distutils.dir_util import copy_tree -from pathlib import Path - import argparse import os +import shlex import shutil import subprocess import sys import textwrap +from contextlib import contextmanager +from distutils.dir_util import copy_tree +from pathlib import Path +from tempfile import TemporaryDirectory from typing import Optional diff --git a/scripts/ingest_perf_test_result.py b/scripts/ingest_perf_test_result.py index 89463c986a..71f7ad3262 100644 --- a/scripts/ingest_perf_test_result.py +++ b/scripts/ingest_perf_test_result.py @@ -1,12 +1,13 @@ #!/usr/bin/env python3 import argparse -from contextlib import contextmanager import json import os +from contextlib import contextmanager +from datetime import datetime +from pathlib import Path + import psycopg2 import psycopg2.extras -from pathlib import Path -from datetime import datetime CREATE_TABLE = """ CREATE TABLE IF NOT EXISTS perf_test_results ( @@ -24,15 +25,15 @@ CREATE TABLE IF NOT EXISTS perf_test_results ( def err(msg): - print(f'error: {msg}') + print(f"error: {msg}") exit(1) @contextmanager def get_connection_cursor(): 
- connstr = os.getenv('DATABASE_URL') + connstr = os.getenv("DATABASE_URL") if not connstr: - err('DATABASE_URL environment variable is not set') + err("DATABASE_URL environment variable is not set") with psycopg2.connect(connstr) as conn: with conn.cursor() as cur: yield cur @@ -44,33 +45,35 @@ def create_table(cur): def ingest_perf_test_result(cursor, data_dile: Path, recorded_at_timestamp: int) -> int: run_data = json.loads(data_dile.read_text()) - revision = run_data['revision'] - platform = run_data['platform'] + revision = run_data["revision"] + platform = run_data["platform"] - run_result = run_data['result'] + run_result = run_data["result"] args_list = [] for suit_result in run_result: - suit = suit_result['suit'] - total_duration = suit_result['total_duration'] + suit = suit_result["suit"] + total_duration = suit_result["total_duration"] - suit_result['data'].append({ - 'name': 'total_duration', - 'value': total_duration, - 'unit': 's', - 'report': 'lower_is_better', - }) + suit_result["data"].append( + { + "name": "total_duration", + "value": total_duration, + "unit": "s", + "report": "lower_is_better", + } + ) - for metric in suit_result['data']: + for metric in suit_result["data"]: values = { - 'suit': suit, - 'revision': revision, - 'platform': platform, - 'metric_name': metric['name'], - 'metric_value': metric['value'], - 'metric_unit': metric['unit'], - 'metric_report_type': metric['report'], - 'recorded_at_timestamp': datetime.utcfromtimestamp(recorded_at_timestamp), + "suit": suit, + "revision": revision, + "platform": platform, + "metric_name": metric["name"], + "metric_value": metric["value"], + "metric_unit": metric["unit"], + "metric_report_type": metric["report"], + "recorded_at_timestamp": datetime.utcfromtimestamp(recorded_at_timestamp), } args_list.append(values) @@ -104,13 +107,16 @@ def ingest_perf_test_result(cursor, data_dile: Path, recorded_at_timestamp: int) def main(): - parser = argparse.ArgumentParser(description='Perf test result 
uploader. \ - Database connection string should be provided via DATABASE_URL environment variable', ) + parser = argparse.ArgumentParser( + description="Perf test result uploader. \ + Database connection string should be provided via DATABASE_URL environment variable", + ) parser.add_argument( - '--ingest', + "--ingest", type=Path, - help='Path to perf test result file, or directory with perf test result files') - parser.add_argument('--initdb', action='store_true', help='Initialuze database') + help="Path to perf test result file, or directory with perf test result files", + ) + parser.add_argument("--initdb", action="store_true", help="Initialuze database") args = parser.parse_args() with get_connection_cursor() as cur: @@ -118,19 +124,19 @@ def main(): create_table(cur) if not args.ingest.exists(): - err(f'ingest path {args.ingest} does not exist') + err(f"ingest path {args.ingest} does not exist") if args.ingest: if args.ingest.is_dir(): - for item in sorted(args.ingest.iterdir(), key=lambda x: int(x.name.split('_')[0])): - recorded_at_timestamp = int(item.name.split('_')[0]) + for item in sorted(args.ingest.iterdir(), key=lambda x: int(x.name.split("_")[0])): + recorded_at_timestamp = int(item.name.split("_")[0]) ingested = ingest_perf_test_result(cur, item, recorded_at_timestamp) - print(f'Ingested {ingested} metric values from {item}') + print(f"Ingested {ingested} metric values from {item}") else: - recorded_at_timestamp = int(args.ingest.name.split('_')[0]) + recorded_at_timestamp = int(args.ingest.name.split("_")[0]) ingested = ingest_perf_test_result(cur, args.ingest, recorded_at_timestamp) - print(f'Ingested {ingested} metric values from {args.ingest}') + print(f"Ingested {ingested} metric values from {args.ingest}") -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/test_runner/batch_others/test_ancestor_branch.py b/test_runner/batch_others/test_ancestor_branch.py index c4d36da043..96612a8aef 100644 --- 
a/test_runner/batch_others/test_ancestor_branch.py +++ b/test_runner/batch_others/test_ancestor_branch.py @@ -13,83 +13,90 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder): # Extend compaction_period and gc_period to disable background compaction and gc. tenant, _ = env.neon_cli.create_tenant( conf={ - 'gc_period': '10 m', - 'gc_horizon': '1048576', - 'checkpoint_distance': '4194304', - 'compaction_period': '10 m', - 'compaction_threshold': '2', - 'compaction_target_size': '4194304', - }) + "gc_period": "10 m", + "gc_horizon": "1048576", + "checkpoint_distance": "4194304", + "compaction_period": "10 m", + "compaction_threshold": "2", + "compaction_target_size": "4194304", + } + ) env.pageserver.safe_psql("failpoints flush-frozen-before-sync=sleep(10000)") - pg_branch0 = env.postgres.create_start('main', tenant_id=tenant) + pg_branch0 = env.postgres.create_start("main", tenant_id=tenant) branch0_cur = pg_branch0.connect().cursor() branch0_timeline = query_scalar(branch0_cur, "SHOW neon.timeline_id") log.info(f"b0 timeline {branch0_timeline}") # Create table, and insert 100k rows. - branch0_lsn = query_scalar(branch0_cur, 'SELECT pg_current_wal_insert_lsn()') + branch0_lsn = query_scalar(branch0_cur, "SELECT pg_current_wal_insert_lsn()") log.info(f"b0 at lsn {branch0_lsn}") - branch0_cur.execute('CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)') - branch0_cur.execute(''' + branch0_cur.execute("CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)") + branch0_cur.execute( + """ INSERT INTO foo SELECT '00112233445566778899AABBCCDDEEFF' || ':branch0:' || g FROM generate_series(1, 100000) g - ''') - lsn_100 = query_scalar(branch0_cur, 'SELECT pg_current_wal_insert_lsn()') - log.info(f'LSN after 100k rows: {lsn_100}') + """ + ) + lsn_100 = query_scalar(branch0_cur, "SELECT pg_current_wal_insert_lsn()") + log.info(f"LSN after 100k rows: {lsn_100}") # Create branch1. 
- env.neon_cli.create_branch('branch1', 'main', tenant_id=tenant, ancestor_start_lsn=lsn_100) - pg_branch1 = env.postgres.create_start('branch1', tenant_id=tenant) + env.neon_cli.create_branch("branch1", "main", tenant_id=tenant, ancestor_start_lsn=lsn_100) + pg_branch1 = env.postgres.create_start("branch1", tenant_id=tenant) log.info("postgres is running on 'branch1' branch") branch1_cur = pg_branch1.connect().cursor() branch1_timeline = query_scalar(branch1_cur, "SHOW neon.timeline_id") log.info(f"b1 timeline {branch1_timeline}") - branch1_lsn = query_scalar(branch1_cur, 'SELECT pg_current_wal_insert_lsn()') + branch1_lsn = query_scalar(branch1_cur, "SELECT pg_current_wal_insert_lsn()") log.info(f"b1 at lsn {branch1_lsn}") # Insert 100k rows. - branch1_cur.execute(''' + branch1_cur.execute( + """ INSERT INTO foo SELECT '00112233445566778899AABBCCDDEEFF' || ':branch1:' || g FROM generate_series(1, 100000) g - ''') - lsn_200 = query_scalar(branch1_cur, 'SELECT pg_current_wal_insert_lsn()') - log.info(f'LSN after 200k rows: {lsn_200}') + """ + ) + lsn_200 = query_scalar(branch1_cur, "SELECT pg_current_wal_insert_lsn()") + log.info(f"LSN after 200k rows: {lsn_200}") # Create branch2. - env.neon_cli.create_branch('branch2', 'branch1', tenant_id=tenant, ancestor_start_lsn=lsn_200) - pg_branch2 = env.postgres.create_start('branch2', tenant_id=tenant) + env.neon_cli.create_branch("branch2", "branch1", tenant_id=tenant, ancestor_start_lsn=lsn_200) + pg_branch2 = env.postgres.create_start("branch2", tenant_id=tenant) log.info("postgres is running on 'branch2' branch") branch2_cur = pg_branch2.connect().cursor() branch2_timeline = query_scalar(branch2_cur, "SHOW neon.timeline_id") log.info(f"b2 timeline {branch2_timeline}") - branch2_lsn = query_scalar(branch2_cur, 'SELECT pg_current_wal_insert_lsn()') + branch2_lsn = query_scalar(branch2_cur, "SELECT pg_current_wal_insert_lsn()") log.info(f"b2 at lsn {branch2_lsn}") # Insert 100k rows. 
- branch2_cur.execute(''' + branch2_cur.execute( + """ INSERT INTO foo SELECT '00112233445566778899AABBCCDDEEFF' || ':branch2:' || g FROM generate_series(1, 100000) g - ''') - lsn_300 = query_scalar(branch2_cur, 'SELECT pg_current_wal_insert_lsn()') - log.info(f'LSN after 300k rows: {lsn_300}') + """ + ) + lsn_300 = query_scalar(branch2_cur, "SELECT pg_current_wal_insert_lsn()") + log.info(f"LSN after 300k rows: {lsn_300}") # Run compaction on branch1. - compact = f'compact {tenant.hex} {branch1_timeline} {lsn_200}' + compact = f"compact {tenant.hex} {branch1_timeline} {lsn_200}" log.info(compact) env.pageserver.safe_psql(compact) - assert query_scalar(branch0_cur, 'SELECT count(*) FROM foo') == 100000 + assert query_scalar(branch0_cur, "SELECT count(*) FROM foo") == 100000 - assert query_scalar(branch1_cur, 'SELECT count(*) FROM foo') == 200000 + assert query_scalar(branch1_cur, "SELECT count(*) FROM foo") == 200000 - assert query_scalar(branch2_cur, 'SELECT count(*) FROM foo') == 300000 + assert query_scalar(branch2_cur, "SELECT count(*) FROM foo") == 300000 diff --git a/test_runner/batch_others/test_auth.py b/test_runner/batch_others/test_auth.py index 0fd0a5d7e3..16d6ae45c3 100644 --- a/test_runner/batch_others/test_auth.py +++ b/test_runner/batch_others/test_auth.py @@ -1,7 +1,8 @@ from contextlib import closing from uuid import uuid4 -from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserverApiException + import pytest +from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserverApiException def test_pageserver_auth(neon_env_builder: NeonEnvBuilder): @@ -23,41 +24,46 @@ def test_pageserver_auth(neon_env_builder: NeonEnvBuilder): ps.safe_psql("set FOO", password=tenant_token) ps.safe_psql("set FOO", password=management_token) - new_timeline_id = env.neon_cli.create_branch('test_pageserver_auth', - tenant_id=env.initial_tenant) + new_timeline_id = env.neon_cli.create_branch( + "test_pageserver_auth", tenant_id=env.initial_tenant + ) # tenant can 
create branches - tenant_http_client.timeline_create(tenant_id=env.initial_tenant, - ancestor_timeline_id=new_timeline_id) + tenant_http_client.timeline_create( + tenant_id=env.initial_tenant, ancestor_timeline_id=new_timeline_id + ) # console can create branches for tenant - management_http_client.timeline_create(tenant_id=env.initial_tenant, - ancestor_timeline_id=new_timeline_id) + management_http_client.timeline_create( + tenant_id=env.initial_tenant, ancestor_timeline_id=new_timeline_id + ) # fail to create branch using token with different tenant_id - with pytest.raises(NeonPageserverApiException, - match='Forbidden: Tenant id mismatch. Permission denied'): - invalid_tenant_http_client.timeline_create(tenant_id=env.initial_tenant, - ancestor_timeline_id=new_timeline_id) + with pytest.raises( + NeonPageserverApiException, match="Forbidden: Tenant id mismatch. Permission denied" + ): + invalid_tenant_http_client.timeline_create( + tenant_id=env.initial_tenant, ancestor_timeline_id=new_timeline_id + ) # create tenant using management token management_http_client.tenant_create() # fail to create tenant using tenant token with pytest.raises( - NeonPageserverApiException, - match='Forbidden: Attempt to access management api with tenant scope. Permission denied' + NeonPageserverApiException, + match="Forbidden: Attempt to access management api with tenant scope. 
Permission denied", ): tenant_http_client.tenant_create() -@pytest.mark.parametrize('with_safekeepers', [False, True]) +@pytest.mark.parametrize("with_safekeepers", [False, True]) def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder, with_safekeepers: bool): neon_env_builder.auth_enabled = True if with_safekeepers: neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - branch = f'test_compute_auth_to_pageserver{with_safekeepers}' + branch = f"test_compute_auth_to_pageserver{with_safekeepers}" env.neon_cli.create_branch(branch) pg = env.postgres.create_start(branch) @@ -65,7 +71,7 @@ def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder, with_safek with conn.cursor() as cur: # we rely upon autocommit after each statement # as waiting for acceptors happens there - cur.execute('CREATE TABLE t(key int primary key, value text)') + cur.execute("CREATE TABLE t(key int primary key, value text)") cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'") - cur.execute('SELECT sum(key) FROM t') - assert cur.fetchone() == (5000050000, ) + cur.execute("SELECT sum(key) FROM t") + assert cur.fetchone() == (5000050000,) diff --git a/test_runner/batch_others/test_backpressure.py b/test_runner/batch_others/test_backpressure.py index 4ca03b102b..a81fa380a9 100644 --- a/test_runner/batch_others/test_backpressure.py +++ b/test_runner/batch_others/test_backpressure.py @@ -1,13 +1,13 @@ +import threading +import time from contextlib import closing, contextmanager + import psycopg2.extras import pytest -from fixtures.neon_fixtures import NeonEnvBuilder from fixtures.log_helper import log -import time -from fixtures.neon_fixtures import Postgres -import threading +from fixtures.neon_fixtures import NeonEnvBuilder, Postgres -pytest_plugins = ("fixtures.neon_fixtures") +pytest_plugins = "fixtures.neon_fixtures" @contextmanager @@ -44,7 +44,8 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv 
with pg_cur(pg) as cur: while not stop_event.is_set(): try: - cur.execute(''' + cur.execute( + """ select pg_wal_lsn_diff(pg_current_wal_flush_lsn(),received_lsn) as received_lsn_lag, pg_wal_lsn_diff(pg_current_wal_flush_lsn(),disk_consistent_lsn) as disk_consistent_lsn_lag, pg_wal_lsn_diff(pg_current_wal_flush_lsn(),remote_consistent_lsn) as remote_consistent_lsn_lag, @@ -52,16 +53,19 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_flush_lsn(),disk_consistent_lsn)), pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_flush_lsn(),remote_consistent_lsn)) from backpressure_lsns(); - ''') + """ + ) res = cur.fetchone() received_lsn_lag = res[0] disk_consistent_lsn_lag = res[1] remote_consistent_lsn_lag = res[2] - log.info(f"received_lsn_lag = {received_lsn_lag} ({res[3]}), " - f"disk_consistent_lsn_lag = {disk_consistent_lsn_lag} ({res[4]}), " - f"remote_consistent_lsn_lag = {remote_consistent_lsn_lag} ({res[5]})") + log.info( + f"received_lsn_lag = {received_lsn_lag} ({res[3]}), " + f"disk_consistent_lsn_lag = {disk_consistent_lsn_lag} ({res[4]}), " + f"remote_consistent_lsn_lag = {remote_consistent_lsn_lag} ({res[5]})" + ) # Since feedback from pageserver is not immediate, we should allow some lag overflow lag_overflow = 5 * 1024 * 1024 # 5MB @@ -71,7 +75,9 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv if max_replication_flush_lag_bytes > 0: assert disk_consistent_lsn_lag < max_replication_flush_lag_bytes + lag_overflow if max_replication_apply_lag_bytes > 0: - assert remote_consistent_lsn_lag < max_replication_apply_lag_bytes + lag_overflow + assert ( + remote_consistent_lsn_lag < max_replication_apply_lag_bytes + lag_overflow + ) time.sleep(polling_interval) @@ -79,7 +85,7 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv log.info(f"backpressure check query failed: {e}") stop_event.set() - log.info('check thread 
stopped') + log.info("check thread stopped") # This test illustrates how to tune backpressure to control the lag @@ -94,10 +100,11 @@ def check_backpressure(pg: Postgres, stop_event: threading.Event, polling_interv def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() # Create a branch for us - env.neon_cli.create_branch('test_backpressure') + env.neon_cli.create_branch("test_backpressure") - pg = env.postgres.create_start('test_backpressure', - config_lines=['max_replication_write_lag=30MB']) + pg = env.postgres.create_start( + "test_backpressure", config_lines=["max_replication_write_lag=30MB"] + ) log.info("postgres is running on 'test_backpressure' branch") # setup check thread @@ -131,23 +138,29 @@ def test_backpressure_received_lsn_lag(neon_env_builder: NeonEnvBuilder): rows_inserted += 100000 except Exception as e: if check_thread.is_alive(): - log.info('stopping check thread') + log.info("stopping check thread") check_stop_event.set() check_thread.join() - assert False, f"Exception {e} while inserting rows, but WAL lag is within configured threshold. That means backpressure is not tuned properly" + assert ( + False + ), f"Exception {e} while inserting rows, but WAL lag is within configured threshold. That means backpressure is not tuned properly" else: - assert False, f"Exception {e} while inserting rows and WAL lag overflowed configured threshold. That means backpressure doesn't work." + assert ( + False + ), f"Exception {e} while inserting rows and WAL lag overflowed configured threshold. That means backpressure doesn't work." log.info(f"inserted {rows_inserted} rows") if check_thread.is_alive(): - log.info('stopping check thread') + log.info("stopping check thread") check_stop_event.set() check_thread.join() - log.info('check thread stopped') + log.info("check thread stopped") else: - assert False, "WAL lag overflowed configured threshold. That means backpressure doesn't work." 
+ assert ( + False + ), "WAL lag overflowed configured threshold. That means backpressure doesn't work." -#TODO test_backpressure_disk_consistent_lsn_lag. Play with pageserver's checkpoint settings -#TODO test_backpressure_remote_consistent_lsn_lag +# TODO test_backpressure_disk_consistent_lsn_lag. Play with pageserver's checkpoint settings +# TODO test_backpressure_remote_consistent_lsn_lag diff --git a/test_runner/batch_others/test_basebackup_error.py b/test_runner/batch_others/test_basebackup_error.py index 0909ed98a7..9960f3afbf 100644 --- a/test_runner/batch_others/test_basebackup_error.py +++ b/test_runner/batch_others/test_basebackup_error.py @@ -1,5 +1,4 @@ import pytest - from fixtures.neon_fixtures import NeonEnv @@ -15,4 +14,4 @@ def test_basebackup_error(neon_simple_env: NeonEnv): env.pageserver.safe_psql(f"failpoints basebackup-before-control-file=return") with pytest.raises(Exception, match="basebackup-before-control-file"): - pg = env.postgres.create_start('test_basebackup_error') + pg = env.postgres.create_start("test_basebackup_error") diff --git a/test_runner/batch_others/test_branch_and_gc.py b/test_runner/batch_others/test_branch_and_gc.py index 8e433f65ad..bc8374543f 100644 --- a/test_runner/batch_others/test_branch_and_gc.py +++ b/test_runner/batch_others/test_branch_and_gc.py @@ -1,6 +1,7 @@ import threading -import pytest import time + +import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnv from fixtures.utils import lsn_from_hex, query_scalar @@ -49,55 +50,52 @@ def test_branch_and_gc(neon_simple_env: NeonEnv): tenant, _ = env.neon_cli.create_tenant( conf={ # disable background GC - 'gc_period': '10 m', - 'gc_horizon': f'{10 * 1024 ** 3}', - + "gc_period": "10 m", + "gc_horizon": f"{10 * 1024 ** 3}", # small checkpoint distance to create more delta layer files - 'checkpoint_distance': f'{1024 ** 2}', - + "checkpoint_distance": f"{1024 ** 2}", # set the target size to be large to allow the image layer to 
cover the whole key space - 'compaction_target_size': f'{1024 ** 3}', - + "compaction_target_size": f"{1024 ** 3}", # tweak the default settings to allow quickly create image layers and L1 layers - 'compaction_period': '1 s', - 'compaction_threshold': '2', - 'image_creation_threshold': '1', - + "compaction_period": "1 s", + "compaction_threshold": "2", + "image_creation_threshold": "1", # set PITR interval to be small, so we can do GC - 'pitr_interval': '1 s' - }) + "pitr_interval": "1 s", + } + ) - timeline_main = env.neon_cli.create_timeline(f'test_main', tenant_id=tenant) - pg_main = env.postgres.create_start('test_main', tenant_id=tenant) + timeline_main = env.neon_cli.create_timeline(f"test_main", tenant_id=tenant) + pg_main = env.postgres.create_start("test_main", tenant_id=tenant) main_cur = pg_main.connect().cursor() main_cur.execute( "CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')" ) - main_cur.execute('INSERT INTO foo SELECT FROM generate_series(1, 100000)') - lsn1 = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()') - log.info(f'LSN1: {lsn1}') + main_cur.execute("INSERT INTO foo SELECT FROM generate_series(1, 100000)") + lsn1 = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()") + log.info(f"LSN1: {lsn1}") - main_cur.execute('INSERT INTO foo SELECT FROM generate_series(1, 100000)') - lsn2 = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()') - log.info(f'LSN2: {lsn2}') + main_cur.execute("INSERT INTO foo SELECT FROM generate_series(1, 100000)") + lsn2 = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()") + log.info(f"LSN2: {lsn2}") # Set the GC horizon so that lsn1 is inside the horizon, which means # we can create a new branch starting from lsn1. 
env.pageserver.safe_psql( - f'do_gc {tenant.hex} {timeline_main.hex} {lsn_from_hex(lsn2) - lsn_from_hex(lsn1) + 1024}') + f"do_gc {tenant.hex} {timeline_main.hex} {lsn_from_hex(lsn2) - lsn_from_hex(lsn1) + 1024}" + ) - env.neon_cli.create_branch('test_branch', - 'test_main', - tenant_id=tenant, - ancestor_start_lsn=lsn1) - pg_branch = env.postgres.create_start('test_branch', tenant_id=tenant) + env.neon_cli.create_branch( + "test_branch", "test_main", tenant_id=tenant, ancestor_start_lsn=lsn1 + ) + pg_branch = env.postgres.create_start("test_branch", tenant_id=tenant) branch_cur = pg_branch.connect().cursor() - branch_cur.execute('INSERT INTO foo SELECT FROM generate_series(1, 100000)') + branch_cur.execute("INSERT INTO foo SELECT FROM generate_series(1, 100000)") - assert query_scalar(branch_cur, 'SELECT count(*) FROM foo') == 200000 + assert query_scalar(branch_cur, "SELECT count(*) FROM foo") == 200000 # This test simulates a race condition happening when branch creation and GC are performed concurrently. 
@@ -120,32 +118,31 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv): tenant, _ = env.neon_cli.create_tenant( conf={ # disable background GC - 'gc_period': '10 m', - 'gc_horizon': f'{10 * 1024 ** 3}', - + "gc_period": "10 m", + "gc_horizon": f"{10 * 1024 ** 3}", # small checkpoint distance to create more delta layer files - 'checkpoint_distance': f'{1024 ** 2}', - + "checkpoint_distance": f"{1024 ** 2}", # set the target size to be large to allow the image layer to cover the whole key space - 'compaction_target_size': f'{1024 ** 3}', - + "compaction_target_size": f"{1024 ** 3}", # tweak the default settings to allow quickly create image layers and L1 layers - 'compaction_period': '1 s', - 'compaction_threshold': '2', - 'image_creation_threshold': '1', - + "compaction_period": "1 s", + "compaction_threshold": "2", + "image_creation_threshold": "1", # set PITR interval to be small, so we can do GC - 'pitr_interval': '0 s' - }) + "pitr_interval": "0 s", + } + ) - b0 = env.neon_cli.create_branch('b0', tenant_id=tenant) - pg0 = env.postgres.create_start('b0', tenant_id=tenant) - res = pg0.safe_psql_many(queries=[ - "CREATE TABLE t(key serial primary key)", - "INSERT INTO t SELECT FROM generate_series(1, 100000)", - "SELECT pg_current_wal_insert_lsn()", - "INSERT INTO t SELECT FROM generate_series(1, 100000)", - ]) + b0 = env.neon_cli.create_branch("b0", tenant_id=tenant) + pg0 = env.postgres.create_start("b0", tenant_id=tenant) + res = pg0.safe_psql_many( + queries=[ + "CREATE TABLE t(key serial primary key)", + "INSERT INTO t SELECT FROM generate_series(1, 100000)", + "SELECT pg_current_wal_insert_lsn()", + "INSERT INTO t SELECT FROM generate_series(1, 100000)", + ] + ) lsn = res[2][0][0] # Use `failpoint=sleep` and `threading` to make the GC iteration triggers *before* the @@ -166,6 +163,6 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv): # The starting LSN is invalid as the corresponding record is scheduled to be removed by in-queue GC. 
with pytest.raises(Exception, match="invalid branch start lsn"): - env.neon_cli.create_branch('b1', 'b0', tenant_id=tenant, ancestor_start_lsn=lsn) + env.neon_cli.create_branch("b1", "b0", tenant_id=tenant, ancestor_start_lsn=lsn) thread.join() diff --git a/test_runner/batch_others/test_branch_behind.py b/test_runner/batch_others/test_branch_behind.py index 95f478dda8..51946380d2 100644 --- a/test_runner/batch_others/test_branch_behind.py +++ b/test_runner/batch_others/test_branch_behind.py @@ -1,8 +1,8 @@ import psycopg2.extras import pytest from fixtures.log_helper import log -from fixtures.utils import print_gc_result, query_scalar from fixtures.neon_fixtures import NeonEnvBuilder +from fixtures.utils import print_gc_result, query_scalar # @@ -21,8 +21,8 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() # Branch at the point where only 100 rows were inserted - env.neon_cli.create_branch('test_branch_behind') - pgmain = env.postgres.create_start('test_branch_behind') + env.neon_cli.create_branch("test_branch_behind") + pgmain = env.postgres.create_start("test_branch_behind") log.info("postgres is running on 'test_branch_behind' branch") main_cur = pgmain.connect().cursor() @@ -30,80 +30,86 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder): timeline = query_scalar(main_cur, "SHOW neon.timeline_id") # Create table, and insert the first 100 rows - main_cur.execute('CREATE TABLE foo (t text)') + main_cur.execute("CREATE TABLE foo (t text)") # keep some early lsn to test branch creation on out of date lsn - gced_lsn = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()') + gced_lsn = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()") - main_cur.execute(''' + main_cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100) g - ''') - lsn_a = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()') - log.info(f'LSN after 100 rows: {lsn_a}') + 
""" + ) + lsn_a = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()") + log.info(f"LSN after 100 rows: {lsn_a}") # Insert some more rows. (This generates enough WAL to fill a few segments.) - main_cur.execute(''' + main_cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 200000) g - ''') - lsn_b = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()') - log.info(f'LSN after 200100 rows: {lsn_b}') + """ + ) + lsn_b = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()") + log.info(f"LSN after 200100 rows: {lsn_b}") # Branch at the point where only 100 rows were inserted - env.neon_cli.create_branch('test_branch_behind_hundred', - 'test_branch_behind', - ancestor_start_lsn=lsn_a) + env.neon_cli.create_branch( + "test_branch_behind_hundred", "test_branch_behind", ancestor_start_lsn=lsn_a + ) # Insert many more rows. This generates enough WAL to fill a few segments. - main_cur.execute(''' + main_cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 200000) g - ''') - lsn_c = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()') + """ + ) + lsn_c = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()") - log.info(f'LSN after 400100 rows: {lsn_c}') + log.info(f"LSN after 400100 rows: {lsn_c}") # Branch at the point where only 200100 rows were inserted - env.neon_cli.create_branch('test_branch_behind_more', - 'test_branch_behind', - ancestor_start_lsn=lsn_b) + env.neon_cli.create_branch( + "test_branch_behind_more", "test_branch_behind", ancestor_start_lsn=lsn_b + ) - pg_hundred = env.postgres.create_start('test_branch_behind_hundred') - pg_more = env.postgres.create_start('test_branch_behind_more') + pg_hundred = env.postgres.create_start("test_branch_behind_hundred") + pg_more = env.postgres.create_start("test_branch_behind_more") # On the 'hundred' branch, we should see only 100 rows hundred_cur = pg_hundred.connect().cursor() - 
assert query_scalar(hundred_cur, 'SELECT count(*) FROM foo') == 100 + assert query_scalar(hundred_cur, "SELECT count(*) FROM foo") == 100 # On the 'more' branch, we should see 100200 rows more_cur = pg_more.connect().cursor() - assert query_scalar(more_cur, 'SELECT count(*) FROM foo') == 200100 + assert query_scalar(more_cur, "SELECT count(*) FROM foo") == 200100 # All the rows are visible on the main branch - assert query_scalar(main_cur, 'SELECT count(*) FROM foo') == 400100 + assert query_scalar(main_cur, "SELECT count(*) FROM foo") == 400100 # Check bad lsn's for branching # branch at segment boundary - env.neon_cli.create_branch('test_branch_segment_boundary', - 'test_branch_behind', - ancestor_start_lsn="0/3000000") - pg = env.postgres.create_start('test_branch_segment_boundary') - assert pg.safe_psql('SELECT 1')[0][0] == 1 + env.neon_cli.create_branch( + "test_branch_segment_boundary", "test_branch_behind", ancestor_start_lsn="0/3000000" + ) + pg = env.postgres.create_start("test_branch_segment_boundary") + assert pg.safe_psql("SELECT 1")[0][0] == 1 # branch at pre-initdb lsn with pytest.raises(Exception, match="invalid branch start lsn"): - env.neon_cli.create_branch('test_branch_preinitdb', ancestor_start_lsn="0/42") + env.neon_cli.create_branch("test_branch_preinitdb", ancestor_start_lsn="0/42") # branch at pre-ancestor lsn with pytest.raises(Exception, match="less than timeline ancestor lsn"): - env.neon_cli.create_branch('test_branch_preinitdb', - 'test_branch_behind', - ancestor_start_lsn="0/42") + env.neon_cli.create_branch( + "test_branch_preinitdb", "test_branch_behind", ancestor_start_lsn="0/42" + ) # check that we cannot create branch based on garbage collected data with env.pageserver.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur: @@ -114,13 +120,13 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder): with pytest.raises(Exception, match="invalid branch start lsn"): # this gced_lsn is pretty random, so if gc is disabled this 
woudln't fail - env.neon_cli.create_branch('test_branch_create_fail', - 'test_branch_behind', - ancestor_start_lsn=gced_lsn) + env.neon_cli.create_branch( + "test_branch_create_fail", "test_branch_behind", ancestor_start_lsn=gced_lsn + ) # check that after gc everything is still there - assert query_scalar(hundred_cur, 'SELECT count(*) FROM foo') == 100 + assert query_scalar(hundred_cur, "SELECT count(*) FROM foo") == 100 - assert query_scalar(more_cur, 'SELECT count(*) FROM foo') == 200100 + assert query_scalar(more_cur, "SELECT count(*) FROM foo") == 200100 - assert query_scalar(main_cur, 'SELECT count(*) FROM foo') == 400100 + assert query_scalar(main_cur, "SELECT count(*) FROM foo") == 400100 diff --git a/test_runner/batch_others/test_branching.py b/test_runner/batch_others/test_branching.py index c61bac7a58..2d08b07f82 100644 --- a/test_runner/batch_others/test_branching.py +++ b/test_runner/batch_others/test_branching.py @@ -1,10 +1,11 @@ -from typing import List -import threading -import pytest -from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres -import time import random +import threading +import time +from typing import List + +import pytest from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres from performance.test_perf_pgbench import get_scales_matrix @@ -20,38 +21,37 @@ from performance.test_perf_pgbench import get_scales_matrix @pytest.mark.parametrize("n_branches", [10]) @pytest.mark.parametrize("scale", get_scales_matrix(1)) @pytest.mark.parametrize("ty", ["cascade", "flat"]) -def test_branching_with_pgbench(neon_simple_env: NeonEnv, - pg_bin: PgBin, - n_branches: int, - scale: int, - ty: str): +def test_branching_with_pgbench( + neon_simple_env: NeonEnv, pg_bin: PgBin, n_branches: int, scale: int, ty: str +): env = neon_simple_env # Use aggressive GC and checkpoint settings, so that we also exercise GC during the test tenant, _ = env.neon_cli.create_tenant( - conf={ - 'gc_period': '5 s', - 
'gc_horizon': f'{1024 ** 2}', - 'checkpoint_distance': f'{1024 ** 2}', - 'compaction_target_size': f'{1024 ** 2}', - # set PITR interval to be small, so we can do GC - 'pitr_interval': '5 s' - }) + conf={ + "gc_period": "5 s", + "gc_horizon": f"{1024 ** 2}", + "checkpoint_distance": f"{1024 ** 2}", + "compaction_target_size": f"{1024 ** 2}", + # set PITR interval to be small, so we can do GC + "pitr_interval": "5 s", + } + ) def run_pgbench(pg: Postgres): connstr = pg.connstr() log.info(f"Start a pgbench workload on pg {connstr}") - pg_bin.run_capture(['pgbench', '-i', f'-s{scale}', connstr]) - pg_bin.run_capture(['pgbench', '-T15', connstr]) + pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr]) + pg_bin.run_capture(["pgbench", "-T15", connstr]) - env.neon_cli.create_branch('b0', tenant_id=tenant) + env.neon_cli.create_branch("b0", tenant_id=tenant) pgs: List[Postgres] = [] - pgs.append(env.postgres.create_start('b0', tenant_id=tenant)) + pgs.append(env.postgres.create_start("b0", tenant_id=tenant)) threads: List[threading.Thread] = [] - threads.append(threading.Thread(target=run_pgbench, args=(pgs[0], ), daemon=True)) + threads.append(threading.Thread(target=run_pgbench, args=(pgs[0],), daemon=True)) threads[-1].start() thread_limit = 4 @@ -72,18 +72,18 @@ def test_branching_with_pgbench(neon_simple_env: NeonEnv, threads = [] if ty == "cascade": - env.neon_cli.create_branch('b{}'.format(i + 1), 'b{}'.format(i), tenant_id=tenant) + env.neon_cli.create_branch("b{}".format(i + 1), "b{}".format(i), tenant_id=tenant) else: - env.neon_cli.create_branch('b{}'.format(i + 1), 'b0', tenant_id=tenant) + env.neon_cli.create_branch("b{}".format(i + 1), "b0", tenant_id=tenant) - pgs.append(env.postgres.create_start('b{}'.format(i + 1), tenant_id=tenant)) + pgs.append(env.postgres.create_start("b{}".format(i + 1), tenant_id=tenant)) - threads.append(threading.Thread(target=run_pgbench, args=(pgs[-1], ), daemon=True)) + threads.append(threading.Thread(target=run_pgbench, 
args=(pgs[-1],), daemon=True)) threads[-1].start() for thread in threads: thread.join() for pg in pgs: - res = pg.safe_psql('SELECT count(*) from pgbench_accounts') - assert res[0] == (100000 * scale, ) + res = pg.safe_psql("SELECT count(*) from pgbench_accounts") + assert res[0] == (100000 * scale,) diff --git a/test_runner/batch_others/test_broken_timeline.py b/test_runner/batch_others/test_broken_timeline.py index b9e5f637ab..b96a7895eb 100644 --- a/test_runner/batch_others/test_broken_timeline.py +++ b/test_runner/batch_others/test_broken_timeline.py @@ -1,12 +1,12 @@ +import concurrent.futures +import os +from contextlib import closing from typing import List, Tuple from uuid import UUID -import pytest -import concurrent.futures -from contextlib import closing -from fixtures.neon_fixtures import NeonEnvBuilder, NeonEnv, Postgres -from fixtures.log_helper import log -import os +import pytest +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres from fixtures.utils import query_scalar @@ -24,7 +24,7 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder): tenant_id = tenant_id_uuid.hex timeline_id = timeline_id_uuid.hex - pg = env.postgres.create_start(f'main', tenant_id=tenant_id_uuid) + pg = env.postgres.create_start(f"main", tenant_id=tenant_id_uuid) with pg.cursor() as cur: cur.execute("CREATE TABLE t(key int primary key, value text)") cur.execute("INSERT INTO t SELECT generate_series(1,100), 'payload'") @@ -42,7 +42,7 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder): # Corrupt metadata file on timeline 1 (tenant1, timeline1, pg1) = tenant_timelines[1] metadata_path = "{}/tenants/{}/timelines/{}/metadata".format(env.repo_dir, tenant1, timeline1) - print(f'overwriting metadata file at {metadata_path}') + print(f"overwriting metadata file at {metadata_path}") f = open(metadata_path, "w") f.write("overwritten with garbage!") f.close() @@ -52,17 +52,17 @@ def 
test_broken_timeline(neon_env_builder: NeonEnvBuilder): (tenant2, timeline2, pg2) = tenant_timelines[2] timeline_path = "{}/tenants/{}/timelines/{}/".format(env.repo_dir, tenant2, timeline2) for filename in os.listdir(timeline_path): - if filename.startswith('00000'): + if filename.startswith("00000"): # Looks like a layer file. Remove it - os.remove(f'{timeline_path}/{filename}') + os.remove(f"{timeline_path}/{filename}") # Corrupt layer files file on timeline 3 (tenant3, timeline3, pg3) = tenant_timelines[3] timeline_path = "{}/tenants/{}/timelines/{}/".format(env.repo_dir, tenant3, timeline3) for filename in os.listdir(timeline_path): - if filename.startswith('00000'): + if filename.startswith("00000"): # Looks like a layer file. Corrupt it - f = open(f'{timeline_path}/{filename}', "w") + f = open(f"{timeline_path}/{filename}", "w") f.write("overwritten with garbage!") f.close() @@ -77,7 +77,7 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder): (tenant, timeline, pg) = tenant_timelines[n] with pytest.raises(Exception, match="Cannot load local timeline") as err: pg.start() - log.info(f'compute startup failed as expected: {err}') + log.info(f"compute startup failed as expected: {err}") def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv): @@ -87,9 +87,10 @@ def test_create_multiple_timelines_parallel(neon_simple_env: NeonEnv): with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: futures = [ - executor.submit(env.neon_cli.create_timeline, - f"test-create-multiple-timelines-{i}", - tenant_id) for i in range(4) + executor.submit( + env.neon_cli.create_timeline, f"test-create-multiple-timelines-{i}", tenant_id + ) + for i in range(4) ] for future in futures: future.result() diff --git a/test_runner/batch_others/test_clog_truncate.py b/test_runner/batch_others/test_clog_truncate.py index cdb577f480..1f5df1c130 100644 --- a/test_runner/batch_others/test_clog_truncate.py +++ b/test_runner/batch_others/test_clog_truncate.py @@ 
-1,10 +1,9 @@ -import time import os - +import time from contextlib import closing -from fixtures.neon_fixtures import NeonEnv from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv from fixtures.utils import query_scalar @@ -13,40 +12,40 @@ from fixtures.utils import query_scalar # def test_clog_truncate(neon_simple_env: NeonEnv): env = neon_simple_env - env.neon_cli.create_branch('test_clog_truncate', 'empty') + env.neon_cli.create_branch("test_clog_truncate", "empty") # set aggressive autovacuum to make sure that truncation will happen config = [ - 'autovacuum_max_workers=10', - 'autovacuum_vacuum_threshold=0', - 'autovacuum_vacuum_insert_threshold=0', - 'autovacuum_vacuum_cost_delay=0', - 'autovacuum_vacuum_cost_limit=10000', - 'autovacuum_naptime =1s', - 'autovacuum_freeze_max_age=100000' + "autovacuum_max_workers=10", + "autovacuum_vacuum_threshold=0", + "autovacuum_vacuum_insert_threshold=0", + "autovacuum_vacuum_cost_delay=0", + "autovacuum_vacuum_cost_limit=10000", + "autovacuum_naptime =1s", + "autovacuum_freeze_max_age=100000", ] - pg = env.postgres.create_start('test_clog_truncate', config_lines=config) - log.info('postgres is running on test_clog_truncate branch') + pg = env.postgres.create_start("test_clog_truncate", config_lines=config) + log.info("postgres is running on test_clog_truncate branch") # Install extension containing function needed for test - pg.safe_psql('CREATE EXTENSION neon_test_utils') + pg.safe_psql("CREATE EXTENSION neon_test_utils") # Consume many xids to advance clog with pg.cursor() as cur: - cur.execute('select test_consume_xids(1000*1000*10);') - log.info('xids consumed') + cur.execute("select test_consume_xids(1000*1000*10);") + log.info("xids consumed") # call a checkpoint to trigger TruncateSubtrans - cur.execute('CHECKPOINT;') + cur.execute("CHECKPOINT;") # ensure WAL flush - cur.execute('select txid_current()') + cur.execute("select txid_current()") log.info(cur.fetchone()) # wait for autovacuum 
to truncate the pg_xact # XXX Is it worth to add a timeout here? - pg_xact_0000_path = os.path.join(pg.pg_xact_dir_path(), '0000') + pg_xact_0000_path = os.path.join(pg.pg_xact_dir_path(), "0000") log.info(f"pg_xact_0000_path = {pg_xact_0000_path}") while os.path.isfile(pg_xact_0000_path): @@ -55,18 +54,18 @@ def test_clog_truncate(neon_simple_env: NeonEnv): # checkpoint to advance latest lsn with pg.cursor() as cur: - cur.execute('CHECKPOINT;') - lsn_after_truncation = query_scalar(cur, 'select pg_current_wal_insert_lsn()') + cur.execute("CHECKPOINT;") + lsn_after_truncation = query_scalar(cur, "select pg_current_wal_insert_lsn()") # create new branch after clog truncation and start a compute node on it - log.info(f'create branch at lsn_after_truncation {lsn_after_truncation}') - env.neon_cli.create_branch('test_clog_truncate_new', - 'test_clog_truncate', - ancestor_start_lsn=lsn_after_truncation) - pg2 = env.postgres.create_start('test_clog_truncate_new') - log.info('postgres is running on test_clog_truncate_new branch') + log.info(f"create branch at lsn_after_truncation {lsn_after_truncation}") + env.neon_cli.create_branch( + "test_clog_truncate_new", "test_clog_truncate", ancestor_start_lsn=lsn_after_truncation + ) + pg2 = env.postgres.create_start("test_clog_truncate_new") + log.info("postgres is running on test_clog_truncate_new branch") # check that new node doesn't contain truncated segment - pg_xact_0000_path_new = os.path.join(pg2.pg_xact_dir_path(), '0000') + pg_xact_0000_path_new = os.path.join(pg2.pg_xact_dir_path(), "0000") log.info(f"pg_xact_0000_path_new = {pg_xact_0000_path_new}") assert os.path.isfile(pg_xact_0000_path_new) is False diff --git a/test_runner/batch_others/test_close_fds.py b/test_runner/batch_others/test_close_fds.py index 9521b1bb4a..c7ea37f9c8 100644 --- a/test_runner/batch_others/test_close_fds.py +++ b/test_runner/batch_others/test_close_fds.py @@ -1,18 +1,18 @@ -from contextlib import closing -import shutil -import time -import 
subprocess import os.path +import shutil +import subprocess +import time +from contextlib import closing from cached_property import threading -from fixtures.neon_fixtures import NeonEnv from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv def lsof_path() -> str: path_output = shutil.which("lsof") if path_output is None: - raise RuntimeError('lsof not found in PATH') + raise RuntimeError("lsof not found in PATH") else: return path_output @@ -36,16 +36,18 @@ def test_lsof_pageserver_pid(neon_simple_env: NeonEnv): path = os.path.join(env.repo_dir, "pageserver.pid") lsof = lsof_path() while workload_thread.is_alive(): - res = subprocess.run([lsof, path], - check=False, - universal_newlines=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + res = subprocess.run( + [lsof, path], + check=False, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) # parse the `lsof` command's output to get only the list of commands - commands = [line.split(' ')[0] for line in res.stdout.strip().split('\n')[1:]] + commands = [line.split(" ")[0] for line in res.stdout.strip().split("\n")[1:]] if len(commands) > 0: log.info(f"lsof commands: {commands}") - assert commands == ['pageserve'] + assert commands == ["pageserve"] time.sleep(1.0) diff --git a/test_runner/batch_others/test_config.py b/test_runner/batch_others/test_config.py index 51deeebeed..3477d96b89 100644 --- a/test_runner/batch_others/test_config.py +++ b/test_runner/batch_others/test_config.py @@ -1,7 +1,7 @@ from contextlib import closing -from fixtures.neon_fixtures import NeonEnv from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv # @@ -12,19 +12,21 @@ def test_config(neon_simple_env: NeonEnv): env.neon_cli.create_branch("test_config", "empty") # change config - pg = env.postgres.create_start('test_config', config_lines=['log_min_messages=debug1']) - log.info('postgres is running on test_config branch') + pg = 
env.postgres.create_start("test_config", config_lines=["log_min_messages=debug1"]) + log.info("postgres is running on test_config branch") with closing(pg.connect()) as conn: with conn.cursor() as cur: - cur.execute(''' + cur.execute( + """ SELECT setting FROM pg_settings WHERE source != 'default' AND source != 'override' AND name = 'log_min_messages' - ''') + """ + ) # check that config change was applied - assert cur.fetchone() == ('debug1', ) + assert cur.fetchone() == ("debug1",) diff --git a/test_runner/batch_others/test_crafted_wal_end.py b/test_runner/batch_others/test_crafted_wal_end.py index d1c46fc73a..32e5366945 100644 --- a/test_runner/batch_others/test_crafted_wal_end.py +++ b/test_runner/batch_others/test_crafted_wal_end.py @@ -1,34 +1,38 @@ -from fixtures.neon_fixtures import NeonEnvBuilder, WalCraft -from fixtures.log_helper import log import pytest +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnvBuilder, WalCraft # Restart nodes with WAL end having specially crafted shape, like last record # crossing segment boundary, to test decoding issues. 
-@pytest.mark.parametrize('wal_type', - [ - 'simple', - 'last_wal_record_xlog_switch', - 'last_wal_record_xlog_switch_ends_on_page_boundary', - 'last_wal_record_crossing_segment', - 'wal_record_crossing_segment_followed_by_small_one', - ]) +@pytest.mark.parametrize( + "wal_type", + [ + "simple", + "last_wal_record_xlog_switch", + "last_wal_record_xlog_switch_ends_on_page_boundary", + "last_wal_record_crossing_segment", + "wal_record_crossing_segment_followed_by_small_one", + ], +) def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str): neon_env_builder.num_safekeepers = 1 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_crafted_wal_end') + env.neon_cli.create_branch("test_crafted_wal_end") - pg = env.postgres.create('test_crafted_wal_end') + pg = env.postgres.create("test_crafted_wal_end") wal_craft = WalCraft(env) pg.config(wal_craft.postgres_config()) pg.start() - res = pg.safe_psql_many(queries=[ - 'CREATE TABLE keys(key int primary key)', - 'INSERT INTO keys SELECT generate_series(1, 100)', - 'SELECT SUM(key) FROM keys' - ]) - assert res[-1][0] == (5050, ) + res = pg.safe_psql_many( + queries=[ + "CREATE TABLE keys(key int primary key)", + "INSERT INTO keys SELECT generate_series(1, 100)", + "SELECT SUM(key) FROM keys", + ] + ) + assert res[-1][0] == (5050,) wal_craft.in_existing(wal_type, pg.connstr()) @@ -39,13 +43,15 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str): env.pageserver.start() log.info("Trying more queries") - res = pg.safe_psql_many(queries=[ - 'SELECT SUM(key) FROM keys', - 'INSERT INTO keys SELECT generate_series(101, 200)', - 'SELECT SUM(key) FROM keys', - ]) - assert res[0][0] == (5050, ) - assert res[-1][0] == (20100, ) + res = pg.safe_psql_many( + queries=[ + "SELECT SUM(key) FROM keys", + "INSERT INTO keys SELECT generate_series(101, 200)", + "SELECT SUM(key) FROM keys", + ] + ) + assert res[0][0] == (5050,) + assert res[-1][0] == (20100,) log.info("Restarting all 
safekeepers and pageservers (again)") env.pageserver.stop() @@ -54,10 +60,12 @@ def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str): env.pageserver.start() log.info("Trying more queries (again)") - res = pg.safe_psql_many(queries=[ - 'SELECT SUM(key) FROM keys', - 'INSERT INTO keys SELECT generate_series(201, 300)', - 'SELECT SUM(key) FROM keys', - ]) - assert res[0][0] == (20100, ) - assert res[-1][0] == (45150, ) + res = pg.safe_psql_many( + queries=[ + "SELECT SUM(key) FROM keys", + "INSERT INTO keys SELECT generate_series(201, 300)", + "SELECT SUM(key) FROM keys", + ] + ) + assert res[0][0] == (20100,) + assert res[-1][0] == (45150,) diff --git a/test_runner/batch_others/test_createdropdb.py b/test_runner/batch_others/test_createdropdb.py index 0fbf6e2a47..fdb704ff15 100644 --- a/test_runner/batch_others/test_createdropdb.py +++ b/test_runner/batch_others/test_createdropdb.py @@ -1,9 +1,9 @@ import os import pathlib - from contextlib import closing -from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content + from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content from fixtures.utils import query_scalar @@ -12,35 +12,37 @@ from fixtures.utils import query_scalar # def test_createdb(neon_simple_env: NeonEnv): env = neon_simple_env - env.neon_cli.create_branch('test_createdb', 'empty') + env.neon_cli.create_branch("test_createdb", "empty") - pg = env.postgres.create_start('test_createdb') + pg = env.postgres.create_start("test_createdb") log.info("postgres is running on 'test_createdb' branch") with pg.cursor() as cur: # Cause a 'relmapper' change in the original branch - cur.execute('VACUUM FULL pg_class') + cur.execute("VACUUM FULL pg_class") - cur.execute('CREATE DATABASE foodb') + cur.execute("CREATE DATABASE foodb") - lsn = query_scalar(cur, 'SELECT pg_current_wal_insert_lsn()') + lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()") # Create a branch - 
env.neon_cli.create_branch('test_createdb2', 'test_createdb', ancestor_start_lsn=lsn) - pg2 = env.postgres.create_start('test_createdb2') + env.neon_cli.create_branch("test_createdb2", "test_createdb", ancestor_start_lsn=lsn) + pg2 = env.postgres.create_start("test_createdb2") # Test that you can connect to the new database on both branches for db in (pg, pg2): - with db.cursor(dbname='foodb') as cur: + with db.cursor(dbname="foodb") as cur: # Check database size in both branches - cur.execute(""" + cur.execute( + """ select pg_size_pretty(pg_database_size('foodb')), pg_size_pretty( sum(pg_relation_size(oid, 'main')) +sum(pg_relation_size(oid, 'vm')) +sum(pg_relation_size(oid, 'fsm')) ) FROM pg_class where relisshared is false - """) + """ + ) res = cur.fetchone() assert res is not None # check that dbsize equals sum of all relation sizes, excluding shared ones @@ -53,48 +55,48 @@ def test_createdb(neon_simple_env: NeonEnv): # def test_dropdb(neon_simple_env: NeonEnv, test_output_dir): env = neon_simple_env - env.neon_cli.create_branch('test_dropdb', 'empty') - pg = env.postgres.create_start('test_dropdb') + env.neon_cli.create_branch("test_dropdb", "empty") + pg = env.postgres.create_start("test_dropdb") log.info("postgres is running on 'test_dropdb' branch") with pg.cursor() as cur: - cur.execute('CREATE DATABASE foodb') + cur.execute("CREATE DATABASE foodb") - lsn_before_drop = query_scalar(cur, 'SELECT pg_current_wal_insert_lsn()') + lsn_before_drop = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()") dboid = query_scalar(cur, "SELECT oid FROM pg_database WHERE datname='foodb';") with pg.cursor() as cur: - cur.execute('DROP DATABASE foodb') + cur.execute("DROP DATABASE foodb") - cur.execute('CHECKPOINT') + cur.execute("CHECKPOINT") - lsn_after_drop = query_scalar(cur, 'SELECT pg_current_wal_insert_lsn()') + lsn_after_drop = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()") # Create two branches before and after database drop. 
- env.neon_cli.create_branch('test_before_dropdb', - 'test_dropdb', - ancestor_start_lsn=lsn_before_drop) - pg_before = env.postgres.create_start('test_before_dropdb') + env.neon_cli.create_branch( + "test_before_dropdb", "test_dropdb", ancestor_start_lsn=lsn_before_drop + ) + pg_before = env.postgres.create_start("test_before_dropdb") - env.neon_cli.create_branch('test_after_dropdb', - 'test_dropdb', - ancestor_start_lsn=lsn_after_drop) - pg_after = env.postgres.create_start('test_after_dropdb') + env.neon_cli.create_branch( + "test_after_dropdb", "test_dropdb", ancestor_start_lsn=lsn_after_drop + ) + pg_after = env.postgres.create_start("test_after_dropdb") # Test that database exists on the branch before drop - pg_before.connect(dbname='foodb').close() + pg_before.connect(dbname="foodb").close() # Test that database subdir exists on the branch before drop assert pg_before.pgdata_dir - dbpath = pathlib.Path(pg_before.pgdata_dir) / 'base' / str(dboid) + dbpath = pathlib.Path(pg_before.pgdata_dir) / "base" / str(dboid) log.info(dbpath) assert os.path.isdir(dbpath) == True # Test that database subdir doesn't exist on the branch after drop assert pg_after.pgdata_dir - dbpath = pathlib.Path(pg_after.pgdata_dir) / 'base' / str(dboid) + dbpath = pathlib.Path(pg_after.pgdata_dir) / "base" / str(dboid) log.info(dbpath) assert os.path.isdir(dbpath) == False diff --git a/test_runner/batch_others/test_createuser.py b/test_runner/batch_others/test_createuser.py index d48db05395..c5f8246f5b 100644 --- a/test_runner/batch_others/test_createuser.py +++ b/test_runner/batch_others/test_createuser.py @@ -1,5 +1,5 @@ -from fixtures.neon_fixtures import NeonEnv from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv from fixtures.utils import query_scalar @@ -8,21 +8,21 @@ from fixtures.utils import query_scalar # def test_createuser(neon_simple_env: NeonEnv): env = neon_simple_env - env.neon_cli.create_branch('test_createuser', 'empty') - pg = 
env.postgres.create_start('test_createuser') + env.neon_cli.create_branch("test_createuser", "empty") + pg = env.postgres.create_start("test_createuser") log.info("postgres is running on 'test_createuser' branch") with pg.cursor() as cur: # Cause a 'relmapper' change in the original branch - cur.execute('CREATE USER testuser with password %s', ('testpwd', )) + cur.execute("CREATE USER testuser with password %s", ("testpwd",)) - cur.execute('CHECKPOINT') + cur.execute("CHECKPOINT") - lsn = query_scalar(cur, 'SELECT pg_current_wal_insert_lsn()') + lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()") # Create a branch - env.neon_cli.create_branch('test_createuser2', 'test_createuser', ancestor_start_lsn=lsn) - pg2 = env.postgres.create_start('test_createuser2') + env.neon_cli.create_branch("test_createuser2", "test_createuser", ancestor_start_lsn=lsn) + pg2 = env.postgres.create_start("test_createuser2") # Test that you can connect to new branch as a new user - assert pg2.safe_psql('select current_user', user='testuser') == [('testuser', )] + assert pg2.safe_psql("select current_user", user="testuser") == [("testuser",)] diff --git a/test_runner/batch_others/test_fsm_truncate.py b/test_runner/batch_others/test_fsm_truncate.py index 0f85942598..54ad2ffa34 100644 --- a/test_runner/batch_others/test_fsm_truncate.py +++ b/test_runner/batch_others/test_fsm_truncate.py @@ -1,11 +1,12 @@ +import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient -import pytest def test_fsm_truncate(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_fsm_truncate") - pg = env.postgres.create_start('test_fsm_truncate') + pg = env.postgres.create_start("test_fsm_truncate") pg.safe_psql( - 'CREATE TABLE t1(key int); CREATE TABLE t2(key int); TRUNCATE TABLE t1; TRUNCATE TABLE t2;') + "CREATE TABLE t1(key int); CREATE TABLE t2(key int); TRUNCATE TABLE t1; 
TRUNCATE TABLE t2;" + ) diff --git a/test_runner/batch_others/test_fullbackup.py b/test_runner/batch_others/test_fullbackup.py index bce085c157..8155f52060 100644 --- a/test_runner/batch_others/test_fullbackup.py +++ b/test_runner/batch_others/test_fullbackup.py @@ -1,22 +1,28 @@ -from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnvBuilder, PgBin, PortDistributor, VanillaPostgres -from fixtures.neon_fixtures import pg_distrib_dir import os + +from fixtures.log_helper import log +from fixtures.neon_fixtures import ( + NeonEnvBuilder, + PgBin, + PortDistributor, + VanillaPostgres, + pg_distrib_dir, +) from fixtures.utils import query_scalar, subprocess_capture num_rows = 1000 # Ensure that regular postgres can start from fullbackup -def test_fullbackup(neon_env_builder: NeonEnvBuilder, - pg_bin: PgBin, - port_distributor: PortDistributor): +def test_fullbackup( + neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, port_distributor: PortDistributor +): neon_env_builder.num_safekeepers = 1 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_fullbackup') - pgmain = env.postgres.create_start('test_fullbackup') + env.neon_cli.create_branch("test_fullbackup") + pgmain = env.postgres.create_start("test_fullbackup") log.info("postgres is running on 'test_fullbackup' branch") with pgmain.cursor() as cur: @@ -24,16 +30,18 @@ def test_fullbackup(neon_env_builder: NeonEnvBuilder, # data loading may take a while, so increase statement timeout cur.execute("SET statement_timeout='300s'") - cur.execute(f'''CREATE TABLE tbl AS SELECT 'long string to consume some space' || g - from generate_series(1,{num_rows}) g''') + cur.execute( + f"""CREATE TABLE tbl AS SELECT 'long string to consume some space' || g + from generate_series(1,{num_rows}) g""" + ) cur.execute("CHECKPOINT") - lsn = query_scalar(cur, 'SELECT pg_current_wal_insert_lsn()') + lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()") log.info(f"start_backup_lsn = {lsn}") # 
Set LD_LIBRARY_PATH in the env properly, otherwise we may use the wrong libpq. # PgBin sets it automatically, but here we need to pipe psql output to the tar command. - psql_env = {'LD_LIBRARY_PATH': os.path.join(str(pg_distrib_dir), 'lib')} + psql_env = {"LD_LIBRARY_PATH": os.path.join(str(pg_distrib_dir), "lib")} # Get and unpack fullbackup from pageserver restored_dir_path = env.repo_dir / "restored_datadir" @@ -42,13 +50,14 @@ def test_fullbackup(neon_env_builder: NeonEnvBuilder, cmd = ["psql", "--no-psqlrc", env.pageserver.connstr(), "-c", query] result_basepath = pg_bin.run_capture(cmd, env=psql_env) tar_output_file = result_basepath + ".stdout" - subprocess_capture(str(env.repo_dir), - ["tar", "-xf", tar_output_file, "-C", str(restored_dir_path)]) + subprocess_capture( + str(env.repo_dir), ["tar", "-xf", tar_output_file, "-C", str(restored_dir_path)] + ) # HACK # fullbackup returns neon specific pg_control and first WAL segment # use resetwal to overwrite it - pg_resetwal_path = os.path.join(pg_bin.pg_bin_path, 'pg_resetwal') + pg_resetwal_path = os.path.join(pg_bin.pg_bin_path, "pg_resetwal") cmd = [pg_resetwal_path, "-D", str(restored_dir_path)] pg_bin.run_capture(cmd, env=psql_env) @@ -56,9 +65,11 @@ def test_fullbackup(neon_env_builder: NeonEnvBuilder, port = port_distributor.get_port() with VanillaPostgres(restored_dir_path, pg_bin, port, init=False) as vanilla_pg: # TODO make port an optional argument - vanilla_pg.configure([ - f"port={port}", - ]) + vanilla_pg.configure( + [ + f"port={port}", + ] + ) vanilla_pg.start() - num_rows_found = vanilla_pg.safe_psql('select count(*) from tbl;', user="cloud_admin")[0][0] + num_rows_found = vanilla_pg.safe_psql("select count(*) from tbl;", user="cloud_admin")[0][0] assert num_rows == num_rows_found diff --git a/test_runner/batch_others/test_gc_aggressive.py b/test_runner/batch_others/test_gc_aggressive.py index d7f6308182..be6b437e30 100644 --- a/test_runner/batch_others/test_gc_aggressive.py +++ 
b/test_runner/batch_others/test_gc_aggressive.py @@ -1,8 +1,8 @@ import asyncio import random -from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres from fixtures.utils import query_scalar # Test configuration @@ -24,7 +24,7 @@ async def update_table(pg: Postgres): while updates_performed < updates_to_perform: updates_performed += 1 id = random.randrange(1, num_rows) - row = await pg_conn.fetchrow(f'UPDATE foo SET counter = counter + 1 WHERE id = {id}') + row = await pg_conn.fetchrow(f"UPDATE foo SET counter = counter + 1 WHERE id = {id}") # Perform aggressive GC with 0 horizon @@ -57,24 +57,26 @@ def test_gc_aggressive(neon_env_builder: NeonEnvBuilder): neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}" env = neon_env_builder.init_start() env.neon_cli.create_branch("test_gc_aggressive", "main") - pg = env.postgres.create_start('test_gc_aggressive') - log.info('postgres is running on test_gc_aggressive branch') + pg = env.postgres.create_start("test_gc_aggressive") + log.info("postgres is running on test_gc_aggressive branch") with pg.cursor() as cur: timeline = query_scalar(cur, "SHOW neon.timeline_id") # Create table, and insert the first 100 rows - cur.execute('CREATE TABLE foo (id int, counter int, t text)') - cur.execute(f''' + cur.execute("CREATE TABLE foo (id int, counter int, t text)") + cur.execute( + f""" INSERT INTO foo SELECT g, 0, 'long string to consume some space' || g FROM generate_series(1, {num_rows}) g - ''') - cur.execute('CREATE INDEX ON foo(id)') + """ + ) + cur.execute("CREATE INDEX ON foo(id)") asyncio.run(update_and_gc(env, pg, timeline)) - cur.execute('SELECT COUNT(*), SUM(counter) FROM foo') + cur.execute("SELECT COUNT(*), SUM(counter) FROM foo") r = cur.fetchone() assert r is not None assert r == (num_rows, updates_to_perform) diff --git a/test_runner/batch_others/test_import.py 
b/test_runner/batch_others/test_import.py index 039945e5e4..a2671727f7 100644 --- a/test_runner/batch_others/test_import.py +++ b/test_runner/batch_others/test_import.py @@ -1,17 +1,24 @@ -import re -import pytest -from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, PgBin, Postgres, wait_for_upload, wait_for_last_record_lsn -from fixtures.utils import lsn_from_hex -from uuid import UUID, uuid4 -import os -import tarfile -import shutil -from pathlib import Path import json -from fixtures.utils import subprocess_capture -from fixtures.log_helper import log +import os +import re +import shutil +import tarfile from contextlib import closing -from fixtures.neon_fixtures import pg_distrib_dir +from pathlib import Path +from uuid import UUID, uuid4 + +import pytest +from fixtures.log_helper import log +from fixtures.neon_fixtures import ( + NeonEnv, + NeonEnvBuilder, + PgBin, + Postgres, + pg_distrib_dir, + wait_for_last_record_lsn, + wait_for_upload, +) +from fixtures.utils import lsn_from_hex, subprocess_capture @pytest.mark.timeout(600) @@ -19,9 +26,11 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build # Put data in vanilla pg vanilla_pg.start() vanilla_pg.safe_psql("create user cloud_admin with password 'postgres' superuser") - vanilla_pg.safe_psql('''create table t as select 'long string to consume some space' || g - from generate_series(1,300000) g''') - assert vanilla_pg.safe_psql('select count(*) from t') == [(300000, )] + vanilla_pg.safe_psql( + """create table t as select 'long string to consume some space' || g + from generate_series(1,300000) g""" + ) + assert vanilla_pg.safe_psql("select count(*) from t") == [(300000,)] # Take basebackup basebackup_dir = os.path.join(test_output_dir, "basebackup") @@ -29,15 +38,17 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build wal_tar = os.path.join(basebackup_dir, "pg_wal.tar") os.mkdir(basebackup_dir) vanilla_pg.safe_psql("CHECKPOINT") - 
pg_bin.run([ - "pg_basebackup", - "-F", - "tar", - "-d", - vanilla_pg.connstr(), - "-D", - basebackup_dir, - ]) + pg_bin.run( + [ + "pg_basebackup", + "-F", + "tar", + "-d", + vanilla_pg.connstr(), + "-D", + basebackup_dir, + ] + ) # Make corrupt base tar with missing pg_control unpacked_base = os.path.join(basebackup_dir, "unpacked-base") @@ -45,9 +56,11 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build os.mkdir(unpacked_base, 0o750) subprocess_capture(str(test_output_dir), ["tar", "-xf", base_tar, "-C", unpacked_base]) os.remove(os.path.join(unpacked_base, "global/pg_control")) - subprocess_capture(str(test_output_dir), - ["tar", "-cf", "corrupt-base.tar"] + os.listdir(unpacked_base), - cwd=unpacked_base) + subprocess_capture( + str(test_output_dir), + ["tar", "-cf", "corrupt-base.tar"] + os.listdir(unpacked_base), + cwd=unpacked_base, + ) # Get start_lsn and end_lsn with open(os.path.join(basebackup_dir, "backup_manifest")) as f: @@ -65,24 +78,26 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build env.pageserver.http_client().tenant_create(tenant) def import_tar(base, wal): - env.neon_cli.raw_cli([ - "timeline", - "import", - "--tenant-id", - tenant.hex, - "--timeline-id", - timeline.hex, - "--node-name", - node_name, - "--base-lsn", - start_lsn, - "--base-tarfile", - base, - "--end-lsn", - end_lsn, - "--wal-tarfile", - wal, - ]) + env.neon_cli.raw_cli( + [ + "timeline", + "import", + "--tenant-id", + tenant.hex, + "--timeline-id", + timeline.hex, + "--node-name", + node_name, + "--base-lsn", + start_lsn, + "--base-tarfile", + base, + "--end-lsn", + end_lsn, + "--wal-tarfile", + wal, + ] + ) # Importing corrupt backup fails with pytest.raises(Exception): @@ -102,7 +117,7 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build # Check it worked pg = env.postgres.create_start(node_name, tenant_id=tenant) - assert pg.safe_psql('select count(*) from t') == [(300000, )] + 
assert pg.safe_psql("select count(*) from t") == [(300000,)] @pytest.mark.timeout(600) @@ -111,8 +126,8 @@ def test_import_from_pageserver_small(pg_bin: PgBin, neon_env_builder: NeonEnvBu neon_env_builder.enable_local_fs_remote_storage() env = neon_env_builder.init_start() - timeline = env.neon_cli.create_branch('test_import_from_pageserver_small') - pg = env.postgres.create_start('test_import_from_pageserver_small') + timeline = env.neon_cli.create_branch("test_import_from_pageserver_small") + pg = env.postgres.create_start("test_import_from_pageserver_small") num_rows = 3000 lsn = _generate_data(num_rows, pg) @@ -129,8 +144,8 @@ def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: Ne neon_env_builder.enable_local_fs_remote_storage() env = neon_env_builder.init_start() - timeline = env.neon_cli.create_branch('test_import_from_pageserver_multisegment') - pg = env.postgres.create_start('test_import_from_pageserver_multisegment') + timeline = env.neon_cli.create_branch("test_import_from_pageserver_multisegment") + pg = env.postgres.create_start("test_import_from_pageserver_multisegment") # For `test_import_from_pageserver_multisegment`, we want to make sure that the data # is large enough to create multi-segment files. 
Typically, a segment file's size is @@ -139,8 +154,9 @@ def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: Ne num_rows = 30000000 lsn = _generate_data(num_rows, pg) - logical_size = env.pageserver.http_client().timeline_detail( - env.initial_tenant, timeline)['local']['current_logical_size'] + logical_size = env.pageserver.http_client().timeline_detail(env.initial_tenant, timeline)[ + "local" + ]["current_logical_size"] log.info(f"timeline logical size = {logical_size / (1024 ** 2)}MB") assert logical_size > 1024**3 # = 1GB @@ -148,7 +164,7 @@ def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: Ne # Check if the backup data contains multiple segment files cnt_seg_files = 0 - segfile_re = re.compile('[0-9]+\\.[0-9]+') + segfile_re = re.compile("[0-9]+\\.[0-9]+") with tarfile.open(tar_output_file, "r") as tar_f: for f in tar_f.getnames(): if segfile_re.search(f) is not None: @@ -166,11 +182,13 @@ def _generate_data(num_rows: int, pg: Postgres) -> str: with conn.cursor() as cur: # data loading may take a while, so increase statement timeout cur.execute("SET statement_timeout='300s'") - cur.execute(f'''CREATE TABLE tbl AS SELECT 'long string to consume some space' || g - from generate_series(1,{num_rows}) g''') + cur.execute( + f"""CREATE TABLE tbl AS SELECT 'long string to consume some space' || g + from generate_series(1,{num_rows}) g""" + ) cur.execute("CHECKPOINT") - cur.execute('SELECT pg_current_wal_insert_lsn()') + cur.execute("SELECT pg_current_wal_insert_lsn()") res = cur.fetchone() assert res is not None and isinstance(res[0], str) return res[0] @@ -189,7 +207,7 @@ def _import(expected_num_rows: int, lsn: str, env: NeonEnv, pg_bin: PgBin, timel # Set LD_LIBRARY_PATH in the env properly, otherwise we may use the wrong libpq. # PgBin sets it automatically, but here we need to pipe psql output to the tar command. 
- psql_env = {'LD_LIBRARY_PATH': os.path.join(str(pg_distrib_dir), 'lib')} + psql_env = {"LD_LIBRARY_PATH": os.path.join(str(pg_distrib_dir), "lib")} # Get a fullbackup from pageserver query = f"fullbackup { env.initial_tenant.hex} {timeline.hex} {lsn}" @@ -201,11 +219,11 @@ def _import(expected_num_rows: int, lsn: str, env: NeonEnv, pg_bin: PgBin, timel env.postgres.stop_all() env.pageserver.stop() - dir_to_clear = Path(env.repo_dir) / 'tenants' + dir_to_clear = Path(env.repo_dir) / "tenants" shutil.rmtree(dir_to_clear) os.mkdir(dir_to_clear) - #start the pageserver again + # start the pageserver again env.pageserver.start() # Import using another tenantid, because we use the same pageserver. @@ -216,20 +234,22 @@ def _import(expected_num_rows: int, lsn: str, env: NeonEnv, pg_bin: PgBin, timel node_name = "import_from_pageserver" client = env.pageserver.http_client() client.tenant_create(tenant) - env.neon_cli.raw_cli([ - "timeline", - "import", - "--tenant-id", - tenant.hex, - "--timeline-id", - timeline.hex, - "--node-name", - node_name, - "--base-lsn", - lsn, - "--base-tarfile", - os.path.join(tar_output_file), - ]) + env.neon_cli.raw_cli( + [ + "timeline", + "import", + "--tenant-id", + tenant.hex, + "--timeline-id", + timeline.hex, + "--node-name", + node_name, + "--base-lsn", + lsn, + "--base-tarfile", + os.path.join(tar_output_file), + ] + ) # Wait for data to land in s3 wait_for_last_record_lsn(client, tenant, timeline, lsn_from_hex(lsn)) @@ -237,7 +257,7 @@ def _import(expected_num_rows: int, lsn: str, env: NeonEnv, pg_bin: PgBin, timel # Check it worked pg = env.postgres.create_start(node_name, tenant_id=tenant) - assert pg.safe_psql('select count(*) from tbl') == [(expected_num_rows, )] + assert pg.safe_psql("select count(*) from tbl") == [(expected_num_rows,)] # Take another fullbackup query = f"fullbackup { tenant.hex} {timeline.hex} {lsn}" diff --git a/test_runner/batch_others/test_large_schema.py b/test_runner/batch_others/test_large_schema.py index 
18ae0614a9..f14265f6fd 100644 --- a/test_runner/batch_others/test_large_schema.py +++ b/test_runner/batch_others/test_large_schema.py @@ -1,7 +1,8 @@ -import time import os -from fixtures.neon_fixtures import NeonEnvBuilder +import time + from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnvBuilder # This test creates large number of tables which cause large catalog. @@ -14,7 +15,7 @@ from fixtures.log_helper import log def test_large_schema(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() - pg = env.postgres.create_start('main') + pg = env.postgres.create_start("main") conn = pg.connect() cur = conn.cursor() @@ -22,7 +23,7 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder): tables = 2 # 10 is too much for debug build partitions = 1000 for i in range(1, tables + 1): - print(f'iteration {i} / {tables}') + print(f"iteration {i} / {tables}") # Restart compute. Restart is actually not strictly needed. # It is done mostly because this test originally tries to model the problem reported by Ketteq. @@ -52,10 +53,10 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder): # It's normal that it takes some time for the pageserver to # restart, and for the connection to fail until it does. It # should eventually recover, so retry until it succeeds. 
- print(f'failed: {error}') + print(f"failed: {error}") if retries < max_retries: retries += 1 - print(f'retry {retries} / {max_retries}') + print(f"retry {retries} / {max_retries}") time.sleep(retry_sleep) continue else: @@ -67,7 +68,7 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder): for i in range(1, tables + 1): cur.execute(f"SELECT count(*) FROM t_{i}") - assert cur.fetchone() == (partitions, ) + assert cur.fetchone() == (partitions,) cur.execute("set enable_sort=off") cur.execute("select * from pg_depend order by refclassid, refobjid, refobjsubid") @@ -77,6 +78,6 @@ def test_large_schema(neon_env_builder: NeonEnvBuilder): timeline_id = pg.safe_psql("show neon.timeline_id")[0][0] timeline_path = "{}/tenants/{}/timelines/{}/".format(env.repo_dir, tenant_id, timeline_id) for filename in os.listdir(timeline_path): - if filename.startswith('00000'): - log.info(f'layer {filename} size is {os.path.getsize(timeline_path + filename)}') + if filename.startswith("00000"): + log.info(f"layer {filename} size is {os.path.getsize(timeline_path + filename)}") assert os.path.getsize(timeline_path + filename) < 512_000_000 diff --git a/test_runner/batch_others/test_lsn_mapping.py b/test_runner/batch_others/test_lsn_mapping.py index d8b207135e..4db6951b42 100644 --- a/test_runner/batch_others/test_lsn_mapping.py +++ b/test_runner/batch_others/test_lsn_mapping.py @@ -1,13 +1,13 @@ +import math +import time from contextlib import closing from datetime import timedelta, timezone, tzinfo -import math from uuid import UUID -import psycopg2.extras -import psycopg2.errors -from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres -from fixtures.log_helper import log -import time +import psycopg2.errors +import psycopg2.extras +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres from fixtures.utils import query_scalar @@ -18,7 +18,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder): 
neon_env_builder.num_safekeepers = 1 env = neon_env_builder.init_start() - new_timeline_id = env.neon_cli.create_branch('test_lsn_mapping') + new_timeline_id = env.neon_cli.create_branch("test_lsn_mapping") pgmain = env.postgres.create_start("test_lsn_mapping") log.info("postgres is running on 'test_lsn_mapping' branch") @@ -35,7 +35,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder): for i in range(1000): cur.execute(f"INSERT INTO foo VALUES({i})") # Get the timestamp at UTC - after_timestamp = query_scalar(cur, 'SELECT clock_timestamp()').replace(tzinfo=None) + after_timestamp = query_scalar(cur, "SELECT clock_timestamp()").replace(tzinfo=None) tbl.append([i, after_timestamp]) # Execute one more transaction with synchronous_commit enabled, to flush @@ -47,17 +47,17 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder): probe_timestamp = tbl[-1][1] + timedelta(hours=1) result = query_scalar( ps_cur, - f"get_lsn_by_timestamp {env.initial_tenant.hex} {new_timeline_id.hex} '{probe_timestamp.isoformat()}Z'" + f"get_lsn_by_timestamp {env.initial_tenant.hex} {new_timeline_id.hex} '{probe_timestamp.isoformat()}Z'", ) - assert result == 'future' + assert result == "future" # timestamp too the far history probe_timestamp = tbl[0][1] - timedelta(hours=10) result = query_scalar( ps_cur, - f"get_lsn_by_timestamp {env.initial_tenant.hex} {new_timeline_id.hex} '{probe_timestamp.isoformat()}Z'" + f"get_lsn_by_timestamp {env.initial_tenant.hex} {new_timeline_id.hex} '{probe_timestamp.isoformat()}Z'", ) - assert result == 'past' + assert result == "past" # Probe a bunch of timestamps in the valid range for i in range(1, len(tbl), 100): @@ -66,14 +66,14 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder): # Call get_lsn_by_timestamp to get the LSN lsn = query_scalar( ps_cur, - f"get_lsn_by_timestamp {env.initial_tenant.hex} {new_timeline_id.hex} '{probe_timestamp.isoformat()}Z'" + f"get_lsn_by_timestamp {env.initial_tenant.hex} {new_timeline_id.hex} 
'{probe_timestamp.isoformat()}Z'", ) # Launch a new read-only node at that LSN, and check that only the rows # that were supposed to be committed at that point in time are visible. - pg_here = env.postgres.create_start(branch_name='test_lsn_mapping', - node_name='test_lsn_mapping_read', - lsn=lsn) + pg_here = env.postgres.create_start( + branch_name="test_lsn_mapping", node_name="test_lsn_mapping_read", lsn=lsn + ) assert pg_here.safe_psql("SELECT max(x) FROM foo")[0][0] == i pg_here.stop_and_destroy() diff --git a/test_runner/batch_others/test_multixact.py b/test_runner/batch_others/test_multixact.py index dd00066092..635beb16b7 100644 --- a/test_runner/batch_others/test_multixact.py +++ b/test_runner/batch_others/test_multixact.py @@ -1,5 +1,5 @@ -from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content from fixtures.utils import query_scalar @@ -11,18 +11,21 @@ from fixtures.utils import query_scalar # def test_multixact(neon_simple_env: NeonEnv, test_output_dir): env = neon_simple_env - env.neon_cli.create_branch('test_multixact', 'empty') - pg = env.postgres.create_start('test_multixact') + env.neon_cli.create_branch("test_multixact", "empty") + pg = env.postgres.create_start("test_multixact") log.info("postgres is running on 'test_multixact' branch") cur = pg.connect().cursor() - cur.execute(''' + cur.execute( + """ CREATE TABLE t1(i int primary key); INSERT INTO t1 select * from generate_series(1, 100); - ''') + """ + ) - next_multixact_id_old = query_scalar(cur, - 'SELECT next_multixact_id FROM pg_control_checkpoint()') + next_multixact_id_old = query_scalar( + cur, "SELECT next_multixact_id FROM pg_control_checkpoint()" + ) # Lock entries using parallel connections in a round-robin fashion. 
nclients = 20 @@ -40,17 +43,18 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir): for i in range(5000): conn = connections[i % nclients] conn.commit() - conn.cursor().execute('select * from t1 for key share') + conn.cursor().execute("select * from t1 for key share") # We have multixacts now. We can close the connections. for c in connections: c.close() # force wal flush - cur.execute('checkpoint') + cur.execute("checkpoint") cur.execute( - 'SELECT next_multixact_id, pg_current_wal_insert_lsn() FROM pg_control_checkpoint()') + "SELECT next_multixact_id, pg_current_wal_insert_lsn() FROM pg_control_checkpoint()" + ) res = cur.fetchone() assert res is not None next_multixact_id = res[0] @@ -60,12 +64,13 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir): assert int(next_multixact_id) > int(next_multixact_id_old) # Branch at this point - env.neon_cli.create_branch('test_multixact_new', 'test_multixact', ancestor_start_lsn=lsn) - pg_new = env.postgres.create_start('test_multixact_new') + env.neon_cli.create_branch("test_multixact_new", "test_multixact", ancestor_start_lsn=lsn) + pg_new = env.postgres.create_start("test_multixact_new") log.info("postgres is running on 'test_multixact_new' branch") next_multixact_id_new = pg_new.safe_psql( - 'SELECT next_multixact_id FROM pg_control_checkpoint()')[0][0] + "SELECT next_multixact_id FROM pg_control_checkpoint()" + )[0][0] # Check that we restored pg_controlfile correctly assert next_multixact_id_new == next_multixact_id diff --git a/test_runner/batch_others/test_neon_cli.py b/test_runner/batch_others/test_neon_cli.py index 728bc7b894..1acfa72127 100644 --- a/test_runner/batch_others/test_neon_cli.py +++ b/test_runner/batch_others/test_neon_cli.py @@ -1,21 +1,29 @@ import uuid -import requests - -from fixtures.neon_fixtures import DEFAULT_BRANCH_NAME, NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient from typing import cast +import requests +from fixtures.neon_fixtures import ( + 
DEFAULT_BRANCH_NAME, + NeonEnv, + NeonEnvBuilder, + NeonPageserverHttpClient, +) -def helper_compare_timeline_list(pageserver_http_client: NeonPageserverHttpClient, - env: NeonEnv, - initial_tenant: uuid.UUID): + +def helper_compare_timeline_list( + pageserver_http_client: NeonPageserverHttpClient, env: NeonEnv, initial_tenant: uuid.UUID +): """ Compare timelines list returned by CLI and directly via API. Filters out timelines created by other tests. """ timelines_api = sorted( - map(lambda t: cast(str, t['timeline_id']), - pageserver_http_client.timeline_list(initial_tenant))) + map( + lambda t: cast(str, t["timeline_id"]), + pageserver_http_client.timeline_list(initial_tenant), + ) + ) timelines_cli = env.neon_cli.list_timelines() assert timelines_cli == env.neon_cli.list_timelines(initial_tenant) @@ -32,12 +40,13 @@ def test_cli_timeline_list(neon_simple_env: NeonEnv): helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant) # Create a branch for us - main_timeline_id = env.neon_cli.create_branch('test_cli_branch_list_main') + main_timeline_id = env.neon_cli.create_branch("test_cli_branch_list_main") helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant) # Create a nested branch - nested_timeline_id = env.neon_cli.create_branch('test_cli_branch_list_nested', - 'test_cli_branch_list_main') + nested_timeline_id = env.neon_cli.create_branch( + "test_cli_branch_list_nested", "test_cli_branch_list_main" + ) helper_compare_timeline_list(pageserver_http_client, env, env.initial_tenant) # Check that all new branches are visible via CLI @@ -49,7 +58,7 @@ def test_cli_timeline_list(neon_simple_env: NeonEnv): def helper_compare_tenant_list(pageserver_http_client: NeonPageserverHttpClient, env: NeonEnv): tenants = pageserver_http_client.tenant_list() - tenants_api = sorted(map(lambda t: cast(str, t['id']), tenants)) + tenants_api = sorted(map(lambda t: cast(str, t["id"]), tenants)) res = env.neon_cli.list_tenants() tenants_cli = 
sorted(map(lambda t: t.split()[0], res.stdout.splitlines())) @@ -97,7 +106,7 @@ def test_cli_ipv4_listeners(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() # Connect to sk port on v4 loopback - res = requests.get(f'http://127.0.0.1:{env.safekeepers[0].port.http}/v1/status') + res = requests.get(f"http://127.0.0.1:{env.safekeepers[0].port.http}/v1/status") assert res.ok # FIXME Test setup is using localhost:xx in ps config. diff --git a/test_runner/batch_others/test_next_xid.py b/test_runner/batch_others/test_next_xid.py index f8d11a9381..698ea0e1d3 100644 --- a/test_runner/batch_others/test_next_xid.py +++ b/test_runner/batch_others/test_next_xid.py @@ -8,15 +8,15 @@ from fixtures.neon_fixtures import NeonEnvBuilder def test_next_xid(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() - pg = env.postgres.create_start('main') + pg = env.postgres.create_start("main") conn = pg.connect() cur = conn.cursor() - cur.execute('CREATE TABLE t(x integer)') + cur.execute("CREATE TABLE t(x integer)") iterations = 32 for i in range(1, iterations + 1): - print(f'iteration {i} / {iterations}') + print(f"iteration {i} / {iterations}") # Kill and restart the pageserver. pg.stop() @@ -38,10 +38,10 @@ def test_next_xid(neon_env_builder: NeonEnvBuilder): # It's normal that it takes some time for the pageserver to # restart, and for the connection to fail until it does. It # should eventually recover, so retry until it succeeds. 
- print(f'failed: {error}') + print(f"failed: {error}") if retries < max_retries: retries += 1 - print(f'retry {retries} / {max_retries}') + print(f"retry {retries} / {max_retries}") time.sleep(retry_sleep) continue else: @@ -51,4 +51,4 @@ def test_next_xid(neon_env_builder: NeonEnvBuilder): conn = pg.connect() cur = conn.cursor() cur.execute("SELECT count(*) FROM t") - assert cur.fetchone() == (iterations, ) + assert cur.fetchone() == (iterations,) diff --git a/test_runner/batch_others/test_normal_work.py b/test_runner/batch_others/test_normal_work.py index 5b25691517..002d697288 100644 --- a/test_runner/batch_others/test_normal_work.py +++ b/test_runner/batch_others/test_normal_work.py @@ -1,33 +1,35 @@ +import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient -import pytest def check_tenant(env: NeonEnv, pageserver_http: NeonPageserverHttpClient): tenant_id, timeline_id = env.neon_cli.create_tenant() - pg = env.postgres.create_start('main', tenant_id=tenant_id) + pg = env.postgres.create_start("main", tenant_id=tenant_id) # we rely upon autocommit after each statement - res_1 = pg.safe_psql_many(queries=[ - 'CREATE TABLE t(key int primary key, value text)', - 'INSERT INTO t SELECT generate_series(1,100000), \'payload\'', - 'SELECT sum(key) FROM t', - ]) + res_1 = pg.safe_psql_many( + queries=[ + "CREATE TABLE t(key int primary key, value text)", + "INSERT INTO t SELECT generate_series(1,100000), 'payload'", + "SELECT sum(key) FROM t", + ] + ) - assert res_1[-1][0] == (5000050000, ) + assert res_1[-1][0] == (5000050000,) # TODO check detach on live instance log.info("stopping compute") pg.stop() log.info("compute stopped") pg.start() - res_2 = pg.safe_psql('SELECT sum(key) FROM t') - assert res_2[0] == (5000050000, ) + res_2 = pg.safe_psql("SELECT sum(key) FROM t") + assert res_2[0] == (5000050000,) pg.stop() pageserver_http.tenant_detach(tenant_id) 
-@pytest.mark.parametrize('num_timelines,num_safekeepers', [(3, 1)]) +@pytest.mark.parametrize("num_timelines,num_safekeepers", [(3, 1)]) def test_normal_work(neon_env_builder: NeonEnvBuilder, num_timelines: int, num_safekeepers: int): """ Basic test: diff --git a/test_runner/batch_others/test_old_request_lsn.py b/test_runner/batch_others/test_old_request_lsn.py index 78a936af19..257913ef3f 100644 --- a/test_runner/batch_others/test_old_request_lsn.py +++ b/test_runner/batch_others/test_old_request_lsn.py @@ -1,7 +1,7 @@ -from fixtures.neon_fixtures import NeonEnvBuilder -from fixtures.log_helper import log -from fixtures.utils import print_gc_result, query_scalar import psycopg2.extras +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnvBuilder +from fixtures.utils import print_gc_result, query_scalar # @@ -19,8 +19,8 @@ def test_old_request_lsn(neon_env_builder: NeonEnvBuilder): neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}" env = neon_env_builder.init_start() env.neon_cli.create_branch("test_old_request_lsn", "main") - pg = env.postgres.create_start('test_old_request_lsn') - log.info('postgres is running on test_old_request_lsn branch') + pg = env.postgres.create_start("test_old_request_lsn") + log.info("postgres is running on test_old_request_lsn branch") pg_conn = pg.connect() cur = pg_conn.cursor() @@ -33,25 +33,29 @@ def test_old_request_lsn(neon_env_builder: NeonEnvBuilder): # Create table, and insert some rows. Make it big enough that it doesn't fit in # shared_buffers. - cur.execute('CREATE TABLE foo (id int4 PRIMARY KEY, val int, t text)') - cur.execute(''' + cur.execute("CREATE TABLE foo (id int4 PRIMARY KEY, val int, t text)") + cur.execute( + """ INSERT INTO foo SELECT g, 1, 'long string to consume some space' || g FROM generate_series(1, 100000) g - ''') + """ + ) # Verify that the table is larger than shared_buffers, so that the SELECT below # will cause GetPage requests. 
- cur.execute(''' + cur.execute( + """ select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('foo') as tbl_ize from pg_settings where name = 'shared_buffers' - ''') + """ + ) row = cur.fetchone() assert row is not None - log.info(f'shared_buffers is {row[0]}, table size {row[1]}') + log.info(f"shared_buffers is {row[0]}, table size {row[1]}") assert int(row[0]) < int(row[1]) - cur.execute('VACUUM foo') + cur.execute("VACUUM foo") # Make a lot of updates on a single row, generating a lot of WAL. Trigger # garbage collections so that the page server will remove old page versions. @@ -61,7 +65,7 @@ def test_old_request_lsn(neon_env_builder: NeonEnvBuilder): print_gc_result(row) for j in range(100): - cur.execute('UPDATE foo SET val = val + 1 WHERE id = 1;') + cur.execute("UPDATE foo SET val = val + 1 WHERE id = 1;") # All (or at least most of) the updates should've been on the same page, so # that we haven't had to evict any dirty pages for a long time. Now run diff --git a/test_runner/batch_others/test_pageserver_api.py b/test_runner/batch_others/test_pageserver_api.py index 710b220ae8..5d7619c1b2 100644 --- a/test_runner/batch_others/test_pageserver_api.py +++ b/test_runner/batch_others/test_pageserver_api.py @@ -1,54 +1,65 @@ -from typing import Optional -from uuid import uuid4, UUID -import pytest -import pathlib import os +import pathlib import subprocess -from fixtures.utils import lsn_from_hex +from typing import Optional +from uuid import UUID, uuid4 + +import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import ( DEFAULT_BRANCH_NAME, NeonEnv, NeonEnvBuilder, - NeonPageserverHttpClient, NeonPageserverApiException, - wait_until, + NeonPageserverHttpClient, neon_binpath, pg_distrib_dir, + wait_until, ) +from fixtures.utils import lsn_from_hex # test that we cannot override node id after init def test_pageserver_init_node_id(neon_simple_env: NeonEnv): repo_dir = neon_simple_env.repo_dir - pageserver_config = 
repo_dir / 'pageserver.toml' - pageserver_bin = pathlib.Path(neon_binpath) / 'pageserver' - run_pageserver = lambda args: subprocess.run([str(pageserver_bin), '-D', str(repo_dir), *args], - check=False, - universal_newlines=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + pageserver_config = repo_dir / "pageserver.toml" + pageserver_bin = pathlib.Path(neon_binpath) / "pageserver" + run_pageserver = lambda args: subprocess.run( + [str(pageserver_bin), "-D", str(repo_dir), *args], + check=False, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) # remove initial config pageserver_config.unlink() - bad_init = run_pageserver(['--init', '-c', f'pg_distrib_dir="{pg_distrib_dir}"']) - assert bad_init.returncode == 1, 'pageserver should not be able to init new config without the node id' + bad_init = run_pageserver(["--init", "-c", f'pg_distrib_dir="{pg_distrib_dir}"']) + assert ( + bad_init.returncode == 1 + ), "pageserver should not be able to init new config without the node id" assert "missing id" in bad_init.stderr - assert not pageserver_config.exists(), 'config file should not be created after init error' + assert not pageserver_config.exists(), "config file should not be created after init error" completed_init = run_pageserver( - ['--init', '-c', 'id = 12345', '-c', f'pg_distrib_dir="{pg_distrib_dir}"']) - assert completed_init.returncode == 0, 'pageserver should be able to create a new config with the node id given' - assert pageserver_config.exists(), 'config file should be created successfully' + ["--init", "-c", "id = 12345", "-c", f'pg_distrib_dir="{pg_distrib_dir}"'] + ) + assert ( + completed_init.returncode == 0 + ), "pageserver should be able to create a new config with the node id given" + assert pageserver_config.exists(), "config file should be created successfully" bad_reinit = run_pageserver( - ['--init', '-c', 'id = 12345', '-c', f'pg_distrib_dir="{pg_distrib_dir}"']) - assert bad_reinit.returncode == 1, 
'pageserver should not be able to init new config without the node id' + ["--init", "-c", "id = 12345", "-c", f'pg_distrib_dir="{pg_distrib_dir}"'] + ) + assert ( + bad_reinit.returncode == 1 + ), "pageserver should not be able to init new config without the node id" assert "already exists, cannot init it" in bad_reinit.stderr - bad_update = run_pageserver(['--update-config', '-c', 'id = 3']) - assert bad_update.returncode == 1, 'pageserver should not allow updating node id' + bad_update = run_pageserver(["--update-config", "-c", "id = 3"]) + assert bad_update.returncode == 1, "pageserver should not allow updating node id" assert "has node id already, it cannot be overridden" in bad_update.stderr @@ -56,12 +67,12 @@ def check_client(client: NeonPageserverHttpClient, initial_tenant: UUID): client.check_status() # check initial tenant is there - assert initial_tenant.hex in {t['id'] for t in client.tenant_list()} + assert initial_tenant.hex in {t["id"] for t in client.tenant_list()} # create new tenant and check it is also there tenant_id = uuid4() client.tenant_create(tenant_id) - assert tenant_id.hex in {t['id'] for t in client.tenant_list()} + assert tenant_id.hex in {t["id"] for t in client.tenant_list()} timelines = client.timeline_list(tenant_id) assert len(timelines) == 0, "initial tenant should not have any timelines" @@ -74,19 +85,21 @@ def check_client(client: NeonPageserverHttpClient, initial_tenant: UUID): assert len(timelines) > 0 # check it is there - assert timeline_id.hex in {b['timeline_id'] for b in client.timeline_list(tenant_id)} + assert timeline_id.hex in {b["timeline_id"] for b in client.timeline_list(tenant_id)} for timeline in timelines: - timeline_id_str = str(timeline['timeline_id']) - timeline_details = client.timeline_detail(tenant_id=tenant_id, - timeline_id=UUID(timeline_id_str), - include_non_incremental_logical_size=True) + timeline_id_str = str(timeline["timeline_id"]) + timeline_details = client.timeline_detail( + 
tenant_id=tenant_id, + timeline_id=UUID(timeline_id_str), + include_non_incremental_logical_size=True, + ) - assert timeline_details['tenant_id'] == tenant_id.hex - assert timeline_details['timeline_id'] == timeline_id_str + assert timeline_details["tenant_id"] == tenant_id.hex + assert timeline_details["timeline_id"] == timeline_id_str - local_timeline_details = timeline_details.get('local') + local_timeline_details = timeline_details.get("local") assert local_timeline_details is not None - assert local_timeline_details['timeline_state'] == 'Loaded' + assert local_timeline_details["timeline_state"] == "Loaded" def test_pageserver_http_get_wal_receiver_not_found(neon_simple_env: NeonEnv): @@ -94,32 +107,43 @@ def test_pageserver_http_get_wal_receiver_not_found(neon_simple_env: NeonEnv): with env.pageserver.http_client() as client: tenant_id, timeline_id = env.neon_cli.create_tenant() - timeline_details = client.timeline_detail(tenant_id=tenant_id, - timeline_id=timeline_id, - include_non_incremental_logical_size=True) + timeline_details = client.timeline_detail( + tenant_id=tenant_id, timeline_id=timeline_id, include_non_incremental_logical_size=True + ) - assert timeline_details.get('wal_source_connstr') is None, 'Should not be able to connect to WAL streaming without PG compute node running' - assert timeline_details.get('last_received_msg_lsn') is None, 'Should not be able to connect to WAL streaming without PG compute node running' - assert timeline_details.get('last_received_msg_ts') is None, 'Should not be able to connect to WAL streaming without PG compute node running' + assert ( + timeline_details.get("wal_source_connstr") is None + ), "Should not be able to connect to WAL streaming without PG compute node running" + assert ( + timeline_details.get("last_received_msg_lsn") is None + ), "Should not be able to connect to WAL streaming without PG compute node running" + assert ( + timeline_details.get("last_received_msg_ts") is None + ), "Should not be able 
to connect to WAL streaming without PG compute node running" -def expect_updated_msg_lsn(client: NeonPageserverHttpClient, - tenant_id: UUID, - timeline_id: UUID, - prev_msg_lsn: Optional[int]) -> int: +def expect_updated_msg_lsn( + client: NeonPageserverHttpClient, + tenant_id: UUID, + timeline_id: UUID, + prev_msg_lsn: Optional[int], +) -> int: timeline_details = client.timeline_detail(tenant_id, timeline_id=timeline_id) # a successful `timeline_details` response must contain the below fields - local_timeline_details = timeline_details['local'] + local_timeline_details = timeline_details["local"] assert "wal_source_connstr" in local_timeline_details.keys() assert "last_received_msg_lsn" in local_timeline_details.keys() assert "last_received_msg_ts" in local_timeline_details.keys() - assert local_timeline_details["last_received_msg_lsn"] is not None, "the last received message's LSN is empty" + assert ( + local_timeline_details["last_received_msg_lsn"] is not None + ), "the last received message's LSN is empty" last_msg_lsn = lsn_from_hex(local_timeline_details["last_received_msg_lsn"]) - assert prev_msg_lsn is None or prev_msg_lsn < last_msg_lsn, \ - f"the last received message's LSN {last_msg_lsn} hasn't been updated \ + assert ( + prev_msg_lsn is None or prev_msg_lsn < last_msg_lsn + ), f"the last received message's LSN {last_msg_lsn} hasn't been updated \ compared to the previous message's LSN {prev_msg_lsn}" return last_msg_lsn @@ -139,15 +163,19 @@ def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv): # We need to wait here because it's possible that we don't have access to # the latest WAL yet, when the `timeline_detail` API is first called. # See: https://github.com/neondatabase/neon/issues/1768. 
- lsn = wait_until(number_of_iterations=5, - interval=1, - func=lambda: expect_updated_msg_lsn(client, tenant_id, timeline_id, None)) + lsn = wait_until( + number_of_iterations=5, + interval=1, + func=lambda: expect_updated_msg_lsn(client, tenant_id, timeline_id, None), + ) # Make a DB modification then expect getting a new WAL receiver's data. pg.safe_psql("CREATE TABLE t(key int primary key, value text)") - wait_until(number_of_iterations=5, - interval=1, - func=lambda: expect_updated_msg_lsn(client, tenant_id, timeline_id, lsn)) + wait_until( + number_of_iterations=5, + interval=1, + func=lambda: expect_updated_msg_lsn(client, tenant_id, timeline_id, lsn), + ) def test_pageserver_http_api_client(neon_simple_env: NeonEnv): diff --git a/test_runner/batch_others/test_pageserver_catchup.py b/test_runner/batch_others/test_pageserver_catchup.py index dd24351e17..cba3203591 100644 --- a/test_runner/batch_others/test_pageserver_catchup.py +++ b/test_runner/batch_others/test_pageserver_catchup.py @@ -9,24 +9,27 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder) neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_pageserver_catchup_while_compute_down') + env.neon_cli.create_branch("test_pageserver_catchup_while_compute_down") # Make shared_buffers large to ensure we won't query pageserver while it is down. - pg = env.postgres.create_start('test_pageserver_catchup_while_compute_down', - config_lines=['shared_buffers=512MB']) + pg = env.postgres.create_start( + "test_pageserver_catchup_while_compute_down", config_lines=["shared_buffers=512MB"] + ) pg_conn = pg.connect() cur = pg_conn.cursor() # Create table, and insert some rows. 
- cur.execute('CREATE TABLE foo (t text)') - cur.execute(''' + cur.execute("CREATE TABLE foo (t text)") + cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 10000) g - ''') + """ + ) cur.execute("SELECT count(*) FROM foo") - assert cur.fetchone() == (10000, ) + assert cur.fetchone() == (10000,) # Stop and restart pageserver. This is a more or less graceful shutdown, although # the page server doesn't currently have a shutdown routine so there's no difference @@ -35,11 +38,13 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder) # insert some more rows # since pageserver is shut down, these will be only on safekeepers - cur.execute(''' + cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 10000) g - ''') + """ + ) # stop safekeepers gracefully env.safekeepers[0].stop() @@ -54,11 +59,11 @@ def test_pageserver_catchup_while_compute_down(neon_env_builder: NeonEnvBuilder) env.safekeepers[2].start() # restart compute node - pg.stop_and_destroy().create_start('test_pageserver_catchup_while_compute_down') + pg.stop_and_destroy().create_start("test_pageserver_catchup_while_compute_down") # Ensure that basebackup went correct and pageserver returned all data pg_conn = pg.connect() cur = pg_conn.cursor() cur.execute("SELECT count(*) FROM foo") - assert cur.fetchone() == (20000, ) + assert cur.fetchone() == (20000,) diff --git a/test_runner/batch_others/test_pageserver_restart.py b/test_runner/batch_others/test_pageserver_restart.py index c656469cb7..e2bd8be9b7 100644 --- a/test_runner/batch_others/test_pageserver_restart.py +++ b/test_runner/batch_others/test_pageserver_restart.py @@ -1,5 +1,5 @@ -from fixtures.neon_fixtures import NeonEnvBuilder from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnvBuilder # Test restarting page server, while safekeeper and compute node keep @@ -7,8 +7,8 @@ from 
fixtures.log_helper import log def test_pageserver_restart(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_pageserver_restart') - pg = env.postgres.create_start('test_pageserver_restart') + env.neon_cli.create_branch("test_pageserver_restart") + pg = env.postgres.create_start("test_pageserver_restart") pg_conn = pg.connect() cur = pg_conn.cursor() @@ -17,18 +17,22 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder): # shared_buffers, otherwise the SELECT after restart will just return answer # from shared_buffers without hitting the page server, which defeats the point # of this test. - cur.execute('CREATE TABLE foo (t text)') - cur.execute(''' + cur.execute("CREATE TABLE foo (t text)") + cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100000) g - ''') + """ + ) # Verify that the table is larger than shared_buffers - cur.execute(''' + cur.execute( + """ select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('foo') as tbl_ize from pg_settings where name = 'shared_buffers' - ''') + """ + ) row = cur.fetchone() assert row is not None log.info(f"shared_buffers is {row[0]}, table size {row[1]}") @@ -49,7 +53,7 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder): cur = pg_conn.cursor() cur.execute("SELECT count(*) FROM foo") - assert cur.fetchone() == (100000, ) + assert cur.fetchone() == (100000,) # Stop the page server by force, and restart it env.pageserver.stop() diff --git a/test_runner/batch_others/test_parallel_copy.py b/test_runner/batch_others/test_parallel_copy.py index 55947fe427..6b7fe4fdda 100644 --- a/test_runner/batch_others/test_parallel_copy.py +++ b/test_runner/batch_others/test_parallel_copy.py @@ -1,7 +1,8 @@ -from io import BytesIO import asyncio -from fixtures.neon_fixtures import NeonEnv, Postgres +from io import BytesIO + from fixtures.log_helper import log +from 
fixtures.neon_fixtures import NeonEnv, Postgres async def repeat_bytes(buf, repetitions: int): @@ -13,7 +14,8 @@ async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str) buf = BytesIO() for i in range(1000): buf.write( - f"{i}\tLoaded by worker {worker_id}. Long string to consume some space.\n".encode()) + f"{i}\tLoaded by worker {worker_id}. Long string to consume some space.\n".encode() + ) buf.seek(0) copy_input = repeat_bytes(buf.read(), 5000) @@ -30,7 +32,7 @@ async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str) async def parallel_load_same_table(pg: Postgres, n_parallel: int): workers = [] for worker_id in range(n_parallel): - worker = copy_test_data_to_table(pg, worker_id, f'copytest') + worker = copy_test_data_to_table(pg, worker_id, f"copytest") workers.append(asyncio.create_task(worker)) # await all workers @@ -41,13 +43,13 @@ async def parallel_load_same_table(pg: Postgres, n_parallel: int): def test_parallel_copy(neon_simple_env: NeonEnv, n_parallel=5): env = neon_simple_env env.neon_cli.create_branch("test_parallel_copy", "empty") - pg = env.postgres.create_start('test_parallel_copy') + pg = env.postgres.create_start("test_parallel_copy") log.info("postgres is running on 'test_parallel_copy' branch") # Create test table conn = pg.connect() cur = conn.cursor() - cur.execute(f'CREATE TABLE copytest (i int, t text)') + cur.execute(f"CREATE TABLE copytest (i int, t text)") # Run COPY TO to load the table with parallel connections. 
asyncio.run(parallel_load_same_table(pg, n_parallel)) diff --git a/test_runner/batch_others/test_pitr_gc.py b/test_runner/batch_others/test_pitr_gc.py index d63fc4b584..1fc18ebbc4 100644 --- a/test_runner/batch_others/test_pitr_gc.py +++ b/test_runner/batch_others/test_pitr_gc.py @@ -2,8 +2,8 @@ from contextlib import closing import psycopg2.extras from fixtures.log_helper import log -from fixtures.utils import print_gc_result, query_scalar from fixtures.neon_fixtures import NeonEnvBuilder +from fixtures.utils import print_gc_result, query_scalar # @@ -14,10 +14,12 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 1 # Set pitr interval such that we need to keep the data - neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '1 day', gc_horizon = 0}" + neon_env_builder.pageserver_config_override = ( + "tenant_config={pitr_interval = '1 day', gc_horizon = 0}" + ) env = neon_env_builder.init_start() - pgmain = env.postgres.create_start('main') + pgmain = env.postgres.create_start("main") log.info("postgres is running on 'main' branch") main_pg_conn = pgmain.connect() @@ -25,30 +27,32 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder): timeline = query_scalar(main_cur, "SHOW neon.timeline_id") # Create table - main_cur.execute('CREATE TABLE foo (t text)') + main_cur.execute("CREATE TABLE foo (t text)") for i in range(10000): - main_cur.execute(''' + main_cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space'; - ''') + """ + ) if i == 99: # keep some early lsn to test branch creation after GC - main_cur.execute('SELECT pg_current_wal_insert_lsn(), txid_current()') + main_cur.execute("SELECT pg_current_wal_insert_lsn(), txid_current()") res = main_cur.fetchone() assert res is not None lsn_a = res[0] xid_a = res[1] - log.info(f'LSN after 100 rows: {lsn_a} xid {xid_a}') + log.info(f"LSN after 100 rows: {lsn_a} xid {xid_a}") - main_cur.execute('SELECT pg_current_wal_insert_lsn(), 
txid_current()') + main_cur.execute("SELECT pg_current_wal_insert_lsn(), txid_current()") res = main_cur.fetchone() assert res is not None debug_lsn = res[0] debug_xid = res[1] - log.info(f'LSN after 10000 rows: {debug_lsn} xid {debug_xid}') + log.info(f"LSN after 10000 rows: {debug_lsn} xid {debug_xid}") # run GC with closing(env.pageserver.connect()) as psconn: @@ -61,16 +65,16 @@ def test_pitr_gc(neon_env_builder: NeonEnvBuilder): # Branch at the point where only 100 rows were inserted # It must have been preserved by PITR setting - env.neon_cli.create_branch('test_pitr_gc_hundred', 'main', ancestor_start_lsn=lsn_a) + env.neon_cli.create_branch("test_pitr_gc_hundred", "main", ancestor_start_lsn=lsn_a) - pg_hundred = env.postgres.create_start('test_pitr_gc_hundred') + pg_hundred = env.postgres.create_start("test_pitr_gc_hundred") # On the 'hundred' branch, we should see only 100 rows hundred_pg_conn = pg_hundred.connect() hundred_cur = hundred_pg_conn.cursor() - hundred_cur.execute('SELECT count(*) FROM foo') - assert hundred_cur.fetchone() == (100, ) + hundred_cur.execute("SELECT count(*) FROM foo") + assert hundred_cur.fetchone() == (100,) # All the rows are visible on the main branch - main_cur.execute('SELECT count(*) FROM foo') - assert main_cur.fetchone() == (10000, ) + main_cur.execute("SELECT count(*) FROM foo") + assert main_cur.fetchone() == (10000,) diff --git a/test_runner/batch_others/test_proxy.py b/test_runner/batch_others/test_proxy.py index 2d9957fc38..dcff177044 100644 --- a/test_runner/batch_others/test_proxy.py +++ b/test_runner/batch_others/test_proxy.py @@ -1,25 +1,26 @@ -import pytest import psycopg2 +import pytest def test_proxy_select_1(static_proxy): - static_proxy.safe_psql('select 1', options='project=generic-project-name') + static_proxy.safe_psql("select 1", options="project=generic-project-name") def test_password_hack(static_proxy): - user = 'borat' - password = 'password' - static_proxy.safe_psql(f"create role {user} with login 
password '{password}'", - options='project=irrelevant') + user = "borat" + password = "password" + static_proxy.safe_psql( + f"create role {user} with login password '{password}'", options="project=irrelevant" + ) # Note the format of `magic`! magic = f"project=irrelevant;{password}" - static_proxy.safe_psql('select 1', sslsni=0, user=user, password=magic) + static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic) # Must also check that invalid magic won't be accepted. with pytest.raises(psycopg2.errors.OperationalError): magic = "broken" - static_proxy.safe_psql('select 1', sslsni=0, user=user, password=magic) + static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic) # Pass extra options to the server. @@ -28,8 +29,8 @@ def test_password_hack(static_proxy): # See https://github.com/neondatabase/neon/issues/1287 @pytest.mark.xfail def test_proxy_options(static_proxy): - with static_proxy.connect(options='-cproxytest.option=value') as conn: + with static_proxy.connect(options="-cproxytest.option=value") as conn: with conn.cursor() as cur: - cur.execute('SHOW proxytest.option') + cur.execute("SHOW proxytest.option") value = cur.fetchall()[0][0] - assert value == 'value' + assert value == "value" diff --git a/test_runner/batch_others/test_read_validation.py b/test_runner/batch_others/test_read_validation.py index 4be7af4c10..beaae0351b 100644 --- a/test_runner/batch_others/test_read_validation.py +++ b/test_runner/batch_others/test_read_validation.py @@ -1,14 +1,11 @@ from contextlib import closing -from fixtures.neon_fixtures import NeonEnv from fixtures.log_helper import log - -from psycopg2.errors import UndefinedTable -from psycopg2.errors import IoError - +from fixtures.neon_fixtures import NeonEnv from fixtures.utils import query_scalar +from psycopg2.errors import IoError, UndefinedTable -pytest_plugins = ("fixtures.neon_fixtures") +pytest_plugins = "fixtures.neon_fixtures" extensions = ["pageinspect", "neon_test_utils", 
"pg_buffercache"] @@ -47,13 +44,15 @@ def test_read_validation(neon_simple_env: NeonEnv): log.info("Test table is populated, validating buffer cache") cache_entries = query_scalar( - c, - "select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode)) + c, "select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode) + ) assert cache_entries > 0, "No buffers cached for the test relation" c.execute( - "select reltablespace, reldatabase, relfilenode from pg_buffercache where relfilenode = {}" - .format(relfilenode)) + "select reltablespace, reldatabase, relfilenode from pg_buffercache where relfilenode = {}".format( + relfilenode + ) + ) reln = c.fetchone() assert reln is not None @@ -62,21 +61,23 @@ def test_read_validation(neon_simple_env: NeonEnv): c.execute("select clear_buffer_cache()") cache_entries = query_scalar( - c, - "select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode)) + c, "select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode) + ) assert cache_entries == 0, "Failed to clear buffer cache" log.info("Cache is clear, reading stale page version") c.execute( - "select lsn, lower, upper from page_header(get_raw_page_at_lsn('foo', 'main', 0, '{}'))" - .format(first[0])) + "select lsn, lower, upper from page_header(get_raw_page_at_lsn('foo', 'main', 0, '{}'))".format( + first[0] + ) + ) direct_first = c.fetchone() assert first == direct_first, "Failed fetch page at historic lsn" cache_entries = query_scalar( - c, - "select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode)) + c, "select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode) + ) assert cache_entries == 0, "relation buffers detected after invalidation" log.info("Cache is clear, reading latest page version without cache") @@ -88,8 +89,8 @@ def test_read_validation(neon_simple_env: NeonEnv): assert second == direct_latest, "Failed fetch page at latest lsn" cache_entries = 
query_scalar( - c, - "select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode)) + c, "select count(*) from pg_buffercache where relfilenode = {}".format(relfilenode) + ) assert cache_entries == 0, "relation buffers detected after invalidation" log.info( @@ -97,8 +98,10 @@ def test_read_validation(neon_simple_env: NeonEnv): ) c.execute( - "select lsn, lower, upper from page_header(get_raw_page_at_lsn( {}, {}, {}, 0, 0, '{}' ))" - .format(reln[0], reln[1], reln[2], first[0])) + "select lsn, lower, upper from page_header(get_raw_page_at_lsn( {}, {}, {}, 0, 0, '{}' ))".format( + reln[0], reln[1], reln[2], first[0] + ) + ) direct_first = c.fetchone() assert first == direct_first, "Failed fetch page at historic lsn using oid" @@ -107,20 +110,24 @@ def test_read_validation(neon_simple_env: NeonEnv): ) c.execute( - "select lsn, lower, upper from page_header(get_raw_page_at_lsn( {}, {}, {}, 0, 0, NULL ))" - .format(reln[0], reln[1], reln[2])) + "select lsn, lower, upper from page_header(get_raw_page_at_lsn( {}, {}, {}, 0, 0, NULL ))".format( + reln[0], reln[1], reln[2] + ) + ) direct_latest = c.fetchone() assert second == direct_latest, "Failed fetch page at latest lsn" - c.execute('drop table foo;') + c.execute("drop table foo;") log.info( "Relation dropped, attempting reading stale page version without cache using relation identifiers" ) c.execute( - "select lsn, lower, upper from page_header(get_raw_page_at_lsn( {}, {}, {}, 0, 0, '{}' ))" - .format(reln[0], reln[1], reln[2], first[0])) + "select lsn, lower, upper from page_header(get_raw_page_at_lsn( {}, {}, {}, 0, 0, '{}' ))".format( + reln[0], reln[1], reln[2], first[0] + ) + ) direct_first = c.fetchone() assert first == direct_first, "Failed fetch page at historic lsn using oid" diff --git a/test_runner/batch_others/test_readonly_node.py b/test_runner/batch_others/test_readonly_node.py index 82fc6329cf..0bd78c62a3 100644 --- a/test_runner/batch_others/test_readonly_node.py +++ 
b/test_runner/batch_others/test_readonly_node.py @@ -12,81 +12,87 @@ from fixtures.utils import query_scalar # def test_readonly_node(neon_simple_env: NeonEnv): env = neon_simple_env - env.neon_cli.create_branch('test_readonly_node', 'empty') - pgmain = env.postgres.create_start('test_readonly_node') + env.neon_cli.create_branch("test_readonly_node", "empty") + pgmain = env.postgres.create_start("test_readonly_node") log.info("postgres is running on 'test_readonly_node' branch") main_pg_conn = pgmain.connect() main_cur = main_pg_conn.cursor() # Create table, and insert the first 100 rows - main_cur.execute('CREATE TABLE foo (t text)') + main_cur.execute("CREATE TABLE foo (t text)") - main_cur.execute(''' + main_cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100) g - ''') - main_cur.execute('SELECT pg_current_wal_insert_lsn()') - lsn_a = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()') - log.info('LSN after 100 rows: ' + lsn_a) + """ + ) + main_cur.execute("SELECT pg_current_wal_insert_lsn()") + lsn_a = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()") + log.info("LSN after 100 rows: " + lsn_a) # Insert some more rows. (This generates enough WAL to fill a few segments.) - main_cur.execute(''' + main_cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 200000) g - ''') - lsn_b = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()') - log.info('LSN after 200100 rows: ' + lsn_b) + """ + ) + lsn_b = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()") + log.info("LSN after 200100 rows: " + lsn_b) # Insert many more rows. This generates enough WAL to fill a few segments. 
- main_cur.execute(''' + main_cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 200000) g - ''') + """ + ) - lsn_c = query_scalar(main_cur, 'SELECT pg_current_wal_insert_lsn()') - log.info('LSN after 400100 rows: ' + lsn_c) + lsn_c = query_scalar(main_cur, "SELECT pg_current_wal_insert_lsn()") + log.info("LSN after 400100 rows: " + lsn_c) # Create first read-only node at the point where only 100 rows were inserted - pg_hundred = env.postgres.create_start(branch_name='test_readonly_node', - node_name='test_readonly_node_hundred', - lsn=lsn_a) + pg_hundred = env.postgres.create_start( + branch_name="test_readonly_node", node_name="test_readonly_node_hundred", lsn=lsn_a + ) # And another at the point where 200100 rows were inserted - pg_more = env.postgres.create_start(branch_name='test_readonly_node', - node_name='test_readonly_node_more', - lsn=lsn_b) + pg_more = env.postgres.create_start( + branch_name="test_readonly_node", node_name="test_readonly_node_more", lsn=lsn_b + ) # On the 'hundred' node, we should see only 100 rows hundred_pg_conn = pg_hundred.connect() hundred_cur = hundred_pg_conn.cursor() - hundred_cur.execute('SELECT count(*) FROM foo') - assert hundred_cur.fetchone() == (100, ) + hundred_cur.execute("SELECT count(*) FROM foo") + assert hundred_cur.fetchone() == (100,) # On the 'more' node, we should see 100200 rows more_pg_conn = pg_more.connect() more_cur = more_pg_conn.cursor() - more_cur.execute('SELECT count(*) FROM foo') - assert more_cur.fetchone() == (200100, ) + more_cur.execute("SELECT count(*) FROM foo") + assert more_cur.fetchone() == (200100,) # All the rows are visible on the main branch - main_cur.execute('SELECT count(*) FROM foo') - assert main_cur.fetchone() == (400100, ) + main_cur.execute("SELECT count(*) FROM foo") + assert main_cur.fetchone() == (400100,) # Check creating a node at segment boundary - pg = env.postgres.create_start(branch_name='test_readonly_node', - 
node_name='test_branch_segment_boundary', - lsn='0/3000000') + pg = env.postgres.create_start( + branch_name="test_readonly_node", node_name="test_branch_segment_boundary", lsn="0/3000000" + ) cur = pg.connect().cursor() - cur.execute('SELECT 1') - assert cur.fetchone() == (1, ) + cur.execute("SELECT 1") + assert cur.fetchone() == (1,) # Create node at pre-initdb lsn with pytest.raises(Exception, match="invalid basebackup lsn"): # compute node startup with invalid LSN should fail - env.postgres.create_start(branch_name='test_readonly_node', - node_name='test_readonly_node_preinitdb', - lsn='0/42') + env.postgres.create_start( + branch_name="test_readonly_node", node_name="test_readonly_node_preinitdb", lsn="0/42" + ) diff --git a/test_runner/batch_others/test_recovery.py b/test_runner/batch_others/test_recovery.py index 5ba783b802..5220aa6c2e 100644 --- a/test_runner/batch_others/test_recovery.py +++ b/test_runner/batch_others/test_recovery.py @@ -1,11 +1,12 @@ +import json import os import time -import psycopg2.extras -import json from ast import Assert from contextlib import closing -from fixtures.neon_fixtures import NeonEnvBuilder + +import psycopg2.extras from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnvBuilder # @@ -21,13 +22,15 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder): # Check if failpoints enables. 
Otherwise the test doesn't make sense f = env.neon_cli.pageserver_enabled_features() - assert "failpoints" in f["features"], "Build pageserver with --features=failpoints option to run this test" + assert ( + "failpoints" in f["features"] + ), "Build pageserver with --features=failpoints option to run this test" neon_env_builder.start() # Create a branch for us env.neon_cli.create_branch("test_pageserver_recovery", "main") - pg = env.postgres.create_start('test_pageserver_recovery') + pg = env.postgres.create_start("test_pageserver_recovery") log.info("postgres is running on 'test_pageserver_recovery' branch") connstr = pg.connstr() @@ -62,4 +65,4 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder): with closing(pg.connect()) as conn: with conn.cursor() as cur: cur.execute("select count(*) from foo") - assert cur.fetchone() == (100000, ) + assert cur.fetchone() == (100000,) diff --git a/test_runner/batch_others/test_remote_storage.py b/test_runner/batch_others/test_remote_storage.py index ca46010dca..974d3402f6 100644 --- a/test_runner/batch_others/test_remote_storage.py +++ b/test_runner/batch_others/test_remote_storage.py @@ -1,14 +1,24 @@ # It's possible to run any regular test with the local fs remote storage via # env ZENITH_PAGESERVER_OVERRIDES="remote_storage={local_path='/tmp/neon_zzz/'}" poetry ...... 
-import shutil, os -from pathlib import Path +import os +import shutil import time +from pathlib import Path from uuid import UUID -from fixtures.neon_fixtures import NeonEnvBuilder, RemoteStorageKind, assert_timeline_local, available_remote_storages, wait_until, wait_for_last_record_lsn, wait_for_upload -from fixtures.log_helper import log -from fixtures.utils import lsn_from_hex, query_scalar + import pytest +from fixtures.log_helper import log +from fixtures.neon_fixtures import ( + NeonEnvBuilder, + RemoteStorageKind, + assert_timeline_local, + available_remote_storages, + wait_for_last_record_lsn, + wait_for_upload, + wait_until, +) +from fixtures.utils import lsn_from_hex, query_scalar # @@ -28,7 +38,7 @@ import pytest # * queries the specific data, ensuring that it matches the one stored before # # The tests are done for all types of remote storage pageserver supports. -@pytest.mark.parametrize('remote_storatge_kind', available_remote_storages()) +@pytest.mark.parametrize("remote_storatge_kind", available_remote_storages()) def test_remote_storage_backup_and_restore( neon_env_builder: NeonEnvBuilder, remote_storatge_kind: RemoteStorageKind, @@ -39,15 +49,15 @@ def test_remote_storage_backup_and_restore( neon_env_builder.enable_remote_storage( remote_storage_kind=remote_storatge_kind, - test_name='test_remote_storage_backup_and_restore', + test_name="test_remote_storage_backup_and_restore", ) data_id = 1 - data_secret = 'very secret secret' + data_secret = "very secret secret" ##### First start, insert secret data and upload it to the remote storage env = neon_env_builder.init_start() - pg = env.postgres.create_start('main') + pg = env.postgres.create_start("main") client = env.pageserver.http_client() @@ -58,10 +68,12 @@ def test_remote_storage_backup_and_restore( for checkpoint_number in checkpoint_numbers: with pg.cursor() as cur: - cur.execute(f''' + cur.execute( + f""" CREATE TABLE t{checkpoint_number}(id int primary key, secret text); INSERT INTO 
t{checkpoint_number} VALUES ({data_id}, '{data_secret}|{checkpoint_number}'); - ''') + """ + ) current_lsn = lsn_from_hex(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()")) # wait until pageserver receives that data @@ -70,16 +82,16 @@ def test_remote_storage_backup_and_restore( # run checkpoint manually to be sure that data landed in remote storage env.pageserver.safe_psql(f"checkpoint {tenant_id} {timeline_id}") - log.info(f'waiting for checkpoint {checkpoint_number} upload') + log.info(f"waiting for checkpoint {checkpoint_number} upload") # wait until pageserver successfully uploaded a checkpoint to remote storage wait_for_upload(client, UUID(tenant_id), UUID(timeline_id), current_lsn) - log.info(f'upload of checkpoint {checkpoint_number} is done') + log.info(f"upload of checkpoint {checkpoint_number} is done") ##### Stop the first pageserver instance, erase all its data env.postgres.stop_all() env.pageserver.stop() - dir_to_clear = Path(env.repo_dir) / 'tenants' + dir_to_clear = Path(env.repo_dir) / "tenants" shutil.rmtree(dir_to_clear) os.mkdir(dir_to_clear) @@ -100,8 +112,8 @@ def test_remote_storage_backup_and_restore( detail = client.timeline_detail(UUID(tenant_id), UUID(timeline_id)) log.info("Timeline detail with active failpoint: %s", detail) - assert detail['local'] is None - assert detail['remote']['awaits_download'] + assert detail["local"] is None + assert detail["remote"]["awaits_download"] # trigger temporary download files removal env.pageserver.stop() @@ -110,19 +122,24 @@ def test_remote_storage_backup_and_restore( client.tenant_attach(UUID(tenant_id)) log.info("waiting for timeline redownload") - wait_until(number_of_iterations=20, - interval=1, - func=lambda: assert_timeline_local(client, UUID(tenant_id), UUID(timeline_id))) + wait_until( + number_of_iterations=20, + interval=1, + func=lambda: assert_timeline_local(client, UUID(tenant_id), UUID(timeline_id)), + ) detail = client.timeline_detail(UUID(tenant_id), UUID(timeline_id)) - assert 
detail['local'] is not None + assert detail["local"] is not None log.info("Timeline detail after attach completed: %s", detail) - assert lsn_from_hex(detail['local']['last_record_lsn']) >= current_lsn, 'current db Lsn should should not be less than the one stored on remote storage' - assert not detail['remote']['awaits_download'] + assert ( + lsn_from_hex(detail["local"]["last_record_lsn"]) >= current_lsn + ), "current db Lsn should should not be less than the one stored on remote storage" + assert not detail["remote"]["awaits_download"] - pg = env.postgres.create_start('main') + pg = env.postgres.create_start("main") with pg.cursor() as cur: for checkpoint_number in checkpoint_numbers: - assert query_scalar(cur, - f'SELECT secret FROM t{checkpoint_number} WHERE id = {data_id};' - ) == f'{data_secret}|{checkpoint_number}' + assert ( + query_scalar(cur, f"SELECT secret FROM t{checkpoint_number} WHERE id = {data_id};") + == f"{data_secret}|{checkpoint_number}" + ) diff --git a/test_runner/batch_others/test_subxacts.py b/test_runner/batch_others/test_subxacts.py index d06877825e..42234bf535 100644 --- a/test_runner/batch_others/test_subxacts.py +++ b/test_runner/batch_others/test_subxacts.py @@ -1,5 +1,5 @@ -from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content # Test subtransactions @@ -11,28 +11,30 @@ from fixtures.log_helper import log def test_subxacts(neon_simple_env: NeonEnv, test_output_dir): env = neon_simple_env env.neon_cli.create_branch("test_subxacts", "empty") - pg = env.postgres.create_start('test_subxacts') + pg = env.postgres.create_start("test_subxacts") log.info("postgres is running on 'test_subxacts' branch") pg_conn = pg.connect() cur = pg_conn.cursor() - cur.execute(''' + cur.execute( + """ CREATE TABLE t1(i int, j int); - ''') + """ + ) - cur.execute('select pg_switch_wal();') + cur.execute("select 
pg_switch_wal();") # Issue 100 transactions, with 1000 subtransactions in each. for i in range(100): - cur.execute('begin') + cur.execute("begin") for j in range(1000): - cur.execute(f'savepoint sp{j}') - cur.execute(f'insert into t1 values ({i}, {j})') - cur.execute('commit') + cur.execute(f"savepoint sp{j}") + cur.execute(f"insert into t1 values ({i}, {j})") + cur.execute("commit") # force wal flush - cur.execute('checkpoint') + cur.execute("checkpoint") # Check that we can restore the content of the datadir correctly check_restored_datadir_content(test_output_dir, env, pg) diff --git a/test_runner/batch_others/test_tenant_conf.py b/test_runner/batch_others/test_tenant_conf.py index d25aad742e..1e09ae8db7 100644 --- a/test_runner/batch_others/test_tenant_conf.py +++ b/test_runner/batch_others/test_tenant_conf.py @@ -1,27 +1,28 @@ from contextlib import closing -import pytest import psycopg2.extras - -from fixtures.neon_fixtures import NeonEnvBuilder +import pytest from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnvBuilder def test_tenant_config(neon_env_builder: NeonEnvBuilder): # set some non-default global config - neon_env_builder.pageserver_config_override = ''' + neon_env_builder.pageserver_config_override = """ page_cache_size=444; wait_lsn_timeout='111 s'; -tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}''' +tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}""" env = neon_env_builder.init_start() """Test per tenant configuration""" - tenant, _ = env.neon_cli.create_tenant(conf={ - 'checkpoint_distance': '20000', - 'gc_period': '30sec', - }) + tenant, _ = env.neon_cli.create_tenant( + conf={ + "checkpoint_distance": "20000", + "gc_period": "30sec", + } + ) - env.neon_cli.create_timeline(f'test_tenant_conf', tenant_id=tenant) + env.neon_cli.create_timeline(f"test_tenant_conf", tenant_id=tenant) pg = env.postgres.create_start( "test_tenant_conf", "main", @@ -36,7 +37,8 @@ 
tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}''' pscur.execute(f"show {env.initial_tenant.hex}") res = pscur.fetchone() assert all( - i in res.items() for i in { + i in res.items() + for i in { "checkpoint_distance": 10000, "compaction_target_size": 1048576, "compaction_period": 1, @@ -44,8 +46,9 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}''' "gc_horizon": 67108864, "gc_period": 100, "image_creation_threshold": 3, - "pitr_interval": 2592000 - }.items()) + "pitr_interval": 2592000, + }.items() + ) # check the configuration of the new tenant with closing(env.pageserver.connect()) as psconn: @@ -54,7 +57,8 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}''' res = pscur.fetchone() log.info(f"res: {res}") assert all( - i in res.items() for i in { + i in res.items() + for i in { "checkpoint_distance": 20000, "compaction_target_size": 1048576, "compaction_period": 1, @@ -62,15 +66,18 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}''' "gc_horizon": 67108864, "gc_period": 30, "image_creation_threshold": 3, - "pitr_interval": 2592000 - }.items()) + "pitr_interval": 2592000, + }.items() + ) # update the config and ensure that it has changed - env.neon_cli.config_tenant(tenant_id=tenant, - conf={ - 'checkpoint_distance': '15000', - 'gc_period': '80sec', - }) + env.neon_cli.config_tenant( + tenant_id=tenant, + conf={ + "checkpoint_distance": "15000", + "gc_period": "80sec", + }, + ) with closing(env.pageserver.connect()) as psconn: with psconn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as pscur: @@ -78,7 +85,8 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}''' res = pscur.fetchone() log.info(f"after config res: {res}") assert all( - i in res.items() for i in { + i in res.items() + for i in { "checkpoint_distance": 15000, "compaction_target_size": 1048576, "compaction_period": 1, @@ -86,8 +94,9 @@ 
tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}''' "gc_horizon": 67108864, "gc_period": 80, "image_creation_threshold": 3, - "pitr_interval": 2592000 - }.items()) + "pitr_interval": 2592000, + }.items() + ) # restart the pageserver and ensure that the config is still correct env.pageserver.stop() @@ -99,7 +108,8 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}''' res = pscur.fetchone() log.info(f"after restart res: {res}") assert all( - i in res.items() for i in { + i in res.items() + for i in { "checkpoint_distance": 15000, "compaction_target_size": 1048576, "compaction_period": 1, @@ -107,5 +117,6 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}''' "gc_horizon": 67108864, "gc_period": 80, "image_creation_threshold": 3, - "pitr_interval": 2592000 - }.items()) + "pitr_interval": 2592000, + }.items() + ) diff --git a/test_runner/batch_others/test_tenant_detach.py b/test_runner/batch_others/test_tenant_detach.py index afc4f89bbf..f1b30429bf 100644 --- a/test_runner/batch_others/test_tenant_detach.py +++ b/test_runner/batch_others/test_tenant_detach.py @@ -1,9 +1,9 @@ +import uuid from threading import Thread from uuid import uuid4 -import uuid + import psycopg2 import pytest - from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverApiException @@ -11,7 +11,7 @@ from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverApiExc def do_gc_target(env: NeonEnv, tenant_id: uuid.UUID, timeline_id: uuid.UUID): """Hack to unblock main, see https://github.com/neondatabase/neon/issues/2211""" try: - env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {timeline_id.hex} 0') + env.pageserver.safe_psql(f"do_gc {tenant_id.hex} {timeline_id.hex} 0") except Exception as e: log.error("do_gc failed: %s", e) @@ -22,8 +22,10 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): # first check for non existing tenant 
tenant_id = uuid4() - with pytest.raises(expected_exception=NeonPageserverApiException, - match=f'Tenant not found for id {tenant_id.hex}'): + with pytest.raises( + expected_exception=NeonPageserverApiException, + match=f"Tenant not found for id {tenant_id.hex}", + ): pageserver_http.tenant_detach(tenant_id) # create new nenant @@ -32,17 +34,20 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): # assert tenant exists on disk assert (env.repo_dir / "tenants" / tenant_id.hex).exists() - pg = env.postgres.create_start('main', tenant_id=tenant_id) + pg = env.postgres.create_start("main", tenant_id=tenant_id) # we rely upon autocommit after each statement - pg.safe_psql_many(queries=[ - 'CREATE TABLE t(key int primary key, value text)', - 'INSERT INTO t SELECT generate_series(1,100000), \'payload\'', - ]) + pg.safe_psql_many( + queries=[ + "CREATE TABLE t(key int primary key, value text)", + "INSERT INTO t SELECT generate_series(1,100000), 'payload'", + ] + ) # gc should not try to even start - with pytest.raises(expected_exception=psycopg2.DatabaseError, - match='gc target timeline does not exist'): - env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {uuid4().hex} 0') + with pytest.raises( + expected_exception=psycopg2.DatabaseError, match="gc target timeline does not exist" + ): + env.pageserver.safe_psql(f"do_gc {tenant_id.hex} {uuid4().hex} 0") # try to concurrently run gc and detach gc_thread = Thread(target=lambda: do_gc_target(env, tenant_id, timeline_id)) @@ -67,6 +72,7 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): # check that nothing is left on disk for deleted tenant assert not (env.repo_dir / "tenants" / tenant_id.hex).exists() - with pytest.raises(expected_exception=psycopg2.DatabaseError, - match=f'Tenant {tenant_id.hex} not found'): - env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {timeline_id.hex} 0') + with pytest.raises( + expected_exception=psycopg2.DatabaseError, match=f"Tenant {tenant_id.hex} not found" + ): + 
env.pageserver.safe_psql(f"do_gc {tenant_id.hex} {timeline_id.hex} 0") diff --git a/test_runner/batch_others/test_tenant_relocation.py b/test_runner/batch_others/test_tenant_relocation.py index eb65e2e3b5..a30804ee8e 100644 --- a/test_runner/batch_others/test_tenant_relocation.py +++ b/test_runner/batch_others/test_tenant_relocation.py @@ -34,12 +34,14 @@ def assert_abs_margin_ratio(a: float, b: float, margin_ratio: float): @contextmanager -def new_pageserver_helper(new_pageserver_dir: pathlib.Path, - pageserver_bin: pathlib.Path, - remote_storage_mock_path: pathlib.Path, - pg_port: int, - http_port: int, - broker: Optional[Etcd]): +def new_pageserver_helper( + new_pageserver_dir: pathlib.Path, + pageserver_bin: pathlib.Path, + remote_storage_mock_path: pathlib.Path, + pg_port: int, + http_port: int, + broker: Optional[Etcd], +): """ cannot use NeonPageserver yet because it depends on neon cli which currently lacks support for multiple pageservers @@ -47,10 +49,10 @@ def new_pageserver_helper(new_pageserver_dir: pathlib.Path, # actually run new pageserver cmd = [ str(pageserver_bin), - '--workdir', + "--workdir", str(new_pageserver_dir), - '--daemonize', - '--update-config', + "--daemonize", + "--update-config", f"-c listen_pg_addr='localhost:{pg_port}'", f"-c listen_http_addr='localhost:{http_port}'", f"-c pg_distrib_dir='{pg_distrib_dir}'", @@ -58,7 +60,9 @@ def new_pageserver_helper(new_pageserver_dir: pathlib.Path, f"-c remote_storage={{local_path='{remote_storage_mock_path}'}}", ] if broker is not None: - cmd.append(f"-c broker_endpoints=['{broker.client_url()}']", ) + cmd.append( + f"-c broker_endpoints=['{broker.client_url()}']", + ) log.info("starting new pageserver %s", cmd) out = subprocess.check_output(cmd, text=True) @@ -67,7 +71,7 @@ def new_pageserver_helper(new_pageserver_dir: pathlib.Path, yield finally: log.info("stopping new pageserver") - pid = int((new_pageserver_dir / 'pageserver.pid').read_text()) + pid = int((new_pageserver_dir / 
"pageserver.pid").read_text()) os.kill(pid, signal.SIGQUIT) @@ -105,7 +109,7 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve log.info("successfully recovered %s", inserted_ctr) failed = False load_ok_event.set() - log.info('load thread stopped') + log.info("load thread stopped") def populate_branch( @@ -123,8 +127,10 @@ def populate_branch( cur.execute("SELECT pg_current_wal_flush_lsn()") log.info("pg_current_wal_flush_lsn() %s", lsn_from_hex(cur.fetchone()[0])) - log.info("timeline detail %s", - ps_http.timeline_detail(tenant_id=tenant_id, timeline_id=timeline_id)) + log.info( + "timeline detail %s", + ps_http.timeline_detail(tenant_id=tenant_id, timeline_id=timeline_id), + ) # we rely upon autocommit after each statement # as waiting for acceptors happens there @@ -133,7 +139,7 @@ def populate_branch( cur.execute("INSERT INTO t SELECT generate_series(1,1000), 'some payload'") if expected_sum is not None: cur.execute("SELECT sum(key) FROM t") - assert cur.fetchone() == (expected_sum, ) + assert cur.fetchone() == (expected_sum,) cur.execute("SELECT pg_current_wal_flush_lsn()") current_lsn = lsn_from_hex(cur.fetchone()[0]) @@ -166,34 +172,41 @@ def check_timeline_attached( # when load is active these checks can break because lsns are not static # so lets check with some margin - assert_abs_margin_ratio(lsn_from_hex(new_timeline_detail['local']['disk_consistent_lsn']), - lsn_from_hex(old_timeline_detail['local']['disk_consistent_lsn']), - 0.03) + assert_abs_margin_ratio( + lsn_from_hex(new_timeline_detail["local"]["disk_consistent_lsn"]), + lsn_from_hex(old_timeline_detail["local"]["disk_consistent_lsn"]), + 0.03, + ) - assert_abs_margin_ratio(lsn_from_hex(new_timeline_detail['local']['disk_consistent_lsn']), - old_current_lsn, - 0.03) + assert_abs_margin_ratio( + lsn_from_hex(new_timeline_detail["local"]["disk_consistent_lsn"]), old_current_lsn, 0.03 + ) -def switch_pg_to_new_pageserver(env: NeonEnv, - pg: Postgres, - 
new_pageserver_port: int, - tenant_id: UUID, - timeline_id: UUID) -> pathlib.Path: +def switch_pg_to_new_pageserver( + env: NeonEnv, pg: Postgres, new_pageserver_port: int, tenant_id: UUID, timeline_id: UUID +) -> pathlib.Path: pg.stop() pg_config_file_path = pathlib.Path(pg.config_file_path()) - pg_config_file_path.open('a').write( - f"\nneon.pageserver_connstring = 'postgresql://no_user:@localhost:{new_pageserver_port}'") + pg_config_file_path.open("a").write( + f"\nneon.pageserver_connstring = 'postgresql://no_user:@localhost:{new_pageserver_port}'" + ) pg.start() - timeline_to_detach_local_path = env.repo_dir / 'tenants' / tenant_id.hex / 'timelines' / timeline_id.hex + timeline_to_detach_local_path = ( + env.repo_dir / "tenants" / tenant_id.hex / "timelines" / timeline_id.hex + ) files_before_detach = os.listdir(timeline_to_detach_local_path) - assert 'metadata' in files_before_detach, f'Regular timeline {timeline_to_detach_local_path} should have the metadata file,\ - but got: {files_before_detach}' - assert len(files_before_detach) >= 2, f'Regular timeline {timeline_to_detach_local_path} should have at least one layer file,\ - but got {files_before_detach}' + assert ( + "metadata" in files_before_detach + ), f"Regular timeline {timeline_to_detach_local_path} should have the metadata file,\ + but got: {files_before_detach}" + assert ( + len(files_before_detach) >= 2 + ), f"Regular timeline {timeline_to_detach_local_path} should have at least one layer file,\ + but got {files_before_detach}" return timeline_to_detach_local_path @@ -202,39 +215,44 @@ def post_migration_check(pg: Postgres, sum_before_migration: int, old_local_path with pg_cur(pg) as cur: # check that data is still there cur.execute("SELECT sum(key) FROM t") - assert cur.fetchone() == (sum_before_migration, ) + assert cur.fetchone() == (sum_before_migration,) # check that we can write new data cur.execute("INSERT INTO t SELECT generate_series(1001,2000), 'some payload'") cur.execute("SELECT 
sum(key) FROM t") - assert cur.fetchone() == (sum_before_migration + 1500500, ) + assert cur.fetchone() == (sum_before_migration + 1500500,) - assert not os.path.exists(old_local_path), f'After detach, local timeline dir {old_local_path} should be removed' + assert not os.path.exists( + old_local_path + ), f"After detach, local timeline dir {old_local_path} should be removed" @pytest.mark.parametrize( - 'method', + "method", [ # A minor migration involves no storage breaking changes. # It is done by attaching the tenant to a new pageserver. - 'minor', + "minor", # A major migration involves exporting a postgres datadir # basebackup and importing it into the new pageserver. # This kind of migration can tolerate breaking changes # to storage format - 'major', - ]) -@pytest.mark.parametrize('with_load', ['with_load', 'without_load']) -def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, - port_distributor: PortDistributor, - test_output_dir, - method: str, - with_load: str): + "major", + ], +) +@pytest.mark.parametrize("with_load", ["with_load", "without_load"]) +def test_tenant_relocation( + neon_env_builder: NeonEnvBuilder, + port_distributor: PortDistributor, + test_output_dir, + method: str, + with_load: str, +): neon_env_builder.enable_local_fs_remote_storage() env = neon_env_builder.init_start() # create folder for remote storage mock - remote_storage_mock_path = env.repo_dir / 'local_fs_remote_storage' + remote_storage_mock_path = env.repo_dir / "local_fs_remote_storage" # we use two branches to check that they are both relocated # first branch is used for load, compute for second one is used to @@ -242,12 +260,15 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, pageserver_http = env.pageserver.http_client() - tenant_id, initial_timeline_id = env.neon_cli.create_tenant(UUID("74ee8b079a0e437eb0afea7d26a07209")) + tenant_id, initial_timeline_id = env.neon_cli.create_tenant( + UUID("74ee8b079a0e437eb0afea7d26a07209") + ) log.info("tenant to 
relocate %s initial_timeline_id %s", tenant_id, initial_timeline_id) env.neon_cli.create_branch("test_tenant_relocation_main", tenant_id=tenant_id) - pg_main = env.postgres.create_start(branch_name='test_tenant_relocation_main', - tenant_id=tenant_id) + pg_main = env.postgres.create_start( + branch_name="test_tenant_relocation_main", tenant_id=tenant_id + ) timeline_id_main, current_lsn_main = populate_branch( pg_main, @@ -263,8 +284,9 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, ancestor_start_lsn=lsn_to_hex(current_lsn_main), tenant_id=tenant_id, ) - pg_second = env.postgres.create_start(branch_name='test_tenant_relocation_second', - tenant_id=tenant_id) + pg_second = env.postgres.create_start( + branch_name="test_tenant_relocation_second", tenant_id=tenant_id + ) timeline_id_second, current_lsn_second = populate_branch( pg_second, @@ -281,7 +303,7 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id_second, current_lsn_second) timeline_detail_second = assert_timeline_local(pageserver_http, tenant_id, timeline_id_second) - if with_load == 'with_load': + if with_load == "with_load": # create load table with pg_cur(pg_main) as cur: cur.execute("CREATE TABLE load(value text)") @@ -317,22 +339,24 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, log.info("inititalizing new pageserver") # bootstrap second pageserver - new_pageserver_dir = env.repo_dir / 'new_pageserver' + new_pageserver_dir = env.repo_dir / "new_pageserver" new_pageserver_dir.mkdir() new_pageserver_pg_port = port_distributor.get_port() new_pageserver_http_port = port_distributor.get_port() log.info("new pageserver ports pg %s http %s", new_pageserver_pg_port, new_pageserver_http_port) - pageserver_bin = pathlib.Path(neon_binpath) / 'pageserver' + pageserver_bin = pathlib.Path(neon_binpath) / "pageserver" new_pageserver_http = NeonPageserverHttpClient(port=new_pageserver_http_port, 
auth_token=None) - with new_pageserver_helper(new_pageserver_dir, - pageserver_bin, - remote_storage_mock_path, - new_pageserver_pg_port, - new_pageserver_http_port, - neon_env_builder.broker): + with new_pageserver_helper( + new_pageserver_dir, + pageserver_bin, + remote_storage_mock_path, + new_pageserver_pg_port, + new_pageserver_http_port, + neon_env_builder.broker, + ): # Migrate either by attaching from s3 or import/export basebackup if method == "major": @@ -367,13 +391,16 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, # check that it shows that download is in progress tenant_status = new_pageserver_http.tenant_status(tenant_id=tenant_id) - assert tenant_status.get('has_in_progress_downloads'), tenant_status + assert tenant_status.get("has_in_progress_downloads"), tenant_status # wait until tenant is downloaded - wait_until(number_of_iterations=10, - interval=1, - func=lambda: assert_no_in_progress_downloads_for_tenant( - new_pageserver_http, tenant_id)) + wait_until( + number_of_iterations=10, + interval=1, + func=lambda: assert_no_in_progress_downloads_for_tenant( + new_pageserver_http, tenant_id + ), + ) check_timeline_attached( new_pageserver_http, @@ -392,10 +419,10 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, ) # rewrite neon cli config to use new pageserver for basebackup to start new compute - cli_config_lines = (env.repo_dir / 'config').read_text().splitlines() + cli_config_lines = (env.repo_dir / "config").read_text().splitlines() cli_config_lines[-2] = f"listen_http_addr = 'localhost:{new_pageserver_http_port}'" cli_config_lines[-1] = f"listen_pg_addr = 'localhost:{new_pageserver_pg_port}'" - (env.repo_dir / 'config').write_text('\n'.join(cli_config_lines)) + (env.repo_dir / "config").write_text("\n".join(cli_config_lines)) old_local_path_main = switch_pg_to_new_pageserver( env, @@ -423,7 +450,8 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, # ensure that we can successfully read all relations 
on the new pageserver with pg_cur(pg_second) as cur: - cur.execute(''' + cur.execute( + """ DO $$ DECLARE r RECORD; @@ -435,18 +463,19 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder, EXECUTE 'SELECT count(*) FROM quote_ident($1)' USING r.relname; END LOOP; END$$; - ''') + """ + ) - if with_load == 'with_load': + if with_load == "with_load": assert load_ok_event.wait(3) - log.info('stopping load thread') + log.info("stopping load thread") load_stop_event.set() load_thread.join(timeout=10) - log.info('load thread stopped') + log.info("load thread stopped") # bring old pageserver back for clean shutdown via neon cli # new pageserver will be shut down by the context manager - cli_config_lines = (env.repo_dir / 'config').read_text().splitlines() + cli_config_lines = (env.repo_dir / "config").read_text().splitlines() cli_config_lines[-2] = f"listen_http_addr = 'localhost:{env.pageserver.service_port.http}'" cli_config_lines[-1] = f"listen_pg_addr = 'localhost:{env.pageserver.service_port.pg}'" - (env.repo_dir / 'config').write_text('\n'.join(cli_config_lines)) + (env.repo_dir / "config").write_text("\n".join(cli_config_lines)) diff --git a/test_runner/batch_others/test_tenant_tasks.py b/test_runner/batch_others/test_tenant_tasks.py index fae2a2199d..8075756ffb 100644 --- a/test_runner/batch_others/test_tenant_tasks.py +++ b/test_runner/batch_others/test_tenant_tasks.py @@ -1,6 +1,7 @@ -from fixtures.neon_fixtures import NeonEnvBuilder, wait_until -from uuid import UUID import time +from uuid import UUID + +from fixtures.neon_fixtures import NeonEnvBuilder, wait_until def get_only_element(l): @@ -47,7 +48,7 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder): tenant, _ = env.neon_cli.create_tenant() timeline = env.neon_cli.create_timeline(name, tenant_id=tenant) pg = env.postgres.create_start(name, tenant_id=tenant) - assert (get_state(tenant) == "Active") + assert get_state(tenant) == "Active" # Stop compute pg.stop() diff --git 
a/test_runner/batch_others/test_tenants.py b/test_runner/batch_others/test_tenants.py index 8d73d8185c..0e0cd44471 100644 --- a/test_runner/batch_others/test_tenants.py +++ b/test_runner/batch_others/test_tenants.py @@ -1,15 +1,15 @@ +import os from contextlib import closing from datetime import datetime -import os -import pytest -from fixtures.neon_fixtures import NeonEnvBuilder +import pytest from fixtures.log_helper import log from fixtures.metrics import parse_metrics +from fixtures.neon_fixtures import NeonEnvBuilder from fixtures.utils import lsn_to_hex -@pytest.mark.parametrize('with_safekeepers', [False, True]) +@pytest.mark.parametrize("with_safekeepers", [False, True]) def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder, with_safekeepers: bool): if with_safekeepers: neon_env_builder.num_safekeepers = 3 @@ -19,17 +19,19 @@ def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder, with_safekeepers: tenant_1, _ = env.neon_cli.create_tenant() tenant_2, _ = env.neon_cli.create_tenant() - env.neon_cli.create_timeline(f'test_tenants_normal_work_with_safekeepers{with_safekeepers}', - tenant_id=tenant_1) - env.neon_cli.create_timeline(f'test_tenants_normal_work_with_safekeepers{with_safekeepers}', - tenant_id=tenant_2) + env.neon_cli.create_timeline( + f"test_tenants_normal_work_with_safekeepers{with_safekeepers}", tenant_id=tenant_1 + ) + env.neon_cli.create_timeline( + f"test_tenants_normal_work_with_safekeepers{with_safekeepers}", tenant_id=tenant_2 + ) pg_tenant1 = env.postgres.create_start( - f'test_tenants_normal_work_with_safekeepers{with_safekeepers}', + f"test_tenants_normal_work_with_safekeepers{with_safekeepers}", tenant_id=tenant_1, ) pg_tenant2 = env.postgres.create_start( - f'test_tenants_normal_work_with_safekeepers{with_safekeepers}', + f"test_tenants_normal_work_with_safekeepers{with_safekeepers}", tenant_id=tenant_2, ) @@ -41,7 +43,7 @@ def test_tenants_normal_work(neon_env_builder: NeonEnvBuilder, with_safekeepers: 
cur.execute("CREATE TABLE t(key int primary key, value text)") cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'") cur.execute("SELECT sum(key) FROM t") - assert cur.fetchone() == (5000050000, ) + assert cur.fetchone() == (5000050000,) def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder): @@ -51,11 +53,11 @@ def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder): tenant_1, _ = env.neon_cli.create_tenant() tenant_2, _ = env.neon_cli.create_tenant() - timeline_1 = env.neon_cli.create_timeline('test_metrics_normal_work', tenant_id=tenant_1) - timeline_2 = env.neon_cli.create_timeline('test_metrics_normal_work', tenant_id=tenant_2) + timeline_1 = env.neon_cli.create_timeline("test_metrics_normal_work", tenant_id=tenant_1) + timeline_2 = env.neon_cli.create_timeline("test_metrics_normal_work", tenant_id=tenant_2) - pg_tenant1 = env.postgres.create_start('test_metrics_normal_work', tenant_id=tenant_1) - pg_tenant2 = env.postgres.create_start('test_metrics_normal_work', tenant_id=tenant_2) + pg_tenant1 = env.postgres.create_start("test_metrics_normal_work", tenant_id=tenant_1) + pg_tenant2 = env.postgres.create_start("test_metrics_normal_work", tenant_id=tenant_2) for pg in [pg_tenant1, pg_tenant2]: with closing(pg.connect()) as conn: @@ -63,29 +65,28 @@ def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder): cur.execute("CREATE TABLE t(key int primary key, value text)") cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'") cur.execute("SELECT sum(key) FROM t") - assert cur.fetchone() == (5000050000, ) + assert cur.fetchone() == (5000050000,) collected_metrics = { "pageserver": env.pageserver.http_client().get_metrics(), } for sk in env.safekeepers: - collected_metrics[f'safekeeper{sk.id}'] = sk.http_client().get_metrics_str() + collected_metrics[f"safekeeper{sk.id}"] = sk.http_client().get_metrics_str() for name in collected_metrics: - basepath = os.path.join(neon_env_builder.repo_dir, f'{name}.metrics') + 
basepath = os.path.join(neon_env_builder.repo_dir, f"{name}.metrics") - with open(basepath, 'w') as stdout_f: + with open(basepath, "w") as stdout_f: print(collected_metrics[name], file=stdout_f, flush=True) all_metrics = [parse_metrics(m, name) for name, m in collected_metrics.items()] ps_metrics = all_metrics[0] sk_metrics = all_metrics[1:] - ttids = [{ - 'tenant_id': tenant_1.hex, 'timeline_id': timeline_1.hex - }, { - 'tenant_id': tenant_2.hex, 'timeline_id': timeline_2.hex - }] + ttids = [ + {"tenant_id": tenant_1.hex, "timeline_id": timeline_1.hex}, + {"tenant_id": tenant_2.hex, "timeline_id": timeline_2.hex}, + ] # Test metrics per timeline for tt in ttids: @@ -105,7 +106,8 @@ def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder): log.info(f"Checking common metrics for {metrics.name}") log.info( - f"process_cpu_seconds_total: {metrics.query_one('process_cpu_seconds_total').value}") + f"process_cpu_seconds_total: {metrics.query_one('process_cpu_seconds_total').value}" + ) log.info(f"process_threads: {int(metrics.query_one('process_threads').value)}") log.info( f"process_resident_memory_bytes (MB): {metrics.query_one('process_resident_memory_bytes').value / 1024 / 1024}" diff --git a/test_runner/batch_others/test_tenants_with_remote_storage.py b/test_runner/batch_others/test_tenants_with_remote_storage.py index 636616a45b..a127693c32 100644 --- a/test_runner/batch_others/test_tenants_with_remote_storage.py +++ b/test_runner/batch_others/test_tenants_with_remote_storage.py @@ -12,8 +12,15 @@ from typing import List, Tuple from uuid import UUID import pytest - -from fixtures.neon_fixtures import NeonEnvBuilder, NeonEnv, Postgres, RemoteStorageKind, available_remote_storages, wait_for_last_record_lsn, wait_for_upload +from fixtures.neon_fixtures import ( + NeonEnv, + NeonEnvBuilder, + Postgres, + RemoteStorageKind, + available_remote_storages, + wait_for_last_record_lsn, + wait_for_upload, +) from fixtures.utils import lsn_from_hex @@ -28,7 +35,8 @@ 
async def tenant_workload(env: NeonEnv, pg: Postgres): await pg_conn.execute("CREATE TABLE t(key int primary key, value text)") for i in range(1, 100): await pg_conn.execute( - f"INSERT INTO t SELECT {i}*1000 + g, 'payload' from generate_series(1,1000) g") + f"INSERT INTO t SELECT {i}*1000 + g, 'payload' from generate_series(1,1000) g" + ) # we rely upon autocommit after each statement # as waiting for acceptors happens there @@ -46,11 +54,11 @@ async def all_tenants_workload(env: NeonEnv, tenants_pgs): await asyncio.gather(*workers) -@pytest.mark.parametrize('remote_storatge_kind', available_remote_storages()) +@pytest.mark.parametrize("remote_storatge_kind", available_remote_storages()) def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: RemoteStorageKind): neon_env_builder.enable_remote_storage( remote_storage_kind=remote_storatge_kind, - test_name='test_tenants_many', + test_name="test_tenants_many", ) env = neon_env_builder.init_start() @@ -61,12 +69,13 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: Re # Use a tiny checkpoint distance, to create a lot of layers quickly tenant, _ = env.neon_cli.create_tenant( conf={ - 'checkpoint_distance': '5000000', - }) - env.neon_cli.create_timeline(f'test_tenants_many', tenant_id=tenant) + "checkpoint_distance": "5000000", + } + ) + env.neon_cli.create_timeline(f"test_tenants_many", tenant_id=tenant) pg = env.postgres.create_start( - f'test_tenants_many', + f"test_tenants_many", tenant_id=tenant, ) tenants_pgs.append((tenant, pg)) @@ -77,7 +86,8 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: Re pageserver_http = env.pageserver.http_client() for tenant, pg in tenants_pgs: res = pg.safe_psql_many( - ["SHOW neon.tenant_id", "SHOW neon.timeline_id", "SELECT pg_current_wal_flush_lsn()"]) + ["SHOW neon.tenant_id", "SHOW neon.timeline_id", "SELECT pg_current_wal_flush_lsn()"] + ) tenant_id = res[0][0][0] timeline_id = res[1][0][0] 
current_lsn = lsn_from_hex(res[2][0][0]) diff --git a/test_runner/batch_others/test_timeline_delete.py b/test_runner/batch_others/test_timeline_delete.py index 594475faf4..7a55ffb769 100644 --- a/test_runner/batch_others/test_timeline_delete.py +++ b/test_runner/batch_others/test_timeline_delete.py @@ -1,6 +1,6 @@ from uuid import uuid4 -import pytest +import pytest from fixtures.neon_fixtures import NeonEnv, NeonPageserverApiException, wait_until @@ -17,44 +17,57 @@ def test_timeline_delete(neon_simple_env: NeonEnv): # for non existing tenant: invalid_tenant_id = uuid4() - with pytest.raises(NeonPageserverApiException, - match=f"Tenant {invalid_tenant_id.hex} not found in local tenant state"): + with pytest.raises( + NeonPageserverApiException, + match=f"Tenant {invalid_tenant_id.hex} not found in local tenant state", + ): ps_http.timeline_delete(tenant_id=invalid_tenant_id, timeline_id=invalid_timeline_id) # construct pair of branches to validate that pageserver prohibits # deletion of ancestor timelines when they have child branches parent_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_delete_parent", "empty") - leaf_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_delete_branch1", - "test_ancestor_branch_delete_parent") + leaf_timeline_id = env.neon_cli.create_branch( + "test_ancestor_branch_delete_branch1", "test_ancestor_branch_delete_parent" + ) ps_http = env.pageserver.http_client() - with pytest.raises(NeonPageserverApiException, - match="Cannot detach timeline which has child timelines"): + with pytest.raises( + NeonPageserverApiException, match="Cannot detach timeline which has child timelines" + ): - timeline_path = env.repo_dir / "tenants" / env.initial_tenant.hex / "timelines" / parent_timeline_id.hex + timeline_path = ( + env.repo_dir / "tenants" / env.initial_tenant.hex / "timelines" / parent_timeline_id.hex + ) assert timeline_path.exists() ps_http.timeline_delete(env.initial_tenant, parent_timeline_id) assert not 
timeline_path.exists() - timeline_path = env.repo_dir / "tenants" / env.initial_tenant.hex / "timelines" / leaf_timeline_id.hex + timeline_path = ( + env.repo_dir / "tenants" / env.initial_tenant.hex / "timelines" / leaf_timeline_id.hex + ) assert timeline_path.exists() # retry deletes when compaction or gc is running in pageserver - wait_until(number_of_iterations=3, - interval=0.2, - func=lambda: ps_http.timeline_delete(env.initial_tenant, leaf_timeline_id)) + wait_until( + number_of_iterations=3, + interval=0.2, + func=lambda: ps_http.timeline_delete(env.initial_tenant, leaf_timeline_id), + ) assert not timeline_path.exists() # check 404 - with pytest.raises(NeonPageserverApiException, - match="is not found neither locally nor remotely"): + with pytest.raises( + NeonPageserverApiException, match="is not found neither locally nor remotely" + ): ps_http.timeline_detail(env.initial_tenant, leaf_timeline_id) # FIXME leaves tenant without timelines, should we prevent deletion of root timeline? 
- wait_until(number_of_iterations=3, - interval=0.2, - func=lambda: ps_http.timeline_delete(env.initial_tenant, parent_timeline_id)) + wait_until( + number_of_iterations=3, + interval=0.2, + func=lambda: ps_http.timeline_delete(env.initial_tenant, parent_timeline_id), + ) diff --git a/test_runner/batch_others/test_timeline_size.py b/test_runner/batch_others/test_timeline_size.py index 4a9359cf43..76342cdf98 100644 --- a/test_runner/batch_others/test_timeline_size.py +++ b/test_runner/batch_others/test_timeline_size.py @@ -1,25 +1,33 @@ -from contextlib import closing import math import random -from uuid import UUID import re -import psycopg2.extras -import psycopg2.errors -from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, assert_timeline_local, wait_for_last_flush_lsn -from fixtures.log_helper import log import time +from contextlib import closing +from uuid import UUID +import psycopg2.errors +import psycopg2.extras +from fixtures.log_helper import log +from fixtures.neon_fixtures import ( + NeonEnv, + NeonEnvBuilder, + Postgres, + assert_timeline_local, + wait_for_last_flush_lsn, +) from fixtures.utils import get_timeline_dir_size def test_timeline_size(neon_simple_env: NeonEnv): env = neon_simple_env - new_timeline_id = env.neon_cli.create_branch('test_timeline_size', 'empty') + new_timeline_id = env.neon_cli.create_branch("test_timeline_size", "empty") client = env.pageserver.http_client() timeline_details = assert_timeline_local(client, env.initial_tenant, new_timeline_id) - assert timeline_details['local']['current_logical_size'] == timeline_details['local'][ - 'current_logical_size_non_incremental'] + assert ( + timeline_details["local"]["current_logical_size"] + == timeline_details["local"]["current_logical_size_non_incremental"] + ) pgmain = env.postgres.create_start("test_timeline_size") log.info("postgres is running on 'test_timeline_size' branch") @@ -29,32 +37,40 @@ def test_timeline_size(neon_simple_env: NeonEnv): cur.execute("SHOW 
neon.timeline_id") cur.execute("CREATE TABLE foo (t text)") - cur.execute(""" + cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 10) g - """) + """ + ) res = assert_timeline_local(client, env.initial_tenant, new_timeline_id) - local_details = res['local'] - assert local_details["current_logical_size"] == local_details[ - "current_logical_size_non_incremental"] + local_details = res["local"] + assert ( + local_details["current_logical_size"] + == local_details["current_logical_size_non_incremental"] + ) cur.execute("TRUNCATE foo") res = assert_timeline_local(client, env.initial_tenant, new_timeline_id) - local_details = res['local'] - assert local_details["current_logical_size"] == local_details[ - "current_logical_size_non_incremental"] + local_details = res["local"] + assert ( + local_details["current_logical_size"] + == local_details["current_logical_size_non_incremental"] + ) def test_timeline_size_createdropdb(neon_simple_env: NeonEnv): env = neon_simple_env - new_timeline_id = env.neon_cli.create_branch('test_timeline_size', 'empty') + new_timeline_id = env.neon_cli.create_branch("test_timeline_size", "empty") client = env.pageserver.http_client() timeline_details = assert_timeline_local(client, env.initial_tenant, new_timeline_id) - assert timeline_details['local']['current_logical_size'] == timeline_details['local'][ - 'current_logical_size_non_incremental'] + assert ( + timeline_details["local"]["current_logical_size"] + == timeline_details["local"]["current_logical_size_non_incremental"] + ) pgmain = env.postgres.create_start("test_timeline_size") log.info("postgres is running on 'test_timeline_size' branch") @@ -64,32 +80,40 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv): cur.execute("SHOW neon.timeline_id") res = assert_timeline_local(client, env.initial_tenant, new_timeline_id) - local_details = res['local'] - assert local_details["current_logical_size"] == local_details[ - 
"current_logical_size_non_incremental"] + local_details = res["local"] + assert ( + local_details["current_logical_size"] + == local_details["current_logical_size_non_incremental"] + ) - cur.execute('CREATE DATABASE foodb') - with closing(pgmain.connect(dbname='foodb')) as conn: + cur.execute("CREATE DATABASE foodb") + with closing(pgmain.connect(dbname="foodb")) as conn: with conn.cursor() as cur2: cur2.execute("CREATE TABLE foo (t text)") - cur2.execute(""" + cur2.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 10) g - """) + """ + ) res = assert_timeline_local(client, env.initial_tenant, new_timeline_id) - local_details = res['local'] - assert local_details["current_logical_size"] == local_details[ - "current_logical_size_non_incremental"] + local_details = res["local"] + assert ( + local_details["current_logical_size"] + == local_details["current_logical_size_non_incremental"] + ) - cur.execute('DROP DATABASE foodb') + cur.execute("DROP DATABASE foodb") res = assert_timeline_local(client, env.initial_tenant, new_timeline_id) - local_details = res['local'] - assert local_details["current_logical_size"] == local_details[ - "current_logical_size_non_incremental"] + local_details = res["local"] + assert ( + local_details["current_logical_size"] + == local_details["current_logical_size_non_incremental"] + ) # wait until received_lsn_lag is 0 @@ -101,14 +125,17 @@ def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60 elapsed = time.time() - started_at if elapsed > timeout: raise RuntimeError( - f"timed out waiting for pageserver to reach pg_current_wal_flush_lsn()") + f"timed out waiting for pageserver to reach pg_current_wal_flush_lsn()" + ) - res = pgmain.safe_psql(''' + res = pgmain.safe_psql( + """ SELECT pg_size_pretty(pg_cluster_size()), pg_wal_lsn_diff(pg_current_wal_flush_lsn(), received_lsn) as received_lsn_lag FROM backpressure_lsns(); - ''')[0] + """ + )[0] 
log.info(f"pg_cluster_size = {res[0]}, received_lsn_lag = {res[1]}") received_lsn_lag = res[1] @@ -117,17 +144,19 @@ def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60 def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() - new_timeline_id = env.neon_cli.create_branch('test_timeline_size_quota') + new_timeline_id = env.neon_cli.create_branch("test_timeline_size_quota") client = env.pageserver.http_client() res = assert_timeline_local(client, env.initial_tenant, new_timeline_id) - assert res['local']["current_logical_size"] == res['local'][ - "current_logical_size_non_incremental"] + assert ( + res["local"]["current_logical_size"] == res["local"]["current_logical_size_non_incremental"] + ) pgmain = env.postgres.create_start( "test_timeline_size_quota", # Set small limit for the test - config_lines=['neon.max_cluster_size=30MB']) + config_lines=["neon.max_cluster_size=30MB"], + ) log.info("postgres is running on 'test_timeline_size_quota' branch") with closing(pgmain.connect()) as conn: @@ -140,19 +169,23 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder): # Insert many rows. This query must fail because of space limit try: - cur.execute(''' + cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100000) g - ''') + """ + ) wait_for_pageserver_catchup(pgmain) - cur.execute(''' + cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 500000) g - ''') + """ + ) # If we get here, the timeline size limit failed log.error("Query unexpectedly succeeded") @@ -162,17 +195,19 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder): log.info(f"Query expectedly failed with: {err}") # drop table to free space - cur.execute('DROP TABLE foo') + cur.execute("DROP TABLE foo") wait_for_pageserver_catchup(pgmain) # create it again and insert some rows. 
This query must succeed cur.execute("CREATE TABLE foo (t text)") - cur.execute(''' + cur.execute( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 10000) g - ''') + """ + ) wait_for_pageserver_catchup(pgmain) @@ -183,15 +218,17 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder): def test_timeline_physical_size_init(neon_simple_env: NeonEnv): env = neon_simple_env - new_timeline_id = env.neon_cli.create_branch('test_timeline_physical_size_init') + new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_init") pg = env.postgres.create_start("test_timeline_physical_size_init") - pg.safe_psql_many([ - "CREATE TABLE foo (t text)", - """INSERT INTO foo + pg.safe_psql_many( + [ + "CREATE TABLE foo (t text)", + """INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 1000) g""", - ]) + ] + ) wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) @@ -204,15 +241,17 @@ def test_timeline_physical_size_init(neon_simple_env: NeonEnv): def test_timeline_physical_size_post_checkpoint(neon_simple_env: NeonEnv): env = neon_simple_env - new_timeline_id = env.neon_cli.create_branch('test_timeline_physical_size_post_checkpoint') + new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_checkpoint") pg = env.postgres.create_start("test_timeline_physical_size_post_checkpoint") - pg.safe_psql_many([ - "CREATE TABLE foo (t text)", - """INSERT INTO foo + pg.safe_psql_many( + [ + "CREATE TABLE foo (t text)", + """INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 1000) g""", - ]) + ] + ) wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) env.pageserver.safe_psql(f"checkpoint {env.initial_tenant.hex} {new_timeline_id.hex}") @@ -223,19 +262,23 @@ def test_timeline_physical_size_post_checkpoint(neon_simple_env: NeonEnv): def test_timeline_physical_size_post_compaction(neon_env_builder: 
NeonEnvBuilder): # Disable background compaction as we don't want it to happen after `get_physical_size` request # and before checking the expected size on disk, which makes the assertion failed - neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance=100000, compaction_period='10m'}" + neon_env_builder.pageserver_config_override = ( + "tenant_config={checkpoint_distance=100000, compaction_period='10m'}" + ) env = neon_env_builder.init_start() - new_timeline_id = env.neon_cli.create_branch('test_timeline_physical_size_post_compaction') + new_timeline_id = env.neon_cli.create_branch("test_timeline_physical_size_post_compaction") pg = env.postgres.create_start("test_timeline_physical_size_post_compaction") - pg.safe_psql_many([ - "CREATE TABLE foo (t text)", - """INSERT INTO foo + pg.safe_psql_many( + [ + "CREATE TABLE foo (t text)", + """INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100000) g""", - ]) + ] + ) wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) env.pageserver.safe_psql(f"checkpoint {env.initial_tenant.hex} {new_timeline_id.hex}") @@ -247,29 +290,32 @@ def test_timeline_physical_size_post_compaction(neon_env_builder: NeonEnvBuilder def test_timeline_physical_size_post_gc(neon_env_builder: NeonEnvBuilder): # Disable background compaction and GC as we don't want it to happen after `get_physical_size` request # and before checking the expected size on disk, which makes the assertion failed - neon_env_builder.pageserver_config_override = \ - "tenant_config={checkpoint_distance=100000, compaction_period='10m', gc_period='10m', pitr_interval='1s'}" + neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance=100000, compaction_period='10m', gc_period='10m', pitr_interval='1s'}" env = neon_env_builder.init_start() - new_timeline_id = env.neon_cli.create_branch('test_timeline_physical_size_post_gc') + new_timeline_id = 
env.neon_cli.create_branch("test_timeline_physical_size_post_gc") pg = env.postgres.create_start("test_timeline_physical_size_post_gc") - pg.safe_psql_many([ - "CREATE TABLE foo (t text)", - """INSERT INTO foo + pg.safe_psql_many( + [ + "CREATE TABLE foo (t text)", + """INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100000) g""", - ]) + ] + ) wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) env.pageserver.safe_psql(f"checkpoint {env.initial_tenant.hex} {new_timeline_id.hex}") - pg.safe_psql(""" + pg.safe_psql( + """ INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100000) g - """) + """ + ) wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) env.pageserver.safe_psql(f"checkpoint {env.initial_tenant.hex} {new_timeline_id.hex}") @@ -284,15 +330,17 @@ def test_timeline_physical_size_post_gc(neon_env_builder: NeonEnvBuilder): def test_timeline_size_metrics(neon_simple_env: NeonEnv): env = neon_simple_env - new_timeline_id = env.neon_cli.create_branch('test_timeline_size_metrics') + new_timeline_id = env.neon_cli.create_branch("test_timeline_size_metrics") pg = env.postgres.create_start("test_timeline_size_metrics") - pg.safe_psql_many([ - "CREATE TABLE foo (t text)", - """INSERT INTO foo + pg.safe_psql_many( + [ + "CREATE TABLE foo (t text)", + """INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 100000) g""", - ]) + ] + ) wait_for_last_flush_lsn(env, pg, env.initial_tenant, new_timeline_id) env.pageserver.safe_psql(f"checkpoint {env.initial_tenant.hex} {new_timeline_id.hex}") @@ -302,7 +350,8 @@ def test_timeline_size_metrics(neon_simple_env: NeonEnv): matches = re.search( f'^pageserver_current_physical_size{{tenant_id="{env.initial_tenant.hex}",timeline_id="{new_timeline_id.hex}"}} (\\S+)$', metrics, - re.MULTILINE) + re.MULTILINE, + ) assert matches tl_physical_size_metric = int(matches.group(1)) @@ -314,7 
+363,8 @@ def test_timeline_size_metrics(neon_simple_env: NeonEnv): matches = re.search( f'^pageserver_current_logical_size{{tenant_id="{env.initial_tenant.hex}",timeline_id="{new_timeline_id.hex}"}} (\\S+)$', metrics, - re.MULTILINE) + re.MULTILINE, + ) assert matches tl_logical_size_metric = int(matches.group(1)) @@ -341,7 +391,7 @@ def test_tenant_physical_size(neon_simple_env: NeonEnv): def get_timeline_physical_size(timeline: UUID): res = client.timeline_detail(tenant, timeline) - return res['local']['current_physical_size_non_incremental'] + return res["local"]["current_physical_size_non_incremental"] timeline_total_size = get_timeline_physical_size(timeline) for i in range(10): @@ -350,10 +400,12 @@ def test_tenant_physical_size(neon_simple_env: NeonEnv): timeline = env.neon_cli.create_branch(f"test_tenant_physical_size_{i}", tenant_id=tenant) pg = env.postgres.create_start(f"test_tenant_physical_size_{i}", tenant_id=tenant) - pg.safe_psql_many([ - "CREATE TABLE foo (t text)", - f"INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, {n_rows}) g", - ]) + pg.safe_psql_many( + [ + "CREATE TABLE foo (t text)", + f"INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, {n_rows}) g", + ] + ) wait_for_last_flush_lsn(env, pg, tenant, timeline) env.pageserver.safe_psql(f"checkpoint {tenant.hex} {timeline.hex}") @@ -362,7 +414,7 @@ def test_tenant_physical_size(neon_simple_env: NeonEnv): pg.stop() - tenant_physical_size = int(client.tenant_status(tenant_id=tenant)['current_physical_size']) + tenant_physical_size = int(client.tenant_status(tenant_id=tenant)["current_physical_size"]) assert tenant_physical_size == timeline_total_size @@ -372,6 +424,8 @@ def assert_physical_size(env: NeonEnv, tenant_id: UUID, timeline_id: UUID): client = env.pageserver.http_client() res = assert_timeline_local(client, tenant_id, timeline_id) timeline_path = env.timeline_dir(tenant_id, timeline_id) - assert 
res["local"]["current_physical_size"] == res["local"][ - "current_physical_size_non_incremental"] + assert ( + res["local"]["current_physical_size"] + == res["local"]["current_physical_size_non_incremental"] + ) assert res["local"]["current_physical_size"] == get_timeline_dir_size(timeline_path) diff --git a/test_runner/batch_others/test_twophase.py b/test_runner/batch_others/test_twophase.py index 04e3d0b7bc..e01ba7caef 100644 --- a/test_runner/batch_others/test_twophase.py +++ b/test_runner/batch_others/test_twophase.py @@ -1,7 +1,7 @@ import os -from fixtures.neon_fixtures import NeonEnv from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv # @@ -10,37 +10,37 @@ from fixtures.log_helper import log def test_twophase(neon_simple_env: NeonEnv): env = neon_simple_env env.neon_cli.create_branch("test_twophase", "empty") - pg = env.postgres.create_start('test_twophase', config_lines=['max_prepared_transactions=5']) + pg = env.postgres.create_start("test_twophase", config_lines=["max_prepared_transactions=5"]) log.info("postgres is running on 'test_twophase' branch") conn = pg.connect() cur = conn.cursor() - cur.execute('CREATE TABLE foo (t text)') + cur.execute("CREATE TABLE foo (t text)") # Prepare a transaction that will insert a row - cur.execute('BEGIN') + cur.execute("BEGIN") cur.execute("INSERT INTO foo VALUES ('one')") cur.execute("PREPARE TRANSACTION 'insert_one'") # Prepare another transaction that will insert a row - cur.execute('BEGIN') + cur.execute("BEGIN") cur.execute("INSERT INTO foo VALUES ('two')") cur.execute("PREPARE TRANSACTION 'insert_two'") # Prepare a transaction that will insert a row - cur.execute('BEGIN') + cur.execute("BEGIN") cur.execute("INSERT INTO foo VALUES ('three')") cur.execute("PREPARE TRANSACTION 'insert_three'") # Prepare another transaction that will insert a row - cur.execute('BEGIN') + cur.execute("BEGIN") cur.execute("INSERT INTO foo VALUES ('four')") cur.execute("PREPARE TRANSACTION 'insert_four'") # 
On checkpoint state data copied to files in # pg_twophase directory and fsynced - cur.execute('CHECKPOINT') + cur.execute("CHECKPOINT") twophase_files = os.listdir(pg.pg_twophase_dir_path()) log.info(twophase_files) @@ -48,7 +48,7 @@ def test_twophase(neon_simple_env: NeonEnv): cur.execute("COMMIT PREPARED 'insert_three'") cur.execute("ROLLBACK PREPARED 'insert_four'") - cur.execute('CHECKPOINT') + cur.execute("CHECKPOINT") twophase_files = os.listdir(pg.pg_twophase_dir_path()) log.info(twophase_files) @@ -59,8 +59,8 @@ def test_twophase(neon_simple_env: NeonEnv): # Start compute on the new branch pg2 = env.postgres.create_start( - 'test_twophase_prepared', - config_lines=['max_prepared_transactions=5'], + "test_twophase_prepared", + config_lines=["max_prepared_transactions=5"], ) # Check that we restored only needed twophase files @@ -76,9 +76,9 @@ def test_twophase(neon_simple_env: NeonEnv): cur2.execute("COMMIT PREPARED 'insert_one'") cur2.execute("ROLLBACK PREPARED 'insert_two'") - cur2.execute('SELECT * FROM foo') - assert cur2.fetchall() == [('one', ), ('three', )] + cur2.execute("SELECT * FROM foo") + assert cur2.fetchall() == [("one",), ("three",)] # Only one committed insert is visible on the original branch - cur.execute('SELECT * FROM foo') - assert cur.fetchall() == [('three', )] + cur.execute("SELECT * FROM foo") + assert cur.fetchall() == [("three",)] diff --git a/test_runner/batch_others/test_vm_bits.py b/test_runner/batch_others/test_vm_bits.py index 29b55f5b8c..c147c6dff5 100644 --- a/test_runner/batch_others/test_vm_bits.py +++ b/test_runner/batch_others/test_vm_bits.py @@ -1,5 +1,5 @@ -from fixtures.neon_fixtures import NeonEnv from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv # @@ -10,48 +10,50 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv): env = neon_simple_env env.neon_cli.create_branch("test_vm_bit_clear", "empty") - pg = env.postgres.create_start('test_vm_bit_clear') + pg = 
env.postgres.create_start("test_vm_bit_clear") log.info("postgres is running on 'test_vm_bit_clear' branch") pg_conn = pg.connect() cur = pg_conn.cursor() # Install extension containing function needed for test - cur.execute('CREATE EXTENSION neon_test_utils') + cur.execute("CREATE EXTENSION neon_test_utils") # Create a test table and freeze it to set the VM bit. - cur.execute('CREATE TABLE vmtest_delete (id integer PRIMARY KEY)') - cur.execute('INSERT INTO vmtest_delete VALUES (1)') - cur.execute('VACUUM FREEZE vmtest_delete') + cur.execute("CREATE TABLE vmtest_delete (id integer PRIMARY KEY)") + cur.execute("INSERT INTO vmtest_delete VALUES (1)") + cur.execute("VACUUM FREEZE vmtest_delete") - cur.execute('CREATE TABLE vmtest_update (id integer PRIMARY KEY)') - cur.execute('INSERT INTO vmtest_update SELECT g FROM generate_series(1, 1000) g') - cur.execute('VACUUM FREEZE vmtest_update') + cur.execute("CREATE TABLE vmtest_update (id integer PRIMARY KEY)") + cur.execute("INSERT INTO vmtest_update SELECT g FROM generate_series(1, 1000) g") + cur.execute("VACUUM FREEZE vmtest_update") # DELETE and UPDATE the rows. - cur.execute('DELETE FROM vmtest_delete WHERE id = 1') - cur.execute('UPDATE vmtest_update SET id = 5000 WHERE id = 1') + cur.execute("DELETE FROM vmtest_delete WHERE id = 1") + cur.execute("UPDATE vmtest_update SET id = 5000 WHERE id = 1") # Branch at this point, to test that later env.neon_cli.create_branch("test_vm_bit_clear_new", "test_vm_bit_clear") # Clear the buffer cache, to force the VM page to be re-fetched from # the page server - cur.execute('SELECT clear_buffer_cache()') + cur.execute("SELECT clear_buffer_cache()") # Check that an index-only scan doesn't see the deleted row. If the # clearing of the VM bit was not replayed correctly, this would incorrectly # return deleted row. 
- cur.execute(''' + cur.execute( + """ set enable_seqscan=off; set enable_indexscan=on; set enable_bitmapscan=off; - ''') + """ + ) - cur.execute('SELECT * FROM vmtest_delete WHERE id = 1') - assert (cur.fetchall() == []) - cur.execute('SELECT * FROM vmtest_update WHERE id = 1') - assert (cur.fetchall() == []) + cur.execute("SELECT * FROM vmtest_delete WHERE id = 1") + assert cur.fetchall() == [] + cur.execute("SELECT * FROM vmtest_update WHERE id = 1") + assert cur.fetchall() == [] cur.close() @@ -61,19 +63,21 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv): # a dirty VM page is evicted. If the VM bit was not correctly cleared by the # earlier WAL record, the full-page image hides the problem. Starting a new # server at the right point-in-time avoids that full-page image. - pg_new = env.postgres.create_start('test_vm_bit_clear_new') + pg_new = env.postgres.create_start("test_vm_bit_clear_new") log.info("postgres is running on 'test_vm_bit_clear_new' branch") pg_new_conn = pg_new.connect() cur_new = pg_new_conn.cursor() - cur_new.execute(''' + cur_new.execute( + """ set enable_seqscan=off; set enable_indexscan=on; set enable_bitmapscan=off; - ''') + """ + ) - cur_new.execute('SELECT * FROM vmtest_delete WHERE id = 1') - assert (cur_new.fetchall() == []) - cur_new.execute('SELECT * FROM vmtest_update WHERE id = 1') - assert (cur_new.fetchall() == []) + cur_new.execute("SELECT * FROM vmtest_delete WHERE id = 1") + assert cur_new.fetchall() == [] + cur_new.execute("SELECT * FROM vmtest_update WHERE id = 1") + assert cur_new.fetchall() == [] diff --git a/test_runner/batch_others/test_wal_acceptor.py b/test_runner/batch_others/test_wal_acceptor.py index d922dd0cb4..7710ef86cd 100644 --- a/test_runner/batch_others/test_wal_acceptor.py +++ b/test_runner/batch_others/test_wal_acceptor.py @@ -1,42 +1,59 @@ -import pathlib -import pytest -import random -import time import os +import pathlib +import random import shutil import signal import subprocess import sys import 
threading +import time import uuid - from contextlib import closing from dataclasses import dataclass, field from pathlib import Path -from fixtures.neon_fixtures import NeonPageserver, PgBin, Etcd, Postgres, RemoteStorageKind, RemoteStorageUsers, Safekeeper, NeonEnv, NeonEnvBuilder, PortDistributor, SafekeeperPort, available_remote_storages, neon_binpath, PgProtocol, wait_for_last_record_lsn, wait_for_upload -from fixtures.utils import get_dir_size, lsn_to_hex, lsn_from_hex, query_scalar -from fixtures.log_helper import log -from typing import List, Optional, Any +from typing import Any, List, Optional from uuid import uuid4 +import pytest +from fixtures.log_helper import log +from fixtures.neon_fixtures import ( + Etcd, + NeonEnv, + NeonEnvBuilder, + NeonPageserver, + PgBin, + PgProtocol, + PortDistributor, + Postgres, + RemoteStorageKind, + RemoteStorageUsers, + Safekeeper, + SafekeeperPort, + available_remote_storages, + neon_binpath, + wait_for_last_record_lsn, + wait_for_upload, +) +from fixtures.utils import get_dir_size, lsn_from_hex, lsn_to_hex, query_scalar -def wait_lsn_force_checkpoint(tenant_id: str, - timeline_id: str, - pg: Postgres, - ps: NeonPageserver, - pageserver_conn_options={}): - lsn = lsn_from_hex(pg.safe_psql('SELECT pg_current_wal_flush_lsn()')[0][0]) + +def wait_lsn_force_checkpoint( + tenant_id: str, timeline_id: str, pg: Postgres, ps: NeonPageserver, pageserver_conn_options={} +): + lsn = lsn_from_hex(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) log.info(f"pg_current_wal_flush_lsn is {lsn_to_hex(lsn)}, waiting for it on pageserver") auth_token = None - if 'password' in pageserver_conn_options: - auth_token = pageserver_conn_options['password'] + if "password" in pageserver_conn_options: + auth_token = pageserver_conn_options["password"] # wait for the pageserver to catch up - wait_for_last_record_lsn(ps.http_client(auth_token=auth_token), - uuid.UUID(hex=tenant_id), - uuid.UUID(hex=timeline_id), - lsn) + 
wait_for_last_record_lsn( + ps.http_client(auth_token=auth_token), + uuid.UUID(hex=tenant_id), + uuid.UUID(hex=timeline_id), + lsn, + ) # force checkpoint to advance remote_consistent_lsn with closing(ps.connect(**pageserver_conn_options)) as psconn: @@ -44,10 +61,12 @@ def wait_lsn_force_checkpoint(tenant_id: str, pscur.execute(f"checkpoint {tenant_id} {timeline_id}") # ensure that remote_consistent_lsn is advanced - wait_for_upload(ps.http_client(auth_token=auth_token), - uuid.UUID(hex=tenant_id), - uuid.UUID(hex=timeline_id), - lsn) + wait_for_upload( + ps.http_client(auth_token=auth_token), + uuid.UUID(hex=tenant_id), + uuid.UUID(hex=timeline_id), + lsn, + ) @dataclass @@ -89,7 +108,8 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder): with env.pageserver.http_client() as pageserver_http: timeline_details = [ pageserver_http.timeline_detail( - tenant_id=tenant_id, timeline_id=branch_names_to_timeline_ids[branch_name]) + tenant_id=tenant_id, timeline_id=branch_names_to_timeline_ids[branch_name] + ) for branch_name in branch_names ] # All changes visible to pageserver (last_record_lsn) should be @@ -105,14 +125,14 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder): for timeline_detail in timeline_details: timeline_id: str = timeline_detail["timeline_id"] - local_timeline_detail = timeline_detail.get('local') + local_timeline_detail = timeline_detail.get("local") if local_timeline_detail is None: log.debug(f"Timeline {timeline_id} is not present locally, skipping") continue m = TimelineMetrics( timeline_id=timeline_id, - last_record_lsn=lsn_from_hex(local_timeline_detail['last_record_lsn']), + last_record_lsn=lsn_from_hex(local_timeline_detail["last_record_lsn"]), ) for sk_m in sk_metrics: m.flush_lsns.append(sk_m.flush_lsn_inexact[(tenant_id.hex, timeline_id)]) @@ -120,14 +140,20 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder): for flush_lsn, commit_lsn in zip(m.flush_lsns, m.commit_lsns): # Invariant. 
May be < when transaction is in progress. - assert commit_lsn <= flush_lsn, f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}" + assert ( + commit_lsn <= flush_lsn + ), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}" # We only call collect_metrics() after a transaction is confirmed by # the compute node, which only happens after a consensus of safekeepers # has confirmed the transaction. We assume majority consensus here. - assert (2 * sum(m.last_record_lsn <= lsn - for lsn in m.flush_lsns) > neon_env_builder.num_safekeepers), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}" - assert (2 * sum(m.last_record_lsn <= lsn - for lsn in m.commit_lsns) > neon_env_builder.num_safekeepers), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}" + assert ( + 2 * sum(m.last_record_lsn <= lsn for lsn in m.flush_lsns) + > neon_env_builder.num_safekeepers + ), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}" + assert ( + 2 * sum(m.last_record_lsn <= lsn for lsn in m.commit_lsns) + > neon_env_builder.num_safekeepers + ), f"timeline_id={timeline_id}, timeline_detail={timeline_detail}, sk_metrics={sk_metrics}" timeline_metrics.append(m) log.info(f"{message}: {timeline_metrics}") return timeline_metrics @@ -155,8 +181,10 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder): collect_metrics("during INSERT INTO") time.sleep(1) except: - log.error("MetricsChecker's thread failed, the test will be failed on .stop() call", - exc_info=True) + log.error( + "MetricsChecker's thread failed, the test will be failed on .stop() call", + exc_info=True, + ) # We want to preserve traceback as well as the exception exc_type, exc_value, exc_tb = sys.exc_info() assert exc_type @@ -183,7 +211,7 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder): # Check data for 2/3 timelines for pg in pgs[:-1]: 
res = pg.safe_psql("SELECT sum(key) FROM t") - assert res[0] == (5000050000, ) + assert res[0] == (5000050000,) final_m = collect_metrics("after SELECT") # Assume that LSNs (a) behave similarly in all timelines; and (b) INSERT INTO alters LSN significantly. @@ -208,8 +236,8 @@ def test_restarts(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = n_acceptors env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_safekeepers_restarts') - pg = env.postgres.create_start('test_safekeepers_restarts') + env.neon_cli.create_branch("test_safekeepers_restarts") + pg = env.postgres.create_start("test_safekeepers_restarts") # we rely upon autocommit after each statement # as waiting for acceptors happens there @@ -217,9 +245,9 @@ def test_restarts(neon_env_builder: NeonEnvBuilder): cur = pg_conn.cursor() failed_node = None - cur.execute('CREATE TABLE t(key int primary key, value text)') + cur.execute("CREATE TABLE t(key int primary key, value text)") for i in range(n_inserts): - cur.execute("INSERT INTO t values (%s, 'payload');", (i + 1, )) + cur.execute("INSERT INTO t values (%s, 'payload');", (i + 1,)) if random.random() <= fault_probability: if failed_node is None: @@ -228,7 +256,7 @@ def test_restarts(neon_env_builder: NeonEnvBuilder): else: failed_node.start() failed_node = None - assert query_scalar(cur, 'SELECT sum(key) FROM t') == 500500 + assert query_scalar(cur, "SELECT sum(key) FROM t") == 500500 # Test that safekeepers push their info to the broker and learn peer status from it @@ -238,7 +266,7 @@ def test_broker(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_broker", "main") - pg = env.postgres.create_start('test_broker') + pg = env.postgres.create_start("test_broker") pg.safe_psql("CREATE TABLE t(key int primary key, value text)") # learn neon timeline from compute @@ -260,9 +288,10 @@ def test_broker(neon_env_builder: NeonEnvBuilder): while True: stat_after = 
[cli.timeline_status(tenant_id, timeline_id) for cli in clients] if all( - lsn_from_hex(s_after.remote_consistent_lsn) > lsn_from_hex( - s_before.remote_consistent_lsn) for s_after, - s_before in zip(stat_after, stat_before)): + lsn_from_hex(s_after.remote_consistent_lsn) + > lsn_from_hex(s_before.remote_consistent_lsn) + for s_after, s_before in zip(stat_after, stat_before) + ): break elapsed = time.time() - started_at if elapsed > 20: @@ -273,7 +302,7 @@ def test_broker(neon_env_builder: NeonEnvBuilder): # Test that old WAL consumed by peers and pageserver is removed from safekeepers. -@pytest.mark.parametrize('auth_enabled', [False, True]) +@pytest.mark.parametrize("auth_enabled", [False, True]) def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): neon_env_builder.num_safekeepers = 2 # to advance remote_consistent_lsn @@ -281,16 +310,18 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): neon_env_builder.auth_enabled = auth_enabled env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_safekeepers_wal_removal') - pg = env.postgres.create_start('test_safekeepers_wal_removal') + env.neon_cli.create_branch("test_safekeepers_wal_removal") + pg = env.postgres.create_start("test_safekeepers_wal_removal") # Note: it is important to insert at least two segments, as currently # control file is synced roughly once in segment range and WAL is not # removed until all horizons are persisted. 
- pg.safe_psql_many([ - 'CREATE TABLE t(key int primary key, value text)', - "INSERT INTO t SELECT generate_series(1,200000), 'payload'", - ]) + pg.safe_psql_many( + [ + "CREATE TABLE t(key int primary key, value text)", + "INSERT INTO t SELECT generate_series(1,200000), 'payload'", + ] + ) tenant_id = pg.safe_psql("show neon.tenant_id")[0][0] timeline_id = pg.safe_psql("show neon.timeline_id")[0][0] @@ -298,12 +329,12 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): # force checkpoint to advance remote_consistent_lsn pageserver_conn_options = {} if auth_enabled: - pageserver_conn_options['password'] = env.auth_keys.generate_tenant_token(tenant_id) + pageserver_conn_options["password"] = env.auth_keys.generate_tenant_token(tenant_id) wait_lsn_force_checkpoint(tenant_id, timeline_id, pg, env.pageserver, pageserver_conn_options) # We will wait for first segment removal. Make sure they exist for starter. first_segments = [ - os.path.join(sk.data_dir(), tenant_id, timeline_id, '000000010000000000000001') + os.path.join(sk.data_dir(), tenant_id, timeline_id, "000000010000000000000001") for sk in env.safekeepers ] assert all(os.path.exists(p) for p in first_segments) @@ -312,25 +343,33 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): http_cli = env.safekeepers[0].http_client() else: http_cli = env.safekeepers[0].http_client( - auth_token=env.auth_keys.generate_tenant_token(tenant_id)) + auth_token=env.auth_keys.generate_tenant_token(tenant_id) + ) http_cli_other = env.safekeepers[0].http_client( - auth_token=env.auth_keys.generate_tenant_token(uuid4().hex)) + auth_token=env.auth_keys.generate_tenant_token(uuid4().hex) + ) http_cli_noauth = env.safekeepers[0].http_client() # Pretend WAL is offloaded to s3. 
if auth_enabled: - old_backup_lsn = http_cli.timeline_status(tenant_id=tenant_id, - timeline_id=timeline_id).backup_lsn - assert 'FFFFFFFF/FEFFFFFF' != old_backup_lsn + old_backup_lsn = http_cli.timeline_status( + tenant_id=tenant_id, timeline_id=timeline_id + ).backup_lsn + assert "FFFFFFFF/FEFFFFFF" != old_backup_lsn for cli in [http_cli_other, http_cli_noauth]: - with pytest.raises(cli.HTTPError, match='Forbidden|Unauthorized'): - cli.record_safekeeper_info(tenant_id, - timeline_id, {'backup_lsn': 'FFFFFFFF/FEFFFFFF'}) - assert old_backup_lsn == http_cli.timeline_status(tenant_id=tenant_id, - timeline_id=timeline_id).backup_lsn - http_cli.record_safekeeper_info(tenant_id, timeline_id, {'backup_lsn': 'FFFFFFFF/FEFFFFFF'}) - assert 'FFFFFFFF/FEFFFFFF' == http_cli.timeline_status(tenant_id=tenant_id, - timeline_id=timeline_id).backup_lsn + with pytest.raises(cli.HTTPError, match="Forbidden|Unauthorized"): + cli.record_safekeeper_info( + tenant_id, timeline_id, {"backup_lsn": "FFFFFFFF/FEFFFFFF"} + ) + assert ( + old_backup_lsn + == http_cli.timeline_status(tenant_id=tenant_id, timeline_id=timeline_id).backup_lsn + ) + http_cli.record_safekeeper_info(tenant_id, timeline_id, {"backup_lsn": "FFFFFFFF/FEFFFFFF"}) + assert ( + "FFFFFFFF/FEFFFFFF" + == http_cli.timeline_status(tenant_id=tenant_id, timeline_id=timeline_id).backup_lsn + ) # wait till first segment is removed on all safekeepers started_at = time.time() @@ -355,7 +394,8 @@ def wait_segment_offload(tenant_id, timeline_id, live_sk, seg_end): elapsed = time.time() - started_at if elapsed > 30: raise RuntimeError( - f"timed out waiting {elapsed:.0f}s for segment ending at {seg_end} get offloaded") + f"timed out waiting {elapsed:.0f}s for segment ending at {seg_end} get offloaded" + ) time.sleep(0.5) @@ -364,8 +404,9 @@ def wait_wal_trim(tenant_id, timeline_id, sk, target_size): http_cli = sk.http_client() while True: tli_status = http_cli.timeline_status(tenant_id, timeline_id) - sk_wal_size = 
get_dir_size(os.path.join(sk.data_dir(), tenant_id, - timeline_id)) / 1024 / 1024 + sk_wal_size = ( + get_dir_size(os.path.join(sk.data_dir(), tenant_id, timeline_id)) / 1024 / 1024 + ) log.info(f"Safekeeper id={sk.id} wal_size={sk_wal_size:.2f}MB status={tli_status}") if sk_wal_size <= target_size: @@ -379,21 +420,21 @@ def wait_wal_trim(tenant_id, timeline_id, sk, target_size): time.sleep(0.5) -@pytest.mark.parametrize('remote_storatge_kind', available_remote_storages()) +@pytest.mark.parametrize("remote_storatge_kind", available_remote_storages()) def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: RemoteStorageKind): neon_env_builder.num_safekeepers = 3 neon_env_builder.enable_remote_storage( remote_storage_kind=remote_storatge_kind, - test_name='test_safekeepers_wal_backup', + test_name="test_safekeepers_wal_backup", ) neon_env_builder.remote_storage_users = RemoteStorageUsers.SAFEKEEPER env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_safekeepers_wal_backup') - pg = env.postgres.create_start('test_safekeepers_wal_backup') + env.neon_cli.create_branch("test_safekeepers_wal_backup") + pg = env.postgres.create_start("test_safekeepers_wal_backup") # learn neon timeline from compute tenant_id = pg.safe_psql("show neon.tenant_id")[0][0] @@ -401,11 +442,11 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: Remo pg_conn = pg.connect() cur = pg_conn.cursor() - cur.execute('create table t(key int, value text)') + cur.execute("create table t(key int, value text)") # Shut down subsequently each of safekeepers and fill a segment while sk is # down; ensure segment gets offloaded by others. 
- offloaded_seg_end = ['0/2000000', '0/3000000', '0/4000000'] + offloaded_seg_end = ["0/2000000", "0/3000000", "0/4000000"] for victim, seg_end in zip(env.safekeepers, offloaded_seg_end): victim.stop() # roughly fills one segment @@ -419,36 +460,36 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: Remo # put one of safekeepers down again env.safekeepers[0].stop() # restart postgres - pg.stop_and_destroy().create_start('test_safekeepers_wal_backup') + pg.stop_and_destroy().create_start("test_safekeepers_wal_backup") # and ensure offloading still works with closing(pg.connect()) as conn: with conn.cursor() as cur: cur.execute("insert into t select generate_series(1,250000), 'payload'") - wait_segment_offload(tenant_id, timeline_id, env.safekeepers[1], '0/5000000') + wait_segment_offload(tenant_id, timeline_id, env.safekeepers[1], "0/5000000") -@pytest.mark.parametrize('remote_storatge_kind', available_remote_storages()) +@pytest.mark.parametrize("remote_storatge_kind", available_remote_storages()) def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: RemoteStorageKind): neon_env_builder.num_safekeepers = 3 neon_env_builder.enable_remote_storage( remote_storage_kind=remote_storatge_kind, - test_name='test_s3_wal_replay', + test_name="test_s3_wal_replay", ) neon_env_builder.remote_storage_users = RemoteStorageUsers.SAFEKEEPER env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_s3_wal_replay') + env.neon_cli.create_branch("test_s3_wal_replay") env.pageserver.stop() - pageserver_tenants_dir = os.path.join(env.repo_dir, 'tenants') - pageserver_fresh_copy = os.path.join(env.repo_dir, 'tenants_fresh') + pageserver_tenants_dir = os.path.join(env.repo_dir, "tenants") + pageserver_fresh_copy = os.path.join(env.repo_dir, "tenants_fresh") log.info(f"Creating a copy of pageserver in a fresh state at {pageserver_fresh_copy}") shutil.copytree(pageserver_tenants_dir, pageserver_fresh_copy) 
env.pageserver.start() - pg = env.postgres.create_start('test_s3_wal_replay') + pg = env.postgres.create_start("test_s3_wal_replay") # learn neon timeline from compute tenant_id = pg.safe_psql("show neon.tenant_id")[0][0] @@ -462,7 +503,7 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: R cur.execute("insert into t values (1, 'payload')") expected_sum += 1 - offloaded_seg_end = ['0/3000000'] + offloaded_seg_end = ["0/3000000"] for seg_end in offloaded_seg_end: # roughly fills two segments cur.execute("insert into t select generate_series(1,500000), 'payload'") @@ -476,28 +517,30 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: R # advance remote_consistent_lsn to trigger WAL trimming # this LSN should be less than commit_lsn, so timeline will be active=true in safekeepers, to push etcd updates env.safekeepers[0].http_client().record_safekeeper_info( - tenant_id, timeline_id, {'remote_consistent_lsn': offloaded_seg_end[-1]}) + tenant_id, timeline_id, {"remote_consistent_lsn": offloaded_seg_end[-1]} + ) for sk in env.safekeepers: # require WAL to be trimmed, so no more than one segment is left on disk wait_wal_trim(tenant_id, timeline_id, sk, 16 * 1.5) - last_lsn = query_scalar(cur, 'SELECT pg_current_wal_flush_lsn()') + last_lsn = query_scalar(cur, "SELECT pg_current_wal_flush_lsn()") pageserver_lsn = env.pageserver.http_client().timeline_detail( - uuid.UUID(tenant_id), uuid.UUID((timeline_id)))["local"]["last_record_lsn"] + uuid.UUID(tenant_id), uuid.UUID((timeline_id)) + )["local"]["last_record_lsn"] lag = lsn_from_hex(last_lsn) - lsn_from_hex(pageserver_lsn) log.info( - f'Pageserver last_record_lsn={pageserver_lsn}; flush_lsn={last_lsn}; lag before replay is {lag / 1024}kb' + f"Pageserver last_record_lsn={pageserver_lsn}; flush_lsn={last_lsn}; lag before replay is {lag / 1024}kb" ) # replace pageserver with a fresh copy pg.stop_and_destroy() env.pageserver.stop() - log.info(f'Removing current 
pageserver state at {pageserver_tenants_dir}') + log.info(f"Removing current pageserver state at {pageserver_tenants_dir}") shutil.rmtree(pageserver_tenants_dir) - log.info(f'Copying fresh pageserver state from {pageserver_fresh_copy}') + log.info(f"Copying fresh pageserver state from {pageserver_fresh_copy}") shutil.move(pageserver_fresh_copy, pageserver_tenants_dir) # start pageserver and wait for replay @@ -509,39 +552,43 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: R while True: elapsed = time.time() - started_at if elapsed > wait_lsn_timeout: - raise RuntimeError(f'Timed out waiting for WAL redo') + raise RuntimeError(f"Timed out waiting for WAL redo") pageserver_lsn = env.pageserver.http_client().timeline_detail( - uuid.UUID(tenant_id), uuid.UUID((timeline_id)))["local"]["last_record_lsn"] + uuid.UUID(tenant_id), uuid.UUID((timeline_id)) + )["local"]["last_record_lsn"] lag = lsn_from_hex(last_lsn) - lsn_from_hex(pageserver_lsn) if time.time() > last_debug_print + 10 or lag <= 0: last_debug_print = time.time() - log.info(f'Pageserver last_record_lsn={pageserver_lsn}; lag is {lag / 1024}kb') + log.info(f"Pageserver last_record_lsn={pageserver_lsn}; lag is {lag / 1024}kb") if lag <= 0: break time.sleep(1) - log.info(f'WAL redo took {elapsed} s') + log.info(f"WAL redo took {elapsed} s") # verify data - pg.create_start('test_s3_wal_replay') + pg.create_start("test_s3_wal_replay") assert pg.safe_psql("select sum(key) from t")[0][0] == expected_sum class ProposerPostgres(PgProtocol): """Object for running postgres without NeonEnv""" - def __init__(self, - pgdata_dir: str, - pg_bin, - timeline_id: uuid.UUID, - tenant_id: uuid.UUID, - listen_addr: str, - port: int): - super().__init__(host=listen_addr, port=port, user='cloud_admin', dbname='postgres') + + def __init__( + self, + pgdata_dir: str, + pg_bin, + timeline_id: uuid.UUID, + tenant_id: uuid.UUID, + listen_addr: str, + port: int, + ): + super().__init__(host=listen_addr, 
port=port, user="cloud_admin", dbname="postgres") self.pgdata_dir: str = pgdata_dir self.pg_bin: PgBin = pg_bin @@ -551,15 +598,15 @@ class ProposerPostgres(PgProtocol): self.port: int = port def pg_data_dir_path(self) -> str: - """ Path to data directory """ + """Path to data directory""" return self.pgdata_dir def config_file_path(self) -> str: - """ Path to postgresql.conf """ - return os.path.join(self.pgdata_dir, 'postgresql.conf') + """Path to postgresql.conf""" + return os.path.join(self.pgdata_dir, "postgresql.conf") def create_dir_config(self, safekeepers: str): - """ Create dir and config for running --sync-safekeepers """ + """Create dir and config for running --sync-safekeepers""" pathlib.Path(self.pg_data_dir_path()).mkdir(exist_ok=True) with open(self.config_file_path(), "w") as f: @@ -588,36 +635,36 @@ class ProposerPostgres(PgProtocol): } basepath = self.pg_bin.run_capture(command, env) - stdout_filename = basepath + '.stdout' + stdout_filename = basepath + ".stdout" - with open(stdout_filename, 'r') as stdout_f: + with open(stdout_filename, "r") as stdout_f: stdout = stdout_f.read() return stdout.strip("\n ") def initdb(self): - """ Run initdb """ + """Run initdb""" args = ["initdb", "-U", "cloud_admin", "-D", self.pg_data_dir_path()] self.pg_bin.run(args) def start(self): - """ Start postgres with pg_ctl """ + """Start postgres with pg_ctl""" log_path = os.path.join(self.pg_data_dir_path(), "pg.log") args = ["pg_ctl", "-D", self.pg_data_dir_path(), "-l", log_path, "-w", "start"] self.pg_bin.run(args) def stop(self): - """ Stop postgres with pg_ctl """ + """Stop postgres with pg_ctl""" args = ["pg_ctl", "-D", self.pg_data_dir_path(), "-m", "immediate", "-w", "stop"] self.pg_bin.run(args) # insert wal in all safekeepers and run sync on proposer -def test_sync_safekeepers(neon_env_builder: NeonEnvBuilder, - pg_bin: PgBin, - port_distributor: PortDistributor): +def test_sync_safekeepers( + neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, 
port_distributor: PortDistributor +): # We don't really need the full environment for this test, just the # safekeepers would be enough. @@ -629,12 +676,9 @@ def test_sync_safekeepers(neon_env_builder: NeonEnvBuilder, # write config for proposer pgdata_dir = os.path.join(env.repo_dir, "proposer_pgdata") - pg = ProposerPostgres(pgdata_dir, - pg_bin, - timeline_id, - tenant_id, - '127.0.0.1', - port_distributor.get_port()) + pg = ProposerPostgres( + pgdata_dir, pg_bin, timeline_id, tenant_id, "127.0.0.1", port_distributor.get_port() + ) pg.create_dir_config(env.get_safekeeper_connstrs()) # valid lsn, which is not in the segment start, nor in zero segment @@ -669,13 +713,13 @@ def test_sync_safekeepers(neon_env_builder: NeonEnvBuilder, assert all(lsn_after_sync == lsn for lsn in lsn_after_append) -@pytest.mark.parametrize('auth_enabled', [False, True]) +@pytest.mark.parametrize("auth_enabled", [False, True]) def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): neon_env_builder.auth_enabled = auth_enabled env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_timeline_status') - pg = env.postgres.create_start('test_timeline_status') + env.neon_cli.create_branch("test_timeline_status") + pg = env.postgres.create_start("test_timeline_status") wa = env.safekeepers[0] @@ -690,7 +734,8 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): wa_http_cli = wa.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id)) wa_http_cli.check_status() wa_http_cli_bad = wa.http_client( - auth_token=env.auth_keys.generate_tenant_token(uuid4().hex)) + auth_token=env.auth_keys.generate_tenant_token(uuid4().hex) + ) wa_http_cli_bad.check_status() wa_http_cli_noauth = wa.http_client() wa_http_cli_noauth.check_status() @@ -702,7 +747,7 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): if auth_enabled: for cli in [wa_http_cli_bad, wa_http_cli_noauth]: - with 
pytest.raises(cli.HTTPError, match='Forbidden|Unauthorized'): + with pytest.raises(cli.HTTPError, match="Forbidden|Unauthorized"): cli.timeline_status(tenant_id, timeline_id) pg.safe_psql("create table t(i int)") @@ -720,19 +765,23 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): class SafekeeperEnv: - def __init__(self, - repo_dir: Path, - port_distributor: PortDistributor, - pg_bin: PgBin, - num_safekeepers: int = 1): + def __init__( + self, + repo_dir: Path, + port_distributor: PortDistributor, + pg_bin: PgBin, + num_safekeepers: int = 1, + ): self.repo_dir = repo_dir self.port_distributor = port_distributor - self.broker = Etcd(datadir=os.path.join(self.repo_dir, "etcd"), - port=self.port_distributor.get_port(), - peer_port=self.port_distributor.get_port()) + self.broker = Etcd( + datadir=os.path.join(self.repo_dir, "etcd"), + port=self.port_distributor.get_port(), + peer_port=self.port_distributor.get_port(), + ) self.pg_bin = pg_bin self.num_safekeepers = num_safekeepers - self.bin_safekeeper = os.path.join(str(neon_binpath), 'safekeeper') + self.bin_safekeeper = os.path.join(str(neon_binpath), "safekeeper") self.safekeepers: Optional[List[subprocess.CompletedProcess[Any]]] = None self.postgres: Optional[ProposerPostgres] = None self.tenant_id: Optional[uuid.UUID] = None @@ -778,23 +827,25 @@ class SafekeeperEnv: str(i), "--broker-endpoints", self.broker.client_url(), - "--daemonize" + "--daemonize", ] log.info(f'Running command "{" ".join(args)}"') return subprocess.run(args, check=True) def get_safekeeper_connstrs(self): - return ','.join([sk_proc.args[2] for sk_proc in self.safekeepers]) + return ",".join([sk_proc.args[2] for sk_proc in self.safekeepers]) def create_postgres(self): pgdata_dir = os.path.join(self.repo_dir, "proposer_pgdata") - pg = ProposerPostgres(pgdata_dir, - self.pg_bin, - self.timeline_id, - self.tenant_id, - "127.0.0.1", - self.port_distributor.get_port()) + pg = ProposerPostgres( + pgdata_dir, + 
self.pg_bin, + self.timeline_id, + self.tenant_id, + "127.0.0.1", + self.port_distributor.get_port(), + ) pg.initdb() pg.create_dir_config(self.get_safekeeper_connstrs()) return pg @@ -811,7 +862,7 @@ class SafekeeperEnv: return self def __exit__(self, exc_type, exc_value, traceback): - log.info('Cleaning up all safekeeper and compute nodes') + log.info("Cleaning up all safekeeper and compute nodes") # Stop all the nodes if self.postgres is not None: @@ -821,9 +872,9 @@ class SafekeeperEnv: self.kill_safekeeper(sk_proc.args[6]) -def test_safekeeper_without_pageserver(test_output_dir: str, - port_distributor: PortDistributor, - pg_bin: PgBin): +def test_safekeeper_without_pageserver( + test_output_dir: str, port_distributor: PortDistributor, pg_bin: PgBin +): # Create the environment in the test-specific output dir repo_dir = Path(os.path.join(test_output_dir, "repo")) @@ -845,19 +896,19 @@ def test_safekeeper_without_pageserver(test_output_dir: str, def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder): def safekeepers_guc(env: NeonEnv, sk_names: List[int]) -> str: - return ','.join([f'localhost:{sk.port.pg}' for sk in env.safekeepers if sk.id in sk_names]) + return ",".join([f"localhost:{sk.port.pg}" for sk in env.safekeepers if sk.id in sk_names]) def execute_payload(pg: Postgres): with closing(pg.connect()) as conn: with conn.cursor() as cur: # we rely upon autocommit after each statement # as waiting for acceptors happens there - cur.execute('CREATE TABLE IF NOT EXISTS t(key int, value text)') + cur.execute("CREATE TABLE IF NOT EXISTS t(key int, value text)") cur.execute("INSERT INTO t VALUES (0, 'something')") - sum_before = query_scalar(cur, 'SELECT SUM(key) FROM t') + sum_before = query_scalar(cur, "SELECT SUM(key) FROM t") cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'") - sum_after = query_scalar(cur, 'SELECT SUM(key) FROM t') + sum_after = query_scalar(cur, "SELECT SUM(key) FROM t") assert sum_after == sum_before + 
5000050000 def show_statuses(safekeepers: List[Safekeeper], tenant_id: str, timeline_id: str): @@ -871,12 +922,12 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 4 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_replace_safekeeper') + env.neon_cli.create_branch("test_replace_safekeeper") log.info("Use only first 3 safekeepers") env.safekeepers[3].stop() active_safekeepers = [1, 2, 3] - pg = env.postgres.create('test_replace_safekeeper') + pg = env.postgres.create("test_replace_safekeeper") pg.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers)) pg.start() @@ -914,7 +965,7 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder): show_statuses(env.safekeepers, tenant_id, timeline_id) log.info("Recreate postgres to replace failed sk1 with new sk4") - pg.stop_and_destroy().create('test_replace_safekeeper') + pg.stop_and_destroy().create("test_replace_safekeeper") active_safekeepers = [2, 3, 4] env.safekeepers[3].start() pg.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers)) @@ -934,16 +985,16 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder): # of WAL segments. 
def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder): # used to calculate delta in collect_stats - last_lsn = .0 + last_lsn = 0.0 # returns LSN and pg_wal size, all in MB def collect_stats(pg: Postgres, cur, enable_logs=True): nonlocal last_lsn assert pg.pgdata_dir is not None - log.info('executing INSERT to generate WAL') + log.info("executing INSERT to generate WAL") current_lsn = lsn_from_hex(query_scalar(cur, "select pg_current_wal_lsn()")) / 1024 / 1024 - pg_wal_size = get_dir_size(os.path.join(pg.pgdata_dir, 'pg_wal')) / 1024 / 1024 + pg_wal_size = get_dir_size(os.path.join(pg.pgdata_dir, "pg_wal")) / 1024 / 1024 if enable_logs: log.info(f"LSN delta: {current_lsn - last_lsn} MB, current WAL size: {pg_wal_size} MB") last_lsn = current_lsn @@ -956,15 +1007,16 @@ def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_wal_deleted_after_broadcast') + env.neon_cli.create_branch("test_wal_deleted_after_broadcast") # Adjust checkpoint config to prevent keeping old WAL segments pg = env.postgres.create_start( - 'test_wal_deleted_after_broadcast', - config_lines=['min_wal_size=32MB', 'max_wal_size=32MB', 'log_checkpoints=on']) + "test_wal_deleted_after_broadcast", + config_lines=["min_wal_size=32MB", "max_wal_size=32MB", "log_checkpoints=on"], + ) pg_conn = pg.connect() cur = pg_conn.cursor() - cur.execute('CREATE TABLE t(key int, value text)') + cur.execute("CREATE TABLE t(key int, value text)") collect_stats(pg, cur) @@ -973,15 +1025,15 @@ def test_wal_deleted_after_broadcast(neon_env_builder: NeonEnvBuilder): generate_wal(cur) collect_stats(pg, cur) - log.info('executing checkpoint') - cur.execute('CHECKPOINT') + log.info("executing checkpoint") + cur.execute("CHECKPOINT") wal_size_after_checkpoint = collect_stats(pg, cur)[1] # there shouldn't be more than 2 WAL segments (but dir may have archive_status files) assert 
wal_size_after_checkpoint < 16 * 2.5 -@pytest.mark.parametrize('auth_enabled', [False, True]) +@pytest.mark.parametrize("auth_enabled", [False, True]) def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): neon_env_builder.num_safekeepers = 1 neon_env_builder.auth_enabled = auth_enabled @@ -989,25 +1041,25 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): # Create two tenants: one will be deleted, other should be preserved. tenant_id = env.initial_tenant.hex - timeline_id_1 = env.neon_cli.create_branch('br1').hex # Active, delete explicitly - timeline_id_2 = env.neon_cli.create_branch('br2').hex # Inactive, delete explicitly - timeline_id_3 = env.neon_cli.create_branch('br3').hex # Active, delete with the tenant - timeline_id_4 = env.neon_cli.create_branch('br4').hex # Inactive, delete with the tenant + timeline_id_1 = env.neon_cli.create_branch("br1").hex # Active, delete explicitly + timeline_id_2 = env.neon_cli.create_branch("br2").hex # Inactive, delete explicitly + timeline_id_3 = env.neon_cli.create_branch("br3").hex # Active, delete with the tenant + timeline_id_4 = env.neon_cli.create_branch("br4").hex # Inactive, delete with the tenant tenant_id_other_uuid, timeline_id_other_uuid = env.neon_cli.create_tenant() tenant_id_other = tenant_id_other_uuid.hex timeline_id_other = timeline_id_other_uuid.hex # Populate branches - pg_1 = env.postgres.create_start('br1') - pg_2 = env.postgres.create_start('br2') - pg_3 = env.postgres.create_start('br3') - pg_4 = env.postgres.create_start('br4') - pg_other = env.postgres.create_start('main', tenant_id=uuid.UUID(hex=tenant_id_other)) + pg_1 = env.postgres.create_start("br1") + pg_2 = env.postgres.create_start("br2") + pg_3 = env.postgres.create_start("br3") + pg_4 = env.postgres.create_start("br4") + pg_other = env.postgres.create_start("main", tenant_id=uuid.UUID(hex=tenant_id_other)) for pg in [pg_1, pg_2, pg_3, pg_4, pg_other]: with closing(pg.connect()) as conn: 
with conn.cursor() as cur: - cur.execute('CREATE TABLE t(key int primary key)') + cur.execute("CREATE TABLE t(key int primary key)") sk = env.safekeepers[0] sk_data_dir = Path(sk.data_dir()) if not auth_enabled: @@ -1016,7 +1068,8 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): else: sk_http = sk.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id)) sk_http_other = sk.http_client( - auth_token=env.auth_keys.generate_tenant_token(tenant_id_other)) + auth_token=env.auth_keys.generate_tenant_token(tenant_id_other) + ) sk_http_noauth = sk.http_client() assert (sk_data_dir / tenant_id / timeline_id_1).is_dir() assert (sk_data_dir / tenant_id / timeline_id_2).is_dir() @@ -1034,7 +1087,7 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): for pg in [pg_1, pg_3, pg_other]: with closing(pg.connect()) as conn: with conn.cursor() as cur: - cur.execute('INSERT INTO t (key) VALUES (1)') + cur.execute("INSERT INTO t (key) VALUES (1)") # Remove initial tenant's br1 (active) assert sk_http.timeline_delete_force(tenant_id, timeline_id_1) == { @@ -1049,7 +1102,8 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): # Ensure repeated deletion succeeds assert sk_http.timeline_delete_force(tenant_id, timeline_id_1) == { - "dir_existed": False, "was_active": False + "dir_existed": False, + "was_active": False, } assert not (sk_data_dir / tenant_id / timeline_id_1).exists() assert (sk_data_dir / tenant_id / timeline_id_2).is_dir() @@ -1060,9 +1114,9 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): if auth_enabled: # Ensure we cannot delete the other tenant for sk_h in [sk_http, sk_http_noauth]: - with pytest.raises(sk_h.HTTPError, match='Forbidden|Unauthorized'): + with pytest.raises(sk_h.HTTPError, match="Forbidden|Unauthorized"): assert sk_h.timeline_delete_force(tenant_id_other, timeline_id_other) - with pytest.raises(sk_h.HTTPError, 
match='Forbidden|Unauthorized'): + with pytest.raises(sk_h.HTTPError, match="Forbidden|Unauthorized"): assert sk_h.tenant_delete_force(tenant_id_other) assert (sk_data_dir / tenant_id_other / timeline_id_other).is_dir() @@ -1078,7 +1132,7 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): assert (sk_data_dir / tenant_id_other / timeline_id_other).is_dir() # Remove non-existing branch, should succeed - assert sk_http.timeline_delete_force(tenant_id, '00' * 16) == { + assert sk_http.timeline_delete_force(tenant_id, "00" * 16) == { "dir_existed": False, "was_active": False, } @@ -1107,4 +1161,4 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): sk_http_other.timeline_status(tenant_id_other, timeline_id_other) with closing(pg_other.connect()) as conn: with conn.cursor() as cur: - cur.execute('INSERT INTO t (key) VALUES (123)') + cur.execute("INSERT INTO t (key) VALUES (123)") diff --git a/test_runner/batch_others/test_wal_acceptor_async.py b/test_runner/batch_others/test_wal_acceptor_async.py index e1d3ba0919..83285e0cbe 100644 --- a/test_runner/batch_others/test_wal_acceptor_async.py +++ b/test_runner/batch_others/test_wal_acceptor_async.py @@ -1,17 +1,16 @@ import asyncio -import uuid - -import asyncpg import random import time - -from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, Safekeeper -from fixtures.log_helper import getLogger -from fixtures.utils import lsn_from_hex, lsn_to_hex -from typing import List, Optional +import uuid from dataclasses import dataclass +from typing import List, Optional -log = getLogger('root.safekeeper_async') +import asyncpg +from fixtures.log_helper import getLogger +from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, Safekeeper +from fixtures.utils import lsn_from_hex, lsn_to_hex + +log = getLogger("root.safekeeper_async") class BankClient(object): @@ -21,21 +20,22 @@ class BankClient(object): self.init_amount = init_amount async def 
initdb(self): - await self.conn.execute('DROP TABLE IF EXISTS bank_accs') - await self.conn.execute('CREATE TABLE bank_accs(uid int primary key, amount int)') + await self.conn.execute("DROP TABLE IF EXISTS bank_accs") + await self.conn.execute("CREATE TABLE bank_accs(uid int primary key, amount int)") await self.conn.execute( - ''' + """ INSERT INTO bank_accs SELECT *, $1 FROM generate_series(0, $2) - ''', + """, self.init_amount, - self.n_accounts - 1) - await self.conn.execute('DROP TABLE IF EXISTS bank_log') - await self.conn.execute('CREATE TABLE bank_log(from_uid int, to_uid int, amount int)') + self.n_accounts - 1, + ) + await self.conn.execute("DROP TABLE IF EXISTS bank_log") + await self.conn.execute("CREATE TABLE bank_log(from_uid int, to_uid int, amount int)") async def check_invariant(self): - row = await self.conn.fetchrow('SELECT sum(amount) AS sum FROM bank_accs') - assert row['sum'] == self.n_accounts * self.init_amount + row = await self.conn.fetchrow("SELECT sum(amount) AS sum FROM bank_accs") + assert row["sum"] == self.n_accounts * self.init_amount async def bank_transfer(conn: asyncpg.Connection, from_uid, to_uid, amount): @@ -45,17 +45,17 @@ async def bank_transfer(conn: asyncpg.Connection, from_uid, to_uid, amount): async with conn.transaction(): await conn.execute( - 'UPDATE bank_accs SET amount = amount + ($1) WHERE uid = $2', + "UPDATE bank_accs SET amount = amount + ($1) WHERE uid = $2", amount, to_uid, ) await conn.execute( - 'UPDATE bank_accs SET amount = amount - ($1) WHERE uid = $2', + "UPDATE bank_accs SET amount = amount - ($1) WHERE uid = $2", amount, from_uid, ) await conn.execute( - 'INSERT INTO bank_log VALUES ($1, $2, $3)', + "INSERT INTO bank_log VALUES ($1, $2, $3)", from_uid, to_uid, amount, @@ -80,12 +80,12 @@ class WorkerStats(object): assert all(cnt > 0 for cnt in self.counters) progress = sum(self.counters) - log.info('All workers made {} transactions'.format(progress)) + log.info("All workers made {} 
transactions".format(progress)) async def run_random_worker(stats: WorkerStats, pg: Postgres, worker_id, n_accounts, max_transfer): pg_conn = await pg.connect_async() - log.debug('Started worker {}'.format(worker_id)) + log.debug("Started worker {}".format(worker_id)) while stats.running: from_uid = random.randint(0, n_accounts - 1) @@ -95,19 +95,21 @@ async def run_random_worker(stats: WorkerStats, pg: Postgres, worker_id, n_accou await bank_transfer(pg_conn, from_uid, to_uid, amount) stats.inc_progress(worker_id) - log.debug('Executed transfer({}) {} => {}'.format(amount, from_uid, to_uid)) + log.debug("Executed transfer({}) {} => {}".format(amount, from_uid, to_uid)) - log.debug('Finished worker {}'.format(worker_id)) + log.debug("Finished worker {}".format(worker_id)) await pg_conn.close() -async def wait_for_lsn(safekeeper: Safekeeper, - tenant_id: str, - timeline_id: str, - wait_lsn: str, - polling_interval=1, - timeout=60): +async def wait_for_lsn( + safekeeper: Safekeeper, + tenant_id: str, + timeline_id: str, + wait_lsn: str, + polling_interval=1, + timeout=60, +): """ Poll flush_lsn from safekeeper until it's greater or equal than provided wait_lsn. 
To do that, timeline_status is fetched from @@ -119,7 +121,7 @@ async def wait_for_lsn(safekeeper: Safekeeper, flush_lsn = client.timeline_status(tenant_id, timeline_id).flush_lsn log.info( - f'Safekeeper at port {safekeeper.port.pg} has flush_lsn {flush_lsn}, waiting for lsn {wait_lsn}' + f"Safekeeper at port {safekeeper.port.pg} has flush_lsn {flush_lsn}, waiting for lsn {wait_lsn}" ) while lsn_from_hex(wait_lsn) > lsn_from_hex(flush_lsn): @@ -131,22 +133,24 @@ async def wait_for_lsn(safekeeper: Safekeeper, await asyncio.sleep(polling_interval) flush_lsn = client.timeline_status(tenant_id, timeline_id).flush_lsn - log.debug(f'safekeeper port={safekeeper.port.pg} flush_lsn={flush_lsn} wait_lsn={wait_lsn}') + log.debug(f"safekeeper port={safekeeper.port.pg} flush_lsn={flush_lsn} wait_lsn={wait_lsn}") # This test will run several iterations and check progress in each of them. # On each iteration 1 acceptor is stopped, and 2 others should allow # background workers execute transactions. In the end, state should remain # consistent. -async def run_restarts_under_load(env: NeonEnv, - pg: Postgres, - acceptors: List[Safekeeper], - n_workers=10, - n_accounts=100, - init_amount=100000, - max_transfer=100, - period_time=4, - iterations=10): +async def run_restarts_under_load( + env: NeonEnv, + pg: Postgres, + acceptors: List[Safekeeper], + n_workers=10, + n_accounts=100, + init_amount=100000, + max_transfer=100, + period_time=4, + iterations=10, +): # Set timeout for this test at 5 minutes. It should be enough for test to complete, # taking into account that this timeout is checked only at the beginning of every iteration. 
test_timeout_at = time.monotonic() + 5 * 60 @@ -166,20 +170,21 @@ async def run_restarts_under_load(env: NeonEnv, workers.append(asyncio.create_task(worker)) for it in range(iterations): - assert time.monotonic() < test_timeout_at, 'test timed out' + assert time.monotonic() < test_timeout_at, "test timed out" victim_idx = it % len(acceptors) victim = acceptors[victim_idx] victim.stop() - flush_lsn = await pg_conn.fetchval('SELECT pg_current_wal_flush_lsn()') + flush_lsn = await pg_conn.fetchval("SELECT pg_current_wal_flush_lsn()") flush_lsn = lsn_to_hex(flush_lsn) - log.info(f'Postgres flush_lsn {flush_lsn}') + log.info(f"Postgres flush_lsn {flush_lsn}") pageserver_lsn = env.pageserver.http_client().timeline_detail( - uuid.UUID(tenant_id), uuid.UUID((timeline_id)))["local"]["last_record_lsn"] + uuid.UUID(tenant_id), uuid.UUID((timeline_id)) + )["local"]["last_record_lsn"] sk_ps_lag = lsn_from_hex(flush_lsn) - lsn_from_hex(pageserver_lsn) - log.info(f'Pageserver last_record_lsn={pageserver_lsn} lag={sk_ps_lag / 1024}kb') + log.info(f"Pageserver last_record_lsn={pageserver_lsn} lag={sk_ps_lag / 1024}kb") # Wait until alive safekeepers catch up with postgres for idx, safekeeper in enumerate(acceptors): @@ -193,7 +198,7 @@ async def run_restarts_under_load(env: NeonEnv, victim.start() - log.info('Iterations are finished, exiting coroutines...') + log.info("Iterations are finished, exiting coroutines...") stats.running = False # await all workers await asyncio.gather(*workers) @@ -207,10 +212,11 @@ def test_restarts_under_load(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_safekeepers_restarts_under_load') + env.neon_cli.create_branch("test_safekeepers_restarts_under_load") # Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long - pg = env.postgres.create_start('test_safekeepers_restarts_under_load', - 
config_lines=['max_replication_write_lag=1MB']) + pg = env.postgres.create_start( + "test_safekeepers_restarts_under_load", config_lines=["max_replication_write_lag=1MB"] + ) asyncio.run(run_restarts_under_load(env, pg, env.safekeepers)) @@ -222,15 +228,17 @@ def test_restarts_frequent_checkpoints(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_restarts_frequent_checkpoints') + env.neon_cli.create_branch("test_restarts_frequent_checkpoints") # Enable backpressure with 1MB maximal lag, because we don't want to block on `wait_for_lsn()` for too long - pg = env.postgres.create_start('test_restarts_frequent_checkpoints', - config_lines=[ - 'max_replication_write_lag=1MB', - 'min_wal_size=32MB', - 'max_wal_size=32MB', - 'log_checkpoints=on' - ]) + pg = env.postgres.create_start( + "test_restarts_frequent_checkpoints", + config_lines=[ + "max_replication_write_lag=1MB", + "min_wal_size=32MB", + "max_wal_size=32MB", + "log_checkpoints=on", + ], + ) # we try to simulate large (flush_lsn - truncate_lsn) lag, to test that WAL segments # are not removed before broadcasted to all safekeepers, with the help of replication slot @@ -244,51 +252,51 @@ def postgres_create_start(env: NeonEnv, branch: str, pgdir_name: Optional[str]): port=env.port_distributor.get_port(), # In these tests compute has high probability of terminating on its own # before our stop() due to lost consensus leadership. 
- check_stop_result=False) + check_stop_result=False, + ) # embed current time in node name - node_name = pgdir_name or f'pg_node_{time.time()}' - return pg.create_start(branch_name=branch, - node_name=node_name, - config_lines=['log_statement=all']) + node_name = pgdir_name or f"pg_node_{time.time()}" + return pg.create_start( + branch_name=branch, node_name=node_name, config_lines=["log_statement=all"] + ) -async def exec_compute_query(env: NeonEnv, - branch: str, - query: str, - pgdir_name: Optional[str] = None): +async def exec_compute_query( + env: NeonEnv, branch: str, query: str, pgdir_name: Optional[str] = None +): with postgres_create_start(env, branch=branch, pgdir_name=pgdir_name) as pg: before_conn = time.time() conn = await pg.connect_async() res = await conn.fetch(query) await conn.close() after_conn = time.time() - log.info(f'{query} took {after_conn - before_conn}s') + log.info(f"{query} took {after_conn - before_conn}s") return res -async def run_compute_restarts(env: NeonEnv, - queries=16, - batch_insert=10000, - branch='test_compute_restarts'): +async def run_compute_restarts( + env: NeonEnv, queries=16, batch_insert=10000, branch="test_compute_restarts" +): cnt = 0 sum = 0 - await exec_compute_query(env, branch, 'CREATE TABLE t (i int)') + await exec_compute_query(env, branch, "CREATE TABLE t (i int)") for i in range(queries): if i % 4 == 0: await exec_compute_query( - env, branch, f'INSERT INTO t SELECT 1 FROM generate_series(1, {batch_insert})') + env, branch, f"INSERT INTO t SELECT 1 FROM generate_series(1, {batch_insert})" + ) sum += batch_insert cnt += batch_insert elif (i % 4 == 1) or (i % 4 == 3): # Note that select causes lots of FPI's and increases probability of safekeepers # standing at different LSNs after compute termination. 
- actual_sum = (await exec_compute_query(env, branch, 'SELECT SUM(i) FROM t'))[0][0] - assert actual_sum == sum, f'Expected sum={sum}, actual={actual_sum}' + actual_sum = (await exec_compute_query(env, branch, "SELECT SUM(i) FROM t"))[0][0] + assert actual_sum == sum, f"Expected sum={sum}, actual={actual_sum}" elif i % 4 == 2: - await exec_compute_query(env, branch, 'UPDATE t SET i = i + 1') + await exec_compute_query(env, branch, "UPDATE t SET i = i + 1") sum += cnt @@ -297,7 +305,7 @@ def test_compute_restarts(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_compute_restarts') + env.neon_cli.create_branch("test_compute_restarts") asyncio.run(run_compute_restarts(env)) @@ -315,7 +323,7 @@ class BackgroundCompute(object): async def run(self): if self.running: - raise Exception('BackgroundCompute is already running') + raise Exception("BackgroundCompute is already running") self.running = True i = 0 @@ -327,17 +335,17 @@ class BackgroundCompute(object): res = await exec_compute_query( self.env, self.branch, - f'INSERT INTO query_log(index, verify_key) VALUES ({self.index}, {verify_key}) RETURNING verify_key', - pgdir_name=f'bgcompute{self.index}_key{verify_key}', + f"INSERT INTO query_log(index, verify_key) VALUES ({self.index}, {verify_key}) RETURNING verify_key", + pgdir_name=f"bgcompute{self.index}_key{verify_key}", ) - log.info(f'result: {res}') + log.info(f"result: {res}") if len(res) != 1: - raise Exception('No result returned') + raise Exception("No result returned") if res[0][0] != verify_key: - raise Exception('Wrong result returned') + raise Exception("Wrong result returned") self.successful_queries.append(verify_key) except Exception as e: - log.info(f'BackgroundCompute {self.index} query failed: {e}') + log.info(f"BackgroundCompute {self.index} query failed: {e}") # With less sleep, there is a very big chance of not committing # anything or only 1 xact during 
test run. @@ -345,14 +353,12 @@ class BackgroundCompute(object): self.running = False -async def run_concurrent_computes(env: NeonEnv, - num_computes=10, - run_seconds=20, - branch='test_concurrent_computes'): +async def run_concurrent_computes( + env: NeonEnv, num_computes=10, run_seconds=20, branch="test_concurrent_computes" +): await exec_compute_query( - env, - branch, - 'CREATE TABLE query_log (t timestamp default now(), index int, verify_key int)') + env, branch, "CREATE TABLE query_log (t timestamp default now(), index int, verify_key int)" + ) computes = [BackgroundCompute(i, env, branch) for i in range(num_computes)] background_tasks = [asyncio.create_task(compute.run()) for compute in computes] @@ -367,13 +373,17 @@ async def run_concurrent_computes(env: NeonEnv, # work for some time with only one compute -- it should be able to make some xacts TIMEOUT_SECONDS = computes[0].MAX_QUERY_GAP_SECONDS + 3 initial_queries_by_0 = len(computes[0].successful_queries) - log.info(f'Waiting for another query by computes[0], ' - f'it already had {initial_queries_by_0}, timeout is {TIMEOUT_SECONDS}s') + log.info( + f"Waiting for another query by computes[0], " + f"it already had {initial_queries_by_0}, timeout is {TIMEOUT_SECONDS}s" + ) for _ in range(10 * TIMEOUT_SECONDS): current_queries_by_0 = len(computes[0].successful_queries) - initial_queries_by_0 if current_queries_by_0 >= 1: - log.info(f'Found {current_queries_by_0} successful queries ' - f'by computes[0], completing the test') + log.info( + f"Found {current_queries_by_0} successful queries " + f"by computes[0], completing the test" + ) break await asyncio.sleep(0.1) else: @@ -382,12 +392,14 @@ async def run_concurrent_computes(env: NeonEnv, await asyncio.gather(background_tasks[0]) - result = await exec_compute_query(env, branch, 'SELECT * FROM query_log') + result = await exec_compute_query(env, branch, "SELECT * FROM query_log") # we should have inserted something while single compute was running - 
log.info(f'Executed {len(result)} queries, {current_queries_by_0} of them ' - f'by computes[0] after we started stopping the others') + log.info( + f"Executed {len(result)} queries, {current_queries_by_0} of them " + f"by computes[0] after we started stopping the others" + ) for row in result: - log.info(f'{row[0]} {row[1]} {row[2]}') + log.info(f"{row[0]} {row[1]} {row[2]}") # ensure everything reported as committed wasn't lost for compute in computes: @@ -402,16 +414,15 @@ def test_concurrent_computes(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_concurrent_computes') + env.neon_cli.create_branch("test_concurrent_computes") asyncio.run(run_concurrent_computes(env)) # Stop safekeeper and check that query cannot be executed while safekeeper is down. # Query will insert a single row into a table. -async def check_unavailability(sk: Safekeeper, - conn: asyncpg.Connection, - key: int, - start_delay_sec: int = 2): +async def check_unavailability( + sk: Safekeeper, conn: asyncpg.Connection, key: int, start_delay_sec: int = 2 +): # shutdown one of two acceptors, that is, majority sk.stop() @@ -431,7 +442,7 @@ async def run_unavailability(env: NeonEnv, pg: Postgres): conn = await pg.connect_async() # check basic work with table - await conn.execute('CREATE TABLE t(key int primary key, value text)') + await conn.execute("CREATE TABLE t(key int primary key, value text)") await conn.execute("INSERT INTO t values (1, 'payload')") # stop safekeeper and check that query cannot be executed while safekeeper is down @@ -443,7 +454,7 @@ async def run_unavailability(env: NeonEnv, pg: Postgres): # check that we can execute queries after restart await conn.execute("INSERT INTO t values (4, 'payload')") - result_sum = await conn.fetchval('SELECT sum(key) FROM t') + result_sum = await conn.fetchval("SELECT sum(key) FROM t") assert result_sum == 10 @@ -452,8 +463,8 @@ def 
test_unavailability(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 2 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_safekeepers_unavailability') - pg = env.postgres.create_start('test_safekeepers_unavailability') + env.neon_cli.create_branch("test_safekeepers_unavailability") + pg = env.postgres.create_start("test_safekeepers_unavailability") asyncio.run(run_unavailability(env, pg)) @@ -473,20 +484,20 @@ async def xmas_garland(safekeepers: List[Safekeeper], data: RaceConditionTest): if random.random() >= 0.5: victims.append(sk) log.info( - f'Iteration {data.iteration}: stopping {list(map(lambda sk: sk.id, victims))} safekeepers' + f"Iteration {data.iteration}: stopping {list(map(lambda sk: sk.id, victims))} safekeepers" ) for v in victims: v.stop() await asyncio.sleep(1) for v in victims: v.start() - log.info(f'Iteration {data.iteration} finished') + log.info(f"Iteration {data.iteration} finished") await asyncio.sleep(1) async def run_race_conditions(env: NeonEnv, pg: Postgres): conn = await pg.connect_async() - await conn.execute('CREATE TABLE t(key int primary key, value text)') + await conn.execute("CREATE TABLE t(key int primary key, value text)") data = RaceConditionTest(0, False) bg_xmas = asyncio.create_task(xmas_garland(env.safekeepers, data)) @@ -501,9 +512,9 @@ async def run_race_conditions(env: NeonEnv, pg: Postgres): expected_sum += i i += 1 - log.info(f'Executed {i-1} queries') + log.info(f"Executed {i-1} queries") - res = await conn.fetchval('SELECT sum(key) FROM t') + res = await conn.fetchval("SELECT sum(key) FROM t") assert res == expected_sum data.is_stopped = True @@ -516,8 +527,8 @@ def test_race_conditions(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_safekeepers_race_conditions') - pg = env.postgres.create_start('test_safekeepers_race_conditions') + 
env.neon_cli.create_branch("test_safekeepers_race_conditions") + pg = env.postgres.create_start("test_safekeepers_race_conditions") asyncio.run(run_race_conditions(env, pg)) @@ -527,13 +538,15 @@ def test_race_conditions(neon_env_builder: NeonEnvBuilder): async def run_wal_lagging(env: NeonEnv, pg: Postgres): def safekeepers_guc(env: NeonEnv, active_sk: List[bool]) -> str: # use ports 10, 11 and 12 to simulate unavailable safekeepers - return ','.join([ - f'localhost:{sk.port.pg if active else 10 + i}' - for i, (sk, active) in enumerate(zip(env.safekeepers, active_sk)) - ]) + return ",".join( + [ + f"localhost:{sk.port.pg if active else 10 + i}" + for i, (sk, active) in enumerate(zip(env.safekeepers, active_sk)) + ] + ) conn = await pg.connect_async() - await conn.execute('CREATE TABLE t(key int primary key, value text)') + await conn.execute("CREATE TABLE t(key int primary key, value text)") await conn.close() pg.stop() @@ -552,7 +565,7 @@ async def run_wal_lagging(env: NeonEnv, pg: Postgres): continue pg.adjust_for_safekeepers(safekeepers_guc(env, active_sk)) - log.info(f'Iteration {it}: {active_sk}') + log.info(f"Iteration {it}: {active_sk}") pg.start() conn = await pg.connect_async() @@ -569,9 +582,9 @@ async def run_wal_lagging(env: NeonEnv, pg: Postgres): pg.start() conn = await pg.connect_async() - log.info(f'Executed {i-1} queries') + log.info(f"Executed {i-1} queries") - res = await conn.fetchval('SELECT sum(key) FROM t') + res = await conn.fetchval("SELECT sum(key) FROM t") assert res == expected_sum @@ -581,7 +594,7 @@ def test_wal_lagging(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - env.neon_cli.create_branch('test_wal_lagging') - pg = env.postgres.create_start('test_wal_lagging') + env.neon_cli.create_branch("test_wal_lagging") + pg = env.postgres.create_start("test_wal_lagging") asyncio.run(run_wal_lagging(env, pg)) diff --git a/test_runner/batch_others/test_wal_restore.py 
b/test_runner/batch_others/test_wal_restore.py index 809e942415..0847b5a505 100644 --- a/test_runner/batch_others/test_wal_restore.py +++ b/test_runner/batch_others/test_wal_restore.py @@ -1,33 +1,39 @@ import os from pathlib import Path -from fixtures.neon_fixtures import (NeonEnvBuilder, - VanillaPostgres, - PortDistributor, - PgBin, - base_dir, - pg_distrib_dir) +from fixtures.neon_fixtures import ( + NeonEnvBuilder, + PgBin, + PortDistributor, + VanillaPostgres, + base_dir, + pg_distrib_dir, +) -def test_wal_restore(neon_env_builder: NeonEnvBuilder, - pg_bin: PgBin, - test_output_dir: Path, - port_distributor: PortDistributor): +def test_wal_restore( + neon_env_builder: NeonEnvBuilder, + pg_bin: PgBin, + test_output_dir: Path, + port_distributor: PortDistributor, +): env = neon_env_builder.init_start() env.neon_cli.create_branch("test_wal_restore") - pg = env.postgres.create_start('test_wal_restore') + pg = env.postgres.create_start("test_wal_restore") pg.safe_psql("create table t as select generate_series(1,300000)") tenant_id = pg.safe_psql("show neon.tenant_id")[0][0] env.neon_cli.pageserver_stop() port = port_distributor.get_port() - data_dir = test_output_dir / 'pgsql.restored' + data_dir = test_output_dir / "pgsql.restored" with VanillaPostgres(data_dir, PgBin(test_output_dir), port) as restored: - pg_bin.run_capture([ - os.path.join(base_dir, 'libs/utils/scripts/restore_from_wal.sh'), - os.path.join(pg_distrib_dir, 'bin'), - str(test_output_dir / 'repo' / 'safekeepers' / 'sk1' / str(tenant_id) / '*'), - str(data_dir), - str(port) - ]) + pg_bin.run_capture( + [ + os.path.join(base_dir, "libs/utils/scripts/restore_from_wal.sh"), + os.path.join(pg_distrib_dir, "bin"), + str(test_output_dir / "repo" / "safekeepers" / "sk1" / str(tenant_id) / "*"), + str(data_dir), + str(port), + ] + ) restored.start() - assert restored.safe_psql('select count(*) from t', user='cloud_admin') == [(300000, )] + assert restored.safe_psql("select count(*) from t", 
user="cloud_admin") == [(300000,)] diff --git a/test_runner/batch_pg_regress/test_isolation.py b/test_runner/batch_pg_regress/test_isolation.py index 0124459440..7127a069b0 100644 --- a/test_runner/batch_pg_regress/test_isolation.py +++ b/test_runner/batch_pg_regress/test_isolation.py @@ -1,5 +1,6 @@ import os from pathlib import Path + import pytest from fixtures.neon_fixtures import NeonEnv, base_dir, pg_distrib_dir @@ -13,33 +14,33 @@ def test_isolation(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, caps env.neon_cli.create_branch("test_isolation", "empty") # Connect to postgres and create a database called "regression". # isolation tests use prepared transactions, so enable them - pg = env.postgres.create_start('test_isolation', config_lines=['max_prepared_transactions=100']) - pg.safe_psql('CREATE DATABASE isolation_regression') + pg = env.postgres.create_start("test_isolation", config_lines=["max_prepared_transactions=100"]) + pg.safe_psql("CREATE DATABASE isolation_regression") # Create some local directories for pg_isolation_regress to run in. - runpath = test_output_dir / 'regress' - (runpath / 'testtablespace').mkdir(parents=True) + runpath = test_output_dir / "regress" + (runpath / "testtablespace").mkdir(parents=True) # Compute all the file locations that pg_isolation_regress will need. 
- build_path = os.path.join(pg_distrib_dir, 'build/src/test/isolation') - src_path = os.path.join(base_dir, 'vendor/postgres/src/test/isolation') - bindir = os.path.join(pg_distrib_dir, 'bin') - schedule = os.path.join(src_path, 'isolation_schedule') - pg_isolation_regress = os.path.join(build_path, 'pg_isolation_regress') + build_path = os.path.join(pg_distrib_dir, "build/src/test/isolation") + src_path = os.path.join(base_dir, "vendor/postgres/src/test/isolation") + bindir = os.path.join(pg_distrib_dir, "bin") + schedule = os.path.join(src_path, "isolation_schedule") + pg_isolation_regress = os.path.join(build_path, "pg_isolation_regress") pg_isolation_regress_command = [ pg_isolation_regress, - '--use-existing', - '--bindir={}'.format(bindir), - '--dlpath={}'.format(build_path), - '--inputdir={}'.format(src_path), - '--schedule={}'.format(schedule), + "--use-existing", + "--bindir={}".format(bindir), + "--dlpath={}".format(build_path), + "--inputdir={}".format(src_path), + "--schedule={}".format(schedule), ] env_vars = { - 'PGPORT': str(pg.default_options['port']), - 'PGUSER': pg.default_options['user'], - 'PGHOST': pg.default_options['host'], + "PGPORT": str(pg.default_options["port"]), + "PGUSER": pg.default_options["user"], + "PGHOST": pg.default_options["host"], } # Run the command. 
diff --git a/test_runner/batch_pg_regress/test_neon_regress.py b/test_runner/batch_pg_regress/test_neon_regress.py index 66ea67d9f1..5f13e6b2de 100644 --- a/test_runner/batch_pg_regress/test_neon_regress.py +++ b/test_runner/batch_pg_regress/test_neon_regress.py @@ -1,11 +1,8 @@ import os from pathlib import Path -from fixtures.neon_fixtures import (NeonEnv, - check_restored_datadir_content, - base_dir, - pg_distrib_dir) from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv, base_dir, check_restored_datadir_content, pg_distrib_dir def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, capsys): @@ -13,35 +10,35 @@ def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, c env.neon_cli.create_branch("test_neon_regress", "empty") # Connect to postgres and create a database called "regression". - pg = env.postgres.create_start('test_neon_regress') - pg.safe_psql('CREATE DATABASE regression') + pg = env.postgres.create_start("test_neon_regress") + pg.safe_psql("CREATE DATABASE regression") # Create some local directories for pg_regress to run in. - runpath = test_output_dir / 'regress' - (runpath / 'testtablespace').mkdir(parents=True) + runpath = test_output_dir / "regress" + (runpath / "testtablespace").mkdir(parents=True) # Compute all the file locations that pg_regress will need. 
# This test runs neon specific tests - build_path = os.path.join(pg_distrib_dir, 'build/src/test/regress') - src_path = os.path.join(base_dir, 'test_runner/neon_regress') - bindir = os.path.join(pg_distrib_dir, 'bin') - schedule = os.path.join(src_path, 'parallel_schedule') - pg_regress = os.path.join(build_path, 'pg_regress') + build_path = os.path.join(pg_distrib_dir, "build/src/test/regress") + src_path = os.path.join(base_dir, "test_runner/neon_regress") + bindir = os.path.join(pg_distrib_dir, "bin") + schedule = os.path.join(src_path, "parallel_schedule") + pg_regress = os.path.join(build_path, "pg_regress") pg_regress_command = [ pg_regress, - '--use-existing', - '--bindir={}'.format(bindir), - '--dlpath={}'.format(build_path), - '--schedule={}'.format(schedule), - '--inputdir={}'.format(src_path), + "--use-existing", + "--bindir={}".format(bindir), + "--dlpath={}".format(build_path), + "--schedule={}".format(schedule), + "--inputdir={}".format(src_path), ] log.info(pg_regress_command) env_vars = { - 'PGPORT': str(pg.default_options['port']), - 'PGUSER': pg.default_options['user'], - 'PGHOST': pg.default_options['host'], + "PGPORT": str(pg.default_options["port"]), + "PGUSER": pg.default_options["user"], + "PGHOST": pg.default_options["host"], } # Run the command. 
@@ -51,8 +48,8 @@ def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, c pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath) # checkpoint one more time to ensure that the lsn we get is the latest one - pg.safe_psql('CHECKPOINT') - lsn = pg.safe_psql('select pg_current_wal_insert_lsn()')[0][0] + pg.safe_psql("CHECKPOINT") + lsn = pg.safe_psql("select pg_current_wal_insert_lsn()")[0][0] # Check that we restore the content of the datadir correctly check_restored_datadir_content(test_output_dir, env, pg) diff --git a/test_runner/batch_pg_regress/test_pg_regress.py b/test_runner/batch_pg_regress/test_pg_regress.py index 28066d7a32..478dbf0a91 100644 --- a/test_runner/batch_pg_regress/test_pg_regress.py +++ b/test_runner/batch_pg_regress/test_pg_regress.py @@ -1,7 +1,8 @@ import os import pathlib + import pytest -from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content, base_dir, pg_distrib_dir +from fixtures.neon_fixtures import NeonEnv, base_dir, check_restored_datadir_content, pg_distrib_dir # The pg_regress tests run for a long time, especially in debug mode, @@ -12,34 +13,34 @@ def test_pg_regress(neon_simple_env: NeonEnv, test_output_dir: pathlib.Path, pg_ env.neon_cli.create_branch("test_pg_regress", "empty") # Connect to postgres and create a database called "regression". - pg = env.postgres.create_start('test_pg_regress') - pg.safe_psql('CREATE DATABASE regression') + pg = env.postgres.create_start("test_pg_regress") + pg.safe_psql("CREATE DATABASE regression") # Create some local directories for pg_regress to run in. - runpath = test_output_dir / 'regress' - (runpath / 'testtablespace').mkdir(parents=True) + runpath = test_output_dir / "regress" + (runpath / "testtablespace").mkdir(parents=True) # Compute all the file locations that pg_regress will need. 
- build_path = os.path.join(pg_distrib_dir, 'build/src/test/regress') - src_path = os.path.join(base_dir, 'vendor/postgres/src/test/regress') - bindir = os.path.join(pg_distrib_dir, 'bin') - schedule = os.path.join(src_path, 'parallel_schedule') - pg_regress = os.path.join(build_path, 'pg_regress') + build_path = os.path.join(pg_distrib_dir, "build/src/test/regress") + src_path = os.path.join(base_dir, "vendor/postgres/src/test/regress") + bindir = os.path.join(pg_distrib_dir, "bin") + schedule = os.path.join(src_path, "parallel_schedule") + pg_regress = os.path.join(build_path, "pg_regress") pg_regress_command = [ pg_regress, '--bindir=""', - '--use-existing', - '--bindir={}'.format(bindir), - '--dlpath={}'.format(build_path), - '--schedule={}'.format(schedule), - '--inputdir={}'.format(src_path), + "--use-existing", + "--bindir={}".format(bindir), + "--dlpath={}".format(build_path), + "--schedule={}".format(schedule), + "--inputdir={}".format(src_path), ] env_vars = { - 'PGPORT': str(pg.default_options['port']), - 'PGUSER': pg.default_options['user'], - 'PGHOST': pg.default_options['host'], + "PGPORT": str(pg.default_options["port"]), + "PGUSER": pg.default_options["user"], + "PGHOST": pg.default_options["host"], } # Run the command. 
@@ -49,7 +50,7 @@ def test_pg_regress(neon_simple_env: NeonEnv, test_output_dir: pathlib.Path, pg_ pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath) # checkpoint one more time to ensure that the lsn we get is the latest one - pg.safe_psql('CHECKPOINT') + pg.safe_psql("CHECKPOINT") # Check that we restore the content of the datadir correctly check_restored_datadir_content(test_output_dir, env, pg) diff --git a/test_runner/conftest.py b/test_runner/conftest.py index 51545d0217..8b7f6a2eea 100644 --- a/test_runner/conftest.py +++ b/test_runner/conftest.py @@ -1,5 +1,7 @@ -pytest_plugins = ("fixtures.neon_fixtures", - "fixtures.benchmark_fixture", - "fixtures.pg_stats", - "fixtures.compare_fixtures", - "fixtures.slow") +pytest_plugins = ( + "fixtures.neon_fixtures", + "fixtures.benchmark_fixture", + "fixtures.pg_stats", + "fixtures.compare_fixtures", + "fixtures.slow", +) diff --git a/test_runner/fixtures/benchmark_fixture.py b/test_runner/fixtures/benchmark_fixture.py index cca4f7ce17..cec46f9f6d 100644 --- a/test_runner/fixtures/benchmark_fixture.py +++ b/test_runner/fixtures/benchmark_fixture.py @@ -10,12 +10,14 @@ import warnings from contextlib import contextmanager from datetime import datetime from pathlib import Path + # Type-related stuff from typing import Iterator, Optional import pytest from _pytest.config import Config from _pytest.terminal import TerminalReporter + """ This file contains fixtures for micro-benchmarks. 
@@ -112,8 +114,10 @@ class PgBenchRunResult: # pgbench v14: # initial connection time = 3.858 ms # tps = 309.281539 (without initial connection time) - if (line.startswith("tps = ") and ("(excluding connections establishing)" in line - or "(without initial connection time)")): + if line.startswith("tps = ") and ( + "(excluding connections establishing)" in line + or "(without initial connection time)" + ): tps = float(line.split()[2]) return cls( @@ -154,17 +158,21 @@ class PgBenchInitResult: last_line = stderr.splitlines()[-1] - regex = re.compile(r"done in (\d+\.\d+) s " - r"\(" - r"(?:drop tables (\d+\.\d+) s)?(?:, )?" - r"(?:create tables (\d+\.\d+) s)?(?:, )?" - r"(?:client-side generate (\d+\.\d+) s)?(?:, )?" - r"(?:vacuum (\d+\.\d+) s)?(?:, )?" - r"(?:primary keys (\d+\.\d+) s)?(?:, )?" - r"\)\.") + regex = re.compile( + r"done in (\d+\.\d+) s " + r"\(" + r"(?:drop tables (\d+\.\d+) s)?(?:, )?" + r"(?:create tables (\d+\.\d+) s)?(?:, )?" + r"(?:client-side generate (\d+\.\d+) s)?(?:, )?" + r"(?:vacuum (\d+\.\d+) s)?(?:, )?" + r"(?:primary keys (\d+\.\d+) s)?(?:, )?" + r"\)\." 
+ ) if (m := regex.match(last_line)) is not None: - total, drop_tables, create_tables, client_side_generate, vacuum, primary_keys = [float(v) for v in m.groups() if v is not None] + total, drop_tables, create_tables, client_side_generate, vacuum, primary_keys = [ + float(v) for v in m.groups() if v is not None + ] else: raise RuntimeError(f"can't parse pgbench initialize results from `{last_line}`") @@ -185,11 +193,11 @@ class PgBenchInitResult: class MetricReport(str, enum.Enum): # str is a hack to make it json serializable # this means that this is a constant test parameter # like number of transactions, or number of clients - TEST_PARAM = 'test_param' + TEST_PARAM = "test_param" # reporter can use it to mark test runs with higher values as improvements - HIGHER_IS_BETTER = 'higher_is_better' + HIGHER_IS_BETTER = "higher_is_better" # the same but for lower values - LOWER_IS_BETTER = 'lower_is_better' + LOWER_IS_BETTER = "lower_is_better" class NeonBenchmarker: @@ -197,6 +205,7 @@ class NeonBenchmarker: An object for recording benchmark results. 
This is created for each test function by the zenbenchmark fixture """ + def __init__(self, property_recorder): # property recorder here is a pytest fixture provided by junitxml module # https://docs.pytest.org/en/6.2.x/reference.html#pytest.junitxml.record_property @@ -244,43 +253,57 @@ class NeonBenchmarker: ) def record_pg_bench_result(self, prefix: str, pg_bench_result: PgBenchRunResult): - self.record(f"{prefix}.number_of_clients", - pg_bench_result.number_of_clients, - '', - MetricReport.TEST_PARAM) - self.record(f"{prefix}.number_of_threads", - pg_bench_result.number_of_threads, - '', - MetricReport.TEST_PARAM) + self.record( + f"{prefix}.number_of_clients", + pg_bench_result.number_of_clients, + "", + MetricReport.TEST_PARAM, + ) + self.record( + f"{prefix}.number_of_threads", + pg_bench_result.number_of_threads, + "", + MetricReport.TEST_PARAM, + ) self.record( f"{prefix}.number_of_transactions_actually_processed", pg_bench_result.number_of_transactions_actually_processed, - '', + "", # that's because this is predefined by test matrix and doesn't change across runs report=MetricReport.TEST_PARAM, ) - self.record(f"{prefix}.latency_average", - pg_bench_result.latency_average, - unit="ms", - report=MetricReport.LOWER_IS_BETTER) + self.record( + f"{prefix}.latency_average", + pg_bench_result.latency_average, + unit="ms", + report=MetricReport.LOWER_IS_BETTER, + ) if pg_bench_result.latency_stddev is not None: - self.record(f"{prefix}.latency_stddev", - pg_bench_result.latency_stddev, - unit="ms", - report=MetricReport.LOWER_IS_BETTER) - self.record(f"{prefix}.tps", pg_bench_result.tps, '', report=MetricReport.HIGHER_IS_BETTER) - self.record(f"{prefix}.run_duration", - pg_bench_result.run_duration, - unit="s", - report=MetricReport.LOWER_IS_BETTER) - self.record(f"{prefix}.run_start_timestamp", - pg_bench_result.run_start_timestamp, - '', - MetricReport.TEST_PARAM) - self.record(f"{prefix}.run_end_timestamp", - pg_bench_result.run_end_timestamp, - '', - 
MetricReport.TEST_PARAM) + self.record( + f"{prefix}.latency_stddev", + pg_bench_result.latency_stddev, + unit="ms", + report=MetricReport.LOWER_IS_BETTER, + ) + self.record(f"{prefix}.tps", pg_bench_result.tps, "", report=MetricReport.HIGHER_IS_BETTER) + self.record( + f"{prefix}.run_duration", + pg_bench_result.run_duration, + unit="s", + report=MetricReport.LOWER_IS_BETTER, + ) + self.record( + f"{prefix}.run_start_timestamp", + pg_bench_result.run_start_timestamp, + "", + MetricReport.TEST_PARAM, + ) + self.record( + f"{prefix}.run_end_timestamp", + pg_bench_result.run_end_timestamp, + "", + MetricReport.TEST_PARAM, + ) def record_pg_bench_init_result(self, prefix: str, result: PgBenchInitResult): test_params = [ @@ -288,10 +311,9 @@ class NeonBenchmarker: "end_timestamp", ] for test_param in test_params: - self.record(f"{prefix}.{test_param}", - getattr(result, test_param), - '', - MetricReport.TEST_PARAM) + self.record( + f"{prefix}.{test_param}", getattr(result, test_param), "", MetricReport.TEST_PARAM + ) metrics = [ "duration", @@ -303,10 +325,9 @@ class NeonBenchmarker: ] for metric in metrics: if (value := getattr(result, metric)) is not None: - self.record(f"{prefix}.{metric}", - value, - unit="s", - report=MetricReport.LOWER_IS_BETTER) + self.record( + f"{prefix}.{metric}", value, unit="s", report=MetricReport.LOWER_IS_BETTER + ) def get_io_writes(self, pageserver) -> int: """ @@ -319,7 +340,7 @@ class NeonBenchmarker: """ Fetch the "maxrss" metric from the pageserver """ - metric_name = r'libmetrics_maxrss_kb' + metric_name = r"libmetrics_maxrss_kb" return self.get_int_counter_value(pageserver, metric_name) def get_int_counter_value(self, pageserver, metric_name) -> int: @@ -332,7 +353,7 @@ class NeonBenchmarker: # all prometheus metrics are floats. So to be pedantic, read it as a float # and round to integer. 
all_metrics = pageserver.http_client().get_metrics() - matches = re.search(fr'^{metric_name} (\S+)$', all_metrics, re.MULTILINE) + matches = re.search(rf"^{metric_name} (\S+)$", all_metrics, re.MULTILINE) assert matches return int(round(float(matches.group(1)))) @@ -358,10 +379,12 @@ class NeonBenchmarker: yield after = self.get_io_writes(pageserver) - self.record(metric_name, - round((after - before) / (1024 * 1024)), - "MB", - report=MetricReport.LOWER_IS_BETTER) + self.record( + metric_name, + round((after - before) / (1024 * 1024)), + "MB", + report=MetricReport.LOWER_IS_BETTER, + ) @pytest.fixture(scope="function") @@ -410,8 +433,9 @@ def pytest_terminal_summary(terminalreporter: TerminalReporter, exitstatus: int, result_entry = [] for _, recorded_property in test_report.user_properties: - terminalreporter.write("{}.{}: ".format(test_report.head_line, - recorded_property["name"])) + terminalreporter.write( + "{}.{}: ".format(test_report.head_line, recorded_property["name"]) + ) unit = recorded_property["unit"] value = recorded_property["value"] if unit == "MB": @@ -426,11 +450,13 @@ def pytest_terminal_summary(terminalreporter: TerminalReporter, exitstatus: int, result_entry.append(recorded_property) - result.append({ - "suit": test_report.nodeid, - "total_duration": test_report.duration, - "data": result_entry, - }) + result.append( + { + "suit": test_report.nodeid, + "total_duration": test_report.duration, + "data": result_entry, + } + ) out_dir = config.getoption("out_dir") if out_dir is None: @@ -442,6 +468,5 @@ def pytest_terminal_summary(terminalreporter: TerminalReporter, exitstatus: int, return get_out_path(Path(out_dir), revision=revision).write_text( - json.dumps({ - "revision": revision, "platform": platform, "result": result - }, indent=4)) + json.dumps({"revision": revision, "platform": platform, "result": result}, indent=4) + ) diff --git a/test_runner/fixtures/compare_fixtures.py b/test_runner/fixtures/compare_fixtures.py index 
e6c3a79697..6bca5be335 100644 --- a/test_runner/fixtures/compare_fixtures.py +++ b/test_runner/fixtures/compare_fixtures.py @@ -1,14 +1,14 @@ -import pytest -from contextlib import contextmanager from abc import ABC, abstractmethod -from fixtures.pg_stats import PgStatTable - -from fixtures.neon_fixtures import PgBin, PgProtocol, VanillaPostgres, RemotePostgres, NeonEnv -from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker +from contextlib import contextmanager # Type-related stuff from typing import Dict, List +import pytest +from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker +from fixtures.neon_fixtures import NeonEnv, PgBin, PgProtocol, RemotePostgres, VanillaPostgres +from fixtures.pg_stats import PgStatTable + class PgCompare(ABC): """Common interface of all postgres implementations, useful for benchmarks. @@ -16,6 +16,7 @@ class PgCompare(ABC): This class is a helper class for the neon_with_baseline fixture. See its documentation for more details. 
""" + @property @abstractmethod def pg(self) -> PgProtocol: @@ -61,7 +62,7 @@ class PgCompare(ABC): data = self._retrieve_pg_stats(pg_stats) for k in set(init_data) & set(data): - self.zenbenchmark.record(k, data[k] - init_data[k], '', MetricReport.HIGHER_IS_BETTER) + self.zenbenchmark.record(k, data[k] - init_data[k], "", MetricReport.HIGHER_IS_BETTER) def _retrieve_pg_stats(self, pg_stats: List[PgStatTable]) -> Dict[str, int]: results: Dict[str, int] = {} @@ -81,17 +82,16 @@ class PgCompare(ABC): class NeonCompare(PgCompare): """PgCompare interface for the neon stack.""" - def __init__(self, - zenbenchmark: NeonBenchmarker, - neon_simple_env: NeonEnv, - pg_bin: PgBin, - branch_name): + + def __init__( + self, zenbenchmark: NeonBenchmarker, neon_simple_env: NeonEnv, pg_bin: PgBin, branch_name + ): self.env = neon_simple_env self._zenbenchmark = zenbenchmark self._pg_bin = pg_bin # We only use one branch and one timeline - self.env.neon_cli.create_branch(branch_name, 'empty') + self.env.neon_cli.create_branch(branch_name, "empty") self._pg = self.env.postgres.create_start(branch_name) self.timeline = self.pg.safe_psql("SHOW neon.timeline_id")[0][0] @@ -118,32 +118,33 @@ class NeonCompare(PgCompare): self.pscur.execute(f"compact {self.env.initial_tenant.hex} {self.timeline}") def report_peak_memory_use(self) -> None: - self.zenbenchmark.record("peak_mem", - self.zenbenchmark.get_peak_mem(self.env.pageserver) / 1024, - 'MB', - report=MetricReport.LOWER_IS_BETTER) + self.zenbenchmark.record( + "peak_mem", + self.zenbenchmark.get_peak_mem(self.env.pageserver) / 1024, + "MB", + report=MetricReport.LOWER_IS_BETTER, + ) def report_size(self) -> None: - timeline_size = self.zenbenchmark.get_timeline_size(self.env.repo_dir, - self.env.initial_tenant, - self.timeline) - self.zenbenchmark.record('size', - timeline_size / (1024 * 1024), - 'MB', - report=MetricReport.LOWER_IS_BETTER) + timeline_size = self.zenbenchmark.get_timeline_size( + self.env.repo_dir, 
self.env.initial_tenant, self.timeline + ) + self.zenbenchmark.record( + "size", timeline_size / (1024 * 1024), "MB", report=MetricReport.LOWER_IS_BETTER + ) total_files = self.zenbenchmark.get_int_counter_value( - self.env.pageserver, "pageserver_created_persistent_files_total") + self.env.pageserver, "pageserver_created_persistent_files_total" + ) total_bytes = self.zenbenchmark.get_int_counter_value( - self.env.pageserver, "pageserver_written_persistent_bytes_total") - self.zenbenchmark.record("data_uploaded", - total_bytes / (1024 * 1024), - "MB", - report=MetricReport.LOWER_IS_BETTER) - self.zenbenchmark.record("num_files_uploaded", - total_files, - "", - report=MetricReport.LOWER_IS_BETTER) + self.env.pageserver, "pageserver_written_persistent_bytes_total" + ) + self.zenbenchmark.record( + "data_uploaded", total_bytes / (1024 * 1024), "MB", report=MetricReport.LOWER_IS_BETTER + ) + self.zenbenchmark.record( + "num_files_uploaded", total_files, "", report=MetricReport.LOWER_IS_BETTER + ) def record_pageserver_writes(self, out_name): return self.zenbenchmark.record_pageserver_writes(self.env.pageserver, out_name) @@ -154,13 +155,16 @@ class NeonCompare(PgCompare): class VanillaCompare(PgCompare): """PgCompare interface for vanilla postgres.""" + def __init__(self, zenbenchmark, vanilla_pg: VanillaPostgres): self._pg = vanilla_pg self._zenbenchmark = zenbenchmark - vanilla_pg.configure([ - 'shared_buffers=1MB', - 'synchronous_commit=off', - ]) + vanilla_pg.configure( + [ + "shared_buffers=1MB", + "synchronous_commit=off", + ] + ) vanilla_pg.start() # Long-lived cursor, useful for flushing @@ -186,16 +190,14 @@ class VanillaCompare(PgCompare): pass # TODO find something def report_size(self) -> None: - data_size = self.pg.get_subdir_size('base') - self.zenbenchmark.record('data_size', - data_size / (1024 * 1024), - 'MB', - report=MetricReport.LOWER_IS_BETTER) - wal_size = self.pg.get_subdir_size('pg_wal') - self.zenbenchmark.record('wal_size', - wal_size / (1024 
* 1024), - 'MB', - report=MetricReport.LOWER_IS_BETTER) + data_size = self.pg.get_subdir_size("base") + self.zenbenchmark.record( + "data_size", data_size / (1024 * 1024), "MB", report=MetricReport.LOWER_IS_BETTER + ) + wal_size = self.pg.get_subdir_size("pg_wal") + self.zenbenchmark.record( + "wal_size", wal_size / (1024 * 1024), "MB", report=MetricReport.LOWER_IS_BETTER + ) @contextmanager def record_pageserver_writes(self, out_name): @@ -207,6 +209,7 @@ class VanillaCompare(PgCompare): class RemoteCompare(PgCompare): """PgCompare interface for a remote postgres instance.""" + def __init__(self, zenbenchmark, remote_pg: RemotePostgres): self._pg = remote_pg self._zenbenchmark = zenbenchmark @@ -247,18 +250,18 @@ class RemoteCompare(PgCompare): return self.zenbenchmark.record_duration(out_name) -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def neon_compare(request, zenbenchmark, pg_bin, neon_simple_env) -> NeonCompare: branch_name = request.node.name return NeonCompare(zenbenchmark, neon_simple_env, pg_bin, branch_name) -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def vanilla_compare(zenbenchmark, vanilla_pg) -> VanillaCompare: return VanillaCompare(zenbenchmark, vanilla_pg) -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def remote_compare(zenbenchmark, remote_pg) -> RemoteCompare: return RemoteCompare(zenbenchmark, remote_pg) diff --git a/test_runner/fixtures/log_helper.py b/test_runner/fixtures/log_helper.py index 7c2d83d4e3..17f2402391 100644 --- a/test_runner/fixtures/log_helper.py +++ b/test_runner/fixtures/log_helper.py @@ -1,5 +1,6 @@ import logging import logging.config + """ This file configures logging to use in python tests. 
Logs are automatically captured and shown in their @@ -22,20 +23,16 @@ https://docs.pytest.org/en/6.2.x/logging.html LOGGING = { "version": 1, "loggers": { - "root": { - "level": "INFO" - }, - "root.safekeeper_async": { - "level": "INFO" # a lot of logs on DEBUG level - } - } + "root": {"level": "INFO"}, + "root.safekeeper_async": {"level": "INFO"}, # a lot of logs on DEBUG level + }, } -def getLogger(name='root') -> logging.Logger: +def getLogger(name="root") -> logging.Logger: """Method to get logger for tests. - Should be used to get correctly initialized logger. """ + Should be used to get correctly initialized logger.""" return logging.getLogger(name) diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index 6fc62c6ea9..6159e273c0 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -1,10 +1,10 @@ -from dataclasses import dataclass -from prometheus_client.parser import text_string_to_metric_families -from prometheus_client.samples import Sample -from typing import Dict, List from collections import defaultdict +from dataclasses import dataclass +from typing import Dict, List from fixtures.log_helper import log +from prometheus_client.parser import text_string_to_metric_families +from prometheus_client.samples import Sample class Metrics: diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 4483355c4c..388cc34182 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1,47 +1,45 @@ from __future__ import annotations -from dataclasses import field -from contextlib import contextmanager -from enum import Flag, auto -import enum -import textwrap -from cached_property import cached_property import abc -import asyncpg -import os -import boto3 -import pathlib -import uuid -import warnings -import jwt +import enum +import filecmp import json -import psycopg2 -import pytest +import os +import pathlib import re import shutil 
import socket import subprocess -import time -import filecmp -import tempfile import tarfile - -from contextlib import closing +import tempfile +import textwrap +import time +import uuid +import warnings +from contextlib import closing, contextmanager +from dataclasses import dataclass, field +from enum import Flag, auto from pathlib import Path -from dataclasses import dataclass +from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, TypeVar, Union, cast + +import allure # type: ignore +import asyncpg +import backoff # type: ignore +import boto3 +import jwt +import psycopg2 +import pytest +import requests +from cached_property import cached_property +from fixtures.log_helper import log # Type-related stuff from psycopg2.extensions import connection as PgConnection from psycopg2.extensions import make_dsn, parse_dsn -from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast, Union, Tuple from typing_extensions import Literal -import allure # type: ignore -import requests -import backoff # type: ignore +from .utils import etcd_path, get_self_dir, lsn_from_hex, lsn_to_hex, subprocess_capture -from .utils import (etcd_path, get_self_dir, subprocess_capture, lsn_from_hex, lsn_to_hex) -from fixtures.log_helper import log """ This file contains pytest fixtures. A fixture is a test resource that can be summoned by placing its name in the test's arguments. @@ -60,11 +58,11 @@ put directly-importable functions into utils.py or another separate file. 
""" Env = Dict[str, str] -Fn = TypeVar('Fn', bound=Callable[..., Any]) +Fn = TypeVar("Fn", bound=Callable[..., Any]) -DEFAULT_OUTPUT_DIR = 'test_output' -DEFAULT_POSTGRES_DIR = 'tmp_install' -DEFAULT_BRANCH_NAME = 'main' +DEFAULT_OUTPUT_DIR = "test_output" +DEFAULT_POSTGRES_DIR = "tmp_install" +DEFAULT_BRANCH_NAME = "main" BASE_PORT = 15000 WORKER_PORT_NUM = 1000 @@ -92,7 +90,7 @@ def check_interferring_processes(config): return # does not use -c as it is not supported on macOS - cmd = ['pgrep', 'pageserver|postgres|safekeeper'] + cmd = ["pgrep", "pageserver|postgres|safekeeper"] result = subprocess.run(cmd, stdout=subprocess.DEVNULL) if result.returncode == 0: # returncode of 0 means it found something. @@ -100,7 +98,7 @@ def check_interferring_processes(config): # result of the test. # NOTE this shows as an internal pytest error, there might be a better way raise Exception( - 'Found interfering processes running. Stop all Neon pageservers, nodes, safekeepers, as well as stand-alone Postgres.' + "Found interfering processes running. Stop all Neon pageservers, nodes, safekeepers, as well as stand-alone Postgres." ) @@ -111,18 +109,20 @@ def pytest_configure(config): """ check_interferring_processes(config) - numprocesses = config.getoption('numprocesses') - if numprocesses is not None and BASE_PORT + numprocesses * WORKER_PORT_NUM > 32768: # do not use ephemeral ports - raise Exception('Too many workers configured. Cannot distribute ports for services.') + numprocesses = config.getoption("numprocesses") + if ( + numprocesses is not None and BASE_PORT + numprocesses * WORKER_PORT_NUM > 32768 + ): # do not use ephemeral ports + raise Exception("Too many workers configured. 
Cannot distribute ports for services.") # find the base directory (currently this is the git root) global base_dir - base_dir = os.path.normpath(os.path.join(get_self_dir(), '../..')) - log.info(f'base_dir is {base_dir}') + base_dir = os.path.normpath(os.path.join(get_self_dir(), "../..")) + log.info(f"base_dir is {base_dir}") # Compute the top-level directory for all tests. global top_output_dir - env_test_output = os.environ.get('TEST_OUTPUT') + env_test_output = os.environ.get("TEST_OUTPUT") if env_test_output is not None: top_output_dir = env_test_output else: @@ -131,18 +131,18 @@ def pytest_configure(config): # Find the postgres installation. global pg_distrib_dir - env_postgres_bin = os.environ.get('POSTGRES_DISTRIB_DIR') + env_postgres_bin = os.environ.get("POSTGRES_DISTRIB_DIR") if env_postgres_bin: pg_distrib_dir = env_postgres_bin else: pg_distrib_dir = os.path.normpath(os.path.join(base_dir, DEFAULT_POSTGRES_DIR)) - log.info(f'pg_distrib_dir is {pg_distrib_dir}') + log.info(f"pg_distrib_dir is {pg_distrib_dir}") if os.getenv("REMOTE_ENV"): # When testing against a remote server, we only need the client binary. - if not os.path.exists(os.path.join(pg_distrib_dir, 'bin/psql')): + if not os.path.exists(os.path.join(pg_distrib_dir, "bin/psql")): raise Exception('psql not found at "{}"'.format(pg_distrib_dir)) else: - if not os.path.exists(os.path.join(pg_distrib_dir, 'bin/postgres')): + if not os.path.exists(os.path.join(pg_distrib_dir, "bin/postgres")): raise Exception('postgres not found at "{}"'.format(pg_distrib_dir)) if os.getenv("REMOTE_ENV"): @@ -151,25 +151,26 @@ def pytest_configure(config): return # Find the neon binaries. 
global neon_binpath - env_neon_bin = os.environ.get('NEON_BIN') + env_neon_bin = os.environ.get("NEON_BIN") if env_neon_bin: neon_binpath = env_neon_bin else: - neon_binpath = os.path.join(base_dir, 'target/debug') - log.info(f'neon_binpath is {neon_binpath}') - if not os.path.exists(os.path.join(neon_binpath, 'pageserver')): + neon_binpath = os.path.join(base_dir, "target/debug") + log.info(f"neon_binpath is {neon_binpath}") + if not os.path.exists(os.path.join(neon_binpath, "pageserver")): raise Exception('neon binaries not found at "{}"'.format(neon_binpath)) def profiling_supported(): - """Return True if the pageserver was compiled with the 'profiling' feature - """ - bin_pageserver = os.path.join(str(neon_binpath), 'pageserver') - res = subprocess.run([bin_pageserver, '--version'], - check=True, - universal_newlines=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + """Return True if the pageserver was compiled with the 'profiling' feature""" + bin_pageserver = os.path.join(str(neon_binpath), "pageserver") + res = subprocess.run( + [bin_pageserver, "--version"], + check=True, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) return "profiling:true" in res.stdout @@ -181,21 +182,21 @@ def shareable_scope(fixture_name, config) -> Literal["session", "function"]: def myfixture(...) ... 
""" - return 'function' if os.environ.get('TEST_SHARED_FIXTURES') is None else 'session' + return "function" if os.environ.get("TEST_SHARED_FIXTURES") is None else "session" -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def worker_seq_no(worker_id: str): # worker_id is a pytest-xdist fixture # it can be master or gw # parse it to always get a number - if worker_id == 'master': + if worker_id == "master": return 0 - assert worker_id.startswith('gw') + assert worker_id.startswith("gw") return int(worker_id[2:]) -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def worker_base_port(worker_seq_no: int): # so we divide ports in ranges of 100 ports # so workers have disjoint set of ports for services @@ -247,15 +248,16 @@ class PortDistributor: return port else: raise RuntimeError( - 'port range configured for test is exhausted, consider enlarging the range') + "port range configured for test is exhausted, consider enlarging the range" + ) -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def port_distributor(worker_base_port): return PortDistributor(base_port=worker_base_port, port_number=WORKER_PORT_NUM) -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def default_broker(request: Any, port_distributor: PortDistributor): client_port = port_distributor.get_port() # multiple pytest sessions could get launched in parallel, get them different datadirs @@ -267,12 +269,12 @@ def default_broker(request: Any, port_distributor: PortDistributor): broker.stop() -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def run_id(): yield uuid.uuid4() -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def mock_s3_server(port_distributor: PortDistributor): mock_s3_server = MockS3Server(port_distributor.get_port()) yield mock_s3_server @@ -280,7 +282,8 @@ def mock_s3_server(port_distributor: PortDistributor): class PgProtocol: - """ Reusable connection logic """ + """Reusable 
connection logic""" + def __init__(self, **kwargs): self.default_options = kwargs @@ -292,18 +295,18 @@ class PgProtocol: def conn_options(self, **kwargs): result = self.default_options.copy() - if 'dsn' in kwargs: - result.update(parse_dsn(kwargs['dsn'])) + if "dsn" in kwargs: + result.update(parse_dsn(kwargs["dsn"])) result.update(kwargs) # Individual statement timeout in seconds. 2 minutes should be # enough for our tests, but if you need a longer, you can # change it by calling "SET statement_timeout" after # connecting. - options = result.get('options', '') + options = result.get("options", "") if "statement_timeout" not in options: - options = f'-cstatement_timeout=120s {options}' - result['options'] = options + options = f"-cstatement_timeout=120s {options}" + result["options"] = options return result # autocommit=True here by default because that's what we need most of the time @@ -339,19 +342,19 @@ class PgProtocol: # The psycopg2 option 'dbname' is called 'database' is asyncpg conn_options = self.conn_options(**kwargs) - if 'dbname' in conn_options: - conn_options['database'] = conn_options.pop('dbname') + if "dbname" in conn_options: + conn_options["database"] = conn_options.pop("dbname") # Convert options='-c=' to server_settings - if 'options' in conn_options: - options = conn_options.pop('options') - for match in re.finditer(r'-c(\w*)=(\w*)', options): + if "options" in conn_options: + options = conn_options.pop("options") + for match in re.finditer(r"-c(\w*)=(\w*)", options): key = match.group(1) val = match.group(2) - if 'server_options' in conn_options: - conn_options['server_settings'].update({key: val}) + if "server_options" in conn_options: + conn_options["server_settings"].update({key: val}) else: - conn_options['server_settings'] = {key: val} + conn_options["server_settings"] = {key: val} return await asyncpg.connect(**conn_options) def safe_psql(self, query: str, **kwargs: Any) -> List[Tuple[Any, ...]]: @@ -397,11 +400,9 @@ class AuthKeys: 
return token def generate_tenant_token(self, tenant_id): - token = jwt.encode({ - "scope": "tenant", "tenant_id": tenant_id - }, - self.priv, - algorithm="RS256") + token = jwt.encode( + {"scope": "tenant", "tenant_id": tenant_id}, self.priv, algorithm="RS256" + ) if isinstance(token, bytes): token = token.decode() @@ -416,6 +417,7 @@ class MockS3Server: Also provides a set of methods to derive the connection properties from and the method to kill the underlying server. """ + def __init__( self, port: int, @@ -425,7 +427,7 @@ class MockS3Server: # XXX: do not use `shell=True` or add `exec ` to the command here otherwise. # We use `self.subprocess.kill()` to shut down the server, which would not "just" work in Linux # if a process is started from the shell process. - self.subprocess = subprocess.Popen(['poetry', 'run', 'moto_server', 's3', f'-p{port}']) + self.subprocess = subprocess.Popen(["poetry", "run", "moto_server", "s3", f"-p{port}"]) error = None try: return_code = self.subprocess.poll() @@ -442,13 +444,13 @@ class MockS3Server: return f"http://127.0.0.1:{self.port}" def region(self) -> str: - return 'us-east-1' + return "us-east-1" def access_key(self) -> str: - return 'test' + return "test" def secret_key(self) -> str: - return 'test' + return "test" def kill(self): self.subprocess.kill() @@ -487,8 +489,8 @@ class S3Storage: def access_env_vars(self) -> Dict[str, str]: return { - 'AWS_ACCESS_KEY_ID': self.access_key, - 'AWS_SECRET_ACCESS_KEY': self.secret_key, + "AWS_ACCESS_KEY_ID": self.access_key, + "AWS_SECRET_ACCESS_KEY": self.secret_key, } @@ -528,6 +530,7 @@ class NeonEnvBuilder: created in the right directory, based on the test name, and it's properly cleaned up after the test has finished. 
""" + def __init__( self, repo_dir: Path, @@ -592,7 +595,7 @@ class NeonEnvBuilder: elif remote_storage_kind == RemoteStorageKind.REAL_S3: self.enable_real_s3_remote_storage(test_name=test_name, force_enable=force_enable) else: - raise RuntimeError(f'Unknown storage type: {remote_storage_kind}') + raise RuntimeError(f"Unknown storage type: {remote_storage_kind}") def enable_local_fs_remote_storage(self, force_enable=True): """ @@ -600,7 +603,7 @@ class NeonEnvBuilder: Errors, if the pageserver has some remote storage configuration already, unless `force_enable` is not set to `True`. """ assert force_enable or self.remote_storage is None, "remote storage is enabled already" - self.remote_storage = LocalFsStorage(Path(self.repo_dir / 'local_fs_remote_storage')) + self.remote_storage = LocalFsStorage(Path(self.repo_dir / "local_fs_remote_storage")) def enable_mock_s3_remote_storage(self, bucket_name: str, force_enable=True): """ @@ -613,7 +616,7 @@ class NeonEnvBuilder: mock_region = self.mock_s3_server.region() self.remote_storage_client = boto3.client( - 's3', + "s3", endpoint_url=mock_endpoint, region_name=mock_region, aws_access_key_id=self.mock_s3_server.access_key(), @@ -652,20 +655,22 @@ class NeonEnvBuilder: self.keep_remote_storage_contents = False # construct a prefix inside bucket for the particular test case and test run - self.remote_storage_prefix = f'{self.run_id}/{test_name}' + self.remote_storage_prefix = f"{self.run_id}/{test_name}" self.remote_storage_client = boto3.client( - 's3', + "s3", region_name=region, aws_access_key_id=access_key, aws_secret_access_key=secret_key, aws_session_token=session_token, ) - self.remote_storage = S3Storage(bucket_name=bucket_name, - bucket_region=region, - access_key=access_key, - secret_key=secret_key, - prefix_in_bucket=self.remote_storage_prefix) + self.remote_storage = S3Storage( + bucket_name=bucket_name, + bucket_region=region, + access_key=access_key, + secret_key=secret_key, + 
prefix_in_bucket=self.remote_storage_prefix, + ) def cleanup_remote_storage(self): # here wee check for true remote storage, no the local one @@ -678,26 +683,28 @@ class NeonEnvBuilder: log.info("keep_remote_storage_contents skipping remote storage cleanup") return - log.info("removing data from test s3 bucket %s by prefix %s", - self.remote_storage.bucket_name, - self.remote_storage_prefix) - paginator = self.remote_storage_client.get_paginator('list_objects_v2') + log.info( + "removing data from test s3 bucket %s by prefix %s", + self.remote_storage.bucket_name, + self.remote_storage_prefix, + ) + paginator = self.remote_storage_client.get_paginator("list_objects_v2") pages = paginator.paginate( Bucket=self.remote_storage.bucket_name, Prefix=self.remote_storage_prefix, ) - objects_to_delete = {'Objects': []} + objects_to_delete = {"Objects": []} cnt = 0 - for item in pages.search('Contents'): + for item in pages.search("Contents"): # weirdly when nothing is found it returns [None] if item is None: break - objects_to_delete['Objects'].append({'Key': item['Key']}) + objects_to_delete["Objects"].append({"Key": item["Key"]}) # flush once aws limit reached - if len(objects_to_delete['Objects']) >= 1000: + if len(objects_to_delete["Objects"]) >= 1000: self.remote_storage_client.delete_objects( Bucket=self.remote_storage.bucket_name, Delete=objects_to_delete, @@ -706,9 +713,10 @@ class NeonEnvBuilder: cnt += 1 # flush rest - if len(objects_to_delete['Objects']): - self.remote_storage_client.delete_objects(Bucket=self.remote_storage.bucket_name, - Delete=objects_to_delete) + if len(objects_to_delete["Objects"]): + self.remote_storage_client.delete_objects( + Bucket=self.remote_storage.bucket_name, Delete=objects_to_delete + ) log.info("deleted %s objects from remote storage", cnt) @@ -718,7 +726,7 @@ class NeonEnvBuilder: def __exit__(self, exc_type, exc_value, traceback): # Stop all the nodes. 
if self.env: - log.info('Cleaning up all storage and compute nodes') + log.info("Cleaning up all storage and compute nodes") self.env.postgres.stop_all() for sk in self.env.safekeepers: sk.stop(immediate=True) @@ -759,6 +767,7 @@ class NeonEnv: create_tenant() - initializes a new tenant in the page server, returns the tenant id """ + def __init__(self, config: NeonEnvBuilder): self.repo_dir = config.repo_dir self.rust_log_override = config.rust_log_override @@ -776,15 +785,19 @@ class NeonEnv: self.initial_tenant = uuid.uuid4() # Create a config file corresponding to the options - toml = textwrap.dedent(f""" + toml = textwrap.dedent( + f""" default_tenant_id = '{self.initial_tenant.hex}' - """) + """ + ) - toml += textwrap.dedent(f""" + toml += textwrap.dedent( + f""" [etcd_broker] broker_endpoints = ['{self.broker.client_url()}'] etcd_binary_path = '{self.broker.binary_path}' - """) + """ + ) # Create config for pageserver pageserver_port = PageserverPort( @@ -793,18 +806,20 @@ class NeonEnv: ) pageserver_auth_type = "ZenithJWT" if config.auth_enabled else "Trust" - toml += textwrap.dedent(f""" + toml += textwrap.dedent( + f""" [pageserver] id=1 listen_pg_addr = 'localhost:{pageserver_port.pg}' listen_http_addr = 'localhost:{pageserver_port.http}' auth_type = '{pageserver_auth_type}' - """) + """ + ) # Create a corresponding NeonPageserver object - self.pageserver = NeonPageserver(self, - port=pageserver_port, - config_override=config.pageserver_config_override) + self.pageserver = NeonPageserver( + self, port=pageserver_port, config_override=config.pageserver_config_override + ) # Create config and a Safekeeper object for each safekeeper for i in range(1, config.num_safekeepers + 1): @@ -813,21 +828,29 @@ class NeonEnv: http=self.port_distributor.get_port(), ) id = config.safekeepers_id_start + i # assign ids sequentially - toml += textwrap.dedent(f""" + toml += textwrap.dedent( + f""" [[safekeepers]] id = {id} pg_port = {port.pg} http_port = {port.http} - sync = 
{'true' if config.safekeepers_enable_fsync else 'false'}""") + sync = {'true' if config.safekeepers_enable_fsync else 'false'}""" + ) if config.auth_enabled: - toml += textwrap.dedent(f""" + toml += textwrap.dedent( + f""" auth_enabled = true - """) - if bool(self.remote_storage_users - & RemoteStorageUsers.SAFEKEEPER) and self.remote_storage is not None: - toml += textwrap.dedent(f""" + """ + ) + if ( + bool(self.remote_storage_users & RemoteStorageUsers.SAFEKEEPER) + and self.remote_storage is not None + ): + toml += textwrap.dedent( + f""" remote_storage = "{remote_storage_to_toml_inline_table(self.remote_storage)}" - """) + """ + ) safekeeper = Safekeeper(env=self, id=id, port=port) self.safekeepers.append(safekeeper) @@ -843,8 +866,8 @@ class NeonEnv: safekeeper.start() def get_safekeeper_connstrs(self) -> str: - """ Get list of safekeeper endpoints suitable for safekeepers GUC """ - return ','.join([f'localhost:{wa.port.pg}' for wa in self.safekeepers]) + """Get list of safekeeper endpoints suitable for safekeepers GUC""" + return ",".join([f"localhost:{wa.port.pg}" for wa in self.safekeepers]) def timeline_dir(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID) -> Path: """Get a timeline directory's path based on the repo directory of the test environment""" @@ -852,8 +875,8 @@ class NeonEnv: @cached_property def auth_keys(self) -> AuthKeys: - pub = (Path(self.repo_dir) / 'auth_public_key.pem').read_bytes() - priv = (Path(self.repo_dir) / 'auth_private_key.pem').read_bytes() + pub = (Path(self.repo_dir) / "auth_public_key.pem").read_bytes() + priv = (Path(self.repo_dir) / "auth_private_key.pem").read_bytes() return AuthKeys(pub=pub, priv=priv) @@ -866,11 +889,11 @@ def _shared_simple_env( run_id: uuid.UUID, ) -> Iterator[NeonEnv]: """ - # Internal fixture backing the `neon_simple_env` fixture. If TEST_SHARED_FIXTURES - is set, this is shared by all tests using `neon_simple_env`. + # Internal fixture backing the `neon_simple_env` fixture. 
If TEST_SHARED_FIXTURES + is set, this is shared by all tests using `neon_simple_env`. """ - if os.environ.get('TEST_SHARED_FIXTURES') is None: + if os.environ.get("TEST_SHARED_FIXTURES") is None: # Create the environment in the per-test output directory repo_dir = os.path.join(get_test_output_dir(request), "repo") else: @@ -879,21 +902,21 @@ def _shared_simple_env( shutil.rmtree(repo_dir, ignore_errors=True) with NeonEnvBuilder( - repo_dir=Path(repo_dir), - port_distributor=port_distributor, - broker=default_broker, - mock_s3_server=mock_s3_server, - run_id=run_id, + repo_dir=Path(repo_dir), + port_distributor=port_distributor, + broker=default_broker, + mock_s3_server=mock_s3_server, + run_id=run_id, ) as builder: env = builder.init_start() # For convenience in tests, create a branch from the freshly-initialized cluster. - env.neon_cli.create_branch('empty', ancestor_branch_name=DEFAULT_BRANCH_NAME) + env.neon_cli.create_branch("empty", ancestor_branch_name=DEFAULT_BRANCH_NAME) yield env -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def neon_simple_env(_shared_simple_env: NeonEnv) -> Iterator[NeonEnv]: """ Simple Neon environment, with no authentication and no safekeepers. 
@@ -908,7 +931,7 @@ def neon_simple_env(_shared_simple_env: NeonEnv) -> Iterator[NeonEnv]: _shared_simple_env.postgres.stop_all() -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def neon_env_builder( test_output_dir, port_distributor: PortDistributor, @@ -934,11 +957,11 @@ def neon_env_builder( # Return the builder to the caller with NeonEnvBuilder( - repo_dir=Path(repo_dir), - port_distributor=port_distributor, - mock_s3_server=mock_s3_server, - broker=default_broker, - run_id=run_id, + repo_dir=Path(repo_dir), + port_distributor=port_distributor, + mock_s3_server=mock_s3_server, + broker=default_broker, + run_id=run_id, ) as builder: yield builder @@ -954,16 +977,16 @@ class NeonPageserverHttpClient(requests.Session): self.auth_token = auth_token if auth_token is not None: - self.headers['Authorization'] = f'Bearer {auth_token}' + self.headers["Authorization"] = f"Bearer {auth_token}" def verbose_error(self, res: requests.Response): try: res.raise_for_status() except requests.RequestException as e: try: - msg = res.json()['msg'] + msg = res.json()["msg"] except: - msg = '' + msg = "" raise NeonPageserverApiException(msg) from e def check_status(self): @@ -980,12 +1003,12 @@ class NeonPageserverHttpClient(requests.Session): res = self.post( f"http://localhost:{self.port}/v1/tenant", json={ - 'new_tenant_id': new_tenant_id.hex if new_tenant_id else None, + "new_tenant_id": new_tenant_id.hex if new_tenant_id else None, }, ) self.verbose_error(res) if res.status_code == 409: - raise Exception(f'could not create tenant: already exists for id {new_tenant_id}') + raise Exception(f"could not create tenant: already exists for id {new_tenant_id}") new_tenant_id = res.json() assert isinstance(new_tenant_id, str) return uuid.UUID(new_tenant_id) @@ -1019,28 +1042,29 @@ class NeonPageserverHttpClient(requests.Session): ancestor_timeline_id: Optional[uuid.UUID] = None, ancestor_start_lsn: Optional[str] = None, ) -> Dict[Any, Any]: - res = 
self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline", - json={ - 'new_timeline_id': - new_timeline_id.hex if new_timeline_id else None, - 'ancestor_start_lsn': - ancestor_start_lsn, - 'ancestor_timeline_id': - ancestor_timeline_id.hex if ancestor_timeline_id else None, - }) + res = self.post( + f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline", + json={ + "new_timeline_id": new_timeline_id.hex if new_timeline_id else None, + "ancestor_start_lsn": ancestor_start_lsn, + "ancestor_timeline_id": ancestor_timeline_id.hex if ancestor_timeline_id else None, + }, + ) self.verbose_error(res) if res.status_code == 409: - raise Exception(f'could not create timeline: already exists for id {new_timeline_id}') + raise Exception(f"could not create timeline: already exists for id {new_timeline_id}") res_json = res.json() assert isinstance(res_json, dict) return res_json - def timeline_detail(self, - tenant_id: uuid.UUID, - timeline_id: uuid.UUID, - include_non_incremental_logical_size: bool = False, - include_non_incremental_physical_size: bool = False) -> Dict[Any, Any]: + def timeline_detail( + self, + tenant_id: uuid.UUID, + timeline_id: uuid.UUID, + include_non_incremental_logical_size: bool = False, + include_non_incremental_physical_size: bool = False, + ) -> Dict[Any, Any]: include_non_incremental_logical_size_str = "0" if include_non_incremental_logical_size: @@ -1051,9 +1075,10 @@ class NeonPageserverHttpClient(requests.Session): include_non_incremental_physical_size_str = "1" res = self.get( - f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}" + - "?include-non-incremental-logical-size={include_non_incremental_logical_size_str}" + - "&include-non-incremental-physical-size={include_non_incremental_physical_size_str}") + f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}" + + "?include-non-incremental-logical-size={include_non_incremental_logical_size_str}" + + 
"&include-non-incremental-physical-size={include_non_incremental_physical_size_str}" + ) self.verbose_error(res) res_json = res.json() assert isinstance(res_json, dict) @@ -1061,7 +1086,8 @@ class NeonPageserverHttpClient(requests.Session): def timeline_delete(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID): res = self.delete( - f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}") + f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}" + ) self.verbose_error(res) res_json = res.json() assert res_json is None @@ -1079,12 +1105,15 @@ class PageserverPort: http: int -CREATE_TIMELINE_ID_EXTRACTOR = re.compile(r"^Created timeline '(?P[^']+)'", - re.MULTILINE) -CREATE_TIMELINE_ID_EXTRACTOR = re.compile(r"^Created timeline '(?P[^']+)'", - re.MULTILINE) -TIMELINE_DATA_EXTRACTOR = re.compile(r"\s(?P[^\s]+)\s\[(?P[^\]]+)\]", - re.MULTILINE) +CREATE_TIMELINE_ID_EXTRACTOR = re.compile( + r"^Created timeline '(?P[^']+)'", re.MULTILINE +) +CREATE_TIMELINE_ID_EXTRACTOR = re.compile( + r"^Created timeline '(?P[^']+)'", re.MULTILINE +) +TIMELINE_DATA_EXTRACTOR = re.compile( + r"\s(?P[^\s]+)\s\[(?P[^\]]+)\]", re.MULTILINE +) class AbstractNeonCli(abc.ABC): @@ -1093,15 +1122,18 @@ class AbstractNeonCli(abc.ABC): Supports a way to run arbitrary command directly via CLI. Do not use directly, use specific subclasses instead. """ + def __init__(self, env: NeonEnv): self.env = env COMMAND: str = cast(str, None) # To be overwritten by the derived class. - def raw_cli(self, - arguments: List[str], - extra_env_vars: Optional[Dict[str, str]] = None, - check_return_code=True) -> 'subprocess.CompletedProcess[str]': + def raw_cli( + self, + arguments: List[str], + extra_env_vars: Optional[Dict[str, str]] = None, + check_return_code=True, + ) -> "subprocess.CompletedProcess[str]": """ Run the command with the specified arguments. 
@@ -1122,30 +1154,32 @@ class AbstractNeonCli(abc.ABC): bin_neon = os.path.join(str(neon_binpath), self.COMMAND) args = [bin_neon] + arguments - log.info('Running command "{}"'.format(' '.join(args))) + log.info('Running command "{}"'.format(" ".join(args))) log.info(f'Running in "{self.env.repo_dir}"') env_vars = os.environ.copy() - env_vars['NEON_REPO_DIR'] = str(self.env.repo_dir) - env_vars['POSTGRES_DISTRIB_DIR'] = str(pg_distrib_dir) + env_vars["NEON_REPO_DIR"] = str(self.env.repo_dir) + env_vars["POSTGRES_DISTRIB_DIR"] = str(pg_distrib_dir) if self.env.rust_log_override is not None: - env_vars['RUST_LOG'] = self.env.rust_log_override + env_vars["RUST_LOG"] = self.env.rust_log_override for (extra_env_key, extra_env_value) in (extra_env_vars or {}).items(): env_vars[extra_env_key] = extra_env_value # Pass coverage settings - var = 'LLVM_PROFILE_FILE' + var = "LLVM_PROFILE_FILE" val = os.environ.get(var) if val: env_vars[var] = val # Intercept CalledProcessError and print more info - res = subprocess.run(args, - env=env_vars, - check=False, - universal_newlines=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + res = subprocess.run( + args, + env=env_vars, + check=False, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) if not res.returncode: log.info(f"Run success: {res.stdout}") elif check_return_code: @@ -1156,10 +1190,9 @@ class AbstractNeonCli(abc.ABC): stderr: {res.stderr} """ log.info(msg) - raise Exception(msg) from subprocess.CalledProcessError(res.returncode, - res.args, - res.stdout, - res.stderr) + raise Exception(msg) from subprocess.CalledProcessError( + res.returncode, res.args, res.stdout, res.stderr + ) return res @@ -1169,12 +1202,14 @@ class NeonCli(AbstractNeonCli): Supports main commands via typed methods and a way to run arbitrary command directly via CLI. 
""" - COMMAND = 'neon_local' + COMMAND = "neon_local" - def create_tenant(self, - tenant_id: Optional[uuid.UUID] = None, - timeline_id: Optional[uuid.UUID] = None, - conf: Optional[Dict[str, str]] = None) -> Tuple[uuid.UUID, uuid.UUID]: + def create_tenant( + self, + tenant_id: Optional[uuid.UUID] = None, + timeline_id: Optional[uuid.UUID] = None, + conf: Optional[Dict[str, str]] = None, + ) -> Tuple[uuid.UUID, uuid.UUID]: """ Creates a new tenant, returns its id and its initial timeline's id. """ @@ -1183,13 +1218,14 @@ class NeonCli(AbstractNeonCli): if timeline_id is None: timeline_id = uuid.uuid4() if conf is None: - res = self.raw_cli([ - 'tenant', 'create', '--tenant-id', tenant_id.hex, '--timeline-id', timeline_id.hex - ]) + res = self.raw_cli( + ["tenant", "create", "--tenant-id", tenant_id.hex, "--timeline-id", timeline_id.hex] + ) else: - res = self.raw_cli([ - 'tenant', 'create', '--tenant-id', tenant_id.hex, '--timeline-id', timeline_id.hex - ] + sum(list(map(lambda kv: (['-c', kv[0] + ':' + kv[1]]), conf.items())), [])) + res = self.raw_cli( + ["tenant", "create", "--tenant-id", tenant_id.hex, "--timeline-id", timeline_id.hex] + + sum(list(map(lambda kv: (["-c", kv[0] + ":" + kv[1]]), conf.items())), []) + ) res.check_returncode() return tenant_id, timeline_id @@ -1198,27 +1234,28 @@ class NeonCli(AbstractNeonCli): Update tenant config. 
""" if conf is None: - res = self.raw_cli(['tenant', 'config', '--tenant-id', tenant_id.hex]) + res = self.raw_cli(["tenant", "config", "--tenant-id", tenant_id.hex]) else: res = self.raw_cli( - ['tenant', 'config', '--tenant-id', tenant_id.hex] + - sum(list(map(lambda kv: (['-c', kv[0] + ':' + kv[1]]), conf.items())), [])) + ["tenant", "config", "--tenant-id", tenant_id.hex] + + sum(list(map(lambda kv: (["-c", kv[0] + ":" + kv[1]]), conf.items())), []) + ) res.check_returncode() - def list_tenants(self) -> 'subprocess.CompletedProcess[str]': - res = self.raw_cli(['tenant', 'list']) + def list_tenants(self) -> "subprocess.CompletedProcess[str]": + res = self.raw_cli(["tenant", "list"]) res.check_returncode() return res - def create_timeline(self, - new_branch_name: str, - tenant_id: Optional[uuid.UUID] = None) -> uuid.UUID: + def create_timeline( + self, new_branch_name: str, tenant_id: Optional[uuid.UUID] = None + ) -> uuid.UUID: cmd = [ - 'timeline', - 'create', - '--branch-name', + "timeline", + "create", + "--branch-name", new_branch_name, - '--tenant-id', + "--tenant-id", (tenant_id or self.env.initial_tenant).hex, ] @@ -1229,17 +1266,17 @@ class NeonCli(AbstractNeonCli): created_timeline_id = None if matches is not None: - created_timeline_id = matches.group('timeline_id') + created_timeline_id = matches.group("timeline_id") return uuid.UUID(created_timeline_id) def create_root_branch(self, branch_name: str, tenant_id: Optional[uuid.UUID] = None): cmd = [ - 'timeline', - 'create', - '--branch-name', + "timeline", + "create", + "--branch-name", branch_name, - '--tenant-id', + "--tenant-id", (tenant_id or self.env.initial_tenant).hex, ] @@ -1250,30 +1287,32 @@ class NeonCli(AbstractNeonCli): created_timeline_id = None if matches is not None: - created_timeline_id = matches.group('timeline_id') + created_timeline_id = matches.group("timeline_id") if created_timeline_id is None: - raise Exception('could not find timeline id after `neon timeline create` 
invocation') + raise Exception("could not find timeline id after `neon timeline create` invocation") else: return uuid.UUID(created_timeline_id) - def create_branch(self, - new_branch_name: str = DEFAULT_BRANCH_NAME, - ancestor_branch_name: Optional[str] = None, - tenant_id: Optional[uuid.UUID] = None, - ancestor_start_lsn: Optional[str] = None) -> uuid.UUID: + def create_branch( + self, + new_branch_name: str = DEFAULT_BRANCH_NAME, + ancestor_branch_name: Optional[str] = None, + tenant_id: Optional[uuid.UUID] = None, + ancestor_start_lsn: Optional[str] = None, + ) -> uuid.UUID: cmd = [ - 'timeline', - 'branch', - '--branch-name', + "timeline", + "branch", + "--branch-name", new_branch_name, - '--tenant-id', + "--tenant-id", (tenant_id or self.env.initial_tenant).hex, ] if ancestor_branch_name is not None: - cmd.extend(['--ancestor-branch-name', ancestor_branch_name]) + cmd.extend(["--ancestor-branch-name", ancestor_branch_name]) if ancestor_start_lsn is not None: - cmd.extend(['--ancestor-start-lsn', ancestor_start_lsn]) + cmd.extend(["--ancestor-start-lsn", ancestor_start_lsn]) res = self.raw_cli(cmd) res.check_returncode() @@ -1282,10 +1321,10 @@ class NeonCli(AbstractNeonCli): created_timeline_id = None if matches is not None: - created_timeline_id = matches.group('timeline_id') + created_timeline_id = matches.group("timeline_id") if created_timeline_id is None: - raise Exception('could not find timeline id after `neon timeline create` invocation') + raise Exception("could not find timeline id after `neon timeline create` invocation") else: return uuid.UUID(created_timeline_id) @@ -1297,52 +1336,60 @@ class NeonCli(AbstractNeonCli): # (L) main [b49f7954224a0ad25cc0013ea107b54b] # (L) ┣━ @0/16B5A50: test_cli_branch_list_main [20f98c79111b9015d84452258b7d5540] res = self.raw_cli( - ['timeline', 'list', '--tenant-id', (tenant_id or self.env.initial_tenant).hex]) + ["timeline", "list", "--tenant-id", (tenant_id or self.env.initial_tenant).hex] + ) timelines_cli = 
sorted( - map(lambda branch_and_id: (branch_and_id[0], branch_and_id[1]), - TIMELINE_DATA_EXTRACTOR.findall(res.stdout))) + map( + lambda branch_and_id: (branch_and_id[0], branch_and_id[1]), + TIMELINE_DATA_EXTRACTOR.findall(res.stdout), + ) + ) return timelines_cli - def init(self, - config_toml: str, - initial_timeline_id: Optional[uuid.UUID] = None) -> 'subprocess.CompletedProcess[str]': - with tempfile.NamedTemporaryFile(mode='w+') as tmp: + def init( + self, config_toml: str, initial_timeline_id: Optional[uuid.UUID] = None + ) -> "subprocess.CompletedProcess[str]": + with tempfile.NamedTemporaryFile(mode="w+") as tmp: tmp.write(config_toml) tmp.flush() - cmd = ['init', f'--config={tmp.name}'] + cmd = ["init", f"--config={tmp.name}"] if initial_timeline_id: - cmd.extend(['--timeline-id', initial_timeline_id.hex]) + cmd.extend(["--timeline-id", initial_timeline_id.hex]) append_pageserver_param_overrides( params_to_update=cmd, remote_storage=self.env.remote_storage, remote_storage_users=self.env.remote_storage_users, - pageserver_config_override=self.env.pageserver.config_override) + pageserver_config_override=self.env.pageserver.config_override, + ) res = self.raw_cli(cmd) res.check_returncode() return res def pageserver_enabled_features(self) -> Any: - bin_pageserver = os.path.join(str(neon_binpath), 'pageserver') - args = [bin_pageserver, '--enabled-features'] - log.info('Running command "{}"'.format(' '.join(args))) + bin_pageserver = os.path.join(str(neon_binpath), "pageserver") + args = [bin_pageserver, "--enabled-features"] + log.info('Running command "{}"'.format(" ".join(args))) - res = subprocess.run(args, - check=True, - universal_newlines=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + res = subprocess.run( + args, + check=True, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) log.info(f"pageserver_enabled_features success: {res.stdout}") return json.loads(res.stdout) - def pageserver_start(self, 
overrides=()) -> 'subprocess.CompletedProcess[str]': - start_args = ['pageserver', 'start', *overrides] + def pageserver_start(self, overrides=()) -> "subprocess.CompletedProcess[str]": + start_args = ["pageserver", "start", *overrides] append_pageserver_param_overrides( params_to_update=start_args, remote_storage=self.env.remote_storage, remote_storage_users=self.env.remote_storage_users, - pageserver_config_override=self.env.pageserver.config_override) + pageserver_config_override=self.env.pageserver.config_override, + ) s3_env_vars = None if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage): @@ -1350,29 +1397,29 @@ class NeonCli(AbstractNeonCli): return self.raw_cli(start_args, extra_env_vars=s3_env_vars) - def pageserver_stop(self, immediate=False) -> 'subprocess.CompletedProcess[str]': - cmd = ['pageserver', 'stop'] + def pageserver_stop(self, immediate=False) -> "subprocess.CompletedProcess[str]": + cmd = ["pageserver", "stop"] if immediate: - cmd.extend(['-m', 'immediate']) + cmd.extend(["-m", "immediate"]) log.info(f"Stopping pageserver with {cmd}") return self.raw_cli(cmd) - def safekeeper_start(self, id: int) -> 'subprocess.CompletedProcess[str]': + def safekeeper_start(self, id: int) -> "subprocess.CompletedProcess[str]": s3_env_vars = None if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage): s3_env_vars = self.env.remote_storage.access_env_vars() - return self.raw_cli(['safekeeper', 'start', str(id)], extra_env_vars=s3_env_vars) + return self.raw_cli(["safekeeper", "start", str(id)], extra_env_vars=s3_env_vars) - def safekeeper_stop(self, - id: Optional[int] = None, - immediate=False) -> 'subprocess.CompletedProcess[str]': - args = ['safekeeper', 'stop'] + def safekeeper_stop( + self, id: Optional[int] = None, immediate=False + ) -> "subprocess.CompletedProcess[str]": + args = ["safekeeper", "stop"] if id is not None: args.append(str(id)) if immediate: - args.extend(['-m', 
'immediate']) + args.extend(["-m", "immediate"]) return self.raw_cli(args) def pg_create( @@ -1382,19 +1429,19 @@ class NeonCli(AbstractNeonCli): tenant_id: Optional[uuid.UUID] = None, lsn: Optional[str] = None, port: Optional[int] = None, - ) -> 'subprocess.CompletedProcess[str]': + ) -> "subprocess.CompletedProcess[str]": args = [ - 'pg', - 'create', - '--tenant-id', + "pg", + "create", + "--tenant-id", (tenant_id or self.env.initial_tenant).hex, - '--branch-name', + "--branch-name", branch_name, ] if lsn is not None: - args.extend(['--lsn', lsn]) + args.extend(["--lsn", lsn]) if port is not None: - args.extend(['--port', str(port)]) + args.extend(["--port", str(port)]) if node_name is not None: args.append(node_name) @@ -1408,17 +1455,17 @@ class NeonCli(AbstractNeonCli): tenant_id: Optional[uuid.UUID] = None, lsn: Optional[str] = None, port: Optional[int] = None, - ) -> 'subprocess.CompletedProcess[str]': + ) -> "subprocess.CompletedProcess[str]": args = [ - 'pg', - 'start', - '--tenant-id', + "pg", + "start", + "--tenant-id", (tenant_id or self.env.initial_tenant).hex, ] if lsn is not None: - args.append(f'--lsn={lsn}') + args.append(f"--lsn={lsn}") if port is not None: - args.append(f'--port={port}') + args.append(f"--port={port}") if node_name is not None: args.append(node_name) @@ -1432,15 +1479,15 @@ class NeonCli(AbstractNeonCli): tenant_id: Optional[uuid.UUID] = None, destroy=False, check_return_code=True, - ) -> 'subprocess.CompletedProcess[str]': + ) -> "subprocess.CompletedProcess[str]": args = [ - 'pg', - 'stop', - '--tenant-id', + "pg", + "stop", + "--tenant-id", (tenant_id or self.env.initial_tenant).hex, ] if destroy: - args.append('--destroy') + args.append("--destroy") if node_name is not None: args.append(node_name) @@ -1453,12 +1500,12 @@ class WalCraft(AbstractNeonCli): Supports main commands via typed methods and a way to run arbitrary command directly via CLI. 
""" - COMMAND = 'wal_craft' + COMMAND = "wal_craft" def postgres_config(self) -> List[str]: res = self.raw_cli(["print-postgres-config"]) res.check_returncode() - return res.stdout.split('\n') + return res.stdout.split("\n") def in_existing(self, type: str, connection: str) -> None: res = self.raw_cli(["in-existing", type, connection]) @@ -1471,14 +1518,15 @@ class NeonPageserver(PgProtocol): Initializes the repository via `neon init`. """ + def __init__(self, env: NeonEnv, port: PageserverPort, config_override: Optional[str] = None): - super().__init__(host='localhost', port=port.pg, user='cloud_admin') + super().__init__(host="localhost", port=port.pg, user="cloud_admin") self.env = env self.running = False self.service_port = port self.config_override = config_override - def start(self, overrides=()) -> 'NeonPageserver': + def start(self, overrides=()) -> "NeonPageserver": """ Start the page server. `overrides` allows to add some config to this pageserver start. @@ -1490,7 +1538,7 @@ class NeonPageserver(PgProtocol): self.running = True return self - def stop(self, immediate=False) -> 'NeonPageserver': + def stop(self, immediate=False) -> "NeonPageserver": """ Stop the page server. Returns self. 
@@ -1523,31 +1571,33 @@ def append_pageserver_param_overrides( remote_storage_toml_table = remote_storage_to_toml_inline_table(remote_storage) params_to_update.append( - f'--pageserver-config-override=remote_storage={remote_storage_toml_table}') + f"--pageserver-config-override=remote_storage={remote_storage_toml_table}" + ) - env_overrides = os.getenv('ZENITH_PAGESERVER_OVERRIDES') + env_overrides = os.getenv("ZENITH_PAGESERVER_OVERRIDES") if env_overrides is not None: params_to_update += [ - f'--pageserver-config-override={o.strip()}' for o in env_overrides.split(';') + f"--pageserver-config-override={o.strip()}" for o in env_overrides.split(";") ] if pageserver_config_override is not None: params_to_update += [ - f'--pageserver-config-override={o.strip()}' - for o in pageserver_config_override.split(';') + f"--pageserver-config-override={o.strip()}" + for o in pageserver_config_override.split(";") ] class PgBin: - """ A helper class for executing postgres binaries """ + """A helper class for executing postgres binaries""" + def __init__(self, log_dir: Path): self.log_dir = log_dir - self.pg_bin_path = os.path.join(str(pg_distrib_dir), 'bin') + self.pg_bin_path = os.path.join(str(pg_distrib_dir), "bin") self.env = os.environ.copy() - self.env['LD_LIBRARY_PATH'] = os.path.join(str(pg_distrib_dir), 'lib') + self.env["LD_LIBRARY_PATH"] = os.path.join(str(pg_distrib_dir), "lib") def _fixpath(self, command: List[str]): - if '/' not in command[0]: + if "/" not in command[0]: command[0] = os.path.join(self.pg_bin_path, command[0]) def _build_env(self, env_add: Optional[Env]) -> Env: @@ -1572,15 +1622,17 @@ class PgBin: """ self._fixpath(command) - log.info('Running command "{}"'.format(' '.join(command))) + log.info('Running command "{}"'.format(" ".join(command))) env = self._build_env(env) subprocess.run(command, env=env, cwd=cwd, check=True) - def run_capture(self, - command: List[str], - env: Optional[Env] = None, - cwd: Optional[str] = None, - **kwargs: Any) -> 
str: + def run_capture( + self, + command: List[str], + env: Optional[Env] = None, + cwd: Optional[str] = None, + **kwargs: Any, + ) -> str: """ Run one of the postgres binaries, with stderr and stdout redirected to a file. @@ -1589,35 +1641,32 @@ class PgBin: """ self._fixpath(command) - log.info('Running command "{}"'.format(' '.join(command))) + log.info('Running command "{}"'.format(" ".join(command))) env = self._build_env(env) - return subprocess_capture(str(self.log_dir), - command, - env=env, - cwd=cwd, - check=True, - **kwargs) + return subprocess_capture( + str(self.log_dir), command, env=env, cwd=cwd, check=True, **kwargs + ) -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def pg_bin(test_output_dir: Path) -> PgBin: return PgBin(test_output_dir) class VanillaPostgres(PgProtocol): def __init__(self, pgdatadir: Path, pg_bin: PgBin, port: int, init=True): - super().__init__(host='localhost', port=port, dbname='postgres') + super().__init__(host="localhost", port=port, dbname="postgres") self.pgdatadir = pgdatadir self.pg_bin = pg_bin self.running = False if init: - self.pg_bin.run_capture(['initdb', '-D', str(pgdatadir)]) + self.pg_bin.run_capture(["initdb", "-D", str(pgdatadir)]) self.configure([f"port = {port}\n"]) def configure(self, options: List[str]): """Append lines into postgresql.conf file.""" assert not self.running - with open(os.path.join(self.pgdatadir, 'postgresql.conf'), 'a') as conf_file: + with open(os.path.join(self.pgdatadir, "postgresql.conf"), "a") as conf_file: conf_file.write("\n".join(options)) def start(self, log_path: Optional[str] = None): @@ -1628,12 +1677,13 @@ class VanillaPostgres(PgProtocol): log_path = os.path.join(self.pgdatadir, "pg.log") self.pg_bin.run_capture( - ['pg_ctl', '-w', '-D', str(self.pgdatadir), '-l', log_path, 'start']) + ["pg_ctl", "-w", "-D", str(self.pgdatadir), "-l", log_path, "start"] + ) def stop(self): assert self.running self.running = False - self.pg_bin.run_capture(['pg_ctl', 
'-w', '-D', str(self.pgdatadir), 'stop']) + self.pg_bin.run_capture(["pg_ctl", "-w", "-D", str(self.pgdatadir), "stop"]) def get_subdir_size(self, subdir) -> int: """Return size of pgdatadir subdirectory in bytes.""" @@ -1647,9 +1697,10 @@ class VanillaPostgres(PgProtocol): self.stop() -@pytest.fixture(scope='function') -def vanilla_pg(test_output_dir: Path, - port_distributor: PortDistributor) -> Iterator[VanillaPostgres]: +@pytest.fixture(scope="function") +def vanilla_pg( + test_output_dir: Path, port_distributor: PortDistributor +) -> Iterator[VanillaPostgres]: pgdatadir = test_output_dir / "pgdata-vanilla" pg_bin = PgBin(test_output_dir) port = port_distributor.get_port() @@ -1665,18 +1716,18 @@ class RemotePostgres(PgProtocol): self.running = True def configure(self, options: List[str]): - raise Exception('cannot change configuration of remote Posgres instance') + raise Exception("cannot change configuration of remote Posgres instance") def start(self): - raise Exception('cannot start a remote Postgres instance') + raise Exception("cannot start a remote Postgres instance") def stop(self): - raise Exception('cannot stop a remote Postgres instance') + raise Exception("cannot stop a remote Postgres instance") def get_subdir_size(self, subdir) -> int: # TODO: Could use the server's Generic File Access functions if superuser. 
# See https://www.postgresql.org/docs/14/functions-admin.html#FUNCTIONS-ADMIN-GENFILE - raise Exception('cannot get size of a Postgres instance') + raise Exception("cannot get size of a Postgres instance") def __enter__(self): return self @@ -1686,7 +1737,7 @@ class RemotePostgres(PgProtocol): pass -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def remote_pg(test_output_dir: Path) -> Iterator[RemotePostgres]: pg_bin = PgBin(test_output_dir) @@ -1701,7 +1752,7 @@ def remote_pg(test_output_dir: Path) -> Iterator[RemotePostgres]: class NeonProxy(PgProtocol): def __init__(self, proxy_port: int, http_port: int, auth_endpoint: str): super().__init__(dsn=auth_endpoint, port=proxy_port) - self.host = '127.0.0.1' + self.host = "127.0.0.1" self.http_port = http_port self.proxy_port = proxy_port self.auth_endpoint = auth_endpoint @@ -1712,7 +1763,7 @@ class NeonProxy(PgProtocol): # Start proxy args = [ - os.path.join(str(neon_binpath), 'proxy'), + os.path.join(str(neon_binpath), "proxy"), *["--http", f"{self.host}:{self.http_port}"], *["--proxy", f"{self.host}:{self.proxy_port}"], *["--auth-backend", "postgres"], @@ -1735,7 +1786,7 @@ class NeonProxy(PgProtocol): self._popen.kill() -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def static_proxy(vanilla_pg, port_distributor) -> Iterator[NeonProxy]: """Neon proxy that routes directly to vanilla postgres.""" @@ -1743,28 +1794,28 @@ def static_proxy(vanilla_pg, port_distributor) -> Iterator[NeonProxy]: vanilla_pg.start() vanilla_pg.safe_psql("create user proxy with login superuser password 'password'") - port = vanilla_pg.default_options['port'] - host = vanilla_pg.default_options['host'] - dbname = vanilla_pg.default_options['dbname'] - auth_endpoint = f'postgres://proxy:password@{host}:{port}/{dbname}' + port = vanilla_pg.default_options["port"] + host = vanilla_pg.default_options["host"] + dbname = vanilla_pg.default_options["dbname"] + auth_endpoint = 
f"postgres://proxy:password@{host}:{port}/{dbname}" proxy_port = port_distributor.get_port() http_port = port_distributor.get_port() - with NeonProxy(proxy_port=proxy_port, http_port=http_port, - auth_endpoint=auth_endpoint) as proxy: + with NeonProxy( + proxy_port=proxy_port, http_port=http_port, auth_endpoint=auth_endpoint + ) as proxy: proxy.start() yield proxy class Postgres(PgProtocol): - """ An object representing a running postgres daemon. """ - def __init__(self, - env: NeonEnv, - tenant_id: uuid.UUID, - port: int, - check_stop_result: bool = True): - super().__init__(host='localhost', port=port, user='cloud_admin', dbname='postgres') + """An object representing a running postgres daemon.""" + + def __init__( + self, env: NeonEnv, tenant_id: uuid.UUID, port: int, check_stop_result: bool = True + ): + super().__init__(host="localhost", port=port, user="cloud_admin", dbname="postgres") self.env = env self.running = False self.node_name: Optional[str] = None # dubious, see asserts below @@ -1780,7 +1831,7 @@ class Postgres(PgProtocol): node_name: Optional[str] = None, lsn: Optional[str] = None, config_lines: Optional[List[str]] = None, - ) -> 'Postgres': + ) -> "Postgres": """ Create the pg data directory. Returns self. 
@@ -1789,13 +1840,11 @@ class Postgres(PgProtocol): if not config_lines: config_lines = [] - self.node_name = node_name or f'{branch_name}_pg_node' - self.env.neon_cli.pg_create(branch_name, - node_name=self.node_name, - tenant_id=self.tenant_id, - lsn=lsn, - port=self.port) - path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id.hex / self.node_name + self.node_name = node_name or f"{branch_name}_pg_node" + self.env.neon_cli.pg_create( + branch_name, node_name=self.node_name, tenant_id=self.tenant_id, lsn=lsn, port=self.port + ) + path = pathlib.Path("pgdatadirs") / "tenants" / self.tenant_id.hex / self.node_name self.pgdata_dir = os.path.join(self.env.repo_dir, path) if config_lines is None: @@ -1803,12 +1852,12 @@ class Postgres(PgProtocol): # set small 'max_replication_write_lag' to enable backpressure # and make tests more stable. - config_lines = ['max_replication_write_lag=15MB'] + config_lines + config_lines = ["max_replication_write_lag=15MB"] + config_lines self.config(config_lines) return self - def start(self) -> 'Postgres': + def start(self) -> "Postgres": """ Start the Postgres instance. Returns self. 
@@ -1818,32 +1867,32 @@ class Postgres(PgProtocol): log.info(f"Starting postgres node {self.node_name}") - run_result = self.env.neon_cli.pg_start(self.node_name, - tenant_id=self.tenant_id, - port=self.port) + run_result = self.env.neon_cli.pg_start( + self.node_name, tenant_id=self.tenant_id, port=self.port + ) self.running = True return self def pg_data_dir_path(self) -> str: - """ Path to data directory """ + """Path to data directory""" assert self.node_name - path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id.hex / self.node_name + path = pathlib.Path("pgdatadirs") / "tenants" / self.tenant_id.hex / self.node_name return os.path.join(self.env.repo_dir, path) def pg_xact_dir_path(self) -> str: - """ Path to pg_xact dir """ - return os.path.join(self.pg_data_dir_path(), 'pg_xact') + """Path to pg_xact dir""" + return os.path.join(self.pg_data_dir_path(), "pg_xact") def pg_twophase_dir_path(self) -> str: - """ Path to pg_twophase dir """ - return os.path.join(self.pg_data_dir_path(), 'pg_twophase') + """Path to pg_twophase dir""" + return os.path.join(self.pg_data_dir_path(), "pg_twophase") def config_file_path(self) -> str: - """ Path to postgresql.conf """ - return os.path.join(self.pg_data_dir_path(), 'postgresql.conf') + """Path to postgresql.conf""" + return os.path.join(self.pg_data_dir_path(), "postgresql.conf") - def adjust_for_safekeepers(self, safekeepers: str) -> 'Postgres': + def adjust_for_safekeepers(self, safekeepers: str) -> "Postgres": """ Adjust instance config for working with wal acceptors instead of pageserver (pre-configured by CLI) directly. 
@@ -1855,30 +1904,33 @@ class Postgres(PgProtocol): with open(self.config_file_path(), "w") as f: for cfg_line in cfg_lines: # walproposer uses different application_name - if ("synchronous_standby_names" in cfg_line or - # don't repeat safekeepers/wal_acceptors multiple times - "neon.safekeepers" in cfg_line): + if ( + "synchronous_standby_names" in cfg_line + or + # don't repeat safekeepers/wal_acceptors multiple times + "neon.safekeepers" in cfg_line + ): continue f.write(cfg_line) f.write("synchronous_standby_names = 'walproposer'\n") f.write("neon.safekeepers = '{}'\n".format(safekeepers)) return self - def config(self, lines: List[str]) -> 'Postgres': + def config(self, lines: List[str]) -> "Postgres": """ Add lines to postgresql.conf. Lines should be an array of valid postgresql.conf rows. Returns self. """ - with open(self.config_file_path(), 'a') as conf: + with open(self.config_file_path(), "a") as conf: for line in lines: conf.write(line) - conf.write('\n') + conf.write("\n") return self - def stop(self) -> 'Postgres': + def stop(self) -> "Postgres": """ Stop the Postgres instance if it's running. Returns self. @@ -1886,24 +1938,23 @@ class Postgres(PgProtocol): if self.running: assert self.node_name is not None - self.env.neon_cli.pg_stop(self.node_name, - self.tenant_id, - check_return_code=self.check_stop_result) + self.env.neon_cli.pg_stop( + self.node_name, self.tenant_id, check_return_code=self.check_stop_result + ) self.running = False return self - def stop_and_destroy(self) -> 'Postgres': + def stop_and_destroy(self) -> "Postgres": """ Stop the Postgres instance, then destroy it. Returns self. 
""" assert self.node_name is not None - self.env.neon_cli.pg_stop(self.node_name, - self.tenant_id, - True, - check_return_code=self.check_stop_result) + self.env.neon_cli.pg_stop( + self.node_name, self.tenant_id, True, check_return_code=self.check_stop_result + ) self.node_name = None self.running = False @@ -1915,7 +1966,7 @@ class Postgres(PgProtocol): node_name: Optional[str] = None, lsn: Optional[str] = None, config_lines: Optional[List[str]] = None, - ) -> 'Postgres': + ) -> "Postgres": """ Create a Postgres instance, apply config and then start it. @@ -1943,18 +1994,21 @@ class Postgres(PgProtocol): class PostgresFactory: - """ An object representing multiple running postgres daemons. """ + """An object representing multiple running postgres daemons.""" + def __init__(self, env: NeonEnv): self.env = env self.num_instances = 0 self.instances: List[Postgres] = [] - def create_start(self, - branch_name: str, - node_name: Optional[str] = None, - tenant_id: Optional[uuid.UUID] = None, - lsn: Optional[str] = None, - config_lines: Optional[List[str]] = None) -> Postgres: + def create_start( + self, + branch_name: str, + node_name: Optional[str] = None, + tenant_id: Optional[uuid.UUID] = None, + lsn: Optional[str] = None, + config_lines: Optional[List[str]] = None, + ) -> Postgres: pg = Postgres( self.env, @@ -1971,12 +2025,14 @@ class PostgresFactory: lsn=lsn, ) - def create(self, - branch_name: str, - node_name: Optional[str] = None, - tenant_id: Optional[uuid.UUID] = None, - lsn: Optional[str] = None, - config_lines: Optional[List[str]] = None) -> Postgres: + def create( + self, + branch_name: str, + node_name: Optional[str] = None, + tenant_id: Optional[uuid.UUID] = None, + lsn: Optional[str] = None, + config_lines: Optional[List[str]] = None, + ) -> Postgres: pg = Postgres( self.env, @@ -1994,7 +2050,7 @@ class PostgresFactory: config_lines=config_lines, ) - def stop_all(self) -> 'PostgresFactory': + def stop_all(self) -> "PostgresFactory": for pg in 
self.instances: pg.stop() @@ -2002,7 +2058,7 @@ class PostgresFactory: def read_pid(path: Path) -> int: - """ Read content of file into number """ + """Read content of file into number""" return int(path.read_text()) @@ -2014,13 +2070,14 @@ class SafekeeperPort: @dataclass class Safekeeper: - """ An object representing a running safekeeper daemon. """ + """An object representing a running safekeeper daemon.""" + env: NeonEnv port: SafekeeperPort id: int running: bool = False - def start(self) -> 'Safekeeper': + def start(self) -> "Safekeeper": assert self.running == False self.env.neon_cli.safekeeper_start(self.id) self.running = True @@ -2034,22 +2091,22 @@ class Safekeeper: elapsed = time.time() - started_at if elapsed > 3: raise RuntimeError( - f"timed out waiting {elapsed:.0f}s for wal acceptor start: {e}") + f"timed out waiting {elapsed:.0f}s for wal acceptor start: {e}" + ) time.sleep(0.5) else: break # success return self - def stop(self, immediate=False) -> 'Safekeeper': - log.info('Stopping safekeeper {}'.format(self.id)) + def stop(self, immediate=False) -> "Safekeeper": + log.info("Stopping safekeeper {}".format(self.id)) self.env.neon_cli.safekeeper_stop(self.id, immediate) self.running = False return self - def append_logical_message(self, - tenant_id: uuid.UUID, - timeline_id: uuid.UUID, - request: Dict[str, Any]) -> Dict[str, Any]: + def append_logical_message( + self, tenant_id: uuid.UUID, timeline_id: uuid.UUID, request: Dict[str, Any] + ) -> Dict[str, Any]: """ Send JSON_CTRL query to append LogicalMessage to WAL and modify safekeeper state. 
It will construct LogicalMessage from provided @@ -2106,7 +2163,7 @@ class SafekeeperHttpClient(requests.Session): self.auth_token = auth_token if auth_token is not None: - self.headers['Authorization'] = f'Bearer {auth_token}' + self.headers["Authorization"] = f"Bearer {auth_token}" def check_status(self): self.get(f"http://localhost:{self.port}/v1/status").raise_for_status() @@ -2115,21 +2172,25 @@ class SafekeeperHttpClient(requests.Session): res = self.get(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}") res.raise_for_status() resj = res.json() - return SafekeeperTimelineStatus(acceptor_epoch=resj['acceptor_state']['epoch'], - flush_lsn=resj['flush_lsn'], - timeline_start_lsn=resj['timeline_start_lsn'], - backup_lsn=resj['backup_lsn'], - remote_consistent_lsn=resj['remote_consistent_lsn']) + return SafekeeperTimelineStatus( + acceptor_epoch=resj["acceptor_state"]["epoch"], + flush_lsn=resj["flush_lsn"], + timeline_start_lsn=resj["timeline_start_lsn"], + backup_lsn=resj["backup_lsn"], + remote_consistent_lsn=resj["remote_consistent_lsn"], + ) def record_safekeeper_info(self, tenant_id: str, timeline_id: str, body): res = self.post( f"http://localhost:{self.port}/v1/record_safekeeper_info/{tenant_id}/{timeline_id}", - json=body) + json=body, + ) res.raise_for_status() def timeline_delete_force(self, tenant_id: str, timeline_id: str) -> Dict[Any, Any]: res = self.delete( - f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}") + f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}" + ) res.raise_for_status() res_json = res.json() assert isinstance(res_json, dict) @@ -2152,21 +2213,24 @@ class SafekeeperHttpClient(requests.Session): metrics = SafekeeperMetrics() for match in re.finditer( - r'^safekeeper_flush_lsn{tenant_id="([0-9a-f]+)",timeline_id="([0-9a-f]+)"} (\S+)$', - all_metrics_text, - re.MULTILINE): + r'^safekeeper_flush_lsn{tenant_id="([0-9a-f]+)",timeline_id="([0-9a-f]+)"} 
(\S+)$', + all_metrics_text, + re.MULTILINE, + ): metrics.flush_lsn_inexact[(match.group(1), match.group(2))] = int(match.group(3)) for match in re.finditer( - r'^safekeeper_commit_lsn{tenant_id="([0-9a-f]+)",timeline_id="([0-9a-f]+)"} (\S+)$', - all_metrics_text, - re.MULTILINE): + r'^safekeeper_commit_lsn{tenant_id="([0-9a-f]+)",timeline_id="([0-9a-f]+)"} (\S+)$', + all_metrics_text, + re.MULTILINE, + ): metrics.commit_lsn_inexact[(match.group(1), match.group(2))] = int(match.group(3)) return metrics @dataclass class Etcd: - """ An object managing etcd instance """ + """An object managing etcd instance""" + datadir: str port: int peer_port: int @@ -2177,16 +2241,16 @@ class Etcd: self.binary_path = etcd_path() def client_url(self): - return f'http://127.0.0.1:{self.port}' + return f"http://127.0.0.1:{self.port}" def check_status(self): with requests.Session() as s: - s.mount('http://', requests.adapters.HTTPAdapter(max_retries=1)) # do not retry + s.mount("http://", requests.adapters.HTTPAdapter(max_retries=1)) # do not retry s.get(f"{self.client_url()}/health").raise_for_status() def try_start(self): if self.handle is not None: - log.debug(f'etcd is already running on port {self.port}') + log.debug(f"etcd is already running on port {self.port}") return pathlib.Path(self.datadir).mkdir(exist_ok=True) @@ -2206,7 +2270,7 @@ class Etcd: # Set --quota-backend-bytes to keep the etcd virtual memory # size smaller. Our test etcd clusters are very small. # See https://github.com/etcd-io/etcd/issues/7910 - f"--quota-backend-bytes=100000000" + f"--quota-backend-bytes=100000000", ] self.handle = subprocess.Popen(args, stdout=log_file, stderr=log_file) @@ -2230,21 +2294,23 @@ class Etcd: def get_test_output_dir(request: Any) -> pathlib.Path: - """ Compute the working directory for an individual test. 
""" + """Compute the working directory for an individual test.""" test_name = request.node.name test_dir = pathlib.Path(top_output_dir) / test_name.replace("/", "-") - log.info(f'get_test_output_dir is {test_dir}') + log.info(f"get_test_output_dir is {test_dir}") # make mypy happy assert isinstance(test_dir, pathlib.Path) return test_dir -ATTACHMENT_SUFFIXES = frozenset(( - '.log', - '.stderr', - '.stdout', - '.diffs', -)) +ATTACHMENT_SUFFIXES = frozenset( + ( + ".log", + ".stderr", + ".stdout", + ".diffs", + ) +) # This is autouse, so the test output directory always gets created, even @@ -2256,51 +2322,59 @@ ATTACHMENT_SUFFIXES = frozenset(( # scope. So it uses the get_test_output_dir() function to get the path, and # this fixture ensures that the directory exists. That works because # 'autouse' fixtures are run before other fixtures. -@pytest.fixture(scope='function', autouse=True) +@pytest.fixture(scope="function", autouse=True) def test_output_dir(request: Any) -> Iterator[pathlib.Path]: - """ Create the working directory for an individual test. 
""" + """Create the working directory for an individual test.""" # one directory per test test_dir = get_test_output_dir(request) - log.info(f'test_output_dir is {test_dir}') + log.info(f"test_output_dir is {test_dir}") shutil.rmtree(test_dir, ignore_errors=True) test_dir.mkdir() yield test_dir - for attachment in test_dir.glob('**/*'): + for attachment in test_dir.glob("**/*"): if attachment.suffix in ATTACHMENT_SUFFIXES: source = str(attachment) name = str(attachment.relative_to(test_dir)) - attachment_type = 'text/plain' - extension = attachment.suffix.removeprefix('.') + attachment_type = "text/plain" + extension = attachment.suffix.removeprefix(".") # compress files larger than 1Mb, they're hardly readable in a browser if attachment.stat().st_size > 1024 * 1024: - source = f'{attachment}.tar.gz' - with tarfile.open(source, 'w:gz') as tar: + source = f"{attachment}.tar.gz" + with tarfile.open(source, "w:gz") as tar: tar.add(attachment, arcname=attachment.name) - name = f'{name}.tar.gz' - attachment_type = 'application/gzip' - extension = 'tar.gz' + name = f"{name}.tar.gz" + attachment_type = "application/gzip" + extension = "tar.gz" allure.attach.file(source, name, attachment_type, extension) -SKIP_DIRS = frozenset(('pg_wal', - 'pg_stat', - 'pg_stat_tmp', - 'pg_subtrans', - 'pg_logical', - 'pg_replslot/wal_proposer_slot')) +SKIP_DIRS = frozenset( + ( + "pg_wal", + "pg_stat", + "pg_stat_tmp", + "pg_subtrans", + "pg_logical", + "pg_replslot/wal_proposer_slot", + ) +) -SKIP_FILES = frozenset(('pg_internal.init', - 'pg.log', - 'zenith.signal', - 'postgresql.conf', - 'postmaster.opts', - 'postmaster.pid', - 'pg_control')) +SKIP_FILES = frozenset( + ( + "pg_internal.init", + "pg.log", + "zenith.signal", + "postgresql.conf", + "postmaster.opts", + "postmaster.pid", + "pg_control", + ) +) def should_skip_dir(dirname: str) -> bool: @@ -2312,10 +2386,10 @@ def should_skip_file(filename: str) -> bool: return True # check for temp table files according to 
https://www.postgresql.org/docs/current/storage-file-layout.html # i e "tBBB_FFF" - if not filename.startswith('t'): + if not filename.startswith("t"): return False - tmp_name = filename[1:].split('_') + tmp_name = filename[1:].split("_") if len(tmp_name) != 2: return False @@ -2358,7 +2432,7 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, pg: Post restored_dir_path.mkdir(exist_ok=True) pg_bin = PgBin(test_output_dir) - psql_path = os.path.join(pg_bin.pg_bin_path, 'psql') + psql_path = os.path.join(pg_bin.pg_bin_path, "psql") cmd = rf""" {psql_path} \ @@ -2370,12 +2444,12 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, pg: Post # Set LD_LIBRARY_PATH in the env properly, otherwise we may use the wrong libpq. # PgBin sets it automatically, but here we need to pipe psql output to the tar command. - psql_env = {'LD_LIBRARY_PATH': os.path.join(str(pg_distrib_dir), 'lib')} + psql_env = {"LD_LIBRARY_PATH": os.path.join(str(pg_distrib_dir), "lib")} result = subprocess.run(cmd, env=psql_env, capture_output=True, text=True, shell=True) # Print captured stdout/stderr if basebackup cmd failed. 
if result.returncode != 0: - log.error('Basebackup shell command failed with:') + log.error("Basebackup shell command failed with:") log.error(result.stdout) log.error(result.stderr) assert result.returncode == 0 @@ -2392,11 +2466,10 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, pg: Post # filecmp returns (match, mismatch, error) lists # We've already filtered all mismatching files in list_files_to_compare(), # so here expect that the content is identical - (match, mismatch, error) = filecmp.cmpfiles(pg.pgdata_dir, - restored_dir_path, - pgdata_files, - shallow=False) - log.info(f'filecmp result mismatch and error lists:\n\t mismatch={mismatch}\n\t error={error}') + (match, mismatch, error) = filecmp.cmpfiles( + pg.pgdata_dir, restored_dir_path, pgdata_files, shallow=False + ) + log.info(f"filecmp result mismatch and error lists:\n\t mismatch={mismatch}\n\t error={error}") for f in mismatch: @@ -2404,11 +2477,11 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, pg: Post f2 = os.path.join(restored_dir_path, f) stdout_filename = "{}.filediff".format(f2) - with open(stdout_filename, 'w') as stdout_f: + with open(stdout_filename, "w") as stdout_f: subprocess.run("xxd -b {} > {}.hex ".format(f1, f1), shell=True) subprocess.run("xxd -b {} > {}.hex ".format(f2, f2), shell=True) - cmd = 'diff {}.hex {}.hex'.format(f1, f2) + cmd = "diff {}.hex {}.hex".format(f1, f2) subprocess.run([cmd], stdout=stdout_f, shell=True) assert (mismatch, error) == ([], []) @@ -2432,11 +2505,11 @@ def wait_until(number_of_iterations: int, interval: float, func): raise Exception("timed out while waiting for %s" % func) from last_exception -def assert_timeline_local(pageserver_http_client: NeonPageserverHttpClient, - tenant: uuid.UUID, - timeline: uuid.UUID): +def assert_timeline_local( + pageserver_http_client: NeonPageserverHttpClient, tenant: uuid.UUID, timeline: uuid.UUID +): timeline_detail = pageserver_http_client.timeline_detail(tenant, 
timeline) - assert timeline_detail.get('local', {}).get("disk_consistent_lsn"), timeline_detail + assert timeline_detail.get("local", {}).get("disk_consistent_lsn"), timeline_detail return timeline_detail @@ -2445,65 +2518,81 @@ def assert_no_in_progress_downloads_for_tenant( tenant: uuid.UUID, ): tenant_status = pageserver_http_client.tenant_status(tenant) - assert tenant_status['has_in_progress_downloads'] is False, tenant_status + assert tenant_status["has_in_progress_downloads"] is False, tenant_status -def remote_consistent_lsn(pageserver_http_client: NeonPageserverHttpClient, - tenant: uuid.UUID, - timeline: uuid.UUID) -> int: +def remote_consistent_lsn( + pageserver_http_client: NeonPageserverHttpClient, tenant: uuid.UUID, timeline: uuid.UUID +) -> int: detail = pageserver_http_client.timeline_detail(tenant, timeline) - if detail['remote'] is None: + if detail["remote"] is None: # No remote information at all. This happens right after creating # a timeline, before any part of it has been uploaded to remote # storage yet. 
return 0 else: - lsn_str = detail['remote']['remote_consistent_lsn'] + lsn_str = detail["remote"]["remote_consistent_lsn"] assert isinstance(lsn_str, str) return lsn_from_hex(lsn_str) -def wait_for_upload(pageserver_http_client: NeonPageserverHttpClient, - tenant: uuid.UUID, - timeline: uuid.UUID, - lsn: int): +def wait_for_upload( + pageserver_http_client: NeonPageserverHttpClient, + tenant: uuid.UUID, + timeline: uuid.UUID, + lsn: int, +): """waits for local timeline upload up to specified lsn""" for i in range(20): current_lsn = remote_consistent_lsn(pageserver_http_client, tenant, timeline) if current_lsn >= lsn: return - log.info("waiting for remote_consistent_lsn to reach {}, now {}, iteration {}".format( - lsn_to_hex(lsn), lsn_to_hex(current_lsn), i + 1)) + log.info( + "waiting for remote_consistent_lsn to reach {}, now {}, iteration {}".format( + lsn_to_hex(lsn), lsn_to_hex(current_lsn), i + 1 + ) + ) time.sleep(1) - raise Exception("timed out while waiting for remote_consistent_lsn to reach {}, was {}".format( - lsn_to_hex(lsn), lsn_to_hex(current_lsn))) + raise Exception( + "timed out while waiting for remote_consistent_lsn to reach {}, was {}".format( + lsn_to_hex(lsn), lsn_to_hex(current_lsn) + ) + ) -def last_record_lsn(pageserver_http_client: NeonPageserverHttpClient, - tenant: uuid.UUID, - timeline: uuid.UUID) -> int: +def last_record_lsn( + pageserver_http_client: NeonPageserverHttpClient, tenant: uuid.UUID, timeline: uuid.UUID +) -> int: detail = pageserver_http_client.timeline_detail(tenant, timeline) - lsn_str = detail['local']['last_record_lsn'] + lsn_str = detail["local"]["last_record_lsn"] assert isinstance(lsn_str, str) return lsn_from_hex(lsn_str) -def wait_for_last_record_lsn(pageserver_http_client: NeonPageserverHttpClient, - tenant: uuid.UUID, - timeline: uuid.UUID, - lsn: int): +def wait_for_last_record_lsn( + pageserver_http_client: NeonPageserverHttpClient, + tenant: uuid.UUID, + timeline: uuid.UUID, + lsn: int, +): """waits for 
pageserver to catch up to a certain lsn""" for i in range(10): current_lsn = last_record_lsn(pageserver_http_client, tenant, timeline) if current_lsn >= lsn: return - log.info("waiting for last_record_lsn to reach {}, now {}, iteration {}".format( - lsn_to_hex(lsn), lsn_to_hex(current_lsn), i + 1)) + log.info( + "waiting for last_record_lsn to reach {}, now {}, iteration {}".format( + lsn_to_hex(lsn), lsn_to_hex(current_lsn), i + 1 + ) + ) time.sleep(1) - raise Exception("timed out while waiting for last_record_lsn to reach {}, was {}".format( - lsn_to_hex(lsn), lsn_to_hex(current_lsn))) + raise Exception( + "timed out while waiting for last_record_lsn to reach {}, was {}".format( + lsn_to_hex(lsn), lsn_to_hex(current_lsn) + ) + ) def wait_for_last_flush_lsn(env: NeonEnv, pg: Postgres, tenant: uuid.UUID, timeline: uuid.UUID): diff --git a/test_runner/fixtures/pg_stats.py b/test_runner/fixtures/pg_stats.py index e113d37248..b2e6886eb3 100644 --- a/test_runner/fixtures/pg_stats.py +++ b/test_runner/fixtures/pg_stats.py @@ -18,35 +18,43 @@ class PgStatTable: return f"SELECT {','.join(self.columns)} FROM {self.table} {self.additional_query}" -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def pg_stats_rw() -> List[PgStatTable]: return [ - PgStatTable("pg_stat_database", - ["tup_returned", "tup_fetched", "tup_inserted", "tup_updated", "tup_deleted"], - "WHERE datname='postgres'"), + PgStatTable( + "pg_stat_database", + ["tup_returned", "tup_fetched", "tup_inserted", "tup_updated", "tup_deleted"], + "WHERE datname='postgres'", + ), ] -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def pg_stats_ro() -> List[PgStatTable]: return [ - PgStatTable("pg_stat_database", ["tup_returned", "tup_fetched"], - "WHERE datname='postgres'"), + PgStatTable( + "pg_stat_database", ["tup_returned", "tup_fetched"], "WHERE datname='postgres'" + ), ] -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def pg_stats_wo() -> 
List[PgStatTable]: return [ - PgStatTable("pg_stat_database", ["tup_inserted", "tup_updated", "tup_deleted"], - "WHERE datname='postgres'"), + PgStatTable( + "pg_stat_database", + ["tup_inserted", "tup_updated", "tup_deleted"], + "WHERE datname='postgres'", + ), ] -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def pg_stats_wal() -> List[PgStatTable]: return [ - PgStatTable("pg_stat_wal", - ["wal_records", "wal_fpi", "wal_bytes", "wal_buffers_full", "wal_write"], - "") + PgStatTable( + "pg_stat_wal", + ["wal_records", "wal_fpi", "wal_bytes", "wal_buffers_full", "wal_write"], + "", + ) ] diff --git a/test_runner/fixtures/slow.py b/test_runner/fixtures/slow.py index c20b766a93..94199ae785 100644 --- a/test_runner/fixtures/slow.py +++ b/test_runner/fixtures/slow.py @@ -1,4 +1,5 @@ import pytest + """ This plugin allows tests to be marked as slow using pytest.mark.slow. By default slow tests are excluded. They need to be specifically requested with the --runslow flag in diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py index a37d40014c..48889a8697 100644 --- a/test_runner/fixtures/utils.py +++ b/test_runner/fixtures/utils.py @@ -4,20 +4,19 @@ import pathlib import shutil import subprocess from pathlib import Path - from typing import Any, List, Tuple -from psycopg2.extensions import cursor from fixtures.log_helper import log +from psycopg2.extensions import cursor def get_self_dir() -> str: - """ Get the path to the directory where this script lives. 
""" + """Get the path to the directory where this script lives.""" return os.path.dirname(os.path.abspath(__file__)) def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str: - """ Run a process and capture its output + """Run a process and capture its output Output will go to files named "cmd_NNN.stdout" and "cmd_NNN.stderr" where "cmd" is the name of the program and NNN is an incrementing @@ -27,14 +26,14 @@ def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str: Returns basepath for files with captured output. """ assert type(cmd) is list - base = os.path.basename(cmd[0]) + '_{}'.format(global_counter()) + base = os.path.basename(cmd[0]) + "_{}".format(global_counter()) basepath = os.path.join(capture_dir, base) - stdout_filename = basepath + '.stdout' - stderr_filename = basepath + '.stderr' + stdout_filename = basepath + ".stdout" + stderr_filename = basepath + ".stderr" try: - with open(stdout_filename, 'w') as stdout_f: - with open(stderr_filename, 'w') as stderr_f: + with open(stdout_filename, "w") as stdout_f: + with open(stderr_filename, "w") as stderr_f: log.info(f'Capturing stdout to "{base}.stdout" and stderr to "{base}.stderr"') subprocess.run(cmd, **kwargs, stdout=stdout_f, stderr=stderr_f) finally: @@ -50,7 +49,7 @@ _global_counter = 0 def global_counter() -> int: - """ A really dumb global counter. + """A really dumb global counter. This is useful for giving output files a unique number, so if we run the same command multiple times we can keep their output separate. @@ -61,13 +60,13 @@ def global_counter() -> int: def lsn_to_hex(num: int) -> str: - """ Convert lsn from int to standard hex notation. """ - return "{:X}/{:X}".format(num >> 32, num & 0xffffffff) + """Convert lsn from int to standard hex notation.""" + return "{:X}/{:X}".format(num >> 32, num & 0xFFFFFFFF) def lsn_from_hex(lsn_hex: str) -> int: - """ Convert lsn from hex notation to int. 
""" - l, r = lsn_hex.split('/') + """Convert lsn from hex notation to int.""" + l, r = lsn_hex.split("/") return (int(l, 16) << 32) + int(r, 16) @@ -75,14 +74,16 @@ def print_gc_result(row): log.info("GC duration {elapsed} ms".format_map(row)) log.info( " total: {layers_total}, needed_by_cutoff {layers_needed_by_cutoff}, needed_by_pitr {layers_needed_by_pitr}" - " needed_by_branches: {layers_needed_by_branches}, not_updated: {layers_not_updated}, removed: {layers_removed}" - .format_map(row)) + " needed_by_branches: {layers_needed_by_branches}, not_updated: {layers_not_updated}, removed: {layers_removed}".format_map( + row + ) + ) def etcd_path() -> Path: path_output = shutil.which("etcd") if path_output is None: - raise RuntimeError('etcd not found in PATH') + raise RuntimeError("etcd not found in PATH") else: return Path(path_output) @@ -145,7 +146,12 @@ def parse_delta_layer(f_name: str) -> Tuple[int, int, int, int]: parts = f_name.split("__") key_parts = parts[0].split("-") lsn_parts = parts[1].split("-") - return int(key_parts[0], 16), int(key_parts[1], 16), int(lsn_parts[0], 16), int(lsn_parts[1], 16) + return ( + int(key_parts[0], 16), + int(key_parts[1], 16), + int(lsn_parts[0], 16), + int(lsn_parts[1], 16), + ) def get_scale_for_db(size_mb: int) -> int: diff --git a/test_runner/performance/test_branch_creation.py b/test_runner/performance/test_branch_creation.py index 1d39b0830d..9cb346de47 100644 --- a/test_runner/performance/test_branch_creation.py +++ b/test_runner/performance/test_branch_creation.py @@ -1,28 +1,26 @@ import random -import time import statistics import threading +import time import timeit -import pytest from typing import List + +import pytest from fixtures.benchmark_fixture import MetricReport from fixtures.compare_fixtures import NeonCompare from fixtures.log_helper import log def _record_branch_creation_durations(neon_compare: NeonCompare, durs: List[float]): - neon_compare.zenbenchmark.record("branch_creation_duration_max", - 
max(durs), - 's', - MetricReport.LOWER_IS_BETTER) - neon_compare.zenbenchmark.record("branch_creation_duration_avg", - statistics.mean(durs), - 's', - MetricReport.LOWER_IS_BETTER) - neon_compare.zenbenchmark.record("branch_creation_duration_stdev", - statistics.stdev(durs), - 's', - MetricReport.LOWER_IS_BETTER) + neon_compare.zenbenchmark.record( + "branch_creation_duration_max", max(durs), "s", MetricReport.LOWER_IS_BETTER + ) + neon_compare.zenbenchmark.record( + "branch_creation_duration_avg", statistics.mean(durs), "s", MetricReport.LOWER_IS_BETTER + ) + neon_compare.zenbenchmark.record( + "branch_creation_duration_stdev", statistics.stdev(durs), "s", MetricReport.LOWER_IS_BETTER + ) @pytest.mark.parametrize("n_branches", [20]) @@ -37,15 +35,16 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int) # Use aggressive GC and checkpoint settings, so GC and compaction happen more often during the test tenant, _ = env.neon_cli.create_tenant( - conf={ - 'gc_period': '5 s', - 'gc_horizon': f'{4 * 1024 ** 2}', - 'checkpoint_distance': f'{2 * 1024 ** 2}', - 'compaction_target_size': f'{1024 ** 2}', - 'compaction_threshold': '2', - # set PITR interval to be small, so we can do GC - 'pitr_interval': '5 s' - }) + conf={ + "gc_period": "5 s", + "gc_horizon": f"{4 * 1024 ** 2}", + "checkpoint_distance": f"{2 * 1024 ** 2}", + "compaction_target_size": f"{1024 ** 2}", + "compaction_threshold": "2", + # set PITR interval to be small, so we can do GC + "pitr_interval": "5 s", + } + ) def run_pgbench(branch: str): log.info(f"Start a pgbench workload on branch {branch}") @@ -53,15 +52,15 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int) pg = env.postgres.create_start(branch, tenant_id=tenant) connstr = pg.connstr() - pg_bin.run_capture(['pgbench', '-i', connstr]) - pg_bin.run_capture(['pgbench', '-c10', '-T10', connstr]) + pg_bin.run_capture(["pgbench", "-i", connstr]) + pg_bin.run_capture(["pgbench", "-c10", "-T10", 
connstr]) pg.stop() - env.neon_cli.create_branch('b0', tenant_id=tenant) + env.neon_cli.create_branch("b0", tenant_id=tenant) threads: List[threading.Thread] = [] - threads.append(threading.Thread(target=run_pgbench, args=('b0', ), daemon=True)) + threads.append(threading.Thread(target=run_pgbench, args=("b0",), daemon=True)) threads[-1].start() branch_creation_durations = [] @@ -72,13 +71,13 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int) p = random.randint(0, i) timer = timeit.default_timer() - env.neon_cli.create_branch('b{}'.format(i + 1), 'b{}'.format(p), tenant_id=tenant) + env.neon_cli.create_branch("b{}".format(i + 1), "b{}".format(p), tenant_id=tenant) dur = timeit.default_timer() - timer log.info(f"Creating branch b{i+1} took {dur}s") branch_creation_durations.append(dur) - threads.append(threading.Thread(target=run_pgbench, args=(f'b{i+1}', ), daemon=True)) + threads.append(threading.Thread(target=run_pgbench, args=(f"b{i+1}",), daemon=True)) threads[-1].start() for thread in threads: @@ -92,10 +91,10 @@ def test_branch_creation_heavy_write(neon_compare: NeonCompare, n_branches: int) def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int): env = neon_compare.env - env.neon_cli.create_branch('b0') + env.neon_cli.create_branch("b0") - pg = env.postgres.create_start('b0') - neon_compare.pg_bin.run_capture(['pgbench', '-i', '-s10', pg.connstr()]) + pg = env.postgres.create_start("b0") + neon_compare.pg_bin.run_capture(["pgbench", "-i", "-s10", pg.connstr()]) branch_creation_durations = [] @@ -103,7 +102,7 @@ def test_branch_creation_many(neon_compare: NeonCompare, n_branches: int): # random a source branch p = random.randint(0, i) timer = timeit.default_timer() - env.neon_cli.create_branch('b{}'.format(i + 1), 'b{}'.format(p)) + env.neon_cli.create_branch("b{}".format(i + 1), "b{}".format(p)) dur = timeit.default_timer() - timer branch_creation_durations.append(dur) diff --git 
a/test_runner/performance/test_bulk_insert.py b/test_runner/performance/test_bulk_insert.py index 6a5bad8757..9aaf0cbc77 100644 --- a/test_runner/performance/test_bulk_insert.py +++ b/test_runner/performance/test_bulk_insert.py @@ -1,8 +1,9 @@ from contextlib import closing -from fixtures.neon_fixtures import NeonEnv -from fixtures.log_helper import log + from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker -from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare +from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv # @@ -23,8 +24,8 @@ def test_bulk_insert(neon_with_baseline: PgCompare): cur.execute("create table huge (i int, j int);") # Run INSERT, recording the time and I/O it takes - with env.record_pageserver_writes('pageserver_writes'): - with env.record_duration('insert'): + with env.record_pageserver_writes("pageserver_writes"): + with env.record_duration("insert"): cur.execute("insert into huge values (generate_series(1, 5000000), 0);") env.flush() diff --git a/test_runner/performance/test_bulk_tenant_create.py b/test_runner/performance/test_bulk_tenant_create.py index fe3c3afe37..cef7ce0c6b 100644 --- a/test_runner/performance/test_bulk_tenant_create.py +++ b/test_runner/performance/test_bulk_tenant_create.py @@ -1,7 +1,7 @@ import timeit -from fixtures.benchmark_fixture import MetricReport -import pytest +import pytest +from fixtures.benchmark_fixture import MetricReport from fixtures.neon_fixtures import NeonEnvBuilder # Run bulk tenant creation test. @@ -12,7 +12,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder # 2. 
Average creation time per tenant -@pytest.mark.parametrize('tenants_count', [1, 5, 10]) +@pytest.mark.parametrize("tenants_count", [1, 5, 10]) def test_bulk_tenant_create( neon_env_builder: NeonEnvBuilder, tenants_count: int, @@ -27,22 +27,26 @@ def test_bulk_tenant_create( start = timeit.default_timer() tenant, _ = env.neon_cli.create_tenant() - env.neon_cli.create_timeline(f'test_bulk_tenant_create_{tenants_count}_{i}', - tenant_id=tenant) + env.neon_cli.create_timeline( + f"test_bulk_tenant_create_{tenants_count}_{i}", tenant_id=tenant + ) # FIXME: We used to start new safekeepers here. Did that make sense? Should we do it now? - #if use_safekeepers == 'with_sa': + # if use_safekeepers == 'with_sa': # wa_factory.start_n_new(3) - pg_tenant = env.postgres.create_start(f'test_bulk_tenant_create_{tenants_count}_{i}', - tenant_id=tenant) + pg_tenant = env.postgres.create_start( + f"test_bulk_tenant_create_{tenants_count}_{i}", tenant_id=tenant + ) end = timeit.default_timer() time_slices.append(end - start) pg_tenant.stop() - zenbenchmark.record('tenant_creation_time', - sum(time_slices) / len(time_slices), - 's', - report=MetricReport.LOWER_IS_BETTER) + zenbenchmark.record( + "tenant_creation_time", + sum(time_slices) / len(time_slices), + "s", + report=MetricReport.LOWER_IS_BETTER, + ) diff --git a/test_runner/performance/test_compare_pg_stats.py b/test_runner/performance/test_compare_pg_stats.py index b9bca90231..d39ea55fbb 100644 --- a/test_runner/performance/test_compare_pg_stats.py +++ b/test_runner/performance/test_compare_pg_stats.py @@ -6,7 +6,6 @@ from typing import List import pytest from fixtures.compare_fixtures import PgCompare from fixtures.pg_stats import PgStatTable - from performance.test_perf_pgbench import get_durations_matrix, get_scales_matrix @@ -18,85 +17,96 @@ def get_seeds_matrix(default: int = 100): @pytest.mark.parametrize("seed", get_seeds_matrix()) @pytest.mark.parametrize("scale", get_scales_matrix()) 
@pytest.mark.parametrize("duration", get_durations_matrix(5)) -def test_compare_pg_stats_rw_with_pgbench_default(neon_with_baseline: PgCompare, - seed: int, - scale: int, - duration: int, - pg_stats_rw: List[PgStatTable]): +def test_compare_pg_stats_rw_with_pgbench_default( + neon_with_baseline: PgCompare, + seed: int, + scale: int, + duration: int, + pg_stats_rw: List[PgStatTable], +): env = neon_with_baseline # initialize pgbench - env.pg_bin.run_capture(['pgbench', f'-s{scale}', '-i', env.pg.connstr()]) + env.pg_bin.run_capture(["pgbench", f"-s{scale}", "-i", env.pg.connstr()]) env.flush() with env.record_pg_stats(pg_stats_rw): env.pg_bin.run_capture( - ['pgbench', f'-T{duration}', f'--random-seed={seed}', env.pg.connstr()]) + ["pgbench", f"-T{duration}", f"--random-seed={seed}", env.pg.connstr()] + ) env.flush() @pytest.mark.parametrize("seed", get_seeds_matrix()) @pytest.mark.parametrize("scale", get_scales_matrix()) @pytest.mark.parametrize("duration", get_durations_matrix(5)) -def test_compare_pg_stats_wo_with_pgbench_simple_update(neon_with_baseline: PgCompare, - seed: int, - scale: int, - duration: int, - pg_stats_wo: List[PgStatTable]): +def test_compare_pg_stats_wo_with_pgbench_simple_update( + neon_with_baseline: PgCompare, + seed: int, + scale: int, + duration: int, + pg_stats_wo: List[PgStatTable], +): env = neon_with_baseline # initialize pgbench - env.pg_bin.run_capture(['pgbench', f'-s{scale}', '-i', env.pg.connstr()]) + env.pg_bin.run_capture(["pgbench", f"-s{scale}", "-i", env.pg.connstr()]) env.flush() with env.record_pg_stats(pg_stats_wo): env.pg_bin.run_capture( - ['pgbench', '-N', f'-T{duration}', f'--random-seed={seed}', env.pg.connstr()]) + ["pgbench", "-N", f"-T{duration}", f"--random-seed={seed}", env.pg.connstr()] + ) env.flush() @pytest.mark.parametrize("seed", get_seeds_matrix()) @pytest.mark.parametrize("scale", get_scales_matrix()) @pytest.mark.parametrize("duration", get_durations_matrix(5)) -def 
test_compare_pg_stats_ro_with_pgbench_select_only(neon_with_baseline: PgCompare, - seed: int, - scale: int, - duration: int, - pg_stats_ro: List[PgStatTable]): +def test_compare_pg_stats_ro_with_pgbench_select_only( + neon_with_baseline: PgCompare, + seed: int, + scale: int, + duration: int, + pg_stats_ro: List[PgStatTable], +): env = neon_with_baseline # initialize pgbench - env.pg_bin.run_capture(['pgbench', f'-s{scale}', '-i', env.pg.connstr()]) + env.pg_bin.run_capture(["pgbench", f"-s{scale}", "-i", env.pg.connstr()]) env.flush() with env.record_pg_stats(pg_stats_ro): env.pg_bin.run_capture( - ['pgbench', '-S', f'-T{duration}', f'--random-seed={seed}', env.pg.connstr()]) + ["pgbench", "-S", f"-T{duration}", f"--random-seed={seed}", env.pg.connstr()] + ) env.flush() @pytest.mark.parametrize("seed", get_seeds_matrix()) @pytest.mark.parametrize("scale", get_scales_matrix()) @pytest.mark.parametrize("duration", get_durations_matrix(5)) -def test_compare_pg_stats_wal_with_pgbench_default(neon_with_baseline: PgCompare, - seed: int, - scale: int, - duration: int, - pg_stats_wal: List[PgStatTable]): +def test_compare_pg_stats_wal_with_pgbench_default( + neon_with_baseline: PgCompare, + seed: int, + scale: int, + duration: int, + pg_stats_wal: List[PgStatTable], +): env = neon_with_baseline # initialize pgbench - env.pg_bin.run_capture(['pgbench', f'-s{scale}', '-i', env.pg.connstr()]) + env.pg_bin.run_capture(["pgbench", f"-s{scale}", "-i", env.pg.connstr()]) env.flush() with env.record_pg_stats(pg_stats_wal): env.pg_bin.run_capture( - ['pgbench', f'-T{duration}', f'--random-seed={seed}', env.pg.connstr()]) + ["pgbench", f"-T{duration}", f"--random-seed={seed}", env.pg.connstr()] + ) env.flush() @pytest.mark.parametrize("n_tables", [1, 10]) @pytest.mark.parametrize("duration", get_durations_matrix(10)) -def test_compare_pg_stats_wo_with_heavy_write(neon_with_baseline: PgCompare, - n_tables: int, - duration: int, - pg_stats_wo: List[PgStatTable]): +def 
test_compare_pg_stats_wo_with_heavy_write( + neon_with_baseline: PgCompare, n_tables: int, duration: int, pg_stats_wo: List[PgStatTable] +): env = neon_with_baseline with env.pg.connect().cursor() as cur: for i in range(n_tables): @@ -112,8 +122,7 @@ def test_compare_pg_stats_wo_with_heavy_write(neon_with_baseline: PgCompare, with env.record_pg_stats(pg_stats_wo): threads = [ - threading.Thread(target=start_single_table_workload, args=(i, )) - for i in range(n_tables) + threading.Thread(target=start_single_table_workload, args=(i,)) for i in range(n_tables) ] for thread in threads: diff --git a/test_runner/performance/test_copy.py b/test_runner/performance/test_copy.py index ad088684d5..bf4804fc07 100644 --- a/test_runner/performance/test_copy.py +++ b/test_runner/performance/test_copy.py @@ -1,11 +1,12 @@ from contextlib import closing -from fixtures.neon_fixtures import NeonEnv -from fixtures.log_helper import log -from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker -from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare from io import BufferedReader, RawIOBase from itertools import repeat +from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker +from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv + class CopyTestData(RawIOBase): def __init__(self, rows: int): @@ -29,7 +30,7 @@ class CopyTestData(RawIOBase): # Number of bytes to read in this call l = min(len(self.linebuf) - self.ptr, len(b)) - b[:l] = self.linebuf[self.ptr:(self.ptr + l)] + b[:l] = self.linebuf[self.ptr : (self.ptr + l)] self.ptr += l return l @@ -52,19 +53,19 @@ def test_copy(neon_with_baseline: PgCompare): # Load data with COPY, recording the time and I/O it takes. # # Since there's no data in the table previously, this extends it. 
- with env.record_pageserver_writes('copy_extend_pageserver_writes'): - with env.record_duration('copy_extend'): - cur.copy_from(copy_test_data(1000000), 'copytest') + with env.record_pageserver_writes("copy_extend_pageserver_writes"): + with env.record_duration("copy_extend"): + cur.copy_from(copy_test_data(1000000), "copytest") env.flush() # Delete most rows, and VACUUM to make the space available for reuse. - with env.record_pageserver_writes('delete_pageserver_writes'): - with env.record_duration('delete'): + with env.record_pageserver_writes("delete_pageserver_writes"): + with env.record_duration("delete"): cur.execute("delete from copytest where i % 100 <> 0;") env.flush() - with env.record_pageserver_writes('vacuum_pageserver_writes'): - with env.record_duration('vacuum'): + with env.record_pageserver_writes("vacuum_pageserver_writes"): + with env.record_duration("vacuum"): cur.execute("vacuum copytest") env.flush() @@ -72,9 +73,9 @@ def test_copy(neon_with_baseline: PgCompare): # by the VACUUM. # # This will also clear all the VM bits. 
- with env.record_pageserver_writes('copy_reuse_pageserver_writes'): - with env.record_duration('copy_reuse'): - cur.copy_from(copy_test_data(1000000), 'copytest') + with env.record_pageserver_writes("copy_reuse_pageserver_writes"): + with env.record_duration("copy_reuse"): + cur.copy_from(copy_test_data(1000000), "copytest") env.flush() env.report_peak_memory_use() diff --git a/test_runner/performance/test_dup_key.py b/test_runner/performance/test_dup_key.py index ee867a9845..60fe3014ba 100644 --- a/test_runner/performance/test_dup_key.py +++ b/test_runner/performance/test_dup_key.py @@ -1,5 +1,6 @@ -import pytest from contextlib import closing + +import pytest from fixtures.compare_fixtures import PgCompare from pytest_lazyfixture import lazy_fixture # type: ignore @@ -11,22 +12,24 @@ from pytest_lazyfixture import lazy_fixture # type: ignore pytest.param(lazy_fixture("neon_compare"), id="neon", marks=pytest.mark.slow), pytest.param(lazy_fixture("vanilla_compare"), id="vanilla", marks=pytest.mark.slow), pytest.param(lazy_fixture("remote_compare"), id="remote", marks=pytest.mark.remote_cluster), - ]) + ], +) def test_dup_key(env: PgCompare): # Update the same page many times, then measure read performance with closing(env.pg.connect()) as conn: with conn.cursor() as cur: - cur.execute('drop table if exists t, f;') + cur.execute("drop table if exists t, f;") cur.execute("SET synchronous_commit=off") cur.execute("SET statement_timeout=0") # Write many updates to the same row - with env.record_duration('write'): + with env.record_duration("write"): cur.execute("create table t (i integer, filler text);") - cur.execute('insert into t values (0);') - cur.execute(""" + cur.execute("insert into t values (0);") + cur.execute( + """ do $$ begin for ivar in 1..5000000 loop @@ -38,13 +41,14 @@ begin end loop; end; $$; -""") +""" + ) # Write 3-4 MB to evict t from compute cache - cur.execute('create table f (i integer);') - cur.execute(f'insert into f values 
(generate_series(1,100000));') + cur.execute("create table f (i integer);") + cur.execute(f"insert into f values (generate_series(1,100000));") # Read - with env.record_duration('read'): - cur.execute('select * from t;') + with env.record_duration("read"): + cur.execute("select * from t;") cur.fetchall() diff --git a/test_runner/performance/test_gist_build.py b/test_runner/performance/test_gist_build.py index 839eb3f57d..d8fa97fbbf 100644 --- a/test_runner/performance/test_gist_build.py +++ b/test_runner/performance/test_gist_build.py @@ -1,9 +1,10 @@ import os from contextlib import closing + from fixtures.benchmark_fixture import MetricReport -from fixtures.neon_fixtures import NeonEnv -from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare +from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv # @@ -24,8 +25,8 @@ def test_gist_buffering_build(neon_with_baseline: PgCompare): ) # Build the index. 
- with env.record_pageserver_writes('pageserver_writes'): - with env.record_duration('build'): + with env.record_pageserver_writes("pageserver_writes"): + with env.record_duration("build"): cur.execute( "create index gist_pointidx2 on gist_point_tbl using gist(p) with (buffering = on)" ) diff --git a/test_runner/performance/test_hot_page.py b/test_runner/performance/test_hot_page.py index d3da0310ce..8e8ab9849a 100644 --- a/test_runner/performance/test_hot_page.py +++ b/test_runner/performance/test_hot_page.py @@ -1,5 +1,6 @@ -import pytest from contextlib import closing + +import pytest from fixtures.compare_fixtures import PgCompare from pytest_lazyfixture import lazy_fixture # type: ignore @@ -11,27 +12,28 @@ from pytest_lazyfixture import lazy_fixture # type: ignore pytest.param(lazy_fixture("neon_compare"), id="neon", marks=pytest.mark.slow), pytest.param(lazy_fixture("vanilla_compare"), id="vanilla", marks=pytest.mark.slow), pytest.param(lazy_fixture("remote_compare"), id="remote", marks=pytest.mark.remote_cluster), - ]) + ], +) def test_hot_page(env: PgCompare): # Update the same page many times, then measure read performance num_writes = 1000000 with closing(env.pg.connect()) as conn: with conn.cursor() as cur: - cur.execute('drop table if exists t, f;') + cur.execute("drop table if exists t, f;") # Write many updates to the same row - with env.record_duration('write'): - cur.execute('create table t (i integer);') - cur.execute('insert into t values (0);') + with env.record_duration("write"): + cur.execute("create table t (i integer);") + cur.execute("insert into t values (0);") for i in range(num_writes): - cur.execute(f'update t set i = {i};') + cur.execute(f"update t set i = {i};") # Write 3-4 MB to evict t from compute cache - cur.execute('create table f (i integer);') - cur.execute(f'insert into f values (generate_series(1,100000));') + cur.execute("create table f (i integer);") + cur.execute(f"insert into f values (generate_series(1,100000));") # Read 
- with env.record_duration('read'): - cur.execute('select * from t;') + with env.record_duration("read"): + cur.execute("select * from t;") cur.fetchall() diff --git a/test_runner/performance/test_hot_table.py b/test_runner/performance/test_hot_table.py index 997c772f88..2f519e152c 100644 --- a/test_runner/performance/test_hot_table.py +++ b/test_runner/performance/test_hot_table.py @@ -1,5 +1,6 @@ -import pytest from contextlib import closing + +import pytest from fixtures.compare_fixtures import PgCompare from pytest_lazyfixture import lazy_fixture # type: ignore @@ -11,7 +12,8 @@ from pytest_lazyfixture import lazy_fixture # type: ignore pytest.param(lazy_fixture("neon_compare"), id="neon", marks=pytest.mark.slow), pytest.param(lazy_fixture("vanilla_compare"), id="vanilla", marks=pytest.mark.slow), pytest.param(lazy_fixture("remote_compare"), id="remote", marks=pytest.mark.remote_cluster), - ]) + ], +) def test_hot_table(env: PgCompare): # Update a small table many times, then measure read performance num_rows = 100000 # Slightly larger than shared buffers size TODO validate @@ -20,17 +22,17 @@ def test_hot_table(env: PgCompare): with closing(env.pg.connect()) as conn: with conn.cursor() as cur: - cur.execute('drop table if exists t;') + cur.execute("drop table if exists t;") # Write many updates to a small table - with env.record_duration('write'): - cur.execute('create table t (i integer primary key);') - cur.execute(f'insert into t values (generate_series(1,{num_rows}));') + with env.record_duration("write"): + cur.execute("create table t (i integer primary key);") + cur.execute(f"insert into t values (generate_series(1,{num_rows}));") for i in range(num_writes): - cur.execute(f'update t set i = {i + num_rows} WHERE i = {i};') + cur.execute(f"update t set i = {i + num_rows} WHERE i = {i};") # Read the table - with env.record_duration('read'): + with env.record_duration("read"): for i in range(num_reads): - cur.execute('select * from t;') + cur.execute("select 
* from t;") cur.fetchall() diff --git a/test_runner/performance/test_parallel_copy_to.py b/test_runner/performance/test_parallel_copy_to.py index d4e74ce195..c1883dec7b 100644 --- a/test_runner/performance/test_parallel_copy_to.py +++ b/test_runner/performance/test_parallel_copy_to.py @@ -1,10 +1,11 @@ -from io import BytesIO import asyncio +from io import BytesIO + import asyncpg -from fixtures.neon_fixtures import NeonEnv, Postgres, PgProtocol -from fixtures.log_helper import log from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker -from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare +from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv, PgProtocol, Postgres async def repeat_bytes(buf, repetitions: int): @@ -16,7 +17,8 @@ async def copy_test_data_to_table(pg: PgProtocol, worker_id: int, table_name: st buf = BytesIO() for i in range(1000): buf.write( - f"{i}\tLoaded by worker {worker_id}. Long string to consume some space.\n".encode()) + f"{i}\tLoaded by worker {worker_id}. 
Long string to consume some space.\n".encode() + ) buf.seek(0) copy_input = repeat_bytes(buf.read(), 5000) @@ -28,7 +30,7 @@ async def copy_test_data_to_table(pg: PgProtocol, worker_id: int, table_name: st async def parallel_load_different_tables(pg: PgProtocol, n_parallel: int): workers = [] for worker_id in range(n_parallel): - worker = copy_test_data_to_table(pg, worker_id, f'copytest_{worker_id}') + worker = copy_test_data_to_table(pg, worker_id, f"copytest_{worker_id}") workers.append(asyncio.create_task(worker)) # await all workers @@ -43,10 +45,10 @@ def test_parallel_copy_different_tables(neon_with_baseline: PgCompare, n_paralle cur = conn.cursor() for worker_id in range(n_parallel): - cur.execute(f'CREATE TABLE copytest_{worker_id} (i int, t text)') + cur.execute(f"CREATE TABLE copytest_{worker_id} (i int, t text)") - with env.record_pageserver_writes('pageserver_writes'): - with env.record_duration('load'): + with env.record_pageserver_writes("pageserver_writes"): + with env.record_duration("load"): asyncio.run(parallel_load_different_tables(env.pg, n_parallel)) env.flush() @@ -57,7 +59,7 @@ def test_parallel_copy_different_tables(neon_with_baseline: PgCompare, n_paralle async def parallel_load_same_table(pg: PgProtocol, n_parallel: int): workers = [] for worker_id in range(n_parallel): - worker = copy_test_data_to_table(pg, worker_id, f'copytest') + worker = copy_test_data_to_table(pg, worker_id, f"copytest") workers.append(asyncio.create_task(worker)) # await all workers @@ -70,10 +72,10 @@ def test_parallel_copy_same_table(neon_with_baseline: PgCompare, n_parallel=5): conn = env.pg.connect() cur = conn.cursor() - cur.execute(f'CREATE TABLE copytest (i int, t text)') + cur.execute(f"CREATE TABLE copytest (i int, t text)") - with env.record_pageserver_writes('pageserver_writes'): - with env.record_duration('load'): + with env.record_pageserver_writes("pageserver_writes"): + with env.record_duration("load"): asyncio.run(parallel_load_same_table(env.pg, 
n_parallel)) env.flush() diff --git a/test_runner/performance/test_perf_pgbench.py b/test_runner/performance/test_perf_pgbench.py index 89c510e76e..934642d095 100644 --- a/test_runner/performance/test_perf_pgbench.py +++ b/test_runner/performance/test_perf_pgbench.py @@ -30,7 +30,7 @@ def init_pgbench(env: PgCompare, cmdline): # duration is actually a metric and uses float instead of int for timestamp start_timestamp = utc_now_timestamp() t0 = timeit.default_timer() - with env.record_pageserver_writes('init.pageserver_writes'): + with env.record_pageserver_writes("init.pageserver_writes"): out = env.pg_bin.run_capture(cmdline) env.flush() @@ -49,10 +49,12 @@ def init_pgbench(env: PgCompare, cmdline): def run_pgbench(env: PgCompare, prefix: str, cmdline): - with env.record_pageserver_writes(f'{prefix}.pageserver_writes'): + with env.record_pageserver_writes(f"{prefix}.pageserver_writes"): run_start_timestamp = utc_now_timestamp() t0 = timeit.default_timer() - out = env.pg_bin.run_capture(cmdline, ) + out = env.pg_bin.run_capture( + cmdline, + ) run_duration = timeit.default_timer() - t0 run_end_timestamp = utc_now_timestamp() env.flush() @@ -78,40 +80,45 @@ def run_pgbench(env: PgCompare, prefix: str, cmdline): # # Currently, the # of connections is hardcoded at 4 def run_test_pgbench(env: PgCompare, scale: int, duration: int, workload_type: PgBenchLoadType): - env.zenbenchmark.record("scale", scale, '', MetricReport.TEST_PARAM) + env.zenbenchmark.record("scale", scale, "", MetricReport.TEST_PARAM) if workload_type == PgBenchLoadType.INIT: # Run initialize init_pgbench( - env, ['pgbench', f'-s{scale}', '-i', env.pg.connstr(options='-cstatement_timeout=1h')]) + env, ["pgbench", f"-s{scale}", "-i", env.pg.connstr(options="-cstatement_timeout=1h")] + ) if workload_type == PgBenchLoadType.SIMPLE_UPDATE: # Run simple-update workload - run_pgbench(env, - "simple-update", - [ - 'pgbench', - '-N', - '-c4', - f'-T{duration}', - '-P2', - '--progress-timestamp', - 
env.pg.connstr(), - ]) + run_pgbench( + env, + "simple-update", + [ + "pgbench", + "-N", + "-c4", + f"-T{duration}", + "-P2", + "--progress-timestamp", + env.pg.connstr(), + ], + ) if workload_type == PgBenchLoadType.SELECT_ONLY: # Run SELECT workload - run_pgbench(env, - "select-only", - [ - 'pgbench', - '-S', - '-c4', - f'-T{duration}', - '-P2', - '--progress-timestamp', - env.pg.connstr(), - ]) + run_pgbench( + env, + "select-only", + [ + "pgbench", + "-S", + "-c4", + f"-T{duration}", + "-P2", + "--progress-timestamp", + env.pg.connstr(), + ], + ) env.report_size() @@ -121,12 +128,12 @@ def get_durations_matrix(default: int = 45) -> List[int]: rv = [] for d in durations.split(","): d = d.strip().lower() - if d.endswith('h'): - duration = int(d.removesuffix('h')) * 60 * 60 - elif d.endswith('m'): - duration = int(d.removesuffix('m')) * 60 + if d.endswith("h"): + duration = int(d.removesuffix("h")) * 60 * 60 + elif d.endswith("m"): + duration = int(d.removesuffix("m")) * 60 else: - duration = int(d.removesuffix('s')) + duration = int(d.removesuffix("s")) rv.append(duration) return rv @@ -137,10 +144,10 @@ def get_scales_matrix(default: int = 10) -> List[int]: rv = [] for s in scales.split(","): s = s.strip().lower() - if s.endswith('mb'): - scale = get_scale_for_db(int(s.removesuffix('mb'))) - elif s.endswith('gb'): - scale = get_scale_for_db(int(s.removesuffix('gb')) * 1024) + if s.endswith("mb"): + scale = get_scale_for_db(int(s.removesuffix("mb"))) + elif s.endswith("gb"): + scale = get_scale_for_db(int(s.removesuffix("gb")) * 1024) else: scale = int(s) rv.append(scale) @@ -167,9 +174,9 @@ def test_pgbench(neon_with_baseline: PgCompare, scale: int, duration: int): @pytest.mark.parametrize("duration", get_durations_matrix()) def test_pgbench_flamegraph(zenbenchmark, pg_bin, neon_env_builder, scale: int, duration: int): neon_env_builder.num_safekeepers = 1 - neon_env_builder.pageserver_config_override = ''' + neon_env_builder.pageserver_config_override = """ 
profiling="page_requests" -''' +""" if not profiling_supported(): pytest.skip("pageserver was built without 'profiling' feature") diff --git a/test_runner/performance/test_random_writes.py b/test_runner/performance/test_random_writes.py index 8931234c51..8ed684af16 100644 --- a/test_runner/performance/test_random_writes.py +++ b/test_runner/performance/test_random_writes.py @@ -1,14 +1,13 @@ import os -from contextlib import closing -from fixtures.benchmark_fixture import MetricReport -from fixtures.neon_fixtures import NeonEnv -from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare -from fixtures.log_helper import log - -import psycopg2.extras import random import time +from contextlib import closing +import psycopg2.extras +from fixtures.benchmark_fixture import MetricReport +from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv from fixtures.utils import query_scalar @@ -43,13 +42,15 @@ def test_random_writes(neon_with_baseline: PgCompare): with closing(env.pg.connect()) as conn: with conn.cursor() as cur: # Create the test table - with env.record_duration('init'): - cur.execute(""" + with env.record_duration("init"): + cur.execute( + """ CREATE TABLE Big( pk integer primary key, count integer default 0 ); - """) + """ + ) # Insert n_rows in batches to avoid query timeouts rows_inserted = 0 @@ -62,7 +63,7 @@ def test_random_writes(neon_with_baseline: PgCompare): # Get table size (can't be predicted because padding and alignment) table_size = query_scalar(cur, "SELECT pg_relation_size('Big')") - env.zenbenchmark.record("table_size", table_size, 'bytes', MetricReport.TEST_PARAM) + env.zenbenchmark.record("table_size", table_size, "bytes", MetricReport.TEST_PARAM) # Decide how much to write, based on knowledge of pageserver implementation. # Avoiding segment collisions maximizes (neon_runtime / vanilla_runtime). 
@@ -72,13 +73,15 @@ def test_random_writes(neon_with_baseline: PgCompare): # The closer this is to 250 MB, the more realistic the test is. effective_checkpoint_distance = table_size * n_writes // n_rows - env.zenbenchmark.record("effective_checkpoint_distance", - effective_checkpoint_distance, - 'bytes', - MetricReport.TEST_PARAM) + env.zenbenchmark.record( + "effective_checkpoint_distance", + effective_checkpoint_distance, + "bytes", + MetricReport.TEST_PARAM, + ) # Update random keys - with env.record_duration('run'): + with env.record_duration("run"): for it in range(n_iterations): for i in range(n_writes): key = random.randint(1, n_rows) diff --git a/test_runner/performance/test_seqscans.py b/test_runner/performance/test_seqscans.py index 8d7ad46c1a..6094ed38e5 100644 --- a/test_runner/performance/test_seqscans.py +++ b/test_runner/performance/test_seqscans.py @@ -2,15 +2,16 @@ # from contextlib import closing from dataclasses import dataclass -from fixtures.neon_fixtures import NeonEnv -from fixtures.log_helper import log + +import pytest from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker from fixtures.compare_fixtures import PgCompare -import pytest +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv @pytest.mark.parametrize( - 'rows,iters,workers', + "rows,iters,workers", [ # The test table is large enough (3-4 MB) that it doesn't fit in the compute node # cache, so the seqscans go to the page server. 
But small enough that it fits @@ -18,31 +19,34 @@ import pytest pytest.param(100000, 100, 0), # Also test with a larger table, with and without parallelism pytest.param(10000000, 1, 0), - pytest.param(10000000, 1, 4) - ]) + pytest.param(10000000, 1, 4), + ], +) def test_seqscans(neon_with_baseline: PgCompare, rows: int, iters: int, workers: int): env = neon_with_baseline with closing(env.pg.connect()) as conn: with conn.cursor() as cur: - cur.execute('create table t (i integer);') - cur.execute(f'insert into t values (generate_series(1,{rows}));') + cur.execute("create table t (i integer);") + cur.execute(f"insert into t values (generate_series(1,{rows}));") # Verify that the table is larger than shared_buffers - cur.execute(''' + cur.execute( + """ select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('t') as tbl_ize from pg_settings where name = 'shared_buffers' - ''') + """ + ) row = cur.fetchone() assert row is not None shared_buffers = row[0] table_size = row[1] log.info(f"shared_buffers is {shared_buffers}, table size {table_size}") assert int(shared_buffers) < int(table_size) - env.zenbenchmark.record("table_size", table_size, 'bytes', MetricReport.TEST_PARAM) + env.zenbenchmark.record("table_size", table_size, "bytes", MetricReport.TEST_PARAM) cur.execute(f"set max_parallel_workers_per_gather = {workers}") - with env.record_duration('run'): + with env.record_duration("run"): for i in range(iters): - cur.execute('select count(*) from t;') + cur.execute("select count(*) from t;") diff --git a/test_runner/performance/test_startup.py b/test_runner/performance/test_startup.py index 1cfd128e9b..e91b180154 100644 --- a/test_runner/performance/test_startup.py +++ b/test_runner/performance/test_startup.py @@ -1,7 +1,8 @@ -import pytest from contextlib import closing -from fixtures.neon_fixtures import NeonEnvBuilder + +import pytest from fixtures.benchmark_fixture import NeonBenchmarker +from fixtures.neon_fixtures import NeonEnvBuilder # 
This test sometimes runs for longer than the global 5 minute timeout. @@ -11,15 +12,15 @@ def test_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker env = neon_env_builder.init_start() # Start - env.neon_cli.create_branch('test_startup') + env.neon_cli.create_branch("test_startup") with zenbenchmark.record_duration("startup_time"): - pg = env.postgres.create_start('test_startup') + pg = env.postgres.create_start("test_startup") pg.safe_psql("select 1;") # Restart pg.stop_and_destroy() with zenbenchmark.record_duration("restart_time"): - pg.create_start('test_startup') + pg.create_start("test_startup") pg.safe_psql("select 1;") # Fill up @@ -28,8 +29,8 @@ def test_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker with closing(pg.connect()) as conn: with conn.cursor() as cur: for i in range(num_tables): - cur.execute(f'create table t_{i} (i integer);') - cur.execute(f'insert into t_{i} values (generate_series(1,{num_rows}));') + cur.execute(f"create table t_{i} (i integer);") + cur.execute(f"insert into t_{i} values (generate_series(1,{num_rows}));") # Read with zenbenchmark.record_duration("read_time"): @@ -42,7 +43,7 @@ def test_startup(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker # Restart pg.stop_and_destroy() with zenbenchmark.record_duration("restart_with_data"): - pg.create_start('test_startup') + pg.create_start("test_startup") pg.safe_psql("select 1;") # Read diff --git a/test_runner/performance/test_wal_backpressure.py b/test_runner/performance/test_wal_backpressure.py index bbb5ddecab..03d5ba208a 100644 --- a/test_runner/performance/test_wal_backpressure.py +++ b/test_runner/performance/test_wal_backpressure.py @@ -10,8 +10,7 @@ from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare from fixtures.log_helper import log from fixtures.neon_fixtures import DEFAULT_BRANCH_NAME, NeonEnvBuilder, PgBin from fixtures.utils import lsn_from_hex - -from performance.test_perf_pgbench 
import (get_durations_matrix, get_scales_matrix) +from performance.test_perf_pgbench import get_durations_matrix, get_scales_matrix @pytest.fixture(params=["vanilla", "neon_off", "neon_on"]) @@ -30,7 +29,9 @@ def pg_compare(request) -> PgCompare: return fixture else: - assert len(x) == 2, f"request param ({request.param}) should have a format of \ + assert ( + len(x) == 2 + ), f"request param ({request.param}) should have a format of \ `neon_{{safekeepers_enable_fsync}}`" # `NeonCompare` interface @@ -70,8 +71,7 @@ def start_heavy_write_workload(env: PgCompare, n_tables: int, scale: int, num_it with env.record_duration("run_duration"): threads = [ - threading.Thread(target=start_single_table_workload, args=(i, )) - for i in range(n_tables) + threading.Thread(target=start_single_table_workload, args=(i,)) for i in range(n_tables) ] for thread in threads: @@ -95,12 +95,14 @@ def test_heavy_write_workload(pg_compare: PgCompare, n_tables: int, scale: int, ) cur.execute(f"INSERT INTO t{i} (key) VALUES (0)") - workload_thread = threading.Thread(target=start_heavy_write_workload, - args=(env, n_tables, scale, num_iters)) + workload_thread = threading.Thread( + target=start_heavy_write_workload, args=(env, n_tables, scale, num_iters) + ) workload_thread.start() - record_thread = threading.Thread(target=record_lsn_write_lag, - args=(env, lambda: workload_thread.is_alive())) + record_thread = threading.Thread( + target=record_lsn_write_lag, args=(env, lambda: workload_thread.is_alive()) + ) record_thread.start() record_read_latency(env, lambda: workload_thread.is_alive(), "SELECT * from t0 where key = 0") @@ -110,14 +112,16 @@ def test_heavy_write_workload(pg_compare: PgCompare, n_tables: int, scale: int, def start_pgbench_simple_update_workload(env: PgCompare, duration: int): with env.record_duration("run_duration"): - env.pg_bin.run_capture([ - 'pgbench', - '-j10', - '-c10', - '-N', - f'-T{duration}', - env.pg.connstr(options="-csynchronous_commit=off") - ]) + 
env.pg_bin.run_capture( + [ + "pgbench", + "-j10", + "-c10", + "-N", + f"-T{duration}", + env.pg.connstr(options="-csynchronous_commit=off"), + ] + ) env.flush() @@ -128,20 +132,22 @@ def test_pgbench_simple_update_workload(pg_compare: PgCompare, scale: int, durat env = pg_compare # initialize pgbench tables - env.pg_bin.run_capture(['pgbench', f'-s{scale}', '-i', env.pg.connstr()]) + env.pg_bin.run_capture(["pgbench", f"-s{scale}", "-i", env.pg.connstr()]) env.flush() - workload_thread = threading.Thread(target=start_pgbench_simple_update_workload, - args=(env, duration)) + workload_thread = threading.Thread( + target=start_pgbench_simple_update_workload, args=(env, duration) + ) workload_thread.start() - record_thread = threading.Thread(target=record_lsn_write_lag, - args=(env, lambda: workload_thread.is_alive())) + record_thread = threading.Thread( + target=record_lsn_write_lag, args=(env, lambda: workload_thread.is_alive()) + ) record_thread.start() - record_read_latency(env, - lambda: workload_thread.is_alive(), - "SELECT * from pgbench_accounts where aid = 1") + record_read_latency( + env, lambda: workload_thread.is_alive(), "SELECT * from pgbench_accounts where aid = 1" + ) workload_thread.join() record_thread.join() @@ -150,13 +156,15 @@ def start_pgbench_intensive_initialization(env: PgCompare, scale: int, done_even with env.record_duration("run_duration"): # Needs to increase the statement timeout (default: 120s) because the # initialization step can be slow with a large scale. 
- env.pg_bin.run_capture([ - 'pgbench', - f'-s{scale}', - '-i', - '-Idtg', - env.pg.connstr(options='-cstatement_timeout=600s') - ]) + env.pg_bin.run_capture( + [ + "pgbench", + f"-s{scale}", + "-i", + "-Idtg", + env.pg.connstr(options="-cstatement_timeout=600s"), + ] + ) done_event.set() @@ -170,12 +178,14 @@ def test_pgbench_intensive_init_workload(pg_compare: PgCompare, scale: int): workload_done_event = threading.Event() - workload_thread = threading.Thread(target=start_pgbench_intensive_initialization, - args=(env, scale, workload_done_event)) + workload_thread = threading.Thread( + target=start_pgbench_intensive_initialization, args=(env, scale, workload_done_event) + ) workload_thread.start() - record_thread = threading.Thread(target=record_lsn_write_lag, - args=(env, lambda: not workload_done_event.is_set())) + record_thread = threading.Thread( + target=record_lsn_write_lag, args=(env, lambda: not workload_done_event.is_set()) + ) record_thread.start() record_read_latency(env, lambda: not workload_done_event.is_set(), "SELECT count(*) from foo") @@ -195,13 +205,15 @@ def record_lsn_write_lag(env: PgCompare, run_cond: Callable[[], bool], pool_inte cur.execute("CREATE EXTENSION neon") while run_cond(): - cur.execute(''' + cur.execute( + """ select pg_wal_lsn_diff(pg_current_wal_flush_lsn(),received_lsn), pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_flush_lsn(),received_lsn)), pg_current_wal_flush_lsn(), received_lsn from backpressure_lsns(); - ''') + """ + ) res = cur.fetchone() lsn_write_lags.append(res[0]) @@ -220,24 +232,29 @@ def record_lsn_write_lag(env: PgCompare, run_cond: Callable[[], bool], pool_inte time.sleep(pool_interval) - env.zenbenchmark.record("lsn_write_lag_max", - float(max(lsn_write_lags) / (1024**2)), - "MB", - MetricReport.LOWER_IS_BETTER) - env.zenbenchmark.record("lsn_write_lag_avg", - float(statistics.mean(lsn_write_lags) / (1024**2)), - "MB", - MetricReport.LOWER_IS_BETTER) - env.zenbenchmark.record("lsn_write_lag_stdev", - 
float(statistics.stdev(lsn_write_lags) / (1024**2)), - "MB", - MetricReport.LOWER_IS_BETTER) + env.zenbenchmark.record( + "lsn_write_lag_max", + float(max(lsn_write_lags) / (1024**2)), + "MB", + MetricReport.LOWER_IS_BETTER, + ) + env.zenbenchmark.record( + "lsn_write_lag_avg", + float(statistics.mean(lsn_write_lags) / (1024**2)), + "MB", + MetricReport.LOWER_IS_BETTER, + ) + env.zenbenchmark.record( + "lsn_write_lag_stdev", + float(statistics.stdev(lsn_write_lags) / (1024**2)), + "MB", + MetricReport.LOWER_IS_BETTER, + ) -def record_read_latency(env: PgCompare, - run_cond: Callable[[], bool], - read_query: str, - read_interval: float = 1.0): +def record_read_latency( + env: PgCompare, run_cond: Callable[[], bool], read_query: str, read_interval: float = 1.0 +): read_latencies = [] with env.pg.connect().cursor() as cur: @@ -256,15 +273,12 @@ def record_read_latency(env: PgCompare, time.sleep(read_interval) - env.zenbenchmark.record("read_latency_max", - max(read_latencies), - 's', - MetricReport.LOWER_IS_BETTER) - env.zenbenchmark.record("read_latency_avg", - statistics.mean(read_latencies), - 's', - MetricReport.LOWER_IS_BETTER) - env.zenbenchmark.record("read_latency_stdev", - statistics.stdev(read_latencies), - 's', - MetricReport.LOWER_IS_BETTER) + env.zenbenchmark.record( + "read_latency_max", max(read_latencies), "s", MetricReport.LOWER_IS_BETTER + ) + env.zenbenchmark.record( + "read_latency_avg", statistics.mean(read_latencies), "s", MetricReport.LOWER_IS_BETTER + ) + env.zenbenchmark.record( + "read_latency_stdev", statistics.stdev(read_latencies), "s", MetricReport.LOWER_IS_BETTER + ) diff --git a/test_runner/performance/test_write_amplification.py b/test_runner/performance/test_write_amplification.py index 1d729fd78f..7aab469387 100644 --- a/test_runner/performance/test_write_amplification.py +++ b/test_runner/performance/test_write_amplification.py @@ -12,10 +12,11 @@ # Amplification problem at its finest. 
import os from contextlib import closing + from fixtures.benchmark_fixture import MetricReport -from fixtures.neon_fixtures import NeonEnv -from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare +from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv def test_write_amplification(neon_with_baseline: PgCompare): @@ -23,18 +24,20 @@ def test_write_amplification(neon_with_baseline: PgCompare): with closing(env.pg.connect()) as conn: with conn.cursor() as cur: - with env.record_pageserver_writes('pageserver_writes'): - with env.record_duration('run'): + with env.record_pageserver_writes("pageserver_writes"): + with env.record_duration("run"): # NOTE: Because each iteration updates every table already created, # the runtime and write amplification is O(n^2), where n is the # number of iterations. for i in range(25): - cur.execute(f''' + cur.execute( + f""" CREATE TABLE tbl{i} AS SELECT g as i, 'long string to consume some space' || g as t FROM generate_series(1, 100000) g - ''') + """ + ) cur.execute(f"create index on tbl{i} (i);") for j in range(1, i): cur.execute(f"delete from tbl{j} where i = {i}") diff --git a/test_runner/pg_clients/test_pg_clients.py b/test_runner/pg_clients/test_pg_clients.py index a117616358..f91a2adf7d 100644 --- a/test_runner/pg_clients/test_pg_clients.py +++ b/test_runner/pg_clients/test_pg_clients.py @@ -18,10 +18,12 @@ from fixtures.utils import subprocess_capture "python/asyncpg", pytest.param( "python/pg8000", # See https://github.com/neondatabase/neon/pull/2008#discussion_r912264281 - marks=pytest.mark.xfail(reason="Handles SSL in incompatible with Neon way")), + marks=pytest.mark.xfail(reason="Handles SSL in incompatible with Neon way"), + ), pytest.param( "swift/PostgresClientKit", # See https://github.com/neondatabase/neon/pull/2008#discussion_r911896592 - marks=pytest.mark.xfail(reason="Neither SNI nor parameters 
is supported")), + marks=pytest.mark.xfail(reason="Neither SNI nor parameters is supported"), + ), "typescript/postgresql-client", ], ) @@ -31,12 +33,14 @@ def test_pg_clients(test_output_dir: Path, remote_pg: RemotePostgres, client: st env_file = None with NamedTemporaryFile(mode="w", delete=False) as f: env_file = f.name - f.write(f""" + f.write( + f""" NEON_HOST={conn_options["host"]} NEON_DATABASE={conn_options["dbname"]} NEON_USER={conn_options["user"]} NEON_PASSWORD={conn_options["password"]} - """) + """ + ) image_tag = client.lower() docker_bin = shutil.which("docker") diff --git a/test_runner/test_broken.py b/test_runner/test_broken.py index 3960546689..0281f4f48b 100644 --- a/test_runner/test_broken.py +++ b/test_runner/test_broken.py @@ -1,8 +1,9 @@ -import pytest import os -from fixtures.neon_fixtures import NeonEnv +import pytest from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnv + """ Use this test to see what happens when tests fail. @@ -13,8 +14,9 @@ Set the environment variable RUN_BROKEN to see this test run (and fail, and hopefully not leave any server processes behind). 
""" -run_broken = pytest.mark.skipif(os.environ.get('RUN_BROKEN') is None, - reason="only used for testing the fixtures") +run_broken = pytest.mark.skipif( + os.environ.get("RUN_BROKEN") is None, reason="only used for testing the fixtures" +) @run_broken @@ -23,7 +25,7 @@ def test_broken(neon_simple_env: NeonEnv, pg_bin): env.neon_cli.create_branch("test_broken", "empty") env.postgres.create_start("test_broken") - log.info('postgres is running') + log.info("postgres is running") - log.info('THIS NEXT COMMAND WILL FAIL:') - pg_bin.run('pgbench -i_am_a_broken_test'.split()) + log.info("THIS NEXT COMMAND WILL FAIL:") + pg_bin.run("pgbench -i_am_a_broken_test".split()) From ae3227509c36ae4e6529fdc397933c8b2372c47a Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Thu, 18 Aug 2022 13:42:06 +0100 Subject: [PATCH 23/63] test_runner: revive flake8 --- .github/workflows/codestyle.yml | 3 +++ docs/sourcetree.md | 3 ++- poetry.lock | 40 ++++++++++++++++----------------- pre-commit.py | 11 +++++++++ pyproject.toml | 2 +- setup.cfg | 8 +++++++ 6 files changed, 45 insertions(+), 22 deletions(-) create mode 100644 setup.cfg diff --git a/.github/workflows/codestyle.yml b/.github/workflows/codestyle.yml index bd0f368499..029beba351 100644 --- a/.github/workflows/codestyle.yml +++ b/.github/workflows/codestyle.yml @@ -134,5 +134,8 @@ jobs: - name: Run black to ensure code format run: poetry run black --diff --check . + - name: Run flake8 to ensure code format + run: poetry run flake8 . + - name: Run mypy to check types run: poetry run mypy . diff --git a/docs/sourcetree.md b/docs/sourcetree.md index f189134865..88f4b0e559 100644 --- a/docs/sourcetree.md +++ b/docs/sourcetree.md @@ -118,6 +118,7 @@ Run the following commands in the repository's root (next to `pyproject.toml`): ```bash poetry run isort . # Imports are reformatted poetry run black . # All code is reformatted +poetry run flake8 . # Python linter poetry run mypy . 
# Ensure there are no typing errors ``` @@ -126,7 +127,7 @@ Otherwise it will not find its configuration. Also consider: -* Running `flake8` (or a linter of your choice, e.g. `pycodestyle`) and fixing possible defects, if any. +* Running `pycodestyle` (or a linter of your choice) and fixing possible defects, if any. * Adding more type hints to your code to avoid `Any`. ### Changing dependencies diff --git a/poetry.lock b/poetry.lock index cd24641a4f..e1f2e576eb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -653,16 +653,16 @@ testing = ["pre-commit"] [[package]] name = "flake8" -version = "3.9.2" +version = "5.0.4" description = "the modular source code checker: pep8 pyflakes and co" category = "dev" optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" +python-versions = ">=3.6.1" [package.dependencies] -mccabe = ">=0.6.0,<0.7.0" -pycodestyle = ">=2.7.0,<2.8.0" -pyflakes = ">=2.3.0,<2.4.0" +mccabe = ">=0.7.0,<0.8.0" +pycodestyle = ">=2.9.0,<2.10.0" +pyflakes = ">=2.5.0,<2.6.0" [[package]] name = "flask" @@ -870,11 +870,11 @@ python-versions = ">=3.7" [[package]] name = "mccabe" -version = "0.6.1" +version = "0.7.0" description = "McCabe checker, plugin for flake8" category = "dev" optional = false -python-versions = "*" +python-versions = ">=3.6" [[package]] name = "moto" @@ -1107,11 +1107,11 @@ python-versions = "*" [[package]] name = "pycodestyle" -version = "2.7.0" +version = "2.9.1" description = "Python style guide checker" category = "dev" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = ">=3.6" [[package]] name = "pycparser" @@ -1123,11 +1123,11 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" [[package]] name = "pyflakes" -version = "2.3.1" +version = "2.5.0" description = "passive checker of Python programs" category = "dev" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = ">=3.6" [[package]] name = "pyjwt" @@ 
-1537,7 +1537,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest- [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "497b963e7a2f80a751ccd201e950cf533caddb6c7c96163c94cea69874840843" +content-hash = "2112382a6723ed3b77d242db926c7445fa809fafcf11da127b5292565d2ba798" [metadata.files] aiopg = [ @@ -1759,8 +1759,8 @@ execnet = [ {file = "execnet-1.9.0.tar.gz", hash = "sha256:8f694f3ba9cc92cab508b152dcfe322153975c29bda272e2fd7f3f00f36e47c5"}, ] flake8 = [ - {file = "flake8-3.9.2-py2.py3-none-any.whl", hash = "sha256:bf8fd333346d844f616e8d47905ef3a3384edae6b4e9beb0c5101e25e3110907"}, - {file = "flake8-3.9.2.tar.gz", hash = "sha256:07528381786f2a6237b061f6e96610a4167b226cb926e2aa2b6b1d78057c576b"}, + {file = "flake8-5.0.4-py2.py3-none-any.whl", hash = "sha256:7a1cf6b73744f5806ab95e526f6f0d8c01c66d7bbe349562d22dfca20610b248"}, + {file = "flake8-5.0.4.tar.gz", hash = "sha256:6fbe320aad8d6b95cec8b8e47bc933004678dc63095be98528b7bdd2a9f510db"}, ] flask = [ {file = "Flask-2.1.3-py3-none-any.whl", hash = "sha256:9013281a7402ad527f8fd56375164f3aa021ecfaff89bfe3825346c24f87e04c"}, @@ -1872,8 +1872,8 @@ markupsafe = [ {file = "MarkupSafe-2.1.1.tar.gz", hash = "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b"}, ] mccabe = [ - {file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"}, - {file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"}, + {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, + {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] moto = [ {file = "moto-3.1.18-py3-none-any.whl", hash = "sha256:b6eb096e7880c46ac44d6d90988c0043e31462115cfdc913a0ee8f470bd9555c"}, @@ -2026,16 +2026,16 @@ pyasn1 = [ {file = "pyasn1-0.4.8.tar.gz", 
hash = "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba"}, ] pycodestyle = [ - {file = "pycodestyle-2.7.0-py2.py3-none-any.whl", hash = "sha256:514f76d918fcc0b55c6680472f0a37970994e07bbb80725808c17089be302068"}, - {file = "pycodestyle-2.7.0.tar.gz", hash = "sha256:c389c1d06bf7904078ca03399a4816f974a1d590090fecea0c63ec26ebaf1cef"}, + {file = "pycodestyle-2.9.1-py2.py3-none-any.whl", hash = "sha256:d1735fc58b418fd7c5f658d28d943854f8a849b01a5d0a1e6f3f3fdd0166804b"}, + {file = "pycodestyle-2.9.1.tar.gz", hash = "sha256:2c9607871d58c76354b697b42f5d57e1ada7d261c261efac224b664affdc5785"}, ] pycparser = [ {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, ] pyflakes = [ - {file = "pyflakes-2.3.1-py2.py3-none-any.whl", hash = "sha256:7893783d01b8a89811dd72d7dfd4d84ff098e5eed95cfa8905b22bbffe52efc3"}, - {file = "pyflakes-2.3.1.tar.gz", hash = "sha256:f5bc8ecabc05bb9d291eb5203d6810b49040f6ff446a756326104746cc00c1db"}, + {file = "pyflakes-2.5.0-py2.py3-none-any.whl", hash = "sha256:4579f67d887f804e67edb544428f264b7b24f435b263c4614f384135cea553d2"}, + {file = "pyflakes-2.5.0.tar.gz", hash = "sha256:491feb020dca48ccc562a8c0cbe8df07ee13078df59813b83959cbdada312ea3"}, ] pyjwt = [ {file = "PyJWT-2.4.0-py3-none-any.whl", hash = "sha256:72d1d253f32dbd4f5c88eaf1fdc62f3a19f676ccbadb9dbc5d07e951b2b26daf"}, diff --git a/pre-commit.py b/pre-commit.py index 45f140d43a..560df6cd0c 100755 --- a/pre-commit.py +++ b/pre-commit.py @@ -50,6 +50,10 @@ def isort(fix_inplace: bool) -> str: return cmd +def flake8() -> str: + return "poetry run flake8" + + def mypy() -> str: return "poetry run mypy" @@ -122,6 +126,13 @@ if __name__ == "__main__": changed_files=files, no_color=args.no_color, ) + check( + name="flake8", + suffix=".py", + cmd=flake8(), + changed_files=files, + 
no_color=args.no_color, + ) check( name="mypy", suffix=".py", diff --git a/pyproject.toml b/pyproject.toml index 4f8a49a024..d648d1050a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ pytest-order = "^1.0.1" allure-pytest = "^2.9.45" [tool.poetry.dev-dependencies] -flake8 = "^3.9.2" +flake8 = "^5.0.4" mypy = "==0.971" black = "^22.6.0" isort = "^5.10.1" diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000000..a067ee731d --- /dev/null +++ b/setup.cfg @@ -0,0 +1,8 @@ +[flake8] +# Move config to pyproject.toml as soon as flake8 supports it +# https://github.com/PyCQA/flake8/issues/234 +extend-ignore = + E203, # Whitespace before ':' -- conflicts with black + E266, # Too many leading '#' for block comment -- we use it for formatting sometimes + E501 # Line too long -- black sorts it out +extend-exclude = vendor/ From 39a3bcac360220b1e406f3616a9d1570bb7bb9b3 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Thu, 18 Aug 2022 20:41:13 +0100 Subject: [PATCH 24/63] test_runner: fix flake8 warnings --- scripts/export_import_between_pageservers.py | 22 +++++------ .../batch_others/test_basebackup_error.py | 4 +- .../batch_others/test_branch_and_gc.py | 4 +- .../batch_others/test_broken_timeline.py | 6 +-- .../batch_others/test_clog_truncate.py | 3 +- test_runner/batch_others/test_createdropdb.py | 5 +-- test_runner/batch_others/test_fsm_truncate.py | 4 +- .../batch_others/test_gc_aggressive.py | 2 +- test_runner/batch_others/test_lsn_mapping.py | 10 +---- .../batch_others/test_pageserver_api.py | 20 +++++----- .../batch_others/test_parallel_copy.py | 4 +- test_runner/batch_others/test_recovery.py | 5 --- .../batch_others/test_remote_storage.py | 2 +- test_runner/batch_others/test_tenant_conf.py | 5 +-- .../batch_others/test_tenant_relocation.py | 5 +-- test_runner/batch_others/test_tenant_tasks.py | 5 +-- .../test_tenants_with_remote_storage.py | 11 +++--- .../batch_others/test_timeline_size.py | 4 +- 
test_runner/batch_others/test_wal_acceptor.py | 6 +-- .../batch_pg_regress/test_neon_regress.py | 2 +- test_runner/fixtures/benchmark_fixture.py | 8 ++++ test_runner/fixtures/metrics.py | 2 - test_runner/fixtures/neon_fixtures.py | 37 +++++++++---------- test_runner/fixtures/utils.py | 2 +- test_runner/performance/test_bulk_insert.py | 5 +-- test_runner/performance/test_copy.py | 8 +--- test_runner/performance/test_dup_key.py | 2 +- test_runner/performance/test_gist_build.py | 6 +-- test_runner/performance/test_hot_page.py | 2 +- .../performance/test_parallel_copy_to.py | 11 ++---- test_runner/performance/test_random_writes.py | 7 +--- test_runner/performance/test_seqscans.py | 4 +- .../performance/test_write_amplification.py | 6 +-- .../python/pg8000/pg8000_example.py | 1 - test_runner/pg_clients/test_pg_clients.py | 2 - 35 files changed, 92 insertions(+), 140 deletions(-) diff --git a/scripts/export_import_between_pageservers.py b/scripts/export_import_between_pageservers.py index 5b9fc76768..af847be49e 100755 --- a/scripts/export_import_between_pageservers.py +++ b/scripts/export_import_between_pageservers.py @@ -28,13 +28,13 @@ import tempfile import time import uuid from contextlib import closing -from os import path from pathlib import Path -from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, TypeVar, Union, cast +from typing import Any, Dict, List, Optional, Tuple, cast import psycopg2 import requests from psycopg2.extensions import connection as PgConnection +from psycopg2.extensions import parse_dsn ############################################### ### client-side utils copied from test fixtures @@ -149,10 +149,8 @@ class PgProtocol: # enough for our tests, but if you need a longer, you can # change it by calling "SET statement_timeout" after # connecting. 
- if "options" in conn_options: - conn_options["options"] = f"-cstatement_timeout=120s " + conn_options["options"] - else: - conn_options["options"] = "-cstatement_timeout=120s" + conn_options["options"] = f"-cstatement_timeout=120s {conn_options.get('options', '')}" + return conn_options # autocommit=True here by default because that's what we need most of the time @@ -250,7 +248,7 @@ class NeonPageserverHttpClient(requests.Session): except requests.RequestException as e: try: msg = res.json()["msg"] - except: + except: # noqa: E722 msg = "" raise NeonPageserverApiException(msg) from e @@ -477,8 +475,8 @@ def import_timeline( import_cmd = f"import basebackup {tenant_id} {timeline_id} {last_lsn} {last_lsn}" full_cmd = rf"""cat {tar_filename} | {psql_path} {pageserver_connstr} -c '{import_cmd}' """ - stderr_filename2 = path.join(args.work_dir, f"import_{tenant_id}_{timeline_id}.stderr") - stdout_filename = path.join(args.work_dir, f"import_{tenant_id}_{timeline_id}.stdout") + stderr_filename2 = os.path.join(args.work_dir, f"import_{tenant_id}_{timeline_id}.stderr") + stdout_filename = os.path.join(args.work_dir, f"import_{tenant_id}_{timeline_id}.stdout") print(f"Running: {full_cmd}") @@ -495,7 +493,7 @@ def import_timeline( check=True, ) - print(f"Done import") + print("Done import") # Wait until pageserver persists the files wait_for_upload( @@ -508,7 +506,7 @@ def export_timeline( ): # Choose filenames incomplete_filename = tar_filename + ".incomplete" - stderr_filename = path.join(args.work_dir, f"{tenant_id}_{timeline_id}.stderr") + stderr_filename = os.path.join(args.work_dir, f"{tenant_id}_{timeline_id}.stderr") # Construct export command query = f"fullbackup {tenant_id} {timeline_id} {last_lsn} {prev_lsn}" @@ -563,7 +561,7 @@ def main(args: argparse.Namespace): continue # Choose filenames - tar_filename = path.join( + tar_filename = os.path.join( args.work_dir, f"{timeline['tenant_id']}_{timeline['timeline_id']}.tar" ) diff --git 
a/test_runner/batch_others/test_basebackup_error.py b/test_runner/batch_others/test_basebackup_error.py index 9960f3afbf..81a46ee2f0 100644 --- a/test_runner/batch_others/test_basebackup_error.py +++ b/test_runner/batch_others/test_basebackup_error.py @@ -11,7 +11,7 @@ def test_basebackup_error(neon_simple_env: NeonEnv): env.neon_cli.create_branch("test_basebackup_error", "empty") # Introduce failpoint - env.pageserver.safe_psql(f"failpoints basebackup-before-control-file=return") + env.pageserver.safe_psql("failpoints basebackup-before-control-file=return") with pytest.raises(Exception, match="basebackup-before-control-file"): - pg = env.postgres.create_start("test_basebackup_error") + env.postgres.create_start("test_basebackup_error") diff --git a/test_runner/batch_others/test_branch_and_gc.py b/test_runner/batch_others/test_branch_and_gc.py index bc8374543f..deb041b5d1 100644 --- a/test_runner/batch_others/test_branch_and_gc.py +++ b/test_runner/batch_others/test_branch_and_gc.py @@ -65,7 +65,7 @@ def test_branch_and_gc(neon_simple_env: NeonEnv): } ) - timeline_main = env.neon_cli.create_timeline(f"test_main", tenant_id=tenant) + timeline_main = env.neon_cli.create_timeline("test_main", tenant_id=tenant) pg_main = env.postgres.create_start("test_main", tenant_id=tenant) main_cur = pg_main.connect().cursor() @@ -148,7 +148,7 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv): # Use `failpoint=sleep` and `threading` to make the GC iteration triggers *before* the # branch creation task but the individual timeline GC iteration happens *after* # the branch creation task. 
- env.pageserver.safe_psql(f"failpoints before-timeline-gc=sleep(2000)") + env.pageserver.safe_psql("failpoints before-timeline-gc=sleep(2000)") def do_gc(): env.pageserver.safe_psql(f"do_gc {tenant.hex} {b0.hex} 0") diff --git a/test_runner/batch_others/test_broken_timeline.py b/test_runner/batch_others/test_broken_timeline.py index b96a7895eb..c4b23c24b8 100644 --- a/test_runner/batch_others/test_broken_timeline.py +++ b/test_runner/batch_others/test_broken_timeline.py @@ -1,8 +1,6 @@ import concurrent.futures import os -from contextlib import closing from typing import List, Tuple -from uuid import UUID import pytest from fixtures.log_helper import log @@ -24,7 +22,7 @@ def test_broken_timeline(neon_env_builder: NeonEnvBuilder): tenant_id = tenant_id_uuid.hex timeline_id = timeline_id_uuid.hex - pg = env.postgres.create_start(f"main", tenant_id=tenant_id_uuid) + pg = env.postgres.create_start("main", tenant_id=tenant_id_uuid) with pg.cursor() as cur: cur.execute("CREATE TABLE t(key int primary key, value text)") cur.execute("INSERT INTO t SELECT generate_series(1,100), 'payload'") @@ -102,7 +100,7 @@ def test_fix_broken_timelines_on_startup(neon_simple_env: NeonEnv): tenant_id, _ = env.neon_cli.create_tenant() # Introduce failpoint when creating a new timeline - env.pageserver.safe_psql(f"failpoints before-checkpoint-new-timeline=return") + env.pageserver.safe_psql("failpoints before-checkpoint-new-timeline=return") with pytest.raises(Exception, match="before-checkpoint-new-timeline"): _ = env.neon_cli.create_timeline("test_fix_broken_timelines", tenant_id) diff --git a/test_runner/batch_others/test_clog_truncate.py b/test_runner/batch_others/test_clog_truncate.py index 1f5df1c130..f47e4a99bf 100644 --- a/test_runner/batch_others/test_clog_truncate.py +++ b/test_runner/batch_others/test_clog_truncate.py @@ -1,6 +1,5 @@ import os import time -from contextlib import closing from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnv @@ -49,7 
+48,7 @@ def test_clog_truncate(neon_simple_env: NeonEnv): log.info(f"pg_xact_0000_path = {pg_xact_0000_path}") while os.path.isfile(pg_xact_0000_path): - log.info(f"file exists. wait for truncation. " "pg_xact_0000_path = {pg_xact_0000_path}") + log.info(f"file exists. wait for truncation: {pg_xact_0000_path=}") time.sleep(5) # checkpoint to advance latest lsn diff --git a/test_runner/batch_others/test_createdropdb.py b/test_runner/batch_others/test_createdropdb.py index fdb704ff15..036e50e6e8 100644 --- a/test_runner/batch_others/test_createdropdb.py +++ b/test_runner/batch_others/test_createdropdb.py @@ -1,6 +1,5 @@ import os import pathlib -from contextlib import closing from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content @@ -92,14 +91,14 @@ def test_dropdb(neon_simple_env: NeonEnv, test_output_dir): dbpath = pathlib.Path(pg_before.pgdata_dir) / "base" / str(dboid) log.info(dbpath) - assert os.path.isdir(dbpath) == True + assert os.path.isdir(dbpath) is True # Test that database subdir doesn't exist on the branch after drop assert pg_after.pgdata_dir dbpath = pathlib.Path(pg_after.pgdata_dir) / "base" / str(dboid) log.info(dbpath) - assert os.path.isdir(dbpath) == False + assert os.path.isdir(dbpath) is False # Check that we restore the content of the datadir correctly check_restored_datadir_content(test_output_dir, env, pg) diff --git a/test_runner/batch_others/test_fsm_truncate.py b/test_runner/batch_others/test_fsm_truncate.py index 54ad2ffa34..4551ff97e0 100644 --- a/test_runner/batch_others/test_fsm_truncate.py +++ b/test_runner/batch_others/test_fsm_truncate.py @@ -1,6 +1,4 @@ -import pytest -from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient +from fixtures.neon_fixtures import NeonEnvBuilder def test_fsm_truncate(neon_env_builder: NeonEnvBuilder): diff --git a/test_runner/batch_others/test_gc_aggressive.py 
b/test_runner/batch_others/test_gc_aggressive.py index be6b437e30..90824f882a 100644 --- a/test_runner/batch_others/test_gc_aggressive.py +++ b/test_runner/batch_others/test_gc_aggressive.py @@ -24,7 +24,7 @@ async def update_table(pg: Postgres): while updates_performed < updates_to_perform: updates_performed += 1 id = random.randrange(1, num_rows) - row = await pg_conn.fetchrow(f"UPDATE foo SET counter = counter + 1 WHERE id = {id}") + await pg_conn.fetchrow(f"UPDATE foo SET counter = counter + 1 WHERE id = {id}") # Perform aggressive GC with 0 horizon diff --git a/test_runner/batch_others/test_lsn_mapping.py b/test_runner/batch_others/test_lsn_mapping.py index 4db6951b42..0c1d3648f2 100644 --- a/test_runner/batch_others/test_lsn_mapping.py +++ b/test_runner/batch_others/test_lsn_mapping.py @@ -1,13 +1,7 @@ -import math -import time -from contextlib import closing -from datetime import timedelta, timezone, tzinfo -from uuid import UUID +from datetime import timedelta -import psycopg2.errors -import psycopg2.extras from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres +from fixtures.neon_fixtures import NeonEnvBuilder from fixtures.utils import query_scalar diff --git a/test_runner/batch_others/test_pageserver_api.py b/test_runner/batch_others/test_pageserver_api.py index 5d7619c1b2..869f53ac0a 100644 --- a/test_runner/batch_others/test_pageserver_api.py +++ b/test_runner/batch_others/test_pageserver_api.py @@ -1,16 +1,12 @@ -import os import pathlib import subprocess from typing import Optional from uuid import UUID, uuid4 -import pytest -from fixtures.log_helper import log from fixtures.neon_fixtures import ( DEFAULT_BRANCH_NAME, NeonEnv, NeonEnvBuilder, - NeonPageserverApiException, NeonPageserverHttpClient, neon_binpath, pg_distrib_dir, @@ -24,13 +20,15 @@ def test_pageserver_init_node_id(neon_simple_env: NeonEnv): repo_dir = neon_simple_env.repo_dir pageserver_config = repo_dir / "pageserver.toml" 
pageserver_bin = pathlib.Path(neon_binpath) / "pageserver" - run_pageserver = lambda args: subprocess.run( - [str(pageserver_bin), "-D", str(repo_dir), *args], - check=False, - universal_newlines=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) + + def run_pageserver(args): + return subprocess.run( + [str(pageserver_bin), "-D", str(repo_dir), *args], + check=False, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) # remove initial config pageserver_config.unlink() diff --git a/test_runner/batch_others/test_parallel_copy.py b/test_runner/batch_others/test_parallel_copy.py index 6b7fe4fdda..59f19026cc 100644 --- a/test_runner/batch_others/test_parallel_copy.py +++ b/test_runner/batch_others/test_parallel_copy.py @@ -32,7 +32,7 @@ async def copy_test_data_to_table(pg: Postgres, worker_id: int, table_name: str) async def parallel_load_same_table(pg: Postgres, n_parallel: int): workers = [] for worker_id in range(n_parallel): - worker = copy_test_data_to_table(pg, worker_id, f"copytest") + worker = copy_test_data_to_table(pg, worker_id, "copytest") workers.append(asyncio.create_task(worker)) # await all workers @@ -49,7 +49,7 @@ def test_parallel_copy(neon_simple_env: NeonEnv, n_parallel=5): # Create test table conn = pg.connect() cur = conn.cursor() - cur.execute(f"CREATE TABLE copytest (i int, t text)") + cur.execute("CREATE TABLE copytest (i int, t text)") # Run COPY TO to load the table with parallel connections. 
asyncio.run(parallel_load_same_table(pg, n_parallel)) diff --git a/test_runner/batch_others/test_recovery.py b/test_runner/batch_others/test_recovery.py index 5220aa6c2e..6aa8b4e9be 100644 --- a/test_runner/batch_others/test_recovery.py +++ b/test_runner/batch_others/test_recovery.py @@ -1,7 +1,4 @@ -import json -import os import time -from ast import Assert from contextlib import closing import psycopg2.extras @@ -33,8 +30,6 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder): pg = env.postgres.create_start("test_pageserver_recovery") log.info("postgres is running on 'test_pageserver_recovery' branch") - connstr = pg.connstr() - with closing(pg.connect()) as conn: with conn.cursor() as cur: with closing(env.pageserver.connect()) as psconn: diff --git a/test_runner/batch_others/test_remote_storage.py b/test_runner/batch_others/test_remote_storage.py index 974d3402f6..1e4fdc8602 100644 --- a/test_runner/batch_others/test_remote_storage.py +++ b/test_runner/batch_others/test_remote_storage.py @@ -99,7 +99,7 @@ def test_remote_storage_backup_and_restore( env.pageserver.start() # Introduce failpoint in download - env.pageserver.safe_psql(f"failpoints remote-storage-download-pre-rename=return") + env.pageserver.safe_psql("failpoints remote-storage-download-pre-rename=return") client.tenant_attach(UUID(tenant_id)) diff --git a/test_runner/batch_others/test_tenant_conf.py b/test_runner/batch_others/test_tenant_conf.py index 1e09ae8db7..d496edd6dc 100644 --- a/test_runner/batch_others/test_tenant_conf.py +++ b/test_runner/batch_others/test_tenant_conf.py @@ -1,7 +1,6 @@ from contextlib import closing import psycopg2.extras -import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnvBuilder @@ -22,8 +21,8 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}""" } ) - env.neon_cli.create_timeline(f"test_tenant_conf", tenant_id=tenant) - pg = env.postgres.create_start( + 
env.neon_cli.create_timeline("test_tenant_conf", tenant_id=tenant) + env.postgres.create_start( "test_tenant_conf", "main", tenant, diff --git a/test_runner/batch_others/test_tenant_relocation.py b/test_runner/batch_others/test_tenant_relocation.py index a30804ee8e..4d949e0c13 100644 --- a/test_runner/batch_others/test_tenant_relocation.py +++ b/test_runner/batch_others/test_tenant_relocation.py @@ -14,7 +14,6 @@ from fixtures.neon_fixtures import ( NeonEnv, NeonEnvBuilder, NeonPageserverHttpClient, - PageserverPort, PortDistributor, Postgres, assert_no_in_progress_downloads_for_tenant, @@ -56,7 +55,7 @@ def new_pageserver_helper( f"-c listen_pg_addr='localhost:{pg_port}'", f"-c listen_http_addr='localhost:{http_port}'", f"-c pg_distrib_dir='{pg_distrib_dir}'", - f"-c id=2", + "-c id=2", f"-c remote_storage={{local_path='{remote_storage_mock_path}'}}", ] if broker is not None: @@ -92,7 +91,7 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve with pg_cur(pg) as cur: cur.execute("INSERT INTO load VALUES ('some payload')") inserted_ctr += 1 - except: + except: # noqa: E722 if not failed: log.info("load failed") failed = True diff --git a/test_runner/batch_others/test_tenant_tasks.py b/test_runner/batch_others/test_tenant_tasks.py index 8075756ffb..8617bc8ea9 100644 --- a/test_runner/batch_others/test_tenant_tasks.py +++ b/test_runner/batch_others/test_tenant_tasks.py @@ -1,10 +1,9 @@ -import time from uuid import UUID from fixtures.neon_fixtures import NeonEnvBuilder, wait_until -def get_only_element(l): +def get_only_element(l): # noqa: E741 assert len(l) == 1 return l[0] @@ -46,7 +45,7 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder): # Create tenant, start compute tenant, _ = env.neon_cli.create_tenant() - timeline = env.neon_cli.create_timeline(name, tenant_id=tenant) + env.neon_cli.create_timeline(name, tenant_id=tenant) pg = env.postgres.create_start(name, tenant_id=tenant) assert get_state(tenant) == "Active" diff 
--git a/test_runner/batch_others/test_tenants_with_remote_storage.py b/test_runner/batch_others/test_tenants_with_remote_storage.py index a127693c32..7db58c2a70 100644 --- a/test_runner/batch_others/test_tenants_with_remote_storage.py +++ b/test_runner/batch_others/test_tenants_with_remote_storage.py @@ -7,7 +7,6 @@ # import asyncio -from contextlib import closing from typing import List, Tuple from uuid import UUID @@ -25,12 +24,12 @@ from fixtures.utils import lsn_from_hex async def tenant_workload(env: NeonEnv, pg: Postgres): - pageserver_conn = await env.pageserver.connect_async() + await env.pageserver.connect_async() pg_conn = await pg.connect_async() - tenant_id = await pg_conn.fetchval("show neon.tenant_id") - timeline_id = await pg_conn.fetchval("show neon.timeline_id") + await pg_conn.fetchval("show neon.tenant_id") + await pg_conn.fetchval("show neon.timeline_id") await pg_conn.execute("CREATE TABLE t(key int primary key, value text)") for i in range(1, 100): @@ -72,10 +71,10 @@ def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: Re "checkpoint_distance": "5000000", } ) - env.neon_cli.create_timeline(f"test_tenants_many", tenant_id=tenant) + env.neon_cli.create_timeline("test_tenants_many", tenant_id=tenant) pg = env.postgres.create_start( - f"test_tenants_many", + "test_tenants_many", tenant_id=tenant, ) tenants_pgs.append((tenant, pg)) diff --git a/test_runner/batch_others/test_timeline_size.py b/test_runner/batch_others/test_timeline_size.py index 76342cdf98..f6b665ec8c 100644 --- a/test_runner/batch_others/test_timeline_size.py +++ b/test_runner/batch_others/test_timeline_size.py @@ -125,7 +125,7 @@ def wait_for_pageserver_catchup(pgmain: Postgres, polling_interval=1, timeout=60 elapsed = time.time() - started_at if elapsed > timeout: raise RuntimeError( - f"timed out waiting for pageserver to reach pg_current_wal_flush_lsn()" + "timed out waiting for pageserver to reach pg_current_wal_flush_lsn()" ) res = pgmain.safe_psql( 
@@ -390,7 +390,7 @@ def test_tenant_physical_size(neon_simple_env: NeonEnv): tenant, timeline = env.neon_cli.create_tenant() def get_timeline_physical_size(timeline: UUID): - res = client.timeline_detail(tenant, timeline) + res = client.timeline_detail(tenant, timeline, include_non_incremental_physical_size=True) return res["local"]["current_physical_size_non_incremental"] timeline_total_size = get_timeline_physical_size(timeline) diff --git a/test_runner/batch_others/test_wal_acceptor.py b/test_runner/batch_others/test_wal_acceptor.py index 7710ef86cd..47838ddb76 100644 --- a/test_runner/batch_others/test_wal_acceptor.py +++ b/test_runner/batch_others/test_wal_acceptor.py @@ -180,7 +180,7 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder): while not self.should_stop.is_set(): collect_metrics("during INSERT INTO") time.sleep(1) - except: + except: # noqa: E722 log.error( "MetricsChecker's thread failed, the test will be failed on .stop() call", exc_info=True, @@ -552,7 +552,7 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: R while True: elapsed = time.time() - started_at if elapsed > wait_lsn_timeout: - raise RuntimeError(f"Timed out waiting for WAL redo") + raise RuntimeError("Timed out waiting for WAL redo") pageserver_lsn = env.pageserver.http_client().timeline_detail( uuid.UUID(tenant_id), uuid.UUID((timeline_id)) @@ -615,7 +615,7 @@ class ProposerPostgres(PgProtocol): "shared_preload_libraries = 'neon'\n", f"neon.timeline_id = '{self.timeline_id.hex}'\n", f"neon.tenant_id = '{self.tenant_id.hex}'\n", - f"neon.pageserver_connstring = ''\n", + "neon.pageserver_connstring = ''\n", f"neon.safekeepers = '{safekeepers}'\n", f"listen_addresses = '{self.listen_addr}'\n", f"port = '{self.port}'\n", diff --git a/test_runner/batch_pg_regress/test_neon_regress.py b/test_runner/batch_pg_regress/test_neon_regress.py index 5f13e6b2de..4619647084 100644 --- a/test_runner/batch_pg_regress/test_neon_regress.py +++ 
b/test_runner/batch_pg_regress/test_neon_regress.py @@ -49,7 +49,7 @@ def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, c # checkpoint one more time to ensure that the lsn we get is the latest one pg.safe_psql("CHECKPOINT") - lsn = pg.safe_psql("select pg_current_wal_insert_lsn()")[0][0] + pg.safe_psql("select pg_current_wal_insert_lsn()")[0][0] # Check that we restore the content of the datadir correctly check_restored_datadir_content(test_output_dir, env, pg) diff --git a/test_runner/fixtures/benchmark_fixture.py b/test_runner/fixtures/benchmark_fixture.py index cec46f9f6d..655ffed90d 100644 --- a/test_runner/fixtures/benchmark_fixture.py +++ b/test_runner/fixtures/benchmark_fixture.py @@ -60,6 +60,7 @@ class PgBenchRunResult: run_duration: float run_start_timestamp: int run_end_timestamp: int + scale: int # TODO progress @@ -130,6 +131,7 @@ class PgBenchRunResult: run_duration=run_duration, run_start_timestamp=run_start_timestamp, run_end_timestamp=run_end_timestamp, + scale=scale, ) @@ -304,6 +306,12 @@ class NeonBenchmarker: "", MetricReport.TEST_PARAM, ) + self.record( + f"{prefix}.scale", + pg_bench_result.scale, + "", + MetricReport.TEST_PARAM, + ) def record_pg_bench_init_result(self, prefix: str, result: PgBenchInitResult): test_params = [ diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index 6159e273c0..b51c7250e0 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -1,8 +1,6 @@ from collections import defaultdict -from dataclasses import dataclass from typing import Dict, List -from fixtures.log_helper import log from prometheus_client.parser import text_string_to_metric_families from prometheus_client.samples import Sample diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 388cc34182..f4ed937f02 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -838,7 +838,7 @@ class 
NeonEnv: ) if config.auth_enabled: toml += textwrap.dedent( - f""" + """ auth_enabled = true """ ) @@ -985,7 +985,7 @@ class NeonPageserverHttpClient(requests.Session): except requests.RequestException as e: try: msg = res.json()["msg"] - except: + except: # noqa: E722 msg = "" raise NeonPageserverApiException(msg) from e @@ -1065,19 +1065,15 @@ class NeonPageserverHttpClient(requests.Session): include_non_incremental_logical_size: bool = False, include_non_incremental_physical_size: bool = False, ) -> Dict[Any, Any]: - - include_non_incremental_logical_size_str = "0" + params = {} if include_non_incremental_logical_size: - include_non_incremental_logical_size_str = "1" - - include_non_incremental_physical_size_str = "0" + params["include-non-incremental-logical-size"] = "yes" if include_non_incremental_physical_size: - include_non_incremental_physical_size_str = "1" + params["include-non-incremental-physical-size"] = "yes" res = self.get( - f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}" - + "?include-non-incremental-logical-size={include_non_incremental_logical_size_str}" - + "&include-non-incremental-physical-size={include_non_incremental_physical_size_str}" + f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}", + params=params, ) self.verbose_error(res) res_json = res.json() @@ -1532,7 +1528,7 @@ class NeonPageserver(PgProtocol): `overrides` allows to add some config to this pageserver start. Returns self. 
""" - assert self.running == False + assert self.running is False self.env.neon_cli.pageserver_start(overrides=overrides) self.running = True @@ -1867,9 +1863,7 @@ class Postgres(PgProtocol): log.info(f"Starting postgres node {self.node_name}") - run_result = self.env.neon_cli.pg_start( - self.node_name, tenant_id=self.tenant_id, port=self.port - ) + self.env.neon_cli.pg_start(self.node_name, tenant_id=self.tenant_id, port=self.port) self.running = True return self @@ -2078,7 +2072,7 @@ class Safekeeper: running: bool = False def start(self) -> "Safekeeper": - assert self.running == False + assert self.running is False self.env.neon_cli.safekeeper_start(self.id) self.running = True # wait for wal acceptor start by checking its status @@ -2270,7 +2264,7 @@ class Etcd: # Set --quota-backend-bytes to keep the etcd virtual memory # size smaller. Our test etcd clusters are very small. # See https://github.com/etcd-io/etcd/issues/7910 - f"--quota-backend-bytes=100000000", + "--quota-backend-bytes=100000000", ] self.handle = subprocess.Popen(args, stdout=log_file, stderr=log_file) @@ -2395,7 +2389,7 @@ def should_skip_file(filename: str) -> bool: try: list(map(int, tmp_name)) - except: + except: # noqa: E722 return False return True @@ -2508,7 +2502,12 @@ def wait_until(number_of_iterations: int, interval: float, func): def assert_timeline_local( pageserver_http_client: NeonPageserverHttpClient, tenant: uuid.UUID, timeline: uuid.UUID ): - timeline_detail = pageserver_http_client.timeline_detail(tenant, timeline) + timeline_detail = pageserver_http_client.timeline_detail( + tenant, + timeline, + include_non_incremental_logical_size=True, + include_non_incremental_physical_size=True, + ) assert timeline_detail.get("local", {}).get("disk_consistent_lsn"), timeline_detail return timeline_detail diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py index 48889a8697..324c62170b 100644 --- a/test_runner/fixtures/utils.py +++ b/test_runner/fixtures/utils.py 
@@ -110,7 +110,7 @@ def get_dir_size(path: str) -> int: for name in files: try: totalbytes += os.path.getsize(os.path.join(root, name)) - except FileNotFoundError as e: + except FileNotFoundError: pass # file could be concurrently removed return totalbytes diff --git a/test_runner/performance/test_bulk_insert.py b/test_runner/performance/test_bulk_insert.py index 9aaf0cbc77..d6e67aa361 100644 --- a/test_runner/performance/test_bulk_insert.py +++ b/test_runner/performance/test_bulk_insert.py @@ -1,9 +1,6 @@ from contextlib import closing -from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker -from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare -from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv +from fixtures.compare_fixtures import PgCompare # diff --git a/test_runner/performance/test_copy.py b/test_runner/performance/test_copy.py index bf4804fc07..01b2097112 100644 --- a/test_runner/performance/test_copy.py +++ b/test_runner/performance/test_copy.py @@ -1,11 +1,7 @@ from contextlib import closing from io import BufferedReader, RawIOBase -from itertools import repeat -from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker -from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare -from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv +from fixtures.compare_fixtures import PgCompare class CopyTestData(RawIOBase): @@ -28,7 +24,7 @@ class CopyTestData(RawIOBase): self.rownum += 1 # Number of bytes to read in this call - l = min(len(self.linebuf) - self.ptr, len(b)) + l = min(len(self.linebuf) - self.ptr, len(b)) # noqa: E741 b[:l] = self.linebuf[self.ptr : (self.ptr + l)] self.ptr += l diff --git a/test_runner/performance/test_dup_key.py b/test_runner/performance/test_dup_key.py index 60fe3014ba..81752ae740 100644 --- a/test_runner/performance/test_dup_key.py +++ b/test_runner/performance/test_dup_key.py @@ -46,7 +46,7 @@ $$; # Write 3-4 MB 
to evict t from compute cache cur.execute("create table f (i integer);") - cur.execute(f"insert into f values (generate_series(1,100000));") + cur.execute("insert into f values (generate_series(1,100000));") # Read with env.record_duration("read"): diff --git a/test_runner/performance/test_gist_build.py b/test_runner/performance/test_gist_build.py index d8fa97fbbf..311030b99d 100644 --- a/test_runner/performance/test_gist_build.py +++ b/test_runner/performance/test_gist_build.py @@ -1,10 +1,6 @@ -import os from contextlib import closing -from fixtures.benchmark_fixture import MetricReport -from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare -from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv +from fixtures.compare_fixtures import PgCompare # diff --git a/test_runner/performance/test_hot_page.py b/test_runner/performance/test_hot_page.py index 8e8ab9849a..aad6ee667a 100644 --- a/test_runner/performance/test_hot_page.py +++ b/test_runner/performance/test_hot_page.py @@ -31,7 +31,7 @@ def test_hot_page(env: PgCompare): # Write 3-4 MB to evict t from compute cache cur.execute("create table f (i integer);") - cur.execute(f"insert into f values (generate_series(1,100000));") + cur.execute("insert into f values (generate_series(1,100000));") # Read with env.record_duration("read"): diff --git a/test_runner/performance/test_parallel_copy_to.py b/test_runner/performance/test_parallel_copy_to.py index c1883dec7b..b4a25e0edc 100644 --- a/test_runner/performance/test_parallel_copy_to.py +++ b/test_runner/performance/test_parallel_copy_to.py @@ -1,11 +1,8 @@ import asyncio from io import BytesIO -import asyncpg -from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker -from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare -from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv, PgProtocol, Postgres +from fixtures.compare_fixtures import PgCompare +from 
fixtures.neon_fixtures import PgProtocol async def repeat_bytes(buf, repetitions: int): @@ -59,7 +56,7 @@ def test_parallel_copy_different_tables(neon_with_baseline: PgCompare, n_paralle async def parallel_load_same_table(pg: PgProtocol, n_parallel: int): workers = [] for worker_id in range(n_parallel): - worker = copy_test_data_to_table(pg, worker_id, f"copytest") + worker = copy_test_data_to_table(pg, worker_id, "copytest") workers.append(asyncio.create_task(worker)) # await all workers @@ -72,7 +69,7 @@ def test_parallel_copy_same_table(neon_with_baseline: PgCompare, n_parallel=5): conn = env.pg.connect() cur = conn.cursor() - cur.execute(f"CREATE TABLE copytest (i int, t text)") + cur.execute("CREATE TABLE copytest (i int, t text)") with env.record_pageserver_writes("pageserver_writes"): with env.record_duration("load"): diff --git a/test_runner/performance/test_random_writes.py b/test_runner/performance/test_random_writes.py index 8ed684af16..df766d52da 100644 --- a/test_runner/performance/test_random_writes.py +++ b/test_runner/performance/test_random_writes.py @@ -1,13 +1,8 @@ -import os import random -import time from contextlib import closing -import psycopg2.extras from fixtures.benchmark_fixture import MetricReport -from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare -from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv +from fixtures.compare_fixtures import PgCompare from fixtures.utils import query_scalar diff --git a/test_runner/performance/test_seqscans.py b/test_runner/performance/test_seqscans.py index 6094ed38e5..c681c50ff5 100644 --- a/test_runner/performance/test_seqscans.py +++ b/test_runner/performance/test_seqscans.py @@ -1,13 +1,11 @@ # Test sequential scan speed # from contextlib import closing -from dataclasses import dataclass import pytest -from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker +from fixtures.benchmark_fixture import MetricReport from 
fixtures.compare_fixtures import PgCompare from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv @pytest.mark.parametrize( diff --git a/test_runner/performance/test_write_amplification.py b/test_runner/performance/test_write_amplification.py index 7aab469387..30c217e392 100644 --- a/test_runner/performance/test_write_amplification.py +++ b/test_runner/performance/test_write_amplification.py @@ -10,13 +10,9 @@ # in LSN order, writing the oldest layer first. That creates a new 10 MB image # layer to be created for each of those small updates. This is the Write # Amplification problem at its finest. -import os from contextlib import closing -from fixtures.benchmark_fixture import MetricReport -from fixtures.compare_fixtures import NeonCompare, PgCompare, VanillaCompare -from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv +from fixtures.compare_fixtures import PgCompare def test_write_amplification(neon_with_baseline: PgCompare): diff --git a/test_runner/pg_clients/python/pg8000/pg8000_example.py b/test_runner/pg_clients/python/pg8000/pg8000_example.py index f463867f88..b1d77af5bb 100755 --- a/test_runner/pg_clients/python/pg8000/pg8000_example.py +++ b/test_runner/pg_clients/python/pg8000/pg8000_example.py @@ -1,7 +1,6 @@ #! 
/usr/bin/env python3 import os -import ssl import pg8000.dbapi diff --git a/test_runner/pg_clients/test_pg_clients.py b/test_runner/pg_clients/test_pg_clients.py index f91a2adf7d..2dbab19e7a 100644 --- a/test_runner/pg_clients/test_pg_clients.py +++ b/test_runner/pg_clients/test_pg_clients.py @@ -1,6 +1,4 @@ -import os import shutil -import subprocess from pathlib import Path from tempfile import NamedTemporaryFile From 6dc56a9be112d10e9fe2c05babe9d24b0590499b Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Fri, 19 Aug 2022 23:49:51 +0200 Subject: [PATCH 25/63] Add GitHub templates for epics, bugs and release PRs (neondatabase/cloud#2079) After merging this we will be able to: - Pick Epic or Bug template in the GitHub UI, when creating an issue - Use this link to open a release PR formatted in a unified way and containing a checklist with useful links: https://github.com/neondatabase/neon/compare/release...main?template=release-pr.md&title=Release%20202Y-MM-DD --- .github/ISSUE_TEMPLATE/bug-template.md | 23 +++++++++++++++++++ .github/ISSUE_TEMPLATE/epic-template.md | 25 +++++++++++++++++++++ .github/PULL_REQUEST_TEMPLATE/release-pr.md | 20 +++++++++++++++++ 3 files changed, 68 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug-template.md create mode 100644 .github/ISSUE_TEMPLATE/epic-template.md create mode 100644 .github/PULL_REQUEST_TEMPLATE/release-pr.md diff --git a/.github/ISSUE_TEMPLATE/bug-template.md b/.github/ISSUE_TEMPLATE/bug-template.md new file mode 100644 index 0000000000..d33eec3cde --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-template.md @@ -0,0 +1,23 @@ +--- +name: Bug Template +about: Used for describing bugs +title: '' +labels: t/bug +assignees: '' + +--- + +## Steps to reproduce + + +## Expected result + + +## Actual result + + +## Environment + + +## Logs, links +- diff --git a/.github/ISSUE_TEMPLATE/epic-template.md b/.github/ISSUE_TEMPLATE/epic-template.md new file mode 100644 index 0000000000..33ad7b1ef5 --- /dev/null +++ 
b/.github/ISSUE_TEMPLATE/epic-template.md @@ -0,0 +1,25 @@ +--- +name: Epic Template +about: A set of related tasks contributing towards specific outcome, comprizing of + more than 1 week of work. +title: 'Epic: ' +labels: t/Epic +assignees: '' + +--- + +## Motivation + + +## DoD + + +## Implementation ideas + + +## Tasks +- [ ] + + +## Other related tasks and Epics +- diff --git a/.github/PULL_REQUEST_TEMPLATE/release-pr.md b/.github/PULL_REQUEST_TEMPLATE/release-pr.md new file mode 100644 index 0000000000..6f86114060 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE/release-pr.md @@ -0,0 +1,20 @@ +## Release 202Y-MM-DD + +**NB: this PR must be merged only by 'Create a merge commit'!** + +### Checklist when preparing for release +- [ ] Read or refresh [the release flow guide](https://github.com/neondatabase/cloud/wiki/Release:-general-flow) +- [ ] Ask in the [cloud Slack channel](https://neondb.slack.com/archives/C033A2WE6BZ) that you are going to rollout the release. Any blockers? +- [ ] Does this release contain any db migrations? Destructive ones? What is the rollback plan? 
+ + + +### Checklist after release +- [ ] Based on the merged commits write release notes and open a PR into `website` repo ([example](https://github.com/neondatabase/website/pull/120/files)) +- [ ] Check [#dev-production-stream](https://neondb.slack.com/archives/C03F5SM1N02) Slack channel +- [ ] Check [stuck projects page](https://console.neon.tech/admin/projects?sort=last_active&order=desc&stuck=true) +- [ ] Check [recent operation failures](https://console.neon.tech/admin/operations?action=create_timeline%2Cstart_compute%2Cstop_compute%2Csuspend_compute%2Capply_config%2Cdelete_timeline%2Cdelete_tenant%2Ccreate_branch%2Ccheck_availability&sort=updated_at&order=desc&had_retries=some) +- [ ] Check [cloud SLO dashboard](https://observer.zenith.tech/d/_oWcBMJ7k/cloud-slos?orgId=1) +- [ ] Check [compute startup metrics dashboard](https://observer.zenith.tech/d/5OkYJEmVz/compute-startup-time) + + From 832e60c2b4fe700ba703cea3fb0740a37abeb39a Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Mon, 22 Aug 2022 16:38:31 +0100 Subject: [PATCH 26/63] Add .git-blame-ignore-revs file (#2318) --- .git-blame-ignore-revs | 1 + 1 file changed, 1 insertion(+) create mode 100644 .git-blame-ignore-revs diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000000..3afa4b683c --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1 @@ +4c2bb43775947775401cbb9d774823c5723a91f8 From 9dd19ec397b27d2766f6a66d5d4000647607a7e7 Mon Sep 17 00:00:00 2001 From: Dmitry Rodionov Date: Mon, 22 Aug 2022 17:54:03 +0300 Subject: [PATCH 27/63] Remove interferring proc check We do not need it anymore because ports_distributor checks whether the port can be used before giving it to service --- .github/workflows/benchmarking.yml | 8 ++++---- test_runner/fixtures/neon_fixtures.py | 29 --------------------------- 2 files changed, 4 insertions(+), 33 deletions(-) diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 
8080d6b7db..4ed6ac80fd 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -106,7 +106,7 @@ jobs: mkdir -p perf-report-staging # Set --sparse-ordering option of pytest-order plugin to ensure tests are running in order of appears in the file, # it's important for test_perf_pgbench.py::test_pgbench_remote_* tests - ./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --sparse-ordering --skip-interfering-proc-check --out-dir perf-report-staging --timeout 5400 + ./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --sparse-ordering --out-dir perf-report-staging --timeout 5400 - name: Submit result env: @@ -186,7 +186,7 @@ jobs: mkdir -p perf-report-captest psql $BENCHMARK_CONNSTR -c "SELECT 1;" - ./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_init -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-captest --timeout 21600 + ./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_init -v -m "remote_cluster" --out-dir perf-report-captest --timeout 21600 - name: Benchmark simple-update env: @@ -194,7 +194,7 @@ jobs: BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }} run: | psql $BENCHMARK_CONNSTR -c "SELECT 1;" - ./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_simple_update -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-captest --timeout 21600 + ./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_simple_update -v -m "remote_cluster" --out-dir perf-report-captest --timeout 21600 - name: Benchmark select-only env: @@ -202,7 +202,7 @@ jobs: BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }} run: | psql $BENCHMARK_CONNSTR -c "SELECT 1;" - ./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_select_only -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-captest --timeout 21600 + 
./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_select_only -v -m "remote_cluster" --out-dir perf-report-captest --timeout 21600 - name: Submit result env: diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index f4ed937f02..f1cffbe5ef 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -15,7 +15,6 @@ import tempfile import textwrap import time import uuid -import warnings from contextlib import closing, contextmanager from dataclasses import dataclass, field from enum import Flag, auto @@ -68,15 +67,6 @@ BASE_PORT = 15000 WORKER_PORT_NUM = 1000 -def pytest_addoption(parser): - parser.addoption( - "--skip-interfering-proc-check", - dest="skip_interfering_proc_check", - action="store_true", - help="skip check for interfering processes", - ) - - # These are set in pytest_configure() base_dir = "" neon_binpath = "" @@ -84,30 +74,11 @@ pg_distrib_dir = "" top_output_dir = "" -def check_interferring_processes(config): - if config.getoption("skip_interfering_proc_check"): - warnings.warn("interfering process check is skipped") - return - - # does not use -c as it is not supported on macOS - cmd = ["pgrep", "pageserver|postgres|safekeeper"] - result = subprocess.run(cmd, stdout=subprocess.DEVNULL) - if result.returncode == 0: - # returncode of 0 means it found something. - # This is bad; we don't want any of those processes polluting the - # result of the test. - # NOTE this shows as an internal pytest error, there might be a better way - raise Exception( - "Found interfering processes running. Stop all Neon pageservers, nodes, safekeepers, as well as stand-alone Postgres." - ) - - def pytest_configure(config): """ Ensure that no unwanted daemons are running before we start testing. Check that we do not overflow available ports range. 
""" - check_interferring_processes(config) numprocesses = config.getoption("numprocesses") if ( From b98fa5d6b0b2e8151f9b1385dcaadc8c2f329618 Mon Sep 17 00:00:00 2001 From: KlimentSerafimov Date: Mon, 22 Aug 2022 20:02:45 -0400 Subject: [PATCH 28/63] Added a new test for making sure the proxy displays a session_id when using link auth. (#2039) Added pytest to check correctness of the link authentication pipeline. Context: this PR is the first step towards refactoring the link authentication pipeline to use https (instead of psql) to send the db info to the proxy. There was a test missing for this pipeline in this repo, so this PR adds that test as preparation for the actual change of psql -> https. Co-authored-by: Bojan Serafimov Co-authored-by: Dmitry Rodionov Co-authored-by: Stas Kelvic Co-authored-by: Dimitrii Ivanov --- poetry.lock | 30 ++++-- pyproject.toml | 1 + test_runner/batch_others/test_proxy.py | 121 +++++++++++++++++++++++++ test_runner/fixtures/neon_fixtures.py | 72 ++++++++++++++- 4 files changed, 216 insertions(+), 8 deletions(-) diff --git a/poetry.lock b/poetry.lock index e1f2e576eb..6bce17008e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -622,8 +622,8 @@ six = ">=1.4.0" websocket-client = ">=0.32.0" [package.extras] +tls = ["idna (>=2.0.0)", "cryptography (>=1.3.4)", "pyOpenSSL (>=17.5.0)"] ssh = ["paramiko (>=2.4.2)"] -tls = ["pyOpenSSL (>=17.5.0)", "cryptography (>=1.3.4)", "idna (>=2.0.0)"] [[package]] name = "ecdsa" @@ -1055,8 +1055,8 @@ optional = false python-versions = ">=3.7" [package.extras] -docs = ["furo (>=2021.7.5b38)", "proselint (>=0.10.2)", "sphinx-autodoc-typehints (>=1.12)", "sphinx (>=4)"] -test = ["appdirs (==1.4.4)", "pytest-cov (>=2.7)", "pytest-mock (>=3.6)", "pytest (>=6)"] +test = ["pytest (>=6)", "pytest-mock (>=3.6)", "pytest-cov (>=2.7)", "appdirs (==1.4.4)"] +docs = ["sphinx (>=4)", "sphinx-autodoc-typehints (>=1.12)", "proselint (>=0.10.2)", "furo (>=2021.7.5b38)"] [[package]] name = "pluggy" @@ -1067,8 +1067,8 @@ 
optional = false python-versions = ">=3.6" [package.extras] -dev = ["pre-commit", "tox"] -testing = ["pytest", "pytest-benchmark"] +testing = ["pytest-benchmark", "pytest"] +dev = ["tox", "pre-commit"] [[package]] name = "prometheus-client" @@ -1197,6 +1197,20 @@ toml = "*" [package.extras] testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] +[[package]] +name = "pytest-asyncio" +version = "0.19.0" +description = "Pytest support for asyncio" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +pytest = ">=6.1.0" + +[package.extras] +testing = ["pytest-trio (>=0.7.0)", "mypy (>=0.931)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "coverage (>=6.2)"] + [[package]] name = "pytest-forked" version = "1.4.0" @@ -1537,7 +1551,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest- [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "2112382a6723ed3b77d242db926c7445fa809fafcf11da127b5292565d2ba798" +content-hash = "badfeff521c68277b10555ab4174847b7315d82818ef5841e600299fb6128698" [metadata.files] aiopg = [ @@ -2076,6 +2090,10 @@ pytest = [ {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"}, {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"}, ] +pytest-asyncio = [ + {file = "pytest-asyncio-0.19.0.tar.gz", hash = "sha256:ac4ebf3b6207259750bc32f4c1d8fcd7e79739edbc67ad0c58dd150b1d072fed"}, + {file = "pytest_asyncio-0.19.0-py3-none-any.whl", hash = "sha256:7a97e37cfe1ed296e2e84941384bdd37c376453912d397ed39293e0916f521fa"}, +] pytest-forked = [ {file = "pytest-forked-1.4.0.tar.gz", hash = "sha256:8b67587c8f98cbbadfdd804539ed5455b6ed03802203485dd2f53c1422d7440e"}, {file = "pytest_forked-1.4.0-py3-none-any.whl", hash = "sha256:bbbb6717efc886b9d64537b41fb1497cfaf3c9601276be8da2cccfea5a3c8ad8"}, diff --git a/pyproject.toml 
b/pyproject.toml index d648d1050a..2c9270934d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ pytest-timeout = "^2.1.0" Werkzeug = "2.1.2" pytest-order = "^1.0.1" allure-pytest = "^2.9.45" +pytest-asyncio = "^0.19.0" [tool.poetry.dev-dependencies] flake8 = "^5.0.4" diff --git a/test_runner/batch_others/test_proxy.py b/test_runner/batch_others/test_proxy.py index dcff177044..4ffd458b22 100644 --- a/test_runner/batch_others/test_proxy.py +++ b/test_runner/batch_others/test_proxy.py @@ -1,5 +1,11 @@ +import json +import subprocess +from urllib.parse import urlparse + import psycopg2 import pytest +from fixtures.log_helper import log +from fixtures.neon_fixtures import PSQL, NeonProxy, VanillaPostgres def test_proxy_select_1(static_proxy): @@ -23,6 +29,121 @@ def test_password_hack(static_proxy): static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic) +def get_session_id_from_uri_line(uri_prefix, uri_line): + assert uri_prefix in uri_line + + url_parts = urlparse(uri_line) + psql_session_id = url_parts.path[1:] + assert psql_session_id.isalnum(), "session_id should only contain alphanumeric chars." + link_auth_uri_prefix = uri_line[: -len(url_parts.path)] + # invariant: the prefix must match the uri_prefix. + assert ( + link_auth_uri_prefix == uri_prefix + ), f"Line='{uri_line}' should contain a http auth link of form '{uri_prefix}/'." + # invariant: the entire link_auth_uri should be on its own line, module spaces. 
+ assert " ".join(uri_line.split(" ")) == f"{uri_prefix}/{psql_session_id}" + + return psql_session_id + + +def create_and_send_db_info(local_vanilla_pg, psql_session_id, mgmt_port): + pg_user = "proxy" + pg_password = "password" + + local_vanilla_pg.start() + query = f"create user {pg_user} with login superuser password '{pg_password}'" + local_vanilla_pg.safe_psql(query) + + port = local_vanilla_pg.default_options["port"] + host = local_vanilla_pg.default_options["host"] + dbname = local_vanilla_pg.default_options["dbname"] + + db_info_dict = { + "session_id": psql_session_id, + "result": { + "Success": { + "host": host, + "port": port, + "dbname": dbname, + "user": pg_user, + "password": pg_password, + } + }, + } + db_info_str = json.dumps(db_info_dict) + cmd_args = [ + "psql", + "-h", + "127.0.0.1", # localhost + "-p", + f"{mgmt_port}", + "-c", + db_info_str, + ] + + log.info(f"Sending to proxy the user and db info: {' '.join(cmd_args)}") + p = subprocess.Popen(cmd_args, stdout=subprocess.PIPE) + out, err = p.communicate() + assert "ok" in str(out) + + +async def get_uri_line_from_process_welcome_notice(link_auth_uri_prefix, proc): + """ + Returns the line from the welcome notice from proc containing link_auth_uri_prefix. + :param link_auth_uri_prefix: the uri prefix used to indicate the line of interest + :param proc: the process to read the welcome message from. + :return: a line containing the full link authentication uri. + """ + max_num_lines_of_welcome_message = 15 + for attempt in range(max_num_lines_of_welcome_message): + raw_line = await proc.stderr.readline() + line = raw_line.decode("utf-8").strip() + if link_auth_uri_prefix in line: + return line + assert False, f"did not find line containing '{link_auth_uri_prefix}'" + + +@pytest.mark.asyncio +async def test_psql_session_id(vanilla_pg: VanillaPostgres, link_proxy: NeonProxy): + """ + Test copied and modified from: test_project_psql_link_auth test from cloud/tests_e2e/tests/test_project.py + Step 1. 
establish connection to the proxy + Step 2. retrieve session_id: + Step 2.1: read welcome message + Step 2.2: parse session_id + Step 3. create a vanilla_pg and send user and db info via command line (using Popen) a psql query via mgmt port to proxy. + Step 4. assert that select 1 has been executed correctly. + """ + + # Step 1. + psql = PSQL( + host=link_proxy.host, + port=link_proxy.proxy_port, + ) + proc = await psql.run("select 1") + + # Step 2.1 + uri_prefix = link_proxy.link_auth_uri_prefix + line_str = await get_uri_line_from_process_welcome_notice(uri_prefix, proc) + + # step 2.2 + psql_session_id = get_session_id_from_uri_line(uri_prefix, line_str) + log.info(f"Parsed psql_session_id='{psql_session_id}' from Neon welcome message.") + + # Step 3. + create_and_send_db_info(vanilla_pg, psql_session_id, link_proxy.mgmt_port) + + # Step 4. + # Expecting proxy output:: + # b' ?column? \n' + # b'----------\n' + # b' 1\n' + # b'(1 row)\n' + out_bytes = await proc.stdout.read() + expected_out_bytes = b" ?column? \n----------\n 1\n(1 row)\n\n" + assert out_bytes == expected_out_bytes + + # Pass extra options to the server. # # Currently, proxy eats the extra connection options, so this fails. diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index f1cffbe5ef..3af0cf4dcb 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1,6 +1,7 @@ from __future__ import annotations import abc +import asyncio import enum import filecmp import json @@ -1716,21 +1717,58 @@ def remote_pg(test_output_dir: Path) -> Iterator[RemotePostgres]: yield remote_pg +class PSQL: + """ + Helper class to make it easier to run psql in the proxy tests. 
+ Copied and modified from PSQL from cloud/tests_e2e/common/psql.py + """ + + path: str + database_url: str + + def __init__( + self, + path: str = "psql", + host: str = "127.0.0.1", + port: int = 5432, + ): + assert shutil.which(path) + + self.path = path + self.database_url = f"postgres://{host}:{port}/main?options=project%3Dgeneric-project-name" + + async def run(self, query=None): + run_args = [self.path, self.database_url] + run_args += ["--command", query] if query is not None else [] + + cmd_line = subprocess.list2cmdline(run_args) + log.info(f"Run psql: {cmd_line}") + return await asyncio.create_subprocess_exec( + *run_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + + class NeonProxy(PgProtocol): - def __init__(self, proxy_port: int, http_port: int, auth_endpoint: str): + def __init__(self, proxy_port: int, http_port: int, auth_endpoint=None, mgmt_port=None): super().__init__(dsn=auth_endpoint, port=proxy_port) self.host = "127.0.0.1" self.http_port = http_port self.proxy_port = proxy_port + self.mgmt_port = mgmt_port self.auth_endpoint = auth_endpoint self._popen: Optional[subprocess.Popen[bytes]] = None + self.link_auth_uri_prefix = "http://dummy-uri" def start(self) -> None: + """ + Starts a proxy with option '--auth-backend postgres' and a postgres instance already provided though '--auth-endpoint '." + """ assert self._popen is None + assert self.auth_endpoint is not None # Start proxy args = [ - os.path.join(str(neon_binpath), "proxy"), + os.path.join(neon_binpath, "proxy"), *["--http", f"{self.host}:{self.http_port}"], *["--proxy", f"{self.host}:{self.proxy_port}"], *["--auth-backend", "postgres"], @@ -1739,6 +1777,25 @@ class NeonProxy(PgProtocol): self._popen = subprocess.Popen(args) self._wait_until_ready() + def start_with_link_auth(self) -> None: + """ + Starts a proxy with option '--auth-backend link' and a dummy authentication link '--uri dummy-auth-link'." 
+ """ + assert self._popen is None + + # Start proxy + bin_proxy = os.path.join(str(neon_binpath), "proxy") + args = [bin_proxy] + args.extend(["--http", f"{self.host}:{self.http_port}"]) + args.extend(["--proxy", f"{self.host}:{self.proxy_port}"]) + args.extend(["--mgmt", f"{self.host}:{self.mgmt_port}"]) + args.extend(["--auth-backend", "link"]) + args.extend(["--uri", self.link_auth_uri_prefix]) + arg_str = " ".join(args) + log.info(f"starting proxy with command line ::: {arg_str}") + self._popen = subprocess.Popen(args, stdout=subprocess.PIPE) + self._wait_until_ready() + @backoff.on_exception(backoff.expo, requests.exceptions.RequestException, max_time=10) def _wait_until_ready(self): requests.get(f"http://{self.host}:{self.http_port}/v1/status") @@ -1753,6 +1810,17 @@ class NeonProxy(PgProtocol): self._popen.kill() +@pytest.fixture(scope="function") +def link_proxy(port_distributor) -> Iterator[NeonProxy]: + """Neon proxy that routes through link auth.""" + http_port = port_distributor.get_port() + proxy_port = port_distributor.get_port() + mgmt_port = port_distributor.get_port() + with NeonProxy(proxy_port, http_port, mgmt_port=mgmt_port) as proxy: + proxy.start_with_link_auth() + yield proxy + + @pytest.fixture(scope="function") def static_proxy(vanilla_pg, port_distributor) -> Iterator[NeonProxy]: """Neon proxy that routes directly to vanilla postgres.""" From d110d2c2fddf461cd85bb2d49b86bbe9f7f6998b Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Tue, 23 Aug 2022 12:14:06 +0300 Subject: [PATCH 29/63] Reorder permission checks in HTTP API call handlers. Every handler function now follows the same pattern: 1. extract parameters from the call 2. check permissions 3. execute command. Previously, we extracted some parameters before permission check and some after. Let's be consistent. 
--- pageserver/src/http/routes.rs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index da21f6883a..2bb181dd9a 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -206,7 +206,6 @@ async fn status_handler(request: Request) -> Result, ApiErr async fn timeline_create_handler(mut request: Request) -> Result, ApiError> { let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?; let request_data: TimelineCreateRequest = json_request(&mut request).await?; - check_permission(&request, Some(tenant_id))?; let new_timeline_info = tokio::task::spawn_blocking(move || { @@ -244,11 +243,12 @@ async fn timeline_create_handler(mut request: Request) -> Result) -> Result, ApiError> { let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?; - check_permission(&request, Some(tenant_id))?; let include_non_incremental_logical_size = query_param_present(&request, "include-non-incremental-logical-size"); let include_non_incremental_physical_size = query_param_present(&request, "include-non-incremental-physical-size"); + check_permission(&request, Some(tenant_id))?; + let local_timeline_infos = tokio::task::spawn_blocking(move || { let _enter = info_span!("timeline_list", tenant = %tenant_id).entered(); list_local_timelines( @@ -299,13 +299,12 @@ fn query_param_present(request: &Request, param: &str) -> bool { async fn timeline_detail_handler(request: Request) -> Result, ApiError> { let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?; - check_permission(&request, Some(tenant_id))?; - let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?; let include_non_incremental_logical_size = query_param_present(&request, "include-non-incremental-logical-size"); let include_non_incremental_physical_size = query_param_present(&request, "include-non-incremental-physical-size"); + check_permission(&request, 
Some(tenant_id))?; let (local_timeline_info, remote_timeline_info) = async { // any error here will render local timeline as None @@ -369,7 +368,7 @@ async fn tenant_attach_handler(request: Request) -> Result, let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?; check_permission(&request, Some(tenant_id))?; - info!("Handling tenant attach {}", tenant_id,); + info!("Handling tenant attach {}", tenant_id); tokio::task::spawn_blocking(move || { if tenant_mgr::get_tenant_state(tenant_id).is_some() { @@ -478,9 +477,8 @@ async fn gather_tenant_timelines_index_parts( async fn timeline_delete_handler(request: Request) -> Result, ApiError> { let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?; - check_permission(&request, Some(tenant_id))?; - let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?; + check_permission(&request, Some(tenant_id))?; let state = get_state(&request); tokio::task::spawn_blocking(move || { @@ -519,7 +517,6 @@ async fn tenant_detach_handler(request: Request) -> Result, } async fn tenant_list_handler(request: Request) -> Result, ApiError> { - // check for management permission check_permission(&request, None)?; let state = get_state(&request); @@ -587,7 +584,6 @@ async fn tenant_status(request: Request) -> Result, ApiErro } async fn tenant_create_handler(mut request: Request) -> Result, ApiError> { - // check for management permission check_permission(&request, None)?; let request_data: TenantCreateRequest = json_request(&mut request).await?; @@ -656,7 +652,6 @@ async fn tenant_create_handler(mut request: Request) -> Result) -> Result, ApiError> { let request_data: TenantConfigRequest = json_request(&mut request).await?; let tenant_id = request_data.tenant_id; - // check for management permission check_permission(&request, Some(tenant_id))?; let mut tenant_conf: TenantConfOpt = Default::default(); From 1a666a01d672298a1da12771c4b68a28c1d5ebed Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas 
Date: Tue, 23 Aug 2022 12:17:20 +0300 Subject: [PATCH 30/63] Improve comments a little. --- pageserver/src/http/models.rs | 3 +++ pageserver/src/layered_repository.rs | 10 +++++----- pageserver/src/layered_repository/timeline.rs | 6 +++--- pageserver/src/page_service.rs | 2 +- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pageserver/src/http/models.rs b/pageserver/src/http/models.rs index 232c202ed9..654f45a95d 100644 --- a/pageserver/src/http/models.rs +++ b/pageserver/src/http/models.rs @@ -150,6 +150,9 @@ pub struct RemoteTimelineInfo { pub awaits_download: bool, } +/// +/// This represents the output of the "timeline_detail" API call. +/// #[serde_as] #[derive(Debug, Serialize, Deserialize, Clone)] pub struct TimelineInfo { diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs index dd173498b9..0bfa1cd268 100644 --- a/pageserver/src/layered_repository.rs +++ b/pageserver/src/layered_repository.rs @@ -656,9 +656,9 @@ impl Repository { /// Locate and load config pub fn load_tenant_config( conf: &'static PageServerConf, - tenantid: ZTenantId, + tenant_id: ZTenantId, ) -> anyhow::Result { - let target_config_path = TenantConf::path(conf, tenantid); + let target_config_path = TenantConf::path(conf, tenant_id); info!("load tenantconf from {}", target_config_path.display()); @@ -693,11 +693,11 @@ impl Repository { pub fn persist_tenant_config( conf: &'static PageServerConf, - tenantid: ZTenantId, + tenant_id: ZTenantId, tenant_conf: TenantConfOpt, ) -> anyhow::Result<()> { let _enter = info_span!("saving tenantconf").entered(); - let target_config_path = TenantConf::path(conf, tenantid); + let target_config_path = TenantConf::path(conf, tenant_id); info!("save tenantconf to {}", target_config_path.display()); let mut conf_content = r#"# This file contains a specific per-tenant's config. 
@@ -834,7 +834,7 @@ impl Repository { // compaction (both require `layer_removal_cs` lock), // but the GC iteration can run concurrently with branch creation. // - // See comments in [`LayeredRepository::branch_timeline`] for more information + // See comments in [`Repository::branch_timeline`] for more information // about why branch creation task can run concurrently with timeline's GC iteration. for timeline in gc_timelines { if thread_mgr::is_shutdown_requested() { diff --git a/pageserver/src/layered_repository/timeline.rs b/pageserver/src/layered_repository/timeline.rs index fb5a4d0b83..a909dcb5a1 100644 --- a/pageserver/src/layered_repository/timeline.rs +++ b/pageserver/src/layered_repository/timeline.rs @@ -354,8 +354,8 @@ pub struct Timeline { upload_layers: AtomicBool, /// Ensures layers aren't frozen by checkpointer between - /// [`LayeredTimeline::get_layer_for_write`] and layer reads. - /// Locked automatically by [`LayeredTimelineWriter`] and checkpointer. + /// [`Timeline::get_layer_for_write`] and layer reads. + /// Locked automatically by [`TimelineWriter`] and checkpointer. /// Must always be acquired before the layer map/individual layer lock /// to avoid deadlock. write_lock: Mutex<()>, @@ -365,7 +365,7 @@ pub struct Timeline { /// Layer removal lock. /// A lock to ensure that no layer of the timeline is removed concurrently by other threads. - /// This lock is acquired in [`LayeredTimeline::gc`], [`LayeredTimeline::compact`], + /// This lock is acquired in [`Timeline::gc`], [`Timeline::compact`], /// and [`LayeredRepository::delete_timeline`]. 
layer_removal_cs: Mutex<()>, diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index e6114c0fc5..c21d5a6acc 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -744,7 +744,7 @@ impl PageServerHandler { let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)?; /* - // Add a 1s delay to some requests. The delayed causes the requests to + // Add a 1s delay to some requests. The delay helps the requests to // hit the race condition from github issue #1047 more easily. use rand::Rng; if rand::thread_rng().gen::() < 5 { From 63b9dfb2f21be88eef74734b210698463acb8701 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Tue, 23 Aug 2022 12:17:48 +0300 Subject: [PATCH 31/63] Remove unnecessary 'pub' from test module, and remove dead constant. After making the test module private, the compiler noticed and warned that the constant is unused. --- pageserver/src/layered_repository.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs index 0bfa1cd268..fae52c3daf 100644 --- a/pageserver/src/layered_repository.rs +++ b/pageserver/src/layered_repository.rs @@ -1085,7 +1085,7 @@ pub mod repo_harness { } #[cfg(test)] -pub mod tests { +mod tests { use super::metadata::METADATA_FILE_NAME; use super::*; use crate::keyspace::KeySpaceAccum; @@ -1467,12 +1467,6 @@ pub mod tests { Ok(()) } - // Target file size in the unit tests. In production, the target - // file size is much larger, maybe 1 GB. But a small size makes it - // much faster to exercise all the logic for creating the files, - // garbage collection, compaction etc. 
- pub const TEST_FILE_SIZE: u64 = 4 * 1024 * 1024; - #[test] fn test_images() -> Result<()> { let repo = RepoHarness::create("test_images")?.load(); From 5f0c95182d7584c4c84c21c51dd80cd5b9c075c0 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Tue, 23 Aug 2022 12:18:43 +0300 Subject: [PATCH 32/63] Minor cleanup, to pass by reference where possible. --- pageserver/src/page_service.rs | 8 ++++---- pageserver/src/pgdatadir_mapping.rs | 2 +- pageserver/src/walingest.rs | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index c21d5a6acc..ebcff1f2ac 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -494,22 +494,22 @@ impl PageServerHandler { PagestreamFeMessage::Exists(req) => SMGR_QUERY_TIME .with_label_values(&["get_rel_exists", &tenant_id, &timeline_id]) .observe_closure_duration(|| { - self.handle_get_rel_exists_request(timeline.as_ref(), &req) + self.handle_get_rel_exists_request(&timeline, &req) }), PagestreamFeMessage::Nblocks(req) => SMGR_QUERY_TIME .with_label_values(&["get_rel_size", &tenant_id, &timeline_id]) .observe_closure_duration(|| { - self.handle_get_nblocks_request(timeline.as_ref(), &req) + self.handle_get_nblocks_request(&timeline, &req) }), PagestreamFeMessage::GetPage(req) => SMGR_QUERY_TIME .with_label_values(&["get_page_at_lsn", &tenant_id, &timeline_id]) .observe_closure_duration(|| { - self.handle_get_page_at_lsn_request(timeline.as_ref(), &req) + self.handle_get_page_at_lsn_request(&timeline, &req) }), PagestreamFeMessage::DbSize(req) => SMGR_QUERY_TIME .with_label_values(&["get_db_size", &tenant_id, &timeline_id]) .observe_closure_duration(|| { - self.handle_db_size_request(timeline.as_ref(), &req) + self.handle_db_size_request(&timeline, &req) }), }; diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 0ace850a82..0f0bb1ed53 100644 --- a/pageserver/src/pgdatadir_mapping.rs 
+++ b/pageserver/src/pgdatadir_mapping.rs @@ -1391,7 +1391,7 @@ fn is_slru_block_key(key: Key) -> bool { #[cfg(test)] pub fn create_test_timeline( - repo: crate::layered_repository::Repository, + repo: &crate::layered_repository::Repository, timeline_id: utils::zid::ZTimelineId, ) -> Result> { let tline = repo.create_empty_timeline(timeline_id, Lsn(8))?; diff --git a/pageserver/src/walingest.rs b/pageserver/src/walingest.rs index f3789d43e3..c0965e7a22 100644 --- a/pageserver/src/walingest.rs +++ b/pageserver/src/walingest.rs @@ -1062,7 +1062,7 @@ mod tests { #[test] fn test_relsize() -> Result<()> { let repo = RepoHarness::create("test_relsize")?.load(); - let tline = create_test_timeline(repo, TIMELINE_ID)?; + let tline = create_test_timeline(&repo, TIMELINE_ID)?; let mut walingest = init_walingest_test(&*tline)?; let mut m = tline.begin_modification(Lsn(0x20)); @@ -1190,7 +1190,7 @@ mod tests { #[test] fn test_drop_extend() -> Result<()> { let repo = RepoHarness::create("test_drop_extend")?.load(); - let tline = create_test_timeline(repo, TIMELINE_ID)?; + let tline = create_test_timeline(&repo, TIMELINE_ID)?; let mut walingest = init_walingest_test(&*tline)?; let mut m = tline.begin_modification(Lsn(0x20)); @@ -1230,7 +1230,7 @@ mod tests { #[test] fn test_truncate_extend() -> Result<()> { let repo = RepoHarness::create("test_truncate_extend")?.load(); - let tline = create_test_timeline(repo, TIMELINE_ID)?; + let tline = create_test_timeline(&repo, TIMELINE_ID)?; let mut walingest = init_walingest_test(&*tline)?; // Create a 20 MB relation (the size is arbitrary) @@ -1318,7 +1318,7 @@ mod tests { #[test] fn test_large_rel() -> Result<()> { let repo = RepoHarness::create("test_large_rel")?.load(); - let tline = create_test_timeline(repo, TIMELINE_ID)?; + let tline = create_test_timeline(&repo, TIMELINE_ID)?; let mut walingest = init_walingest_test(&*tline)?; let mut lsn = 0x10; From 4013290508f3aa266ccb04dc4eff1d488f8ca482 Mon Sep 17 00:00:00 2001 From: Heikki 
Linnakangas Date: Tue, 23 Aug 2022 12:51:49 +0300 Subject: [PATCH 33/63] Fix module doc comment. `///` is used for comments on the *next* code that follows, so the comment actually applied to the `use std::collections::BTreeMap;` line that follows. rustfmt complained about that: error: an inner attribute is not permitted following an outer doc comment --> /home/heikki/git-sandbox/neon/libs/utils/src/seqwait_async.rs:7:1 | 5 | /// | --- previous doc comment 6 | 7 | #![warn(missing_docs)] | ^^^^^^^^^^^^^^^^^^^^^^ not permitted following an outer attribute 8 | 9 | use std::collections::BTreeMap; | ------------------------------- the inner attribute doesn't annotate this `use` import | = note: inner attributes, like `#![no_std]`, annotate the item enclosing them, and are usually found at the beginning of source files help: to annotate the `use` import, change the attribute from inner to outer style | 7 - #![warn(missing_docs)] 7 + #[warn(missing_docs)] | `//!` is the correct syntax for comments that apply to the whole file. --- libs/utils/src/seqwait_async.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libs/utils/src/seqwait_async.rs b/libs/utils/src/seqwait_async.rs index 09138e9dd4..f685e2b569 100644 --- a/libs/utils/src/seqwait_async.rs +++ b/libs/utils/src/seqwait_async.rs @@ -1,8 +1,8 @@ -/// -/// Async version of 'seqwait.rs' -/// -/// NOTE: This is currently unused. If you need this, you'll need to uncomment this in lib.rs. -/// +//! +//! Async version of 'seqwait.rs' +//! +//! NOTE: This is currently unused. If you need this, you'll need to uncomment this in lib.rs. +//! 
#![warn(missing_docs)] From 8e1d6dd848da5006f63a4a8088954ee39a3f5a05 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 23 Aug 2022 18:00:02 +0300 Subject: [PATCH 34/63] Minor cleanup in pq_proto (#2322) --- libs/utils/src/postgres_backend.rs | 15 +- libs/utils/src/pq_proto.rs | 330 +++++++++-------------------- 2 files changed, 107 insertions(+), 238 deletions(-) diff --git a/libs/utils/src/postgres_backend.rs b/libs/utils/src/postgres_backend.rs index 4d873bd5ac..604eb75aaf 100644 --- a/libs/utils/src/postgres_backend.rs +++ b/libs/utils/src/postgres_backend.rs @@ -163,14 +163,9 @@ pub fn is_socket_read_timed_out(error: &anyhow::Error) -> bool { false } -// Truncate 0 from C string in Bytes and stringify it (returns slice, no allocations) -// PG protocol strings are always C strings. -fn cstr_to_str(b: &Bytes) -> Result<&str> { - let without_null = if b.last() == Some(&0) { - &b[..b.len() - 1] - } else { - &b[..] - }; +// Cast a byte slice to a string slice, dropping null terminator if there's one. +fn cstr_to_str(bytes: &[u8]) -> Result<&str> { + let without_null = bytes.strip_suffix(&[0]).unwrap_or(bytes); std::str::from_utf8(without_null).map_err(|e| e.into()) } @@ -423,9 +418,9 @@ impl PostgresBackend { self.state = ProtoState::Established; } - FeMessage::Query(m) => { + FeMessage::Query(body) => { // remove null terminator - let query_string = cstr_to_str(&m.body)?; + let query_string = cstr_to_str(&body)?; trace!("got query {:?}", query_string); // xxx distinguish fatal and recoverable errors? diff --git a/libs/utils/src/pq_proto.rs b/libs/utils/src/pq_proto.rs index 3f14acd50d..2f8dcf31d3 100644 --- a/libs/utils/src/pq_proto.rs +++ b/libs/utils/src/pq_proto.rs @@ -25,8 +25,10 @@ pub const TEXT_OID: Oid = 25; #[derive(Debug)] pub enum FeMessage { StartupPacket(FeStartupPacket), - Query(FeQueryMessage), // Simple query - Parse(FeParseMessage), // Extended query protocol + // Simple query. + Query(Bytes), + // Extended query protocol. 
+ Parse(FeParseMessage), Describe(FeDescribeMessage), Bind(FeBindMessage), Execute(FeExecuteMessage), @@ -69,11 +71,6 @@ impl Distribution for Standard { } } -#[derive(Debug)] -pub struct FeQueryMessage { - pub body: Bytes, -} - // We only support the simple case of Parse on unnamed prepared statement and // no params #[derive(Debug)] @@ -89,7 +86,7 @@ pub struct FeDescribeMessage { // we only support unnamed prepared stmt and portal #[derive(Debug)] -pub struct FeBindMessage {} +pub struct FeBindMessage; // we only support unnamed prepared stmt or portal #[derive(Debug)] @@ -100,7 +97,7 @@ pub struct FeExecuteMessage { // we only support unnamed prepared stmt and portal #[derive(Debug)] -pub struct FeCloseMessage {} +pub struct FeCloseMessage; /// Retry a read on EINTR /// @@ -163,22 +160,20 @@ impl FeMessage { Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None), Err(e) => return Err(e.into()), }; - let len = retry_read!(stream.read_u32().await)?; - // The message length includes itself, so it better be at least 4 - let bodylen = len + // The message length includes itself, so it better be at least 4. + let len = retry_read!(stream.read_u32().await)? 
.checked_sub(4) - .context("invalid message length: parsing u32")?; + .context("invalid message length")?; - // Read message body - let mut body_buf: Vec = vec![0; bodylen as usize]; - stream.read_exact(&mut body_buf).await?; + let body = { + let mut buffer = vec![0u8; len as usize]; + stream.read_exact(&mut buffer).await?; + Bytes::from(buffer) + }; - let body = Bytes::from(body_buf); - - // Parse it match tag { - b'Q' => Ok(Some(FeMessage::Query(FeQueryMessage { body }))), + b'Q' => Ok(Some(FeMessage::Query(body))), b'P' => Ok(Some(FeParseMessage::parse(body)?)), b'D' => Ok(Some(FeDescribeMessage::parse(body)?)), b'E' => Ok(Some(FeExecuteMessage::parse(body)?)), @@ -302,124 +297,71 @@ impl FeStartupPacket { } impl FeParseMessage { - pub fn parse(mut buf: Bytes) -> anyhow::Result { - let _pstmt_name = read_null_terminated(&mut buf)?; - let query_string = read_null_terminated(&mut buf)?; - let nparams = buf.get_i16(); - + fn parse(mut buf: Bytes) -> anyhow::Result { // FIXME: the rust-postgres driver uses a named prepared statement // for copy_out(). We're not prepared to handle that correctly. For // now, just ignore the statement name, assuming that the client never // uses more than one prepared statement at a time. 
- /* - if !pstmt_name.is_empty() { - return Err(io::Error::new( - io::ErrorKind::InvalidInput, - "named prepared statements not implemented in Parse", - )); - } - */ - if nparams != 0 { - bail!("query params not implemented"); - } + let _pstmt_name = read_cstr(&mut buf)?; + let query_string = read_cstr(&mut buf)?; + let nparams = buf.get_i16(); + + ensure!(nparams == 0, "query params not implemented"); Ok(FeMessage::Parse(FeParseMessage { query_string })) } } impl FeDescribeMessage { - pub fn parse(mut buf: Bytes) -> anyhow::Result { + fn parse(mut buf: Bytes) -> anyhow::Result { let kind = buf.get_u8(); - let _pstmt_name = read_null_terminated(&mut buf)?; + let _pstmt_name = read_cstr(&mut buf)?; // FIXME: see FeParseMessage::parse - /* - if !pstmt_name.is_empty() { - return Err(io::Error::new( - io::ErrorKind::InvalidInput, - "named prepared statements not implemented in Describe", - )); - } - */ - - if kind != b'S' { - bail!("only prepared statmement Describe is implemented"); - } + ensure!( + kind == b'S', + "only prepared statemement Describe is implemented" + ); Ok(FeMessage::Describe(FeDescribeMessage { kind })) } } impl FeExecuteMessage { - pub fn parse(mut buf: Bytes) -> anyhow::Result { - let portal_name = read_null_terminated(&mut buf)?; + fn parse(mut buf: Bytes) -> anyhow::Result { + let portal_name = read_cstr(&mut buf)?; let maxrows = buf.get_i32(); - if !portal_name.is_empty() { - bail!("named portals not implemented"); - } - - if maxrows != 0 { - bail!("row limit in Execute message not supported"); - } + ensure!(portal_name.is_empty(), "named portals not implemented"); + ensure!(maxrows == 0, "row limit in Execute message not implemented"); Ok(FeMessage::Execute(FeExecuteMessage { maxrows })) } } impl FeBindMessage { - pub fn parse(mut buf: Bytes) -> anyhow::Result { - let portal_name = read_null_terminated(&mut buf)?; - let _pstmt_name = read_null_terminated(&mut buf)?; - - if !portal_name.is_empty() { - bail!("named portals not implemented"); - } 
+ fn parse(mut buf: Bytes) -> anyhow::Result { + let portal_name = read_cstr(&mut buf)?; + let _pstmt_name = read_cstr(&mut buf)?; // FIXME: see FeParseMessage::parse - /* - if !pstmt_name.is_empty() { - return Err(io::Error::new( - io::ErrorKind::InvalidInput, - "named prepared statements not implemented", - )); - } - */ + ensure!(portal_name.is_empty(), "named portals not implemented"); - Ok(FeMessage::Bind(FeBindMessage {})) + Ok(FeMessage::Bind(FeBindMessage)) } } impl FeCloseMessage { - pub fn parse(mut buf: Bytes) -> anyhow::Result { + fn parse(mut buf: Bytes) -> anyhow::Result { let _kind = buf.get_u8(); - let _pstmt_or_portal_name = read_null_terminated(&mut buf)?; + let _pstmt_or_portal_name = read_cstr(&mut buf)?; // FIXME: we do nothing with Close - - Ok(FeMessage::Close(FeCloseMessage {})) + Ok(FeMessage::Close(FeCloseMessage)) } } -fn read_null_terminated(buf: &mut Bytes) -> anyhow::Result { - let mut result = BytesMut::new(); - - loop { - if !buf.has_remaining() { - bail!("no null-terminator in string"); - } - - let byte = buf.get_u8(); - - if byte == 0 { - break; - } - result.put_u8(byte); - } - Ok(result.freeze()) -} - // Backend #[derive(Debug)] @@ -441,7 +383,7 @@ pub enum BeMessage<'a> { // None means column is NULL DataRow(&'a [Option<&'a [u8]>]), ErrorResponse(&'a str), - // single byte - used in response to SSLRequest/GSSENCRequest + /// Single byte - used in response to SSLRequest/GSSENCRequest. EncryptionResponse(bool), NoData, ParameterDescription, @@ -554,49 +496,22 @@ pub static SINGLE_COL_ROWDESC: BeMessage = BeMessage::RowDescription(&[RowDescri formatcode: 0, }]); -// Safe usize -> i32|i16 conversion, from rust-postgres -trait FromUsize: Sized { - fn from_usize(x: usize) -> Result; -} - -macro_rules! 
from_usize { - ($t:ty) => { - impl FromUsize for $t { - #[inline] - fn from_usize(x: usize) -> io::Result<$t> { - if x > <$t>::max_value() as usize { - Err(io::Error::new( - io::ErrorKind::InvalidInput, - "value too large to transmit", - )) - } else { - Ok(x as $t) - } - } - } - }; -} - -from_usize!(i32); - /// Call f() to write body of the message and prepend it with 4-byte len as /// prescribed by the protocol. -fn write_body(buf: &mut BytesMut, f: F) -> io::Result<()> -where - F: FnOnce(&mut BytesMut) -> io::Result<()>, -{ +fn write_body(buf: &mut BytesMut, f: impl FnOnce(&mut BytesMut) -> R) -> R { let base = buf.len(); buf.extend_from_slice(&[0; 4]); - f(buf)?; + let res = f(buf); - let size = i32::from_usize(buf.len() - base)?; + let size = i32::try_from(buf.len() - base).expect("message too big to transmit"); (&mut buf[base..]).put_slice(&size.to_be_bytes()); - Ok(()) + + res } /// Safe write of s into buf as cstring (String in the protocol). -pub fn write_cstr(s: &[u8], buf: &mut BytesMut) -> Result<(), io::Error> { +fn write_cstr(s: &[u8], buf: &mut BytesMut) -> Result<(), io::Error> { if s.contains(&0) { return Err(io::Error::new( io::ErrorKind::InvalidInput, @@ -608,15 +523,11 @@ pub fn write_cstr(s: &[u8], buf: &mut BytesMut) -> Result<(), io::Error> { Ok(()) } -// Truncate 0 from C string in Bytes and stringify it (returns slice, no allocations) -// PG protocol strings are always C strings. -fn cstr_to_str(b: &Bytes) -> Result<&str> { - let without_null = if b.last() == Some(&0) { - &b[..b.len() - 1] - } else { - &b[..] 
- }; - std::str::from_utf8(without_null).map_err(|e| e.into()) +fn read_cstr(buf: &mut Bytes) -> anyhow::Result { + let pos = buf.iter().position(|x| *x == 0); + let result = buf.split_to(pos.context("missing terminator")?); + buf.advance(1); // drop the null terminator + Ok(result) } impl<'a> BeMessage<'a> { @@ -631,18 +542,14 @@ impl<'a> BeMessage<'a> { buf.put_u8(b'R'); write_body(buf, |buf| { buf.put_i32(0); // Specifies that the authentication was successful. - Ok::<_, io::Error>(()) - }) - .unwrap(); // write into BytesMut can't fail + }); } BeMessage::AuthenticationCleartextPassword => { buf.put_u8(b'R'); write_body(buf, |buf| { buf.put_i32(3); // Specifies that clear text password is required. - Ok::<_, io::Error>(()) - }) - .unwrap(); // write into BytesMut can't fail + }); } BeMessage::AuthenticationMD5Password(salt) => { @@ -650,9 +557,7 @@ impl<'a> BeMessage<'a> { write_body(buf, |buf| { buf.put_i32(5); // Specifies that an MD5-encrypted password is required. buf.put_slice(&salt[..]); - Ok::<_, io::Error>(()) - }) - .unwrap(); // write into BytesMut can't fail + }); } BeMessage::AuthenticationSasl(msg) => { @@ -677,8 +582,7 @@ impl<'a> BeMessage<'a> { } } Ok::<_, io::Error>(()) - }) - .unwrap() + })?; } BeMessage::BackendKeyData(key_data) => { @@ -686,77 +590,64 @@ impl<'a> BeMessage<'a> { write_body(buf, |buf| { buf.put_i32(key_data.backend_pid); buf.put_i32(key_data.cancel_key); - Ok(()) - }) - .unwrap(); + }); } BeMessage::BindComplete => { buf.put_u8(b'2'); - write_body(buf, |_| Ok::<(), io::Error>(())).unwrap(); + write_body(buf, |_| {}); } BeMessage::CloseComplete => { buf.put_u8(b'3'); - write_body(buf, |_| Ok::<(), io::Error>(())).unwrap(); + write_body(buf, |_| {}); } BeMessage::CommandComplete(cmd) => { buf.put_u8(b'C'); - write_body(buf, |buf| { - write_cstr(cmd, buf)?; - Ok::<_, io::Error>(()) - })?; + write_body(buf, |buf| write_cstr(cmd, buf))?; } BeMessage::CopyData(data) => { buf.put_u8(b'd'); write_body(buf, |buf| { buf.put_slice(data); 
- Ok::<_, io::Error>(()) - }) - .unwrap(); + }); } BeMessage::CopyDone => { buf.put_u8(b'c'); - write_body(buf, |_| Ok::<(), io::Error>(())).unwrap(); + write_body(buf, |_| {}); } BeMessage::CopyFail => { buf.put_u8(b'f'); - write_body(buf, |_| Ok::<(), io::Error>(())).unwrap(); + write_body(buf, |_| {}); } BeMessage::CopyInResponse => { buf.put_u8(b'G'); write_body(buf, |buf| { - buf.put_u8(1); /* copy_is_binary */ - buf.put_i16(0); /* numAttributes */ - Ok::<_, io::Error>(()) - }) - .unwrap(); + buf.put_u8(1); // copy_is_binary + buf.put_i16(0); // numAttributes + }); } BeMessage::CopyOutResponse => { buf.put_u8(b'H'); write_body(buf, |buf| { - buf.put_u8(0); /* copy_is_binary */ - buf.put_i16(0); /* numAttributes */ - Ok::<_, io::Error>(()) - }) - .unwrap(); + buf.put_u8(0); // copy_is_binary + buf.put_i16(0); // numAttributes + }); } BeMessage::CopyBothResponse => { buf.put_u8(b'W'); write_body(buf, |buf| { // doesn't matter, used only for replication - buf.put_u8(0); /* copy_is_binary */ - buf.put_i16(0); /* numAttributes */ - Ok::<_, io::Error>(()) - }) - .unwrap(); + buf.put_u8(0); // copy_is_binary + buf.put_i16(0); // numAttributes + }); } BeMessage::DataRow(vals) => { @@ -771,9 +662,7 @@ impl<'a> BeMessage<'a> { buf.put_i32(-1); } } - Ok::<_, io::Error>(()) - }) - .unwrap(); + }); } // ErrorResponse is a zero-terminated array of zero-terminated fields. @@ -788,18 +677,17 @@ impl<'a> BeMessage<'a> { buf.put_u8(b'E'); write_body(buf, |buf| { buf.put_u8(b'S'); // severity - write_cstr(&Bytes::from("ERROR"), buf)?; + buf.put_slice(b"ERROR\0"); buf.put_u8(b'C'); // SQLSTATE error code - write_cstr(&Bytes::from("CXX000"), buf)?; + buf.put_slice(b"CXX000\0"); buf.put_u8(b'M'); // the message write_cstr(error_msg.as_bytes(), buf)?; buf.put_u8(0); // terminator Ok::<_, io::Error>(()) - }) - .unwrap(); + })?; } // NoticeResponse has the same format as ErrorResponse. 
From doc: "The frontend should display the @@ -812,23 +700,22 @@ impl<'a> BeMessage<'a> { buf.put_u8(b'N'); write_body(buf, |buf| { buf.put_u8(b'S'); // severity - write_cstr(&Bytes::from("NOTICE"), buf)?; + buf.put_slice(b"NOTICE\0"); buf.put_u8(b'C'); // SQLSTATE error code - write_cstr(&Bytes::from("CXX000"), buf)?; + buf.put_slice(b"CXX000\0"); buf.put_u8(b'M'); // the message write_cstr(error_msg.as_bytes(), buf)?; buf.put_u8(0); // terminator Ok::<_, io::Error>(()) - }) - .unwrap(); + })?; } BeMessage::NoData => { buf.put_u8(b'n'); - write_body(buf, |_| Ok::<(), io::Error>(())).unwrap(); + write_body(buf, |_| {}); } BeMessage::EncryptionResponse(should_negotiate) => { @@ -853,9 +740,7 @@ impl<'a> BeMessage<'a> { buf.put_u8(b'S'); write_body(buf, |buf| { buf.put_slice(&buffer[..cnt]); - Ok::<_, io::Error>(()) - }) - .unwrap(); + }); } BeMessage::ParameterDescription => { @@ -863,23 +748,19 @@ impl<'a> BeMessage<'a> { write_body(buf, |buf| { // we don't support params, so always 0 buf.put_i16(0); - Ok::<_, io::Error>(()) - }) - .unwrap(); + }); } BeMessage::ParseComplete => { buf.put_u8(b'1'); - write_body(buf, |_| Ok::<(), io::Error>(())).unwrap(); + write_body(buf, |_| {}); } BeMessage::ReadyForQuery => { buf.put_u8(b'Z'); write_body(buf, |buf| { buf.put_u8(b'I'); - Ok::<_, io::Error>(()) - }) - .unwrap(); + }); } BeMessage::RowDescription(rows) => { @@ -907,9 +788,7 @@ impl<'a> BeMessage<'a> { buf.put_u64(body.wal_end); buf.put_i64(body.timestamp); buf.put_slice(body.data); - Ok::<_, io::Error>(()) - }) - .unwrap(); + }); } BeMessage::KeepAlive(req) => { @@ -918,10 +797,8 @@ impl<'a> BeMessage<'a> { buf.put_u8(b'k'); buf.put_u64(req.sent_ptr); buf.put_i64(req.timestamp); - buf.put_u8(if req.request_reply { 1u8 } else { 0u8 }); - Ok::<_, io::Error>(()) - }) - .unwrap(); + buf.put_u8(if req.request_reply { 1 } else { 0 }); + }); } } Ok(()) @@ -968,17 +845,17 @@ impl ReplicationFeedback { // value itself pub fn serialize(&self, buf: &mut BytesMut) -> Result<()> 
{ buf.put_u8(REPLICATION_FEEDBACK_FIELDS_NUMBER); // # of keys - write_cstr(&Bytes::from("current_timeline_size"), buf)?; + buf.put_slice(b"current_timeline_size\0"); buf.put_i32(8); buf.put_u64(self.current_timeline_size); - write_cstr(&Bytes::from("ps_writelsn"), buf)?; + buf.put_slice(b"ps_writelsn\0"); buf.put_i32(8); buf.put_u64(self.ps_writelsn); - write_cstr(&Bytes::from("ps_flushlsn"), buf)?; + buf.put_slice(b"ps_flushlsn\0"); buf.put_i32(8); buf.put_u64(self.ps_flushlsn); - write_cstr(&Bytes::from("ps_applylsn"), buf)?; + buf.put_slice(b"ps_applylsn\0"); buf.put_i32(8); buf.put_u64(self.ps_applylsn); @@ -988,7 +865,7 @@ impl ReplicationFeedback { .expect("failed to serialize pg_replytime earlier than PG_EPOCH") .as_micros() as i64; - write_cstr(&Bytes::from("ps_replytime"), buf)?; + buf.put_slice(b"ps_replytime\0"); buf.put_i32(8); buf.put_i64(timestamp); Ok(()) @@ -998,33 +875,30 @@ impl ReplicationFeedback { pub fn parse(mut buf: Bytes) -> ReplicationFeedback { let mut zf = ReplicationFeedback::empty(); let nfields = buf.get_u8(); - let mut i = 0; - while i < nfields { - i += 1; - let key_cstr = read_null_terminated(&mut buf).unwrap(); - let key = cstr_to_str(&key_cstr).unwrap(); - match key { - "current_timeline_size" => { + for _ in 0..nfields { + let key = read_cstr(&mut buf).unwrap(); + match key.as_ref() { + b"current_timeline_size" => { let len = buf.get_i32(); assert_eq!(len, 8); zf.current_timeline_size = buf.get_u64(); } - "ps_writelsn" => { + b"ps_writelsn" => { let len = buf.get_i32(); assert_eq!(len, 8); zf.ps_writelsn = buf.get_u64(); } - "ps_flushlsn" => { + b"ps_flushlsn" => { let len = buf.get_i32(); assert_eq!(len, 8); zf.ps_flushlsn = buf.get_u64(); } - "ps_applylsn" => { + b"ps_applylsn" => { let len = buf.get_i32(); assert_eq!(len, 8); zf.ps_applylsn = buf.get_u64(); } - "ps_replytime" => { + b"ps_replytime" => { let len = buf.get_i32(); assert_eq!(len, 8); let raw_time = buf.get_i64(); @@ -1037,8 +911,8 @@ impl ReplicationFeedback { 
_ => { let len = buf.get_i32(); warn!( - "ReplicationFeedback parse. unknown key {} of len {}. Skip it.", - key, len + "ReplicationFeedback parse. unknown key {} of len {len}. Skip it.", + String::from_utf8_lossy(key.as_ref()) ); buf.advance(len as usize); } @@ -1084,7 +958,7 @@ mod tests { *first = REPLICATION_FEEDBACK_FIELDS_NUMBER + 1; } - write_cstr(&Bytes::from("new_field_one"), &mut data).unwrap(); + data.put_slice(b"new_field_one\0"); data.put_i32(8); data.put_u64(42); From 0c8ee6bd1d2ffd8f16fe0c34b4b16c8266b4fb9a Mon Sep 17 00:00:00 2001 From: Rory de Zoete <33318916+zoete@users.noreply.github.com> Date: Thu, 25 Aug 2022 09:46:52 +0200 Subject: [PATCH 35/63] Add postgis & plv8 extensions (#2298) * Add postgis & plv8 extensions * Update Dockerfile & Fix typo's * Update dockerfile * Update Dockerfile * Update dockerfile * Use plv8 step * Reduce giga layer * Reduce layer size further * Prepare for rollout * Fix dependency * Pass on correct build tag * No longer dependent on building tools * Use version from vendor * Revert "Use version from vendor" This reverts commit 7c6670c477efa0822907b853df1221909213cf88. * Revert and push correct set * Add configure step for new approach * Re-add configure flags Co-authored-by: Rory de Zoete Co-authored-by: Rory de Zoete --- .github/workflows/build_and_test.yml | 9 +-- Dockerfile.compute-node | 93 ++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 7 deletions(-) create mode 100644 Dockerfile.compute-node diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index dab34c84bc..71b9e8d803 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -472,10 +472,6 @@ jobs: compute-node-image: runs-on: dev container: gcr.io/kaniko-project/executor:v1.9.0-debug - # note: This image depends on neondatabase/compute-tools:latest (or :thisversion), - # which isn't available until after the image is promoted. 
- # Ergo, we must explicitly build and promote compute-tools separately. - needs: [ compute-tools-image ] steps: - name: Checkout @@ -487,9 +483,8 @@ jobs: - name: Configure ECR login run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json - - name: Kaniko build compute node - working-directory: ./vendor/postgres/ - run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg=TAG=$GITHUB_RUN_ID --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:$GITHUB_RUN_ID + - name: Kaniko build compute node with extensions + run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --dockerfile Dockerfile.compute-node --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:$GITHUB_RUN_ID promote-images: runs-on: dev diff --git a/Dockerfile.compute-node b/Dockerfile.compute-node new file mode 100644 index 0000000000..97c070d11e --- /dev/null +++ b/Dockerfile.compute-node @@ -0,0 +1,93 @@ +ARG TAG=pinned + +FROM debian:bullseye-slim AS build-deps +RUN apt update && \ + apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \ + libcurl4-openssl-dev libossp-uuid-dev + +# Build Postgres from the neon postgres repository. +FROM build-deps AS pg-build +COPY vendor/postgres postgres +RUN cd postgres && \ + ./configure CFLAGS='-O2 -g3' --enable-debug --with-uuid=ossp && \ + make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \ + make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \ + # Install headers + make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install + +# Build PostGIS from the upstream PostGIS mirror. PostGIS compiles against neon postgres sources without changes. 
+# Perhaps we could even use the upstream binaries, compiled against vanilla Postgres, but it would require some +# investigation to check that it works, and also keeps working in the future. So for now, we compile our own binaries. +FROM build-deps AS postgis-build +COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ +RUN apt update && \ + apt install -y gdal-bin libgdal-dev libprotobuf-c-dev protobuf-c-compiler xsltproc wget + +RUN wget https://download.osgeo.org/postgis/source/postgis-3.2.3.tar.gz && \ + tar xvzf postgis-3.2.3.tar.gz && \ + cd postgis-3.2.3 && \ + ./autogen.sh && \ + export PATH="/usr/local/pgsql/bin:$PATH" && \ + ./configure && \ + make -j $(getconf _NPROCESSORS_ONLN) install && \ + cd extensions/postgis && \ + make clean && \ + make -j $(getconf _NPROCESSORS_ONLN) install + +# Build plv8 +FROM build-deps AS plv8-build +COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/ +RUN apt update && \ + apt install -y git curl wget make ninja-build build-essential libncurses5 python3-dev pkg-config libc++-dev libc++abi-dev libglib2.0-dev + +# https://github.com/plv8/plv8/issues/475 +# Debian bullseye provides binutils 2.35 when >= 2.38 is necessary +RUN echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \ + echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \ + apt update && \ + apt install -y --no-install-recommends -t testing binutils + +RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.3.tar.gz && \ + tar xvzf v3.1.3.tar.gz && \ + cd plv8-3.1.3 && \ + export PATH="/usr/local/pgsql/bin:$PATH" && \ + make && \ + make install && \ + rm -rf /plv8-* + +# Compile and run the Neon-specific `compute_ctl` binary +FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:$TAG AS compute-tools +USER nonroot +COPY --chown=nonroot compute_tools compute_tools +COPY --chown=nonroot workspace_hack workspace_hack +RUN cd compute_tools && cargo build --release + +# Put it all together 
into the final image +FROM debian:bullseye-slim +# Add user postgres +RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \ + echo "postgres:test_console_pass" | chpasswd && \ + mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \ + chown -R postgres:postgres /var/db/postgres && \ + chmod 0750 /var/db/postgres/compute && \ + echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig + +# TODO: Check if we can make the extension setup more modular versus a linear build +# currently plv8-build copies the output /usr/local/pgsql from postgis-build# +COPY --from=plv8-build --chown=postgres /usr/local/pgsql /usr/local/pgsql +COPY --from=compute-tools --chown=postgres /home/nonroot/compute_tools/target/release/compute_ctl /usr/local/bin/compute_ctl + +RUN apt update && \ + apt install -y libreadline-dev libossp-uuid-dev gdal-bin libgdal-dev libprotobuf-c-dev && \ + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +# Debian bullseye provides GLIBC 2.31 when 2.34 is necessary as we compiled plv8 with that version +RUN echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.list && \ + echo "APT::Default-Release \"stable\";" > /etc/apt/apt.conf.d/default-release && \ + apt update && \ + apt install -y --no-install-recommends -t testing binutils && \ + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +ENV PATH=/usr/local/pgsql/bin:$PATH +USER postgres +ENTRYPOINT ["/usr/local/bin/compute_ctl"] \ No newline at end of file From 344db0b4aa5aaecc9b23479eefc17c234e00acfa Mon Sep 17 00:00:00 2001 From: Rory de Zoete <33318916+zoete@users.noreply.github.com> Date: Thu, 25 Aug 2022 11:17:09 +0200 Subject: [PATCH 36/63] Re-add temporary symlink (#2331) Co-authored-by: Rory de Zoete --- Dockerfile.compute-node | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Dockerfile.compute-node b/Dockerfile.compute-node index 97c070d11e..b5e639d5d6 100644 --- a/Dockerfile.compute-node +++ b/Dockerfile.compute-node @@ -88,6 +88,9 @@ RUN echo "deb 
http://ftp.debian.org/debian testing main" >> /etc/apt/sources.lis apt install -y --no-install-recommends -t testing binutils && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* +# "temporary" symlink for compatibility with old control-plane +RUN ln -s /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl + ENV PATH=/usr/local/pgsql/bin:$PATH USER postgres ENTRYPOINT ["/usr/local/bin/compute_ctl"] \ No newline at end of file From f67d109e6ea0dd554c0d6288362a7be0ddc60460 Mon Sep 17 00:00:00 2001 From: Rory de Zoete <33318916+zoete@users.noreply.github.com> Date: Thu, 25 Aug 2022 14:35:01 +0200 Subject: [PATCH 37/63] Copy binaries to /usr/local (#2335) * Add extra symlink * Take other approach Co-authored-by: Rory de Zoete --- Dockerfile.compute-node | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Dockerfile.compute-node b/Dockerfile.compute-node index b5e639d5d6..117a4155cd 100644 --- a/Dockerfile.compute-node +++ b/Dockerfile.compute-node @@ -74,7 +74,7 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \ # TODO: Check if we can make the extension setup more modular versus a linear build # currently plv8-build copies the output /usr/local/pgsql from postgis-build# -COPY --from=plv8-build --chown=postgres /usr/local/pgsql /usr/local/pgsql +COPY --from=plv8-build --chown=postgres /usr/local/pgsql /usr/local COPY --from=compute-tools --chown=postgres /home/nonroot/compute_tools/target/release/compute_ctl /usr/local/bin/compute_ctl RUN apt update && \ @@ -88,9 +88,8 @@ RUN echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.lis apt install -y --no-install-recommends -t testing binutils && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* -# "temporary" symlink for compatibility with old control-plane +# "temporary" symlink for old control-plane RUN ln -s /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl -ENV PATH=/usr/local/pgsql/bin:$PATH USER postgres ENTRYPOINT ["/usr/local/bin/compute_ctl"] \ No newline at 
end of file From c952f022bb4b3703ee8bc20604e2cda34c84128d Mon Sep 17 00:00:00 2001 From: Egor Suvorov Date: Thu, 25 Aug 2022 13:29:37 +0300 Subject: [PATCH 38/63] waldecoder: fix comment --- libs/postgres_ffi/src/waldecoder.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libs/postgres_ffi/src/waldecoder.rs b/libs/postgres_ffi/src/waldecoder.rs index 768e79621d..b509fc87a5 100644 --- a/libs/postgres_ffi/src/waldecoder.rs +++ b/libs/postgres_ffi/src/waldecoder.rs @@ -170,6 +170,7 @@ impl WalStreamDecoder { } State::SkippingEverything { .. } => {} } + // now read page contents match &mut self.state { State::WaitingForRecord => { // need to have at least the xl_tot_len field @@ -194,8 +195,8 @@ impl WalStreamDecoder { return Ok(Some(self.complete_record(recordbuf)?)); } else { // Need to assemble the record from pieces. Remember the size of the - // record, and loop back. On next iteration, we will reach the 'else' - // branch below, and copy the part of the record that was on this page + // record, and loop back. On next iterations, we will reach the branch + // below, and copy the part of the record that was on this or next page(s) // to 'recordbuf'. Subsequent iterations will skip page headers, and // append the continuations from the next pages to 'recordbuf'. self.state = State::ReassemblingRecord { From bc588f3a533b7e39f144875a8ac38775204ce2dc Mon Sep 17 00:00:00 2001 From: MMeent Date: Thu, 25 Aug 2022 16:17:32 +0200 Subject: [PATCH 39/63] Update WAL redo histograms (#2323) Previously, it could only distinguish REDO task durations down to 5ms, which equates to approx. 200pages/sec or 1.6MB/sec getpage@LSN traffic. This patch improves to 200'000 pages/sec or 1.6GB/sec, allowing for much more precise performance measurement of the redo process. 
--- pageserver/src/walredo.rs | 46 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs index 9cf347573a..bf48bd1759 100644 --- a/pageserver/src/walredo.rs +++ b/pageserver/src/walredo.rs @@ -89,15 +89,52 @@ pub trait WalRedoManager: Send + Sync { // for access to the postgres process ('wait') since there is only one for // each tenant. +/// Time buckets are small because we want to be able to measure the +/// smallest redo processing times. These buckets allow us to measure down +/// to 5us, which equates to 200'000 pages/sec, which equates to 1.6GB/sec. +/// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec. +macro_rules! redo_histogram_time_buckets { + () => { + vec![ + 0.000_005, 0.000_010, 0.000_025, 0.000_050, 0.000_100, 0.000_250, 0.000_500, 0.001_000, + 0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, + ] + }; +} + +/// While we're at it, also measure the amount of records replayed in each +/// operation. We have a global 'total replayed' counter, but that's not +/// as useful as 'what is the skew for how many records we replay in one +/// operation'. +macro_rules! 
redo_histogram_count_buckets { + () => { + vec![0.0, 1.0, 2.0, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0] + }; +} + static WAL_REDO_TIME: Lazy = Lazy::new(|| { - register_histogram!("pageserver_wal_redo_seconds", "Time spent on WAL redo") - .expect("failed to define a metric") + register_histogram!( + "pageserver_wal_redo_seconds", + "Time spent on WAL redo", + redo_histogram_time_buckets!() + ) + .expect("failed to define a metric") }); static WAL_REDO_WAIT_TIME: Lazy = Lazy::new(|| { register_histogram!( "pageserver_wal_redo_wait_seconds", - "Time spent waiting for access to the WAL redo process" + "Time spent waiting for access to the WAL redo process", + redo_histogram_time_buckets!(), + ) + .expect("failed to define a metric") +}); + +static WAL_REDO_RECORDS_HISTOGRAM: Lazy = Lazy::new(|| { + register_histogram!( + "pageserver_wal_redo_records_histogram", + "Histogram of number of records replayed per redo", + redo_histogram_count_buckets!(), ) .expect("failed to define a metric") }); @@ -262,7 +299,10 @@ impl PostgresRedoManager { let end_time = Instant::now(); let duration = end_time.duration_since(lock_time); + WAL_REDO_TIME.observe(duration.as_secs_f64()); + WAL_REDO_RECORDS_HISTOGRAM.observe(records.len() as f64); + debug!( "postgres applied {} WAL records in {} us to reconstruct page image at LSN {}", records.len(), From 04a018a5b12735ef3e4a80bdaa2cdb619024cb0e Mon Sep 17 00:00:00 2001 From: MMeent Date: Thu, 25 Aug 2022 18:48:09 +0200 Subject: [PATCH 40/63] Extract neon and neon_test_utils from postgres repo (#2325) * Extract neon and neon_test_utils from postgres repo * Remove neon from vendored postgres repo, and fix build_and_test.yml * Move EmitWarningsOnPlaceholders to end of _PG_init in neon.c (from libpagestore.c) * Fix Makefile location comments * remove Makefile EXTRA_INSTALL flag * Update Dockerfile.compute-node to build and include the neon extension --- .github/workflows/build_and_test.yml | 4 + .github/workflows/codestyle.yml | 3 + 
Dockerfile.compute-node | 17 +- Makefile | 26 +- pgxn/neon/Makefile | 26 + pgxn/neon/inmem_smgr.c | 286 ++ pgxn/neon/libpagestore.c | 432 +++ pgxn/neon/libpqwalproposer.c | 413 +++ pgxn/neon/neon--1.0.sql | 17 + pgxn/neon/neon.c | 82 + pgxn/neon/neon.control | 4 + pgxn/neon/neon.h | 19 + pgxn/neon/pagestore_client.h | 221 ++ pgxn/neon/pagestore_smgr.c | 1696 ++++++++++++ pgxn/neon/relsize_cache.c | 167 ++ pgxn/neon/walproposer.c | 2403 +++++++++++++++++ pgxn/neon/walproposer.h | 540 ++++ pgxn/neon/walproposer_utils.c | 1110 ++++++++ pgxn/neon/walproposer_utils.h | 19 + pgxn/neon_test_utils/Makefile | 15 + pgxn/neon_test_utils/neon_test_utils--1.0.sql | 29 + pgxn/neon_test_utils/neon_test_utils.control | 5 + pgxn/neon_test_utils/neontest.c | 304 +++ vendor/postgres | 2 +- 24 files changed, 7830 insertions(+), 10 deletions(-) create mode 100644 pgxn/neon/Makefile create mode 100644 pgxn/neon/inmem_smgr.c create mode 100644 pgxn/neon/libpagestore.c create mode 100644 pgxn/neon/libpqwalproposer.c create mode 100644 pgxn/neon/neon--1.0.sql create mode 100644 pgxn/neon/neon.c create mode 100644 pgxn/neon/neon.control create mode 100644 pgxn/neon/neon.h create mode 100644 pgxn/neon/pagestore_client.h create mode 100644 pgxn/neon/pagestore_smgr.c create mode 100644 pgxn/neon/relsize_cache.c create mode 100644 pgxn/neon/walproposer.c create mode 100644 pgxn/neon/walproposer.h create mode 100644 pgxn/neon/walproposer_utils.c create mode 100644 pgxn/neon/walproposer_utils.h create mode 100644 pgxn/neon_test_utils/Makefile create mode 100644 pgxn/neon_test_utils/neon_test_utils--1.0.sql create mode 100644 pgxn/neon_test_utils/neon_test_utils.control create mode 100644 pgxn/neon_test_utils/neontest.c diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 71b9e8d803..6e570b22d4 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -136,6 +136,10 @@ jobs: run: mold -run make postgres -j$(nproc) 
shell: bash -euxo pipefail {0} + - name: Build neon extensions + run: mold -run make neon-pg-ext -j$(nproc) + shell: bash -euxo pipefail {0} + - name: Run cargo build run: | ${cov_prefix} mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests diff --git a/.github/workflows/codestyle.yml b/.github/workflows/codestyle.yml index 029beba351..eddfee88fc 100644 --- a/.github/workflows/codestyle.yml +++ b/.github/workflows/codestyle.yml @@ -81,6 +81,9 @@ jobs: if: steps.cache_pg.outputs.cache-hit != 'true' run: make postgres + - name: Build neon extensions + run: make neon-pg-ext + # Plain configure output can contain weird errors like 'error: C compiler cannot create executables' # and the real cause will be inside config.log - name: Print configure logs in case of failure diff --git a/Dockerfile.compute-node b/Dockerfile.compute-node index 117a4155cd..4527fb9ece 100644 --- a/Dockerfile.compute-node +++ b/Dockerfile.compute-node @@ -13,7 +13,8 @@ RUN cd postgres && \ make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \ make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \ # Install headers - make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install + make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \ + make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install # Build PostGIS from the upstream PostGIS mirror. PostGIS compiles against neon postgres sources without changes. 
# Perhaps we could even use the upstream binaries, compiled against vanilla Postgres, but it would require some @@ -55,6 +56,16 @@ RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.3.tar.gz && \ make install && \ rm -rf /plv8-* +# compile neon extensions +FROM build-deps AS neon-pg-ext-build +COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY pgxn/ pgxn/ + +RUN make -j $(getconf _NPROCESSORS_ONLN) \ + PG_CONFIG=/usr/local/pgsql/bin/pg_config \ + -C pgxn/neon \ + -s install + # Compile and run the Neon-specific `compute_ctl` binary FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:$TAG AS compute-tools USER nonroot @@ -73,8 +84,8 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \ echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig # TODO: Check if we can make the extension setup more modular versus a linear build -# currently plv8-build copies the output /usr/local/pgsql from postgis-build# -COPY --from=plv8-build --chown=postgres /usr/local/pgsql /usr/local +# currently plv8-build copies the output /usr/local/pgsql from postgis-build, etc# +COPY --from=neon-pg-ext-build --chown=postgres /usr/local/pgsql /usr/local COPY --from=compute-tools --chown=postgres /home/nonroot/compute_tools/target/release/compute_ctl /usr/local/bin/compute_ctl RUN apt update && \ diff --git a/Makefile b/Makefile index fc75e9fc5e..9d7e1497e5 100644 --- a/Makefile +++ b/Makefile @@ -51,7 +51,7 @@ CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1 # Top level Makefile to build Zenith and PostgreSQL # .PHONY: all -all: zenith postgres +all: zenith postgres neon-pg-ext ### Zenith Rust bits # @@ -87,25 +87,39 @@ postgres: postgres-configure \ postgres-headers # to prevent `make install` conflicts with zenith's `postgres-headers` +@echo "Compiling PostgreSQL" $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build MAKELEVEL=0 install - +@echo "Compiling contrib/neon" - $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/neon install - +@echo 
"Compiling contrib/neon_test_utils" - $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/neon_test_utils install + +@echo "Compiling libpq" + $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/src/interfaces/libpq install +@echo "Compiling pg_buffercache" $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/pg_buffercache install +@echo "Compiling pageinspect" $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/pageinspect install - .PHONY: postgres-clean postgres-clean: $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build MAKELEVEL=0 clean + $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/pg_buffercache clean + $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/pageinspect clean + $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/src/interfaces/libpq clean + +neon-pg-ext: postgres + +@echo "Compiling neon" + $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/bin/pg_config \ + -C $(ROOT_PROJECT_DIR)/pgxn/neon install + +@echo "Compiling neon_test_utils" + $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/bin/pg_config \ + -C $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils install + +.PHONY: neon-pg-ext-clean + $(MAKE) -C $(ROOT_PROJECT_DIR)/pgxn/neon clean + $(MAKE) -C $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils clean # This doesn't remove the effects of 'configure'. 
.PHONY: clean clean: cd $(POSTGRES_INSTALL_DIR)/build && $(MAKE) clean $(CARGO_CMD_PREFIX) cargo clean + cd pgxn/neon && $(MAKE) clean + cd pgxn/neon_test_utils && $(MAKE) clean # This removes everything .PHONY: distclean diff --git a/pgxn/neon/Makefile b/pgxn/neon/Makefile new file mode 100644 index 0000000000..a6ce611974 --- /dev/null +++ b/pgxn/neon/Makefile @@ -0,0 +1,26 @@ +# pgxn/neon/Makefile + + +MODULE_big = neon +OBJS = \ + $(WIN32RES) \ + inmem_smgr.o \ + libpagestore.o \ + libpqwalproposer.o \ + pagestore_smgr.o \ + relsize_cache.o \ + neon.o \ + walproposer.o \ + walproposer_utils.o + +PG_CPPFLAGS = -I$(libpq_srcdir) +SHLIB_LINK_INTERNAL = $(libpq) + +EXTENSION = neon +DATA = neon--1.0.sql +PGFILEDESC = "neon - cloud storage for PostgreSQL" + + +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) diff --git a/pgxn/neon/inmem_smgr.c b/pgxn/neon/inmem_smgr.c new file mode 100644 index 0000000000..7840292b08 --- /dev/null +++ b/pgxn/neon/inmem_smgr.c @@ -0,0 +1,286 @@ +/*------------------------------------------------------------------------- + * + * inmem_smgr.c + * + * This is an implementation of the SMGR interface, used in the WAL redo + * process (see src/backend/tcop/zenith_wal_redo.c). It has no persistent + * storage, the pages that are written out are kept in a small number of + * in-memory buffers. + * + * Normally, replaying a WAL record only needs to access a handful of + * buffers, which fit in the normal buffer cache, so this is just for + * "overflow" storage when the buffer cache is not large enough.
+ * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * pgxn/neon/inmem_smgr.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/xlog.h" +#include "pagestore_client.h" +#include "storage/block.h" +#include "storage/buf_internals.h" +#include "storage/relfilenode.h" +#include "storage/smgr.h" + +/* Size of the in-memory smgr */ +#define MAX_PAGES 64 + +/* If more than WARN_PAGES are used, print a warning in the log */ +#define WARN_PAGES 32 + +static BufferTag page_tag[MAX_PAGES]; +static char page_body[MAX_PAGES][BLCKSZ]; +static int used_pages; + +static int +locate_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno) +{ + /* We only hold a small number of pages, so linear search */ + for (int i = 0; i < used_pages; i++) + { + if (RelFileNodeEquals(reln->smgr_rnode.node, page_tag[i].rnode) + && forknum == page_tag[i].forkNum + && blkno == page_tag[i].blockNum) + { + return i; + } + } + return -1; +} + +/* + * inmem_init() -- Initialize private state + */ +void +inmem_init(void) +{ + used_pages = 0; +} + +/* + * inmem_exists() -- Does the physical file exist? + */ +bool +inmem_exists(SMgrRelation reln, ForkNumber forknum) +{ + for (int i = 0; i < used_pages; i++) + { + if (RelFileNodeEquals(reln->smgr_rnode.node, page_tag[i].rnode) + && forknum == page_tag[i].forkNum) + { + return true; + } + } + return false; +} + +/* + * inmem_create() -- Create a new relation on zenithd storage + * + * If isRedo is true, it's okay for the relation to exist already. + */ +void +inmem_create(SMgrRelation reln, ForkNumber forknum, bool isRedo) +{ +} + +/* + * inmem_unlink() -- Unlink a relation. + */ +void +inmem_unlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo) +{ +} + +/* + * inmem_extend() -- Add a block to the specified relation.
+ * + * The semantics are nearly the same as mdwrite(): write at the + * specified position. However, this is to be used for the case of + * extending a relation (i.e., blocknum is at or beyond the current + * EOF). Note that we assume writing a block beyond current EOF + * causes intervening file space to become filled with zeroes. + */ +void +inmem_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, + char *buffer, bool skipFsync) +{ + /* same as smgrwrite() for us */ + inmem_write(reln, forknum, blkno, buffer, skipFsync); +} + +/* + * inmem_open() -- Initialize newly-opened relation. + */ +void +inmem_open(SMgrRelation reln) +{ +} + +/* + * inmem_close() -- Close the specified relation, if it isn't closed already. + */ +void +inmem_close(SMgrRelation reln, ForkNumber forknum) +{ +} + +/* + * inmem_prefetch() -- Initiate asynchronous read of the specified block of a relation + */ +bool +inmem_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) +{ + return true; +} + +/* + * inmem_writeback() -- Tell the kernel to write pages back to storage. + */ +void +inmem_writeback(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, BlockNumber nblocks) +{ +} + +/* + * inmem_read() -- Read the specified block from a relation. + */ +void +inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, + char *buffer) +{ + int pg; + + pg = locate_page(reln, forknum, blkno); + if (pg < 0) + memset(buffer, 0, BLCKSZ); + else + memcpy(buffer, page_body[pg], BLCKSZ); +} + +/* + * inmem_write() -- Write the supplied block at the appropriate location. + * + * This is to be used only for updating already-existing blocks of a + * relation (ie, those before the current EOF). To extend a relation, + * use inmem_extend().
+ */ +void +inmem_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + char *buffer, bool skipFsync) +{ + int pg; + + pg = locate_page(reln, forknum, blocknum); + if (pg < 0) + { + /* + * We assume the buffer cache is large enough to hold all the buffers + * needed for most operations. Overflowing to this "in-mem smgr" in rare + * cases is OK. But if we find that we're using more than WARN_PAGES, + * print a warning so that we get alerted and get to investigate why + * we're accessing so many buffers. + */ + elog(used_pages >= WARN_PAGES ? WARNING : DEBUG1, + "inmem_write() called for %u/%u/%u.%u blk %u: used_pages %u", + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + forknum, + blocknum, + used_pages); + if (used_pages == MAX_PAGES) + elog(ERROR, "Inmem storage overflow"); + + pg = used_pages; + used_pages++; + INIT_BUFFERTAG(page_tag[pg], reln->smgr_rnode.node, forknum, blocknum); + } else { + elog(DEBUG1, "inmem_write() called for %u/%u/%u.%u blk %u: found at %u", + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + forknum, + blocknum, + used_pages); + } + memcpy(page_body[pg], buffer, BLCKSZ); +} + +/* + * inmem_nblocks() -- Get the number of blocks stored in a relation. + */ +BlockNumber +inmem_nblocks(SMgrRelation reln, ForkNumber forknum) +{ + /* + * It's not clear why a WAL redo function would call smgrnblocks(). + * During recovery, at least before reaching consistency, the size of a + * relation could be arbitrarily small, if it was truncated after the + * record being replayed, or arbitrarily large if it was extended + * afterwards. But one place where it's called is in + * XLogReadBufferExtended(): it extends the relation, if it's smaller than + * the requested page. That's a waste of time in the WAL redo + * process. Pretend that all relations are maximally sized to avoid it. 
+	 */
+	return MaxBlockNumber;
+}
+
+/*
+ *	inmem_truncate() -- Truncate relation to specified number of blocks.
+ *
+ *	Intentionally a no-op in this storage manager.
+ */
+void
+inmem_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
+{
+}
+
+/*
+ *	inmem_immedsync() -- Immediately sync a relation to stable storage.
+ *
+ *	Intentionally a no-op in this storage manager.
+ */
+void
+inmem_immedsync(SMgrRelation reln, ForkNumber forknum)
+{
+}
+
+/* Callback table handed out by smgr_inmem(); no shutdown callback is set. */
+static const struct f_smgr inmem_smgr =
+{
+	.smgr_init = inmem_init,
+	.smgr_shutdown = NULL,
+	.smgr_open = inmem_open,
+	.smgr_close = inmem_close,
+	.smgr_create = inmem_create,
+	.smgr_exists = inmem_exists,
+	.smgr_unlink = inmem_unlink,
+	.smgr_extend = inmem_extend,
+	.smgr_prefetch = inmem_prefetch,
+	.smgr_read = inmem_read,
+	.smgr_write = inmem_write,
+	.smgr_writeback = inmem_writeback,
+	.smgr_nblocks = inmem_nblocks,
+	.smgr_truncate = inmem_truncate,
+	.smgr_immedsync = inmem_immedsync,
+};
+
+/*
+ * smgr_inmem() -- Pick the storage manager for a relation.
+ *
+ * Relations that belong to a specific backend (backend != InvalidBackendId)
+ * are delegated to smgr_standard(); everything else gets the in-memory
+ * callback table above. Asserts that we are InRecovery.
+ */
+const f_smgr *
+smgr_inmem(BackendId backend, RelFileNode rnode)
+{
+	Assert(InRecovery);
+	if (backend != InvalidBackendId)
+		return smgr_standard(backend, rnode);
+	else
+		return &inmem_smgr;
+}
+
+/*
+ * smgr_init_inmem() -- Initialization entry point; forwards to inmem_init().
+ *
+ * Declared with an explicit (void) parameter list so the definition is a
+ * proper prototype matching the declaration in pagestore_client.h.
+ */
+void
+smgr_init_inmem(void)
+{
+	inmem_init();
+}
diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c
new file mode 100644
index 0000000000..649fc1037e
--- /dev/null
+++ b/pgxn/neon/libpagestore.c
@@ -0,0 +1,432 @@
+/*-------------------------------------------------------------------------
+ *
+ * libpagestore.c
+ *	  Handles network communications with the remote pagestore.
+ * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * contrib/neon/libpqpagestore.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "pagestore_client.h" +#include "fmgr.h" +#include "access/xlog.h" + +#include "libpq-fe.h" +#include "libpq/pqformat.h" +#include "libpq/libpq.h" + +#include "miscadmin.h" +#include "pgstat.h" +#include "utils/guc.h" + +#include "neon.h" +#include "walproposer.h" +#include "walproposer_utils.h" + + +#define PageStoreTrace DEBUG5 + +#define NEON_TAG "[NEON_SMGR] " +#define neon_log(tag, fmt, ...) ereport(tag, \ + (errmsg(NEON_TAG fmt, ## __VA_ARGS__), \ + errhidestmt(true), errhidecontext(true))) + +bool connected = false; +PGconn *pageserver_conn = NULL; + +char *page_server_connstring_raw; + +static ZenithResponse *pageserver_call(ZenithRequest *request); +page_server_api api = { + .request = pageserver_call +}; + +static void +pageserver_connect() +{ + char *query; + int ret; + + Assert(!connected); + + pageserver_conn = PQconnectdb(page_server_connstring); + + if (PQstatus(pageserver_conn) == CONNECTION_BAD) + { + char *msg = pchomp(PQerrorMessage(pageserver_conn)); + + PQfinish(pageserver_conn); + pageserver_conn = NULL; + ereport(ERROR, + (errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION), + errmsg(NEON_TAG "could not establish connection to pageserver"), + errdetail_internal("%s", msg))); + } + + query = psprintf("pagestream %s %s", zenith_tenant, zenith_timeline); + ret = PQsendQuery(pageserver_conn, query); + if (ret != 1) + { + PQfinish(pageserver_conn); + pageserver_conn = NULL; + neon_log(ERROR, "could not send pagestream command to pageserver"); + } + + while (PQisBusy(pageserver_conn)) + { + int wc; + + /* Sleep until there's something to do */ + wc = WaitLatchOrSocket(MyLatch, + WL_LATCH_SET | WL_SOCKET_READABLE | 
+ WL_EXIT_ON_PM_DEATH, + PQsocket(pageserver_conn), + -1L, PG_WAIT_EXTENSION); + ResetLatch(MyLatch); + + CHECK_FOR_INTERRUPTS(); + + /* Data available in socket? */ + if (wc & WL_SOCKET_READABLE) + { + if (!PQconsumeInput(pageserver_conn)) + { + char *msg = pchomp(PQerrorMessage(pageserver_conn)); + + PQfinish(pageserver_conn); + pageserver_conn = NULL; + + neon_log(ERROR, "could not complete handshake with pageserver: %s", + msg); + } + } + } + + neon_log(LOG, "libpagestore: connected to '%s'", page_server_connstring_raw); + + connected = true; +} + +/* + * A wrapper around PQgetCopyData that checks for interrupts while sleeping. + */ +static int +call_PQgetCopyData(PGconn *conn, char **buffer) +{ + int ret; + +retry: + ret = PQgetCopyData(conn, buffer, 1 /* async */ ); + + if (ret == 0) + { + int wc; + + /* Sleep until there's something to do */ + wc = WaitLatchOrSocket(MyLatch, + WL_LATCH_SET | WL_SOCKET_READABLE | + WL_EXIT_ON_PM_DEATH, + PQsocket(conn), + -1L, PG_WAIT_EXTENSION); + ResetLatch(MyLatch); + + CHECK_FOR_INTERRUPTS(); + + /* Data available in socket? */ + if (wc & WL_SOCKET_READABLE) + { + if (!PQconsumeInput(conn)) + neon_log(ERROR, "could not get response from pageserver: %s", + PQerrorMessage(conn)); + } + + goto retry; + } + + return ret; +} + + +static ZenithResponse * +pageserver_call(ZenithRequest *request) +{ + StringInfoData req_buff; + StringInfoData resp_buff; + ZenithResponse *resp; + + PG_TRY(); + { + /* If the connection was lost for some reason, reconnect */ + if (connected && PQstatus(pageserver_conn) == CONNECTION_BAD) + { + PQfinish(pageserver_conn); + pageserver_conn = NULL; + connected = false; + } + + if (!connected) + pageserver_connect(); + + req_buff = zm_pack_request(request); + + /* + * Send request. + * + * In principle, this could block if the output buffer is full, and we + * should use async mode and check for interrupts while waiting. 
In + * practice, our requests are small enough to always fit in the output + * and TCP buffer. + */ + if (PQputCopyData(pageserver_conn, req_buff.data, req_buff.len) <= 0 || PQflush(pageserver_conn)) + { + neon_log(ERROR, "failed to send page request: %s", + PQerrorMessage(pageserver_conn)); + } + pfree(req_buff.data); + + if (message_level_is_interesting(PageStoreTrace)) + { + char *msg = zm_to_string((ZenithMessage *) request); + + neon_log(PageStoreTrace, "sent request: %s", msg); + pfree(msg); + } + + /* read response */ + resp_buff.len = call_PQgetCopyData(pageserver_conn, &resp_buff.data); + resp_buff.cursor = 0; + + if (resp_buff.len == -1) + neon_log(ERROR, "end of COPY"); + else if (resp_buff.len == -2) + neon_log(ERROR, "could not read COPY data: %s", PQerrorMessage(pageserver_conn)); + + resp = zm_unpack_response(&resp_buff); + PQfreemem(resp_buff.data); + + if (message_level_is_interesting(PageStoreTrace)) + { + char *msg = zm_to_string((ZenithMessage *) resp); + + neon_log(PageStoreTrace, "got response: %s", msg); + pfree(msg); + } + } + PG_CATCH(); + { + /* + * If anything goes wrong while we were sending a request, it's not + * clear what state the connection is in. For example, if we sent the + * request but didn't receive a response yet, we might receive the + * response some time later after we have already sent a new unrelated + * request. Close the connection to avoid getting confused. 
+ */ + if (connected) + { + neon_log(LOG, "dropping connection to page server due to error"); + PQfinish(pageserver_conn); + pageserver_conn = NULL; + connected = false; + } + PG_RE_THROW(); + } + PG_END_TRY(); + + return (ZenithResponse *) resp; +} + + +static bool +check_zenith_id(char **newval, void **extra, GucSource source) +{ + uint8 zid[16]; + + return **newval == '\0' || HexDecodeString(zid, *newval, 16); +} + +static char * +substitute_pageserver_password(const char *page_server_connstring_raw) +{ + char *host = NULL; + char *port = NULL; + char *user = NULL; + char *auth_token = NULL; + char *err = NULL; + char *page_server_connstring = NULL; + PQconninfoOption *conn_options; + PQconninfoOption *conn_option; + MemoryContext oldcontext; + + /* + * Here we substitute password in connection string with an environment + * variable. To simplify things we construct a connection string back with + * only known options. In particular: host port user and password. We do + * not currently use other options and constructing full connstring in an + * URI shape is quite messy. + */ + + if (page_server_connstring_raw == NULL || page_server_connstring_raw[0] == '\0') + return NULL; + + /* extract the auth token from the connection string */ + conn_options = PQconninfoParse(page_server_connstring_raw, &err); + if (conn_options == NULL) + { + /* The error string is malloc'd, so we must free it explicitly */ + char *errcopy = err ? pstrdup(err) : "out of memory"; + + PQfreemem(err); + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid connection string syntax: %s", errcopy))); + } + + /* + * Trying to populate pageserver connection string with auth token from + * environment. We are looking for password in with placeholder value like + * $ENV_VAR_NAME, so if password field is present and starts with $ we try + * to fetch environment variable value and fail loudly if it is not set. 
+	 */
+	for (conn_option = conn_options; conn_option->keyword != NULL; conn_option++)
+	{
+		/* Only non-empty values are picked up; anything else stays NULL */
+		if (strcmp(conn_option->keyword, "host") == 0)
+		{
+			if (conn_option->val != NULL && conn_option->val[0] != '\0')
+				host = conn_option->val;
+		}
+		else if (strcmp(conn_option->keyword, "port") == 0)
+		{
+			if (conn_option->val != NULL && conn_option->val[0] != '\0')
+				port = conn_option->val;
+		}
+		else if (strcmp(conn_option->keyword, "user") == 0)
+		{
+			if (conn_option->val != NULL && conn_option->val[0] != '\0')
+				user = conn_option->val;
+		}
+		else if (strcmp(conn_option->keyword, "password") == 0)
+		{
+			if (conn_option->val != NULL && conn_option->val[0] != '\0')
+			{
+				/*
+				 * ensure that this is a template
+				 *
+				 * If it is not, the value is a literal password. Do not echo
+				 * any part of it into the error message: that would write a
+				 * credential to the server log.
+				 */
+				if (strncmp(conn_option->val, "$", 1) != 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_CONNECTION_EXCEPTION),
+							 errmsg("expected placeholder value in pageserver password starting from $")));
+
+				/* val[1..] is the environment variable name, not a secret */
+				neon_log(LOG, "found auth token placeholder in pageserver conn string '%s'", &conn_option->val[1]);
+				auth_token = getenv(&conn_option->val[1]);
+				if (!auth_token)
+				{
+					ereport(ERROR,
+							(errcode(ERRCODE_CONNECTION_EXCEPTION),
+							 errmsg("cannot get auth token, environment variable %s is not set", &conn_option->val[1])));
+				}
+				else
+				{
+					neon_log(LOG, "using auth token from environment passed via env");
+				}
+			}
+		}
+	}
+
+	/*
+	 * allocate connection string in TopMemoryContext to make sure it is not
+	 * freed
+	 */
+	oldcontext = CurrentMemoryContext;
+	MemoryContextSwitchTo(TopMemoryContext);
+	page_server_connstring = psprintf("postgresql://%s:%s@%s:%s", user, auth_token ?
auth_token : "", host, port);
+	MemoryContextSwitchTo(oldcontext);
+
+	PQconninfoFree(conn_options);
+	return page_server_connstring;
+}
+
+/*
+ * Module initialization function
+ *
+ * Registers the neon.* GUCs, initializes the relation size cache, publishes
+ * the page server API and resolves the pageserver connection string.
+ */
+void
+pg_init_libpagestore(void)
+{
+	DefineCustomStringVariable("neon.pageserver_connstring",
+							   "connection string to the page server",
+							   NULL,
+							   &page_server_connstring_raw,
+							   "",
+							   PGC_POSTMASTER,
+							   0,	/* no flags required */
+							   NULL, NULL, NULL);
+
+	DefineCustomStringVariable("neon.timeline_id",
+							   "Zenith timelineid the server is running on",
+							   NULL,
+							   &zenith_timeline,
+							   "",
+							   PGC_POSTMASTER,
+							   0,	/* no flags required */
+							   check_zenith_id, NULL, NULL);
+
+	DefineCustomStringVariable("neon.tenant_id",
+							   "Neon tenantid the server is running on",
+							   NULL,
+							   &zenith_tenant,
+							   "",
+							   PGC_POSTMASTER,
+							   0,	/* no flags required */
+							   check_zenith_id, NULL, NULL);
+
+	DefineCustomBoolVariable("neon.wal_redo",
+							 "start in wal-redo mode",
+							 NULL,
+							 &wal_redo,
+							 false,
+							 PGC_POSTMASTER,
+							 0,
+							 NULL, NULL, NULL);
+
+	DefineCustomIntVariable("neon.max_cluster_size",
+							"cluster size limit",
+							NULL,
+							&max_cluster_size,
+							-1, -1, INT_MAX,
+							PGC_SIGHUP,
+							GUC_UNIT_MB,
+							NULL, NULL, NULL);
+
+	relsize_hash_init();
+
+	/* A non-NULL page_server means this initializer already ran once */
+	if (page_server != NULL)
+		neon_log(ERROR, "libpagestore already loaded");
+
+	/* Normal path: this is the first (and only) load */
+	neon_log(PageStoreTrace, "libpagestore loaded");
+	page_server = &api;
+
+	/* substitute password in pageserver_connstring */
+	page_server_connstring = substitute_pageserver_password(page_server_connstring_raw);
+
+	/* Is there more correct way to pass CustomGUC to postgres code?
*/ + zenith_timeline_walproposer = zenith_timeline; + zenith_tenant_walproposer = zenith_tenant; + + if (wal_redo) + { + neon_log(PageStoreTrace, "set inmem_smgr hook"); + smgr_hook = smgr_inmem; + smgr_init_hook = smgr_init_inmem; + } + else if (page_server_connstring && page_server_connstring[0]) + { + neon_log(PageStoreTrace, "set neon_smgr hook"); + smgr_hook = smgr_zenith; + smgr_init_hook = smgr_init_zenith; + dbsize_hook = zenith_dbsize; + } +} diff --git a/pgxn/neon/libpqwalproposer.c b/pgxn/neon/libpqwalproposer.c new file mode 100644 index 0000000000..2b2b7a1a6a --- /dev/null +++ b/pgxn/neon/libpqwalproposer.c @@ -0,0 +1,413 @@ +#include "postgres.h" + +#include "libpq-fe.h" +#include "neon.h" +#include "walproposer.h" + +/* Header in walproposer.h -- Wrapper struct to abstract away the libpq connection */ +struct WalProposerConn +{ + PGconn* pg_conn; + bool is_nonblocking; /* whether the connection is non-blocking */ + char *recvbuf; /* last received data from libpqprop_async_read */ +}; + +/* Prototypes for exported functions */ +static char* libpqprop_error_message(WalProposerConn* conn); +static WalProposerConnStatusType libpqprop_status(WalProposerConn* conn); +static WalProposerConn* libpqprop_connect_start(char* conninfo); +static WalProposerConnectPollStatusType libpqprop_connect_poll(WalProposerConn* conn); +static bool libpqprop_send_query(WalProposerConn* conn, char* query); +static WalProposerExecStatusType libpqprop_get_query_result(WalProposerConn* conn); +static pgsocket libpqprop_socket(WalProposerConn* conn); +static int libpqprop_flush(WalProposerConn* conn); +static void libpqprop_finish(WalProposerConn* conn); +static PGAsyncReadResult libpqprop_async_read(WalProposerConn* conn, char** buf, int* amount); +static PGAsyncWriteResult libpqprop_async_write(WalProposerConn* conn, void const* buf, size_t size); +static bool libpqprop_blocking_write(WalProposerConn* conn, void const* buf, size_t size); + +static WalProposerFunctionsType 
PQWalProposerFunctions = { + libpqprop_error_message, + libpqprop_status, + libpqprop_connect_start, + libpqprop_connect_poll, + libpqprop_send_query, + libpqprop_get_query_result, + libpqprop_socket, + libpqprop_flush, + libpqprop_finish, + libpqprop_async_read, + libpqprop_async_write, + libpqprop_blocking_write, +}; + +/* Module initialization */ +void +pg_init_libpqwalproposer(void) +{ + if (WalProposerFunctions != NULL) + elog(ERROR, "libpqwalproposer already loaded"); + WalProposerFunctions = &PQWalProposerFunctions; +} + +/* Helper function */ +static bool +ensure_nonblocking_status(WalProposerConn* conn, bool is_nonblocking) +{ + /* If we're already correctly blocking or nonblocking, all good */ + if (is_nonblocking == conn->is_nonblocking) + return true; + + /* Otherwise, set it appropriately */ + if (PQsetnonblocking(conn->pg_conn, is_nonblocking) == -1) + return false; + + conn->is_nonblocking = is_nonblocking; + return true; +} + +/* Exported function definitions */ +static char* +libpqprop_error_message(WalProposerConn* conn) +{ + return PQerrorMessage(conn->pg_conn); +} + +static WalProposerConnStatusType +libpqprop_status(WalProposerConn* conn) +{ + switch (PQstatus(conn->pg_conn)) + { + case CONNECTION_OK: + return WP_CONNECTION_OK; + case CONNECTION_BAD: + return WP_CONNECTION_BAD; + default: + return WP_CONNECTION_IN_PROGRESS; + } +} + +static WalProposerConn* +libpqprop_connect_start(char* conninfo) +{ + WalProposerConn* conn; + PGconn* pg_conn; + + pg_conn = PQconnectStart(conninfo); + /* + * Allocation of a PQconn can fail, and will return NULL. We want to fully replicate the + * behavior of PQconnectStart here. + */ + if (!pg_conn) + return NULL; + + /* + * And in theory this allocation can fail as well, but it's incredibly unlikely if we just + * successfully allocated a PGconn. + * + * palloc will exit on failure though, so there's not much we could do if it *did* fail. 
+ */ + conn = palloc(sizeof(WalProposerConn)); + conn->pg_conn = pg_conn; + conn->is_nonblocking = false; /* connections always start in blocking mode */ + conn->recvbuf = NULL; + return conn; +} + +static WalProposerConnectPollStatusType +libpqprop_connect_poll(WalProposerConn* conn) +{ + WalProposerConnectPollStatusType return_val; + + switch (PQconnectPoll(conn->pg_conn)) + { + case PGRES_POLLING_FAILED: + return_val = WP_CONN_POLLING_FAILED; + break; + case PGRES_POLLING_READING: + return_val = WP_CONN_POLLING_READING; + break; + case PGRES_POLLING_WRITING: + return_val = WP_CONN_POLLING_WRITING; + break; + case PGRES_POLLING_OK: + return_val = WP_CONN_POLLING_OK; + break; + + /* There's a comment at its source about this constant being unused. We'll expect it's never + * returned. */ + case PGRES_POLLING_ACTIVE: + elog(FATAL, "Unexpected PGRES_POLLING_ACTIVE returned from PQconnectPoll"); + /* This return is never actually reached, but it's here to make the compiler happy */ + return WP_CONN_POLLING_FAILED; + + default: + Assert(false); + return_val = WP_CONN_POLLING_FAILED; /* keep the compiler quiet */ + } + + return return_val; +} + +static bool +libpqprop_send_query(WalProposerConn* conn, char* query) +{ + /* We need to be in blocking mode for sending the query to run without + * requiring a call to PQflush */ + if (!ensure_nonblocking_status(conn, false)) + return false; + + /* PQsendQuery returns 1 on success, 0 on failure */ + if (!PQsendQuery(conn->pg_conn, query)) + return false; + + return true; +} + +static WalProposerExecStatusType +libpqprop_get_query_result(WalProposerConn* conn) +{ + PGresult* result; + WalProposerExecStatusType return_val; + + /* Marker variable if we need to log an unexpected success result */ + char* unexpected_success = NULL; + + /* Consume any input that we might be missing */ + if (!PQconsumeInput(conn->pg_conn)) + return WP_EXEC_FAILED; + + if (PQisBusy(conn->pg_conn)) + return WP_EXEC_NEEDS_INPUT; + + + result = 
PQgetResult(conn->pg_conn);
+	/* PQgetResult returns NULL only if getting the result was successful & there's no more of the
+	 * result to get. */
+	if (!result)
+	{
+		elog(WARNING, "[libpqwalproposer] Unexpected successful end of command results");
+		return WP_EXEC_UNEXPECTED_SUCCESS;
+	}
+
+	/* Helper macro to reduce boilerplate */
+	#define UNEXPECTED_SUCCESS(msg) \
+		return_val = WP_EXEC_UNEXPECTED_SUCCESS; \
+		unexpected_success = msg; \
+		break;
+
+
+	switch (PQresultStatus(result))
+	{
+		/* "true" success case */
+		case PGRES_COPY_BOTH:
+			return_val = WP_EXEC_SUCCESS_COPYBOTH;
+			break;
+
+		/* Unexpected success case */
+		case PGRES_EMPTY_QUERY:
+			UNEXPECTED_SUCCESS("empty query return");
+		case PGRES_COMMAND_OK:
+			UNEXPECTED_SUCCESS("data-less command end");
+		case PGRES_TUPLES_OK:
+			UNEXPECTED_SUCCESS("tuples return");
+		case PGRES_COPY_OUT:
+			UNEXPECTED_SUCCESS("'Copy Out' response");
+		case PGRES_COPY_IN:
+			UNEXPECTED_SUCCESS("'Copy In' response");
+		case PGRES_SINGLE_TUPLE:
+			UNEXPECTED_SUCCESS("single tuple return");
+		case PGRES_PIPELINE_SYNC:
+			UNEXPECTED_SUCCESS("pipeline sync point");
+
+		/* Failure cases */
+		case PGRES_BAD_RESPONSE:
+		case PGRES_NONFATAL_ERROR:
+		case PGRES_FATAL_ERROR:
+		case PGRES_PIPELINE_ABORTED:
+			return_val = WP_EXEC_FAILED;
+			break;
+
+		default:
+			Assert(false);
+			return_val = WP_EXEC_FAILED; /* keep the compiler quiet */
+	}
+
+	/* The helper macro is only meaningful inside the switch above */
+	#undef UNEXPECTED_SUCCESS
+
+	/*
+	 * Only the status was needed. The PGresult belongs to us and must be
+	 * released with PQclear(), otherwise every call leaks one result. The
+	 * connection-level error text returned by PQerrorMessage() lives in the
+	 * PGconn and is not affected by this.
+	 */
+	PQclear(result);
+
+	if (unexpected_success)
+		elog(WARNING, "[libpqwalproposer] Unexpected successful %s", unexpected_success);
+
+	return return_val;
+}
+
+/* Return the socket descriptor of the underlying libpq connection */
+static pgsocket
+libpqprop_socket(WalProposerConn* conn)
+{
+	return PQsocket(conn->pg_conn);
+}
+
+/* Thin wrapper: returns PQflush()'s result unchanged */
+static int
+libpqprop_flush(WalProposerConn* conn)
+{
+	return (PQflush(conn->pg_conn));
+}
+
+/*
+ * Tear down a connection: free the last receive buffer (if any), close the
+ * libpq connection, then free the wrapper struct itself.
+ */
+static void
+libpqprop_finish(WalProposerConn* conn)
+{
+	if (conn->recvbuf != NULL)
+		PQfreemem(conn->recvbuf);
+	PQfinish(conn->pg_conn);
+	pfree(conn);
+}
+
+/*
+ * Receive a message from the safekeeper.
+ *
+ * On success, the data is placed in *buf.
It is valid until the next call + * to this function. + */ +static PGAsyncReadResult +libpqprop_async_read(WalProposerConn* conn, char** buf, int* amount) +{ + int result; + + if (conn->recvbuf != NULL) + { + PQfreemem(conn->recvbuf); + conn->recvbuf = NULL; + } + + /* Call PQconsumeInput so that we have the data we need */ + if (!PQconsumeInput(conn->pg_conn)) + { + *amount = 0; + *buf = NULL; + return PG_ASYNC_READ_FAIL; + } + + /* The docs for PQgetCopyData list the return values as: + * 0 if the copy is still in progress, but no "complete row" is + * available + * -1 if the copy is done + * -2 if an error occured + * (> 0) if it was successful; that value is the amount transferred. + * + * The protocol we use between walproposer and safekeeper means that we + * *usually* wouldn't expect to see that the copy is done, but this can + * sometimes be triggered by the server returning an ErrorResponse (which + * also happens to have the effect that the copy is done). + */ + switch (result = PQgetCopyData(conn->pg_conn, &conn->recvbuf, true)) + { + case 0: + *amount = 0; + *buf = NULL; + return PG_ASYNC_READ_TRY_AGAIN; + case -1: + { + /* + * If we get -1, it's probably because of a server error; the + * safekeeper won't normally send a CopyDone message. 
+			 *
+			 * We can check PQgetResult to make sure that the server failed;
+			 * it'll always result in PGRES_FATAL_ERROR
+			 *
+			 * The PGresult is kept in a local so it can be released with
+			 * PQclear() once its status has been read; passing it straight
+			 * into PQresultStatus() would leak it. Both PQresultStatus()
+			 * and PQclear() accept a NULL result.
+			 */
+			PGresult   *res = PQgetResult(conn->pg_conn);
+			ExecStatusType status = PQresultStatus(res);
+
+			PQclear(res);
+
+			if (status != PGRES_FATAL_ERROR)
+				elog(FATAL, "unexpected result status %d after failed PQgetCopyData", status);
+
+			/* If there was actually an error, it'll be properly reported by
+			 * calls to PQerrorMessage -- we don't have to do anything else */
+			*amount = 0;
+			*buf = NULL;
+			return PG_ASYNC_READ_FAIL;
+		}
+		case -2:
+			*amount = 0;
+			*buf = NULL;
+			return PG_ASYNC_READ_FAIL;
+		default:
+			/* Positive values indicate the size of the returned result */
+			*amount = result;
+			*buf = conn->recvbuf;
+			return PG_ASYNC_READ_SUCCESS;
+	}
+}
+
+/*
+ * Queue 'size' bytes from 'buf' as COPY data and try to flush them without
+ * blocking.
+ *
+ * Returns PG_ASYNC_WRITE_SUCCESS when everything was flushed,
+ * PG_ASYNC_WRITE_TRY_FLUSH when data is still pending in the send queue, and
+ * PG_ASYNC_WRITE_FAIL on error.
+ */
+static PGAsyncWriteResult
+libpqprop_async_write(WalProposerConn* conn, void const* buf, size_t size)
+{
+	int			result;
+
+	/* If we aren't in non-blocking mode, switch to it. */
+	if (!ensure_nonblocking_status(conn, true))
+		return PG_ASYNC_WRITE_FAIL;
+
+	/* The docs for PQputCopyData list the return values as:
+	 *   1 if the data was queued,
+	 *   0 if it was not queued because of full buffers, or
+	 *  -1 if an error occurred
+	 */
+	result = PQputCopyData(conn->pg_conn, buf, size);
+
+	/* We won't get a result of zero because walproposer always empties the
+	 * connection's buffers before sending more */
+	Assert(result != 0);
+
+	switch (result)
+	{
+		case 1:
+			/* good -- continue */
+			break;
+		case -1:
+			return PG_ASYNC_WRITE_FAIL;
+		default:
+			elog(FATAL, "invalid return %d from PQputCopyData", result);
+	}
+
+	/* After queueing the data, we still need to flush to get it to send.
+	 * This might take multiple tries, but we don't want to wait around
+	 * until it's done.
+	 *
+	 * PQflush has the following returns (directly quoting the docs):
+	 *   0 if successful,
+	 *   1 if it was unable to send all the data in the send queue yet
+	 *  -1 if it failed for some reason
+	 */
+	switch (result = PQflush(conn->pg_conn)) {
+		case 0:
+			return PG_ASYNC_WRITE_SUCCESS;
+		case 1:
+			return PG_ASYNC_WRITE_TRY_FLUSH;
+		case -1:
+			return PG_ASYNC_WRITE_FAIL;
+		default:
+			elog(FATAL, "invalid return %d from PQflush", result);
+	}
+}
+
+/*
+ * Send 'size' bytes from 'buf' as COPY data with the connection in blocking
+ * mode. Returns true on success, false if switching modes, queueing or
+ * flushing failed.
+ */
+static bool
+libpqprop_blocking_write(WalProposerConn* conn, void const* buf, size_t size)
+{
+	int			result;
+
+	/* If we are in non-blocking mode, switch out of it. */
+	if (!ensure_nonblocking_status(conn, false))
+		return false;
+
+	/* This function is very similar to libpqprop_async_write. For more
+	 * information, refer to the comments there */
+	if ((result = PQputCopyData(conn->pg_conn, buf, size)) == -1)
+		return false;
+
+	Assert(result == 1);
+
+	/* Because the connection is in blocking mode (set above), flushing
+	 * returns 0 or -1, never the "try again" value 1 */
+
+	if ((result = PQflush(conn->pg_conn)) == -1)
+		return false;
+
+	Assert(result == 0);
+	return true;
+}
diff --git a/pgxn/neon/neon--1.0.sql b/pgxn/neon/neon--1.0.sql
new file mode 100644
index 0000000000..34f1ba78d4
--- /dev/null
+++ b/pgxn/neon/neon--1.0.sql
@@ -0,0 +1,17 @@
+\echo Use "CREATE EXTENSION neon" to load this file.
\quit + +CREATE FUNCTION pg_cluster_size() +RETURNS bigint +AS 'MODULE_PATHNAME', 'pg_cluster_size' +LANGUAGE C STRICT +PARALLEL UNSAFE; + +CREATE FUNCTION backpressure_lsns( + OUT received_lsn pg_lsn, + OUT disk_consistent_lsn pg_lsn, + OUT remote_consistent_lsn pg_lsn +) +RETURNS record +AS 'MODULE_PATHNAME', 'backpressure_lsns' +LANGUAGE C STRICT +PARALLEL UNSAFE; diff --git a/pgxn/neon/neon.c b/pgxn/neon/neon.c new file mode 100644 index 0000000000..595a126f04 --- /dev/null +++ b/pgxn/neon/neon.c @@ -0,0 +1,82 @@ +/*------------------------------------------------------------------------- + * + * neon.c + * Utility functions to expose neon specific information to user + * + * IDENTIFICATION + * contrib/neon/neon.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" +#include "fmgr.h" + +#include "access/xact.h" +#include "access/xlog.h" +#include "storage/buf_internals.h" +#include "storage/bufmgr.h" +#include "catalog/pg_type.h" +#include "replication/walsender.h" +#include "funcapi.h" +#include "access/htup_details.h" +#include "utils/pg_lsn.h" +#include "utils/guc.h" + +#include "neon.h" +#include "walproposer.h" + +PG_MODULE_MAGIC; +void _PG_init(void); + + +void _PG_init(void) +{ + pg_init_libpagestore(); + pg_init_libpqwalproposer(); + pg_init_walproposer(); + + EmitWarningsOnPlaceholders("neon"); +} + +PG_FUNCTION_INFO_V1(pg_cluster_size); +PG_FUNCTION_INFO_V1(backpressure_lsns); + +Datum +pg_cluster_size(PG_FUNCTION_ARGS) +{ + int64 size; + + size = GetZenithCurrentClusterSize(); + + if (size == 0) + PG_RETURN_NULL(); + + PG_RETURN_INT64(size); +} + + +Datum +backpressure_lsns(PG_FUNCTION_ARGS) +{ + XLogRecPtr writePtr; + XLogRecPtr flushPtr; + XLogRecPtr applyPtr; + Datum values[3]; + bool nulls[3]; + TupleDesc tupdesc; + + replication_feedback_get_lsns(&writePtr, &flushPtr, &applyPtr); + + tupdesc = CreateTemplateTupleDesc(3); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "received_lsn", 
PG_LSNOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "disk_consistent_lsn", PG_LSNOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "remote_consistent_lsn", PG_LSNOID, -1, 0); + tupdesc = BlessTupleDesc(tupdesc); + + MemSet(nulls, 0, sizeof(nulls)); + values[0] = LSNGetDatum(writePtr); + values[1] = LSNGetDatum(flushPtr); + values[2] = LSNGetDatum(applyPtr); + + PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); +} diff --git a/pgxn/neon/neon.control b/pgxn/neon/neon.control new file mode 100644 index 0000000000..84f79881c1 --- /dev/null +++ b/pgxn/neon/neon.control @@ -0,0 +1,4 @@ +# neon extension +comment = 'cloud storage for PostgreSQL' +default_version = '1.0' +module_pathname = '$libdir/neon' diff --git a/pgxn/neon/neon.h b/pgxn/neon/neon.h new file mode 100644 index 0000000000..2c66bc7bf0 --- /dev/null +++ b/pgxn/neon/neon.h @@ -0,0 +1,19 @@ +/*------------------------------------------------------------------------- + * + * neon.h + * Functions used in the initialization of this extension. 
+ * + * IDENTIFICATION + * contrib/neon/neon.h + * + *------------------------------------------------------------------------- + */ + +#ifndef NEON_H +#define NEON_H + +extern void pg_init_libpagestore(void); +extern void pg_init_libpqwalproposer(void); +extern void pg_init_walproposer(void); + +#endif /* NEON_H */ diff --git a/pgxn/neon/pagestore_client.h b/pgxn/neon/pagestore_client.h new file mode 100644 index 0000000000..f79a3c9142 --- /dev/null +++ b/pgxn/neon/pagestore_client.h @@ -0,0 +1,221 @@ +/*------------------------------------------------------------------------- + * + * pagestore_client.h + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * contrib/neon/pagestore_client.h + * + *------------------------------------------------------------------------- + */ +#ifndef pageserver_h +#define pageserver_h + +#include "postgres.h" + +#include "access/xlogdefs.h" +#include "storage/relfilenode.h" +#include "storage/block.h" +#include "storage/smgr.h" +#include "lib/stringinfo.h" +#include "libpq/pqformat.h" +#include "utils/memutils.h" + +#include "pg_config.h" + +typedef enum +{ + /* pagestore_client -> pagestore */ + T_ZenithExistsRequest = 0, + T_ZenithNblocksRequest, + T_ZenithGetPageRequest, + T_ZenithDbSizeRequest, + + /* pagestore -> pagestore_client */ + T_ZenithExistsResponse = 100, + T_ZenithNblocksResponse, + T_ZenithGetPageResponse, + T_ZenithErrorResponse, + T_ZenithDbSizeResponse, +} ZenithMessageTag; + + + +/* base struct for c-style inheritance */ +typedef struct +{ + ZenithMessageTag tag; +} ZenithMessage; + +#define messageTag(m) (((const ZenithMessage *)(m))->tag) + +/* + * supertype of all the Zenith*Request structs below + * + * If 'latest' is true, we are requesting the latest page version, and 'lsn' + * is just a hint to the server that we know there are no versions of the page + * (or relation size, for exists/nblocks 
requests) later than the 'lsn'. + */ +typedef struct +{ + ZenithMessageTag tag; + bool latest; /* if true, request latest page version */ + XLogRecPtr lsn; /* request page version @ this LSN */ +} ZenithRequest; + +typedef struct +{ + ZenithRequest req; + RelFileNode rnode; + ForkNumber forknum; +} ZenithExistsRequest; + +typedef struct +{ + ZenithRequest req; + RelFileNode rnode; + ForkNumber forknum; +} ZenithNblocksRequest; + + +typedef struct +{ + ZenithRequest req; + Oid dbNode; +} ZenithDbSizeRequest; + + +typedef struct +{ + ZenithRequest req; + RelFileNode rnode; + ForkNumber forknum; + BlockNumber blkno; +} ZenithGetPageRequest; + +/* supertype of all the Zenith*Response structs below */ +typedef struct +{ + ZenithMessageTag tag; +} ZenithResponse; + +typedef struct +{ + ZenithMessageTag tag; + bool exists; +} ZenithExistsResponse; + +typedef struct +{ + ZenithMessageTag tag; + uint32 n_blocks; +} ZenithNblocksResponse; + +typedef struct +{ + ZenithMessageTag tag; + char page[FLEXIBLE_ARRAY_MEMBER]; +} ZenithGetPageResponse; + +typedef struct +{ + ZenithMessageTag tag; + int64 db_size; +} ZenithDbSizeResponse; + +typedef struct +{ + ZenithMessageTag tag; + char message[FLEXIBLE_ARRAY_MEMBER]; /* null-terminated error message */ +} ZenithErrorResponse; + +extern StringInfoData zm_pack_request(ZenithRequest *msg); +extern ZenithResponse *zm_unpack_response(StringInfo s); +extern char *zm_to_string(ZenithMessage *msg); + +/* + * API + */ + +typedef struct +{ + ZenithResponse *(*request) (ZenithRequest *request); +} page_server_api; + +extern page_server_api *page_server; + +extern char *page_server_connstring; +extern char *zenith_timeline; +extern char *zenith_tenant; +extern bool wal_redo; +extern int32 max_cluster_size; + +extern const f_smgr *smgr_zenith(BackendId backend, RelFileNode rnode); +extern void smgr_init_zenith(void); + +extern const f_smgr *smgr_inmem(BackendId backend, RelFileNode rnode); +extern void smgr_init_inmem(void); +extern void 
smgr_shutdown_inmem(void); + +/* zenith storage manager functionality */ + +extern void zenith_init(void); +extern void zenith_open(SMgrRelation reln); +extern void zenith_close(SMgrRelation reln, ForkNumber forknum); +extern void zenith_create(SMgrRelation reln, ForkNumber forknum, bool isRedo); +extern bool zenith_exists(SMgrRelation reln, ForkNumber forknum); +extern void zenith_unlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo); +extern void zenith_extend(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer, bool skipFsync); +extern bool zenith_prefetch(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum); +extern void zenith_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + char *buffer); + +extern void zenith_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno, + XLogRecPtr request_lsn, bool request_latest, char *buffer); + +extern void zenith_write(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer, bool skipFsync); +extern void zenith_writeback(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, BlockNumber nblocks); +extern BlockNumber zenith_nblocks(SMgrRelation reln, ForkNumber forknum); +extern const int64 zenith_dbsize(Oid dbNode); +extern void zenith_truncate(SMgrRelation reln, ForkNumber forknum, + BlockNumber nblocks); +extern void zenith_immedsync(SMgrRelation reln, ForkNumber forknum); + +/* zenith wal-redo storage manager functionality */ + +extern void inmem_init(void); +extern void inmem_open(SMgrRelation reln); +extern void inmem_close(SMgrRelation reln, ForkNumber forknum); +extern void inmem_create(SMgrRelation reln, ForkNumber forknum, bool isRedo); +extern bool inmem_exists(SMgrRelation reln, ForkNumber forknum); +extern void inmem_unlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo); +extern void inmem_extend(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer, bool skipFsync); +extern 
bool inmem_prefetch(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum); +extern void inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + char *buffer); +extern void inmem_write(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer, bool skipFsync); +extern void inmem_writeback(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, BlockNumber nblocks); +extern BlockNumber inmem_nblocks(SMgrRelation reln, ForkNumber forknum); +extern void inmem_truncate(SMgrRelation reln, ForkNumber forknum, + BlockNumber nblocks); +extern void inmem_immedsync(SMgrRelation reln, ForkNumber forknum); + + +/* utils for zenith relsize cache */ +extern void relsize_hash_init(void); +extern bool get_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber* size); +extern void set_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber size); +extern void update_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber size); +extern void forget_cached_relsize(RelFileNode rnode, ForkNumber forknum); + +#endif diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c new file mode 100644 index 0000000000..3e1b74dba7 --- /dev/null +++ b/pgxn/neon/pagestore_smgr.c @@ -0,0 +1,1696 @@ +/*------------------------------------------------------------------------- + * + * pagestore_smgr.c + * + * + * + * Temporary and unlogged rels + * --------------------------- + * + * Temporary and unlogged tables are stored locally, by md.c. The functions + * here just pass the calls through to corresponding md.c functions. + * + * Index build operations that use the buffer cache are also handled locally, + * just like unlogged tables. Such operations must be marked by calling + * smgr_start_unlogged_build() and friends. 
+ * + * In order to know what relations are permanent and which ones are not, we + * have added a 'smgr_relpersistence' field to SmgrRelationData, and it is set + * by smgropen() callers, when they have the relcache entry at hand. However, + * sometimes we need to open an SmgrRelation for a relation without the + * relcache. That is needed when we evict a buffer; we might not have the + * SmgrRelation for that relation open yet. To deal with that, the + * 'relpersistence' can be left to zero, meaning we don't know if it's + * permanent or not. Most operations are not allowed with relpersistence==0, + * but smgrwrite() does work, which is what we need for buffer eviction. and + * smgrunlink() so that a backend doesn't need to have the relcache entry at + * transaction commit, where relations that were dropped in the transaction + * are unlinked. + * + * If smgrwrite() is called and smgr_relpersistence == 0, we check if the + * relation file exists locally or not. If it does exist, we assume it's an + * unlogged relation and write the page there. Otherwise it must be a + * permanent relation, WAL-logged and stored on the page server, and we ignore + * the write like we do for permanent relations. 
+ * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * contrib/neon/pagestore_smgr.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/xact.h" +#include "access/xlog.h" +#include "access/xloginsert.h" +#include "access/xlog_internal.h" +#include "catalog/pg_class.h" +#include "pagestore_client.h" +#include "pagestore_client.h" +#include "storage/smgr.h" +#include "access/xlogdefs.h" +#include "postmaster/interrupt.h" +#include "replication/walsender.h" +#include "storage/bufmgr.h" +#include "storage/md.h" +#include "fmgr.h" +#include "miscadmin.h" +#include "pgstat.h" +#include "catalog/pg_tablespace_d.h" +#include "postmaster/autovacuum.h" + +/* + * If DEBUG_COMPARE_LOCAL is defined, we pass through all the SMGR API + * calls to md.c, and *also* do the calls to the Page Server. On every + * read, compare the versions we read from local disk and Page Server, + * and Assert that they are identical. 
+ */ +/* #define DEBUG_COMPARE_LOCAL */ + +#ifdef DEBUG_COMPARE_LOCAL +#include "access/nbtree.h" +#include "storage/bufpage.h" +#include "access/xlog_internal.h" + +static char *hexdump_page(char *page); +#endif + +#define IS_LOCAL_REL(reln) (reln->smgr_rnode.node.dbNode != 0 && reln->smgr_rnode.node.relNode > FirstNormalObjectId) + +const int SmgrTrace = DEBUG5; + +page_server_api *page_server; + +/* GUCs */ +char *page_server_connstring; // with substituted password +char *zenith_timeline; +char *zenith_tenant; +bool wal_redo = false; +int32 max_cluster_size; + +/* unlogged relation build states */ +typedef enum +{ + UNLOGGED_BUILD_NOT_IN_PROGRESS = 0, + UNLOGGED_BUILD_PHASE_1, + UNLOGGED_BUILD_PHASE_2, + UNLOGGED_BUILD_NOT_PERMANENT +} UnloggedBuildPhase; + +static SMgrRelation unlogged_build_rel = NULL; +static UnloggedBuildPhase unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS; + +StringInfoData +zm_pack_request(ZenithRequest *msg) +{ + StringInfoData s; + + initStringInfo(&s); + pq_sendbyte(&s, msg->tag); + + switch (messageTag(msg)) + { + /* pagestore_client -> pagestore */ + case T_ZenithExistsRequest: + { + ZenithExistsRequest *msg_req = (ZenithExistsRequest *) msg; + + pq_sendbyte(&s, msg_req->req.latest); + pq_sendint64(&s, msg_req->req.lsn); + pq_sendint32(&s, msg_req->rnode.spcNode); + pq_sendint32(&s, msg_req->rnode.dbNode); + pq_sendint32(&s, msg_req->rnode.relNode); + pq_sendbyte(&s, msg_req->forknum); + + break; + } + case T_ZenithNblocksRequest: + { + ZenithNblocksRequest *msg_req = (ZenithNblocksRequest *) msg; + + pq_sendbyte(&s, msg_req->req.latest); + pq_sendint64(&s, msg_req->req.lsn); + pq_sendint32(&s, msg_req->rnode.spcNode); + pq_sendint32(&s, msg_req->rnode.dbNode); + pq_sendint32(&s, msg_req->rnode.relNode); + pq_sendbyte(&s, msg_req->forknum); + + break; + } + case T_ZenithDbSizeRequest: + { + ZenithDbSizeRequest *msg_req = (ZenithDbSizeRequest *) msg; + + pq_sendbyte(&s, msg_req->req.latest); + pq_sendint64(&s, 
msg_req->req.lsn); + pq_sendint32(&s, msg_req->dbNode); + + break; + } + case T_ZenithGetPageRequest: + { + ZenithGetPageRequest *msg_req = (ZenithGetPageRequest *) msg; + + pq_sendbyte(&s, msg_req->req.latest); + pq_sendint64(&s, msg_req->req.lsn); + pq_sendint32(&s, msg_req->rnode.spcNode); + pq_sendint32(&s, msg_req->rnode.dbNode); + pq_sendint32(&s, msg_req->rnode.relNode); + pq_sendbyte(&s, msg_req->forknum); + pq_sendint32(&s, msg_req->blkno); + + break; + } + + /* pagestore -> pagestore_client. We never need to create these. */ + case T_ZenithExistsResponse: + case T_ZenithNblocksResponse: + case T_ZenithGetPageResponse: + case T_ZenithErrorResponse: + case T_ZenithDbSizeResponse: + default: + elog(ERROR, "unexpected zenith message tag 0x%02x", msg->tag); + break; + } + return s; +} + +ZenithResponse * +zm_unpack_response(StringInfo s) +{ + ZenithMessageTag tag = pq_getmsgbyte(s); + ZenithResponse *resp = NULL; + + switch (tag) + { + /* pagestore -> pagestore_client */ + case T_ZenithExistsResponse: + { + ZenithExistsResponse *msg_resp = palloc0(sizeof(ZenithExistsResponse)); + + msg_resp->tag = tag; + msg_resp->exists = pq_getmsgbyte(s); + pq_getmsgend(s); + + resp = (ZenithResponse *) msg_resp; + break; + } + + case T_ZenithNblocksResponse: + { + ZenithNblocksResponse *msg_resp = palloc0(sizeof(ZenithNblocksResponse)); + + msg_resp->tag = tag; + msg_resp->n_blocks = pq_getmsgint(s, 4); + pq_getmsgend(s); + + resp = (ZenithResponse *) msg_resp; + break; + } + + case T_ZenithGetPageResponse: + { + ZenithGetPageResponse *msg_resp = palloc0(offsetof(ZenithGetPageResponse, page) + BLCKSZ); + + msg_resp->tag = tag; + /* XXX: should be varlena */ + memcpy(msg_resp->page, pq_getmsgbytes(s, BLCKSZ), BLCKSZ); + pq_getmsgend(s); + + resp = (ZenithResponse *) msg_resp; + break; + } + + case T_ZenithDbSizeResponse: + { + ZenithDbSizeResponse *msg_resp = palloc0(sizeof(ZenithDbSizeResponse)); + + msg_resp->tag = tag; + msg_resp->db_size = pq_getmsgint64(s); + 
pq_getmsgend(s); + + resp = (ZenithResponse *) msg_resp; + break; + } + + case T_ZenithErrorResponse: + { + ZenithErrorResponse *msg_resp; + size_t msglen; + const char *msgtext; + + msgtext = pq_getmsgrawstring(s); + msglen = strlen(msgtext); + + msg_resp = palloc0(sizeof(ZenithErrorResponse) + msglen + 1); + msg_resp->tag = tag; + memcpy(msg_resp->message, msgtext, msglen + 1); + pq_getmsgend(s); + + resp = (ZenithResponse *) msg_resp; + break; + } + + /* + * pagestore_client -> pagestore + * + * We create these ourselves, and don't need to decode them. + */ + case T_ZenithExistsRequest: + case T_ZenithNblocksRequest: + case T_ZenithGetPageRequest: + case T_ZenithDbSizeRequest: + default: + elog(ERROR, "unexpected zenith message tag 0x%02x", tag); + break; + } + + return resp; +} + +/* dump to json for debugging / error reporting purposes */ +char * +zm_to_string(ZenithMessage *msg) +{ + StringInfoData s; + + initStringInfo(&s); + + switch (messageTag(msg)) + { + /* pagestore_client -> pagestore */ + case T_ZenithExistsRequest: + { + ZenithExistsRequest *msg_req = (ZenithExistsRequest *) msg; + + appendStringInfoString(&s, "{\"type\": \"ZenithExistsRequest\""); + appendStringInfo(&s, ", \"rnode\": \"%u/%u/%u\"", + msg_req->rnode.spcNode, + msg_req->rnode.dbNode, + msg_req->rnode.relNode); + appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum); + appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn)); + appendStringInfo(&s, ", \"latest\": %d", msg_req->req.latest); + appendStringInfoChar(&s, '}'); + break; + } + + case T_ZenithNblocksRequest: + { + ZenithNblocksRequest *msg_req = (ZenithNblocksRequest *) msg; + + appendStringInfoString(&s, "{\"type\": \"ZenithNblocksRequest\""); + appendStringInfo(&s, ", \"rnode\": \"%u/%u/%u\"", + msg_req->rnode.spcNode, + msg_req->rnode.dbNode, + msg_req->rnode.relNode); + appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum); + appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", 
LSN_FORMAT_ARGS(msg_req->req.lsn)); + appendStringInfo(&s, ", \"latest\": %d", msg_req->req.latest); + appendStringInfoChar(&s, '}'); + break; + } + + case T_ZenithGetPageRequest: + { + ZenithGetPageRequest *msg_req = (ZenithGetPageRequest *) msg; + + appendStringInfoString(&s, "{\"type\": \"ZenithGetPageRequest\""); + appendStringInfo(&s, ", \"rnode\": \"%u/%u/%u\"", + msg_req->rnode.spcNode, + msg_req->rnode.dbNode, + msg_req->rnode.relNode); + appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum); + appendStringInfo(&s, ", \"blkno\": %u", msg_req->blkno); + appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn)); + appendStringInfo(&s, ", \"latest\": %d", msg_req->req.latest); + appendStringInfoChar(&s, '}'); + break; + } + case T_ZenithDbSizeRequest: + { + ZenithDbSizeRequest *msg_req = (ZenithDbSizeRequest *) msg; + + appendStringInfoString(&s, "{\"type\": \"ZenithDbSizeRequest\""); + appendStringInfo(&s, ", \"dbnode\": \"%u\"", msg_req->dbNode); + appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn)); + appendStringInfo(&s, ", \"latest\": %d", msg_req->req.latest); + appendStringInfoChar(&s, '}'); + break; + } + + + /* pagestore -> pagestore_client */ + case T_ZenithExistsResponse: + { + ZenithExistsResponse *msg_resp = (ZenithExistsResponse *) msg; + + appendStringInfoString(&s, "{\"type\": \"ZenithExistsResponse\""); + appendStringInfo(&s, ", \"exists\": %d}", + msg_resp->exists + ); + appendStringInfoChar(&s, '}'); + + break; + } + case T_ZenithNblocksResponse: + { + ZenithNblocksResponse *msg_resp = (ZenithNblocksResponse *) msg; + + appendStringInfoString(&s, "{\"type\": \"ZenithNblocksResponse\""); + appendStringInfo(&s, ", \"n_blocks\": %u}", + msg_resp->n_blocks + ); + appendStringInfoChar(&s, '}'); + + break; + } + case T_ZenithGetPageResponse: + { +#if 0 + ZenithGetPageResponse *msg_resp = (ZenithGetPageResponse *) msg; +#endif + + appendStringInfoString(&s, "{\"type\": 
\"ZenithGetPageResponse\""); + appendStringInfo(&s, ", \"page\": \"XXX\"}"); + appendStringInfoChar(&s, '}'); + break; + } + case T_ZenithErrorResponse: + { + ZenithErrorResponse *msg_resp = (ZenithErrorResponse *) msg; + + /* FIXME: escape double-quotes in the message */ + appendStringInfoString(&s, "{\"type\": \"ZenithErrorResponse\""); + appendStringInfo(&s, ", \"message\": \"%s\"}", msg_resp->message); + appendStringInfoChar(&s, '}'); + break; + } + case T_ZenithDbSizeResponse: + { + ZenithDbSizeResponse *msg_resp = (ZenithDbSizeResponse *) msg; + + appendStringInfoString(&s, "{\"type\": \"ZenithDbSizeResponse\""); + appendStringInfo(&s, ", \"db_size\": %ld}", + msg_resp->db_size + ); + appendStringInfoChar(&s, '}'); + + break; + } + + default: + appendStringInfo(&s, "{\"type\": \"unknown 0x%02x\"", msg->tag); + } + return s.data; +} + +/* + * Wrapper around log_newpage() that makes a temporary copy of the block and + * WAL-logs that. This makes it safe to use while holding only a shared lock + * on the page, see XLogSaveBufferForHint. We don't use XLogSaveBufferForHint + * directly because it skips the logging if the LSN is new enough. + */ +static XLogRecPtr +log_newpage_copy(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno, + Page page, bool page_std) +{ + PGAlignedBlock copied_buffer; + + memcpy(copied_buffer.data, page, BLCKSZ); + return log_newpage(rnode, forkNum, blkno, copied_buffer.data, page_std); +} + +/* + * Is 'buffer' identical to a freshly initialized empty heap page? + */ +static bool +PageIsEmptyHeapPage(char *buffer) +{ + PGAlignedBlock empty_page; + + PageInit((Page) empty_page.data, BLCKSZ, 0); + + return memcmp(buffer, empty_page.data, BLCKSZ) == 0; +} + +static void +zenith_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer) +{ + XLogRecPtr lsn = PageGetLSN(buffer); + + if (ShutdownRequestPending) + return; + + /* + * Whenever a VM or FSM page is evicted, WAL-log it. 
FSM and (some) VM + * changes are not WAL-logged when the changes are made, so this is our + * last chance to log them, otherwise they're lost. That's OK for + * correctness, the non-logged updates are not critical. But we want to + * have a reasonably up-to-date VM and FSM in the page server. + */ + if (forknum == FSM_FORKNUM && !RecoveryInProgress()) + { + /* FSM is never WAL-logged and we don't care. */ + XLogRecPtr recptr; + + recptr = log_newpage_copy(&reln->smgr_rnode.node, forknum, blocknum, buffer, false); + XLogFlush(recptr); + lsn = recptr; + ereport(SmgrTrace, + (errmsg("FSM page %u of relation %u/%u/%u.%u was force logged. Evicted at lsn=%X/%X", + blocknum, + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + forknum, LSN_FORMAT_ARGS(lsn)))); + } + else if (forknum == VISIBILITYMAP_FORKNUM && !RecoveryInProgress()) + { + /* + * Always WAL-log vm. We should never miss clearing visibility map + * bits. + * + * TODO Is it too bad for performance? Hopefully we do not evict + * actively used vm too often. + */ + XLogRecPtr recptr; + + recptr = log_newpage_copy(&reln->smgr_rnode.node, forknum, blocknum, buffer, false); + XLogFlush(recptr); + lsn = recptr; + + ereport(SmgrTrace, + (errmsg("Visibilitymap page %u of relation %u/%u/%u.%u was force logged at lsn=%X/%X", + blocknum, + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + forknum, LSN_FORMAT_ARGS(lsn)))); + } + else if (lsn == InvalidXLogRecPtr) + { + /* + * When PostgreSQL extends a relation, it calls smgrextend() with an all-zeros pages, + * and we can just ignore that in Zenith. We do need to remember the new size, + * though, so that smgrnblocks() returns the right answer after the rel has + * been extended. We rely on the relsize cache for that. + * + * A completely empty heap page doesn't need to be WAL-logged, either. The + * heapam can leave such a page behind, if e.g. 
an insert errors out after + * initializing the page, but before it has inserted the tuple and WAL-logged + * the change. When we read the page from the page server, it will come back + * as all-zeros. That's OK, the heapam will initialize an all-zeros page on + * first use. + * + * In other scenarios, evicting a dirty page with no LSN is a bad sign: it implies + * that the page was not WAL-logged, and its contents will be lost when it's + * evicted. + */ + if (PageIsNew(buffer)) + { + ereport(SmgrTrace, + (errmsg("Page %u of relation %u/%u/%u.%u is all-zeros", + blocknum, + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + forknum))); + } + else if (PageIsEmptyHeapPage(buffer)) + { + ereport(SmgrTrace, + (errmsg("Page %u of relation %u/%u/%u.%u is an empty heap page with no LSN", + blocknum, + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + forknum))); + } + else + { + ereport(PANIC, + (errmsg("Page %u of relation %u/%u/%u.%u is evicted with zero LSN", + blocknum, + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + forknum))); + } + } + else + { + ereport(SmgrTrace, + (errmsg("Page %u of relation %u/%u/%u.%u is already wal logged at lsn=%X/%X", + blocknum, + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + forknum, LSN_FORMAT_ARGS(lsn)))); + } + + /* + * Remember the LSN on this page. When we read the page again, we must + * read the same or newer version of it. + */ + SetLastWrittenPageLSN(lsn); +} + + +/* + * zenith_init() -- Initialize private state + */ +void +zenith_init(void) +{ + /* noop */ +#ifdef DEBUG_COMPARE_LOCAL + mdinit(); +#endif +} + +/* + * GetXLogInsertRecPtr uses XLogBytePosToRecPtr to convert logical insert (reserved) position + * to physical position in WAL. 
+ * It always adds SizeOfXLogShortPHD:
+ *		seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD;
+ * so even if there are no records on the page, offset will be SizeOfXLogShortPHD.
+ * It may cause problems with XLogFlush. So return pointer backward to the origin of the page.
+ */
+static XLogRecPtr
+zm_adjust_lsn(XLogRecPtr lsn)
+{
+	/*
+	 * If lsn points to the beginning of the first record on a page or
+	 * segment, then "return" it back to the page origin
+	 */
+	if ((lsn & (XLOG_BLCKSZ - 1)) == SizeOfXLogShortPHD)
+	{
+		lsn -= SizeOfXLogShortPHD;
+	}
+	else if ((lsn & (wal_segment_size - 1)) == SizeOfXLogLongPHD)
+	{
+		lsn -= SizeOfXLogLongPHD;
+	}
+	return lsn;
+}
+
+/*
+ * Return LSN for requesting pages and number of blocks from page server
+ *
+ * '*latest' is set to false during recovery (the replay LSN is used) and to
+ * true otherwise. A walsender requests with InvalidXLogRecPtr; a regular
+ * backend uses the (adjusted) last-written page LSN, flushing WAL up to it
+ * first if needed.
+ */
+static XLogRecPtr
+zenith_get_request_lsn(bool *latest)
+{
+	XLogRecPtr	lsn;
+
+	if (RecoveryInProgress())
+	{
+		*latest = false;
+		lsn = GetXLogReplayRecPtr(NULL);
+		elog(DEBUG1, "zenith_get_request_lsn GetXLogReplayRecPtr %X/%X request lsn 0 ",
+			 (uint32) ((lsn) >> 32), (uint32) (lsn));
+	}
+	else if (am_walsender)
+	{
+		*latest = true;
+		lsn = InvalidXLogRecPtr;
+		elog(DEBUG1, "am walsender zenith_get_request_lsn lsn 0 ");
+	}
+	else
+	{
+		XLogRecPtr	flushlsn;
+
+		/*
+		 * Use the latest LSN that was evicted from the buffer cache. Any
+		 * pages modified by later WAL records must still be in the buffer
+		 * cache, so our request cannot concern those.
+		 */
+		*latest = true;
+		lsn = GetLastWrittenPageLSN();
+		Assert(lsn != InvalidXLogRecPtr);
+		elog(DEBUG1, "zenith_get_request_lsn GetLastWrittenPageLSN lsn %X/%X ",
+			 (uint32) ((lsn) >> 32), (uint32) (lsn));
+
+		lsn = zm_adjust_lsn(lsn);
+
+		/*
+		 * Is it possible that the last-written LSN is ahead of last flush
+		 * LSN? Generally not, we shouldn't evict a page from the buffer cache
+		 * before all its modifications have been safely flushed. That's the
+		 * "WAL before data" rule. However, such case does exist at index building,
+		 * _bt_blwritepage logs the full page without flushing WAL before
+		 * smgrextend (files are fsynced before build ends).
+		 */
+		flushlsn = GetFlushRecPtr();
+		if (lsn > flushlsn)
+		{
+			elog(DEBUG5, "last-written LSN %X/%X is ahead of last flushed LSN %X/%X",
+				 (uint32) (lsn >> 32), (uint32) lsn,
+				 (uint32) (flushlsn >> 32), (uint32) flushlsn);
+			XLogFlush(lsn);
+		}
+	}
+
+	return lsn;
+}
+
+
+/*
+ * zenith_exists() -- Does the physical file exist?
+ *
+ * Temporary and unlogged relations are checked locally with mdexists(). For
+ * permanent relations (and unknown-persistence ones not found locally) the
+ * relsize cache is consulted first, and only then the page server is asked.
+ */
+bool
+zenith_exists(SMgrRelation reln, ForkNumber forkNum)
+{
+	bool		exists;
+	ZenithResponse *resp;
+	BlockNumber n_blocks;
+	bool		latest;
+	XLogRecPtr	request_lsn;
+
+	switch (reln->smgr_relpersistence)
+	{
+		case 0:
+			/*
+			 * We don't know if it's an unlogged rel stored locally, or permanent
+			 * rel stored in the page server. First check if it exists locally.
+			 * If it does, great. Otherwise check if it exists in the page server.
+			 */
+			if (mdexists(reln, forkNum))
+				return true;
+			break;
+
+		case RELPERSISTENCE_PERMANENT:
+			break;
+
+		case RELPERSISTENCE_TEMP:
+		case RELPERSISTENCE_UNLOGGED:
+			return mdexists(reln, forkNum);
+
+		default:
+			elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
+	}
+
+	/* a cached size means the relation is known to exist */
+	if (get_cached_relsize(reln->smgr_rnode.node, forkNum, &n_blocks))
+	{
+		return true;
+	}
+
+	/*
+	 * \d+ on a view calls smgrexists with 0/0/0 relfilenode. The page server
+	 * will error out if you check that, because the whole dbdir for tablespace
+	 * 0, db 0 doesn't exist. We possibly should change the page server to
+	 * accept that and return 'false', to be consistent with mdexists(). But
+	 * we probably also should fix pg_table_size() to not call smgrexists()
+	 * with bogus relfilenode.
+	 *
+	 * For now, handle that special case here.
+	 */
+	if (reln->smgr_rnode.node.spcNode == 0 &&
+		reln->smgr_rnode.node.dbNode == 0 &&
+		reln->smgr_rnode.node.relNode == 0)
+	{
+		return false;
+	}
+
+	request_lsn = zenith_get_request_lsn(&latest);
+	{
+		ZenithExistsRequest request = {
+			.req.tag = T_ZenithExistsRequest,
+			.req.latest = latest,
+			.req.lsn = request_lsn,
+			.rnode = reln->smgr_rnode.node,
+			.forknum = forkNum
+		};
+
+		resp = page_server->request((ZenithRequest *) &request);
+	}
+
+	switch (resp->tag)
+	{
+		case T_ZenithExistsResponse:
+			exists = ((ZenithExistsResponse *) resp)->exists;
+			break;
+
+		case T_ZenithErrorResponse:
+			ereport(ERROR,
+					(errcode(ERRCODE_IO_ERROR),
+					 errmsg("could not read relation existence of rel %u/%u/%u.%u from page server at lsn %X/%08X",
+							reln->smgr_rnode.node.spcNode,
+							reln->smgr_rnode.node.dbNode,
+							reln->smgr_rnode.node.relNode,
+							forkNum,
+							(uint32) (request_lsn >> 32), (uint32) request_lsn),
+					 errdetail("page server returned error: %s",
+							   ((ZenithErrorResponse *) resp)->message)));
+			break;
+
+		default:
+			elog(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
+	}
+	pfree(resp);
+	return exists;
+}
+
+/*
+ * zenith_create() -- Create a new relation on zenithd storage
+ *
+ * If isRedo is true, it's okay for the relation to exist already.
+ */
+void
+zenith_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
+{
+	switch (reln->smgr_relpersistence)
+	{
+		case 0:
+			elog(ERROR, "cannot call smgrcreate() on rel with unknown persistence");
+
+		case RELPERSISTENCE_PERMANENT:
+			break;
+
+		case RELPERSISTENCE_TEMP:
+		case RELPERSISTENCE_UNLOGGED:
+			mdcreate(reln, forkNum, isRedo);
+			return;
+
+		default:
+			elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
+	}
+
+	elog(SmgrTrace, "Create relation %u/%u/%u.%u",
+		 reln->smgr_rnode.node.spcNode,
+		 reln->smgr_rnode.node.dbNode,
+		 reln->smgr_rnode.node.relNode,
+		 forkNum);
+
+	/*
+	 * Newly created relation is empty, remember that in the relsize cache.
+	 *
+	 * FIXME: This is currently not just an optimization, but required for
+	 * correctness. Postgres can call smgrnblocks() on the newly-created
+	 * relation. Currently, we don't call SetLastWrittenPageLSN() when a new
+	 * relation is created, so if we didn't remember the size in the relsize
+	 * cache, we might call smgrnblocks() on the newly-created relation before
+	 * the creation WAL record has been received by the page server.
+	 */
+	set_cached_relsize(reln->smgr_rnode.node, forkNum, 0);
+
+#ifdef DEBUG_COMPARE_LOCAL
+	if (IS_LOCAL_REL(reln))
+		mdcreate(reln, forkNum, isRedo);
+#endif
+}
+
+/*
+ * zenith_unlink() -- Unlink a relation.
+ *
+ * Note that we're passed a RelFileNodeBackend --- by the time this is called,
+ * there won't be an SMgrRelation hashtable entry anymore.
+ *
+ * forkNum can be a fork number to delete a specific fork, or InvalidForkNumber
+ * to delete all forks.
+ *
+ *
+ * If isRedo is true, it's unsurprising for the relation to be already gone.
+ * Also, we should remove the file immediately instead of queuing a request
+ * for later, since during redo there's no possibility of creating a
+ * conflicting relation.
+ *
+ * Note: any failure should be reported as WARNING not ERROR, because
+ * we are usually not in a transaction anymore when this is called.
+ */
+void
+zenith_unlink(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo)
+{
+	/*
+	 * Might or might not exist locally, depending on whether it's
+	 * an unlogged or permanent relation (or if DEBUG_COMPARE_LOCAL is
+	 * set). Try to unlink, it won't do any harm if the file doesn't
+	 * exist.
+	 */
+	mdunlink(rnode, forkNum, isRedo);
+	/* only non-temporary relations are tracked in the relsize cache */
+	if (!RelFileNodeBackendIsTemp(rnode)) {
+		forget_cached_relsize(rnode.node, forkNum);
+	}
+}
+
+/*
+ * zenith_extend() -- Add a block to the specified relation.
+ *
+ * The semantics are nearly the same as mdwrite(): write at the
+ * specified position.
However, this is to be used for the case of + * extending a relation (i.e., blocknum is at or beyond the current + * EOF). Note that we assume writing a block beyond current EOF + * causes intervening file space to become filled with zeroes. + */ +void +zenith_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, + char *buffer, bool skipFsync) +{ + XLogRecPtr lsn; + + switch (reln->smgr_relpersistence) + { + case 0: + elog(ERROR, "cannot call smgrextend() on rel with unknown persistence"); + + case RELPERSISTENCE_PERMANENT: + break; + + case RELPERSISTENCE_TEMP: + case RELPERSISTENCE_UNLOGGED: + mdextend(reln, forkNum, blkno, buffer, skipFsync); + return; + + default: + elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence); + } + + /* + * Check that the cluster size limit has not been exceeded. + * + * Temporary and unlogged relations are not included in the cluster size measured + * by the page server, so ignore those. Autovacuum processes are also exempt. + */ + if (max_cluster_size > 0 && + reln->smgr_relpersistence == RELPERSISTENCE_PERMANENT && + !IsAutoVacuumWorkerProcess()) + { + uint64 current_size = GetZenithCurrentClusterSize(); + + if (current_size >= ((uint64) max_cluster_size) * 1024 * 1024) + ereport(ERROR, + (errcode(ERRCODE_DISK_FULL), + errmsg("could not extend file because cluster size limit (%d MB) has been exceeded", + max_cluster_size), + errhint("This limit is defined by neon.max_cluster_size GUC"))); + } + + zenith_wallog_page(reln, forkNum, blkno, buffer); + set_cached_relsize(reln->smgr_rnode.node, forkNum, blkno + 1); + + lsn = PageGetLSN(buffer); + elog(SmgrTrace, "smgrextend called for %u/%u/%u.%u blk %u, page LSN: %X/%08X", + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + forkNum, blkno, + (uint32) (lsn >> 32), (uint32) lsn); + +#ifdef DEBUG_COMPARE_LOCAL + if (IS_LOCAL_REL(reln)) + mdextend(reln, forkNum, blkno, buffer, skipFsync); +#endif +} + +/* + * 
zenith_open() -- Initialize newly-opened relation. + */ +void +zenith_open(SMgrRelation reln) +{ + /* + * We don't have anything special to do here. Call mdopen() to let md.c + * initialize itself. That's only needed for temporary or unlogged + * relations, but it's dirt cheap so do it always to make sure the md + * fields are initialized, for debugging purposes if nothing else. + */ + mdopen(reln); + + /* no work */ + elog(SmgrTrace, "[ZENITH_SMGR] open noop"); +} + +/* + * zenith_close() -- Close the specified relation, if it isn't closed already. + */ +void +zenith_close(SMgrRelation reln, ForkNumber forknum) +{ + /* + * Let md.c close it, if it had it open. Doesn't hurt to do this + * even for permanent relations that have no local storage. + */ + mdclose(reln, forknum); +} + +/* + * zenith_prefetch() -- Initiate asynchronous read of the specified block of a relation + */ +bool +zenith_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) +{ + switch (reln->smgr_relpersistence) + { + case 0: + /* probably shouldn't happen, but ignore it */ + break; + + case RELPERSISTENCE_PERMANENT: + break; + + case RELPERSISTENCE_TEMP: + case RELPERSISTENCE_UNLOGGED: + return mdprefetch(reln, forknum, blocknum); + + default: + elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence); + } + + /* not implemented */ + elog(SmgrTrace, "[ZENITH_SMGR] prefetch noop"); + return true; +} + +/* + * zenith_writeback() -- Tell the kernel to write pages back to storage. + * + * This accepts a range of blocks because flushing several pages at once is + * considerably more efficient than doing so individually. 
+ */
+void
+zenith_writeback(SMgrRelation reln, ForkNumber forknum,
+				 BlockNumber blocknum, BlockNumber nblocks)
+{
+	switch (reln->smgr_relpersistence)
+	{
+		case 0:
+			/* mdwriteback() does nothing if the file doesn't exist */
+			mdwriteback(reln, forknum, blocknum, nblocks);
+			break;
+
+		case RELPERSISTENCE_PERMANENT:
+			break;
+
+		case RELPERSISTENCE_TEMP:
+		case RELPERSISTENCE_UNLOGGED:
+			mdwriteback(reln, forknum, blocknum, nblocks);
+			return;
+
+		default:
+			elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
+	}
+
+	/* not implemented */
+	elog(SmgrTrace, "[ZENITH_SMGR] writeback noop");
+
+#ifdef DEBUG_COMPARE_LOCAL
+	if (IS_LOCAL_REL(reln))
+		mdwriteback(reln, forknum, blocknum, nblocks);
+#endif
+}
+
+/*
+ * Read one block of a relation from the page server at an explicit LSN and
+ * copy it (BLCKSZ bytes) into 'buffer'. An error response from the page
+ * server is raised as an ERROR.
+ *
+ * While function is defined in the zenith extension it's used within neon_test_utils directly.
+ * To avoid breaking tests in the runtime please keep function signature in sync.
+ */
+void zenith_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
+			XLogRecPtr request_lsn, bool request_latest, char *buffer)
+{
+	ZenithResponse *resp;
+
+	{
+		ZenithGetPageRequest request = {
+			.req.tag = T_ZenithGetPageRequest,
+			.req.latest = request_latest,
+			.req.lsn = request_lsn,
+			.rnode = rnode,
+			.forknum = forkNum,
+			.blkno = blkno
+		};
+
+		resp = page_server->request((ZenithRequest *) &request);
+	}
+
+	switch (resp->tag)
+	{
+		case T_ZenithGetPageResponse:
+			memcpy(buffer, ((ZenithGetPageResponse *) resp)->page, BLCKSZ);
+			break;
+
+		case T_ZenithErrorResponse:
+			ereport(ERROR,
+					(errcode(ERRCODE_IO_ERROR),
+					 errmsg("could not read block %u in rel %u/%u/%u.%u from page server at lsn %X/%08X",
+							blkno,
+							rnode.spcNode,
+							rnode.dbNode,
+							rnode.relNode,
+							forkNum,
+							(uint32) (request_lsn >> 32), (uint32) request_lsn),
+					 errdetail("page server returned error: %s",
+							   ((ZenithErrorResponse *) resp)->message)));
+			break;
+
+		default:
+			elog(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
+	}
+
+	pfree(resp);
+}
+
+/*
+ * zenith_read() -- Read the specified block from a relation.
+ *
+ * Temporary and unlogged relations are read with mdread(). Permanent ones
+ * are fetched from the page server at the LSN chosen by
+ * zenith_get_request_lsn(). Reading a relation with unknown persistence is
+ * an error.
+ */
+void
+zenith_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
+			char *buffer)
+{
+	bool		latest;
+	XLogRecPtr	request_lsn;
+
+	switch (reln->smgr_relpersistence)
+	{
+		case 0:
+			elog(ERROR, "cannot call smgrread() on rel with unknown persistence");
+
+		case RELPERSISTENCE_PERMANENT:
+			break;
+
+		case RELPERSISTENCE_TEMP:
+		case RELPERSISTENCE_UNLOGGED:
+			mdread(reln, forkNum, blkno, buffer);
+			return;
+
+		default:
+			elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
+	}
+
+	request_lsn = zenith_get_request_lsn(&latest);
+	zenith_read_at_lsn(reln->smgr_rnode.node, forkNum, blkno, request_lsn, latest, buffer);
+
+#ifdef DEBUG_COMPARE_LOCAL
+	/*
+	 * Debug-only cross-check: read the same block from local disk too and
+	 * PANIC if it differs from what the page server returned.
+	 */
+	if (forkNum == MAIN_FORKNUM && IS_LOCAL_REL(reln))
+	{
+		char		pageserver_masked[BLCKSZ];
+		char		mdbuf[BLCKSZ];
+		char		mdbuf_masked[BLCKSZ];
+
+		mdread(reln, forkNum, blkno, mdbuf);
+
+		/* the rm_mask callbacks below modify the page, so work on copies */
+		memcpy(pageserver_masked, buffer, BLCKSZ);
+		memcpy(mdbuf_masked, mdbuf, BLCKSZ);
+
+		if (PageIsNew(mdbuf))
+		{
+			if (!PageIsNew(pageserver_masked))
+			{
+				elog(PANIC, "page is new in MD but not in Page Server at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n%s\n",
+					 blkno,
+					 reln->smgr_rnode.node.spcNode,
+					 reln->smgr_rnode.node.dbNode,
+					 reln->smgr_rnode.node.relNode,
+					 forkNum,
+					 (uint32) (request_lsn >> 32), (uint32) request_lsn,
+					 hexdump_page(buffer));
+			}
+		}
+		else if (PageIsNew(buffer))
+		{
+			elog(PANIC, "page is new in Page Server but not in MD at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n%s\n",
+				 blkno,
+				 reln->smgr_rnode.node.spcNode,
+				 reln->smgr_rnode.node.dbNode,
+				 reln->smgr_rnode.node.relNode,
+				 forkNum,
+				 (uint32) (request_lsn >> 32), (uint32) request_lsn,
+				 hexdump_page(mdbuf));
+		}
+		else if (PageGetSpecialSize(mdbuf) == 0)
+		{
+			/* assume heap */
+			RmgrTable[RM_HEAP_ID].rm_mask(mdbuf_masked, blkno);
+			RmgrTable[RM_HEAP_ID].rm_mask(pageserver_masked, blkno);
+
+			if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0)
+			{
+				elog(PANIC, "heap buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
+					 blkno,
+					 reln->smgr_rnode.node.spcNode,
+					 reln->smgr_rnode.node.dbNode,
+					 reln->smgr_rnode.node.relNode,
+					 forkNum,
+					 (uint32) (request_lsn >> 32), (uint32) request_lsn,
+					 hexdump_page(mdbuf_masked),
+					 hexdump_page(pageserver_masked));
+			}
+		}
+		else if (PageGetSpecialSize(mdbuf) == MAXALIGN(sizeof(BTPageOpaqueData)))
+		{
+			if (((BTPageOpaqueData *) PageGetSpecialPointer(mdbuf))->btpo_cycleid < MAX_BT_CYCLE_ID)
+			{
+				/* assume btree */
+				RmgrTable[RM_BTREE_ID].rm_mask(mdbuf_masked, blkno);
+				RmgrTable[RM_BTREE_ID].rm_mask(pageserver_masked, blkno);
+
+				if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0)
+				{
+					elog(PANIC, "btree buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
+						 blkno,
+						 reln->smgr_rnode.node.spcNode,
+						 reln->smgr_rnode.node.dbNode,
+						 reln->smgr_rnode.node.relNode,
+						 forkNum,
+						 (uint32) (request_lsn >> 32), (uint32) request_lsn,
+						 hexdump_page(mdbuf_masked),
+						 hexdump_page(pageserver_masked));
+				}
+			}
+		}
+	}
+#endif
+}
+
+#ifdef DEBUG_COMPARE_LOCAL
+/*
+ * Format a BLCKSZ-byte page as hexadecimal text for the PANIC messages
+ * above: a space is inserted before every 8th byte and a newline before
+ * every 40th. Returns a palloc'd string.
+ */
+static char *
+hexdump_page(char *page)
+{
+	StringInfoData result;
+
+	initStringInfo(&result);
+
+	for (int i = 0; i < BLCKSZ; i++)
+	{
+		if (i % 8 == 0)
+			appendStringInfo(&result, " ");
+		if (i % 40 == 0)
+			appendStringInfo(&result, "\n");
+		appendStringInfo(&result, "%02x", (unsigned char) (page[i]));
+	}
+
+	return result.data;
+}
+#endif
+
+/*
+ * zenith_write() -- Write the supplied block at the appropriate location.
+ *
+ * This is to be used only for updating already-existing blocks of a
+ * relation (ie, those before the current EOF). To extend a relation,
+ * use mdextend().
+ */ +void +zenith_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + char *buffer, bool skipFsync) +{ + XLogRecPtr lsn; + + switch (reln->smgr_relpersistence) + { + case 0: + /* This is a bit tricky. Check if the relation exists locally */ + if (mdexists(reln, forknum)) + { + /* It exists locally. Guess it's unlogged then. */ + mdwrite(reln, forknum, blocknum, buffer, skipFsync); + + /* + * We could set relpersistence now that we have determined + * that it's local. But we don't dare to do it, because that + * would immediately allow reads as well, which shouldn't + * happen. We could cache it with a different 'relpersistence' + * value, but this isn't performance critical. + */ + return; + } + break; + + case RELPERSISTENCE_PERMANENT: + break; + + case RELPERSISTENCE_TEMP: + case RELPERSISTENCE_UNLOGGED: + mdwrite(reln, forknum, blocknum, buffer, skipFsync); + return; + + default: + elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence); + } + + zenith_wallog_page(reln, forknum, blocknum, buffer); + + lsn = PageGetLSN(buffer); + elog(SmgrTrace, "smgrwrite called for %u/%u/%u.%u blk %u, page LSN: %X/%08X", + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + forknum, blocknum, + (uint32) (lsn >> 32), (uint32) lsn); + +#ifdef DEBUG_COMPARE_LOCAL + if (IS_LOCAL_REL(reln)) + mdwrite(reln, forknum, blocknum, buffer, skipFsync); +#endif +} + +/* + * zenith_nblocks() -- Get the number of blocks stored in a relation. 
+ */ +BlockNumber +zenith_nblocks(SMgrRelation reln, ForkNumber forknum) +{ + ZenithResponse *resp; + BlockNumber n_blocks; + bool latest; + XLogRecPtr request_lsn; + + switch (reln->smgr_relpersistence) + { + case 0: + elog(ERROR, "cannot call smgrnblocks() on rel with unknown persistence"); + break; + + case RELPERSISTENCE_PERMANENT: + break; + + case RELPERSISTENCE_TEMP: + case RELPERSISTENCE_UNLOGGED: + return mdnblocks(reln, forknum); + + default: + elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence); + } + + if (get_cached_relsize(reln->smgr_rnode.node, forknum, &n_blocks)) + { + elog(SmgrTrace, "cached nblocks for %u/%u/%u.%u: %u blocks", + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + forknum, n_blocks); + return n_blocks; + } + + request_lsn = zenith_get_request_lsn(&latest); + { + ZenithNblocksRequest request = { + .req.tag = T_ZenithNblocksRequest, + .req.latest = latest, + .req.lsn = request_lsn, + .rnode = reln->smgr_rnode.node, + .forknum = forknum, + }; + + resp = page_server->request((ZenithRequest *) &request); + } + + switch (resp->tag) + { + case T_ZenithNblocksResponse: + n_blocks = ((ZenithNblocksResponse *) resp)->n_blocks; + break; + + case T_ZenithErrorResponse: + ereport(ERROR, + (errcode(ERRCODE_IO_ERROR), + errmsg("could not read relation size of rel %u/%u/%u.%u from page server at lsn %X/%08X", + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + forknum, + (uint32) (request_lsn >> 32), (uint32) request_lsn), + errdetail("page server returned error: %s", + ((ZenithErrorResponse *) resp)->message))); + break; + + default: + elog(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag); + } + update_cached_relsize(reln->smgr_rnode.node, forknum, n_blocks); + + elog(SmgrTrace, "zenith_nblocks: rel %u/%u/%u fork %u (request LSN %X/%08X): %u blocks", + reln->smgr_rnode.node.spcNode, + 
reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + forknum, + (uint32) (request_lsn >> 32), (uint32) request_lsn, + n_blocks); + + pfree(resp); + return n_blocks; +} + +/* + * zenith_db_size() -- Get the size of the database in bytes. + */ +const int64 +zenith_dbsize(Oid dbNode) +{ + ZenithResponse *resp; + int64 db_size; + XLogRecPtr request_lsn; + bool latest; + + request_lsn = zenith_get_request_lsn(&latest); + { + ZenithDbSizeRequest request = { + .req.tag = T_ZenithDbSizeRequest, + .req.latest = latest, + .req.lsn = request_lsn, + .dbNode = dbNode, + }; + + resp = page_server->request((ZenithRequest *) &request); + } + + switch (resp->tag) + { + case T_ZenithDbSizeResponse: + db_size = ((ZenithDbSizeResponse *) resp)->db_size; + break; + + case T_ZenithErrorResponse: + ereport(ERROR, + (errcode(ERRCODE_IO_ERROR), + errmsg("could not read db size of db %u from page server at lsn %X/%08X", + dbNode, + (uint32) (request_lsn >> 32), (uint32) request_lsn), + errdetail("page server returned error: %s", + ((ZenithErrorResponse *) resp)->message))); + break; + + default: + elog(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag); + } + + elog(SmgrTrace, "zenith_dbsize: db %u (request LSN %X/%08X): %ld bytes", + dbNode, + (uint32) (request_lsn >> 32), (uint32) request_lsn, + db_size); + + pfree(resp); + return db_size; +} + +/* + * zenith_truncate() -- Truncate relation to specified number of blocks. 
+ */ +void +zenith_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks) +{ + XLogRecPtr lsn; + + switch (reln->smgr_relpersistence) + { + case 0: + elog(ERROR, "cannot call smgrtruncate() on rel with unknown persistence"); + break; + + case RELPERSISTENCE_PERMANENT: + break; + + case RELPERSISTENCE_TEMP: + case RELPERSISTENCE_UNLOGGED: + mdtruncate(reln, forknum, nblocks); + return; + + default: + elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence); + } + + set_cached_relsize(reln->smgr_rnode.node, forknum, nblocks); + + /* + * Truncating a relation drops all its buffers from the buffer cache + * without calling smgrwrite() on them. But we must account for that in + * our tracking of last-written-LSN all the same: any future smgrnblocks() + * request must return the new size after the truncation. We don't know + * what the LSN of the truncation record was, so be conservative and use + * the most recently inserted WAL record's LSN. + */ + lsn = GetXLogInsertRecPtr(); + + lsn = zm_adjust_lsn(lsn); + + /* + * Flush it, too. We don't actually care about it here, but let's uphold + * the invariant that last-written LSN <= flush LSN. + */ + XLogFlush(lsn); + + SetLastWrittenPageLSN(lsn); + +#ifdef DEBUG_COMPARE_LOCAL + if (IS_LOCAL_REL(reln)) + mdtruncate(reln, forknum, nblocks); +#endif +} + +/* + * zenith_immedsync() -- Immediately sync a relation to stable storage. + * + * Note that only writes already issued are synced; this routine knows + * nothing of dirty buffers that may exist inside the buffer manager. We + * sync active and inactive segments; smgrDoPendingSyncs() relies on this. + * Consider a relation skipping WAL. Suppose a checkpoint syncs blocks of + * some segment, then mdtruncate() renders that segment inactive. If we + * crash before the next checkpoint syncs the newly-inactive segment, that + * segment may survive recovery, reintroducing unwanted data into the table. 
+ */ +void +zenith_immedsync(SMgrRelation reln, ForkNumber forknum) +{ + switch (reln->smgr_relpersistence) + { + case 0: + elog(ERROR, "cannot call smgrimmedsync() on rel with unknown persistence"); + break; + + case RELPERSISTENCE_PERMANENT: + break; + + case RELPERSISTENCE_TEMP: + case RELPERSISTENCE_UNLOGGED: + mdimmedsync(reln, forknum); + return; + + default: + elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence); + } + + elog(SmgrTrace, "[ZENITH_SMGR] immedsync noop"); + +#ifdef DEBUG_COMPARE_LOCAL + if (IS_LOCAL_REL(reln)) + mdimmedsync(reln, forknum); +#endif +} + +/* + * zenith_start_unlogged_build() -- Starting build operation on a rel. + * + * Some indexes are built in two phases, by first populating the table with + * regular inserts, using the shared buffer cache but skipping WAL-logging, + * and WAL-logging the whole relation after it's done. Zenith relies on the + * WAL to reconstruct pages, so we cannot use the page server in the + * first phase when the changes are not logged. + */ +static void +zenith_start_unlogged_build(SMgrRelation reln) +{ + /* + * Currently, there can be only one unlogged relation build operation in + * progress at a time. That's enough for the current usage. 
+ */ + if (unlogged_build_phase != UNLOGGED_BUILD_NOT_IN_PROGRESS) + elog(ERROR, "unlogged relation build is already in progress"); + Assert(unlogged_build_rel == NULL); + + ereport(SmgrTrace, + (errmsg("starting unlogged build of relation %u/%u/%u", + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode))); + + switch (reln->smgr_relpersistence) + { + case 0: + elog(ERROR, "cannot call smgr_start_unlogged_build() on rel with unknown persistence"); + break; + + case RELPERSISTENCE_PERMANENT: + break; + + case RELPERSISTENCE_TEMP: + case RELPERSISTENCE_UNLOGGED: + unlogged_build_rel = reln; + unlogged_build_phase = UNLOGGED_BUILD_NOT_PERMANENT; + return; + + default: + elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence); + } + + if (smgrnblocks(reln, MAIN_FORKNUM) != 0) + elog(ERROR, "cannot perform unlogged index build, index is not empty "); + + unlogged_build_rel = reln; + unlogged_build_phase = UNLOGGED_BUILD_PHASE_1; + + /* Make the relation look like it's unlogged */ + reln->smgr_relpersistence = RELPERSISTENCE_UNLOGGED; + + /* + * FIXME: should we pass isRedo true to create the tablespace dir if it + * doesn't exist? Is it needed? + */ + mdcreate(reln, MAIN_FORKNUM, false); +} + +/* + * zenith_finish_unlogged_build_phase_1() + * + * Call this after you have finished populating a relation in unlogged mode, + * before you start WAL-logging it. 
+ */ +static void +zenith_finish_unlogged_build_phase_1(SMgrRelation reln) +{ + Assert(unlogged_build_rel == reln); + + ereport(SmgrTrace, + (errmsg("finishing phase 1 of unlogged build of relation %u/%u/%u", + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode))); + + if (unlogged_build_phase == UNLOGGED_BUILD_NOT_PERMANENT) + return; + + Assert(unlogged_build_phase == UNLOGGED_BUILD_PHASE_1); + Assert(reln->smgr_relpersistence == RELPERSISTENCE_UNLOGGED); + + unlogged_build_phase = UNLOGGED_BUILD_PHASE_2; +} + +/* + * zenith_end_unlogged_build() -- Finish an unlogged rel build. + * + * Call this after you have finished WAL-logging an relation that was + * first populated without WAL-logging. + * + * This removes the local copy of the rel, since it's now been fully + * WAL-logged and is present in the page server. + */ +static void +zenith_end_unlogged_build(SMgrRelation reln) +{ + Assert(unlogged_build_rel == reln); + + ereport(SmgrTrace, + (errmsg("ending unlogged build of relation %u/%u/%u", + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode))); + + if (unlogged_build_phase != UNLOGGED_BUILD_NOT_PERMANENT) + { + RelFileNodeBackend rnode; + + Assert(unlogged_build_phase == UNLOGGED_BUILD_PHASE_2); + Assert(reln->smgr_relpersistence == RELPERSISTENCE_UNLOGGED); + + /* Make the relation look permanent again */ + reln->smgr_relpersistence = RELPERSISTENCE_PERMANENT; + + /* Remove local copy */ + rnode = reln->smgr_rnode; + for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++) + { + elog(SmgrTrace, "forgetting cached relsize for %u/%u/%u.%u", + rnode.node.spcNode, + rnode.node.dbNode, + rnode.node.relNode, + forknum); + + forget_cached_relsize(rnode.node, forknum); + mdclose(reln, forknum); + /* use isRedo == true, so that we drop it immediately */ + mdunlink(rnode, forknum, true); + } + } + + unlogged_build_rel = NULL; + unlogged_build_phase = 
UNLOGGED_BUILD_NOT_IN_PROGRESS; +} + +static void +AtEOXact_zenith(XactEvent event, void *arg) +{ + switch (event) + { + case XACT_EVENT_ABORT: + case XACT_EVENT_PARALLEL_ABORT: + + /* + * Forget about any build we might have had in progress. The local + * file will be unlinked by smgrDoPendingDeletes() + */ + unlogged_build_rel = NULL; + unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS; + break; + + case XACT_EVENT_COMMIT: + case XACT_EVENT_PARALLEL_COMMIT: + case XACT_EVENT_PREPARE: + case XACT_EVENT_PRE_COMMIT: + case XACT_EVENT_PARALLEL_PRE_COMMIT: + case XACT_EVENT_PRE_PREPARE: + if (unlogged_build_phase != UNLOGGED_BUILD_NOT_IN_PROGRESS) + { + unlogged_build_rel = NULL; + unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS; + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + (errmsg("unlogged index build was not properly finished")))); + } + break; + } +} + +static const struct f_smgr zenith_smgr = +{ + .smgr_init = zenith_init, + .smgr_shutdown = NULL, + .smgr_open = zenith_open, + .smgr_close = zenith_close, + .smgr_create = zenith_create, + .smgr_exists = zenith_exists, + .smgr_unlink = zenith_unlink, + .smgr_extend = zenith_extend, + .smgr_prefetch = zenith_prefetch, + .smgr_read = zenith_read, + .smgr_write = zenith_write, + .smgr_writeback = zenith_writeback, + .smgr_nblocks = zenith_nblocks, + .smgr_truncate = zenith_truncate, + .smgr_immedsync = zenith_immedsync, + + .smgr_start_unlogged_build = zenith_start_unlogged_build, + .smgr_finish_unlogged_build_phase_1 = zenith_finish_unlogged_build_phase_1, + .smgr_end_unlogged_build = zenith_end_unlogged_build, +}; + + +const f_smgr * +smgr_zenith(BackendId backend, RelFileNode rnode) +{ + + /* Don't use page server for temp relations */ + if (backend != InvalidBackendId) + return smgr_standard(backend, rnode); + else + return &zenith_smgr; +} + +void +smgr_init_zenith(void) +{ + RegisterXactCallback(AtEOXact_zenith, NULL); + + smgr_init_standard(); + zenith_init(); +} diff --git 
a/pgxn/neon/relsize_cache.c b/pgxn/neon/relsize_cache.c new file mode 100644 index 0000000000..8dfcffe1d1 --- /dev/null +++ b/pgxn/neon/relsize_cache.c @@ -0,0 +1,167 @@ +/*------------------------------------------------------------------------- + * + * relsize_cache.c + * Relation size cache for better zenith performance. + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * pgxn/neon/relsize_cache.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "pagestore_client.h" +#include "storage/relfilenode.h" +#include "storage/smgr.h" +#include "storage/lwlock.h" +#include "storage/ipc.h" +#include "storage/shmem.h" +#include "catalog/pg_tablespace_d.h" +#include "utils/dynahash.h" +#include "utils/guc.h" + + +typedef struct +{ + RelFileNode rnode; + ForkNumber forknum; +} RelTag; + +typedef struct +{ + RelTag tag; + BlockNumber size; +} RelSizeEntry; + +static HTAB *relsize_hash; +static LWLockId relsize_lock; +static int relsize_hash_size; +static shmem_startup_hook_type prev_shmem_startup_hook = NULL; + +/* + * Size of a cache entry is 20 bytes. So this default will take about 1.2 MB, + * which seems reasonable.
+ */ +#define DEFAULT_RELSIZE_HASH_SIZE (64 * 1024) + +static void +zenith_smgr_shmem_startup(void) +{ + static HASHCTL info; + + if (prev_shmem_startup_hook) + prev_shmem_startup_hook(); + + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); + relsize_lock = (LWLockId) GetNamedLWLockTranche("neon_relsize"); + info.keysize = sizeof(RelTag); + info.entrysize = sizeof(RelSizeEntry); + relsize_hash = ShmemInitHash("neon_relsize", + relsize_hash_size, relsize_hash_size, + &info, + HASH_ELEM | HASH_BLOBS); + LWLockRelease(AddinShmemInitLock); +} + +bool +get_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber *size) +{ + bool found = false; + + if (relsize_hash_size > 0) + { + RelTag tag; + RelSizeEntry *entry; + + tag.rnode = rnode; + tag.forknum = forknum; + LWLockAcquire(relsize_lock, LW_SHARED); + entry = hash_search(relsize_hash, &tag, HASH_FIND, NULL); + if (entry != NULL) + { + *size = entry->size; + found = true; + } + LWLockRelease(relsize_lock); + } + return found; +} + +void +set_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber size) +{ + if (relsize_hash_size > 0) + { + RelTag tag; + RelSizeEntry *entry; + + tag.rnode = rnode; + tag.forknum = forknum; + LWLockAcquire(relsize_lock, LW_EXCLUSIVE); + entry = hash_search(relsize_hash, &tag, HASH_ENTER, NULL); + entry->size = size; + LWLockRelease(relsize_lock); + } +} + +void +update_cached_relsize(RelFileNode rnode, ForkNumber forknum, BlockNumber size) +{ + if (relsize_hash_size > 0) + { + RelTag tag; + RelSizeEntry *entry; + bool found; + + tag.rnode = rnode; + tag.forknum = forknum; + LWLockAcquire(relsize_lock, LW_EXCLUSIVE); + entry = hash_search(relsize_hash, &tag, HASH_ENTER, &found); + if (!found || entry->size < size) + entry->size = size; + LWLockRelease(relsize_lock); + } +} + +void +forget_cached_relsize(RelFileNode rnode, ForkNumber forknum) +{ + if (relsize_hash_size > 0) + { + RelTag tag; + + tag.rnode = rnode; + tag.forknum = forknum; + LWLockAcquire(relsize_lock, 
LW_EXCLUSIVE); + hash_search(relsize_hash, &tag, HASH_REMOVE, NULL); + LWLockRelease(relsize_lock); + } +} + +void +relsize_hash_init(void) +{ + DefineCustomIntVariable("neon.relsize_hash_size", + "Sets the maximum number of cached relation sizes for neon", + NULL, + &relsize_hash_size, + DEFAULT_RELSIZE_HASH_SIZE, + 0, + INT_MAX, + PGC_POSTMASTER, + 0, + NULL, NULL, NULL); + + if (relsize_hash_size > 0) + { + RequestAddinShmemSpace(hash_estimate_size(relsize_hash_size, sizeof(RelSizeEntry))); + RequestNamedLWLockTranche("neon_relsize", 1); + + prev_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = zenith_smgr_shmem_startup; + } +} diff --git a/pgxn/neon/walproposer.c b/pgxn/neon/walproposer.c new file mode 100644 index 0000000000..9625325c0a --- /dev/null +++ b/pgxn/neon/walproposer.c @@ -0,0 +1,2403 @@ +/*------------------------------------------------------------------------- + * + * walproposer.c + * + * Proposer/leader part of the total order broadcast protocol between postgres + * and WAL safekeepers. + * + * We have two ways of launching WalProposer: + * + * 1. As a background worker which will run physical WalSender with + * am_wal_proposer flag set to true. WalSender in turn would handle WAL + * reading part and call WalProposer when ready to scatter WAL. + * + * 2. As a standalone utility by running `postgres --sync-safekeepers`. That + * is needed to create LSN from which it is safe to start postgres. More + * specifically it addresses following problems: + * + * a) Chicken-or-the-egg problem: compute postgres needs data directory + * with non-rel files that are downloaded from pageserver by calling + * basebackup@LSN. This LSN is not arbitrary, it must include all + * previously committed transactions and defined through consensus + * voting, which happens... in walproposer, a part of compute node. 
+ * + * b) Just warranting such LSN is not enough, we must also actually commit + * it and make sure there is a safekeeper who knows this LSN is + * committed so WAL before it can be streamed to pageserver -- otherwise + * basebackup will hang waiting for WAL. Advancing commit_lsn without + * playing consensus game is impossible, so speculative 'let's just poll + * safekeepers, learn start LSN of future epoch and run basebackup' + * won't work. + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include +#include +#include "access/xlogdefs.h" +#include "access/xlogutils.h" +#include "storage/latch.h" +#include "miscadmin.h" +#include "pgstat.h" +#include "access/xlog.h" +#include "libpq/pqformat.h" +#include "replication/slot.h" +#include "replication/walreceiver.h" +#include "postmaster/bgworker.h" +#include "postmaster/interrupt.h" +#include "postmaster/postmaster.h" +#include "storage/pmsignal.h" +#include "storage/proc.h" +#include "storage/ipc.h" +#include "storage/lwlock.h" +#include "storage/shmem.h" +#include "storage/spin.h" +#include "tcop/tcopprot.h" +#include "utils/builtins.h" +#include "utils/guc.h" +#include "utils/memutils.h" +#include "utils/timestamp.h" + +#include "neon.h" +#include "walproposer.h" +#include "walproposer_utils.h" +#include "replication/walpropshim.h" + + +char *wal_acceptors_list; +int wal_acceptor_reconnect_timeout; +int wal_acceptor_connect_timeout; +bool am_wal_proposer; + +char *zenith_timeline_walproposer = NULL; +char *zenith_tenant_walproposer = NULL; + +/* Declared in walproposer.h, defined here, initialized in libpqwalproposer.c */ +WalProposerFunctionsType *WalProposerFunctions = NULL; + +#define WAL_PROPOSER_SLOT_NAME "wal_proposer_slot" + +static int n_safekeepers = 0; +static int quorum = 0; +static Safekeeper safekeeper[MAX_SAFEKEEPERS]; +static XLogRecPtr availableLsn; /* WAL has been generated up to this point */ +static XLogRecPtr 
lastSentCommitLsn; /* last commitLsn broadcast to safekeepers */ +static ProposerGreeting greetRequest; +static VoteRequest voteRequest; /* Vote request for safekeeper */ +static WaitEventSet *waitEvents; +static AppendResponse quorumFeedback; +/* + * Minimal LSN which may be needed for recovery of some safekeeper, + * record-aligned (first record which might not yet received by someone). + */ +static XLogRecPtr truncateLsn; +/* + * Term of the proposer. We want our term to be highest and unique, + * so we collect terms from safekeepers quorum, choose max and +1. + * After that our term is fixed and must not change. If we observe + * that some safekeeper has higher term, it means that we have another + * running compute, so we must stop immediately. + */ +static term_t propTerm; +static TermHistory propTermHistory; /* term history of the proposer */ +static XLogRecPtr propEpochStartLsn; /* epoch start lsn of the proposer */ +static term_t donorEpoch; /* Most advanced acceptor epoch */ +static int donor; /* Most advanced acceptor */ +static XLogRecPtr timelineStartLsn; /* timeline globally starts at this LSN */ +static int n_votes = 0; +static int n_connected = 0; +static TimestampTz last_reconnect_attempt; + +static WalproposerShmemState *walprop_shared; + +/* Prototypes for private functions */ +static void WalProposerInitImpl(XLogRecPtr flushRecPtr, uint64 systemId); +static void WalProposerStartImpl(void); +static void WalProposerLoop(void); +static void InitEventSet(void); +static void UpdateEventSet(Safekeeper *sk, uint32 events); +static void HackyRemoveWalProposerEvent(Safekeeper *to_remove); +static void ShutdownConnection(Safekeeper *sk); +static void ResetConnection(Safekeeper *sk); +static long TimeToReconnect(TimestampTz now); +static void ReconnectSafekeepers(void); +static void AdvancePollState(Safekeeper *sk, uint32 events); +static void HandleConnectionEvent(Safekeeper *sk); +static void SendStartWALPush(Safekeeper *sk); +static void 
RecvStartWALPushResult(Safekeeper *sk); +static void SendProposerGreeting(Safekeeper *sk); +static void RecvAcceptorGreeting(Safekeeper *sk); +static void SendVoteRequest(Safekeeper *sk); +static void RecvVoteResponse(Safekeeper *sk); +static void HandleElectedProposer(void); +static term_t GetHighestTerm(TermHistory *th); +static term_t GetEpoch(Safekeeper *sk); +static void DetermineEpochStartLsn(void); +static bool WalProposerRecovery(int donor, TimeLineID timeline, XLogRecPtr startpos, XLogRecPtr endpos); +static void SendProposerElected(Safekeeper *sk); +static void WalProposerStartStreaming(XLogRecPtr startpos); +static void StartStreaming(Safekeeper *sk); +static void SendMessageToNode(Safekeeper *sk); +static void BroadcastAppendRequest(void); +static void HandleActiveState(Safekeeper *sk, uint32 events); +static bool SendAppendRequests(Safekeeper *sk); +static bool RecvAppendResponses(Safekeeper *sk); +static void CombineHotStanbyFeedbacks(HotStandbyFeedback * hs); +static XLogRecPtr CalculateMinFlushLsn(void); +static XLogRecPtr GetAcknowledgedByQuorumWALPosition(void); +static void HandleSafekeeperResponse(void); +static bool AsyncRead(Safekeeper *sk, char **buf, int *buf_size); +static bool AsyncReadMessage(Safekeeper *sk, AcceptorProposerMessage *anymsg); +static bool BlockingWrite(Safekeeper *sk, void *msg, size_t msg_size, SafekeeperState success_state); +static bool AsyncWrite(Safekeeper *sk, void *msg, size_t msg_size, SafekeeperState flush_state); +static bool AsyncFlush(Safekeeper *sk); + + +static void nwp_shmem_startup_hook(void); +static void nwp_register_gucs(void); +static void nwp_prepare_shmem(void); +static uint64 backpressure_lag_impl(void); + + +static shmem_startup_hook_type prev_shmem_startup_hook_type; + + + +void pg_init_walproposer(void) +{ + if (!process_shared_preload_libraries_in_progress) + return; + + nwp_register_gucs(); + + nwp_prepare_shmem(); + + delay_backend_us = &backpressure_lag_impl; + + WalProposerRegister(); + + 
WalProposerInit = &WalProposerInitImpl; + WalProposerStart = &WalProposerStartImpl; +} + +static void nwp_register_gucs(void) +{ + DefineCustomStringVariable( + "neon.safekeepers", + "List of Neon WAL acceptors (host:port)", + NULL, /* long_desc */ + &wal_acceptors_list, /* valueAddr */ + "", /* bootValue */ + PGC_POSTMASTER, + GUC_LIST_INPUT, /* extensions can't use GUC_LIST_QUOTE */ + NULL, NULL, NULL + ); + + DefineCustomIntVariable( + "neon.safekeeper_reconnect_timeout", + "Timeout for reconnecting to offline wal acceptor.", + NULL, + &wal_acceptor_reconnect_timeout, + 1000, 0, INT_MAX, /* default, min, max */ + PGC_SIGHUP, /* context */ + GUC_UNIT_MS, /* flags */ + NULL, NULL, NULL + ); + + DefineCustomIntVariable( + "neon.safekeeper_connect_timeout", + "Timeout after which give up connection attempt to safekeeper.", + NULL, + &wal_acceptor_connect_timeout, + 5000, 0, INT_MAX, + PGC_SIGHUP, + GUC_UNIT_MS, + NULL, NULL, NULL + ); + +} + +/* shmem handling */ + +static void nwp_prepare_shmem(void) +{ + RequestAddinShmemSpace(WalproposerShmemSize()); + + prev_shmem_startup_hook_type = shmem_startup_hook; + shmem_startup_hook = nwp_shmem_startup_hook; +} + +static void nwp_shmem_startup_hook(void) +{ + if (prev_shmem_startup_hook_type) + prev_shmem_startup_hook_type(); + + WalproposerShmemInit(); +} + +/* + * WAL proposer bgworker entry point. + */ +void +WalProposerMain(Datum main_arg) +{ + /* Establish signal handlers. 
*/ + pqsignal(SIGUSR1, procsignal_sigusr1_handler); + pqsignal(SIGHUP, SignalHandlerForConfigReload); + pqsignal(SIGTERM, die); + + BackgroundWorkerUnblockSignals(); + + GetXLogReplayRecPtr(&ThisTimeLineID); + + WalProposerInit(GetFlushRecPtr(), GetSystemIdentifier()); + + last_reconnect_attempt = GetCurrentTimestamp(); + + application_name = (char *) "walproposer"; /* for + * synchronous_standby_names */ + am_wal_proposer = true; + am_walsender = true; + InitWalSender(); + InitProcessPhase2(); + + /* Create replication slot for WAL proposer if not exists */ + if (SearchNamedReplicationSlot(WAL_PROPOSER_SLOT_NAME, false) == NULL) + { + ReplicationSlotCreate(WAL_PROPOSER_SLOT_NAME, false, RS_PERSISTENT, false); + ReplicationSlotReserveWal(); + /* Write this slot to disk */ + ReplicationSlotMarkDirty(); + ReplicationSlotSave(); + ReplicationSlotRelease(); + } + + WalProposerStart(); +} + +/* + * Create new AppendRequest message and start sending it. This function is + * called from walsender every time the new WAL is available. + */ +void +WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos) +{ + Assert(startpos == availableLsn && endpos >= availableLsn); + availableLsn = endpos; + BroadcastAppendRequest(); +} + +/* + * Advance the WAL proposer state machine, waiting each time for events to occur. + * Will exit only when latch is set, i.e. new WAL should be pushed from walsender + * to walproposer. + */ +void +WalProposerPoll(void) +{ + while (true) + { + Safekeeper *sk; + int rc; + WaitEvent event; + TimestampTz now = GetCurrentTimestamp(); + + rc = WaitEventSetWait(waitEvents, TimeToReconnect(now), + &event, 1, WAIT_EVENT_WAL_SENDER_MAIN); + sk = (Safekeeper *) event.user_data; + + /* + * If the event contains something that one of our safekeeper states + * was waiting for, we'll advance its state. 
+ */ + if (rc != 0 && (event.events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))) + AdvancePollState(sk, event.events); + + /* + * If the timeout expired, attempt to reconnect to any safekeepers that + * we dropped + */ + ReconnectSafekeepers(); + + /* + * If wait is terminated by latch set (walsenders' latch is set on + * each wal flush), then exit loop. (no need for pm death check due to + * WL_EXIT_ON_PM_DEATH) + */ + if (rc != 0 && (event.events & WL_LATCH_SET)) + { + ResetLatch(MyLatch); + break; + } + if (rc == 0) /* timeout expired: poll state */ + { + TimestampTz now; + + /* + * If no WAL was generated during timeout (and we have already + * collected the quorum), then send poll message + */ + if (availableLsn != InvalidXLogRecPtr) + { + BroadcastAppendRequest(); + } + + /* + * Abandon connection attempts which take too long. + */ + now = GetCurrentTimestamp(); + for (int i = 0; i < n_safekeepers; i++) + { + Safekeeper *sk = &safekeeper[i]; + + if ((sk->state == SS_CONNECTING_WRITE || + sk->state == SS_CONNECTING_READ) && + TimestampDifferenceExceeds(sk->startedConnAt, now, + wal_acceptor_connect_timeout)) + { + elog(WARNING, "failed to connect to node '%s:%s': exceeded connection timeout %dms", + sk->host, sk->port, wal_acceptor_connect_timeout); + ShutdownConnection(sk); + } + } + } + } +} + +/* + * Register a background worker proposing WAL to wal acceptors.
+ */ +void +WalProposerRegister(void) +{ + BackgroundWorker bgw; + + if (*wal_acceptors_list == '\0') + return; + + memset(&bgw, 0, sizeof(bgw)); + bgw.bgw_flags = BGWORKER_SHMEM_ACCESS; + bgw.bgw_start_time = BgWorkerStart_RecoveryFinished; + snprintf(bgw.bgw_library_name, BGW_MAXLEN, "neon"); + snprintf(bgw.bgw_function_name, BGW_MAXLEN, "WalProposerMain"); + snprintf(bgw.bgw_name, BGW_MAXLEN, "WAL proposer"); + snprintf(bgw.bgw_type, BGW_MAXLEN, "WAL proposer"); + bgw.bgw_restart_time = 5; + bgw.bgw_notify_pid = 0; + bgw.bgw_main_arg = (Datum) 0; + + RegisterBackgroundWorker(&bgw); +} + +static void +WalProposerInitImpl(XLogRecPtr flushRecPtr, uint64 systemId) +{ + char *host; + char *sep; + char *port; + + /* Load the libpq-specific functions */ + if (WalProposerFunctions == NULL) + elog(ERROR, "libpqwalproposer didn't initialize correctly"); + + load_file("libpqwalreceiver", false); + if (WalReceiverFunctions == NULL) + elog(ERROR, "libpqwalreceiver didn't initialize correctly"); + + for (host = wal_acceptors_list; host != NULL && *host != '\0'; host = sep) + { + port = strchr(host, ':'); + if (port == NULL) + { + elog(FATAL, "port is not specified"); + } + *port++ = '\0'; + sep = strchr(port, ','); + if (sep != NULL) + *sep++ = '\0'; + if (n_safekeepers + 1 >= MAX_SAFEKEEPERS) + { + elog(FATAL, "Too many safekeepers"); + } + safekeeper[n_safekeepers].host = host; + safekeeper[n_safekeepers].port = port; + safekeeper[n_safekeepers].state = SS_OFFLINE; + safekeeper[n_safekeepers].conn = NULL; + + /* + * Set conninfo to empty. 
We'll fill it out once later, in + * `ResetConnection` as needed + */ + safekeeper[n_safekeepers].conninfo[0] = '\0'; + initStringInfo(&safekeeper[n_safekeepers].outbuf); + safekeeper[n_safekeepers].xlogreader = XLogReaderAllocate(wal_segment_size, NULL, XL_ROUTINE(.segment_open = wal_segment_open, .segment_close = wal_segment_close), NULL); + if (safekeeper[n_safekeepers].xlogreader == NULL) + elog(FATAL, "Failed to allocate xlog reader"); + safekeeper[n_safekeepers].flushWrite = false; + safekeeper[n_safekeepers].startStreamingAt = InvalidXLogRecPtr; + safekeeper[n_safekeepers].streamingAt = InvalidXLogRecPtr; + n_safekeepers += 1; + } + if (n_safekeepers < 1) + { + elog(FATAL, "Safekeepers addresses are not specified"); + } + quorum = n_safekeepers / 2 + 1; + + /* Fill the greeting package */ + greetRequest.tag = 'g'; + greetRequest.protocolVersion = SK_PROTOCOL_VERSION; + greetRequest.pgVersion = PG_VERSION_NUM; + pg_strong_random(&greetRequest.proposerId, sizeof(greetRequest.proposerId)); + greetRequest.systemId = systemId; + if (!zenith_timeline_walproposer) + elog(FATAL, "neon.timeline_id is not provided"); + if (*zenith_timeline_walproposer != '\0' && + !HexDecodeString(greetRequest.ztimelineid, zenith_timeline_walproposer, 16)) + elog(FATAL, "Could not parse neon.timeline_id, %s", zenith_timeline_walproposer); + if (!zenith_tenant_walproposer) + elog(FATAL, "neon.tenant_id is not provided"); + if (*zenith_tenant_walproposer != '\0' && + !HexDecodeString(greetRequest.ztenantid, zenith_tenant_walproposer, 16)) + elog(FATAL, "Could not parse neon.tenant_id, %s", zenith_tenant_walproposer); + + greetRequest.timeline = ThisTimeLineID; + greetRequest.walSegSize = wal_segment_size; + + InitEventSet(); +} + +static void +WalProposerStartImpl(void) +{ + + /* Initiate connections to all safekeeper nodes */ + for (int i = 0; i < n_safekeepers; i++) + { + ResetConnection(&safekeeper[i]); + } + + WalProposerLoop(); +} + +static void +WalProposerLoop(void) +{ + while 
(true) + WalProposerPoll(); +} + +/* Initializes the internal event set, provided that it is currently null */ +static void +InitEventSet(void) +{ + if (waitEvents) + elog(FATAL, "double-initialization of event set"); + + waitEvents = CreateWaitEventSet(TopMemoryContext, 2 + n_safekeepers); + AddWaitEventToSet(waitEvents, WL_LATCH_SET, PGINVALID_SOCKET, + MyLatch, NULL); + AddWaitEventToSet(waitEvents, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET, + NULL, NULL); +} + +/* + * Updates the events we're already waiting on for the safekeeper, setting it to + * the provided `events` + * + * This function is called any time the safekeeper's state switches to one where + * it has to wait to continue. This includes the full body of AdvancePollState + * and calls to IO helper functions. + */ +static void +UpdateEventSet(Safekeeper *sk, uint32 events) +{ + /* eventPos = -1 when we don't have an event */ + Assert(sk->eventPos != -1); + + ModifyWaitEvent(waitEvents, sk->eventPos, events, NULL); +} + +/* Hack: provides a way to remove the event corresponding to an individual walproposer from the set. + * + * Note: Internally, this completely reconstructs the event set. It should be avoided if possible. + */ +static void +HackyRemoveWalProposerEvent(Safekeeper *to_remove) +{ + /* Remove the existing event set */ + if (waitEvents) + { + FreeWaitEventSet(waitEvents); + waitEvents = NULL; + } + /* Re-initialize it without adding any safekeeper events */ + InitEventSet(); + + /* + * loop through the existing safekeepers. If they aren't the one we're + * removing, and if they have a socket we can use, re-add the applicable + * events. + */ + for (int i = 0; i < n_safekeepers; i++) + { + uint32 desired_events = WL_NO_EVENTS; + Safekeeper *sk = &safekeeper[i]; + + sk->eventPos = -1; + + if (sk == to_remove) + continue; + + /* If this safekeeper isn't offline, add an event for it! 
*/ + if (sk->conn != NULL) + { + desired_events = SafekeeperStateDesiredEvents(sk->state); + sk->eventPos = AddWaitEventToSet(waitEvents, desired_events, walprop_socket(sk->conn), NULL, sk); + } + } +} + +/* Shuts down and cleans up the connection for a safekeeper. Sets its state to SS_OFFLINE */ +static void +ShutdownConnection(Safekeeper *sk) +{ + if (sk->conn) + walprop_finish(sk->conn); + sk->conn = NULL; + sk->state = SS_OFFLINE; + sk->flushWrite = false; + sk->streamingAt = InvalidXLogRecPtr; + + if (sk->voteResponse.termHistory.entries) + pfree(sk->voteResponse.termHistory.entries); + sk->voteResponse.termHistory.entries = NULL; + + HackyRemoveWalProposerEvent(sk); +} + +/* + * This function is called to establish new connection or to reestablish + * connection in case of connection failure. + * + * On success, sets the state to SS_CONNECTING_WRITE. + */ +static void +ResetConnection(Safekeeper *sk) +{ + pgsocket sock; /* socket of the new connection */ + + if (sk->state != SS_OFFLINE) + { + ShutdownConnection(sk); + } + + /* + * Try to establish new connection + * + * If the connection information hasn't been filled out, we need to do + * that here. 
+ */ + if (sk->conninfo[0] == '\0') + { + int written = 0; + written = snprintf((char *) &sk->conninfo, MAXCONNINFO, + "host=%s port=%s dbname=replication options='-c ztimelineid=%s ztenantid=%s'", + sk->host, sk->port, zenith_timeline_walproposer, zenith_tenant_walproposer); + // currently connection string is not that long, but once we pass something like jwt we might overflow the buffer, + // so it is better to be defensive and check that everything aligns well + if (written > MAXCONNINFO || written < 0) + elog(FATAL, "could not create connection string for safekeeper %s:%s", sk->host, sk->port); + } + + sk->conn = walprop_connect_start((char *) &sk->conninfo); + + /* + * "If the result is null, then libpq has been unable to allocate a new + * PGconn structure" + */ + if (!sk->conn) + elog(FATAL, "failed to allocate new PGconn object"); + + /* + * PQconnectStart won't actually start connecting until we run + * PQconnectPoll. Before we do that though, we need to check that it + * didn't immediately fail. + */ + if (walprop_status(sk->conn) == WP_CONNECTION_BAD) + { + /*--- + * According to libpq docs: + * "If the result is CONNECTION_BAD, the connection attempt has already failed, + * typically because of invalid connection parameters." + * We should report this failure. + * + * https://www.postgresql.org/docs/devel/libpq-connect.html#LIBPQ-PQCONNECTSTARTPARAMS + */ + elog(WARNING, "Immediate failure to connect with node:\n\t%s\n\terror: %s", + sk->conninfo, walprop_error_message(sk->conn)); + + /* + * Even though the connection failed, we still need to clean up the + * object + */ + walprop_finish(sk->conn); + sk->conn = NULL; + return; + } + + /* + * The documentation for PQconnectStart states that we should call + * PQconnectPoll in a loop until it returns PGRES_POLLING_OK or + * PGRES_POLLING_FAILED. The other two possible returns indicate whether + * we should wait for reading or writing on the socket. 
For the first + * iteration of the loop, we're expected to wait until the socket becomes + * writable. + * + * The wording of the documentation is a little ambiguous; thankfully + * there's an example in the postgres source itself showing this behavior. + * (see libpqrcv_connect, defined in + * src/backend/replication/libpqwalreceiver/libpqwalreceiver.c) + */ + elog(LOG, "connecting with node %s:%s", sk->host, sk->port); + + sk->state = SS_CONNECTING_WRITE; + sk->startedConnAt = GetCurrentTimestamp(); + + sock = walprop_socket(sk->conn); + sk->eventPos = AddWaitEventToSet(waitEvents, WL_SOCKET_WRITEABLE, sock, NULL, sk); + return; +} + +/* + * How many milliseconds are left until we should attempt reconnection to + * safekeepers? Returns 0 if it is already high time, -1 if we never reconnect + * (do we actually need this?). NOTE(review): the *1000 and /1000 below presumably convert ms <-> us; confirm units. + */ +static long +TimeToReconnect(TimestampTz now) +{ + TimestampTz passed; + TimestampTz till_reconnect; + + if (wal_acceptor_reconnect_timeout <= 0) + return -1; + + passed = now - last_reconnect_attempt; + till_reconnect = (TimestampTz) wal_acceptor_reconnect_timeout * 1000 - passed; /* widen before scaling so the product cannot overflow a narrower setting type */ + if (till_reconnect <= 0) + return 0; + return (long) (till_reconnect / 1000); +} + +/* If the timeout has expired, attempt to reconnect to all offline safekeepers */ +static void +ReconnectSafekeepers(void) +{ + TimestampTz now = GetCurrentTimestamp(); + + if (TimeToReconnect(now) == 0) + { + last_reconnect_attempt = now; + for (int i = 0; i < n_safekeepers; i++) + { + if (safekeeper[i].state == SS_OFFLINE) + ResetConnection(&safekeeper[i]); + } + } +} + +/* + * Performs the logic for advancing the state machine of the specified safekeeper, + * given that a certain set of events has occurred. + */ +static void +AdvancePollState(Safekeeper *sk, uint32 events) +{ + /* + * Sanity check. We assume further down that the operations don't + * block because the socket is ready.
+ */ + AssertEventsOkForState(events, sk); + + /* Execute the code corresponding to the current state */ + switch (sk->state) + { + /* + * safekeepers are only taken out of SS_OFFLINE by calls to + * ResetConnection + */ + case SS_OFFLINE: + elog(FATAL, "Unexpected safekeeper %s:%s state advancement: is offline", + sk->host, sk->port); + break; /* actually unreachable, but prevents + * -Wimplicit-fallthrough */ + + /* + * Both connecting states run the same logic. The only + * difference is the events they're expecting + */ + case SS_CONNECTING_READ: + case SS_CONNECTING_WRITE: + HandleConnectionEvent(sk); + break; + + /* + * Waiting for a successful CopyBoth response. + */ + case SS_WAIT_EXEC_RESULT: + RecvStartWALPushResult(sk); + break; + + /* + * Finish handshake comms: receive information about the safekeeper. + */ + case SS_HANDSHAKE_RECV: + RecvAcceptorGreeting(sk); + break; + + /* + * Voting is an idle state - we don't expect any events to trigger. + * Refer to the execution of SS_HANDSHAKE_RECV to see how nodes are + * transferred from SS_VOTING to sending actual vote requests. + */ + case SS_VOTING: + elog(WARNING, "EOF from node %s:%s in %s state", sk->host, + sk->port, FormatSafekeeperState(sk->state)); + ResetConnection(sk); + return; + + /* Read the safekeeper response for our candidate */ + case SS_WAIT_VERDICT: + RecvVoteResponse(sk); + break; + + /* Flush proposer announcement message */ + case SS_SEND_ELECTED_FLUSH: + + /* + * AsyncFlush ensures we only move on to SS_ACTIVE once the flush + * completes. If we still have more to do, we'll wait until the next + * poll comes along. + */ + if (!AsyncFlush(sk)) + return; + + /* flush is done, event set and state will be updated later */ + StartStreaming(sk); + break; + + /* + * Idle state for waiting votes from quorum. 
+ */ + case SS_IDLE: + elog(WARNING, "EOF from node %s:%s in %s state", sk->host, + sk->port, FormatSafekeeperState(sk->state)); + ResetConnection(sk); + return; + + /* + * Active state is used for streaming WAL and receiving feedback. + */ + case SS_ACTIVE: + HandleActiveState(sk, events); + break; + } +} + +static void +HandleConnectionEvent(Safekeeper *sk) +{ + WalProposerConnectPollStatusType result = walprop_connect_poll(sk->conn); + + /* The new set of events we'll wait on, after updating */ + uint32 new_events = WL_NO_EVENTS; + + switch (result) + { + case WP_CONN_POLLING_OK: + elog(LOG, "connected with node %s:%s", sk->host, + sk->port); + + /* + * We have to pick some event to update event set. + * We'll eventually need the socket to be readable, + * so we go with that. + */ + new_events = WL_SOCKET_READABLE; + break; + + /* + * If we need to poll to finish connecting, + * continue doing that + */ + case WP_CONN_POLLING_READING: + sk->state = SS_CONNECTING_READ; + new_events = WL_SOCKET_READABLE; + break; + case WP_CONN_POLLING_WRITING: + sk->state = SS_CONNECTING_WRITE; + new_events = WL_SOCKET_WRITEABLE; + break; + + case WP_CONN_POLLING_FAILED: + elog(WARNING, "failed to connect to node '%s:%s': %s", + sk->host, sk->port, walprop_error_message(sk->conn)); + + /* + * If connecting failed, we don't want to restart + * the connection because that might run us into a + * loop. Instead, shut it down -- it'll naturally + * restart at a slower interval on calls to + * ReconnectSafekeepers. + */ + ShutdownConnection(sk); + return; + } + + /* + * Because PQconnectPoll can change the socket, we have to + * un-register the old event and re-register an event on + * the new socket. 
+ */ + HackyRemoveWalProposerEvent(sk); + sk->eventPos = AddWaitEventToSet(waitEvents, new_events, walprop_socket(sk->conn), NULL, sk); + + /* If we successfully connected, send START_WAL_PUSH query */ + if (result == WP_CONN_POLLING_OK) + SendStartWALPush(sk); +} + +/* + * Send "START_WAL_PUSH" message as an empty query to the safekeeper. Performs + * a blocking send, then immediately moves to SS_WAIT_EXEC_RESULT. If something + * goes wrong, shut down the connection (which sets the state to SS_OFFLINE). + */ +static void +SendStartWALPush(Safekeeper *sk) +{ + if (!walprop_send_query(sk->conn, "START_WAL_PUSH")) + { + elog(WARNING, "Failed to send 'START_WAL_PUSH' query to safekeeper %s:%s: %s", + sk->host, sk->port, walprop_error_message(sk->conn)); + ShutdownConnection(sk); + return; + } + sk->state = SS_WAIT_EXEC_RESULT; + UpdateEventSet(sk, WL_SOCKET_READABLE); +} + +static void +RecvStartWALPushResult(Safekeeper *sk) +{ + switch (walprop_get_query_result(sk->conn)) + { + /* + * Successful result, move on to starting the + * handshake + */ + case WP_EXEC_SUCCESS_COPYBOTH: + + SendProposerGreeting(sk); + break; + + /* + * Needs repeated calls to finish. Wait until the + * socket is readable + */ + case WP_EXEC_NEEDS_INPUT: + + /* + * SS_WAIT_EXEC_RESULT is always reached through an + * event, so we don't need to update the event set + */ + break; + + case WP_EXEC_FAILED: + elog(WARNING, "Failed to send query to safekeeper %s:%s: %s", + sk->host, sk->port, walprop_error_message(sk->conn)); + ShutdownConnection(sk); + return; + + /* + * Unexpected result -- fundamentally an error, but we + * want to produce a custom message, rather than a + * generic "something went wrong" + */ + case WP_EXEC_UNEXPECTED_SUCCESS: + elog(WARNING, "Received bad response from safekeeper %s:%s query execution", + sk->host, sk->port); + ShutdownConnection(sk); + return; + } +} + +/* + * Start handshake: first of all send information about the + * safekeeper.
After sending, we wait on SS_HANDSHAKE_RECV for + * a response to finish the handshake. + */ +static void +SendProposerGreeting(Safekeeper *sk) +{ + /* + * On failure, logging & resetting the connection is handled. + * We just need to handle the control flow. + */ + BlockingWrite(sk, &greetRequest, sizeof(greetRequest), SS_HANDSHAKE_RECV); +} + +static void +RecvAcceptorGreeting(Safekeeper *sk) +{ + /* + * If our reading doesn't immediately succeed, any necessary + * error handling or state setting is taken care of. We can + * leave any other work until later. + */ + sk->greetResponse.apm.tag = 'g'; + if (!AsyncReadMessage(sk, (AcceptorProposerMessage *) &sk->greetResponse)) + return; + + /* Protocol is all good, move to voting. */ + sk->state = SS_VOTING; + + ++n_connected; + if (n_connected <= quorum) + { + /* We're still collecting terms from the majority. */ + propTerm = Max(sk->greetResponse.term, propTerm); + + /* Quorum is acquired, prepare the vote request. */ + if (n_connected == quorum) + { + propTerm++; + elog(LOG, "proposer connected to quorum (%d) safekeepers, propTerm=" INT64_FORMAT, quorum, propTerm); + + voteRequest = (VoteRequest) + { + .tag = 'v', + .term = propTerm + }; + memcpy(voteRequest.proposerId.data, greetRequest.proposerId.data, UUID_LEN); + } + } + else if (sk->greetResponse.term > propTerm) + { + /* Another compute with higher term is running. */ + elog(FATAL, "WAL acceptor %s:%s with term " INT64_FORMAT " rejects our connection request with term " INT64_FORMAT "", + sk->host, sk->port, + sk->greetResponse.term, propTerm); + } + + /* + * Check if we have quorum. If there aren't enough safekeepers, + * wait and do nothing. We'll eventually get a task when the + * election starts. + * + * If we do have quorum, we can start an election. + */ + if (n_connected < quorum) + { + /* + * SS_VOTING is an idle state; read-ready indicates the + * connection closed.
+ */ + UpdateEventSet(sk, WL_SOCKET_READABLE); + } + else + { + /* + * Now send voting request to the cohort and wait + * responses + */ + for (int j = 0; j < n_safekeepers; j++) + { + /* + * Remember: SS_VOTING indicates that the safekeeper is + * participating in voting, but hasn't sent anything + * yet. + */ + if (safekeeper[j].state == SS_VOTING) + SendVoteRequest(&safekeeper[j]); + } + } +} + +static void +SendVoteRequest(Safekeeper *sk) +{ + /* We have quorum for voting, send our vote request */ + elog(LOG, "requesting vote from %s:%s for term " UINT64_FORMAT, sk->host, sk->port, voteRequest.term); + /* On failure, logging & resetting is handled */ + if (!BlockingWrite(sk, &voteRequest, sizeof(voteRequest), SS_WAIT_VERDICT)) + return; + + /* If successful, wait for read-ready with SS_WAIT_VERDICT */ +} + +static void +RecvVoteResponse(Safekeeper *sk) +{ + sk->voteResponse.apm.tag = 'v'; + if (!AsyncReadMessage(sk, (AcceptorProposerMessage *) &sk->voteResponse)) + return; + + elog(LOG, + "got VoteResponse from acceptor %s:%s, voteGiven=" UINT64_FORMAT ", epoch=" UINT64_FORMAT ", flushLsn=%X/%X, truncateLsn=%X/%X, timelineStartLsn=%X/%X", + sk->host, sk->port, sk->voteResponse.voteGiven, GetHighestTerm(&sk->voteResponse.termHistory), + LSN_FORMAT_ARGS(sk->voteResponse.flushLsn), + LSN_FORMAT_ARGS(sk->voteResponse.truncateLsn), + LSN_FORMAT_ARGS(sk->voteResponse.timelineStartLsn)); + + /* + * In case of acceptor rejecting our vote, bail out, but only + * if either it already lives in strictly higher term + * (concurrent compute spotted) or we are not elected yet and + * thus need the vote. 
+ */ + if ((!sk->voteResponse.voteGiven) && + (sk->voteResponse.term > propTerm || n_votes < quorum)) + { + elog(FATAL, "WAL acceptor %s:%s with term " INT64_FORMAT " rejects our connection request with term " INT64_FORMAT "", + sk->host, sk->port, + sk->voteResponse.term, propTerm); + } + Assert(sk->voteResponse.term == propTerm); + + /* Handshake completed, do we have quorum? */ + n_votes++; + if (n_votes < quorum) + { + sk->state = SS_IDLE; /* can't do much yet, no quorum */ + } + else if (n_votes > quorum) + { + /* recovery already performed, just start streaming */ + SendProposerElected(sk); + } + else + { + sk->state = SS_IDLE; + UpdateEventSet(sk, WL_SOCKET_READABLE); /* Idle states wait for + * read-ready */ + + HandleElectedProposer(); + } +} + +/* + * Called once a majority of acceptors have voted for us and current proposer + * has been elected. + * + * Sends ProposerElected message to all acceptors in SS_IDLE state and starts + * replication from walsender. + */ +static void +HandleElectedProposer(void) +{ + DetermineEpochStartLsn(); + + /* + * Check if not all safekeepers are up-to-date, we need to + * download WAL needed to synchronize them + */ + if (truncateLsn < propEpochStartLsn) + { + elog(LOG, + "start recovery because truncateLsn=%X/%X is not " + "equal to epochStartLsn=%X/%X", + LSN_FORMAT_ARGS(truncateLsn), + LSN_FORMAT_ARGS(propEpochStartLsn)); + /* Perform recovery */ + if (!WalProposerRecovery(donor, greetRequest.timeline, truncateLsn, propEpochStartLsn)) + elog(FATAL, "Failed to recover state"); + } + else if (syncSafekeepers) + { + /* Sync is not needed: just exit */ + fprintf(stdout, "%X/%X\n", LSN_FORMAT_ARGS(propEpochStartLsn)); + exit(0); + } + + for (int i = 0; i < n_safekeepers; i++) + { + if (safekeeper[i].state == SS_IDLE) + SendProposerElected(&safekeeper[i]); + } + + /* + * The proposer has been elected, and there will be no quorum waiting + * after this point. 
There will be no safekeeper with state SS_IDLE + * also, because that state is used only for quorum waiting. + */ + + if (syncSafekeepers) + { + /* + * Send empty message to enforce receiving feedback + * even from nodes that are fully recovered; this is + * required to learn they switched epoch, which finishes + * sync-safekeepers, which doesn't generate any real new + * records. Will go away once we switch to async acks. + */ + BroadcastAppendRequest(); + + /* keep polling until all safekeepers are synced */ + return; + } + + WalProposerStartStreaming(propEpochStartLsn); + /* Should not return here */ +} + +/* latest term in TermHistory, or 0 if there are no entries */ +static term_t +GetHighestTerm(TermHistory *th) +{ + return th->n_entries > 0 ? th->entries[th->n_entries - 1].term : 0; +} + +/* safekeeper's epoch is the term of the highest entry in the log */ +static term_t +GetEpoch(Safekeeper *sk) +{ + return GetHighestTerm(&sk->voteResponse.termHistory); +} + +/* If LSN points to the page header, skip it */ +static XLogRecPtr +SkipXLogPageHeader(XLogRecPtr lsn) +{ + if (XLogSegmentOffset(lsn, wal_segment_size) == 0) + { + lsn += SizeOfXLogLongPHD; + } + else if (lsn % XLOG_BLCKSZ == 0) + { + lsn += SizeOfXLogShortPHD; + } + return lsn; +} + +/* + * Called after majority of acceptors gave votes, it calculates the most + * advanced safekeeper (who will be the donor) and epochStartLsn -- LSN since + * which we'll write WAL in our term. + * + * Sets truncateLsn along the way (though it is not of much use at this point -- + * only for skipping recovery).
+ */ +static void +DetermineEpochStartLsn(void) +{ + TermHistory *dth; + + propEpochStartLsn = InvalidXLogRecPtr; + donorEpoch = 0; + truncateLsn = InvalidXLogRecPtr; + timelineStartLsn = InvalidXLogRecPtr; + + for (int i = 0; i < n_safekeepers; i++) + { + if (safekeeper[i].state == SS_IDLE) + { + if (GetEpoch(&safekeeper[i]) > donorEpoch || + (GetEpoch(&safekeeper[i]) == donorEpoch && + safekeeper[i].voteResponse.flushLsn > propEpochStartLsn)) + { + donorEpoch = GetEpoch(&safekeeper[i]); + propEpochStartLsn = safekeeper[i].voteResponse.flushLsn; + donor = i; + } + truncateLsn = Max(safekeeper[i].voteResponse.truncateLsn, truncateLsn); + + if (safekeeper[i].voteResponse.timelineStartLsn != InvalidXLogRecPtr) + { + /* timelineStartLsn should be the same everywhere or unknown */ + if (timelineStartLsn != InvalidXLogRecPtr && + timelineStartLsn != safekeeper[i].voteResponse.timelineStartLsn) + { + elog(WARNING, + "inconsistent timelineStartLsn: current %X/%X, received %X/%X", + LSN_FORMAT_ARGS(timelineStartLsn), + LSN_FORMAT_ARGS(safekeeper[i].voteResponse.timelineStartLsn)); + } + timelineStartLsn = safekeeper[i].voteResponse.timelineStartLsn; + } + } + } + + /* + * If propEpochStartLsn is 0 everywhere, we are bootstrapping -- nothing was + * committed yet. Start streaming then from the basebackup LSN. + */ + if (propEpochStartLsn == InvalidXLogRecPtr && !syncSafekeepers) + { + propEpochStartLsn = truncateLsn = GetRedoStartLsn(); + if (timelineStartLsn == InvalidXLogRecPtr) + { + timelineStartLsn = GetRedoStartLsn(); + } + elog(LOG, "bumped epochStartLsn to the first record %X/%X", LSN_FORMAT_ARGS(propEpochStartLsn)); + } + + /* + * If propEpochStartLsn is not 0, at least one msg with WAL was sent to + * some connected safekeeper; it must have carried truncateLsn pointing to + * the first record. 
+ */ + Assert((truncateLsn != InvalidXLogRecPtr) || + (syncSafekeepers && truncateLsn == propEpochStartLsn)); + + /* + * We will be generating WAL since propEpochStartLsn, so we should set + * availableLsn to mark this LSN as the latest available position. + */ + availableLsn = propEpochStartLsn; + + /* + * Proposer's term history is the donor's + its own entry. + */ + dth = &safekeeper[donor].voteResponse.termHistory; + propTermHistory.n_entries = dth->n_entries + 1; + propTermHistory.entries = palloc(sizeof(TermSwitchEntry) * propTermHistory.n_entries); + memcpy(propTermHistory.entries, dth->entries, sizeof(TermSwitchEntry) * dth->n_entries); + propTermHistory.entries[propTermHistory.n_entries - 1].term = propTerm; + propTermHistory.entries[propTermHistory.n_entries - 1].lsn = propEpochStartLsn; + + elog(LOG, "got votes from majority (%d) of nodes, term " UINT64_FORMAT ", epochStartLsn %X/%X, donor %s:%s, truncate_lsn %X/%X", + quorum, + propTerm, + LSN_FORMAT_ARGS(propEpochStartLsn), + safekeeper[donor].host, safekeeper[donor].port, + LSN_FORMAT_ARGS(truncateLsn) + ); + + /* + * Ensure the basebackup we are running (at RedoStartLsn) matches LSN since + * which we are going to write according to the consensus. If not, we must + * bail out, as clog and other non rel data is inconsistent. + */ + if (!syncSafekeepers) + { + /* + * Basebackup LSN always points to the beginning of the record (not the + * page), as StartupXLOG most probably wants it this way. Safekeepers + * don't skip header as they need a continuous stream of data, so + * correct LSN for comparison. + */ + if (SkipXLogPageHeader(propEpochStartLsn) != GetRedoStartLsn()) + { + /* + * However, allow to proceed if previously elected leader was me; plain + * restart of walproposer not intervened by concurrent compute (who could + * generate WAL) is ok.
+ */ + if (!((dth->n_entries >= 1) && (dth->entries[dth->n_entries - 1].term == + walprop_shared->mineLastElectedTerm))) + { + elog(PANIC, + "collected propEpochStartLsn %X/%X, but basebackup LSN %X/%X", + LSN_FORMAT_ARGS(propEpochStartLsn), + LSN_FORMAT_ARGS(GetRedoStartLsn())); + } + } + walprop_shared->mineLastElectedTerm = propTerm; + } +} + +/* + * Receive WAL from most advanced safekeeper: connect to safekeeper[donor], stream from startpos and write what arrives until endpos is reached. Returns false on failure. + */ +static bool +WalProposerRecovery(int donor, TimeLineID timeline, XLogRecPtr startpos, XLogRecPtr endpos) +{ + char conninfo[MAXCONNINFO]; + char *err; + WalReceiverConn *wrconn; + WalRcvStreamOptions options; + + snprintf(conninfo, sizeof(conninfo), "host=%s port=%s dbname=replication options='-c ztimelineid=%s ztenantid=%s'", /* bounded: never write past the fixed-size buffer */ + safekeeper[donor].host, safekeeper[donor].port, zenith_timeline_walproposer, zenith_tenant_walproposer); + wrconn = walrcv_connect(conninfo, false, "wal_proposer_recovery", &err); + if (!wrconn) + { + ereport(WARNING, + (errmsg("could not connect to WAL acceptor %s:%s: %s", + safekeeper[donor].host, safekeeper[donor].port, + err))); + return false; + } + elog(LOG, + "start recovery from %s:%s starting from %X/%08X till %X/%08X timeline " + "%d", + safekeeper[donor].host, safekeeper[donor].port, (uint32) (startpos >> 32), + (uint32) startpos, (uint32) (endpos >> 32), (uint32) endpos, timeline); + + options.logical = false; + options.startpoint = startpos; + options.slotname = NULL; + options.proto.physical.startpointTLI = timeline; + + if (walrcv_startstreaming(wrconn, &options)) + { + XLogRecPtr rec_start_lsn; + XLogRecPtr rec_end_lsn = 0; + int len; + char *buf; + pgsocket wait_fd = PGINVALID_SOCKET; + + while ((len = walrcv_receive(wrconn, &buf, &wait_fd)) >= 0) + { + if (len == 0) + { + (void) WaitLatchOrSocket( + MyLatch, WL_EXIT_ON_PM_DEATH | WL_SOCKET_READABLE, wait_fd, + -1, WAIT_EVENT_WAL_RECEIVER_MAIN); + } + else + { + Assert(buf[0] == 'w' || buf[0] == 'k'); + if (buf[0] == 'k') + continue; /* keepalive */ + memcpy(&rec_start_lsn,
&buf[XLOG_HDR_START_POS], + sizeof rec_start_lsn); + rec_start_lsn = pg_ntoh64(rec_start_lsn); + rec_end_lsn = rec_start_lsn + len - XLOG_HDR_SIZE; + + /* write WAL to disk */ + XLogWalPropWrite(&buf[XLOG_HDR_SIZE], len - XLOG_HDR_SIZE, rec_start_lsn); + + ereport(DEBUG1, + (errmsg("Recover message %X/%X length %d", + LSN_FORMAT_ARGS(rec_start_lsn), len))); + if (rec_end_lsn >= endpos) + break; + } + } + ereport(LOG, + (errmsg("end of replication stream at %X/%X: %m", + LSN_FORMAT_ARGS(rec_end_lsn)))); + walrcv_disconnect(wrconn); + + /* failed to receive all WAL till endpos */ + if (rec_end_lsn < endpos) + return false; + } + else + { + ereport(LOG, + (errmsg("primary server contains no more WAL on requested timeline %u LSN %X/%08X", + timeline, (uint32) (startpos >> 32), (uint32) startpos))); + return false; + } + + return true; +} + +/* + * Determine for sk the starting streaming point and send it message + * 1) Announcing we are elected proposer (which immediately advances epoch if + * safekeeper is synced, being important for sync-safekeepers) + * 2) Communicating starting streaming point -- safekeeper must truncate its WAL + * beyond it -- and history of term switching. + * + * Sets sk->startStreamingAt. + */ +static void +SendProposerElected(Safekeeper *sk) +{ + ProposerElected msg; + TermHistory *th; + term_t lastCommonTerm; + int i; + + /* + * Determine start LSN by comparing safekeeper's log term switch history and + * proposer's, searching for the divergence point. + * + * Note: there is a vanishingly small chance of no common point even if + * there is some WAL on safekeeper, if immediately after bootstrap compute + * wrote some WAL on single sk and died; we stream since the beginning then. + */ + th = &sk->voteResponse.termHistory; + /* + * If any WAL is present on the sk, it must be authorized by some term. + * OTOH, without any WAL there are no term switches in the log.
+ */ + Assert((th->n_entries == 0) == + (sk->voteResponse.flushLsn == InvalidXLogRecPtr)); + /* We must start somewhere. */ + Assert(propTermHistory.n_entries >= 1); + + for (i = 0; i < Min(propTermHistory.n_entries, th->n_entries); i++) + { + if (propTermHistory.entries[i].term != th->entries[i].term) + break; + /* term must begin everywhere at the same point */ + Assert(propTermHistory.entries[i].lsn == th->entries[i].lsn); + } + i--; /* step back to the last common term */ + if (i < 0) + { + /* safekeeper is empty or no common point, start from the beginning */ + sk->startStreamingAt = propTermHistory.entries[0].lsn; + + if (sk->startStreamingAt < truncateLsn) + { + /* + * There's a gap between the WAL starting point and a truncateLsn, + * which can't appear in a normal working cluster. That gap means + * that all safekeepers reported that they have persisted WAL up + * to the truncateLsn before, but now current safekeeper tells + * otherwise. + * + * Also we have a special condition here, which is empty safekeeper + * with no history. In combination with a gap, that can happen when + * we introduce a new safekeeper to the cluster. This is a rare case, + * which is triggered manually for now, and should be treated with + * care. + */ + + /* + * truncateLsn will not change without ack from current safekeeper, + * and it's aligned to the WAL record, so we can safely start + * streaming from this point. + */ + sk->startStreamingAt = truncateLsn; + + elog(WARNING, "empty safekeeper joined cluster as %s:%s, historyStart=%X/%X, sk->startStreamingAt=%X/%X", + sk->host, sk->port, LSN_FORMAT_ARGS(propTermHistory.entries[0].lsn), + LSN_FORMAT_ARGS(sk->startStreamingAt)); + } + } + else + { + /* + * End of (common) term is the start of the next except it is the last + * one; there it is flush_lsn in case of safekeeper or, in case of + * proposer, LSN it is currently writing, but then we just pick + * safekeeper pos as it obviously can't be higher. 
+ */ + if (propTermHistory.entries[i].term == propTerm) + { + sk->startStreamingAt = sk->voteResponse.flushLsn; + } + else + { + XLogRecPtr propEndLsn = propTermHistory.entries[i + 1].lsn; + XLogRecPtr skEndLsn = (i + 1 < th->n_entries ? th->entries[i + 1].lsn : + sk->voteResponse.flushLsn); + sk->startStreamingAt = Min(propEndLsn, skEndLsn); + } + } + + Assert(sk->startStreamingAt >= truncateLsn && sk->startStreamingAt <= availableLsn); + + msg.tag = 'e'; + msg.term = propTerm; + msg.startStreamingAt = sk->startStreamingAt; + msg.termHistory = &propTermHistory; + msg.timelineStartLsn = timelineStartLsn; + + lastCommonTerm = i >= 0 ? propTermHistory.entries[i].term : 0; + elog(LOG, + "sending elected msg to node " UINT64_FORMAT " term=" UINT64_FORMAT ", startStreamingAt=%X/%X (lastCommonTerm=" UINT64_FORMAT "), termHistory.n_entries=%u to %s:%s, timelineStartLsn=%X/%X", + sk->greetResponse.nodeId, msg.term, LSN_FORMAT_ARGS(msg.startStreamingAt), lastCommonTerm, msg.termHistory->n_entries, sk->host, sk->port, LSN_FORMAT_ARGS(msg.timelineStartLsn)); + + resetStringInfo(&sk->outbuf); + pq_sendint64_le(&sk->outbuf, msg.tag); + pq_sendint64_le(&sk->outbuf, msg.term); + pq_sendint64_le(&sk->outbuf, msg.startStreamingAt); + pq_sendint32_le(&sk->outbuf, msg.termHistory->n_entries); + for (int i = 0; i < msg.termHistory->n_entries; i++) + { + pq_sendint64_le(&sk->outbuf, msg.termHistory->entries[i].term); + pq_sendint64_le(&sk->outbuf, msg.termHistory->entries[i].lsn); + } + pq_sendint64_le(&sk->outbuf, msg.timelineStartLsn); + + if (!AsyncWrite(sk, sk->outbuf.data, sk->outbuf.len, SS_SEND_ELECTED_FLUSH)) + return; + + StartStreaming(sk); +} + +/* + * Start walsender streaming replication + */ +static void +WalProposerStartStreaming(XLogRecPtr startpos) +{ + StartReplicationCmd cmd; + + elog(LOG, "WAL proposer starts streaming at %X/%X", + LSN_FORMAT_ARGS(startpos)); + cmd.slotname = WAL_PROPOSER_SLOT_NAME; + cmd.timeline = greetRequest.timeline; + cmd.startpoint = 
startpos; + StartProposerReplication(&cmd); +} + +/* + * Start streaming to safekeeper sk, always updates state to SS_ACTIVE and sets + * correct event set. + */ +static void +StartStreaming(Safekeeper *sk) +{ + /* + * This is the only entrypoint to state SS_ACTIVE. It's executed + * exactly once for a connection. + */ + sk->state = SS_ACTIVE; + sk->streamingAt = sk->startStreamingAt; + + /* event set will be updated inside SendMessageToNode */ + SendMessageToNode(sk); +} + +/* + * Try to send message to the particular node. Always updates event set. Will + * send at least one message, if socket is ready. + * + * Can be used only for safekeepers in SS_ACTIVE state. State can be changed + * in case of errors. + */ +static void +SendMessageToNode(Safekeeper *sk) +{ + Assert(sk->state == SS_ACTIVE); + + /* Note: we always send everything to the safekeeper until WOULDBLOCK or nothing left to send */ + HandleActiveState(sk, WL_SOCKET_WRITEABLE); +} + +/* + * Broadcast new message to all caught-up safekeepers + */ +static void +BroadcastAppendRequest() +{ + for (int i = 0; i < n_safekeepers; i++) + if (safekeeper[i].state == SS_ACTIVE) + SendMessageToNode(&safekeeper[i]); +} + +static void +PrepareAppendRequest(AppendRequestHeader *req, XLogRecPtr beginLsn, XLogRecPtr endLsn) +{ + Assert(endLsn >= beginLsn); + req->tag = 'a'; + req->term = propTerm; + req->epochStartLsn = propEpochStartLsn; + req->beginLsn = beginLsn; + req->endLsn = endLsn; + req->commitLsn = GetAcknowledgedByQuorumWALPosition(); + req->truncateLsn = truncateLsn; + req->proposerId = greetRequest.proposerId; +} + +/* + * Process all events happened in SS_ACTIVE state, update event set after that. 
+ */
+static void
+HandleActiveState(Safekeeper *sk, uint32 events)
+{
+    /* While streaming we always want to hear about incoming data. */
+    uint32      newEvents = WL_SOCKET_READABLE;
+
+    /*
+     * Both helpers return false once the safekeeper has left SS_ACTIVE
+     * (its connection was shut down or reset). In that case we return
+     * without updating the event set.
+     */
+    if (events & WL_SOCKET_WRITEABLE)
+        if (!SendAppendRequests(sk))
+            return;
+
+    if (events & WL_SOCKET_READABLE)
+        if (!RecvAppendResponses(sk))
+            return;
+
+    /*
+     * We should wait for WL_SOCKET_WRITEABLE event if we have unflushed data
+     * in the buffer.
+     *
+     * LSN comparison checks if we have pending unsent messages. This check isn't
+     * necessary now, because we always send append messages immediately after
+     * arrival. But it's good to have it here in case we change this behavior
+     * in the future.
+     */
+    if (sk->streamingAt != availableLsn || sk->flushWrite)
+        newEvents |= WL_SOCKET_WRITEABLE;
+
+    UpdateEventSet(sk, newEvents);
+}
+
+/*
+ * Send WAL messages starting from sk->streamingAt until the end or non-writable
+ * socket, whichever comes first. Caller should take care of updating event set.
+ * Even if no unsent WAL is available, at least one empty message will be sent
+ * as a heartbeat, if socket is ready.
+ *
+ * Each message is an AppendRequestHeader followed by at most MAX_SEND_SIZE
+ * bytes of WAL read through sk->xlogreader; it is assembled in sk->outbuf.
+ *
+ * Can change state if Async* functions encounter errors and reset connection.
+ * Returns false in this case, true otherwise.
+ */
+static bool
+SendAppendRequests(Safekeeper *sk)
+{
+    XLogRecPtr  endLsn;
+    AppendRequestHeader *req;
+    PGAsyncWriteResult writeResult;
+    WALReadError errinfo;
+    bool        sentAnything = false;
+
+    /* A previous message is still partially buffered; finish it first. */
+    if (sk->flushWrite)
+    {
+        if (!AsyncFlush(sk))
+            /*
+             * AsyncFlush failed, that could happen if the socket is closed or
+             * we have nothing to write and should wait for writeable socket.
+             */
+            return sk->state == SS_ACTIVE;
+
+        /* Event set will be updated in the end of HandleActiveState */
+        sk->flushWrite = false;
+    }
+
+    /* "!sentAnything" forces one (possibly empty) heartbeat message. */
+    while (sk->streamingAt != availableLsn || !sentAnything)
+    {
+        sentAnything = true;
+
+        endLsn = sk->streamingAt;
+        endLsn += MAX_SEND_SIZE;
+
+        /* if we went beyond available WAL, back off */
+        if (endLsn > availableLsn)
+            endLsn = availableLsn;
+
+        req = &sk->appendRequest;
+        PrepareAppendRequest(req, sk->streamingAt, endLsn);
+
+        /*
+         * The length is a difference of two XLogRecPtr values (uint64), so it
+         * must be printed with UINT64_FORMAT: "%ld" is only correct where
+         * long happens to be 64 bits wide.
+         */
+        ereport(DEBUG2,
+                (errmsg("sending message len " UINT64_FORMAT " beginLsn=%X/%X endLsn=%X/%X commitLsn=%X/%X truncateLsn=%X/%X to %s:%s",
+                        req->endLsn - req->beginLsn,
+                        LSN_FORMAT_ARGS(req->beginLsn),
+                        LSN_FORMAT_ARGS(req->endLsn),
+                        LSN_FORMAT_ARGS(req->commitLsn),
+                        LSN_FORMAT_ARGS(truncateLsn), sk->host, sk->port)));
+
+        resetStringInfo(&sk->outbuf);
+
+        /* write AppendRequest header */
+        appendBinaryStringInfo(&sk->outbuf, (char*) req, sizeof(AppendRequestHeader));
+
+        /* write the WAL itself */
+        enlargeStringInfo(&sk->outbuf, req->endLsn - req->beginLsn);
+        if (!WALRead(sk->xlogreader,
+                    &sk->outbuf.data[sk->outbuf.len],
+                    req->beginLsn,
+                    req->endLsn - req->beginLsn,
+                    ThisTimeLineID,
+                    &errinfo))
+        {
+            WALReadRaiseError(&errinfo);
+        }
+        /* WALRead wrote straight into the buffer; account for those bytes. */
+        sk->outbuf.len += req->endLsn - req->beginLsn;
+
+        writeResult = walprop_async_write(sk->conn, sk->outbuf.data, sk->outbuf.len);
+
+        /* Mark current message as sent, whatever the result is */
+        sk->streamingAt = endLsn;
+
+        switch (writeResult)
+        {
+            case PG_ASYNC_WRITE_SUCCESS:
+                /* Continue writing the next message */
+                break;
+
+            case PG_ASYNC_WRITE_TRY_FLUSH:
+                /*
+                 * We still need to call PQflush some more to finish the job.
+                 * Caller function will handle this by setting right event set.
+                 */
+                sk->flushWrite = true;
+                return true;
+
+            case PG_ASYNC_WRITE_FAIL:
+                elog(WARNING, "Failed to send to node %s:%s in %s state: %s",
+                     sk->host, sk->port, FormatSafekeeperState(sk->state),
+                     walprop_error_message(sk->conn));
+                ShutdownConnection(sk);
+                return false;
+            default:
+                Assert(false);
+                return false;
+        }
+    }
+
+    return true;
+}
+
+/*
+ * Receive and process all available feedback.
+ *
+ * Can change state if Async* functions encounter errors and reset connection.
+ * Returns false in this case, true otherwise.
+ *
+ * NB: This function can call SendMessageToNode and produce new messages.
+ */
+static bool
+RecvAppendResponses(Safekeeper *sk)
+{
+    XLogRecPtr  minQuorumLsn;
+    bool        readAnything = false;
+
+    while (true)
+    {
+        /*
+         * If our reading doesn't immediately succeed, any
+         * necessary error handling or state setting is taken care
+         * of. We can leave any other work until later.
+         */
+        sk->appendResponse.apm.tag = 'a';
+        if (!AsyncReadMessage(sk, (AcceptorProposerMessage *) &sk->appendResponse))
+            break;
+
+        /* Terms are unsigned 64-bit values, hence UINT64_FORMAT. */
+        ereport(DEBUG2,
+                (errmsg("received message term=" UINT64_FORMAT " flushLsn=%X/%X commitLsn=%X/%X from %s:%s",
+                        sk->appendResponse.term,
+                        LSN_FORMAT_ARGS(sk->appendResponse.flushLsn),
+                        LSN_FORMAT_ARGS(sk->appendResponse.commitLsn),
+                        sk->host, sk->port)));
+
+        if (sk->appendResponse.term > propTerm)
+        {
+            /* Another compute with higher term is running. */
+            elog(PANIC, "WAL acceptor %s:%s with term " UINT64_FORMAT " rejected our request, our term " UINT64_FORMAT,
+                 sk->host, sk->port,
+                 sk->appendResponse.term, propTerm);
+        }
+
+        readAnything = true;
+    }
+
+    /* Nothing new arrived, so there is no feedback to process. */
+    if (!readAnything)
+        return sk->state == SS_ACTIVE;
+
+    HandleSafekeeperResponse();
+
+    /*
+     * Also send the new commit lsn to all the safekeepers.
+     */
+    minQuorumLsn = GetAcknowledgedByQuorumWALPosition();
+    if (minQuorumLsn > lastSentCommitLsn)
+    {
+        BroadcastAppendRequest();
+        lastSentCommitLsn = minQuorumLsn;
+    }
+
+    return sk->state == SS_ACTIVE;
+}
+
+/*
+ * Parse a ReplicationFeedback message, or the ReplicationFeedback part of an
+ * AppendResponse.
+ *
+ * Layout as consumed below: one byte holding the number of keys, then for each
+ * key a null-terminated name, an int32 value length and the value itself.
+ * Every known key carries a 64-bit integer, which is stored into the matching
+ * field of *rf. Unknown keys are logged and their value bytes skipped, so the
+ * sender can add keys without breaking this parser.
+ *
+ * reply_message: buffer positioned at the key count; its cursor is advanced.
+ * rf:            output; only fields whose key is present are overwritten.
+ */
+void
+ParseReplicationFeedbackMessage(StringInfo reply_message, ReplicationFeedback *rf)
+{
+    uint8       nkeys;
+    int         i;
+    int32       len;
+
+    /* get number of custom keys */
+    nkeys = pq_getmsgbyte(reply_message);
+
+    for (i = 0; i < nkeys; i++)
+    {
+        const char *key = pq_getmsgstring(reply_message);
+
+        if (strcmp(key, "current_timeline_size") == 0)
+        {
+            pq_getmsgint(reply_message, sizeof(int32)); /* read value length */
+            rf->currentClusterSize = pq_getmsgint64(reply_message);
+            /* uint64 field: use UINT64_FORMAT rather than "%lu" */
+            elog(DEBUG2, "ParseReplicationFeedbackMessage: current_timeline_size " UINT64_FORMAT,
+                 rf->currentClusterSize);
+        }
+        else if (strcmp(key, "ps_writelsn") == 0)
+        {
+            pq_getmsgint(reply_message, sizeof(int32)); /* read value length */
+            rf->ps_writelsn = pq_getmsgint64(reply_message);
+            elog(DEBUG2, "ParseReplicationFeedbackMessage: ps_writelsn %X/%X",
+                 LSN_FORMAT_ARGS(rf->ps_writelsn));
+        }
+        else if (strcmp(key, "ps_flushlsn") == 0)
+        {
+            pq_getmsgint(reply_message, sizeof(int32)); /* read value length */
+            rf->ps_flushlsn = pq_getmsgint64(reply_message);
+            elog(DEBUG2, "ParseReplicationFeedbackMessage: ps_flushlsn %X/%X",
+                 LSN_FORMAT_ARGS(rf->ps_flushlsn));
+        }
+        else if (strcmp(key, "ps_applylsn") == 0)
+        {
+            pq_getmsgint(reply_message, sizeof(int32)); /* read value length */
+            rf->ps_applylsn = pq_getmsgint64(reply_message);
+            elog(DEBUG2, "ParseReplicationFeedbackMessage: ps_applylsn %X/%X",
+                 LSN_FORMAT_ARGS(rf->ps_applylsn));
+        }
+        else if (strcmp(key, "ps_replytime") == 0)
+        {
+            pq_getmsgint(reply_message, sizeof(int32)); /* read value length */
+            rf->ps_replytime = pq_getmsgint64(reply_message);
+            {
+                char       *replyTimeStr;
+
+                /* Copy because timestamptz_to_str returns a static buffer */
+                replyTimeStr = pstrdup(timestamptz_to_str(rf->ps_replytime));
+                /* TimestampTz is a signed 64-bit value: INT64_FORMAT, not "%lu" */
+                elog(DEBUG2, "ParseReplicationFeedbackMessage: ps_replytime " INT64_FORMAT " reply_time: %s",
+                     rf->ps_replytime, replyTimeStr);
+
+                pfree(replyTimeStr);
+            }
+        }
+        else
+        {
+            len = pq_getmsgint(reply_message, sizeof(int32));   /* read value length */
+            /* Skip unknown keys to support backward compatible protocol changes */
+            elog(LOG, "ParseReplicationFeedbackMessage: unknown key: %s len %d", key, len);
+            pq_getmsgbytes(reply_message, len);
+        }
+    }
+}
+
+/*
+ * Combine hot standby feedbacks from all safekeepers.
+ *
+ * Starts from the largest possible xmin / catalog_xmin and lowers each to the
+ * smallest value reported by any safekeeper whose feedback timestamp is set.
+ * hs->ts ends up as the timestamp of the safekeeper that last lowered either
+ * value, and stays 0 when no safekeeper lowered anything.
+ */
+static void
+CombineHotStanbyFeedbacks(HotStandbyFeedback * hs)
+{
+    hs->ts = 0;
+    hs->xmin.value = ~0;        /* largest unsigned value */
+    hs->catalog_xmin.value = ~0;    /* largest unsigned value */
+
+    for (int i = 0; i < n_safekeepers; i++)
+    {
+        /* ts == 0 means this safekeeper has reported no feedback */
+        if (safekeeper[i].appendResponse.hs.ts != 0)
+        {
+            if (FullTransactionIdPrecedes(safekeeper[i].appendResponse.hs.xmin, hs->xmin))
+            {
+                hs->xmin = safekeeper[i].appendResponse.hs.xmin;
+                hs->ts = safekeeper[i].appendResponse.hs.ts;
+            }
+            if (FullTransactionIdPrecedes(safekeeper[i].appendResponse.hs.catalog_xmin, hs->catalog_xmin))
+            {
+                hs->catalog_xmin = safekeeper[i].appendResponse.hs.catalog_xmin;
+                hs->ts = safekeeper[i].appendResponse.hs.ts;
+            }
+        }
+    }
+}
+
+
+/*
+ * Get minimum of flushed LSNs of all safekeepers, which is the LSN of the
+ * last WAL record that can be safely discarded.
+ */
+static XLogRecPtr
+CalculateMinFlushLsn(void)
+{
+    XLogRecPtr lsn = n_safekeepers > 0
+        ?
safekeeper[0].appendResponse.flushLsn
+        : InvalidXLogRecPtr;
+    for (int i = 1; i < n_safekeepers; i++)
+    {
+        lsn = Min(lsn, safekeeper[i].appendResponse.flushLsn);
+    }
+    return lsn;
+}
+
+/*
+ * Calculate WAL position acknowledged by quorum.
+ *
+ * Collects the flushLsn reported by every safekeeper (counting it as 0 while
+ * it is still below propEpochStartLsn), sorts the values with CompareLsn and
+ * returns the element at index n_safekeepers - quorum.
+ * NOTE(review): this is "the highest LSN flushed by at least quorum
+ * safekeepers" provided CompareLsn sorts ascending - confirm.
+ */
+static XLogRecPtr
+GetAcknowledgedByQuorumWALPosition(void)
+{
+    XLogRecPtr  responses[MAX_SAFEKEEPERS];
+
+    /*
+     * Sort acknowledged LSNs
+     */
+    for (int i = 0; i < n_safekeepers; i++)
+    {
+        /*
+         * Like in Raft, we aren't allowed to commit entries from previous
+         * terms, so ignore reported LSN until it gets to epochStartLsn.
+         */
+        responses[i] = safekeeper[i].appendResponse.flushLsn >= propEpochStartLsn ?
+            safekeeper[i].appendResponse.flushLsn : 0;
+    }
+    qsort(responses, n_safekeepers, sizeof(XLogRecPtr), CompareLsn);
+
+    /*
+     * Get the smallest LSN committed by quorum
+     */
+    return responses[n_safekeepers - quorum];
+}
+
+/*
+ * WalproposerShmemSize --- report amount of shared memory space needed
+ */
+Size
+WalproposerShmemSize(void)
+{
+    return sizeof(WalproposerShmemState);
+}
+
+/*
+ * Attach to (or create) the walproposer shared state.
+ *
+ * On first creation the struct is zeroed and its spinlock initialised, all
+ * under AddinShmemInitLock. Returns true if the struct already existed.
+ */
+bool
+WalproposerShmemInit(void)
+{
+    bool        found;
+
+    LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
+    walprop_shared = ShmemInitStruct("Walproposer shared state",
+                                     sizeof(WalproposerShmemState),
+                                     &found);
+
+    if (!found)
+    {
+        memset(walprop_shared, 0, WalproposerShmemSize());
+        SpinLockInit(&walprop_shared->mutex);
+    }
+    LWLockRelease(AddinShmemInitLock);
+
+    return found;
+}
+
+/*
+ * Publish *rf as the current replication feedback in shared memory.
+ * The copy is made while holding the shared-state spinlock.
+ */
+void
+replication_feedback_set(ReplicationFeedback *rf)
+{
+    SpinLockAcquire(&walprop_shared->mutex);
+    memcpy(&walprop_shared->feedback, rf, sizeof(ReplicationFeedback));
+    SpinLockRelease(&walprop_shared->mutex);
+}
+
+
+/*
+ * Read the write/flush/apply LSNs of the published replication feedback.
+ * All three are fetched under the shared-state spinlock, so they come from
+ * the same published feedback.
+ */
+void
+replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn)
+{
+    SpinLockAcquire(&walprop_shared->mutex);
+    *writeLsn = walprop_shared->feedback.ps_writelsn;
+    *flushLsn = walprop_shared->feedback.ps_flushlsn;
+    *applyLsn = walprop_shared->feedback.ps_applylsn;
+    SpinLockRelease(&walprop_shared->mutex);
+}
+
+
+/*
+ * Get ReplicationFeedback fields from the most advanced safekeeper
+ *
+ * "Most advanced" means the safekeeper reporting the largest ps_writelsn;
+ * safekeeper 0 is used when none reports a non-zero value. The chosen
+ * feedback is copied into *rf and also published via replication_feedback_set.
+ */
+static void
+GetLatestZentihFeedback(ReplicationFeedback *rf)
+{
+    int         latest_safekeeper = 0;
+    XLogRecPtr  ps_writelsn = InvalidXLogRecPtr;
+
+    for (int i = 0; i < n_safekeepers; i++)
+    {
+        if (safekeeper[i].appendResponse.rf.ps_writelsn > ps_writelsn)
+        {
+            latest_safekeeper = i;
+            ps_writelsn = safekeeper[i].appendResponse.rf.ps_writelsn;
+        }
+    }
+
+    rf->currentClusterSize = safekeeper[latest_safekeeper].appendResponse.rf.currentClusterSize;
+    rf->ps_writelsn = safekeeper[latest_safekeeper].appendResponse.rf.ps_writelsn;
+    rf->ps_flushlsn = safekeeper[latest_safekeeper].appendResponse.rf.ps_flushlsn;
+    rf->ps_applylsn = safekeeper[latest_safekeeper].appendResponse.rf.ps_applylsn;
+    rf->ps_replytime = safekeeper[latest_safekeeper].appendResponse.rf.ps_replytime;
+
+    /*
+     * currentClusterSize is uint64 and ps_replytime is TimestampTz (int64):
+     * print them with the 64-bit format macros instead of "%lu".
+     */
+    elog(DEBUG2, "GetLatestZentihFeedback: currentClusterSize " UINT64_FORMAT ","
+         " ps_writelsn %X/%X, ps_flushlsn %X/%X, ps_applylsn %X/%X, ps_replytime " INT64_FORMAT,
+         rf->currentClusterSize,
+         LSN_FORMAT_ARGS(rf->ps_writelsn),
+         LSN_FORMAT_ARGS(rf->ps_flushlsn),
+         LSN_FORMAT_ARGS(rf->ps_applylsn),
+         rf->ps_replytime);
+
+    replication_feedback_set(rf);
+}
+
+static void
+HandleSafekeeperResponse(void)
+{
+    HotStandbyFeedback hsFeedback;
+    XLogRecPtr  minQuorumLsn;
+    XLogRecPtr  diskConsistentLsn;
+    XLogRecPtr  minFlushLsn;
+
+
+    minQuorumLsn = GetAcknowledgedByQuorumWALPosition();
+    diskConsistentLsn = quorumFeedback.rf.ps_flushlsn;
+
+    if (!syncSafekeepers)
+    {
+        // Get ReplicationFeedback fields from the most advanced safekeeper
+        GetLatestZentihFeedback(&quorumFeedback.rf);
+        SetZenithCurrentClusterSize(quorumFeedback.rf.currentClusterSize);
+    }
+
+    if (minQuorumLsn > quorumFeedback.flushLsn || diskConsistentLsn != quorumFeedback.rf.ps_flushlsn)
+    {
+
+        if (minQuorumLsn > quorumFeedback.flushLsn)
+            quorumFeedback.flushLsn = minQuorumLsn;
+
+        /* advance the replication slot */
+        if (!syncSafekeepers)
+            
ProcessStandbyReply( + // write_lsn - This is what durably stored in WAL service. + quorumFeedback.flushLsn, + //flush_lsn - This is what durably stored in WAL service. + quorumFeedback.flushLsn, + //apply_lsn - This is what processed and durably saved at pageserver. + quorumFeedback.rf.ps_flushlsn, + GetCurrentTimestamp(), false); + } + + CombineHotStanbyFeedbacks(&hsFeedback); + if (hsFeedback.ts != 0 && memcmp(&hsFeedback, &quorumFeedback.hs, sizeof hsFeedback) != 0) + { + quorumFeedback.hs = hsFeedback; + if (!syncSafekeepers) + ProcessStandbyHSFeedback(hsFeedback.ts, + XidFromFullTransactionId(hsFeedback.xmin), + EpochFromFullTransactionId(hsFeedback.xmin), + XidFromFullTransactionId(hsFeedback.catalog_xmin), + EpochFromFullTransactionId(hsFeedback.catalog_xmin)); + } + + /* + * Try to advance truncateLsn to minFlushLsn, which is the last record + * flushed to all safekeepers. We must always start streaming from the + * beginning of the record, which simplifies decoding on the far end. + * + * Advanced truncateLsn should be not further than nearest commitLsn. + * This prevents surprising violation of truncateLsn <= commitLsn + * invariant which might occur because 1) truncateLsn can be advanced + * immediately once chunk is broadcast to all safekeepers, and + * commitLsn generally can't be advanced based on feedback from + * safekeeper who is still in the previous epoch (similar to 'leader + * can't commit entries from previous term' in Raft); 2) chunks we + * read from WAL and send are plain sheets of bytes, but safekeepers + * ack only on record boundaries. + */ + minFlushLsn = CalculateMinFlushLsn(); + if (minFlushLsn > truncateLsn) + { + truncateLsn = minFlushLsn; + + /* + * Advance the replication slot to free up old WAL files. Note + * that slot doesn't exist if we are in syncSafekeepers mode. 
+ */ + if (MyReplicationSlot) + PhysicalConfirmReceivedLocation(truncateLsn); + } + + /* + * Generally sync is done when majority switched the epoch so we committed + * epochStartLsn and made the majority aware of it, ensuring they are + * ready to give all WAL to pageserver. It would mean whichever majority + * is alive, there will be at least one safekeeper who is able to stream + * WAL to pageserver to make basebackup possible. However, since at the + * moment we don't have any good mechanism of defining the healthy and + * most advanced safekeeper who should push the wal into pageserver and + * basically the random one gets connected, to prevent hanging basebackup + * (due to pageserver connecting to not-synced-safekeeper) we currently + * wait for all seemingly alive safekeepers to get synced. + */ + if (syncSafekeepers) + { + int n_synced; + + n_synced = 0; + for (int i = 0; i < n_safekeepers; i++) + { + Safekeeper *sk = &safekeeper[i]; + bool synced = sk->appendResponse.commitLsn >= propEpochStartLsn; + + /* alive safekeeper which is not synced yet; wait for it */ + if (sk->state != SS_OFFLINE && !synced) + return; + if (synced) + n_synced++; + } + if (n_synced >= quorum) + { + /* All safekeepers synced! */ + fprintf(stdout, "%X/%X\n", LSN_FORMAT_ARGS(propEpochStartLsn)); + exit(0); + } + } +} + +/* + * Try to read CopyData message from i'th safekeeper, resetting connection on + * failure. 
+ */
+static bool
+AsyncRead(Safekeeper *sk, char **buf, int *buf_size)
+{
+    switch (walprop_async_read(sk->conn, buf, buf_size))
+    {
+        case PG_ASYNC_READ_SUCCESS:
+            return true;    /* full message read; *buf now points to the data */
+
+        case PG_ASYNC_READ_TRY_AGAIN:
+            /*
+             * Not an error, nothing to change: WL_SOCKET_READABLE is always
+             * set during copyboth
+             */
+            return false;
+
+        case PG_ASYNC_READ_FAIL:
+            elog(WARNING, "Failed to read from node %s:%s in %s state: %s", sk->host,
+                 sk->port, FormatSafekeeperState(sk->state),
+                 walprop_error_message(sk->conn));
+            ShutdownConnection(sk);
+            return false;
+    }
+    Assert(false);  /* only reachable if the result is outside PGAsyncReadResult */
+    return false;
+}
+
+/*
+ * Read next message with known type into provided struct, by reading a CopyData
+ * block from the safekeeper's postgres connection, returning whether the read
+ * was successful.
+ *
+ * If the read needs more polling, we return 'false' and keep the state
+ * unmodified, waiting until it becomes read-ready to try again. If it fully
+ * failed, a warning is emitted and the connection is reset.
+ *
+ * The caller announces the message it expects by setting anymsg->tag before
+ * the call; anymsg must really point to the struct for that tag
+ * ('g' AcceptorGreeting, 'v' VoteResponse, 'a' AppendResponse). A message
+ * carrying any other tag is logged and the connection is reset.
+ *
+ * Fields are decoded in exactly the order they appear on the wire, so the
+ * sequence of pq_getmsg* calls below must not be reordered.
+ */
+static bool
+AsyncReadMessage(Safekeeper *sk, AcceptorProposerMessage *anymsg)
+{
+    char *buf;
+    int buf_size;
+    uint64 tag;
+    StringInfoData s;
+
+    if (!(AsyncRead(sk, &buf, &buf_size)))
+        return false;
+
+    /* parse it: wrap the received bytes in a StringInfo so pq_getmsg* can walk them */
+    s.data = buf;
+    s.len = buf_size;
+    s.cursor = 0;
+
+    tag = pq_getmsgint64_le(&s);
+    if (tag != anymsg->tag)
+    {
+        elog(WARNING, "unexpected message tag %c from node %s:%s in state %s", (char) tag, sk->host,
+             sk->port, FormatSafekeeperState(sk->state));
+        ResetConnection(sk);
+        return false;
+    }
+
+    switch (tag)
+    {
+        case 'g':
+            {
+                AcceptorGreeting *msg = (AcceptorGreeting *) anymsg;
+                msg->term = pq_getmsgint64_le(&s);
+                msg->nodeId = pq_getmsgint64_le(&s);
+                pq_getmsgend(&s);
+                return true;
+            }
+
+        case 'v':
+            {
+                VoteResponse *msg = (VoteResponse *) anymsg;
+
+                msg->term = pq_getmsgint64_le(&s);
+                msg->voteGiven = pq_getmsgint64_le(&s);
+                msg->flushLsn = pq_getmsgint64_le(&s);
+                msg->truncateLsn = pq_getmsgint64_le(&s);
+                msg->termHistory.n_entries = pq_getmsgint32_le(&s);
+                msg->termHistory.entries = palloc(sizeof(TermSwitchEntry) * msg->termHistory.n_entries); /* NOTE(review): a new array per vote response; nothing here frees an earlier one - confirm the memory context makes that safe */
+                for (int i = 0; i < msg->termHistory.n_entries; i++)
+                {
+                    msg->termHistory.entries[i].term = pq_getmsgint64_le(&s);
+                    msg->termHistory.entries[i].lsn = pq_getmsgint64_le(&s);
+                }
+                msg->timelineStartLsn = pq_getmsgint64_le(&s);
+                pq_getmsgend(&s);
+                return true;
+            }
+
+        case 'a':
+            {
+                AppendResponse *msg = (AppendResponse *) anymsg;
+                msg->term = pq_getmsgint64_le(&s);
+                msg->flushLsn = pq_getmsgint64_le(&s);
+                msg->commitLsn = pq_getmsgint64_le(&s);
+                msg->hs.ts = pq_getmsgint64_le(&s);
+                msg->hs.xmin.value = pq_getmsgint64_le(&s);
+                msg->hs.catalog_xmin.value = pq_getmsgint64_le(&s);
+                if (buf_size > APPENDRESPONSE_FIXEDPART_SIZE) /* bytes beyond the fixed part hold the extensible ReplicationFeedback */
+                    ParseReplicationFeedbackMessage(&s, &msg->rf);
+                pq_getmsgend(&s);
+                return true;
+            }
+
+        default:
+            {
+                Assert(false);  /* caller asked for a tag this function cannot decode */
+                return false;
+            }
+    }
+}
+
+/*
+ * Blocking equivalent to AsyncWrite.
+ *
+ * We use this everywhere messages are small enough that they should fit in a
+ * single packet.
+ *
+ * On success the safekeeper moves to success_state and true is returned. On
+ * failure a warning is logged, the connection is shut down and false is
+ * returned.
+ */
+static bool
+BlockingWrite(Safekeeper *sk, void *msg, size_t msg_size, SafekeeperState success_state)
+{
+    uint32 events;
+
+    if (!walprop_blocking_write(sk->conn, msg, msg_size))
+    {
+        elog(WARNING, "Failed to send to node %s:%s in %s state: %s",
+             sk->host, sk->port, FormatSafekeeperState(sk->state),
+             walprop_error_message(sk->conn));
+        ShutdownConnection(sk);
+        return false;
+    }
+
+    sk->state = success_state;
+
+    /*
+     * If the new state will be waiting for events to happen, update the event
+     * set to wait for those. A zero mask leaves the event set untouched.
+     */
+    events = SafekeeperStateDesiredEvents(success_state);
+    if (events)
+        UpdateEventSet(sk, events);
+
+    return true;
+}
+
+/*
+ * Starts a write into the 'i'th safekeeper's postgres connection, moving to
+ * flush_state (adjusting eventset) if write still needs flushing.
+ *
+ * Returns false if sending is unfinished (requires flushing or conn failed).
+ * Upon failure, a warning is emitted and the connection is reset.
+ */
+static bool
+AsyncWrite(Safekeeper *sk, void *msg, size_t msg_size, SafekeeperState flush_state)
+{
+    PGAsyncWriteResult outcome = walprop_async_write(sk->conn, msg, msg_size);
+
+    /* The whole message went out; the caller may proceed right away. */
+    if (outcome == PG_ASYNC_WRITE_SUCCESS)
+        return true;
+
+    if (outcome == PG_ASYNC_WRITE_TRY_FLUSH)
+    {
+        /*
+         * Part of the message is still buffered and PQflush has to be called
+         * again later. Park the safekeeper in flush_state and wait for the
+         * socket to become readable or writable.
+         */
+        sk->state = flush_state;
+        UpdateEventSet(sk, WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE);
+        return false;
+    }
+
+    if (outcome == PG_ASYNC_WRITE_FAIL)
+    {
+        elog(WARNING, "Failed to send to node %s:%s in %s state: %s",
+             sk->host, sk->port, FormatSafekeeperState(sk->state),
+             walprop_error_message(sk->conn));
+        ShutdownConnection(sk);
+        return false;
+    }
+
+    /* A result outside PGAsyncWriteResult should never be seen. */
+    Assert(false);
+    return false;
+}
+
+/*
+ * Finish sending data left buffered by an earlier AsyncWrite. Call it once the
+ * socket has become read- or write-ready after that AsyncWrite.
+ *
+ * Returns true when everything has been flushed and false otherwise. The event
+ * set is changed here only when the connection fails; after a successful
+ * flush the caller is expected to stop waiting for WL_SOCKET_WRITEABLE.
+ */
+static bool
+AsyncFlush(Safekeeper *sk)
+{
+    /*---
+     * PQflush returns:
+     *   0 if successful                    [we're good to move on]
+     *   1 if unable to send everything yet [call PQflush again]
+     *  -1 if it failed                     [emit an error]
+     */
+    int         rc = walprop_flush(sk->conn);
+
+    /* flush is done */
+    if (rc == 0)
+        return true;
+
+    /* Nothing to do; try again when the socket's ready */
+    if (rc == 1)
+        return false;
+
+    if (rc == -1)
+    {
+        elog(WARNING, "Failed to flush write to node %s:%s in %s state: %s",
+             sk->host, sk->port, FormatSafekeeperState(sk->state),
+             walprop_error_message(sk->conn));
+        ResetConnection(sk);
+        return false;
+    }
+
+    /* Any other value is outside the documented PQflush contract. */
+    Assert(false);
+    return false;
+}
+
+// Check if we need to suspend inserts because of lagging replication.
+static uint64 +backpressure_lag_impl(void) +{ + if (max_replication_apply_lag > 0 || max_replication_flush_lag > 0 || max_replication_write_lag > 0) + { + XLogRecPtr writePtr; + XLogRecPtr flushPtr; + XLogRecPtr applyPtr; + XLogRecPtr myFlushLsn = GetFlushRecPtr(); + + replication_feedback_get_lsns(&writePtr, &flushPtr, &applyPtr); +#define MB ((XLogRecPtr)1024*1024) + + elog(DEBUG2, "current flushLsn %X/%X ReplicationFeedback: write %X/%X flush %X/%X apply %X/%X", + LSN_FORMAT_ARGS(myFlushLsn), + LSN_FORMAT_ARGS(writePtr), + LSN_FORMAT_ARGS(flushPtr), + LSN_FORMAT_ARGS(applyPtr)); + + if ((writePtr != InvalidXLogRecPtr + && max_replication_write_lag > 0 + && myFlushLsn > writePtr + max_replication_write_lag*MB)) + { + return (myFlushLsn - writePtr - max_replication_write_lag*MB); + } + + if ((flushPtr != InvalidXLogRecPtr + && max_replication_flush_lag > 0 + && myFlushLsn > flushPtr + max_replication_flush_lag*MB)) + { + return (myFlushLsn - flushPtr - max_replication_flush_lag*MB); + } + + if ((applyPtr != InvalidXLogRecPtr + && max_replication_apply_lag > 0 + && myFlushLsn > applyPtr + max_replication_apply_lag*MB)) + { + return (myFlushLsn - applyPtr - max_replication_apply_lag*MB); + } + } + return 0; +} diff --git a/pgxn/neon/walproposer.h b/pgxn/neon/walproposer.h new file mode 100644 index 0000000000..b684d5264f --- /dev/null +++ b/pgxn/neon/walproposer.h @@ -0,0 +1,540 @@ +#ifndef __NEON_WALPROPOSER_H__ +#define __NEON_WALPROPOSER_H__ + +#include "access/xlogdefs.h" +#include "postgres.h" +#include "port.h" +#include "access/xlog_internal.h" +#include "access/transam.h" +#include "nodes/replnodes.h" +#include "utils/uuid.h" +#include "replication/walreceiver.h" + +#define SK_MAGIC 0xCafeCeefu +#define SK_PROTOCOL_VERSION 2 + +#define MAX_SAFEKEEPERS 32 +#define MAX_SEND_SIZE (XLOG_BLCKSZ * 16) /* max size of a single WAL message */ +#define XLOG_HDR_SIZE (1+8*3) /* 'w' + startPos + walEnd + timestamp */ +#define XLOG_HDR_START_POS 1 /* offset of start 
position in wal sender message header */ +#define XLOG_HDR_END_POS (1+8) /* offset of end position in wal sender message header */ + +/* + * In the spirit of WL_SOCKET_READABLE and others, this corresponds to no events having occured, + * because all WL_* events are given flags equal to some (1 << i), starting from i = 0 + */ +#define WL_NO_EVENTS 0 + +extern char* wal_acceptors_list; +extern int wal_acceptor_reconnect_timeout; +extern int wal_acceptor_connect_timeout; +extern bool am_wal_proposer; + +struct WalProposerConn; /* Defined in libpqwalproposer */ +typedef struct WalProposerConn WalProposerConn; + +struct WalMessage; +typedef struct WalMessage WalMessage; + +extern char *zenith_timeline_walproposer; +extern char *zenith_tenant_walproposer; + +/* Possible return values from ReadPGAsync */ +typedef enum +{ + /* The full read was successful. buf now points to the data */ + PG_ASYNC_READ_SUCCESS, + /* The read is ongoing. Wait until the connection is read-ready, then try + * again. */ + PG_ASYNC_READ_TRY_AGAIN, + /* Reading failed. Check PQerrorMessage(conn) */ + PG_ASYNC_READ_FAIL, +} PGAsyncReadResult; + +/* Possible return values from WritePGAsync */ +typedef enum +{ + /* The write fully completed */ + PG_ASYNC_WRITE_SUCCESS, + /* The write started, but you'll need to call PQflush some more times + * to finish it off. We just tried, so it's best to wait until the + * connection is read- or write-ready to try again. + * + * If it becomes read-ready, call PQconsumeInput and flush again. If it + * becomes write-ready, just call PQflush. + */ + PG_ASYNC_WRITE_TRY_FLUSH, + /* Writing failed. Check PQerrorMessage(conn) */ + PG_ASYNC_WRITE_FAIL, +} PGAsyncWriteResult; + +/* + * WAL safekeeper state, which is used to wait for some event. + * + * States are listed here in the order that they're executed. + * + * Most states, upon failure, will move back to SS_OFFLINE by calls to + * ResetConnection or ShutdownConnection. 
+ */ +typedef enum +{ + /* + * Does not have an active connection and will stay that way until + * further notice. + * + * Moves to SS_CONNECTING_WRITE by calls to ResetConnection. + */ + SS_OFFLINE, + + /* + * Connecting states. "_READ" waits for the socket to be available for + * reading, "_WRITE" waits for writing. There's no difference in the code + * they execute when polled, but we have this distinction in order to + * recreate the event set in HackyRemoveWalProposerEvent. + * + * After the connection is made, "START_WAL_PUSH" query is sent. + */ + SS_CONNECTING_WRITE, + SS_CONNECTING_READ, + + /* + * Waiting for the result of the "START_WAL_PUSH" command. + * + * After we get a successful result, sends handshake to safekeeper. + */ + SS_WAIT_EXEC_RESULT, + + /* + * Executing the receiving half of the handshake. After receiving, moves to + * SS_VOTING. + */ + SS_HANDSHAKE_RECV, + + /* + * Waiting to participate in voting, but a quorum hasn't yet been reached. + * This is an idle state - we do not expect AdvancePollState to be called. + * + * Moved externally by execution of SS_HANDSHAKE_RECV, when we received a + * quorum of handshakes. + */ + SS_VOTING, + + /* + * Already sent voting information, waiting to receive confirmation from the + * node. After receiving, moves to SS_IDLE, if the quorum isn't reached yet. + */ + SS_WAIT_VERDICT, + + /* Need to flush ProposerElected message. */ + SS_SEND_ELECTED_FLUSH, + + /* + * Waiting for quorum to send WAL. Idle state. If the socket becomes + * read-ready, the connection has been closed. + * + * Moves to SS_ACTIVE only by call to StartStreaming. + */ + SS_IDLE, + + /* + * Active phase, when we acquired quorum and have WAL to send or feedback + * to read. + */ + SS_ACTIVE, +} SafekeeperState; + +/* Consensus logical timestamp. */ +typedef uint64 term_t; + +/* neon storage node id */ +typedef uint64 NNodeId; + +/* + * Proposer <-> Acceptor messaging. 
+ */ + +/* Initial Proposer -> Acceptor message */ +typedef struct ProposerGreeting +{ + uint64 tag; /* message tag */ + uint32 protocolVersion; /* proposer-safekeeper protocol version */ + uint32 pgVersion; + pg_uuid_t proposerId; + uint64 systemId; /* Postgres system identifier */ + uint8 ztimelineid[16]; /* Zenith timeline id */ + uint8 ztenantid[16]; + TimeLineID timeline; + uint32 walSegSize; +} ProposerGreeting; + +typedef struct AcceptorProposerMessage +{ + uint64 tag; +} AcceptorProposerMessage; + +/* + * Acceptor -> Proposer initial response: the highest term acceptor voted for. + */ +typedef struct AcceptorGreeting +{ + AcceptorProposerMessage apm; + term_t term; + NNodeId nodeId; +} AcceptorGreeting; + +/* + * Proposer -> Acceptor vote request. + */ +typedef struct VoteRequest +{ + uint64 tag; + term_t term; + pg_uuid_t proposerId; /* for monitoring/debugging */ +} VoteRequest; + +/* Element of term switching chain. */ +typedef struct TermSwitchEntry +{ + term_t term; + XLogRecPtr lsn; +} TermSwitchEntry; + +typedef struct TermHistory +{ + uint32 n_entries; + TermSwitchEntry *entries; +} TermHistory; + +/* Vote itself, sent from safekeeper to proposer */ +typedef struct VoteResponse { + AcceptorProposerMessage apm; + term_t term; + uint64 voteGiven; + /* + * Safekeeper flush_lsn (end of WAL) + history of term switches allow + * proposer to choose the most advanced one. + */ + XLogRecPtr flushLsn; + XLogRecPtr truncateLsn; /* minimal LSN which may be needed for recovery of some safekeeper */ + TermHistory termHistory; + XLogRecPtr timelineStartLsn; /* timeline globally starts at this LSN */ +} VoteResponse; + +/* + * Proposer -> Acceptor message announcing proposer is elected and communicating + * epoch history to it. 
+ */ +typedef struct ProposerElected +{ + uint64 tag; + term_t term; + /* proposer will send since this point */ + XLogRecPtr startStreamingAt; + /* history of term switches up to this proposer */ + TermHistory *termHistory; + /* timeline globally starts at this LSN */ + XLogRecPtr timelineStartLsn; +} ProposerElected; + +/* + * Header of request with WAL message sent from proposer to safekeeper. + */ +typedef struct AppendRequestHeader +{ + uint64 tag; + term_t term; /* term of the proposer */ + /* + * LSN since which current proposer appends WAL (begin_lsn of its first + * record); determines epoch switch point. + */ + XLogRecPtr epochStartLsn; + XLogRecPtr beginLsn; /* start position of message in WAL */ + XLogRecPtr endLsn; /* end position of message in WAL */ + XLogRecPtr commitLsn; /* LSN committed by quorum of safekeepers */ + /* + * minimal LSN which may be needed for recovery of some safekeeper (end lsn + * + 1 of last chunk streamed to everyone) + */ + XLogRecPtr truncateLsn; + pg_uuid_t proposerId; /* for monitoring/debugging */ +} AppendRequestHeader; + +/* + * Hot standby feedback received from replica + */ +typedef struct HotStandbyFeedback +{ + TimestampTz ts; + FullTransactionId xmin; + FullTransactionId catalog_xmin; +} HotStandbyFeedback; + + +typedef struct ReplicationFeedback +{ + // current size of the timeline on pageserver + uint64 currentClusterSize; + // standby_status_update fields that safekeeper received from pageserver + XLogRecPtr ps_writelsn; + XLogRecPtr ps_flushlsn; + XLogRecPtr ps_applylsn; + TimestampTz ps_replytime; +} ReplicationFeedback; + + +typedef struct WalproposerShmemState +{ + slock_t mutex; + ReplicationFeedback feedback; + term_t mineLastElectedTerm; +} WalproposerShmemState; + +/* + * Report safekeeper state to proposer + */ +typedef struct AppendResponse +{ + AcceptorProposerMessage apm; + /* + * Current term of the safekeeper; if it is higher than proposer's, the + * compute is out of date. 
+     */
+    term_t term;
+    // Presumably the safekeeper's flush_lsn (end of its WAL), as in
+    // VoteResponse.flushLsn -- TODO confirm
+    XLogRecPtr flushLsn;
+    // Safekeeper reports back its knowledge of which WAL is committed, as
+    // this is a criterion for walproposer --sync mode exit
+    XLogRecPtr commitLsn;
+    HotStandbyFeedback hs;
+    // Feedback received from pageserver includes standby_status_update fields
+    // and custom zenith feedback.
+    // This part of the message is extensible.
+    ReplicationFeedback rf;
+} AppendResponse;
+
+// ReplicationFeedback is the extensible part of the message and is parsed separately.
+// All fields before it form the fixed part, whose size is the offset of `rf`.
+#define APPENDRESPONSE_FIXEDPART_SIZE offsetof(AppendResponse, rf)
+
+
+/*
+ * Descriptor of safekeeper
+ */
+typedef struct Safekeeper
+{
+    char const* host;
+    char const* port;
+    char conninfo[MAXCONNINFO]; /* connection info for connecting/reconnecting */
+
+    /*
+     * postgres protocol connection to the WAL acceptor
+     *
+     * Equals NULL only when state = SS_OFFLINE. Nonblocking is set once we
+     * reach SS_ACTIVE; not before.
+     */
+    WalProposerConn* conn;
+    /*
+     * Temporary buffer for the message being sent to the safekeeper.
+     */
+    StringInfoData outbuf;
+    /*
+     * WAL reader, allocated for each safekeeper.
+     */
+    XLogReaderState* xlogreader;
+
+    /*
+     * Streaming will start here; must be record boundary.
+     */
+    XLogRecPtr startStreamingAt;
+
+    bool flushWrite; /* set to true if we need to call AsyncFlush, to flush pending messages */
+    XLogRecPtr streamingAt; /* current streaming position */
+    AppendRequestHeader appendRequest; /* request for sending to safekeeper */
+
+    int eventPos; /* position in wait event set. Equal to -1 if no event */
+    SafekeeperState state; /* safekeeper state machine state */
+    TimestampTz startedConnAt; /* when connection attempt started */
+    AcceptorGreeting greetResponse; /* acceptor greeting */
+    VoteResponse voteResponse; /* the vote */
+    AppendResponse appendResponse; /* feedback for master */
+} Safekeeper;
+
+
+extern PGDLLIMPORT void WalProposerMain(Datum main_arg);
+void WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos);
+void WalProposerPoll(void);
+void WalProposerRegister(void);
+void ParseReplicationFeedbackMessage(StringInfo reply_message,
+                                     ReplicationFeedback *rf);
+extern void StartProposerReplication(StartReplicationCmd *cmd);
+
+Size WalproposerShmemSize(void);
+bool WalproposerShmemInit(void);
+void replication_feedback_set(ReplicationFeedback *rf);
+void replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn);
+
+/* libpqwalproposer hooks & helper type */
+
+/* Re-exported PostgresPollingStatusType */
+typedef enum
+{
+    WP_CONN_POLLING_FAILED = 0,
+    WP_CONN_POLLING_READING,
+    WP_CONN_POLLING_WRITING,
+    WP_CONN_POLLING_OK,
+    /*
+     * 'libpq-fe.h' still has PGRES_POLLING_ACTIVE, but says it's unused.
+     * We've removed it here to avoid clutter.
+     */
+} WalProposerConnectPollStatusType;
+
+/* Re-exported and modified ExecStatusType */
+typedef enum
+{
+    /* We received a single CopyBoth result */
+    WP_EXEC_SUCCESS_COPYBOTH,
+    /* Any success result other than a single CopyBoth was received. The specifics of the result
+     * were already logged, but it may be useful to provide an error message indicating which
+     * safekeeper messed up.
+     *
+     * Do not expect PQerrorMessage to be appropriately set. */
+    WP_EXEC_UNEXPECTED_SUCCESS,
+    /* No result available at this time. Wait until read-ready, then call again. Internally, this is
+     * returned when PQisBusy indicates that PQgetResult would block. */
+    WP_EXEC_NEEDS_INPUT,
+    /* Catch-all failure. Check PQerrorMessage. */
+    WP_EXEC_FAILED,
+} WalProposerExecStatusType;
+
+/* Re-exported ConnStatusType */
+typedef enum
+{
+    WP_CONNECTION_OK,
+    WP_CONNECTION_BAD,
+
+    /*
+     * The original ConnStatusType has many more tags, but requests that
+     * they not be relied upon (except for displaying to the user). We
+     * don't need that extra functionality, so we collect them into a
+     * single tag here.
+     */
+    WP_CONNECTION_IN_PROGRESS,
+} WalProposerConnStatusType;
+
+/* Re-exported PQerrorMessage */
+typedef char* (*walprop_error_message_fn) (WalProposerConn* conn);
+
+/* Re-exported PQstatus */
+typedef WalProposerConnStatusType (*walprop_status_fn) (WalProposerConn* conn);
+
+/* Re-exported PQconnectStart */
+typedef WalProposerConn* (*walprop_connect_start_fn) (char* conninfo);
+
+/* Re-exported PQconnectPoll */
+typedef WalProposerConnectPollStatusType (*walprop_connect_poll_fn) (WalProposerConn* conn);
+
+/* Blocking wrapper around PQsendQuery */
+typedef bool (*walprop_send_query_fn) (WalProposerConn* conn, char* query);
+
+/* Wrapper around PQconsumeInput + PQisBusy + PQgetResult */
+typedef WalProposerExecStatusType (*walprop_get_query_result_fn) (WalProposerConn* conn);
+
+/* Re-exported PQsocket */
+typedef pgsocket (*walprop_socket_fn) (WalProposerConn* conn);
+
+/* Wrapper around PQconsumeInput (if socket's read-ready) + PQflush */
+typedef int (*walprop_flush_fn) (WalProposerConn* conn);
+
+/* Re-exported PQfinish */
+typedef void (*walprop_finish_fn) (WalProposerConn* conn);
+
+/*
+ * Ergonomic wrapper around PQgetCopyData
+ *
+ * Reads a CopyData block from a safekeeper, setting *amount to the number
+ * of bytes returned.
+ *
+ * This function is allowed to assume certain properties specific to the
+ * protocol with the safekeepers, so it should not be used as-is for any
+ * other purpose.
+ *
+ * Note: If possible, using the higher-level AsyncRead() helper is generally
+ * preferred (NOTE(review): the helper's name was lost from this comment;
+ * AsyncRead() is assumed -- confirm), because it
+ * performs a bit of extra checking work that's always required and is normally
+ * somewhat verbose.
+ */
+typedef PGAsyncReadResult (*walprop_async_read_fn) (WalProposerConn* conn,
+                                                     char** buf,
+                                                     int* amount);
+
+/*
+ * Ergonomic wrapper around PQputCopyData + PQflush
+ *
+ * Starts to write a CopyData block to a safekeeper.
+ *
+ * For information on the meaning of return codes, refer to PGAsyncWriteResult.
+ */
+typedef PGAsyncWriteResult (*walprop_async_write_fn) (WalProposerConn* conn,
+                                                       void const* buf,
+                                                       size_t size);
+
+/*
+ * Blocking equivalent to walprop_async_write_fn
+ *
+ * Returns 'true' if successful, 'false' on failure.
+ */
+typedef bool (*walprop_blocking_write_fn) (WalProposerConn* conn, void const* buf, size_t size);
+
+/*
+ * All libpqwalproposer exported functions collected together.
+ *
+ * Every member here has a same-named call-syntax macro below; keep the two
+ * lists in sync, since a macro without a matching member cannot compile
+ * when used.
+ */
+typedef struct WalProposerFunctionsType
+{
+    walprop_error_message_fn walprop_error_message;
+    walprop_status_fn walprop_status;
+    walprop_connect_start_fn walprop_connect_start;
+    walprop_connect_poll_fn walprop_connect_poll;
+    walprop_send_query_fn walprop_send_query;
+    walprop_get_query_result_fn walprop_get_query_result;
+    walprop_socket_fn walprop_socket;
+    walprop_flush_fn walprop_flush;
+    walprop_finish_fn walprop_finish;
+    walprop_async_read_fn walprop_async_read;
+    walprop_async_write_fn walprop_async_write;
+    walprop_blocking_write_fn walprop_blocking_write;
+} WalProposerFunctionsType;
+
+/* Allow the above functions to be "called" with normal syntax */
+#define walprop_error_message(conn) \
+    WalProposerFunctions->walprop_error_message(conn)
+#define walprop_status(conn) \
+    WalProposerFunctions->walprop_status(conn)
+#define walprop_connect_start(conninfo) \
+    WalProposerFunctions->walprop_connect_start(conninfo)
+#define walprop_connect_poll(conn) \
+    WalProposerFunctions->walprop_connect_poll(conn)
+#define walprop_send_query(conn, query) \
+    WalProposerFunctions->walprop_send_query(conn, query)
+#define walprop_get_query_result(conn) \
+    WalProposerFunctions->walprop_get_query_result(conn)
+#define walprop_socket(conn) \
+    WalProposerFunctions->walprop_socket(conn)
+#define walprop_flush(conn) \
+    WalProposerFunctions->walprop_flush(conn)
+#define walprop_finish(conn) \
+    WalProposerFunctions->walprop_finish(conn)
+#define walprop_async_read(conn, buf, amount) \
+    WalProposerFunctions->walprop_async_read(conn, buf, amount)
+#define walprop_async_write(conn, buf, size) \
+    WalProposerFunctions->walprop_async_write(conn, buf, size)
+#define walprop_blocking_write(conn, buf, size) \
+    WalProposerFunctions->walprop_blocking_write(conn, buf, size)
+
+/*
+ * The runtime location of the libpqwalproposer functions.
+ *
+ * This pointer is set by the initializer in libpqwalproposer, so that we
+ * can use it later.
+ */
+extern PGDLLIMPORT WalProposerFunctionsType *WalProposerFunctions;
+
+#endif /* __NEON_WALPROPOSER_H__ */
diff --git a/pgxn/neon/walproposer_utils.c b/pgxn/neon/walproposer_utils.c
new file mode 100644
index 0000000000..7b96fd580c
--- /dev/null
+++ b/pgxn/neon/walproposer_utils.c
@@ -0,0 +1,1110 @@
+#include "postgres.h"
+
+#include "access/timeline.h"
+#include "access/xlogutils.h"
+#include "common/logging.h"
+#include "common/ip.h"
+#include "funcapi.h"
+#include "libpq/libpq.h"
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "postmaster/interrupt.h"
+#include "replication/slot.h"
+#include "walproposer_utils.h"
+#include "replication/walsender_private.h"
+
+#include "storage/ipc.h"
+#include "utils/builtins.h"
+#include "utils/ps_status.h"
+
+#include "libpq-fe.h"
+#include <netinet/tcp.h>
+#include <unistd.h>
+
+/*
+ * These variables are used similarly to openLogFile/SegNo,
+ * but for walproposer to write the XLOG during recovery. walpropFileTLI is the TimeLineID
+ * corresponding to the filename of walpropFile.
+ */ +static int walpropFile = -1; +static TimeLineID walpropFileTLI = 0; +static XLogSegNo walpropSegNo = 0; + +/* START cloned file-local variables and functions from walsender.c */ + +/* + * xlogreader used for replication. Note that a WAL sender doing physical + * replication does not need xlogreader to read WAL, but it needs one to + * keep a state of its work. + */ +static XLogReaderState *xlogreader = NULL; + +/* + * These variables keep track of the state of the timeline we're currently + * sending. sendTimeLine identifies the timeline. If sendTimeLineIsHistoric, + * the timeline is not the latest timeline on this server, and the server's + * history forked off from that timeline at sendTimeLineValidUpto. + */ +static TimeLineID sendTimeLine = 0; +static TimeLineID sendTimeLineNextTLI = 0; +static bool sendTimeLineIsHistoric = false; +static XLogRecPtr sendTimeLineValidUpto = InvalidXLogRecPtr; + +/* + * Timestamp of last ProcessRepliesIfAny() that saw a reply from the + * standby. Set to 0 if wal_sender_timeout doesn't need to be active. + */ +static TimestampTz last_reply_timestamp = 0; + +/* Have we sent a heartbeat message asking for reply, since last reply? */ +static bool waiting_for_ping_response = false; + +static bool streamingDoneSending; +static bool streamingDoneReceiving; + +/* Are we there yet? */ +static bool WalSndCaughtUp = false; + +/* Flags set by signal handlers for later service in main loop */ +static volatile sig_atomic_t got_STOPPING = false; + +/* + * How far have we sent WAL already? This is also advertised in + * MyWalSnd->sentPtr. (Actually, this is the next WAL location to send.) + */ +static XLogRecPtr sentPtr = InvalidXLogRecPtr; + +/* + * This is set while we are streaming. When not set + * PROCSIG_WALSND_INIT_STOPPING signal will be handled like SIGTERM. When set, + * the main loop is responsible for checking got_STOPPING and terminating when + * it's set (after streaming any remaining WAL). 
+ */
+static volatile sig_atomic_t replication_active = false;
+
+typedef void (*WalSndSendDataCallback) (void);
+static void WalSndLoop(WalSndSendDataCallback send_data);
+static void XLogSendPhysical(void);
+static XLogRecPtr GetStandbyFlushRecPtr(void);
+
+static void WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo,
+                              TimeLineID *tli_p);
+
+/* END cloned file-level variables and functions from walsender.c */
+
+/*
+ * Three-way comparator over XLogRecPtr values, with the (const void *)
+ * signature expected by qsort()-style callers.
+ *
+ * Returns -1, 0 or 1 when *a is less than, equal to or greater than *b.
+ */
+int
+CompareLsn(const void *a, const void *b)
+{
+    XLogRecPtr lsn1 = *((const XLogRecPtr *) a);
+    XLogRecPtr lsn2 = *((const XLogRecPtr *) b);
+
+    if (lsn1 < lsn2)
+        return -1;
+    else if (lsn1 == lsn2)
+        return 0;
+    else
+        return 1;
+}
+
+/* Returns a human-readable string corresponding to the SafekeeperState
+ *
+ * The string should not be freed.
+ *
+ * The strings are intended to be used as a prefix to "state", e.g.:
+ *
+ *   elog(LOG, "currently in %s state", FormatSafekeeperState(sk->state));
+ *
+ * If this sort of phrasing doesn't fit the message, instead use something like:
+ *
+ *   elog(LOG, "currently in state [%s]", FormatSafekeeperState(sk->state));
+ */
+char*
+FormatSafekeeperState(SafekeeperState state)
+{
+    char* return_val = NULL;
+
+    switch (state)
+    {
+        case SS_OFFLINE:
+            return_val = "offline";
+            break;
+        case SS_CONNECTING_READ:
+        case SS_CONNECTING_WRITE:
+            return_val = "connecting";
+            break;
+        case SS_WAIT_EXEC_RESULT:
+            return_val = "receiving query result";
+            break;
+        case SS_HANDSHAKE_RECV:
+            return_val = "handshake (receiving)";
+            break;
+        case SS_VOTING:
+            return_val = "voting";
+            break;
+        case SS_WAIT_VERDICT:
+            return_val = "wait-for-verdict";
+            break;
+        case SS_SEND_ELECTED_FLUSH:
+            return_val = "send-announcement-flush";
+            break;
+        case SS_IDLE:
+            return_val = "idle";
+            break;
+        case SS_ACTIVE:
+            return_val = "active";
+            break;
+    }
+
+    /* A state without a case above leaves return_val NULL and trips here. */
+    Assert(return_val != NULL);
+
+    return return_val;
+}
+
+/* Asserts that the provided events are expected for given safekeeper's state */
+void
+AssertEventsOkForState(uint32 events, Safekeeper* sk)
+{
+    uint32 expected = SafekeeperStateDesiredEvents(sk->state);
+
+    /* The events are in-line with what we're expecting, under two conditions:
+     *   (a) if we aren't expecting anything, `events` has no read- or
+     *       write-ready component.
+     *   (b) if we are expecting something, there's overlap
+     *       (i.e. `events & expected != 0`)
+     */
+    bool events_ok_for_state; /* long name so the `Assert` is more clear later */
+
+    if (expected == WL_NO_EVENTS)
+        events_ok_for_state = ((events & (WL_SOCKET_READABLE|WL_SOCKET_WRITEABLE)) == 0);
+    else
+        events_ok_for_state = ((events & expected) != 0);
+
+    if (!events_ok_for_state)
+    {
+        /* To give a descriptive message in the case of failure, we use elog and
+         * then an assertion that's guaranteed to fail. */
+        elog(WARNING, "events %s mismatched for safekeeper %s:%s in state [%s]",
+             FormatEvents(events), sk->host, sk->port, FormatSafekeeperState(sk->state));
+        Assert(events_ok_for_state);
+    }
+}
+
+/* Returns the set of events a safekeeper in this state should be waiting on
+ *
+ * This will return WL_NO_EVENTS (= 0) for some states. */
+uint32
+SafekeeperStateDesiredEvents(SafekeeperState state)
+{
+    uint32 result = WL_NO_EVENTS;
+
+    /* If the state doesn't have a modifier, we can check the base state */
+    switch (state)
+    {
+        /* Connecting states say what they want in the name */
+        case SS_CONNECTING_READ:
+            result = WL_SOCKET_READABLE;
+            break;
+        case SS_CONNECTING_WRITE:
+            result = WL_SOCKET_WRITEABLE;
+            break;
+
+        /* Reading states need the socket to be read-ready to continue */
+        case SS_WAIT_EXEC_RESULT:
+        case SS_HANDSHAKE_RECV:
+        case SS_WAIT_VERDICT:
+            result = WL_SOCKET_READABLE;
+            break;
+
+        /* Idle states use read-readiness as a sign that the connection has been
+         * disconnected. */
+        case SS_VOTING:
+        case SS_IDLE:
+            result = WL_SOCKET_READABLE;
+            break;
+
+        /*
+         * Flush states require write-ready for flushing.
+         * Active state does both reading and writing.
+         *
+         * TODO: SS_ACTIVE sometimes doesn't need to be write-ready. We should
+         *       check sk->flushWrite here to set WL_SOCKET_WRITEABLE.
+         */
+        case SS_SEND_ELECTED_FLUSH:
+        case SS_ACTIVE:
+            result = WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE;
+            break;
+
+        /* The offline state expects no events. */
+        case SS_OFFLINE:
+            result = WL_NO_EVENTS;
+            break;
+
+        default:
+            Assert(false);
+            break;
+    }
+
+    return result;
+}
+
+/* Returns a human-readable string corresponding to the event set
+ *
+ * The result has one character per known flag, in the fixed order
+ * L(atch) R(eadable) W(riteable) T(imeout) D(eath) E(xit-on-death)
+ * C(onnected); a flag that is not set is shown as '_'. A trailing '*' is
+ * appended (and a WARNING logged) if `events` has bits outside that set.
+ *
+ * If the events do not correspond to something set as the `events` field of a `WaitEvent`, the
+ * returned string may be meaningless.
+ *
+ * The string should not be freed. It lives in a static buffer that is
+ * overwritten by every call, so it should also not be expected to remain the
+ * same between function calls. */
+char*
+FormatEvents(uint32 events)
+{
+    /* 7 flag characters + optional '*' marker + terminating NUL */
+    static char return_str[9];
+
+    /* Helper variable to check if there's extra bits */
+    uint32 all_flags = WL_LATCH_SET
+        | WL_SOCKET_READABLE
+        | WL_SOCKET_WRITEABLE
+        | WL_TIMEOUT
+        | WL_POSTMASTER_DEATH
+        | WL_EXIT_ON_PM_DEATH
+        | WL_SOCKET_CONNECTED;
+
+    /* The formatting here isn't supposed to be *particularly* useful -- it's just to give a
+     * sense of what events have been triggered without needing to remember your powers of two. */
+
+    return_str[0] = (events & WL_LATCH_SET) ? 'L' : '_';
+    return_str[1] = (events & WL_SOCKET_READABLE) ? 'R' : '_';
+    return_str[2] = (events & WL_SOCKET_WRITEABLE) ? 'W' : '_';
+    return_str[3] = (events & WL_TIMEOUT) ? 'T' : '_';
+    return_str[4] = (events & WL_POSTMASTER_DEATH) ? 'D' : '_';
+    return_str[5] = (events & WL_EXIT_ON_PM_DEATH) ? 'E' : '_';
+    /* Own slot: this used to be written to [5], clobbering the 'E' flag. */
+    return_str[6] = (events & WL_SOCKET_CONNECTED) ? 'C' : '_';
+
+    if (events & (~all_flags))
+    {
+        elog(WARNING, "Event formatting found unexpected component %u",
+             events & (~all_flags));
+        return_str[7] = '*';
+        return_str[8] = '\0';
+    }
+    else
+        return_str[7] = '\0';
+
+    return return_str;
+}
+
+/*
+ * Convert a character which represents a hexadecimal digit to an integer.
+ *
+ * Returns -1 if the character is not a hexadecimal digit.
+ */
+static int
+HexDecodeChar(char c)
+{
+    if (c >= '0' && c <= '9')
+        return c - '0';
+    if (c >= 'a' && c <= 'f')
+        return c - 'a' + 10;
+    if (c >= 'A' && c <= 'F')
+        return c - 'A' + 10;
+
+    return -1;
+}
+
+/*
+ * Decode a hex string into a byte string, 2 hex chars per byte.
+ *
+ * Reads 2 * nbytes characters from `input` and stores nbytes bytes into
+ * `result`. Decoding stops at the first character that is not a hex digit,
+ * which includes the terminating NUL of a string that is too short; bytes
+ * decoded before that point have already been stored.
+ *
+ * Returns false if invalid characters are encountered; otherwise true.
+ */
+bool
+HexDecodeString(uint8 *result, char *input, int nbytes)
+{
+    int i;
+
+    for (i = 0; i < nbytes; ++i)
+    {
+        int n1 = HexDecodeChar(input[i * 2]);
+        int n2;
+
+        /*
+         * Check the high nibble before touching the low one: if input[i * 2]
+         * is the terminating NUL, input[i * 2 + 1] is past the end of the
+         * string and must not be read.
+         */
+        if (n1 < 0)
+            return false;
+
+        n2 = HexDecodeChar(input[i * 2 + 1]);
+        if (n2 < 0)
+            return false;
+
+        result[i] = n1 * 16 + n2;
+    }
+
+    return true;
+}
+
+/* --------------------------------
+ *      pq_getmsgint32_le - get a binary 4-byte int from a message buffer in native (LE) order
+ *
+ * The bytes are copied as-is, i.e. in host byte order; that equals
+ * little-endian only on a little-endian host.
+ * --------------------------------
+ */
+uint32
+pq_getmsgint32_le(StringInfo msg)
+{
+    uint32 n32;
+
+    pq_copymsgbytes(msg, (char *) &n32, sizeof(n32));
+
+    return n32;
+}
+
+/* --------------------------------
+ *      pq_getmsgint64_le - get a binary 8-byte int from a message buffer in native (LE) order
+ *
+ * The bytes are copied as-is, i.e. in host byte order; that equals
+ * little-endian only on a little-endian host.
+ * --------------------------------
+ */
+uint64
+pq_getmsgint64_le(StringInfo msg)
+{
+    uint64 n64;
+
+    pq_copymsgbytes(msg, (char *) &n64, sizeof(n64));
+
+    return n64;
+}
+
+/* append a binary [u]int32 to a StringInfo buffer in native (LE) order */
+void
+pq_sendint32_le(StringInfo buf, uint32 i)
+{
+    /* make room first; the raw memcpy below does not grow the buffer */
+    enlargeStringInfo(buf, sizeof(uint32));
+    memcpy(buf->data + buf->len, &i, sizeof(uint32));
+    buf->len += sizeof(uint32);
+}
+
+/* append a binary [u]int64 to a StringInfo buffer in native (LE) order */
+void
+pq_sendint64_le(StringInfo buf, uint64 i)
+{
+    /* make room first; the raw memcpy below does not grow the buffer */
+    enlargeStringInfo(buf, sizeof(uint64));
+    memcpy(buf->data + buf->len, &i, sizeof(uint64));
+    buf->len += sizeof(uint64);
+}
+
+/*
+ * Write XLOG data to disk.
+ */
+void
+XLogWalPropWrite(char *buf, Size nbytes, XLogRecPtr recptr)
+{
+    int startoff;
+    int byteswritten;
+
+    /* Each iteration writes at most up to the end of one WAL segment. */
+    while (nbytes > 0)
+    {
+        int segbytes;
+
+        /* Close the current segment if it's completed */
+        if (walpropFile >= 0 && !XLByteInSeg(recptr, walpropSegNo, wal_segment_size))
+            XLogWalPropClose(recptr);
+
+        if (walpropFile < 0)
+        {
+            bool use_existent = true;
+
+            /* Create/use new log file */
+            XLByteToSeg(recptr, walpropSegNo, wal_segment_size);
+            walpropFile = XLogFileInit(walpropSegNo, &use_existent, false);
+            /* remembered only so error messages can name the file */
+            walpropFileTLI = ThisTimeLineID;
+        }
+
+        /* Calculate the start offset of the received logs */
+        startoff = XLogSegmentOffset(recptr, wal_segment_size);
+
+        /* Clamp this write so it does not cross the segment boundary */
+        if (startoff + nbytes > wal_segment_size)
+            segbytes = wal_segment_size - startoff;
+        else
+            segbytes = nbytes;
+
+        /* OK to write the logs */
+        errno = 0;
+
+        byteswritten = pg_pwrite(walpropFile, buf, segbytes, (off_t) startoff);
+        if (byteswritten <= 0)
+        {
+            char xlogfname[MAXFNAMELEN];
+            int save_errno;
+
+            /* if write didn't set errno, assume no disk space */
+            if (errno == 0)
+                errno = ENOSPC;
+
+            /* keep errno intact across XLogFileName() for the %m below */
+            save_errno = errno;
+            XLogFileName(xlogfname, walpropFileTLI, walpropSegNo, wal_segment_size);
+            errno = save_errno;
+            ereport(PANIC,
+                    (errcode_for_file_access(),
+                     errmsg("could not write to log segment %s "
+                            "at offset %u, length %lu: %m",
+                            xlogfname, startoff, (unsigned long) segbytes)));
+        }
+
+        /* Update state for write; a short write is finished by the next iteration */
+        recptr += byteswritten;
+
+        nbytes -= byteswritten;
+        buf += byteswritten;
+    }
+
+    /*
+     * Close the current segment if it's fully written up in the last cycle of
+     * the loop.
+     */
+    if (walpropFile >= 0 && !XLByteInSeg(recptr, walpropSegNo, wal_segment_size))
+    {
+        XLogWalPropClose(recptr);
+    }
+}
+
+/*
+ * Close the current segment.
+ */ +void +XLogWalPropClose(XLogRecPtr recptr) +{ + Assert(walpropFile >= 0 && !XLByteInSeg(recptr, walpropSegNo, wal_segment_size)); + + if (close(walpropFile) != 0) + { + char xlogfname[MAXFNAMELEN]; + XLogFileName(xlogfname, walpropFileTLI, walpropSegNo, wal_segment_size); + + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not close log segment %s: %m", + xlogfname))); + } + + walpropFile = -1; +} + +/* START of cloned functions from walsender.c */ + +/* + * Handle START_REPLICATION command. + * + * At the moment, this never returns, but an ereport(ERROR) will take us back + * to the main loop. + */ +void +StartProposerReplication(StartReplicationCmd *cmd) +{ + XLogRecPtr FlushPtr; + + if (ThisTimeLineID == 0) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("IDENTIFY_SYSTEM has not been run before START_REPLICATION"))); + + /* create xlogreader for physical replication */ + xlogreader = + XLogReaderAllocate(wal_segment_size, NULL, + XL_ROUTINE(.segment_open = WalSndSegmentOpen, + .segment_close = wal_segment_close), + NULL); + + if (!xlogreader) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + /* + * We assume here that we're logging enough information in the WAL for + * log-shipping, since this is checked in PostmasterMain(). + * + * NOTE: wal_level can only change at shutdown, so in most cases it is + * difficult for there to be WAL data that we can still see that was + * written at wal_level='minimal'. + */ + + if (cmd->slotname) + { + ReplicationSlotAcquire(cmd->slotname, true); + if (SlotIsLogical(MyReplicationSlot)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot use a logical replication slot for physical replication"))); + + /* + * We don't need to verify the slot's restart_lsn here; instead we + * rely on the caller requesting the starting point to use. If the + * WAL segment doesn't exist, we'll fail later. 
+ */ + } + + /* + * Select the timeline. If it was given explicitly by the client, use + * that. Otherwise use the timeline of the last replayed record, which is + * kept in ThisTimeLineID. + * + * Neon doesn't currently use PG Timelines, but it may in the future, so + * we keep this code around to lighten the load for when we need it. + */ + if (am_cascading_walsender) + { + /* this also updates ThisTimeLineID */ + FlushPtr = GetStandbyFlushRecPtr(); + } + else + FlushPtr = GetFlushRecPtr(); + + if (cmd->timeline != 0) + { + XLogRecPtr switchpoint; + + sendTimeLine = cmd->timeline; + if (sendTimeLine == ThisTimeLineID) + { + sendTimeLineIsHistoric = false; + sendTimeLineValidUpto = InvalidXLogRecPtr; + } + else + { + List *timeLineHistory; + + sendTimeLineIsHistoric = true; + + /* + * Check that the timeline the client requested exists, and the + * requested start location is on that timeline. + */ + timeLineHistory = readTimeLineHistory(ThisTimeLineID); + switchpoint = tliSwitchPoint(cmd->timeline, timeLineHistory, + &sendTimeLineNextTLI); + list_free_deep(timeLineHistory); + + /* + * Found the requested timeline in the history. Check that + * requested startpoint is on that timeline in our history. + * + * This is quite loose on purpose. We only check that we didn't + * fork off the requested timeline before the switchpoint. We + * don't check that we switched *to* it before the requested + * starting point. This is because the client can legitimately + * request to start replication from the beginning of the WAL + * segment that contains switchpoint, but on the new timeline, so + * that it doesn't end up with a partial segment. If you ask for + * too old a starting point, you'll get an error later when we + * fail to find the requested WAL segment in pg_wal. + * + * XXX: we could be more strict here and only allow a startpoint + * that's older than the switchpoint, if it's still in the same + * WAL segment. 
+ */ + if (!XLogRecPtrIsInvalid(switchpoint) && + switchpoint < cmd->startpoint) + { + ereport(ERROR, + (errmsg("requested starting point %X/%X on timeline %u is not in this server's history", + LSN_FORMAT_ARGS(cmd->startpoint), + cmd->timeline), + errdetail("This server's history forked from timeline %u at %X/%X.", + cmd->timeline, + LSN_FORMAT_ARGS(switchpoint)))); + } + sendTimeLineValidUpto = switchpoint; + } + } + else + { + sendTimeLine = ThisTimeLineID; + sendTimeLineValidUpto = InvalidXLogRecPtr; + sendTimeLineIsHistoric = false; + } + + streamingDoneSending = streamingDoneReceiving = false; + + /* If there is nothing to stream, don't even enter COPY mode */ + if (!sendTimeLineIsHistoric || cmd->startpoint < sendTimeLineValidUpto) + { + /* + * When we first start replication the standby will be behind the + * primary. For some applications, for example synchronous + * replication, it is important to have a clear state for this initial + * catchup mode, so we can trigger actions when we change streaming + * state later. We may stay in this state for a long time, which is + * exactly why we want to be able to monitor whether or not we are + * still here. + */ + WalSndSetState(WALSNDSTATE_CATCHUP); + + /* + * Don't allow a request to stream from a future point in WAL that + * hasn't been flushed to disk in this server yet. 
+ */ + if (FlushPtr < cmd->startpoint) + { + ereport(ERROR, + (errmsg("requested starting point %X/%X is ahead of the WAL flush position of this server %X/%X", + LSN_FORMAT_ARGS(cmd->startpoint), + LSN_FORMAT_ARGS(FlushPtr)))); + } + + /* Start streaming from the requested point */ + sentPtr = cmd->startpoint; + + /* Initialize shared memory status, too */ + SpinLockAcquire(&MyWalSnd->mutex); + MyWalSnd->sentPtr = sentPtr; + SpinLockRelease(&MyWalSnd->mutex); + + SyncRepInitConfig(); + + /* Main loop of walsender */ + replication_active = true; + + WalSndLoop(XLogSendPhysical); + + replication_active = false; + if (got_STOPPING) + proc_exit(0); + WalSndSetState(WALSNDSTATE_STARTUP); + + Assert(streamingDoneSending && streamingDoneReceiving); + } + + if (cmd->slotname) + ReplicationSlotRelease(); + + /* + * Copy is finished now. Send a single-row result set indicating the next + * timeline. + */ + if (sendTimeLineIsHistoric) + { + char startpos_str[8 + 1 + 8 + 1]; + DestReceiver *dest; + TupOutputState *tstate; + TupleDesc tupdesc; + Datum values[2]; + bool nulls[2]; + + snprintf(startpos_str, sizeof(startpos_str), "%X/%X", + LSN_FORMAT_ARGS(sendTimeLineValidUpto)); + + dest = CreateDestReceiver(DestRemoteSimple); + MemSet(nulls, false, sizeof(nulls)); + + /* + * Need a tuple descriptor representing two columns. int8 may seem + * like a surprising data type for this, but in theory int4 would not + * be wide enough for this, as TimeLineID is unsigned. 
+ */ + tupdesc = CreateTemplateTupleDesc(2); + TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 1, "next_tli", + INT8OID, -1, 0); + TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 2, "next_tli_startpos", + TEXTOID, -1, 0); + + /* prepare for projection of tuple */ + tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual); + + values[0] = Int64GetDatum((int64) sendTimeLineNextTLI); + values[1] = CStringGetTextDatum(startpos_str); + + /* send it to dest */ + do_tup_output(tstate, values, nulls); + + end_tup_output(tstate); + } + + /* Send CommandComplete message */ + EndReplicationCommand("START_STREAMING"); +} + +/* + * Returns the latest point in WAL that has been safely flushed to disk, and + * can be sent to the standby. This should only be called when in recovery, + * ie. we're streaming to a cascaded standby. + * + * As a side-effect, ThisTimeLineID is updated to the TLI of the last + * replayed WAL record. + */ +static XLogRecPtr +GetStandbyFlushRecPtr(void) +{ + XLogRecPtr replayPtr; + TimeLineID replayTLI; + XLogRecPtr receivePtr; + TimeLineID receiveTLI; + XLogRecPtr result; + + /* + * We can safely send what's already been replayed. Also, if walreceiver + * is streaming WAL from the same timeline, we can send anything that it + * has streamed, but hasn't been replayed yet. + */ + + receivePtr = GetWalRcvFlushRecPtr(NULL, &receiveTLI); + replayPtr = GetXLogReplayRecPtr(&replayTLI); + + ThisTimeLineID = replayTLI; + + result = replayPtr; + if (receiveTLI == ThisTimeLineID && receivePtr > replayPtr) + result = receivePtr; + + return result; +} + +/* XLogReaderRoutine->segment_open callback */ +static void +WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo, + TimeLineID *tli_p) +{ + char path[MAXPGPATH]; + + /*------- + * When reading from a historic timeline, and there is a timeline switch + * within this segment, read from the WAL segment belonging to the new + * timeline. 
+ * + * For example, imagine that this server is currently on timeline 5, and + * we're streaming timeline 4. The switch from timeline 4 to 5 happened at + * 0/13002088. In pg_wal, we have these files: + * + * ... + * 000000040000000000000012 + * 000000040000000000000013 + * 000000050000000000000013 + * 000000050000000000000014 + * ... + * + * In this situation, when requested to send the WAL from segment 0x13, on + * timeline 4, we read the WAL from file 000000050000000000000013. Archive + * recovery prefers files from newer timelines, so if the segment was + * restored from the archive on this server, the file belonging to the old + * timeline, 000000040000000000000013, might not exist. Their contents are + * equal up to the switchpoint, because at a timeline switch, the used + * portion of the old segment is copied to the new file. ------- + */ + *tli_p = sendTimeLine; + if (sendTimeLineIsHistoric) + { + XLogSegNo endSegNo; + + XLByteToSeg(sendTimeLineValidUpto, endSegNo, state->segcxt.ws_segsize); + if (nextSegNo == endSegNo) + *tli_p = sendTimeLineNextTLI; + } + + XLogFilePath(path, *tli_p, nextSegNo, state->segcxt.ws_segsize); + state->seg.ws_file = BasicOpenFile(path, O_RDONLY | PG_BINARY); + if (state->seg.ws_file >= 0) + return; + + /* + * If the file is not found, assume it's because the standby asked for a + * too old WAL segment that has already been removed or recycled. + */ + if (errno == ENOENT) + { + char xlogfname[MAXFNAMELEN]; + int save_errno = errno; + + XLogFileName(xlogfname, *tli_p, nextSegNo, wal_segment_size); + errno = save_errno; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("requested WAL segment %s has already been removed", + xlogfname))); + } + else + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", + path))); +} + + +/* Main loop of walsender process that streams the WAL over Copy messages. 
*/ +static void +WalSndLoop(WalSndSendDataCallback send_data) +{ + /* + * Initialize the last reply timestamp. That enables timeout processing + * from hereon. + */ + last_reply_timestamp = GetCurrentTimestamp(); + waiting_for_ping_response = false; + + /* + * Loop until we reach the end of this timeline or the client requests to + * stop streaming. + */ + for (;;) + { + /* Clear any already-pending wakeups */ + ResetLatch(MyLatch); + + CHECK_FOR_INTERRUPTS(); + + /* Process any requests or signals received recently */ + if (ConfigReloadPending) + { + ConfigReloadPending = false; + ProcessConfigFile(PGC_SIGHUP); + SyncRepInitConfig(); + } + + /* always true */ + if (am_wal_proposer) + { + send_data(); + if (WalSndCaughtUp) + { + if (MyWalSnd->state == WALSNDSTATE_CATCHUP) + WalSndSetState(WALSNDSTATE_STREAMING); + WalProposerPoll(); + WalSndCaughtUp = false; + } + continue; + } + } +} + +/* + * Send out the WAL in its normal physical/stored form. + * + * Read up to MAX_SEND_SIZE bytes of WAL that's been flushed to disk, + * but not yet sent to the client, and buffer it in the libpq output + * buffer. + * + * If there is no unsent WAL remaining, WalSndCaughtUp is set to true, + * otherwise WalSndCaughtUp is set to false. + */ +static void +XLogSendPhysical(void) +{ + XLogRecPtr SendRqstPtr; + XLogRecPtr startptr; + XLogRecPtr endptr; + Size nbytes PG_USED_FOR_ASSERTS_ONLY; + + /* If requested switch the WAL sender to the stopping state. */ + if (got_STOPPING) + WalSndSetState(WALSNDSTATE_STOPPING); + + if (streamingDoneSending) + { + WalSndCaughtUp = true; + return; + } + + /* Figure out how far we can safely send the WAL. */ + if (sendTimeLineIsHistoric) + { + /* + * Streaming an old timeline that's in this server's history, but is + * not the one we're currently inserting or replaying. It can be + * streamed up to the point where we switched off that timeline. 
+ */ + SendRqstPtr = sendTimeLineValidUpto; + } + else if (am_cascading_walsender) + { + /* + * Streaming the latest timeline on a standby. + * + * Attempt to send all WAL that has already been replayed, so that we + * know it's valid. If we're receiving WAL through streaming + * replication, it's also OK to send any WAL that has been received + * but not replayed. + * + * The timeline we're recovering from can change, or we can be + * promoted. In either case, the current timeline becomes historic. We + * need to detect that so that we don't try to stream past the point + * where we switched to another timeline. We check for promotion or + * timeline switch after calculating FlushPtr, to avoid a race + * condition: if the timeline becomes historic just after we checked + * that it was still current, it's still be OK to stream it up to the + * FlushPtr that was calculated before it became historic. + */ + bool becameHistoric = false; + + SendRqstPtr = GetStandbyFlushRecPtr(); + + if (!RecoveryInProgress()) + { + /* + * We have been promoted. RecoveryInProgress() updated + * ThisTimeLineID to the new current timeline. + */ + am_cascading_walsender = false; + becameHistoric = true; + } + else + { + /* + * Still a cascading standby. But is the timeline we're sending + * still the one recovery is recovering from? ThisTimeLineID was + * updated by the GetStandbyFlushRecPtr() call above. + */ + if (sendTimeLine != ThisTimeLineID) + becameHistoric = true; + } + + if (becameHistoric) + { + /* + * The timeline we were sending has become historic. Read the + * timeline history file of the new timeline to see where exactly + * we forked off from the timeline we were sending. 
+ */ + List *history; + + history = readTimeLineHistory(ThisTimeLineID); + sendTimeLineValidUpto = tliSwitchPoint(sendTimeLine, history, &sendTimeLineNextTLI); + + Assert(sendTimeLine < sendTimeLineNextTLI); + list_free_deep(history); + + sendTimeLineIsHistoric = true; + + SendRqstPtr = sendTimeLineValidUpto; + } + } + else + { + /* + * Streaming the current timeline on a primary. + * + * Attempt to send all data that's already been written out and + * fsync'd to disk. We cannot go further than what's been written out + * given the current implementation of WALRead(). And in any case + * it's unsafe to send WAL that is not securely down to disk on the + * primary: if the primary subsequently crashes and restarts, standbys + * must not have applied any WAL that got lost on the primary. + */ + SendRqstPtr = GetFlushRecPtr(); + } + + /* + * Record the current system time as an approximation of the time at which + * this WAL location was written for the purposes of lag tracking. + * + * In theory we could make XLogFlush() record a time in shmem whenever WAL + * is flushed and we could get that time as well as the LSN when we call + * GetFlushRecPtr() above (and likewise for the cascading standby + * equivalent), but rather than putting any new code into the hot WAL path + * it seems good enough to capture the time here. We should reach this + * after XLogFlush() runs WalSndWakeupProcessRequests(), and although that + * may take some time, we read the WAL flush pointer and take the time + * very close to together here so that we'll get a later position if it is + * still moving. + * + * Because LagTrackerWrite ignores samples when the LSN hasn't advanced, + * this gives us a cheap approximation for the WAL flush time for this + * LSN. + * + * Note that the LSN is not necessarily the LSN for the data contained in + * the present message; it's the end of the WAL, which might be further + * ahead. 
All the lag tracking machinery cares about is finding out when + * that arbitrary LSN is eventually reported as written, flushed and + * applied, so that it can measure the elapsed time. + */ + LagTrackerWrite(SendRqstPtr, GetCurrentTimestamp()); + + /* + * If this is a historic timeline and we've reached the point where we + * forked to the next timeline, stop streaming. + * + * Note: We might already have sent WAL > sendTimeLineValidUpto. The + * startup process will normally replay all WAL that has been received + * from the primary, before promoting, but if the WAL streaming is + * terminated at a WAL page boundary, the valid portion of the timeline + * might end in the middle of a WAL record. We might've already sent the + * first half of that partial WAL record to the cascading standby, so that + * sentPtr > sendTimeLineValidUpto. That's OK; the cascading standby can't + * replay the partial WAL record either, so it can still follow our + * timeline switch. + */ + if (sendTimeLineIsHistoric && sendTimeLineValidUpto <= sentPtr) + { + /* close the current file. */ + if (xlogreader->seg.ws_file >= 0) + wal_segment_close(xlogreader); + + /* Send CopyDone */ + pq_putmessage_noblock('c', NULL, 0); + streamingDoneSending = true; + + WalSndCaughtUp = true; + + elog(DEBUG1, "walsender reached end of timeline at %X/%X (sent up to %X/%X)", + LSN_FORMAT_ARGS(sendTimeLineValidUpto), + LSN_FORMAT_ARGS(sentPtr)); + return; + } + + /* Do we have any work to do? */ + Assert(sentPtr <= SendRqstPtr); + if (SendRqstPtr <= sentPtr) + { + WalSndCaughtUp = true; + return; + } + + /* + * Figure out how much to send in one message. If there's no more than + * MAX_SEND_SIZE bytes to send, send everything. Otherwise send + * MAX_SEND_SIZE bytes, but round back to logfile or page boundary. + * + * The rounding is not only for performance reasons. Walreceiver relies on + * the fact that we never split a WAL record across two messages. 
Since a + * long WAL record is split at page boundary into continuation records, + * page boundary is always a safe cut-off point. We also assume that + * SendRqstPtr never points to the middle of a WAL record. + */ + startptr = sentPtr; + endptr = startptr; + endptr += MAX_SEND_SIZE; + + /* if we went beyond SendRqstPtr, back off */ + if (SendRqstPtr <= endptr) + { + endptr = SendRqstPtr; + if (sendTimeLineIsHistoric) + WalSndCaughtUp = false; + else + WalSndCaughtUp = true; + } + else + { + /* round down to page boundary. */ + endptr -= (endptr % XLOG_BLCKSZ); + WalSndCaughtUp = false; + } + + nbytes = endptr - startptr; + Assert(nbytes <= MAX_SEND_SIZE); + + /* always true */ + if (am_wal_proposer) + { + WalProposerBroadcast(startptr, endptr); + } + else + { + /* code removed for brevity */ + } + sentPtr = endptr; + + /* Update shared memory status */ + { + WalSnd *walsnd = MyWalSnd; + + SpinLockAcquire(&walsnd->mutex); + walsnd->sentPtr = sentPtr; + SpinLockRelease(&walsnd->mutex); + } + + /* Report progress of XLOG streaming in PS display */ + if (update_process_title) + { + char activitymsg[50]; + + snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%X", + LSN_FORMAT_ARGS(sentPtr)); + set_ps_display(activitymsg); + } +} + diff --git a/pgxn/neon/walproposer_utils.h b/pgxn/neon/walproposer_utils.h new file mode 100644 index 0000000000..4771d3ff82 --- /dev/null +++ b/pgxn/neon/walproposer_utils.h @@ -0,0 +1,19 @@ +#ifndef __NEON_WALPROPOSER_UTILS_H__ +#define __NEON_WALPROPOSER_UTILS_H__ + +#include "walproposer.h" + +int CompareLsn(const void *a, const void *b); +char* FormatSafekeeperState(SafekeeperState state); +void AssertEventsOkForState(uint32 events, Safekeeper* sk); +uint32 SafekeeperStateDesiredEvents(SafekeeperState state); +char* FormatEvents(uint32 events); +bool HexDecodeString(uint8 *result, char *input, int nbytes); +uint32 pq_getmsgint32_le(StringInfo msg); +uint64 pq_getmsgint64_le(StringInfo msg); +void pq_sendint32_le(StringInfo buf, 
uint32 i); +void pq_sendint64_le(StringInfo buf, uint64 i); +void XLogWalPropWrite(char *buf, Size nbytes, XLogRecPtr recptr); +void XLogWalPropClose(XLogRecPtr recptr); + +#endif /* __NEON_WALPROPOSER_UTILS_H__ */ diff --git a/pgxn/neon_test_utils/Makefile b/pgxn/neon_test_utils/Makefile new file mode 100644 index 0000000000..9c774ec185 --- /dev/null +++ b/pgxn/neon_test_utils/Makefile @@ -0,0 +1,15 @@ +# pgxs/neon_test_utils/Makefile + + +MODULE_big = neon_test_utils +OBJS = \ + $(WIN32RES) \ + neontest.o + +EXTENSION = neon_test_utils +DATA = neon_test_utils--1.0.sql +PGFILEDESC = "neon_test_utils - helpers for neon testing and debugging" + +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) diff --git a/pgxn/neon_test_utils/neon_test_utils--1.0.sql b/pgxn/neon_test_utils/neon_test_utils--1.0.sql new file mode 100644 index 0000000000..402981a9a6 --- /dev/null +++ b/pgxn/neon_test_utils/neon_test_utils--1.0.sql @@ -0,0 +1,29 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION neon_test_utils" to load this file. 
\quit + +CREATE FUNCTION test_consume_xids(nxids int) +RETURNS VOID +AS 'MODULE_PATHNAME', 'test_consume_xids' +LANGUAGE C STRICT +PARALLEL UNSAFE; + +CREATE FUNCTION clear_buffer_cache() +RETURNS VOID +AS 'MODULE_PATHNAME', 'clear_buffer_cache' +LANGUAGE C STRICT +PARALLEL UNSAFE; + +CREATE FUNCTION get_raw_page_at_lsn(relname text, forkname text, blocknum int8, lsn pg_lsn) +RETURNS bytea +AS 'MODULE_PATHNAME', 'get_raw_page_at_lsn' +LANGUAGE C PARALLEL UNSAFE; + +CREATE FUNCTION get_raw_page_at_lsn(tbspc oid, db oid, relfilenode oid, forknum int8, blocknum int8, lsn pg_lsn) +RETURNS bytea +AS 'MODULE_PATHNAME', 'get_raw_page_at_lsn_ex' +LANGUAGE C PARALLEL UNSAFE; + +CREATE FUNCTION neon_xlogflush(lsn pg_lsn) +RETURNS VOID +AS 'MODULE_PATHNAME', 'neon_xlogflush' +LANGUAGE C PARALLEL UNSAFE; diff --git a/pgxn/neon_test_utils/neon_test_utils.control b/pgxn/neon_test_utils/neon_test_utils.control new file mode 100644 index 0000000000..94e6720503 --- /dev/null +++ b/pgxn/neon_test_utils/neon_test_utils.control @@ -0,0 +1,5 @@ +# neon_test_utils extension +comment = 'helpers for neon testing and debugging' +default_version = '1.0' +module_pathname = '$libdir/neon_test_utils' +relocatable = true diff --git a/pgxn/neon_test_utils/neontest.c b/pgxn/neon_test_utils/neontest.c new file mode 100644 index 0000000000..3e30065cd3 --- /dev/null +++ b/pgxn/neon_test_utils/neontest.c @@ -0,0 +1,304 @@ +/*------------------------------------------------------------------------- + * + * neontest.c + * Helpers for neon testing and debugging + * + * IDENTIFICATION + * contrib/neon_test_utils/neontest.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/relation.h" +#include "access/xact.h" +#include "access/xlog.h" +#include "catalog/namespace.h" +#include "fmgr.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "storage/buf_internals.h" +#include "storage/bufmgr.h" +#include "utils/builtins.h" 
+#include "utils/pg_lsn.h" +#include "utils/rel.h" +#include "utils/varlena.h" +#include "../neon/pagestore_client.h" + +PG_MODULE_MAGIC; + +extern void _PG_init(void); + +PG_FUNCTION_INFO_V1(test_consume_xids); +PG_FUNCTION_INFO_V1(clear_buffer_cache); +PG_FUNCTION_INFO_V1(get_raw_page_at_lsn); +PG_FUNCTION_INFO_V1(get_raw_page_at_lsn_ex); +PG_FUNCTION_INFO_V1(neon_xlogflush); + +/* + * Linkage to functions in zenith module. + * The signature here would need to be updated whenever function parameters change in pagestore_smgr.c + */ +typedef void (*zenith_read_at_lsn_type)(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno, + XLogRecPtr request_lsn, bool request_latest, char *buffer); + +static zenith_read_at_lsn_type zenith_read_at_lsn_ptr; + +/* + * Module initialize function: fetch function pointers for cross-module calls. + */ +void +_PG_init(void) +{ + /* Asserts verify that typedefs above match original declarations */ + AssertVariableIsOfType(&zenith_read_at_lsn, zenith_read_at_lsn_type); + zenith_read_at_lsn_ptr = (zenith_read_at_lsn_type) + load_external_function("$libdir/neon", "zenith_read_at_lsn", + true, NULL); +} + +#define zenith_read_at_lsn zenith_read_at_lsn_ptr + +/* + * test_consume_xids(int4), for rapidly consuming XIDs, to test wraparound. + */ +Datum +test_consume_xids(PG_FUNCTION_ARGS) +{ + int32 nxids = PG_GETARG_INT32(0); + TransactionId topxid; + FullTransactionId fullxid; + TransactionId xid; + TransactionId targetxid; + + /* make sure we have a top-XID first */ + topxid = GetTopTransactionId(); + + xid = ReadNextTransactionId(); + + targetxid = xid + nxids; + while (targetxid < FirstNormalTransactionId) + targetxid++; + + while (TransactionIdPrecedes(xid, targetxid)) + { + fullxid = GetNewTransactionId(true); + xid = XidFromFullTransactionId(fullxid); + elog(DEBUG1, "topxid: %u xid: %u", topxid, xid); + } + + PG_RETURN_VOID(); +} + +/* + * Flush the buffer cache, evicting all pages that are not currently pinned. 
+ */ +Datum +clear_buffer_cache(PG_FUNCTION_ARGS) +{ + bool save_zenith_test_evict; + + /* + * Temporarily set the zenith_test_evict GUC, so that when we pin and + * unpin a buffer, the buffer is evicted. We use that hack to evict all + * buffers, as there is no explicit "evict this buffer" function in the + * buffer manager. + */ + save_zenith_test_evict = zenith_test_evict; + zenith_test_evict = true; + PG_TRY(); + { + /* Scan through all the buffers */ + for (int i = 0; i < NBuffers; i++) + { + BufferDesc *bufHdr; + uint32 buf_state; + Buffer bufferid; + bool isvalid; + RelFileNode rnode; + ForkNumber forknum; + BlockNumber blocknum; + + /* Peek into the buffer header to see what page it holds. */ + bufHdr = GetBufferDescriptor(i); + buf_state = LockBufHdr(bufHdr); + + if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID)) + isvalid = true; + else + isvalid = false; + bufferid = BufferDescriptorGetBuffer(bufHdr); + rnode = bufHdr->tag.rnode; + forknum = bufHdr->tag.forkNum; + blocknum = bufHdr->tag.blockNum; + + UnlockBufHdr(bufHdr, buf_state); + + /* + * Pin the buffer, and release it again. Because we have + * zenith_test_evict==true, this will evict the page from + * the buffer cache if no one else is holding a pin on it. + */ + if (isvalid) + { + if (ReadRecentBuffer(rnode, forknum, blocknum, bufferid)) + ReleaseBuffer(bufferid); + } + } + } + PG_FINALLY(); + { + /* restore the GUC */ + zenith_test_evict = save_zenith_test_evict; + } + PG_END_TRY(); + + PG_RETURN_VOID(); +} + + +/* + * Reads the page from page server without buffer cache + * usage mimics get_raw_page() in pageinspect, but offers reading versions at specific LSN + * NULL read lsn will result in reading the latest version. 
+ * + * Note: reading latest version will result in waiting for latest changes to reach the page server, + * if this is undesirable, use pageinspect' get_raw_page that uses buffered access to the latest page + */ +Datum +get_raw_page_at_lsn(PG_FUNCTION_ARGS) +{ + bytea *raw_page; + ForkNumber forknum; + RangeVar *relrv; + Relation rel; + char *raw_page_data; + text *relname; + text *forkname; + uint32 blkno; + + bool request_latest = PG_ARGISNULL(3); + uint64 read_lsn = request_latest ? GetXLogInsertRecPtr() : PG_GETARG_INT64(3); + + if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2)) + PG_RETURN_NULL(); + + relname = PG_GETARG_TEXT_PP(0); + forkname = PG_GETARG_TEXT_PP(1); + blkno = PG_GETARG_UINT32(2); + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use raw page functions"))); + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = relation_openrv(relrv, AccessShareLock); + + /* Check that this relation has storage */ + if (rel->rd_rel->relkind == RELKIND_VIEW) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot get raw page from view \"%s\"", + RelationGetRelationName(rel)))); + if (rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot get raw page from composite type \"%s\"", + RelationGetRelationName(rel)))); + if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot get raw page from foreign table \"%s\"", + RelationGetRelationName(rel)))); + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot get raw page from partitioned table \"%s\"", + RelationGetRelationName(rel)))); + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot get raw page from partitioned index 
\"%s\"", + RelationGetRelationName(rel)))); + + /* + * Reject attempts to read non-local temporary relations; we would be + * likely to get wrong data since we have no visibility into the owning + * session's local buffers. + */ + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary tables of other sessions"))); + + + forknum = forkname_to_number(text_to_cstring(forkname)); + + /* Initialize buffer to copy to */ + raw_page = (bytea *) palloc(BLCKSZ + VARHDRSZ); + SET_VARSIZE(raw_page, BLCKSZ + VARHDRSZ); + raw_page_data = VARDATA(raw_page); + + zenith_read_at_lsn(rel->rd_node, forknum, blkno, read_lsn, request_latest, raw_page_data); + + relation_close(rel, AccessShareLock); + + PG_RETURN_BYTEA_P(raw_page); +} + +/* + * Another option to read a relation page from page server without cache + * this version doesn't validate input and allows reading blocks of dropped relations + * + * Note: reading latest version will result in waiting for latest changes to reach the page server, + * if this is undesirable, use pageinspect' get_raw_page that uses buffered access to the latest page + */ +Datum +get_raw_page_at_lsn_ex(PG_FUNCTION_ARGS) +{ + char *raw_page_data; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use raw page functions"))); + + if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2) || + PG_ARGISNULL(3) || PG_ARGISNULL(4)) + PG_RETURN_NULL(); + + { + RelFileNode rnode = { + .spcNode = PG_GETARG_OID(0), + .dbNode = PG_GETARG_OID(1), + .relNode = PG_GETARG_OID(2) + }; + + ForkNumber forknum = PG_GETARG_UINT32(3); + + uint32 blkno = PG_GETARG_UINT32(4); + bool request_latest = PG_ARGISNULL(5); + uint64 read_lsn = request_latest ? 
GetXLogInsertRecPtr() : PG_GETARG_INT64(5); + + + /* Initialize buffer to copy to */ + bytea *raw_page = (bytea *) palloc(BLCKSZ + VARHDRSZ); + SET_VARSIZE(raw_page, BLCKSZ + VARHDRSZ); + raw_page_data = VARDATA(raw_page); + + zenith_read_at_lsn(rnode, forknum, blkno, read_lsn, request_latest, raw_page_data); + PG_RETURN_BYTEA_P(raw_page); + } +} + +/* + * Directly calls XLogFlush(lsn) to flush WAL buffers. + */ +Datum +neon_xlogflush(PG_FUNCTION_ARGS) +{ + XLogRecPtr lsn = PG_GETARG_LSN(0); + XLogFlush(lsn); + PG_RETURN_VOID(); +} diff --git a/vendor/postgres b/vendor/postgres index a479855158..8f132d968c 160000 --- a/vendor/postgres +++ b/vendor/postgres @@ -1 +1 @@ -Subproject commit a4798551587fb5a52740687a341af83b28733dc6 +Subproject commit 8f132d968cd44068fc6f72e4047f7d3d6320f4bb From a5ca6a9d2b69a8d4a67900901710c21167451a54 Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Fri, 26 Aug 2022 13:59:04 +0200 Subject: [PATCH 41/63] Move legacy version of compute-node Dockerfile from postgres repo (#2339) It's used by e2e CI. Building Dockerfile.compute-node will take unreasonable amount of time without v2 runners. TODO: remove once cloud repo CI is moved to v2 runners. --- Dockerfile.compute-node.legacy | 87 ++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 Dockerfile.compute-node.legacy diff --git a/Dockerfile.compute-node.legacy b/Dockerfile.compute-node.legacy new file mode 100644 index 0000000000..ba34e2486f --- /dev/null +++ b/Dockerfile.compute-node.legacy @@ -0,0 +1,87 @@ +# +# Legacy version of the Dockerfile for the compute node. +# Used by e2e CI. Building Dockerfile.compute-node will take +# unreasonable ammount of time without v2 runners. +# +# TODO: remove once cloud repo CI is moved to v2 runners.
+# + + +# Allow specifiyng different compute-tools tag and image repo, so we are +# able to use different images +ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com +ARG IMAGE=compute-tools +ARG TAG=latest + +# +# Image with pre-built tools +# +FROM $REPOSITORY/$IMAGE:$TAG AS compute-deps +# Only to get ready compute_ctl binary as deppendency + +# +# Image with Postgres build deps +# +FROM debian:buster-slim AS build-deps + +RUN apt-get update && apt-get -yq install automake libtool build-essential bison flex libreadline-dev zlib1g-dev libxml2-dev \ + libcurl4-openssl-dev libossp-uuid-dev + +# +# Image with built Postgres +# +FROM build-deps AS pg-build + +# Add user postgres +RUN adduser postgres +RUN mkdir /pg && chown postgres:postgres /pg + +# Copy source files +COPY ./vendor/postgres /pg/ +COPY ./pgxn /pg/ + +# Build and install Postgres locally +RUN mkdir /pg/compute_build && cd /pg/compute_build && \ + ../configure CFLAGS='-O2 -g3' --prefix=$(pwd)/postgres_bin --enable-debug --with-uuid=ossp && \ + # Install main binaries and contribs + make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \ + make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \ + # Install headers + make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install + +# Install neon contrib +RUN make MAKELEVEL=0 PG_CONFIG=/pg/compute_build/postgres_bin/bin/pg_config -j $(getconf _NPROCESSORS_ONLN) -C /pg/neon install + +USER postgres +WORKDIR /pg + +# +# Final compute node image to be exported +# +FROM debian:buster-slim + +# libreadline-dev is required to run psql +RUN apt-get update && apt-get -yq install libreadline-dev libossp-uuid-dev + +# Add user postgres +RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \ + echo "postgres:test_console_pass" | chpasswd && \ + mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \ + chown -R postgres:postgres /var/db/postgres && \ + chmod 0750 /var/db/postgres/compute + 
+# Copy ready Postgres binaries +COPY --from=pg-build /pg/compute_build/postgres_bin /usr/local + +# Copy binaries from compute-tools +COPY --from=compute-deps /usr/local/bin/compute_ctl /usr/local/bin/compute_ctl + +# XXX: temporary symlink for compatibility with old control-plane +RUN ln -s /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl + +# Add postgres shared objects to the search path +RUN echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig + +USER postgres + +ENTRYPOINT ["/usr/local/bin/compute_ctl"] From a56ae15edf448534d00a6a21504cbc556d927cfd Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Fri, 26 Aug 2022 15:40:22 +0300 Subject: [PATCH 42/63] Lock cargo dependencies during CI builds --- .dockerignore | 1 + .github/workflows/build_and_test.yml | 4 ++-- .github/workflows/codestyle.yml | 2 +- Dockerfile | 2 +- Dockerfile.compute-node | 10 +++++----- Dockerfile.compute-tools | 2 +- run_clippy.sh | 4 ++-- 7 files changed, 13 insertions(+), 12 deletions(-) diff --git a/.dockerignore b/.dockerignore index 0667d8870e..8a3d32e6d2 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,6 +3,7 @@ **/.pytest_cache .git +.github target tmp_check tmp_install diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 6e570b22d4..bf6eb69930 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -95,11 +95,11 @@ jobs: if [[ $BUILD_TYPE == "debug" ]]; then cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run" CARGO_FEATURES="" - CARGO_FLAGS="" + CARGO_FLAGS="--locked" elif [[ $BUILD_TYPE == "release" ]]; then cov_prefix="" CARGO_FEATURES="--features profiling" - CARGO_FLAGS="--release $CARGO_FEATURES" + CARGO_FLAGS="--locked --release $CARGO_FEATURES" fi echo "cov_prefix=${cov_prefix}" >> $GITHUB_ENV echo "CARGO_FEATURES=${CARGO_FEATURES}" >> $GITHUB_ENV diff --git a/.github/workflows/codestyle.yml b/.github/workflows/codestyle.yml index 
eddfee88fc..b64ea8a01f 100644 --- a/.github/workflows/codestyle.yml +++ b/.github/workflows/codestyle.yml @@ -110,7 +110,7 @@ jobs: run: ./run_clippy.sh - name: Ensure all project builds - run: cargo build --all --all-targets + run: cargo build --locked --all --all-targets check-codestyle-python: runs-on: [ self-hosted, Linux, k8s-runner ] diff --git a/Dockerfile b/Dockerfile index 77598fd086..2dbe71f1ad 100644 --- a/Dockerfile +++ b/Dockerfile @@ -40,7 +40,7 @@ COPY . . # Show build caching stats to check if it was used in the end. # Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats. RUN set -e \ - && mold -run cargo build --release \ +&& mold -run cargo build --locked --release \ && cachepot -s # Build final image diff --git a/Dockerfile.compute-node b/Dockerfile.compute-node index 4527fb9ece..057441e730 100644 --- a/Dockerfile.compute-node +++ b/Dockerfile.compute-node @@ -69,9 +69,9 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \ # Compile and run the Neon-specific `compute_ctl` binary FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:$TAG AS compute-tools USER nonroot -COPY --chown=nonroot compute_tools compute_tools -COPY --chown=nonroot workspace_hack workspace_hack -RUN cd compute_tools && cargo build --release +# Copy entire project to get Cargo.* files with proper dependencies for the whole project +COPY --chown=nonroot . . 
+RUN cd compute_tools && cargo build --locked --release # Put it all together into the final image FROM debian:bullseye-slim @@ -86,7 +86,7 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \ # TODO: Check if we can make the extension setup more modular versus a linear build # currently plv8-build copies the output /usr/local/pgsql from postgis-build, etc# COPY --from=neon-pg-ext-build --chown=postgres /usr/local/pgsql /usr/local -COPY --from=compute-tools --chown=postgres /home/nonroot/compute_tools/target/release/compute_ctl /usr/local/bin/compute_ctl +COPY --from=compute-tools --chown=postgres /home/nonroot/target/release/compute_ctl /usr/local/bin/compute_ctl RUN apt update && \ apt install -y libreadline-dev libossp-uuid-dev gdal-bin libgdal-dev libprotobuf-c-dev && \ @@ -103,4 +103,4 @@ RUN echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.lis RUN ln -s /usr/local/bin/compute_ctl /usr/local/bin/zenith_ctl USER postgres -ENTRYPOINT ["/usr/local/bin/compute_ctl"] \ No newline at end of file +ENTRYPOINT ["/usr/local/bin/compute_ctl"] diff --git a/Dockerfile.compute-tools b/Dockerfile.compute-tools index 47c408bbf2..8231cd0ebb 100644 --- a/Dockerfile.compute-tools +++ b/Dockerfile.compute-tools @@ -20,7 +20,7 @@ ARG CACHEPOT_BUCKET=neon-github-dev COPY . . RUN set -e \ - && mold -run cargo build -p compute_tools --release \ + && mold -run cargo build -p compute_tools --locked --release \ && cachepot -s # Final image that only has one binary diff --git a/run_clippy.sh b/run_clippy.sh index 13af3fd2c5..9feb8de4ea 100755 --- a/run_clippy.sh +++ b/run_clippy.sh @@ -13,10 +13,10 @@ # avoid running regular linting script that checks every feature. 
if [[ "$OSTYPE" == "darwin"* ]]; then # no extra features to test currently, add more here when needed - cargo clippy --all --all-targets -- -A unknown_lints -D warnings + cargo clippy --locked --all --all-targets -- -A unknown_lints -D warnings else # * `-A unknown_lints` – do not warn about unknown lint suppressions # that people with newer toolchains might use # * `-D warnings` - fail on any warnings (`cargo` returns non-zero exit status) - cargo clippy --all --all-targets --all-features -- -A unknown_lints -D warnings + cargo clippy --locked --all --all-targets --all-features -- -A unknown_lints -D warnings fi From 6d30e21a326ed3d323bd563cbdecb15ee1d95ec9 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 26 Aug 2022 20:42:32 +0300 Subject: [PATCH 43/63] Fix proxy tests (#2343) There might be different psql & locale configurations, therefore we should explicitly reset them to defaults. --- test_runner/batch_others/test_proxy.py | 17 +++-------------- test_runner/fixtures/neon_fixtures.py | 13 ++++++++----- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/test_runner/batch_others/test_proxy.py b/test_runner/batch_others/test_proxy.py index 4ffd458b22..1efb795140 100644 --- a/test_runner/batch_others/test_proxy.py +++ b/test_runner/batch_others/test_proxy.py @@ -115,33 +115,22 @@ async def test_psql_session_id(vanilla_pg: VanillaPostgres, link_proxy: NeonProx Step 4. assert that select 1 has been executed correctly. """ - # Step 1. psql = PSQL( host=link_proxy.host, port=link_proxy.proxy_port, ) - proc = await psql.run("select 1") + proc = await psql.run("select 42") - # Step 2.1 uri_prefix = link_proxy.link_auth_uri_prefix line_str = await get_uri_line_from_process_welcome_notice(uri_prefix, proc) - # step 2.2 psql_session_id = get_session_id_from_uri_line(uri_prefix, line_str) log.info(f"Parsed psql_session_id='{psql_session_id}' from Neon welcome message.") - # Step 3. 
create_and_send_db_info(vanilla_pg, psql_session_id, link_proxy.mgmt_port) - # Step 4. - # Expecting proxy output:: - # b' ?column? \n' - # b'----------\n' - # b' 1\n' - # b'(1 row)\n' - out_bytes = await proc.stdout.read() - expected_out_bytes = b" ?column? \n----------\n 1\n(1 row)\n\n" - assert out_bytes == expected_out_bytes + out = (await proc.stdout.read()).decode("utf-8").strip() + assert out == "42" # Pass extra options to the server. diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 3af0cf4dcb..ad686e1fce 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1738,13 +1738,16 @@ class PSQL: self.database_url = f"postgres://{host}:{port}/main?options=project%3Dgeneric-project-name" async def run(self, query=None): - run_args = [self.path, self.database_url] - run_args += ["--command", query] if query is not None else [] + run_args = [self.path, "--no-psqlrc", "--quiet", "--tuples-only", self.database_url] + if query is not None: + run_args += ["--command", query] - cmd_line = subprocess.list2cmdline(run_args) - log.info(f"Run psql: {cmd_line}") + log.info(f"Run psql: {subprocess.list2cmdline(run_args)}") return await asyncio.create_subprocess_exec( - *run_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE + *run_args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env={"LC_ALL": "C", **os.environ}, # one locale to rule them all ) From c0a867d86fc76ae43d15b3111701cef046c3cc9a Mon Sep 17 00:00:00 2001 From: MMeent Date: Fri, 26 Aug 2022 19:58:08 +0200 Subject: [PATCH 44/63] Include neon extensions in the main neon images (#2341) Oversight in #2325 - apparently this area wasn't well-covered by tests in the neon repo. 
Fixes #2340 --- Dockerfile | 7 ++++--- vendor/postgres | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2dbe71f1ad..aa31e227da 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,12 +10,13 @@ ARG TAG=pinned FROM $REPOSITORY/$IMAGE:$TAG AS pg-build WORKDIR /home/nonroot -COPY vendor/postgres vendor/postgres -COPY Makefile Makefile +COPY --chown=nonroot vendor/postgres vendor/postgres +COPY --chown=nonroot pgxn pgxn +COPY --chown=nonroot Makefile Makefile ENV BUILD_TYPE release RUN set -e \ - && mold -run make -j $(nproc) -s postgres \ + && mold -run make -j $(nproc) -s neon-pg-ext \ && rm -rf tmp_install/build \ && tar -C tmp_install -czf /home/nonroot/postgres_install.tar.gz . diff --git a/vendor/postgres b/vendor/postgres index 8f132d968c..22d9ead36b 160000 --- a/vendor/postgres +++ b/vendor/postgres @@ -1 +1 @@ -Subproject commit 8f132d968cd44068fc6f72e4047f7d3d6320f4bb +Subproject commit 22d9ead36beeab6b6a99c64f9b0b1576927ad91b From ec20534173b06bc89e2c5ee604e63b256713d9ac Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Sat, 27 Aug 2022 17:54:56 +0300 Subject: [PATCH 45/63] Fix minor typos and leftover comments. 
--- pageserver/src/layered_repository/filename.rs | 2 +- pageserver/src/layered_repository/metadata.rs | 4 ++-- pageserver/src/layered_repository/timeline.rs | 2 +- pageserver/src/walreceiver/walreceiver_connection.rs | 2 +- test_runner/batch_others/test_remote_storage.py | 6 +++--- .../batch_others/test_tenants_with_remote_storage.py | 6 +++--- test_runner/batch_others/test_wal_acceptor.py | 12 ++++++------ 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/pageserver/src/layered_repository/filename.rs b/pageserver/src/layered_repository/filename.rs index f088088277..5ebac2332d 100644 --- a/pageserver/src/layered_repository/filename.rs +++ b/pageserver/src/layered_repository/filename.rs @@ -10,7 +10,7 @@ use std::path::PathBuf; use utils::lsn::Lsn; -// Note: LayeredTimeline::load_layer_map() relies on this sort order +// Note: Timeline::load_layer_map() relies on this sort order #[derive(Debug, PartialEq, Eq, Clone)] pub struct DeltaFileName { pub key_range: Range, diff --git a/pageserver/src/layered_repository/metadata.rs b/pageserver/src/layered_repository/metadata.rs index 74679cb43a..f3ddd42e76 100644 --- a/pageserver/src/layered_repository/metadata.rs +++ b/pageserver/src/layered_repository/metadata.rs @@ -1,4 +1,4 @@ -//! Every image of a certain timeline from [`crate::layered_repository::LayeredRepository`] +//! Every image of a certain timeline from [`crate::layered_repository::Repository`] //! has a metadata that needs to be stored persistently. //! //! Later, the file gets is used in [`crate::remote_storage::storage_sync`] as a part of @@ -30,7 +30,7 @@ pub const METADATA_FILE_NAME: &str = "metadata"; /// Metadata stored on disk for each timeline /// -/// The fields correspond to the values we hold in memory, in LayeredTimeline. +/// The fields correspond to the values we hold in memory, in Timeline. 
#[derive(Debug, Clone, PartialEq, Eq)] pub struct TimelineMetadata { hdr: TimelineMetadataHeader, diff --git a/pageserver/src/layered_repository/timeline.rs b/pageserver/src/layered_repository/timeline.rs index a909dcb5a1..ecf9a87500 100644 --- a/pageserver/src/layered_repository/timeline.rs +++ b/pageserver/src/layered_repository/timeline.rs @@ -366,7 +366,7 @@ pub struct Timeline { /// Layer removal lock. /// A lock to ensure that no layer of the timeline is removed concurrently by other threads. /// This lock is acquired in [`Timeline::gc`], [`Timeline::compact`], - /// and [`LayeredRepository::delete_timeline`]. + /// and [`Repository::delete_timeline`]. layer_removal_cs: Mutex<()>, // Needed to ensure that we can't create a branch at a point that was already garbage collected diff --git a/pageserver/src/walreceiver/walreceiver_connection.rs b/pageserver/src/walreceiver/walreceiver_connection.rs index b5f266614e..f816198eda 100644 --- a/pageserver/src/walreceiver/walreceiver_connection.rs +++ b/pageserver/src/walreceiver/walreceiver_connection.rs @@ -63,7 +63,7 @@ pub async fn handle_walreceiver_connection( ) .await .context("Timed out while waiting for walreceiver connection to open")? - .context("Failed to open walreceiver conection")?; + .context("Failed to open walreceiver connection")?; info!("connected!"); let mut connection_status = WalConnectionStatus { diff --git a/test_runner/batch_others/test_remote_storage.py b/test_runner/batch_others/test_remote_storage.py index 1e4fdc8602..0015c75670 100644 --- a/test_runner/batch_others/test_remote_storage.py +++ b/test_runner/batch_others/test_remote_storage.py @@ -38,17 +38,17 @@ from fixtures.utils import lsn_from_hex, query_scalar # * queries the specific data, ensuring that it matches the one stored before # # The tests are done for all types of remote storage pageserver supports. 
-@pytest.mark.parametrize("remote_storatge_kind", available_remote_storages()) +@pytest.mark.parametrize("remote_storage_kind", available_remote_storages()) def test_remote_storage_backup_and_restore( neon_env_builder: NeonEnvBuilder, - remote_storatge_kind: RemoteStorageKind, + remote_storage_kind: RemoteStorageKind, ): # Use this test to check more realistic SK ids: some etcd key parsing bugs were related, # and this test needs SK to write data to pageserver, so it will be visible neon_env_builder.safekeepers_id_start = 12 neon_env_builder.enable_remote_storage( - remote_storage_kind=remote_storatge_kind, + remote_storage_kind=remote_storage_kind, test_name="test_remote_storage_backup_and_restore", ) diff --git a/test_runner/batch_others/test_tenants_with_remote_storage.py b/test_runner/batch_others/test_tenants_with_remote_storage.py index 7db58c2a70..083150e12a 100644 --- a/test_runner/batch_others/test_tenants_with_remote_storage.py +++ b/test_runner/batch_others/test_tenants_with_remote_storage.py @@ -53,10 +53,10 @@ async def all_tenants_workload(env: NeonEnv, tenants_pgs): await asyncio.gather(*workers) -@pytest.mark.parametrize("remote_storatge_kind", available_remote_storages()) -def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: RemoteStorageKind): +@pytest.mark.parametrize("remote_storage_kind", available_remote_storages()) +def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storage_kind: RemoteStorageKind): neon_env_builder.enable_remote_storage( - remote_storage_kind=remote_storatge_kind, + remote_storage_kind=remote_storage_kind, test_name="test_tenants_many", ) diff --git a/test_runner/batch_others/test_wal_acceptor.py b/test_runner/batch_others/test_wal_acceptor.py index 47838ddb76..28daeb18ed 100644 --- a/test_runner/batch_others/test_wal_acceptor.py +++ b/test_runner/batch_others/test_wal_acceptor.py @@ -420,12 +420,12 @@ def wait_wal_trim(tenant_id, timeline_id, sk, target_size): time.sleep(0.5) 
-@pytest.mark.parametrize("remote_storatge_kind", available_remote_storages()) -def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: RemoteStorageKind): +@pytest.mark.parametrize("remote_storage_kind", available_remote_storages()) +def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storage_kind: RemoteStorageKind): neon_env_builder.num_safekeepers = 3 neon_env_builder.enable_remote_storage( - remote_storage_kind=remote_storatge_kind, + remote_storage_kind=remote_storage_kind, test_name="test_safekeepers_wal_backup", ) @@ -468,12 +468,12 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: Remo wait_segment_offload(tenant_id, timeline_id, env.safekeepers[1], "0/5000000") -@pytest.mark.parametrize("remote_storatge_kind", available_remote_storages()) -def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storatge_kind: RemoteStorageKind): +@pytest.mark.parametrize("remote_storage_kind", available_remote_storages()) +def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: RemoteStorageKind): neon_env_builder.num_safekeepers = 3 neon_env_builder.enable_remote_storage( - remote_storage_kind=remote_storatge_kind, + remote_storage_kind=remote_storage_kind, test_name="test_s3_wal_replay", ) From 88a339ed73b82bdb3aa9afcb5facdd5e63c20f99 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Sat, 27 Aug 2022 18:14:30 +0300 Subject: [PATCH 46/63] Update a few crates "cargo tree -d" showed that we're building multiple versions of some crates. Update some crates, to avoid depending on multiple versions. 
--- Cargo.lock | 45 ++++++++++----------------------------- libs/utils/Cargo.toml | 2 +- proxy/Cargo.toml | 4 ++-- workspace_hack/Cargo.toml | 10 +++++---- 4 files changed, 20 insertions(+), 41 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 505cbb66c3..73b9c318ea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1164,20 +1164,14 @@ version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" -[[package]] -name = "hashbrown" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" -dependencies = [ - "ahash", -] - [[package]] name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] [[package]] name = "heck" @@ -1379,7 +1373,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" dependencies = [ "autocfg", - "hashbrown 0.12.3", + "hashbrown", ] [[package]] @@ -2274,7 +2268,7 @@ dependencies = [ "clap 3.2.16", "futures", "git-version", - "hashbrown 0.11.2", + "hashbrown", "hex", "hmac 0.12.1", "hyper", @@ -2289,7 +2283,7 @@ dependencies = [ "routerify", "rstest", "rustls", - "rustls-pemfile 0.2.1", + "rustls-pemfile", "scopeguard", "serde", "serde_json", @@ -2315,15 +2309,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "quickcheck" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" -dependencies = [ - "rand", -] - [[package]] name = "quote" version = "1.0.21" @@ -2508,7 +2493,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "rustls", - "rustls-pemfile 1.0.1", + "rustls-pemfile", 
"serde", "serde_json", "serde_urlencoded", @@ -2697,15 +2682,6 @@ dependencies = [ "webpki", ] -[[package]] -name = "rustls-pemfile" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5eebeaeb360c87bfb72e84abdb3447159c0eaececf1bef2aecd65a8be949d1c9" -dependencies = [ - "base64", -] - [[package]] name = "rustls-pemfile" version = "1.0.1" @@ -3242,7 +3218,6 @@ dependencies = [ "js-sys", "libc", "num_threads", - "quickcheck", "time-macros", ] @@ -3683,7 +3658,7 @@ dependencies = [ "rand", "routerify", "rustls", - "rustls-pemfile 0.2.1", + "rustls-pemfile", "rustls-split", "serde", "serde_json", @@ -3969,6 +3944,7 @@ version = "0.1.0" dependencies = [ "ahash", "anyhow", + "bstr", "bytes", "chrono", "clap 2.34.0", @@ -3978,7 +3954,7 @@ dependencies = [ "futures-task", "futures-util", "generic-array", - "hashbrown 0.11.2", + "hashbrown", "hex", "hyper", "indexmap", @@ -3993,6 +3969,7 @@ dependencies = [ "prost", "rand", "regex", + "regex-automata", "regex-syntax", "scopeguard", "serde", diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index e3e78ec68f..28ad658de4 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -39,7 +39,7 @@ bytes = "1.0.1" hex-literal = "0.3" tempfile = "3.2" criterion = "0.3" -rustls-pemfile = "0.2.1" +rustls-pemfile = "1" [[bench]] name = "benchmarks" diff --git a/proxy/Cargo.toml b/proxy/Cargo.toml index 230fc8a253..d3f7ea5fdc 100644 --- a/proxy/Cargo.toml +++ b/proxy/Cargo.toml @@ -11,7 +11,7 @@ bstr = "0.2.17" bytes = { version = "1.0.1", features = ['serde'] } clap = "3.0" futures = "0.3.13" -hashbrown = "0.11.2" +hashbrown = "0.12" hex = "0.4.3" hmac = "0.12.1" hyper = "0.14" @@ -23,7 +23,7 @@ rand = "0.8.3" reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] } routerify = "3" rustls = "0.20.0" -rustls-pemfile = "0.2.1" +rustls-pemfile = "1" scopeguard = "1.1.0" serde = "1" serde_json = "1" diff --git 
a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index 4dc7e4e157..bfe61b9ced 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -16,6 +16,7 @@ publish = false [dependencies] ahash = { version = "0.7", features = ["std"] } anyhow = { version = "1", features = ["backtrace", "std"] } +bstr = { version = "0.2", features = ["lazy_static", "regex-automata", "serde", "serde1", "serde1-nostd", "std", "unicode"] } bytes = { version = "1", features = ["serde", "std"] } chrono = { version = "0.4", features = ["clock", "libc", "oldtime", "serde", "std", "time", "winapi"] } clap = { version = "2", features = ["ansi_term", "atty", "color", "strsim", "suggestions", "vec_map"] } @@ -25,7 +26,7 @@ futures-channel = { version = "0.3", features = ["alloc", "futures-sink", "sink" futures-task = { version = "0.3", default-features = false, features = ["alloc", "std"] } futures-util = { version = "0.3", default-features = false, features = ["alloc", "async-await", "async-await-macro", "channel", "futures-channel", "futures-io", "futures-macro", "futures-sink", "io", "memchr", "sink", "slab", "std"] } generic-array = { version = "0.14", default-features = false, features = ["more_lengths"] } -hashbrown = { version = "0.11", features = ["ahash", "inline-more", "raw"] } +hashbrown = { version = "0.12", features = ["ahash", "inline-more", "raw"] } hex = { version = "0.4", features = ["alloc", "serde", "std"] } hyper = { version = "0.14", features = ["client", "full", "h2", "http1", "http2", "runtime", "server", "socket2", "stream", "tcp"] } indexmap = { version = "1", default-features = false, features = ["std"] } @@ -40,12 +41,13 @@ num-traits = { version = "0.2", features = ["i128", "std"] } prost = { version = "0.10", features = ["prost-derive", "std"] } rand = { version = "0.8", features = ["alloc", "getrandom", "libc", "rand_chacha", "rand_hc", "small_rng", "std", "std_rng"] } regex = { version = "1", features = ["aho-corasick", "memchr", "perf", 
"perf-cache", "perf-dfa", "perf-inline", "perf-literal", "std", "unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] } +regex-automata = { version = "0.1", features = ["regex-syntax", "std"] } regex-syntax = { version = "0.6", features = ["unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] } scopeguard = { version = "1", features = ["use_std"] } serde = { version = "1", features = ["alloc", "derive", "serde_derive", "std"] } -time = { version = "0.3", features = ["alloc", "formatting", "itoa", "macros", "parsing", "quickcheck", "quickcheck-dep", "std", "time-macros"] } +time = { version = "0.3", features = ["alloc", "formatting", "itoa", "macros", "parsing", "std", "time-macros"] } tokio = { version = "1", features = ["bytes", "fs", "io-std", "io-util", "libc", "macros", "memchr", "mio", "net", "num_cpus", "once_cell", "process", "rt", "rt-multi-thread", "signal-hook-registry", "socket2", "sync", "time", "tokio-macros", "winapi"] } -tokio-util = { version = "0.7", features = ["codec", "io"] } +tokio-util = { version = "0.7", features = ["codec", "io", "tracing"] } tracing = { version = "0.1", features = ["attributes", "log", "std", "tracing-attributes"] } tracing-core = { version = "0.1", features = ["lazy_static", "std", "valuable"] } @@ -55,7 +57,7 @@ anyhow = { version = "1", features = ["backtrace", "std"] } bytes = { version = "1", features = ["serde", "std"] } clap = { version = "2", features = ["ansi_term", "atty", "color", "strsim", "suggestions", "vec_map"] } either = { version = "1", features = ["use_std"] } -hashbrown = { version = "0.11", features = ["ahash", "inline-more", "raw"] } +hashbrown = { version = "0.12", features = ["ahash", "inline-more", "raw"] } indexmap = { version = "1", default-features = false, features = ["std"] } libc = { version = "0.2", features = ["extra_traits", "std"] } log = { 
version = "0.4", default-features = false, features = ["serde", "std"] } From 34b5d7aa9f59d12e753a1d00ce410350b439d7a0 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Sat, 27 Aug 2022 18:14:33 +0300 Subject: [PATCH 47/63] Remove unused dependency --- Cargo.lock | 1 - libs/postgres_ffi/Cargo.toml | 1 - pageserver/Cargo.toml | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 73b9c318ea..603e034ed3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2105,7 +2105,6 @@ dependencies = [ "bindgen", "byteorder", "bytes", - "chrono", "crc32c", "env_logger", "hex", diff --git a/libs/postgres_ffi/Cargo.toml b/libs/postgres_ffi/Cargo.toml index 0118701a7e..5b9ecb7394 100644 --- a/libs/postgres_ffi/Cargo.toml +++ b/libs/postgres_ffi/Cargo.toml @@ -4,7 +4,6 @@ version = "0.1.0" edition = "2021" [dependencies] -chrono = "0.4.19" rand = "0.8.3" regex = "1.4.5" bytes = "1.0.1" diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index 63a2263ae0..902765f424 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -15,7 +15,7 @@ failpoints = ["fail/failpoints"] chrono = "0.4.19" rand = "0.8.3" regex = "1.4.5" -bytes = { version = "1.0.1", features = ['serde'] } +bytes = "1.0.1" byteorder = "1.4.3" futures = "0.3.13" hex = "0.4.3" From f8188e679c51dcd07e90a4152d720e323748c412 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Sat, 27 Aug 2022 18:14:35 +0300 Subject: [PATCH 48/63] Downgrade a few panics into plain errors. Let's not bring down the whole pageserver if you import a bogus tar archive to one timeline. 
--- pageserver/src/import_datadir.rs | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/pageserver/src/import_datadir.rs b/pageserver/src/import_datadir.rs index 54e791e5b5..4cc3aafb0e 100644 --- a/pageserver/src/import_datadir.rs +++ b/pageserver/src/import_datadir.rs @@ -331,7 +331,11 @@ pub fn import_basebackup_from_tar( debug!("directory {:?}", file_path); } _ => { - panic!("tar::EntryType::?? {}", file_path.display()); + bail!( + "entry {} in backup tar archive is of unexpected type: {:?}", + file_path.display(), + header.entry_type() + ); } } } @@ -384,7 +388,11 @@ pub fn import_wal_from_tar( continue; } _ => { - panic!("tar::EntryType::?? {}", file_path.display()); + bail!( + "entry {} in WAL tar archive is of unexpected type: {:?}", + file_path.display(), + header.entry_type() + ); } } }; @@ -424,14 +432,12 @@ pub fn import_wal_from_tar( Ok(()) } -pub fn import_file( +fn import_file( modification: &mut DatadirModification, file_path: &Path, reader: Reader, len: usize, ) -> Result> { - debug!("looking at {:?}", file_path); - if file_path.starts_with("global") { let spcnode = pg_constants::GLOBALTABLESPACE_OID; let dbnode = 0; @@ -553,7 +559,10 @@ pub fn import_file( // this to import arbitrary postgres databases. bail!("Importing pg_tblspc is not implemented"); } else { - debug!("ignored"); + debug!( + "ignoring unrecognized file \"{}\" in tar archive", + file_path.display() + ); } Ok(None) From 5f189cd3855c3526d52f6184363e2026458025ae Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Sat, 27 Aug 2022 18:14:38 +0300 Subject: [PATCH 49/63] Remove some unnecessary derives. Doesn't make much difference, but let's be tidy. 
--- pageserver/src/storage_sync/index.rs | 2 +- safekeeper/src/safekeeper.rs | 2 +- safekeeper/src/send_wal.rs | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pageserver/src/storage_sync/index.rs b/pageserver/src/storage_sync/index.rs index 3dddda09bf..7e644da412 100644 --- a/pageserver/src/storage_sync/index.rs +++ b/pageserver/src/storage_sync/index.rs @@ -210,7 +210,7 @@ impl RemoteTimelineIndex { } /// Restored index part data about the timeline, stored in the remote index. -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, Clone)] pub struct RemoteTimeline { timeline_layers: HashSet, missing_layers: HashSet, diff --git a/safekeeper/src/safekeeper.rs b/safekeeper/src/safekeeper.rs index 22f8ca2de4..ed34669dde 100644 --- a/safekeeper/src/safekeeper.rs +++ b/safekeeper/src/safekeeper.rs @@ -332,7 +332,7 @@ pub struct AppendRequestHeader { } /// Report safekeeper state to proposer -#[derive(Debug, PartialEq, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize)] pub struct AppendResponse { // Current term of the safekeeper; if it is higher than proposer's, the // compute is out of date. 
diff --git a/safekeeper/src/send_wal.rs b/safekeeper/src/send_wal.rs index 97ec945c3e..38523f9f82 100644 --- a/safekeeper/src/send_wal.rs +++ b/safekeeper/src/send_wal.rs @@ -36,7 +36,7 @@ const NEON_STATUS_UPDATE_TAG_BYTE: u8 = b'z'; type FullTransactionId = u64; /// Hot standby feedback received from replica -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub struct HotStandbyFeedback { pub ts: TimestampTz, pub xmin: FullTransactionId, @@ -54,7 +54,7 @@ impl HotStandbyFeedback { } /// Standby status update -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Deserialize)] pub struct StandbyReply { pub write_lsn: Lsn, // last lsn received by pageserver pub flush_lsn: Lsn, // pageserver's disk consistent lSN From 7a840ec60ca7248625a9e88fbddcf14ca67207ed Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Sat, 27 Aug 2022 18:14:40 +0300 Subject: [PATCH 50/63] Move save_metadata function. `timeline.rs` seems like a better home for it. 
--- pageserver/src/layered_repository.rs | 6 +- pageserver/src/layered_repository/metadata.rs | 64 +++++++++++++++---- pageserver/src/layered_repository/timeline.rs | 41 +----------- 3 files changed, 56 insertions(+), 55 deletions(-) diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs index fae52c3daf..36b8e3eb9e 100644 --- a/pageserver/src/layered_repository.rs +++ b/pageserver/src/layered_repository.rs @@ -69,7 +69,7 @@ pub use timeline::Timeline; pub use crate::layered_repository::ephemeral_file::writeback as writeback_ephemeral_file; // re-export for use in storage_sync.rs -pub use crate::layered_repository::timeline::save_metadata; +pub use crate::layered_repository::metadata::save_metadata; // re-export for use in walreceiver pub use crate::layered_repository::timeline::WalReceiverInfo; @@ -185,7 +185,7 @@ impl Repository { crashsafe_dir::create_dir_all(timeline_path)?; let metadata = TimelineMetadata::new(Lsn(0), None, None, Lsn(0), initdb_lsn, initdb_lsn); - timeline::save_metadata(self.conf, timeline_id, self.tenant_id, &metadata, true)?; + save_metadata(self.conf, timeline_id, self.tenant_id, &metadata, true)?; let timeline = Timeline::new( self.conf, @@ -294,7 +294,7 @@ impl Repository { src_timeline.initdb_lsn, ); crashsafe_dir::create_dir_all(self.conf.timeline_path(&dst, &self.tenant_id))?; - timeline::save_metadata(self.conf, dst, self.tenant_id, &metadata, true)?; + save_metadata(self.conf, dst, self.tenant_id, &metadata, true)?; timelines.insert(dst, LayeredTimelineEntry::Unloaded { id: dst, metadata }); info!("branched timeline {} from {} at {}", dst, src, start_lsn); diff --git a/pageserver/src/layered_repository/metadata.rs b/pageserver/src/layered_repository/metadata.rs index f3ddd42e76..910dba4644 100644 --- a/pageserver/src/layered_repository/metadata.rs +++ b/pageserver/src/layered_repository/metadata.rs @@ -6,10 +6,13 @@ //! //! 
The module contains all structs and related helper methods related to timeline metadata. +use std::fs::{File, OpenOptions}; +use std::io::Write; use std::path::PathBuf; -use anyhow::ensure; +use anyhow::{bail, ensure, Context}; use serde::{Deserialize, Serialize}; +use tracing::info_span; use utils::{ bin_ser::BeSer, lsn::Lsn, @@ -17,6 +20,7 @@ use utils::{ }; use crate::config::PageServerConf; +use crate::virtual_file::VirtualFile; use crate::STORAGE_FORMAT_VERSION; /// We assume that a write of up to METADATA_MAX_SIZE bytes is atomic. @@ -65,17 +69,6 @@ struct TimelineMetadataBody { initdb_lsn: Lsn, } -/// Points to a place in pageserver's local directory, -/// where certain timeline's metadata file should be located. -pub fn metadata_path( - conf: &'static PageServerConf, - timelineid: ZTimelineId, - tenantid: ZTenantId, -) -> PathBuf { - conf.timeline_path(&timelineid, &tenantid) - .join(METADATA_FILE_NAME) -} - impl TimelineMetadata { pub fn new( disk_consistent_lsn: Lsn, @@ -173,6 +166,53 @@ impl TimelineMetadata { } } +/// Points to a place in pageserver's local directory, +/// where certain timeline's metadata file should be located. 
+pub fn metadata_path( + conf: &'static PageServerConf, + timelineid: ZTimelineId, + tenantid: ZTenantId, +) -> PathBuf { + conf.timeline_path(&timelineid, &tenantid) + .join(METADATA_FILE_NAME) +} + +/// Save timeline metadata to file +pub fn save_metadata( + conf: &'static PageServerConf, + timelineid: ZTimelineId, + tenantid: ZTenantId, + data: &TimelineMetadata, + first_save: bool, +) -> anyhow::Result<()> { + let _enter = info_span!("saving metadata").entered(); + let path = metadata_path(conf, timelineid, tenantid); + // use OpenOptions to ensure file presence is consistent with first_save + let mut file = VirtualFile::open_with_options( + &path, + OpenOptions::new().write(true).create_new(first_save), + )?; + + let metadata_bytes = data.to_bytes().context("Failed to get metadata bytes")?; + + if file.write(&metadata_bytes)? != metadata_bytes.len() { + bail!("Could not write all the metadata bytes in a single call"); + } + file.sync_all()?; + + // fsync the parent directory to ensure the directory entry is durable + if first_save { + let timeline_dir = File::open( + &path + .parent() + .expect("Metadata should always have a parent dir"), + )?; + timeline_dir.sync_all()?; + } + + Ok(()) +} + #[cfg(test)] mod tests { use super::*; diff --git a/pageserver/src/layered_repository/timeline.rs b/pageserver/src/layered_repository/timeline.rs index ecf9a87500..5f3d669dc1 100644 --- a/pageserver/src/layered_repository/timeline.rs +++ b/pageserver/src/layered_repository/timeline.rs @@ -11,8 +11,6 @@ use tracing::*; use std::cmp::{max, min, Ordering}; use std::collections::{HashMap, HashSet}; use std::fs; -use std::fs::{File, OpenOptions}; -use std::io::Write; use std::ops::{Deref, Range}; use std::path::PathBuf; use std::sync::atomic::{self, AtomicBool, AtomicI64, Ordering as AtomicOrdering}; @@ -32,7 +30,7 @@ use crate::layered_repository::{ image_layer::{ImageLayer, ImageLayerWriter}, inmemory_layer::InMemoryLayer, layer_map::{LayerMap, SearchResult}, - 
metadata::{metadata_path, TimelineMetadata, METADATA_FILE_NAME}, + metadata::{save_metadata, TimelineMetadata, METADATA_FILE_NAME}, par_fsync, storage_layer::{Layer, ValueReconstructResult, ValueReconstructState}, }; @@ -54,7 +52,6 @@ use utils::{ use crate::repository::{GcResult, RepositoryTimeline}; use crate::repository::{Key, Value}; use crate::thread_mgr; -use crate::virtual_file::VirtualFile; use crate::walreceiver::IS_WAL_RECEIVER; use crate::walredo::WalRedoManager; use crate::CheckpointConfig; @@ -2342,39 +2339,3 @@ fn rename_to_backup(path: PathBuf) -> anyhow::Result<()> { bail!("couldn't find an unused backup number for {:?}", path) } - -/// Save timeline metadata to file -pub fn save_metadata( - conf: &'static PageServerConf, - timelineid: ZTimelineId, - tenantid: ZTenantId, - data: &TimelineMetadata, - first_save: bool, -) -> Result<()> { - let _enter = info_span!("saving metadata").entered(); - let path = metadata_path(conf, timelineid, tenantid); - // use OpenOptions to ensure file presence is consistent with first_save - let mut file = VirtualFile::open_with_options( - &path, - OpenOptions::new().write(true).create_new(first_save), - )?; - - let metadata_bytes = data.to_bytes().context("Failed to get metadata bytes")?; - - if file.write(&metadata_bytes)? != metadata_bytes.len() { - bail!("Could not write all the metadata bytes in a single call"); - } - file.sync_all()?; - - // fsync the parent directory to ensure the directory entry is durable - if first_save { - let timeline_dir = File::open( - &path - .parent() - .expect("Metadata should always have a parent dir"), - )?; - timeline_dir.sync_all()?; - } - - Ok(()) -} From bfa1d916124962f0079c25ab7c17fd6fb5d698a1 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Mon, 29 Aug 2022 11:23:37 +0300 Subject: [PATCH 51/63] Introduce RCU, and use it to protect latest_gc_cutoff_lsn. `latest_gc_cutoff_lsn` tracks the cutoff point where GC has been performed. 
Anything older than the cutoff might already have been GC'd away, and cannot be queried by get_page_at_lsn requests. It's protected by an RWLock. Whenever a get_page_at_lsn request comes in, it first grabs the lock and reads the current `latest_gc_cutoff_lsn`, and holds the lock until the request has been served. The lock ensures that GC doesn't start concurrently and remove page versions that we still need to satisfy the request. With the lock, a get_page_at_lsn request could potentially be blocked for a long time. GC only holds the lock in exclusive mode for a short duration, but depending on whether the RWLock is "fair", a read request might be queued behind the GC's exclusive request, which in turn might be queued behind a long-running read operation, like a basebackup. If the lock implementation is not fair, i.e. if a reader can always jump the queue if the lock is already held in read mode, then another problem arises: GC might be starved if a constant stream of GetPage requests comes in. To avoid the long wait or starvation, introduce a Read-Copy-Update mechanism to replace the lock on `latest_gc_cutoff_lsn`. With the RCU, a reader can always read the latest value without blocking (except for a very short duration if the lock protecting the RCU is contended; that's comparable to a spinlock). And a writer can always write a new value without waiting for readers to finish using the old value. The old readers will continue to see the old value through their guard object, while new readers will see the new value. This is purely theoretical ATM, we don't have any reports of either starvation or blocking behind GC happening in practice. But it's simple to fix, so let's nip that problem in the bud.
--- libs/utils/src/lib.rs | 3 + libs/utils/src/simple_rcu.rs | 217 ++++++++++++++++++ pageserver/src/layered_repository.rs | 5 +- pageserver/src/layered_repository/timeline.rs | 33 ++- pageserver/src/page_service.rs | 5 +- 5 files changed, 249 insertions(+), 14 deletions(-) create mode 100644 libs/utils/src/simple_rcu.rs diff --git a/libs/utils/src/lib.rs b/libs/utils/src/lib.rs index 1b011bb73a..fa7a37adf1 100644 --- a/libs/utils/src/lib.rs +++ b/libs/utils/src/lib.rs @@ -8,6 +8,9 @@ pub mod lsn; /// SeqWait allows waiting for a future sequence number to arrive pub mod seqwait; +/// A simple Read-Copy-Update implementation. +pub mod simple_rcu; + /// append only ordered map implemented with a Vec pub mod vec_map; diff --git a/libs/utils/src/simple_rcu.rs b/libs/utils/src/simple_rcu.rs new file mode 100644 index 0000000000..24423815ab --- /dev/null +++ b/libs/utils/src/simple_rcu.rs @@ -0,0 +1,217 @@ +//! +//! RCU stands for Read-Copy-Update. It's a synchronization mechanism somewhat +//! similar to a lock, but it allows readers to "hold on" to an old value of RCU +//! without blocking writers, and allows writing a new values without blocking +//! readers. When you update the new value, the new value is immediately visible +//! to new readers, but the update waits until all existing readers have +//! finishe, so that no one sees the old value anymore. +//! +//! This implementation isn't wait-free; it uses an RwLock that is held for a +//! short duration when the value is read or updated. +//! +#![warn(missing_docs)] + +use std::ops::Deref; +use std::sync::mpsc::{sync_channel, Receiver, SyncSender}; +use std::sync::{Arc, Weak}; +use std::sync::{Mutex, RwLock, RwLockWriteGuard}; + +/// +/// Rcu allows multiple readers to read and hold onto a value without blocking +/// (for very long). Storing to the Rcu updates the value, making new readers +/// immediately see the new value, but it also waits for all current readers to +/// finish. 
+/// +pub struct Rcu { + inner: RwLock>, +} + +struct RcuInner { + current_cell: Arc>, + old_cells: Vec>>, +} + +/// +/// RcuCell holds one value. It can be the latest one, or an old one. +/// +struct RcuCell { + value: V, + + /// A dummy channel. We never send anything to this channel. The point is + /// that when the RcuCell is dropped, any cloned Senders will be notified + /// that the channel is closed. Updaters can use this to wait out until the + /// RcuCell has been dropped, i.e. until the old value is no longer in use. + /// + /// We never do anything with the receiver, we just need to hold onto it so + /// that the Senders will be notified when it's dropped. But because it's + /// not Sync, we need a Mutex on it. + watch: (SyncSender<()>, Mutex>), +} + +impl RcuCell { + fn new(value: V) -> Self { + let (watch_sender, watch_receiver) = sync_channel(0); + RcuCell { + value, + watch: (watch_sender, Mutex::new(watch_receiver)), + } + } +} + +impl Rcu { + /// Create a new `Rcu`, initialized to `starting_val` + pub fn new(starting_val: V) -> Self { + let inner = RcuInner { + current_cell: Arc::new(RcuCell::new(starting_val)), + old_cells: Vec::new(), + }; + Self { + inner: RwLock::new(inner), + } + } + + /// + /// Read current value. Any store() calls will block until the returned + /// guard object is dropped. + /// + pub fn read(&self) -> RcuReadGuard { + let current_cell = Arc::clone(&self.inner.read().unwrap().current_cell); + RcuReadGuard { cell: current_cell } + } + + /// + /// Lock the current value for updating. Returns a guard object that can be + /// used to read the current value, and to store a new value. + /// + /// Note: holding the write-guard blocks concurrent readers, so you should + /// finish the update and drop the guard quickly! 
+ /// + pub fn write(&self) -> RcuWriteGuard<'_, V> { + let inner = self.inner.write().unwrap(); + RcuWriteGuard { inner } + } +} + +/// +/// Read guard returned by `read` +/// +pub struct RcuReadGuard { + cell: Arc>, +} + +impl Deref for RcuReadGuard { + type Target = V; + + fn deref(&self) -> &V { + &self.cell.value + } +} + +/// +/// Read guard returned by `read` +/// +pub struct RcuWriteGuard<'a, V> { + inner: RwLockWriteGuard<'a, RcuInner>, +} + +impl<'a, V> Deref for RcuWriteGuard<'a, V> { + type Target = V; + + fn deref(&self) -> &V { + &self.inner.current_cell.value + } +} + +impl<'a, V> RcuWriteGuard<'a, V> { + /// + /// Store a new value. The new value will be written to the Rcu immediately, + /// and will be immediately seen by any `read` calls that start afterwards. + /// But if there are any readers still holding onto the old value, or any + /// even older values, this will await until they have been released. + /// + /// This will drop the write-guard before it starts waiting for the reads to + /// finish, so a new write operation can begin before this functio returns. + /// + pub fn store(mut self, new_val: V) { + let new_cell = Arc::new(RcuCell::new(new_val)); + + let mut watches = Vec::new(); + { + let old = std::mem::replace(&mut self.inner.current_cell, new_cell); + self.inner.old_cells.push(Arc::downgrade(&old)); + + // cleanup old cells that no longer have any readers, and collect + // the watches for any that do. + self.inner.old_cells.retain(|weak| { + if let Some(cell) = weak.upgrade() { + watches.push(cell.watch.0.clone()); + true + } else { + false + } + }); + } + drop(self); + + // after all the old_cells are no longer in use, we're done + for w in watches.iter_mut() { + // This will block until the Receiver is closed. That happens then + // the RcuCell is dropped. 
+ #[allow(clippy::single_match)] + match w.send(()) { + Ok(_) => panic!("send() unexpectedly succeeded on dummy channel"), + Err(_) => { + // closed, which means that the cell has been dropped, and + // its value is no longer in use + } + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::{Arc, Mutex}; + use std::thread::{sleep, spawn}; + use std::time::Duration; + + #[test] + fn basic() { + let rcu = Arc::new(Rcu::new(1)); + let log = Arc::new(Mutex::new(Vec::new())); + + let a = rcu.read(); + assert_eq!(*a, 1); + log.lock().unwrap().push("one"); + + let (rcu_clone, log_clone) = (Arc::clone(&rcu), Arc::clone(&log)); + let thread = spawn(move || { + log_clone.lock().unwrap().push("store two start"); + let write_guard = rcu_clone.write(); + assert_eq!(*write_guard, 1); + write_guard.store(2); + log_clone.lock().unwrap().push("store two done"); + }); + // without this sleep the test can pass on accident if the writer is slow + sleep(Duration::from_secs(1)); + + // new read should see the new value + let b = rcu.read(); + assert_eq!(*b, 2); + + // old guard still sees the old value + assert_eq!(*a, 1); + + // Release the old guard. This lets the store in the thread to finish. 
+ log.lock().unwrap().push("release a"); + drop(a); + + thread.join().unwrap(); + + assert_eq!( + log.lock().unwrap().as_slice(), + &["one", "store two start", "release a", "store two done",] + ); + } +} diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs index 36b8e3eb9e..73c30b51b8 100644 --- a/pageserver/src/layered_repository.rs +++ b/pageserver/src/layered_repository.rs @@ -254,7 +254,8 @@ impl Repository { src_timeline .check_lsn_is_in_scope(start_lsn, &latest_gc_cutoff_lsn) .context(format!( - "invalid branch start lsn: less than latest GC cutoff {latest_gc_cutoff_lsn}" + "invalid branch start lsn: less than latest GC cutoff {}", + *latest_gc_cutoff_lsn ))?; { let gc_info = src_timeline.gc_info.read().unwrap(); @@ -290,7 +291,7 @@ impl Repository { dst_prev, Some(src), start_lsn, - *src_timeline.latest_gc_cutoff_lsn.read().unwrap(), + *src_timeline.latest_gc_cutoff_lsn.read(), src_timeline.initdb_lsn, ); crashsafe_dir::create_dir_all(self.conf.timeline_path(&dst, &self.tenant_id))?; diff --git a/pageserver/src/layered_repository/timeline.rs b/pageserver/src/layered_repository/timeline.rs index 5f3d669dc1..1a941affe5 100644 --- a/pageserver/src/layered_repository/timeline.rs +++ b/pageserver/src/layered_repository/timeline.rs @@ -14,7 +14,7 @@ use std::fs; use std::ops::{Deref, Range}; use std::path::PathBuf; use std::sync::atomic::{self, AtomicBool, AtomicI64, Ordering as AtomicOrdering}; -use std::sync::{Arc, Mutex, MutexGuard, RwLock, RwLockReadGuard, TryLockError}; +use std::sync::{Arc, Mutex, MutexGuard, RwLock, TryLockError}; use std::time::{Duration, Instant, SystemTime}; use metrics::{ @@ -46,6 +46,7 @@ use postgres_ffi::v14::xlog_utils::to_pg_timestamp; use utils::{ lsn::{AtomicLsn, Lsn, RecordLsn}, seqwait::SeqWait, + simple_rcu::{Rcu, RcuReadGuard}, zid::{ZTenantId, ZTimelineId}, }; @@ -367,7 +368,7 @@ pub struct Timeline { layer_removal_cs: Mutex<()>, // Needed to ensure that we can't create a branch at a point 
that was already garbage collected - pub latest_gc_cutoff_lsn: RwLock, + pub latest_gc_cutoff_lsn: Rcu, // List of child timelines and their branch points. This is needed to avoid // garbage collecting data that is still needed by the child timelines. @@ -478,8 +479,8 @@ impl Timeline { } /// Lock and get timeline's GC cuttof - pub fn get_latest_gc_cutoff_lsn(&self) -> RwLockReadGuard { - self.latest_gc_cutoff_lsn.read().unwrap() + pub fn get_latest_gc_cutoff_lsn(&self) -> RcuReadGuard { + self.latest_gc_cutoff_lsn.read() } /// Look up given page version. @@ -594,7 +595,7 @@ impl Timeline { pub fn check_lsn_is_in_scope( &self, lsn: Lsn, - latest_gc_cutoff_lsn: &RwLockReadGuard, + latest_gc_cutoff_lsn: &RcuReadGuard, ) -> Result<()> { ensure!( lsn >= **latest_gc_cutoff_lsn, @@ -729,7 +730,7 @@ impl Timeline { pitr_cutoff: Lsn(0), }), - latest_gc_cutoff_lsn: RwLock::new(metadata.latest_gc_cutoff_lsn()), + latest_gc_cutoff_lsn: Rcu::new(metadata.latest_gc_cutoff_lsn()), initdb_lsn: metadata.initdb_lsn(), current_logical_size: AtomicI64::new(0), @@ -1377,7 +1378,7 @@ impl Timeline { ondisk_prev_record_lsn, ancestor_timelineid, self.ancestor_lsn, - *self.latest_gc_cutoff_lsn.read().unwrap(), + *self.latest_gc_cutoff_lsn.read(), self.initdb_lsn, ); @@ -2032,9 +2033,21 @@ impl Timeline { let _enter = info_span!("garbage collection", timeline = %self.timeline_id, tenant = %self.tenant_id, cutoff = %new_gc_cutoff).entered(); - // We need to ensure that no one branches at a point before latest_gc_cutoff_lsn. - // See branch_timeline() for details. - *self.latest_gc_cutoff_lsn.write().unwrap() = new_gc_cutoff; + // We need to ensure that no one tries to read page versions or create + // branches at a point before latest_gc_cutoff_lsn. See branch_timeline() + // for details. This will block until the old value is no longer in use. + // + // The GC cutoff should only ever move forwards. 
+ { + let write_guard = self.latest_gc_cutoff_lsn.write(); + ensure!( + *write_guard <= new_gc_cutoff, + "Cannot move GC cutoff LSN backwards (was {}, new {})", + *write_guard, + new_gc_cutoff + ); + write_guard.store(new_gc_cutoff); + } info!("GC starting"); diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index ebcff1f2ac..fbc70f7690 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -17,13 +17,14 @@ use std::io::{self, Read}; use std::net::TcpListener; use std::str; use std::str::FromStr; -use std::sync::{Arc, RwLockReadGuard}; +use std::sync::Arc; use tracing::*; use utils::{ auth::{self, Claims, JwtAuth, Scope}, lsn::Lsn, postgres_backend::{self, is_socket_read_timed_out, AuthType, PostgresBackend}, pq_proto::{BeMessage, FeMessage, RowDescriptor, SINGLE_COL_ROWDESC}, + simple_rcu::RcuReadGuard, zid::{ZTenantId, ZTimelineId}, }; @@ -639,7 +640,7 @@ impl PageServerHandler { timeline: &Timeline, mut lsn: Lsn, latest: bool, - latest_gc_cutoff_lsn: &RwLockReadGuard, + latest_gc_cutoff_lsn: &RcuReadGuard, ) -> Result { if latest { // Latest page version was requested. If LSN is given, it is a hint From 1324dd89ed612e709fc8c84206d0a32936382789 Mon Sep 17 00:00:00 2001 From: MMeent Date: Mon, 29 Aug 2022 13:44:56 +0200 Subject: [PATCH 52/63] Mark PostGIS and PLV8 as trusted extensions (#2355) Now, users can install these extensions themselves if they are owner of the database they try to install the extension in. 
--- Dockerfile.compute-node | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Dockerfile.compute-node b/Dockerfile.compute-node index 057441e730..950ec16016 100644 --- a/Dockerfile.compute-node +++ b/Dockerfile.compute-node @@ -33,7 +33,11 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.2.3.tar.gz && \ make -j $(getconf _NPROCESSORS_ONLN) install && \ cd extensions/postgis && \ make clean && \ - make -j $(getconf _NPROCESSORS_ONLN) install + make -j $(getconf _NPROCESSORS_ONLN) install && \ + echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis.control && \ + echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_raster.control && \ + echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \ + echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control # Build plv8 FROM build-deps AS plv8-build @@ -54,7 +58,8 @@ RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.3.tar.gz && \ export PATH="/usr/local/pgsql/bin:$PATH" && \ make && \ make install && \ - rm -rf /plv8-* + rm -rf /plv8-* && \ + echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control # compile neon extensions FROM build-deps AS neon-pg-ext-build From ee8b5f967dab4ffac35f8c920b4b2d37567c3105 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Mon, 29 Aug 2022 17:59:04 +0300 Subject: [PATCH 53/63] Add fork_at_current_lsn function which creates branch at current LSN (#2344) * Add fork_at_current_lsn function which creates branch at current LSN * Undo use of fork_at_current_lsn in test_branching because of short GC period * Add missed return in fork_at_current_lsn * Add missed return in fork_at_current_lsn * Update test_runner/fixtures/neon_fixtures.py Co-authored-by: Heikki Linnakangas * Update test_runner/fixtures/neon_fixtures.py Co-authored-by: Heikki Linnakangas * Update test_runner/fixtures/neon_fixtures.py Co-authored-by: Heikki Linnakangas 
Co-authored-by: Heikki Linnakangas --- test_runner/batch_others/test_twophase.py | 4 ++-- test_runner/batch_others/test_vm_bits.py | 4 ++-- test_runner/fixtures/neon_fixtures.py | 16 ++++++++++++++++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/test_runner/batch_others/test_twophase.py b/test_runner/batch_others/test_twophase.py index e01ba7caef..f3b0f9ca06 100644 --- a/test_runner/batch_others/test_twophase.py +++ b/test_runner/batch_others/test_twophase.py @@ -1,7 +1,7 @@ import os from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv +from fixtures.neon_fixtures import NeonEnv, fork_at_current_lsn # @@ -55,7 +55,7 @@ def test_twophase(neon_simple_env: NeonEnv): assert len(twophase_files) == 2 # Create a branch with the transaction in prepared state - env.neon_cli.create_branch("test_twophase_prepared", "test_twophase") + fork_at_current_lsn(env, pg, "test_twophase_prepared", "test_twophase") # Start compute on the new branch pg2 = env.postgres.create_start( diff --git a/test_runner/batch_others/test_vm_bits.py b/test_runner/batch_others/test_vm_bits.py index c147c6dff5..16a870471b 100644 --- a/test_runner/batch_others/test_vm_bits.py +++ b/test_runner/batch_others/test_vm_bits.py @@ -1,5 +1,5 @@ from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv +from fixtures.neon_fixtures import NeonEnv, fork_at_current_lsn # @@ -33,7 +33,7 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv): cur.execute("UPDATE vmtest_update SET id = 5000 WHERE id = 1") # Branch at this point, to test that later - env.neon_cli.create_branch("test_vm_bit_clear_new", "test_vm_bit_clear") + fork_at_current_lsn(env, pg, "test_vm_bit_clear_new", "test_vm_bit_clear") # Clear the buffer cache, to force the VM page to be re-fetched from # the page server diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index ad686e1fce..32fd6f19c3 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ 
b/test_runner/fixtures/neon_fixtures.py @@ -2640,3 +2640,19 @@ def wait_for_last_flush_lsn(env: NeonEnv, pg: Postgres, tenant: uuid.UUID, timel """Wait for pageserver to catch up the latest flush LSN""" last_flush_lsn = lsn_from_hex(pg.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) wait_for_last_record_lsn(env.pageserver.http_client(), tenant, timeline, last_flush_lsn) + + +def fork_at_current_lsn( + env: NeonEnv, + pg: Postgres, + new_branch_name: str, + ancestor_branch_name: str, + tenant_id: Optional[uuid.UUID] = None, +) -> uuid.UUID: + """ + Create new branch at the last LSN of an existing branch. + The "last LSN" is taken from the given Postgres instance. The pageserver will wait for all + the WAL up to that LSN to arrive in the pageserver before creating the branch. + """ + current_lsn = pg.safe_psql("SELECT pg_current_wal_lsn()")[0][0] + return env.neon_cli.create_branch(new_branch_name, ancestor_branch_name, tenant_id, current_lsn) From 07b4ace52fd6097e74d982ba4dbd74dd28a4f8dc Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Sat, 27 Aug 2022 01:50:18 +0300 Subject: [PATCH 54/63] Use more restrictive .dockerignore --- .dockerignore | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/.dockerignore b/.dockerignore index 8a3d32e6d2..2c78951923 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,19 +1,18 @@ -**/.git/ -**/__pycache__ -**/.pytest_cache +* -.git -.github -target -tmp_check -tmp_install -tmp_check_cli -test_output -.vscode -.neon -integration_tests/.neon -.mypy_cache - -Dockerfile -.dockerignore +!Cargo.toml +!Cargo.lock +!Makefile +!.cargo/ +!.config/ +!control_plane/ +!compute_tools/ +!libs/ +!pageserver/ +!pgxn/ +!proxy/ +!safekeeper/ +!vendor/postgres/ +!workspace_hack/ +!neon_local/ From 60408db101b2ddcf877759405acb6ab6f6af7505 Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Tue, 30 Aug 2022 10:52:58 +0300 Subject: [PATCH 55/63] Fix logging scopes in safekeeper.
--- safekeeper/src/timeline.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/safekeeper/src/timeline.rs b/safekeeper/src/timeline.rs index 3a10c5d59e..f482dbb3aa 100644 --- a/safekeeper/src/timeline.rs +++ b/safekeeper/src/timeline.rs @@ -529,7 +529,7 @@ impl Timeline { // release the lock before removing } let _enter = - info_span!("", timeline = %self.zttid.tenant_id, tenant = %self.zttid.timeline_id) + info_span!("", tenant = %self.zttid.tenant_id, timeline = %self.zttid.timeline_id) .entered(); remover(horizon_segno - 1)?; self.mutex.lock().unwrap().last_removed_segno = horizon_segno; @@ -626,7 +626,7 @@ impl GlobalTimelines { zttid: ZTenantTimelineId, create: bool, ) -> Result> { - let _enter = info_span!("", timeline = %zttid.tenant_id).entered(); + let _enter = info_span!("", timeline = %zttid.timeline_id).entered(); let mut state = TIMELINES_STATE.lock().unwrap(); From 96a50e99cf1b6800207570962e206a65db8215de Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 30 Aug 2022 17:36:21 +0300 Subject: [PATCH 56/63] Forward various connection params to compute nodes. (#2336) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, proxy didn't forward auxiliary `options` parameter and other ones to the client's compute node, e.g. ``` $ psql "user=john host=localhost dbname=postgres options='-cgeqo=off'" postgres=# show geqo; ┌──────┐ │ geqo │ ├──────┤ │ on │ └──────┘ (1 row) ``` With this patch we now forward `options`, `application_name` and `replication`. Further reading: https://www.postgresql.org/docs/current/libpq-connect.html Fixes #1287. 
--- Cargo.lock | 1 + libs/utils/src/pq_proto.rs | 157 +++++++++++++++++------ proxy/Cargo.toml | 1 + proxy/src/auth/backend.rs | 4 +- proxy/src/auth/backend/console.rs | 8 +- proxy/src/auth/backend/legacy_console.rs | 12 +- proxy/src/auth/backend/postgres.rs | 6 +- proxy/src/auth/credentials.rs | 85 ++++++------ proxy/src/cancellation.rs | 2 +- proxy/src/compute.rs | 39 +++++- proxy/src/proxy.rs | 52 +++++--- safekeeper/src/handler.rs | 25 ++-- test_runner/batch_others/test_proxy.py | 6 +- 13 files changed, 271 insertions(+), 127 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 603e034ed3..2e300e46f5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2271,6 +2271,7 @@ dependencies = [ "hex", "hmac 0.12.1", "hyper", + "itertools", "md5", "metrics", "once_cell", diff --git a/libs/utils/src/pq_proto.rs b/libs/utils/src/pq_proto.rs index 2f8dcf31d3..dde76039d7 100644 --- a/libs/utils/src/pq_proto.rs +++ b/libs/utils/src/pq_proto.rs @@ -7,11 +7,14 @@ use anyhow::{bail, ensure, Context, Result}; use bytes::{Buf, BufMut, Bytes, BytesMut}; use postgres_protocol::PG_EPOCH; use serde::{Deserialize, Serialize}; -use std::collections::HashMap; -use std::future::Future; -use std::io::{self, Cursor}; -use std::str; -use std::time::{Duration, SystemTime}; +use std::{ + borrow::Cow, + collections::HashMap, + future::Future, + io::{self, Cursor}, + str, + time::{Duration, SystemTime}, +}; use tokio::io::AsyncReadExt; use tracing::{trace, warn}; @@ -53,7 +56,67 @@ pub enum FeStartupPacket { }, } -pub type StartupMessageParams = HashMap; +#[derive(Debug)] +pub struct StartupMessageParams { + params: HashMap, +} + +impl StartupMessageParams { + /// Get parameter's value by its name. + pub fn get(&self, name: &str) -> Option<&str> { + self.params.get(name).map(|s| s.as_str()) + } + + /// Split command-line options according to PostgreSQL's logic, + /// taking into account all escape sequences but leaving them as-is. + /// [`None`] means that there's no `options` in [`Self`]. 
+ pub fn options_raw(&self) -> Option> { + // See `postgres: pg_split_opts`. + let mut last_was_escape = false; + let iter = self + .get("options")? + .split(move |c: char| { + // We split by non-escaped whitespace symbols. + let should_split = c.is_ascii_whitespace() && !last_was_escape; + last_was_escape = c == '\\' && !last_was_escape; + should_split + }) + .filter(|s| !s.is_empty()); + + Some(iter) + } + + /// Split command-line options according to PostgreSQL's logic, + /// applying all escape sequences (using owned strings as needed). + /// [`None`] means that there's no `options` in [`Self`]. + pub fn options_escaped(&self) -> Option>> { + // See `postgres: pg_split_opts`. + let iter = self.options_raw()?.map(|s| { + let mut preserve_next_escape = false; + let escape = |c| { + // We should remove '\\' unless it's preceded by '\\'. + let should_remove = c == '\\' && !preserve_next_escape; + preserve_next_escape = should_remove; + should_remove + }; + + match s.contains('\\') { + true => Cow::Owned(s.replace(escape, "")), + false => Cow::Borrowed(s), + } + }); + + Some(iter) + } + + // This function is mostly useful in tests. 
+ #[doc(hidden)] + pub fn new<'a, const N: usize>(pairs: [(&'a str, &'a str); N]) -> Self { + Self { + params: pairs.map(|(k, v)| (k.to_owned(), v.to_owned())).into(), + } + } +} #[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] pub struct CancelKeyData { @@ -237,9 +300,9 @@ impl FeStartupPacket { stream.read_exact(params_bytes.as_mut()).await?; // Parse params depending on request code - let most_sig_16_bits = request_code >> 16; - let least_sig_16_bits = request_code & ((1 << 16) - 1); - let message = match (most_sig_16_bits, least_sig_16_bits) { + let req_hi = request_code >> 16; + let req_lo = request_code & ((1 << 16) - 1); + let message = match (req_hi, req_lo) { (RESERVED_INVALID_MAJOR_VERSION, CANCEL_REQUEST_CODE) => { ensure!(params_len == 8, "expected 8 bytes for CancelRequest params"); let mut cursor = Cursor::new(params_bytes); @@ -248,49 +311,44 @@ impl FeStartupPacket { cancel_key: cursor.read_i32().await?, }) } - (RESERVED_INVALID_MAJOR_VERSION, NEGOTIATE_SSL_CODE) => FeStartupPacket::SslRequest, + (RESERVED_INVALID_MAJOR_VERSION, NEGOTIATE_SSL_CODE) => { + // Requested upgrade to SSL (aka TLS) + FeStartupPacket::SslRequest + } (RESERVED_INVALID_MAJOR_VERSION, NEGOTIATE_GSS_CODE) => { + // Requested upgrade to GSSAPI FeStartupPacket::GssEncRequest } (RESERVED_INVALID_MAJOR_VERSION, unrecognized_code) => { bail!("Unrecognized request code {}", unrecognized_code) } + // TODO bail if protocol major_version is not 3? (major_version, minor_version) => { - // TODO bail if protocol major_version is not 3? - // Parse null-terminated (String) pairs of param name / param value - let params_str = str::from_utf8(¶ms_bytes).unwrap(); - let mut params_tokens = params_str.split('\0'); - let mut params: HashMap = HashMap::new(); - while let Some(name) = params_tokens.next() { - let value = params_tokens + // Parse pairs of null-terminated strings (key, value). + // See `postgres: ProcessStartupPacket, build_startup_packet`. 
+ let mut tokens = str::from_utf8(¶ms_bytes) + .context("StartupMessage params: invalid utf-8")? + .strip_suffix('\0') // drop packet's own null terminator + .context("StartupMessage params: missing null terminator")? + .split_terminator('\0'); + + let mut params = HashMap::new(); + while let Some(name) = tokens.next() { + let value = tokens .next() - .context("expected even number of params in StartupMessage")?; - if name == "options" { - // parsing options arguments "...&options=%3D+=..." - // '%3D' is '=' and '+' is ' ' + .context("StartupMessage params: key without value")?; - // Note: we allow users that don't have SNI capabilities, - // to pass a special keyword argument 'project' - // to be used to determine the cluster name by the proxy. - - //TODO: write unit test for this and refactor in its own function. - for cmdopt in value.split(' ') { - let nameval: Vec<&str> = cmdopt.split('=').collect(); - if nameval.len() == 2 { - params.insert(nameval[0].to_string(), nameval[1].to_string()); - } - } - } else { - params.insert(name.to_string(), value.to_string()); - } + params.insert(name.to_owned(), value.to_owned()); } + FeStartupPacket::StartupMessage { major_version, minor_version, - params, + params: StartupMessageParams { params }, } } }; + Ok(Some(FeMessage::StartupPacket(message))) }) } @@ -967,6 +1025,33 @@ mod tests { assert_eq!(zf, zf_parsed); } + #[test] + fn test_startup_message_params_options_escaped() { + fn split_options(params: &StartupMessageParams) -> Vec> { + params + .options_escaped() + .expect("options are None") + .collect() + } + + let make_params = |options| StartupMessageParams::new([("options", options)]); + + let params = StartupMessageParams::new([]); + assert!(matches!(params.options_escaped(), None)); + + let params = make_params(""); + assert!(split_options(¶ms).is_empty()); + + let params = make_params("foo"); + assert_eq!(split_options(¶ms), ["foo"]); + + let params = make_params(" foo bar "); + assert_eq!(split_options(¶ms), 
["foo", "bar"]); + + let params = make_params("foo\\ bar \\ \\\\ baz\\ lol"); + assert_eq!(split_options(¶ms), ["foo bar", " \\", "baz ", "lol"]); + } + // Make sure that `read` is sync/async callable async fn _assert(stream: &mut (impl tokio::io::AsyncRead + Unpin)) { let _ = FeMessage::read(&mut [].as_ref()); diff --git a/proxy/Cargo.toml b/proxy/Cargo.toml index d3f7ea5fdc..5a450793f1 100644 --- a/proxy/Cargo.toml +++ b/proxy/Cargo.toml @@ -15,6 +15,7 @@ hashbrown = "0.12" hex = "0.4.3" hmac = "0.12.1" hyper = "0.14" +itertools = "0.10.3" once_cell = "1.13.0" md5 = "0.7.0" parking_lot = "0.12" diff --git a/proxy/src/auth/backend.rs b/proxy/src/auth/backend.rs index bb7e7ef67b..9c43620ffb 100644 --- a/proxy/src/auth/backend.rs +++ b/proxy/src/auth/backend.rs @@ -127,7 +127,7 @@ impl BackendType> { } } -impl BackendType { +impl BackendType> { /// Authenticate the client via the requested backend, possibly using credentials. pub async fn authenticate( mut self, @@ -149,7 +149,7 @@ impl BackendType { // Finally we may finish the initialization of `creds`. // TODO: add missing type safety to ClientCredentials. 
- creds.project = Some(payload.project); + creds.project = Some(payload.project.into()); let mut config = match &self { Console(creds) => { diff --git a/proxy/src/auth/backend/console.rs b/proxy/src/auth/backend/console.rs index 87906679ea..e239320e9b 100644 --- a/proxy/src/auth/backend/console.rs +++ b/proxy/src/auth/backend/console.rs @@ -121,7 +121,7 @@ pub enum AuthInfo { #[must_use] pub(super) struct Api<'a> { endpoint: &'a ApiUrl, - creds: &'a ClientCredentials, + creds: &'a ClientCredentials<'a>, } impl<'a> Api<'a> { @@ -143,7 +143,7 @@ impl<'a> Api<'a> { url.path_segments_mut().push("proxy_get_role_secret"); url.query_pairs_mut() .append_pair("project", self.creds.project().expect("impossible")) - .append_pair("role", &self.creds.user); + .append_pair("role", self.creds.user); // TODO: use a proper logger println!("cplane request: {url}"); @@ -187,8 +187,8 @@ impl<'a> Api<'a> { config .host(host) .port(port) - .dbname(&self.creds.dbname) - .user(&self.creds.user); + .dbname(self.creds.dbname) + .user(self.creds.user); Ok(config) } diff --git a/proxy/src/auth/backend/legacy_console.rs b/proxy/src/auth/backend/legacy_console.rs index 17ba44e833..b99a004dcd 100644 --- a/proxy/src/auth/backend/legacy_console.rs +++ b/proxy/src/auth/backend/legacy_console.rs @@ -56,7 +56,7 @@ enum ProxyAuthResponse { NotReady { ready: bool }, // TODO: get rid of `ready` } -impl ClientCredentials { +impl ClientCredentials<'_> { fn is_existing_user(&self) -> bool { self.user.ends_with("@zenith") } @@ -64,15 +64,15 @@ impl ClientCredentials { async fn authenticate_proxy_client( auth_endpoint: &reqwest::Url, - creds: &ClientCredentials, + creds: &ClientCredentials<'_>, md5_response: &str, salt: &[u8; 4], psql_session_id: &str, ) -> Result { let mut url = auth_endpoint.clone(); url.query_pairs_mut() - .append_pair("login", &creds.user) - .append_pair("database", &creds.dbname) + .append_pair("login", creds.user) + .append_pair("database", creds.dbname) .append_pair("md5response", 
md5_response) .append_pair("salt", &hex::encode(salt)) .append_pair("psql_session_id", psql_session_id); @@ -103,7 +103,7 @@ async fn authenticate_proxy_client( async fn handle_existing_user( auth_endpoint: &reqwest::Url, client: &mut PqStream, - creds: &ClientCredentials, + creds: &ClientCredentials<'_>, ) -> auth::Result { let psql_session_id = super::link::new_psql_session_id(); let md5_salt = rand::random(); @@ -136,7 +136,7 @@ async fn handle_existing_user( pub async fn handle_user( auth_endpoint: &reqwest::Url, auth_link_uri: &reqwest::Url, - creds: &ClientCredentials, + creds: &ClientCredentials<'_>, client: &mut PqStream, ) -> auth::Result { if creds.is_existing_user() { diff --git a/proxy/src/auth/backend/postgres.rs b/proxy/src/auth/backend/postgres.rs index 183fa52ec1..2055ee14c8 100644 --- a/proxy/src/auth/backend/postgres.rs +++ b/proxy/src/auth/backend/postgres.rs @@ -17,7 +17,7 @@ use tokio::io::{AsyncRead, AsyncWrite}; #[must_use] pub(super) struct Api<'a> { endpoint: &'a ApiUrl, - creds: &'a ClientCredentials, + creds: &'a ClientCredentials<'a>, } // Helps eliminate graceless `.map_err` calls without introducing another ctor. @@ -87,8 +87,8 @@ impl<'a> Api<'a> { config .host(self.endpoint.host_str().unwrap_or("localhost")) .port(self.endpoint.port().unwrap_or(5432)) - .dbname(&self.creds.dbname) - .user(&self.creds.user); + .dbname(self.creds.dbname) + .user(self.creds.user); Ok(config) } diff --git a/proxy/src/auth/credentials.rs b/proxy/src/auth/credentials.rs index 4c72da1c48..ea71eba010 100644 --- a/proxy/src/auth/credentials.rs +++ b/proxy/src/auth/credentials.rs @@ -1,6 +1,7 @@ //! User credentials used in authentication. use crate::error::UserFacingError; +use std::borrow::Cow; use thiserror::Error; use utils::pq_proto::StartupMessageParams; @@ -27,51 +28,59 @@ impl UserFacingError for ClientCredsParseError {} /// Various client credentials which we use for authentication. /// Note that we don't store any kind of client key or password here. 
#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ClientCredentials { - pub user: String, - pub dbname: String, - pub project: Option, +pub struct ClientCredentials<'a> { + pub user: &'a str, + pub dbname: &'a str, + pub project: Option>, } -impl ClientCredentials { +impl ClientCredentials<'_> { pub fn project(&self) -> Option<&str> { self.project.as_deref() } } -impl ClientCredentials { +impl<'a> ClientCredentials<'a> { pub fn parse( - mut options: StartupMessageParams, + params: &'a StartupMessageParams, sni: Option<&str>, common_name: Option<&str>, ) -> Result { use ClientCredsParseError::*; - // Some parameters are absolutely necessary, others not so much. - let mut get_param = |key| options.remove(key).ok_or(MissingKey(key)); - // Some parameters are stored in the startup message. + let get_param = |key| params.get(key).ok_or(MissingKey(key)); let user = get_param("user")?; let dbname = get_param("database")?; - let project_a = get_param("project").ok(); + + // Project name might be passed via PG's command-line options. + let project_a = params.options_raw().and_then(|options| { + for opt in options { + if let Some(value) = opt.strip_prefix("project=") { + return Some(Cow::Borrowed(value)); + } + } + None + }); // Alternative project name is in fact a subdomain from SNI. // NOTE: we do not consider SNI if `common_name` is missing. let project_b = sni .zip(common_name) .map(|(sni, cn)| { - // TODO: what if SNI is present but just a common name? subdomain_from_sni(sni, cn) - .ok_or_else(|| InconsistentSni(sni.to_owned(), cn.to_owned())) + .ok_or_else(|| InconsistentSni(sni.into(), cn.into())) + .map(Cow::<'static, str>::Owned) }) .transpose()?; let project = match (project_a, project_b) { // Invariant: if we have both project name variants, they should match. - (Some(a), Some(b)) if a != b => Some(Err(InconsistentProjectNames(a, b))), - (a, b) => a.or(b).map(|name| { - // Invariant: project name may not contain certain characters. 
- check_project_name(name).map_err(MalformedProjectName) + (Some(a), Some(b)) if a != b => Some(Err(InconsistentProjectNames(a.into(), b.into()))), + // Invariant: project name may not contain certain characters. + (a, b) => a.or(b).map(|name| match project_name_valid(&name) { + false => Err(MalformedProjectName(name.into())), + true => Ok(name), }), } .transpose()?; @@ -84,12 +93,8 @@ impl ClientCredentials { } } -fn check_project_name(name: String) -> Result { - if name.chars().all(|c| c.is_alphanumeric() || c == '-') { - Ok(name) - } else { - Err(name) - } +fn project_name_valid(name: &str) -> bool { + name.chars().all(|c| c.is_alphanumeric() || c == '-') } fn subdomain_from_sni(sni: &str, common_name: &str) -> Option { @@ -102,18 +107,14 @@ fn subdomain_from_sni(sni: &str, common_name: &str) -> Option { mod tests { use super::*; - fn make_options<'a, const N: usize>(pairs: [(&'a str, &'a str); N]) -> StartupMessageParams { - StartupMessageParams::from(pairs.map(|(k, v)| (k.to_owned(), v.to_owned()))) - } - #[test] #[ignore = "TODO: fix how database is handled"] fn parse_bare_minimum() -> anyhow::Result<()> { // According to postgresql, only `user` should be required. - let options = make_options([("user", "john_doe")]); + let options = StartupMessageParams::new([("user", "john_doe")]); // TODO: check that `creds.dbname` is None. 
- let creds = ClientCredentials::parse(options, None, None)?; + let creds = ClientCredentials::parse(&options, None, None)?; assert_eq!(creds.user, "john_doe"); Ok(()) @@ -121,9 +122,9 @@ mod tests { #[test] fn parse_missing_project() -> anyhow::Result<()> { - let options = make_options([("user", "john_doe"), ("database", "world")]); + let options = StartupMessageParams::new([("user", "john_doe"), ("database", "world")]); - let creds = ClientCredentials::parse(options, None, None)?; + let creds = ClientCredentials::parse(&options, None, None)?; assert_eq!(creds.user, "john_doe"); assert_eq!(creds.dbname, "world"); assert_eq!(creds.project, None); @@ -133,12 +134,12 @@ mod tests { #[test] fn parse_project_from_sni() -> anyhow::Result<()> { - let options = make_options([("user", "john_doe"), ("database", "world")]); + let options = StartupMessageParams::new([("user", "john_doe"), ("database", "world")]); let sni = Some("foo.localhost"); let common_name = Some("localhost"); - let creds = ClientCredentials::parse(options, sni, common_name)?; + let creds = ClientCredentials::parse(&options, sni, common_name)?; assert_eq!(creds.user, "john_doe"); assert_eq!(creds.dbname, "world"); assert_eq!(creds.project.as_deref(), Some("foo")); @@ -148,13 +149,13 @@ mod tests { #[test] fn parse_project_from_options() -> anyhow::Result<()> { - let options = make_options([ + let options = StartupMessageParams::new([ ("user", "john_doe"), ("database", "world"), - ("project", "bar"), + ("options", "-ckey=1 project=bar -c geqo=off"), ]); - let creds = ClientCredentials::parse(options, None, None)?; + let creds = ClientCredentials::parse(&options, None, None)?; assert_eq!(creds.user, "john_doe"); assert_eq!(creds.dbname, "world"); assert_eq!(creds.project.as_deref(), Some("bar")); @@ -164,16 +165,16 @@ mod tests { #[test] fn parse_projects_identical() -> anyhow::Result<()> { - let options = make_options([ + let options = StartupMessageParams::new([ ("user", "john_doe"), ("database", 
"world"), - ("project", "baz"), + ("options", "project=baz"), ]); let sni = Some("baz.localhost"); let common_name = Some("localhost"); - let creds = ClientCredentials::parse(options, sni, common_name)?; + let creds = ClientCredentials::parse(&options, sni, common_name)?; assert_eq!(creds.user, "john_doe"); assert_eq!(creds.dbname, "world"); assert_eq!(creds.project.as_deref(), Some("baz")); @@ -183,17 +184,17 @@ mod tests { #[test] fn parse_projects_different() { - let options = make_options([ + let options = StartupMessageParams::new([ ("user", "john_doe"), ("database", "world"), - ("project", "first"), + ("options", "project=first"), ]); let sni = Some("second.localhost"); let common_name = Some("localhost"); assert!(matches!( - ClientCredentials::parse(options, sni, common_name).expect_err("should fail"), + ClientCredentials::parse(&options, sni, common_name).expect_err("should fail"), ClientCredsParseError::InconsistentProjectNames(_, _) )); } diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs index a801313635..b7412b6f5b 100644 --- a/proxy/src/cancellation.rs +++ b/proxy/src/cancellation.rs @@ -95,7 +95,7 @@ impl<'a> Session<'a> { /// Store the cancel token for the given session. /// This enables query cancellation in [`crate::proxy::handshake`]. 
- pub fn enable_cancellation(self, cancel_closure: CancelClosure) -> CancelKeyData { + pub fn enable_query_cancellation(self, cancel_closure: CancelClosure) -> CancelKeyData { self.cancel_map .0 .lock() diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index 3bad36661b..4ae44ded57 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -1,9 +1,11 @@ use crate::{cancellation::CancelClosure, error::UserFacingError}; use futures::TryFutureExt; +use itertools::Itertools; use std::{io, net::SocketAddr}; use thiserror::Error; use tokio::net::TcpStream; use tokio_postgres::NoTls; +use utils::pq_proto::StartupMessageParams; #[derive(Debug, Error)] pub enum ConnectionError { @@ -110,7 +112,42 @@ pub struct PostgresConnection { impl NodeInfo { /// Connect to a corresponding compute node. - pub async fn connect(&self) -> Result<(PostgresConnection, CancelClosure), ConnectionError> { + pub async fn connect( + mut self, + params: &StartupMessageParams, + ) -> Result<(PostgresConnection, CancelClosure), ConnectionError> { + if let Some(options) = params.options_raw() { + // We must drop all proxy-specific parameters. + #[allow(unstable_name_collisions)] + let options: String = options + .filter(|opt| !opt.starts_with("project=")) + .intersperse(" ") // TODO: use impl from std once it's stabilized + .collect(); + + self.config.options(&options); + } + + if let Some(app_name) = params.get("application_name") { + self.config.application_name(app_name); + } + + if let Some(replication) = params.get("replication") { + use tokio_postgres::config::ReplicationMode; + match replication { + "true" | "on" | "yes" | "1" => { + self.config.replication_mode(ReplicationMode::Physical); + } + "database" => { + self.config.replication_mode(ReplicationMode::Logical); + } + _other => {} + } + } + + // TODO: extend the list of the forwarded startup parameters. + // Currently, tokio-postgres doesn't allow us to pass + // arbitrary parameters, but the ones above are a good start. 
+ let (socket_addr, mut stream) = self .connect_raw() .await diff --git a/proxy/src/proxy.rs b/proxy/src/proxy.rs index 29be79c886..72cb822910 100644 --- a/proxy/src/proxy.rs +++ b/proxy/src/proxy.rs @@ -1,6 +1,6 @@ use crate::auth; use crate::cancellation::{self, CancelMap}; -use crate::config::{ProxyConfig, TlsConfig}; +use crate::config::{AuthUrls, ProxyConfig, TlsConfig}; use crate::stream::{MetricsStream, PqStream, Stream}; use anyhow::{bail, Context}; use futures::TryFutureExt; @@ -93,20 +93,21 @@ async fn handle_client( None => return Ok(()), // it's a cancellation request }; + // Extract credentials which we're going to use for auth. let creds = { let sni = stream.get_ref().sni_hostname(); let common_name = tls.and_then(|tls| tls.common_name.as_deref()); let result = config .auth_backend - .map(|_| auth::ClientCredentials::parse(params, sni, common_name)) + .map(|_| auth::ClientCredentials::parse(&params, sni, common_name)) .transpose(); async { result }.or_else(|e| stream.throw_error(e)).await? }; - let client = Client::new(stream, creds); + let client = Client::new(stream, creds, &params); cancel_map - .with_session(|session| client.connect_to_db(config, session)) + .with_session(|session| client.connect_to_db(&config.auth_urls, session)) .await } @@ -174,38 +175,57 @@ async fn handshake( } /// Thin connection context. -struct Client { +struct Client<'a, S> { /// The underlying libpq protocol stream. stream: PqStream, /// Client credentials that we care about. - creds: auth::BackendType, + creds: auth::BackendType>, + /// KV-dictionary with PostgreSQL connection params. + params: &'a StartupMessageParams, } -impl Client { +impl<'a, S> Client<'a, S> { /// Construct a new connection context.
- fn new(stream: PqStream, creds: auth::BackendType) -> Self { - Self { stream, creds } + fn new( + stream: PqStream, + creds: auth::BackendType>, + params: &'a StartupMessageParams, + ) -> Self { + Self { + stream, + creds, + params, + } } } -impl Client { +impl Client<'_, S> { /// Let the client authenticate and connect to the designated compute node. async fn connect_to_db( self, - config: &ProxyConfig, + urls: &AuthUrls, session: cancellation::Session<'_>, ) -> anyhow::Result<()> { - let Self { mut stream, creds } = self; + let Self { + mut stream, + creds, + params, + } = self; // Authenticate and connect to a compute node. - let auth = creds.authenticate(&config.auth_urls, &mut stream).await; + let auth = creds.authenticate(urls, &mut stream).await; let node = async { auth }.or_else(|e| stream.throw_error(e)).await?; + let reported_auth_ok = node.reported_auth_ok; - let (db, cancel_closure) = node.connect().or_else(|e| stream.throw_error(e)).await?; - let cancel_key_data = session.enable_cancellation(cancel_closure); + let (db, cancel_closure) = node + .connect(params) + .or_else(|e| stream.throw_error(e)) + .await?; + + let cancel_key_data = session.enable_query_cancellation(cancel_closure); // Report authentication success if we haven't done this already. - if !node.reported_auth_ok { + if !reported_auth_ok { stream .write_message_noflush(&Be::AuthenticationOk)? 
.write_message_noflush(&BeParameterStatusMessage::encoding())?; diff --git a/safekeeper/src/handler.rs b/safekeeper/src/handler.rs index c90c2a0446..3e301259ed 100644 --- a/safekeeper/src/handler.rs +++ b/safekeeper/src/handler.rs @@ -11,7 +11,6 @@ use anyhow::{bail, Context, Result}; use postgres_ffi::PG_TLI; use regex::Regex; -use std::str::FromStr; use std::sync::Arc; use tracing::info; use utils::{ @@ -67,18 +66,22 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler { // ztenant id and ztimeline id are passed in connection string params fn startup(&mut self, _pgb: &mut PostgresBackend, sm: &FeStartupPacket) -> Result<()> { if let FeStartupPacket::StartupMessage { params, .. } = sm { - self.ztenantid = match params.get("ztenantid") { - Some(z) => Some(ZTenantId::from_str(z)?), // just curious, can I do that from .map? - _ => None, - }; - - self.ztimelineid = match params.get("ztimelineid") { - Some(z) => Some(ZTimelineId::from_str(z)?), - _ => None, - }; + if let Some(options) = params.options_raw() { + for opt in options { + match opt.split_once('=') { + Some(("ztenantid", value)) => { + self.ztenantid = Some(value.parse()?); + } + Some(("ztimelineid", value)) => { + self.ztimelineid = Some(value.parse()?); + } + _ => continue, + } + } + } if let Some(app_name) = params.get("application_name") { - self.appname = Some(app_name.clone()); + self.appname = Some(app_name.to_owned()); } Ok(()) diff --git a/test_runner/batch_others/test_proxy.py b/test_runner/batch_others/test_proxy.py index 1efb795140..bd02841dc0 100644 --- a/test_runner/batch_others/test_proxy.py +++ b/test_runner/batch_others/test_proxy.py @@ -134,12 +134,8 @@ async def test_psql_session_id(vanilla_pg: VanillaPostgres, link_proxy: NeonProx # Pass extra options to the server. -# -# Currently, proxy eats the extra connection options, so this fails. 
-# See https://github.com/neondatabase/neon/issues/1287 -@pytest.mark.xfail def test_proxy_options(static_proxy): - with static_proxy.connect(options="-cproxytest.option=value") as conn: + with static_proxy.connect(options="project=irrelevant -cproxytest.option=value") as conn: with conn.cursor() as cur: cur.execute("SHOW proxytest.option") value = cur.fetchall()[0][0] From 3aca717f3d994875d2dd2a4a09568ced9b9de4c5 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Tue, 30 Aug 2022 18:25:38 +0300 Subject: [PATCH 57/63] Reorganize python tests. Merge batch_others and batch_pg_regress. The original idea was to split all the python tests into multiple "batches" and run each batch in parallel as a separate CI job. However, the batch_pg_regress batch was pretty short compared to all the tests in batch_others. We could split batch_others into multiple batches, but it actually seems better to just treat them as one big pool of tests and let pytest handle the parallelism on its own. If we need to split them across multiple nodes in the future, we could use pytest-shard or something else, instead of managing the batches ourselves. Merge test_neon_regress.py, test_pg_regress.py and test_isolation.py into one file, test_pg_regress.py. Seems more clear to group all pg_regress-based tests into one file, now that they would all be in the same directory.
--- .../actions/run-python-test-set/action.yml | 2 +- .github/workflows/build_and_test.yml | 43 +---- pageserver/src/page_service.rs | 2 +- test_runner/README.md | 20 ++- .../batch_pg_regress/test_isolation.py | 50 ------ .../batch_pg_regress/test_neon_regress.py | 55 ------ .../batch_pg_regress/test_pg_regress.py | 56 ------ test_runner/neon_regress/README.md | 8 - .../test_ancestor_branch.py | 0 .../{batch_others => regress}/test_auth.py | 0 .../test_backpressure.py | 0 .../test_basebackup_error.py | 0 .../test_branch_and_gc.py | 0 .../test_branch_behind.py | 0 .../test_branching.py | 9 +- .../test_broken_timeline.py | 0 .../test_clog_truncate.py | 0 .../test_close_fds.py | 0 .../{batch_others => regress}/test_config.py | 0 .../test_crafted_wal_end.py | 0 .../test_createdropdb.py | 0 .../test_createuser.py | 0 .../test_fsm_truncate.py | 0 .../test_fullbackup.py | 0 .../test_gc_aggressive.py | 0 .../{batch_others => regress}/test_import.py | 0 .../test_large_schema.py | 0 .../test_lsn_mapping.py | 0 .../test_multixact.py | 0 .../test_neon_cli.py | 0 .../test_next_xid.py | 0 .../test_normal_work.py | 0 .../test_old_request_lsn.py | 0 .../test_pageserver_api.py | 0 .../test_pageserver_catchup.py | 0 .../test_pageserver_restart.py | 0 .../test_parallel_copy.py | 0 test_runner/regress/test_pg_regress.py | 159 ++++++++++++++++++ .../{batch_others => regress}/test_pitr_gc.py | 0 .../{batch_others => regress}/test_proxy.py | 0 .../test_read_validation.py | 0 .../test_readonly_node.py | 0 .../test_recovery.py | 0 .../test_remote_storage.py | 0 .../{batch_others => regress}/test_setup.py | 0 .../test_subxacts.py | 0 .../test_tenant_conf.py | 0 .../test_tenant_detach.py | 0 .../test_tenant_relocation.py | 0 .../test_tenant_tasks.py | 0 .../{batch_others => regress}/test_tenants.py | 0 .../test_tenants_with_remote_storage.py | 0 .../test_timeline_delete.py | 0 .../test_timeline_size.py | 0 .../test_twophase.py | 0 .../{batch_others => regress}/test_vm_bits.py | 0 
.../test_wal_acceptor.py | 0 .../test_wal_acceptor_async.py | 0 .../test_wal_restore.py | 0 .../{neon_regress => sql_regress}/.gitignore | 0 test_runner/sql_regress/README.md | 13 ++ .../expected/.gitignore | 0 .../expected/neon-cid.out | 0 .../expected/neon-clog.out | 0 .../expected/neon-rel-truncate.out | 0 .../expected/neon-vacuum-full.out | 0 .../parallel_schedule | 0 .../sql/.gitignore | 0 .../sql/neon-cid.sql | 0 .../sql/neon-clog.sql | 0 .../sql/neon-rel-truncate.sql | 0 .../sql/neon-vacuum-full.sql | 0 72 files changed, 201 insertions(+), 216 deletions(-) delete mode 100644 test_runner/batch_pg_regress/test_isolation.py delete mode 100644 test_runner/batch_pg_regress/test_neon_regress.py delete mode 100644 test_runner/batch_pg_regress/test_pg_regress.py delete mode 100644 test_runner/neon_regress/README.md rename test_runner/{batch_others => regress}/test_ancestor_branch.py (100%) rename test_runner/{batch_others => regress}/test_auth.py (100%) rename test_runner/{batch_others => regress}/test_backpressure.py (100%) rename test_runner/{batch_others => regress}/test_basebackup_error.py (100%) rename test_runner/{batch_others => regress}/test_branch_and_gc.py (100%) rename test_runner/{batch_others => regress}/test_branch_behind.py (100%) rename test_runner/{batch_others => regress}/test_branching.py (91%) rename test_runner/{batch_others => regress}/test_broken_timeline.py (100%) rename test_runner/{batch_others => regress}/test_clog_truncate.py (100%) rename test_runner/{batch_others => regress}/test_close_fds.py (100%) rename test_runner/{batch_others => regress}/test_config.py (100%) rename test_runner/{batch_others => regress}/test_crafted_wal_end.py (100%) rename test_runner/{batch_others => regress}/test_createdropdb.py (100%) rename test_runner/{batch_others => regress}/test_createuser.py (100%) rename test_runner/{batch_others => regress}/test_fsm_truncate.py (100%) rename test_runner/{batch_others => regress}/test_fullbackup.py (100%) rename 
test_runner/{batch_others => regress}/test_gc_aggressive.py (100%) rename test_runner/{batch_others => regress}/test_import.py (100%) rename test_runner/{batch_others => regress}/test_large_schema.py (100%) rename test_runner/{batch_others => regress}/test_lsn_mapping.py (100%) rename test_runner/{batch_others => regress}/test_multixact.py (100%) rename test_runner/{batch_others => regress}/test_neon_cli.py (100%) rename test_runner/{batch_others => regress}/test_next_xid.py (100%) rename test_runner/{batch_others => regress}/test_normal_work.py (100%) rename test_runner/{batch_others => regress}/test_old_request_lsn.py (100%) rename test_runner/{batch_others => regress}/test_pageserver_api.py (100%) rename test_runner/{batch_others => regress}/test_pageserver_catchup.py (100%) rename test_runner/{batch_others => regress}/test_pageserver_restart.py (100%) rename test_runner/{batch_others => regress}/test_parallel_copy.py (100%) create mode 100644 test_runner/regress/test_pg_regress.py rename test_runner/{batch_others => regress}/test_pitr_gc.py (100%) rename test_runner/{batch_others => regress}/test_proxy.py (100%) rename test_runner/{batch_others => regress}/test_read_validation.py (100%) rename test_runner/{batch_others => regress}/test_readonly_node.py (100%) rename test_runner/{batch_others => regress}/test_recovery.py (100%) rename test_runner/{batch_others => regress}/test_remote_storage.py (100%) rename test_runner/{batch_others => regress}/test_setup.py (100%) rename test_runner/{batch_others => regress}/test_subxacts.py (100%) rename test_runner/{batch_others => regress}/test_tenant_conf.py (100%) rename test_runner/{batch_others => regress}/test_tenant_detach.py (100%) rename test_runner/{batch_others => regress}/test_tenant_relocation.py (100%) rename test_runner/{batch_others => regress}/test_tenant_tasks.py (100%) rename test_runner/{batch_others => regress}/test_tenants.py (100%) rename test_runner/{batch_others => 
regress}/test_tenants_with_remote_storage.py (100%) rename test_runner/{batch_others => regress}/test_timeline_delete.py (100%) rename test_runner/{batch_others => regress}/test_timeline_size.py (100%) rename test_runner/{batch_others => regress}/test_twophase.py (100%) rename test_runner/{batch_others => regress}/test_vm_bits.py (100%) rename test_runner/{batch_others => regress}/test_wal_acceptor.py (100%) rename test_runner/{batch_others => regress}/test_wal_acceptor_async.py (100%) rename test_runner/{batch_others => regress}/test_wal_restore.py (100%) rename test_runner/{neon_regress => sql_regress}/.gitignore (100%) create mode 100644 test_runner/sql_regress/README.md rename test_runner/{neon_regress => sql_regress}/expected/.gitignore (100%) rename test_runner/{neon_regress => sql_regress}/expected/neon-cid.out (100%) rename test_runner/{neon_regress => sql_regress}/expected/neon-clog.out (100%) rename test_runner/{neon_regress => sql_regress}/expected/neon-rel-truncate.out (100%) rename test_runner/{neon_regress => sql_regress}/expected/neon-vacuum-full.out (100%) rename test_runner/{neon_regress => sql_regress}/parallel_schedule (100%) rename test_runner/{neon_regress => sql_regress}/sql/.gitignore (100%) rename test_runner/{neon_regress => sql_regress}/sql/neon-cid.sql (100%) rename test_runner/{neon_regress => sql_regress}/sql/neon-clog.sql (100%) rename test_runner/{neon_regress => sql_regress}/sql/neon-rel-truncate.sql (100%) rename test_runner/{neon_regress => sql_regress}/sql/neon-vacuum-full.sql (100%) diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml index 22447025cb..a4bcaff56d 100644 --- a/.github/actions/run-python-test-set/action.yml +++ b/.github/actions/run-python-test-set/action.yml @@ -149,7 +149,7 @@ runs: fi - name: Upload Allure results - if: ${{ always() && (inputs.test_selection == 'batch_others' || inputs.test_selection == 'batch_pg_regress') }} + if: ${{ always() && 
(inputs.test_selection == 'regress') }} uses: ./.github/actions/allure-report with: action: store diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index bf6eb69930..8b1dc3a9c4 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -206,7 +206,7 @@ jobs: if: matrix.build_type == 'debug' uses: ./.github/actions/save-coverage-data - pg_regress-tests: + regress-tests: runs-on: dev container: image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned @@ -224,42 +224,13 @@ jobs: submodules: true fetch-depth: 2 - - name: Pytest regress tests + - name: Pytest regression tests uses: ./.github/actions/run-python-test-set with: build_type: ${{ matrix.build_type }} rust_toolchain: ${{ matrix.rust_toolchain }} - test_selection: batch_pg_regress + test_selection: regress needs_postgres_source: true - - - name: Merge and upload coverage data - if: matrix.build_type == 'debug' - uses: ./.github/actions/save-coverage-data - - other-tests: - runs-on: dev - container: - image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned - options: --init - needs: [ build-neon ] - strategy: - fail-fast: false - matrix: - build_type: [ debug, release ] - rust_toolchain: [ 1.58 ] - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - submodules: true - fetch-depth: 2 - - - name: Pytest other tests - uses: ./.github/actions/run-python-test-set - with: - build_type: ${{ matrix.build_type }} - rust_toolchain: ${{ matrix.rust_toolchain }} - test_selection: batch_others run_with_real_s3: true real_s3_bucket: ci-tests-s3 real_s3_region: us-west-2 @@ -307,7 +278,7 @@ jobs: container: image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned options: --init - needs: [ other-tests, pg_regress-tests ] + needs: [ regress-tests ] if: always() strategy: fail-fast: false @@ -330,7 +301,7 @@ jobs: container: image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned options: --init - 
needs: [ other-tests, pg_regress-tests ] + needs: [ regress-tests ] strategy: fail-fast: false matrix: @@ -587,7 +558,7 @@ jobs: #container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest # We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version. # If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly - needs: [ push-docker-hub, calculate-deploy-targets, tag, other-tests, pg_regress-tests ] + needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ] if: | (github.ref_name == 'main' || github.ref_name == 'release') && github.event_name != 'workflow_dispatch' @@ -642,7 +613,7 @@ jobs: runs-on: dev container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest # Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently. - needs: [ push-docker-hub, calculate-deploy-targets, tag, other-tests, pg_regress-tests ] + needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ] if: | (github.ref_name == 'main' || github.ref_name == 'release') && github.event_name != 'workflow_dispatch' diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index fbc70f7690..d59a82d488 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -1077,7 +1077,7 @@ impl postgres_backend::Handler for PageServerHandler { .write_message(&BeMessage::CommandComplete(b"SELECT 1"))?; } else if query_string.starts_with("do_gc ") { // Run GC immediately on given timeline. - // FIXME: This is just for tests. See test_runner/batch_others/test_gc.py. + // FIXME: This is just for tests. See test_runner/regress/test_gc.py. // This probably should require special authentication or a global flag to // enable, I don't think we want to or need to allow regular clients to invoke // GC. 
diff --git a/test_runner/README.md b/test_runner/README.md index 4b54c45175..c7ec361d65 100644 --- a/test_runner/README.md +++ b/test_runner/README.md @@ -15,12 +15,22 @@ Prerequisites: ### Test Organization -The tests are divided into a few batches, such that each batch takes roughly -the same amount of time. The batches can be run in parallel, to minimize total -runtime. Currently, there are only two batches: +Regression tests are in the 'regress' directory. They can be run in +parallel to minimize total runtime. Most regression tests set up their +environment with their own pageservers and safekeepers (but see +`TEST_SHARED_FIXTURES`). -- test_batch_pg_regress: Runs PostgreSQL regression tests -- test_others: All other tests +'pg_clients' contains tests for connecting with various client +libraries. Each client test uses a Dockerfile that pulls an image that +contains the client, and connects to PostgreSQL with it. The client +tests can be run against an existing PostgreSQL or Neon installation. + +'performance' contains performance regression tests. Each test +exercises a particular scenario or workload, and outputs +measurements. They should be run serially, to avoid the tests +interfering with the performance of each other. Some performance tests +set up their own Neon environment, while others can be run against an +existing PostgreSQL or Neon environment. ### Running the tests diff --git a/test_runner/batch_pg_regress/test_isolation.py b/test_runner/batch_pg_regress/test_isolation.py deleted file mode 100644 index 7127a069b0..0000000000 --- a/test_runner/batch_pg_regress/test_isolation.py +++ /dev/null @@ -1,50 +0,0 @@ -import os -from pathlib import Path - -import pytest -from fixtures.neon_fixtures import NeonEnv, base_dir, pg_distrib_dir - - -# The isolation tests run for a long time, especially in debug mode, -# so use a larger-than-default timeout.
-@pytest.mark.timeout(1800) -def test_isolation(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, capsys): - env = neon_simple_env - - env.neon_cli.create_branch("test_isolation", "empty") - # Connect to postgres and create a database called "regression". - # isolation tests use prepared transactions, so enable them - pg = env.postgres.create_start("test_isolation", config_lines=["max_prepared_transactions=100"]) - pg.safe_psql("CREATE DATABASE isolation_regression") - - # Create some local directories for pg_isolation_regress to run in. - runpath = test_output_dir / "regress" - (runpath / "testtablespace").mkdir(parents=True) - - # Compute all the file locations that pg_isolation_regress will need. - build_path = os.path.join(pg_distrib_dir, "build/src/test/isolation") - src_path = os.path.join(base_dir, "vendor/postgres/src/test/isolation") - bindir = os.path.join(pg_distrib_dir, "bin") - schedule = os.path.join(src_path, "isolation_schedule") - pg_isolation_regress = os.path.join(build_path, "pg_isolation_regress") - - pg_isolation_regress_command = [ - pg_isolation_regress, - "--use-existing", - "--bindir={}".format(bindir), - "--dlpath={}".format(build_path), - "--inputdir={}".format(src_path), - "--schedule={}".format(schedule), - ] - - env_vars = { - "PGPORT": str(pg.default_options["port"]), - "PGUSER": pg.default_options["user"], - "PGHOST": pg.default_options["host"], - } - - # Run the command. - # We don't capture the output. It's not too chatty, and it always - # logs the exact same data to `regression.out` anyway. 
- with capsys.disabled(): - pg_bin.run(pg_isolation_regress_command, env=env_vars, cwd=runpath) diff --git a/test_runner/batch_pg_regress/test_neon_regress.py b/test_runner/batch_pg_regress/test_neon_regress.py deleted file mode 100644 index 4619647084..0000000000 --- a/test_runner/batch_pg_regress/test_neon_regress.py +++ /dev/null @@ -1,55 +0,0 @@ -import os -from pathlib import Path - -from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv, base_dir, check_restored_datadir_content, pg_distrib_dir - - -def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, capsys): - env = neon_simple_env - - env.neon_cli.create_branch("test_neon_regress", "empty") - # Connect to postgres and create a database called "regression". - pg = env.postgres.create_start("test_neon_regress") - pg.safe_psql("CREATE DATABASE regression") - - # Create some local directories for pg_regress to run in. - runpath = test_output_dir / "regress" - (runpath / "testtablespace").mkdir(parents=True) - - # Compute all the file locations that pg_regress will need. - # This test runs neon specific tests - build_path = os.path.join(pg_distrib_dir, "build/src/test/regress") - src_path = os.path.join(base_dir, "test_runner/neon_regress") - bindir = os.path.join(pg_distrib_dir, "bin") - schedule = os.path.join(src_path, "parallel_schedule") - pg_regress = os.path.join(build_path, "pg_regress") - - pg_regress_command = [ - pg_regress, - "--use-existing", - "--bindir={}".format(bindir), - "--dlpath={}".format(build_path), - "--schedule={}".format(schedule), - "--inputdir={}".format(src_path), - ] - - log.info(pg_regress_command) - env_vars = { - "PGPORT": str(pg.default_options["port"]), - "PGUSER": pg.default_options["user"], - "PGHOST": pg.default_options["host"], - } - - # Run the command. - # We don't capture the output. It's not too chatty, and it always - # logs the exact same data to `regression.out` anyway. 
- with capsys.disabled(): - pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath) - - # checkpoint one more time to ensure that the lsn we get is the latest one - pg.safe_psql("CHECKPOINT") - pg.safe_psql("select pg_current_wal_insert_lsn()")[0][0] - - # Check that we restore the content of the datadir correctly - check_restored_datadir_content(test_output_dir, env, pg) diff --git a/test_runner/batch_pg_regress/test_pg_regress.py b/test_runner/batch_pg_regress/test_pg_regress.py deleted file mode 100644 index 478dbf0a91..0000000000 --- a/test_runner/batch_pg_regress/test_pg_regress.py +++ /dev/null @@ -1,56 +0,0 @@ -import os -import pathlib - -import pytest -from fixtures.neon_fixtures import NeonEnv, base_dir, check_restored_datadir_content, pg_distrib_dir - - -# The pg_regress tests run for a long time, especially in debug mode, -# so use a larger-than-default timeout. -@pytest.mark.timeout(1800) -def test_pg_regress(neon_simple_env: NeonEnv, test_output_dir: pathlib.Path, pg_bin, capsys): - env = neon_simple_env - - env.neon_cli.create_branch("test_pg_regress", "empty") - # Connect to postgres and create a database called "regression". - pg = env.postgres.create_start("test_pg_regress") - pg.safe_psql("CREATE DATABASE regression") - - # Create some local directories for pg_regress to run in. - runpath = test_output_dir / "regress" - (runpath / "testtablespace").mkdir(parents=True) - - # Compute all the file locations that pg_regress will need. 
- build_path = os.path.join(pg_distrib_dir, "build/src/test/regress") - src_path = os.path.join(base_dir, "vendor/postgres/src/test/regress") - bindir = os.path.join(pg_distrib_dir, "bin") - schedule = os.path.join(src_path, "parallel_schedule") - pg_regress = os.path.join(build_path, "pg_regress") - - pg_regress_command = [ - pg_regress, - '--bindir=""', - "--use-existing", - "--bindir={}".format(bindir), - "--dlpath={}".format(build_path), - "--schedule={}".format(schedule), - "--inputdir={}".format(src_path), - ] - - env_vars = { - "PGPORT": str(pg.default_options["port"]), - "PGUSER": pg.default_options["user"], - "PGHOST": pg.default_options["host"], - } - - # Run the command. - # We don't capture the output. It's not too chatty, and it always - # logs the exact same data to `regression.out` anyway. - with capsys.disabled(): - pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath) - - # checkpoint one more time to ensure that the lsn we get is the latest one - pg.safe_psql("CHECKPOINT") - - # Check that we restore the content of the datadir correctly - check_restored_datadir_content(test_output_dir, env, pg) diff --git a/test_runner/neon_regress/README.md b/test_runner/neon_regress/README.md deleted file mode 100644 index b23a55462e..0000000000 --- a/test_runner/neon_regress/README.md +++ /dev/null @@ -1,8 +0,0 @@ -To add a new SQL test - -- add sql script to run to neon_regress/sql/testname.sql -- add expected output to neon_regress/expected/testname.out -- add testname to parallel_schedule - -That's it. -For more complex tests see PostgreSQL regression tests. These works basically the same. 
diff --git a/test_runner/batch_others/test_ancestor_branch.py b/test_runner/regress/test_ancestor_branch.py similarity index 100% rename from test_runner/batch_others/test_ancestor_branch.py rename to test_runner/regress/test_ancestor_branch.py diff --git a/test_runner/batch_others/test_auth.py b/test_runner/regress/test_auth.py similarity index 100% rename from test_runner/batch_others/test_auth.py rename to test_runner/regress/test_auth.py diff --git a/test_runner/batch_others/test_backpressure.py b/test_runner/regress/test_backpressure.py similarity index 100% rename from test_runner/batch_others/test_backpressure.py rename to test_runner/regress/test_backpressure.py diff --git a/test_runner/batch_others/test_basebackup_error.py b/test_runner/regress/test_basebackup_error.py similarity index 100% rename from test_runner/batch_others/test_basebackup_error.py rename to test_runner/regress/test_basebackup_error.py diff --git a/test_runner/batch_others/test_branch_and_gc.py b/test_runner/regress/test_branch_and_gc.py similarity index 100% rename from test_runner/batch_others/test_branch_and_gc.py rename to test_runner/regress/test_branch_and_gc.py diff --git a/test_runner/batch_others/test_branch_behind.py b/test_runner/regress/test_branch_behind.py similarity index 100% rename from test_runner/batch_others/test_branch_behind.py rename to test_runner/regress/test_branch_behind.py diff --git a/test_runner/batch_others/test_branching.py b/test_runner/regress/test_branching.py similarity index 91% rename from test_runner/batch_others/test_branching.py rename to test_runner/regress/test_branching.py index 2d08b07f82..0c1490294d 100644 --- a/test_runner/batch_others/test_branching.py +++ b/test_runner/regress/test_branching.py @@ -62,10 +62,11 @@ def test_branching_with_pgbench( time.sleep(delay) log.info(f"Sleep {delay}s") - # If the number of concurrent threads exceeds a threshold, - # wait for all the threads to finish before spawning a new one. 
- # Because tests defined in `batch_others` are run concurrently in CI, - # we want to avoid the situation that one test exhausts resources for other tests. + # If the number of concurrent threads exceeds a threshold, wait for + # all the threads to finish before spawning a new one. Because the + # regression tests in this directory are run concurrently in CI, we + # want to avoid the situation that one test exhausts resources for + # other tests. if len(threads) >= thread_limit: for thread in threads: thread.join() diff --git a/test_runner/batch_others/test_broken_timeline.py b/test_runner/regress/test_broken_timeline.py similarity index 100% rename from test_runner/batch_others/test_broken_timeline.py rename to test_runner/regress/test_broken_timeline.py diff --git a/test_runner/batch_others/test_clog_truncate.py b/test_runner/regress/test_clog_truncate.py similarity index 100% rename from test_runner/batch_others/test_clog_truncate.py rename to test_runner/regress/test_clog_truncate.py diff --git a/test_runner/batch_others/test_close_fds.py b/test_runner/regress/test_close_fds.py similarity index 100% rename from test_runner/batch_others/test_close_fds.py rename to test_runner/regress/test_close_fds.py diff --git a/test_runner/batch_others/test_config.py b/test_runner/regress/test_config.py similarity index 100% rename from test_runner/batch_others/test_config.py rename to test_runner/regress/test_config.py diff --git a/test_runner/batch_others/test_crafted_wal_end.py b/test_runner/regress/test_crafted_wal_end.py similarity index 100% rename from test_runner/batch_others/test_crafted_wal_end.py rename to test_runner/regress/test_crafted_wal_end.py diff --git a/test_runner/batch_others/test_createdropdb.py b/test_runner/regress/test_createdropdb.py similarity index 100% rename from test_runner/batch_others/test_createdropdb.py rename to test_runner/regress/test_createdropdb.py diff --git a/test_runner/batch_others/test_createuser.py 
b/test_runner/regress/test_createuser.py similarity index 100% rename from test_runner/batch_others/test_createuser.py rename to test_runner/regress/test_createuser.py diff --git a/test_runner/batch_others/test_fsm_truncate.py b/test_runner/regress/test_fsm_truncate.py similarity index 100% rename from test_runner/batch_others/test_fsm_truncate.py rename to test_runner/regress/test_fsm_truncate.py diff --git a/test_runner/batch_others/test_fullbackup.py b/test_runner/regress/test_fullbackup.py similarity index 100% rename from test_runner/batch_others/test_fullbackup.py rename to test_runner/regress/test_fullbackup.py diff --git a/test_runner/batch_others/test_gc_aggressive.py b/test_runner/regress/test_gc_aggressive.py similarity index 100% rename from test_runner/batch_others/test_gc_aggressive.py rename to test_runner/regress/test_gc_aggressive.py diff --git a/test_runner/batch_others/test_import.py b/test_runner/regress/test_import.py similarity index 100% rename from test_runner/batch_others/test_import.py rename to test_runner/regress/test_import.py diff --git a/test_runner/batch_others/test_large_schema.py b/test_runner/regress/test_large_schema.py similarity index 100% rename from test_runner/batch_others/test_large_schema.py rename to test_runner/regress/test_large_schema.py diff --git a/test_runner/batch_others/test_lsn_mapping.py b/test_runner/regress/test_lsn_mapping.py similarity index 100% rename from test_runner/batch_others/test_lsn_mapping.py rename to test_runner/regress/test_lsn_mapping.py diff --git a/test_runner/batch_others/test_multixact.py b/test_runner/regress/test_multixact.py similarity index 100% rename from test_runner/batch_others/test_multixact.py rename to test_runner/regress/test_multixact.py diff --git a/test_runner/batch_others/test_neon_cli.py b/test_runner/regress/test_neon_cli.py similarity index 100% rename from test_runner/batch_others/test_neon_cli.py rename to test_runner/regress/test_neon_cli.py diff --git 
a/test_runner/batch_others/test_next_xid.py b/test_runner/regress/test_next_xid.py similarity index 100% rename from test_runner/batch_others/test_next_xid.py rename to test_runner/regress/test_next_xid.py diff --git a/test_runner/batch_others/test_normal_work.py b/test_runner/regress/test_normal_work.py similarity index 100% rename from test_runner/batch_others/test_normal_work.py rename to test_runner/regress/test_normal_work.py diff --git a/test_runner/batch_others/test_old_request_lsn.py b/test_runner/regress/test_old_request_lsn.py similarity index 100% rename from test_runner/batch_others/test_old_request_lsn.py rename to test_runner/regress/test_old_request_lsn.py diff --git a/test_runner/batch_others/test_pageserver_api.py b/test_runner/regress/test_pageserver_api.py similarity index 100% rename from test_runner/batch_others/test_pageserver_api.py rename to test_runner/regress/test_pageserver_api.py diff --git a/test_runner/batch_others/test_pageserver_catchup.py b/test_runner/regress/test_pageserver_catchup.py similarity index 100% rename from test_runner/batch_others/test_pageserver_catchup.py rename to test_runner/regress/test_pageserver_catchup.py diff --git a/test_runner/batch_others/test_pageserver_restart.py b/test_runner/regress/test_pageserver_restart.py similarity index 100% rename from test_runner/batch_others/test_pageserver_restart.py rename to test_runner/regress/test_pageserver_restart.py diff --git a/test_runner/batch_others/test_parallel_copy.py b/test_runner/regress/test_parallel_copy.py similarity index 100% rename from test_runner/batch_others/test_parallel_copy.py rename to test_runner/regress/test_parallel_copy.py diff --git a/test_runner/regress/test_pg_regress.py b/test_runner/regress/test_pg_regress.py new file mode 100644 index 0000000000..119528b8f9 --- /dev/null +++ b/test_runner/regress/test_pg_regress.py @@ -0,0 +1,159 @@ +# +# This file runs pg_regress-based tests. 
+# +import os +from pathlib import Path + +import pytest +from fixtures.neon_fixtures import NeonEnv, base_dir, check_restored_datadir_content, pg_distrib_dir + + +# Run the main PostgreSQL regression tests, in src/test/regress. +# +# This runs for a long time, especially in debug mode, so use a larger-than-default +# timeout. +@pytest.mark.timeout(1800) +def test_pg_regress(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, capsys): + env = neon_simple_env + + env.neon_cli.create_branch("test_pg_regress", "empty") + # Connect to postgres and create a database called "regression". + pg = env.postgres.create_start("test_pg_regress") + pg.safe_psql("CREATE DATABASE regression") + + # Create some local directories for pg_regress to run in. + runpath = test_output_dir / "regress" + (runpath / "testtablespace").mkdir(parents=True) + + # Compute all the file locations that pg_regress will need. + build_path = os.path.join(pg_distrib_dir, "build/src/test/regress") + src_path = os.path.join(base_dir, "vendor/postgres/src/test/regress") + bindir = os.path.join(pg_distrib_dir, "bin") + schedule = os.path.join(src_path, "parallel_schedule") + pg_regress = os.path.join(build_path, "pg_regress") + + pg_regress_command = [ + pg_regress, + '--bindir=""', + "--use-existing", + "--bindir={}".format(bindir), + "--dlpath={}".format(build_path), + "--schedule={}".format(schedule), + "--inputdir={}".format(src_path), + ] + + env_vars = { + "PGPORT": str(pg.default_options["port"]), + "PGUSER": pg.default_options["user"], + "PGHOST": pg.default_options["host"], + } + + # Run the command. + # We don't capture the output. It's not too chatty, and it always + # logs the exact same data to `regression.out` anyway. 
+ with capsys.disabled(): + pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath) + + # checkpoint one more time to ensure that the lsn we get is the latest one + pg.safe_psql("CHECKPOINT") + + # Check that we restore the content of the datadir correctly + check_restored_datadir_content(test_output_dir, env, pg) + + +# Run the PostgreSQL "isolation" tests, in src/test/isolation. +# +# This runs for a long time, especially in debug mode, so use a larger-than-default +# timeout. +@pytest.mark.timeout(1800) +def test_isolation(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, capsys): + env = neon_simple_env + + env.neon_cli.create_branch("test_isolation", "empty") + # Connect to postgres and create a database called "regression". + # isolation tests use prepared transactions, so enable them + pg = env.postgres.create_start("test_isolation", config_lines=["max_prepared_transactions=100"]) + pg.safe_psql("CREATE DATABASE isolation_regression") + + # Create some local directories for pg_isolation_regress to run in. + runpath = test_output_dir / "regress" + (runpath / "testtablespace").mkdir(parents=True) + + # Compute all the file locations that pg_isolation_regress will need. + build_path = os.path.join(pg_distrib_dir, "build/src/test/isolation") + src_path = os.path.join(base_dir, "vendor/postgres/src/test/isolation") + bindir = os.path.join(pg_distrib_dir, "bin") + schedule = os.path.join(src_path, "isolation_schedule") + pg_isolation_regress = os.path.join(build_path, "pg_isolation_regress") + + pg_isolation_regress_command = [ + pg_isolation_regress, + "--use-existing", + "--bindir={}".format(bindir), + "--dlpath={}".format(build_path), + "--inputdir={}".format(src_path), + "--schedule={}".format(schedule), + ] + + env_vars = { + "PGPORT": str(pg.default_options["port"]), + "PGUSER": pg.default_options["user"], + "PGHOST": pg.default_options["host"], + } + + # Run the command. + # We don't capture the output. 
It's not too chatty, and it always + # logs the exact same data to `regression.out` anyway. + with capsys.disabled(): + pg_bin.run(pg_isolation_regress_command, env=env_vars, cwd=runpath) + + +# Run extra Neon-specific pg_regress-based tests. The tests and their +# schedule file are in the sql_regress/ directory. +def test_sql_regress(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, capsys): + env = neon_simple_env + + env.neon_cli.create_branch("test_sql_regress", "empty") + # Connect to postgres and create a database called "regression". + pg = env.postgres.create_start("test_sql_regress") + pg.safe_psql("CREATE DATABASE regression") + + # Create some local directories for pg_regress to run in. + runpath = test_output_dir / "regress" + (runpath / "testtablespace").mkdir(parents=True) + + # Compute all the file locations that pg_regress will need. + # This test runs neon specific tests + build_path = os.path.join(pg_distrib_dir, "build/src/test/regress") + src_path = os.path.join(base_dir, "test_runner/sql_regress") + bindir = os.path.join(pg_distrib_dir, "bin") + schedule = os.path.join(src_path, "parallel_schedule") + pg_regress = os.path.join(build_path, "pg_regress") + + pg_regress_command = [ + pg_regress, + "--use-existing", + "--bindir={}".format(bindir), + "--dlpath={}".format(build_path), + "--schedule={}".format(schedule), + "--inputdir={}".format(src_path), + ] + + env_vars = { + "PGPORT": str(pg.default_options["port"]), + "PGUSER": pg.default_options["user"], + "PGHOST": pg.default_options["host"], + } + + # Run the command. + # We don't capture the output. It's not too chatty, and it always + # logs the exact same data to `regression.out` anyway. 
+ with capsys.disabled(): + pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath) + + # checkpoint one more time to ensure that the lsn we get is the latest one + pg.safe_psql("CHECKPOINT") + pg.safe_psql("select pg_current_wal_insert_lsn()")[0][0] + + # Check that we restore the content of the datadir correctly + check_restored_datadir_content(test_output_dir, env, pg) diff --git a/test_runner/batch_others/test_pitr_gc.py b/test_runner/regress/test_pitr_gc.py similarity index 100% rename from test_runner/batch_others/test_pitr_gc.py rename to test_runner/regress/test_pitr_gc.py diff --git a/test_runner/batch_others/test_proxy.py b/test_runner/regress/test_proxy.py similarity index 100% rename from test_runner/batch_others/test_proxy.py rename to test_runner/regress/test_proxy.py diff --git a/test_runner/batch_others/test_read_validation.py b/test_runner/regress/test_read_validation.py similarity index 100% rename from test_runner/batch_others/test_read_validation.py rename to test_runner/regress/test_read_validation.py diff --git a/test_runner/batch_others/test_readonly_node.py b/test_runner/regress/test_readonly_node.py similarity index 100% rename from test_runner/batch_others/test_readonly_node.py rename to test_runner/regress/test_readonly_node.py diff --git a/test_runner/batch_others/test_recovery.py b/test_runner/regress/test_recovery.py similarity index 100% rename from test_runner/batch_others/test_recovery.py rename to test_runner/regress/test_recovery.py diff --git a/test_runner/batch_others/test_remote_storage.py b/test_runner/regress/test_remote_storage.py similarity index 100% rename from test_runner/batch_others/test_remote_storage.py rename to test_runner/regress/test_remote_storage.py diff --git a/test_runner/batch_others/test_setup.py b/test_runner/regress/test_setup.py similarity index 100% rename from test_runner/batch_others/test_setup.py rename to test_runner/regress/test_setup.py diff --git a/test_runner/batch_others/test_subxacts.py 
b/test_runner/regress/test_subxacts.py similarity index 100% rename from test_runner/batch_others/test_subxacts.py rename to test_runner/regress/test_subxacts.py diff --git a/test_runner/batch_others/test_tenant_conf.py b/test_runner/regress/test_tenant_conf.py similarity index 100% rename from test_runner/batch_others/test_tenant_conf.py rename to test_runner/regress/test_tenant_conf.py diff --git a/test_runner/batch_others/test_tenant_detach.py b/test_runner/regress/test_tenant_detach.py similarity index 100% rename from test_runner/batch_others/test_tenant_detach.py rename to test_runner/regress/test_tenant_detach.py diff --git a/test_runner/batch_others/test_tenant_relocation.py b/test_runner/regress/test_tenant_relocation.py similarity index 100% rename from test_runner/batch_others/test_tenant_relocation.py rename to test_runner/regress/test_tenant_relocation.py diff --git a/test_runner/batch_others/test_tenant_tasks.py b/test_runner/regress/test_tenant_tasks.py similarity index 100% rename from test_runner/batch_others/test_tenant_tasks.py rename to test_runner/regress/test_tenant_tasks.py diff --git a/test_runner/batch_others/test_tenants.py b/test_runner/regress/test_tenants.py similarity index 100% rename from test_runner/batch_others/test_tenants.py rename to test_runner/regress/test_tenants.py diff --git a/test_runner/batch_others/test_tenants_with_remote_storage.py b/test_runner/regress/test_tenants_with_remote_storage.py similarity index 100% rename from test_runner/batch_others/test_tenants_with_remote_storage.py rename to test_runner/regress/test_tenants_with_remote_storage.py diff --git a/test_runner/batch_others/test_timeline_delete.py b/test_runner/regress/test_timeline_delete.py similarity index 100% rename from test_runner/batch_others/test_timeline_delete.py rename to test_runner/regress/test_timeline_delete.py diff --git a/test_runner/batch_others/test_timeline_size.py b/test_runner/regress/test_timeline_size.py similarity index 100% rename 
from test_runner/batch_others/test_timeline_size.py rename to test_runner/regress/test_timeline_size.py diff --git a/test_runner/batch_others/test_twophase.py b/test_runner/regress/test_twophase.py similarity index 100% rename from test_runner/batch_others/test_twophase.py rename to test_runner/regress/test_twophase.py diff --git a/test_runner/batch_others/test_vm_bits.py b/test_runner/regress/test_vm_bits.py similarity index 100% rename from test_runner/batch_others/test_vm_bits.py rename to test_runner/regress/test_vm_bits.py diff --git a/test_runner/batch_others/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py similarity index 100% rename from test_runner/batch_others/test_wal_acceptor.py rename to test_runner/regress/test_wal_acceptor.py diff --git a/test_runner/batch_others/test_wal_acceptor_async.py b/test_runner/regress/test_wal_acceptor_async.py similarity index 100% rename from test_runner/batch_others/test_wal_acceptor_async.py rename to test_runner/regress/test_wal_acceptor_async.py diff --git a/test_runner/batch_others/test_wal_restore.py b/test_runner/regress/test_wal_restore.py similarity index 100% rename from test_runner/batch_others/test_wal_restore.py rename to test_runner/regress/test_wal_restore.py diff --git a/test_runner/neon_regress/.gitignore b/test_runner/sql_regress/.gitignore similarity index 100% rename from test_runner/neon_regress/.gitignore rename to test_runner/sql_regress/.gitignore diff --git a/test_runner/sql_regress/README.md b/test_runner/sql_regress/README.md new file mode 100644 index 0000000000..1ae8aaf61a --- /dev/null +++ b/test_runner/sql_regress/README.md @@ -0,0 +1,13 @@ +Simple tests that only need a PostgreSQL connection to run. +These are run by the regress/test_pg_regress.py test, which uses +the PostgreSQL pg_regress utility. 
+ +To add a new SQL test: + +- add sql script to run to sql_regress/sql/testname.sql +- add expected output to sql_regress/expected/testname.out +- add testname to parallel_schedule + +That's it. +For more complex tests see PostgreSQL regression tests in src/test/regress. +These work basically the same. diff --git a/test_runner/neon_regress/expected/.gitignore b/test_runner/sql_regress/expected/.gitignore similarity index 100% rename from test_runner/neon_regress/expected/.gitignore rename to test_runner/sql_regress/expected/.gitignore diff --git a/test_runner/neon_regress/expected/neon-cid.out b/test_runner/sql_regress/expected/neon-cid.out similarity index 100% rename from test_runner/neon_regress/expected/neon-cid.out rename to test_runner/sql_regress/expected/neon-cid.out diff --git a/test_runner/neon_regress/expected/neon-clog.out b/test_runner/sql_regress/expected/neon-clog.out similarity index 100% rename from test_runner/neon_regress/expected/neon-clog.out rename to test_runner/sql_regress/expected/neon-clog.out diff --git a/test_runner/neon_regress/expected/neon-rel-truncate.out b/test_runner/sql_regress/expected/neon-rel-truncate.out similarity index 100% rename from test_runner/neon_regress/expected/neon-rel-truncate.out rename to test_runner/sql_regress/expected/neon-rel-truncate.out diff --git a/test_runner/neon_regress/expected/neon-vacuum-full.out b/test_runner/sql_regress/expected/neon-vacuum-full.out similarity index 100% rename from test_runner/neon_regress/expected/neon-vacuum-full.out rename to test_runner/sql_regress/expected/neon-vacuum-full.out diff --git a/test_runner/neon_regress/parallel_schedule b/test_runner/sql_regress/parallel_schedule similarity index 100% rename from test_runner/neon_regress/parallel_schedule rename to test_runner/sql_regress/parallel_schedule diff --git a/test_runner/neon_regress/sql/.gitignore b/test_runner/sql_regress/sql/.gitignore similarity index 100% rename from test_runner/neon_regress/sql/.gitignore rename 
to test_runner/sql_regress/sql/.gitignore diff --git a/test_runner/neon_regress/sql/neon-cid.sql b/test_runner/sql_regress/sql/neon-cid.sql similarity index 100% rename from test_runner/neon_regress/sql/neon-cid.sql rename to test_runner/sql_regress/sql/neon-cid.sql diff --git a/test_runner/neon_regress/sql/neon-clog.sql b/test_runner/sql_regress/sql/neon-clog.sql similarity index 100% rename from test_runner/neon_regress/sql/neon-clog.sql rename to test_runner/sql_regress/sql/neon-clog.sql diff --git a/test_runner/neon_regress/sql/neon-rel-truncate.sql b/test_runner/sql_regress/sql/neon-rel-truncate.sql similarity index 100% rename from test_runner/neon_regress/sql/neon-rel-truncate.sql rename to test_runner/sql_regress/sql/neon-rel-truncate.sql diff --git a/test_runner/neon_regress/sql/neon-vacuum-full.sql b/test_runner/sql_regress/sql/neon-vacuum-full.sql similarity index 100% rename from test_runner/neon_regress/sql/neon-vacuum-full.sql rename to test_runner/sql_regress/sql/neon-vacuum-full.sql From f09bd6bc887c8370f6f82b4e942b504213eb8164 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Tue, 30 Aug 2022 18:44:06 +0300 Subject: [PATCH 58/63] Fix size checks in the "local" remote storage implementation. The code correctly detected too short and too long inputs, but the error message was bogus for the case the input stream was too long: Error: Provided stream has actual size 5 fthat is smaller than the given stream size 4 That check was only supposed to check for too small inputs, but it in fact caught too long inputs too. That was good, because the check below that that was supposed to check for too long inputs was in fact broken, and never did anything. It tried to read input a buffer of size 0, to check if there is any extra data, but reading to a zero-sized buffer always returns 0. 
--- libs/remote_storage/src/local_fs.rs | 49 ++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/libs/remote_storage/src/local_fs.rs b/libs/remote_storage/src/local_fs.rs index 07b04084b9..a65d0887af 100644 --- a/libs/remote_storage/src/local_fs.rs +++ b/libs/remote_storage/src/local_fs.rs @@ -150,8 +150,7 @@ impl RemoteStorage for LocalFs { ); let from_size_bytes = from_size_bytes as u64; - // Require to read 1 byte more than the expected to check later, that the stream and its size match. - let mut buffer_to_read = from.take(from_size_bytes + 1); + let mut buffer_to_read = from.take(from_size_bytes); let bytes_read = io::copy(&mut buffer_to_read, &mut destination) .await @@ -162,17 +161,15 @@ impl RemoteStorage for LocalFs { ) })?; + if bytes_read < from_size_bytes { + bail!("Provided stream was shorter than expected: {bytes_read} vs {from_size_bytes} bytes"); + } + // Check if there is any extra data after the given size. + let mut from = buffer_to_read.into_inner(); + let extra_read = from.read(&mut [1]).await?; ensure!( - bytes_read == from_size_bytes, - "Provided stream has actual size {} fthat is smaller than the given stream size {}", - bytes_read, - from_size_bytes - ); - - ensure!( - buffer_to_read.read(&mut [0]).await? == 0, - "Provided stream has bigger size than the given stream size {}", - from_size_bytes + extra_read == 0, + "Provided stream was larger than expected: expected {from_size_bytes} bytes", ); destination.flush().await.with_context(|| { @@ -609,6 +606,34 @@ mod fs_tests { Ok(()) } + #[tokio::test] + async fn upload_file_negatives() -> anyhow::Result<()> { + let storage = create_storage()?; + + let id = storage.remote_object_id(&storage.working_directory.join("dummy"))?; + let content = std::io::Cursor::new(b"12345"); + + // Check that you get an error if the size parameter doesn't match the actual + // size of the stream. 
+ storage + .upload(content.clone(), 0, &id, None) + .await + .expect_err("upload with zero size succeeded"); + storage + .upload(content.clone(), 4, &id, None) + .await + .expect_err("upload with too short size succeeded"); + storage + .upload(content.clone(), 6, &id, None) + .await + .expect_err("upload with too large size succeeded"); + + // Correct size is 5, this should succeed. + storage.upload(content, 5, &id, None).await?; + + Ok(()) + } + fn create_storage() -> anyhow::Result { LocalFs::new(tempdir()?.path().to_owned(), tempdir()?.path().to_owned()) } From a4803233bbf825449c2481aa78bf37c929cc0411 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Tue, 30 Aug 2022 22:19:52 +0300 Subject: [PATCH 59/63] Remove `RemoteObjectName` and many remote storage generics in pageserver (#2360) --- libs/remote_storage/src/lib.rs | 17 +- libs/remote_storage/src/local_fs.rs | 18 +- libs/remote_storage/src/s3_bucket.rs | 44 ++-- pageserver/src/bin/pageserver.rs | 14 +- pageserver/src/http/routes.rs | 28 +-- pageserver/src/storage_sync.rs | 118 ++++------ pageserver/src/storage_sync/delete.rs | 86 ++++---- pageserver/src/storage_sync/download.rs | 276 ++++++++++++++---------- pageserver/src/storage_sync/upload.rs | 152 +++++++------ pageserver/src/tenant_mgr.rs | 8 +- 10 files changed, 392 insertions(+), 369 deletions(-) diff --git a/libs/remote_storage/src/lib.rs b/libs/remote_storage/src/lib.rs index 07f8cb08aa..d5ad2f8633 100644 --- a/libs/remote_storage/src/lib.rs +++ b/libs/remote_storage/src/lib.rs @@ -42,19 +42,13 @@ pub const DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS: u32 = 10; /// https://aws.amazon.com/premiumsupport/knowledge-center/s3-request-limit-avoid-throttling/ pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100; -pub trait RemoteObjectName { - // Needed to retrieve last component for RemoteObjectId. - // In other words a file name - fn object_name(&self) -> Option<&str>; -} - /// Storage (potentially remote) API to manage its state. 
/// This storage tries to be unaware of any layered repository context, /// providing basic CRUD operations for storage files. #[async_trait::async_trait] pub trait RemoteStorage: Send + Sync { /// A way to uniquely reference a file in the remote storage. - type RemoteObjectId: RemoteObjectName; + type RemoteObjectId; /// Attempts to derive the storage path out of the local path, if the latter is correct. fn remote_object_id(&self, local_path: &Path) -> anyhow::Result; @@ -71,7 +65,7 @@ pub trait RemoteStorage: Send + Sync { /// so this method doesnt need to. async fn list_prefixes( &self, - prefix: Option, + prefix: Option<&Self::RemoteObjectId>, ) -> anyhow::Result>; /// Streams the local file contents into remote into the remote storage entry. @@ -163,6 +157,13 @@ impl GenericRemoteStorage { } } } + + pub fn as_local(&self) -> Option<&LocalFs> { + match self { + Self::Local(local_fs) => Some(local_fs), + _ => None, + } + } } /// Extra set of key-value pairs that contain arbitrary metadata about the storage entry. diff --git a/libs/remote_storage/src/local_fs.rs b/libs/remote_storage/src/local_fs.rs index a65d0887af..ddf6c01759 100644 --- a/libs/remote_storage/src/local_fs.rs +++ b/libs/remote_storage/src/local_fs.rs @@ -5,7 +5,6 @@ //! volume is mounted to the local FS. 
use std::{ - borrow::Cow, future::Future, path::{Path, PathBuf}, pin::Pin, @@ -18,16 +17,10 @@ use tokio::{ }; use tracing::*; -use crate::{path_with_suffix_extension, Download, DownloadError, RemoteObjectName}; +use crate::{path_with_suffix_extension, Download, DownloadError}; use super::{strip_path_prefix, RemoteStorage, StorageMetadata}; -impl RemoteObjectName for PathBuf { - fn object_name(&self) -> Option<&str> { - self.file_stem().and_then(|n| n.to_str()) - } -} - pub struct LocalFs { working_directory: PathBuf, storage_root: PathBuf, @@ -113,13 +106,10 @@ impl RemoteStorage for LocalFs { async fn list_prefixes( &self, - prefix: Option, + prefix: Option<&Self::RemoteObjectId>, ) -> anyhow::Result> { - let path = match prefix { - Some(prefix) => Cow::Owned(prefix), - None => Cow::Borrowed(&self.storage_root), - }; - get_all_files(path.as_ref(), false).await + let path = prefix.unwrap_or(&self.storage_root); + get_all_files(path, false).await } async fn upload( diff --git a/libs/remote_storage/src/s3_bucket.rs b/libs/remote_storage/src/s3_bucket.rs index 1b241fe4ed..db31200c36 100644 --- a/libs/remote_storage/src/s3_bucket.rs +++ b/libs/remote_storage/src/s3_bucket.rs @@ -19,9 +19,7 @@ use tokio::{io, sync::Semaphore}; use tokio_util::io::ReaderStream; use tracing::debug; -use crate::{ - strip_path_prefix, Download, DownloadError, RemoteObjectName, RemoteStorage, S3Config, -}; +use crate::{strip_path_prefix, Download, DownloadError, RemoteStorage, S3Config}; use super::StorageMetadata; @@ -96,6 +94,23 @@ const S3_PREFIX_SEPARATOR: char = '/'; pub struct S3ObjectKey(String); impl S3ObjectKey { + /// Turn a/b/c or a/b/c/ into c + pub fn object_name(&self) -> Option<&str> { + // corner case, char::to_string is not const, thats why this is more verbose than it needs to be + // see https://github.com/rust-lang/rust/issues/88674 + if self.0.len() == 1 && self.0.chars().next().unwrap() == S3_PREFIX_SEPARATOR { + return None; + } + + if 
self.0.ends_with(S3_PREFIX_SEPARATOR) { + self.0.rsplit(S3_PREFIX_SEPARATOR).nth(1) + } else { + self.0 + .rsplit_once(S3_PREFIX_SEPARATOR) + .map(|(_, last)| last) + } + } + fn key(&self) -> &str { &self.0 } @@ -119,25 +134,6 @@ impl S3ObjectKey { } } -impl RemoteObjectName for S3ObjectKey { - /// Turn a/b/c or a/b/c/ into c - fn object_name(&self) -> Option<&str> { - // corner case, char::to_string is not const, thats why this is more verbose than it needs to be - // see https://github.com/rust-lang/rust/issues/88674 - if self.0.len() == 1 && self.0.chars().next().unwrap() == S3_PREFIX_SEPARATOR { - return None; - } - - if self.0.ends_with(S3_PREFIX_SEPARATOR) { - self.0.rsplit(S3_PREFIX_SEPARATOR).nth(1) - } else { - self.0 - .rsplit_once(S3_PREFIX_SEPARATOR) - .map(|(_, last)| last) - } - } -} - /// AWS S3 storage. pub struct S3Bucket { workdir: PathBuf, @@ -316,11 +312,11 @@ impl RemoteStorage for S3Bucket { /// Note: it wont include empty "directories" async fn list_prefixes( &self, - prefix: Option, + prefix: Option<&Self::RemoteObjectId>, ) -> anyhow::Result> { // get the passed prefix or if it is not set use prefix_in_bucket value let list_prefix = prefix - .map(|p| p.0) + .map(|p| p.0.clone()) .or_else(|| self.prefix_in_bucket.clone()) .map(|mut p| { // required to end with a separator diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 1a13147f42..7a33a548e7 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -1,6 +1,7 @@ //! Main entry point for the Page Server executable. 
-use std::{env, ops::ControlFlow, path::Path, str::FromStr}; +use remote_storage::GenericRemoteStorage; +use std::{env, ops::ControlFlow, path::Path, str::FromStr, sync::Arc}; use tracing::*; use anyhow::{bail, Context, Result}; @@ -298,7 +299,14 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<() }; info!("Using auth: {:#?}", conf.auth_type); - let remote_index = tenant_mgr::init_tenant_mgr(conf)?; + let remote_storage = conf + .remote_storage_config + .as_ref() + .map(|storage_config| GenericRemoteStorage::new(conf.workdir.clone(), storage_config)) + .transpose() + .context("Failed to init generic remote storage")? + .map(Arc::new); + let remote_index = tenant_mgr::init_tenant_mgr(conf, remote_storage.as_ref().map(Arc::clone))?; // Spawn a new thread for the http endpoint // bind before launching separate thread so the error reported before startup exits @@ -310,7 +318,7 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<() "http_endpoint_thread", true, move || { - let router = http::make_router(conf, auth_cloned, remote_index)?; + let router = http::make_router(conf, auth_cloned, remote_index, remote_storage)?; endpoint::serve_thread_main(router, http_listener, thread_mgr::shutdown_watcher()) }, )?; diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 2bb181dd9a..ef18129504 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -35,7 +35,7 @@ struct State { auth: Option>, remote_index: RemoteIndex, allowlist_routes: Vec, - remote_storage: Option, + remote_storage: Option>, } impl State { @@ -43,20 +43,12 @@ impl State { conf: &'static PageServerConf, auth: Option>, remote_index: RemoteIndex, + remote_storage: Option>, ) -> anyhow::Result { let allowlist_routes = ["/v1/status", "/v1/doc", "/swagger.yml"] .iter() .map(|v| v.parse().unwrap()) .collect::>(); - // Note that this remote storage is created separately from the main one in the sync_loop. 
- // It's fine since it's stateless and some code duplication saves us from bloating the code around with generics. - let remote_storage = conf - .remote_storage_config - .as_ref() - .map(|storage_config| GenericRemoteStorage::new(conf.workdir.clone(), storage_config)) - .transpose() - .context("Failed to init generic remote storage")?; - Ok(Self { conf, auth, @@ -448,16 +440,8 @@ async fn gather_tenant_timelines_index_parts( tenant_id: ZTenantId, ) -> anyhow::Result>> { let index_parts = match state.remote_storage.as_ref() { - Some(GenericRemoteStorage::Local(local_storage)) => { - storage_sync::gather_tenant_timelines_index_parts(state.conf, local_storage, tenant_id) - .await - } - // FIXME here s3 storage contains its own limits, that are separate from sync storage thread ones - // because it is a different instance. We can move this limit to some global static - // or use one instance everywhere. - Some(GenericRemoteStorage::S3(s3_storage)) => { - storage_sync::gather_tenant_timelines_index_parts(state.conf, s3_storage, tenant_id) - .await + Some(storage) => { + storage_sync::gather_tenant_timelines_index_parts(state.conf, storage, tenant_id).await } None => return Ok(None), } @@ -714,6 +698,7 @@ pub fn make_router( conf: &'static PageServerConf, auth: Option>, remote_index: RemoteIndex, + remote_storage: Option>, ) -> anyhow::Result> { let spec = include_bytes!("openapi_spec.yml"); let mut router = attach_openapi_ui(endpoint::make_router(), spec, "/swagger.yml", "/v1/doc"); @@ -730,7 +715,8 @@ pub fn make_router( Ok(router .data(Arc::new( - State::new(conf, auth, remote_index).context("Failed to initialize router state")?, + State::new(conf, auth, remote_index, remote_storage) + .context("Failed to initialize router state")?, )) .get("/v1/status", status_handler) .get("/v1/tenant", tenant_list_handler) diff --git a/pageserver/src/storage_sync.rs b/pageserver/src/storage_sync.rs index 52d544b28c..a52cde7286 100644 --- a/pageserver/src/storage_sync.rs +++ 
b/pageserver/src/storage_sync.rs @@ -156,7 +156,7 @@ use std::{ use anyhow::{anyhow, bail, Context}; use futures::stream::{FuturesUnordered, StreamExt}; use once_cell::sync::{Lazy, OnceCell}; -use remote_storage::{GenericRemoteStorage, RemoteStorage}; +use remote_storage::GenericRemoteStorage; use tokio::{ fs, runtime::Runtime, @@ -253,36 +253,20 @@ pub struct SyncStartupData { /// Along with that, scans tenant files local and remote (if the sync gets enabled) to check the initial timeline states. pub fn start_local_timeline_sync( config: &'static PageServerConf, + storage: Option>, ) -> anyhow::Result { let local_timeline_files = local_tenant_timeline_files(config) .context("Failed to collect local tenant timeline files")?; - match config.remote_storage_config.as_ref() { - Some(storage_config) => { - match GenericRemoteStorage::new(config.workdir.clone(), storage_config) - .context("Failed to init the generic remote storage")? - { - GenericRemoteStorage::Local(local_fs_storage) => { - storage_sync::spawn_storage_sync_thread( - config, - local_timeline_files, - local_fs_storage, - storage_config.max_concurrent_syncs, - storage_config.max_sync_errors, - ) - } - GenericRemoteStorage::S3(s3_bucket_storage) => { - storage_sync::spawn_storage_sync_thread( - config, - local_timeline_files, - s3_bucket_storage, - storage_config.max_concurrent_syncs, - storage_config.max_sync_errors, - ) - } - } - .context("Failed to spawn the storage sync thread") - } + match storage.zip(config.remote_storage_config.as_ref()) { + Some((storage, storage_config)) => storage_sync::spawn_storage_sync_thread( + config, + local_timeline_files, + storage, + storage_config.max_concurrent_syncs, + storage_config.max_sync_errors, + ) + .context("Failed to spawn the storage sync thread"), None => { info!("No remote storage configured, skipping storage sync, considering all local timelines with correct metadata files enabled"); let mut local_timeline_init_statuses = LocalTimelineInitStatuses::new(); 
@@ -810,17 +794,13 @@ pub fn schedule_layer_download(tenant_id: ZTenantId, timeline_id: ZTimelineId) { /// Launch a thread to perform remote storage sync tasks. /// See module docs for loop step description. -pub(super) fn spawn_storage_sync_thread( +pub(super) fn spawn_storage_sync_thread( conf: &'static PageServerConf, local_timeline_files: HashMap)>, - storage: S, + storage: Arc, max_concurrent_timelines_sync: NonZeroUsize, max_sync_errors: NonZeroU32, -) -> anyhow::Result -where - P: Debug + Send + Sync + 'static, - S: RemoteStorage + Send + Sync + 'static, -{ +) -> anyhow::Result { let sync_queue = SyncQueue::new(max_concurrent_timelines_sync); SYNC_QUEUE .set(sync_queue) @@ -860,7 +840,7 @@ where storage_sync_loop( runtime, conf, - (Arc::new(storage), remote_index_clone, sync_queue), + (storage, remote_index_clone, sync_queue), max_sync_errors, ); Ok(()) @@ -873,15 +853,12 @@ where }) } -fn storage_sync_loop( +fn storage_sync_loop( runtime: Runtime, conf: &'static PageServerConf, - (storage, index, sync_queue): (Arc, RemoteIndex, &SyncQueue), + (storage, index, sync_queue): (Arc, RemoteIndex, &SyncQueue), max_sync_errors: NonZeroU32, -) where - P: Debug + Send + Sync + 'static, - S: RemoteStorage + Send + Sync + 'static, -{ +) { info!("Starting remote storage sync loop"); loop { let loop_storage = Arc::clone(&storage); @@ -983,18 +960,14 @@ enum UploadStatus { Nothing, } -async fn process_batches( +async fn process_batches( conf: &'static PageServerConf, max_sync_errors: NonZeroU32, - storage: Arc, + storage: Arc, index: &RemoteIndex, batched_tasks: HashMap, sync_queue: &SyncQueue, -) -> HashSet -where - P: Debug + Send + Sync + 'static, - S: RemoteStorage + Send + Sync + 'static, -{ +) -> HashSet { let mut sync_results = batched_tasks .into_iter() .map(|(sync_id, batch)| { @@ -1030,17 +1003,13 @@ where downloaded_timelines } -async fn process_sync_task_batch( +async fn process_sync_task_batch( conf: &'static PageServerConf, - (storage, index, sync_queue): 
(Arc, RemoteIndex, &SyncQueue), + (storage, index, sync_queue): (Arc, RemoteIndex, &SyncQueue), max_sync_errors: NonZeroU32, sync_id: ZTenantTimelineId, batch: SyncTaskBatch, -) -> DownloadStatus -where - P: Debug + Send + Sync + 'static, - S: RemoteStorage + Send + Sync + 'static, -{ +) -> DownloadStatus { let sync_start = Instant::now(); let current_remote_timeline = { index.read().await.timeline_entry(&sync_id).cloned() }; @@ -1175,19 +1144,15 @@ where download_status } -async fn download_timeline_data( +async fn download_timeline_data( conf: &'static PageServerConf, - (storage, index, sync_queue): (&S, &RemoteIndex, &SyncQueue), + (storage, index, sync_queue): (&GenericRemoteStorage, &RemoteIndex, &SyncQueue), current_remote_timeline: Option<&RemoteTimeline>, sync_id: ZTenantTimelineId, new_download_data: SyncData, sync_start: Instant, task_name: &str, -) -> DownloadStatus -where - P: Debug + Send + Sync + 'static, - S: RemoteStorage + Send + Sync + 'static, -{ +) -> DownloadStatus { match download_timeline_layers( conf, storage, @@ -1298,17 +1263,14 @@ async fn update_local_metadata( Ok(()) } -async fn delete_timeline_data( +async fn delete_timeline_data( conf: &'static PageServerConf, - (storage, index, sync_queue): (&S, &RemoteIndex, &SyncQueue), + (storage, index, sync_queue): (&GenericRemoteStorage, &RemoteIndex, &SyncQueue), sync_id: ZTenantTimelineId, mut new_delete_data: SyncData, sync_start: Instant, task_name: &str, -) where - P: Debug + Send + Sync + 'static, - S: RemoteStorage + Send + Sync + 'static, -{ +) { let timeline_delete = &mut new_delete_data.data; if !timeline_delete.deletion_registered { @@ -1343,19 +1305,15 @@ async fn read_metadata_file(metadata_path: &Path) -> anyhow::Result( +async fn upload_timeline_data( conf: &'static PageServerConf, - (storage, index, sync_queue): (&S, &RemoteIndex, &SyncQueue), + (storage, index, sync_queue): (&GenericRemoteStorage, &RemoteIndex, &SyncQueue), current_remote_timeline: Option<&RemoteTimeline>, 
sync_id: ZTenantTimelineId, new_upload_data: SyncData, sync_start: Instant, task_name: &str, -) -> UploadStatus -where - P: Debug + Send + Sync + 'static, - S: RemoteStorage + Send + Sync + 'static, -{ +) -> UploadStatus { let mut uploaded_data = match upload_timeline_layers( storage, sync_queue, @@ -1406,17 +1364,13 @@ enum RemoteDataUpdate<'a> { Delete(&'a HashSet), } -async fn update_remote_data( +async fn update_remote_data( conf: &'static PageServerConf, - storage: &S, + storage: &GenericRemoteStorage, index: &RemoteIndex, sync_id: ZTenantTimelineId, update: RemoteDataUpdate<'_>, -) -> anyhow::Result<()> -where - P: Debug + Send + Sync + 'static, - S: RemoteStorage + Send + Sync + 'static, -{ +) -> anyhow::Result<()> { let updated_remote_timeline = { let mut index_accessor = index.write().await; diff --git a/pageserver/src/storage_sync/delete.rs b/pageserver/src/storage_sync/delete.rs index 2e39ed073f..d80a082d0c 100644 --- a/pageserver/src/storage_sync/delete.rs +++ b/pageserver/src/storage_sync/delete.rs @@ -1,27 +1,25 @@ //! Timeline synchronization logic to delete a bulk of timeline's remote files from the remote storage. +use std::path::Path; + use anyhow::Context; use futures::stream::{FuturesUnordered, StreamExt}; use tracing::{debug, error, info}; use crate::storage_sync::{SyncQueue, SyncTask}; -use remote_storage::RemoteStorage; +use remote_storage::{GenericRemoteStorage, RemoteStorage}; use utils::zid::ZTenantTimelineId; use super::{LayersDeletion, SyncData}; /// Attempts to remove the timleline layers from the remote storage. /// If the task had not adjusted the metadata before, the deletion will fail. 
-pub(super) async fn delete_timeline_layers<'a, P, S>( - storage: &'a S, +pub(super) async fn delete_timeline_layers<'a>( + storage: &'a GenericRemoteStorage, sync_queue: &SyncQueue, sync_id: ZTenantTimelineId, mut delete_data: SyncData, -) -> bool -where - P: std::fmt::Debug + Send + Sync + 'static, - S: RemoteStorage + Send + Sync + 'static, -{ +) -> bool { if !delete_data.data.deletion_registered { error!("Cannot delete timeline layers before the deletion metadata is not registered, reenqueueing"); delete_data.retries += 1; @@ -45,25 +43,14 @@ where let mut delete_tasks = layers_to_delete .into_iter() .map(|local_layer_path| async { - let storage_path = - match storage - .remote_object_id(&local_layer_path) - .with_context(|| { - format!( - "Failed to get the layer storage path for local path '{}'", - local_layer_path.display() - ) - }) { - Ok(path) => path, - Err(e) => return Err((e, local_layer_path)), - }; - - match storage.delete(&storage_path).await.with_context(|| { - format!( - "Failed to delete remote layer from storage at '{:?}'", - storage_path - ) - }) { + match match storage { + GenericRemoteStorage::Local(storage) => { + remove_storage_object(storage, &local_layer_path).await + } + GenericRemoteStorage::S3(storage) => { + remove_storage_object(storage, &local_layer_path).await + } + } { Ok(()) => Ok(local_layer_path), Err(e) => Err((e, local_layer_path)), } @@ -101,6 +88,28 @@ where errored } +async fn remove_storage_object(storage: &S, local_layer_path: &Path) -> anyhow::Result<()> +where + P: std::fmt::Debug + Send + Sync + 'static, + S: RemoteStorage + Send + Sync + 'static, +{ + let storage_path = storage + .remote_object_id(local_layer_path) + .with_context(|| { + format!( + "Failed to get the layer storage path for local path '{}'", + local_layer_path.display() + ) + })?; + + storage.delete(&storage_path).await.with_context(|| { + format!( + "Failed to delete remote layer from storage at '{:?}'", + storage_path + ) + }) +} + #[cfg(test)] mod 
tests { use std::{collections::HashSet, num::NonZeroUsize}; @@ -114,7 +123,7 @@ mod tests { layered_repository::repo_harness::{RepoHarness, TIMELINE_ID}, storage_sync::test_utils::{create_local_timeline, dummy_metadata}, }; - use remote_storage::LocalFs; + use remote_storage::{LocalFs, RemoteStorage}; use super::*; @@ -123,10 +132,10 @@ mod tests { let harness = RepoHarness::create("delete_timeline_negative")?; let sync_queue = SyncQueue::new(NonZeroUsize::new(100).unwrap()); let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID); - let storage = LocalFs::new( + let storage = GenericRemoteStorage::Local(LocalFs::new( tempdir()?.path().to_path_buf(), harness.conf.workdir.clone(), - )?; + )?); let deleted = delete_timeline_layers( &storage, @@ -158,17 +167,20 @@ mod tests { let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID); let layer_files = ["a", "b", "c", "d"]; - let storage = LocalFs::new( + let storage = GenericRemoteStorage::Local(LocalFs::new( tempdir()?.path().to_path_buf(), harness.conf.workdir.clone(), - )?; + )?); + + let local_storage = storage.as_local().unwrap(); + let current_retries = 3; let metadata = dummy_metadata(Lsn(0x30)); let local_timeline_path = harness.timeline_path(&TIMELINE_ID); let timeline_upload = create_local_timeline(&harness, TIMELINE_ID, &layer_files, metadata.clone()).await?; for local_path in timeline_upload.layers_to_upload { - let remote_path = storage.remote_object_id(&local_path)?; + let remote_path = local_storage.remote_object_id(&local_path)?; let remote_parent_dir = remote_path.parent().unwrap(); if !remote_parent_dir.exists() { fs::create_dir_all(&remote_parent_dir).await?; @@ -176,11 +188,11 @@ mod tests { fs::copy(&local_path, &remote_path).await?; } assert_eq!( - storage + local_storage .list() .await? 
.into_iter() - .map(|remote_path| storage.local_path(&remote_path).unwrap()) + .map(|remote_path| local_storage.local_path(&remote_path).unwrap()) .filter_map(|local_path| { Some(local_path.file_name()?.to_str()?.to_owned()) }) .sorted() .collect::>(), @@ -213,11 +225,11 @@ mod tests { assert!(deleted, "Should be able to delete timeline files"); assert_eq!( - storage + local_storage .list() .await? .into_iter() - .map(|remote_path| storage.local_path(&remote_path).unwrap()) + .map(|remote_path| local_storage.local_path(&remote_path).unwrap()) .filter_map(|local_path| { Some(local_path.file_name()?.to_str()?.to_owned()) }) .sorted() .collect::>(), diff --git a/pageserver/src/storage_sync/download.rs b/pageserver/src/storage_sync/download.rs index 98c45bf9af..8e6aa47c88 100644 --- a/pageserver/src/storage_sync/download.rs +++ b/pageserver/src/storage_sync/download.rs @@ -9,7 +9,9 @@ use std::{ use anyhow::Context; use futures::stream::{FuturesUnordered, StreamExt}; -use remote_storage::{path_with_suffix_extension, DownloadError, RemoteObjectName, RemoteStorage}; +use remote_storage::{ + path_with_suffix_extension, Download, DownloadError, GenericRemoteStorage, RemoteStorage, +}; use tokio::{ fs, io::{self, AsyncWriteExt}, @@ -62,15 +64,11 @@ impl Default for TenantIndexParts { } } -pub async fn download_index_parts( +pub async fn download_index_parts( conf: &'static PageServerConf, - storage: &S, + storage: &GenericRemoteStorage, keys: HashSet, -) -> HashMap -where - P: Debug + Send + Sync + 'static, - S: RemoteStorage + Send + Sync + 'static, -{ +) -> HashMap { let mut index_parts: HashMap = HashMap::new(); let mut part_downloads = keys @@ -114,60 +112,17 @@ where /// Note: The function is rather expensive from s3 access point of view, it will execute ceil(N/1000) + N requests. /// At least one request to obtain a list of tenant timelines (more requests is there are more than 1000 timelines). 
/// And then will attempt to download all index files that belong to these timelines. -pub async fn gather_tenant_timelines_index_parts( +pub async fn gather_tenant_timelines_index_parts( conf: &'static PageServerConf, - storage: &S, + storage: &GenericRemoteStorage, tenant_id: ZTenantId, -) -> anyhow::Result> -where - P: RemoteObjectName + Debug + Send + Sync + 'static, - S: RemoteStorage + Send + Sync + 'static, -{ +) -> anyhow::Result> { let tenant_path = conf.timelines_path(&tenant_id); - let tenant_storage_path = storage.remote_object_id(&tenant_path).with_context(|| { - format!( - "Failed to get tenant storage path for local path '{}'", - tenant_path.display() - ) - })?; - - let timelines = storage - .list_prefixes(Some(tenant_storage_path)) + let timeline_sync_ids = get_timeline_sync_ids(storage, &tenant_path, tenant_id) .await - .with_context(|| { - format!( - "Failed to list tenant storage path to get remote timelines to download: {}", - tenant_id - ) - })?; + .with_context(|| format!("Failed to list timeline sync ids for tenat {tenant_id}"))?; - if timelines.is_empty() { - anyhow::bail!( - "no timelines found on the remote storage for tenant {}", - tenant_id - ) - } - - let mut sync_ids = HashSet::new(); - - for timeline_remote_storage_key in timelines { - let object_name = timeline_remote_storage_key.object_name().ok_or_else(|| { - anyhow::anyhow!("failed to get timeline id for remote tenant {tenant_id}") - })?; - - let timeline_id: ZTimelineId = object_name - .parse() - .with_context(|| { - format!("failed to parse object name into timeline id for tenant {tenant_id} '{object_name}'") - })?; - - sync_ids.insert(ZTenantTimelineId { - tenant_id, - timeline_id, - }); - } - - match download_index_parts(conf, storage, sync_ids) + match download_index_parts(conf, storage, timeline_sync_ids) .await .remove(&tenant_id) .ok_or_else(|| anyhow::anyhow!("Missing tenant index parts. This is a bug."))? 
@@ -180,29 +135,15 @@ where } /// Retrieves index data from the remote storage for a given timeline. -async fn download_index_part( +async fn download_index_part( conf: &'static PageServerConf, - storage: &S, + storage: &GenericRemoteStorage, sync_id: ZTenantTimelineId, -) -> Result -where - P: Debug + Send + Sync + 'static, - S: RemoteStorage + Send + Sync + 'static, -{ +) -> Result { let index_part_path = metadata_path(conf, sync_id.timeline_id, sync_id.tenant_id) .with_file_name(IndexPart::FILE_NAME) .with_extension(IndexPart::FILE_EXTENSION); - let part_storage_path = storage - .remote_object_id(&index_part_path) - .with_context(|| { - format!( - "Failed to get the index part storage path for local path '{}'", - index_part_path.display() - ) - }) - .map_err(DownloadError::BadInput)?; - - let mut index_part_download = storage.download(&part_storage_path).await?; + let mut index_part_download = download_storage_object(storage, &index_part_path).await?; let mut index_part_bytes = Vec::new(); io::copy( @@ -211,14 +152,18 @@ where ) .await .with_context(|| { - format!("Failed to download an index part from storage path {part_storage_path:?}") + format!( + "Failed to download an index part into file '{}'", + index_part_path.display() + ) }) .map_err(DownloadError::Other)?; let index_part: IndexPart = serde_json::from_slice(&index_part_bytes) .with_context(|| { format!( - "Failed to deserialize index part file from storage path '{part_storage_path:?}'" + "Failed to deserialize index part file into file '{}'", + index_part_path.display() ) }) .map_err(DownloadError::Other)?; @@ -249,18 +194,14 @@ pub(super) enum DownloadedTimeline { /// updated in the end, if the remote one contains a newer disk_consistent_lsn. /// /// On an error, bumps the retries count and updates the files to skip with successful downloads, rescheduling the task. 
-pub(super) async fn download_timeline_layers<'a, P, S>( +pub(super) async fn download_timeline_layers<'a>( conf: &'static PageServerConf, - storage: &'a S, + storage: &'a GenericRemoteStorage, sync_queue: &'a SyncQueue, remote_timeline: Option<&'a RemoteTimeline>, sync_id: ZTenantTimelineId, mut download_data: SyncData, -) -> DownloadedTimeline -where - P: Debug + Send + Sync + 'static, - S: RemoteStorage + Send + Sync + 'static, -{ +) -> DownloadedTimeline { let remote_timeline = match remote_timeline { Some(remote_timeline) => { if !remote_timeline.awaits_download { @@ -300,15 +241,6 @@ where layer_desination_path.display() ); } else { - let layer_storage_path = storage - .remote_object_id(&layer_desination_path) - .with_context(|| { - format!( - "Failed to get the layer storage path for local path '{}'", - layer_desination_path.display() - ) - })?; - // Perform a rename inspired by durable_rename from file_utils.c. // The sequence: // write(tmp) @@ -329,19 +261,23 @@ where temp_file_path.display() ) })?; - let mut download = storage - .download(&layer_storage_path) + + let mut layer_download = download_storage_object(storage, &layer_desination_path) .await .with_context(|| { format!( - "Failed to open a download stream for layer with remote storage path '{layer_storage_path:?}'" + "Failed to initiate the download the layer for {sync_id} into file '{}'", + temp_file_path.display() + ) + })?; + io::copy(&mut layer_download.download_stream, &mut destination_file) + .await + .with_context(|| { + format!( + "Failed to download the layer for {sync_id} into file '{}'", + temp_file_path.display() ) })?; - io::copy(&mut download.download_stream, &mut destination_file).await.with_context(|| { - format!( - "Failed to download layer with remote storage path '{layer_storage_path:?}' into file '{}'", temp_file_path.display() - ) - })?; // Tokio doc here: https://docs.rs/tokio/1.17.0/tokio/fs/struct.File.html states that: // A file will not be closed immediately when it goes 
out of scope if there are any IO operations @@ -429,6 +365,121 @@ where } } +async fn download_storage_object( + storage: &GenericRemoteStorage, + to_path: &Path, +) -> Result { + async fn do_download_storage_object( + storage: &S, + to_path: &Path, + ) -> Result + where + P: std::fmt::Debug + Send + Sync + 'static, + S: RemoteStorage + Send + Sync + 'static, + { + let remote_object_path = storage + .remote_object_id(to_path) + .with_context(|| { + format!( + "Failed to get the storage path for target local path '{}'", + to_path.display() + ) + }) + .map_err(DownloadError::BadInput)?; + + storage.download(&remote_object_path).await + } + + match storage { + GenericRemoteStorage::Local(storage) => do_download_storage_object(storage, to_path).await, + GenericRemoteStorage::S3(storage) => do_download_storage_object(storage, to_path).await, + } +} + +async fn get_timeline_sync_ids( + storage: &GenericRemoteStorage, + tenant_path: &Path, + tenant_id: ZTenantId, +) -> anyhow::Result> { + let timeline_ids: Vec = match storage { + GenericRemoteStorage::Local(storage) => list_prefixes(storage, tenant_path) + .await? + .into_iter() + .map(|timeline_directory_path| { + timeline_directory_path + .file_stem() + .with_context(|| { + format!( + "Failed to get timeline id string from file '{}'", + timeline_directory_path.display() + ) + })? + .to_string_lossy() + .as_ref() + .parse() + .with_context(|| { + format!( + "failed to parse directory name '{}' as timeline id", + timeline_directory_path.display() + ) + }) + }) + .collect::>(), + GenericRemoteStorage::S3(storage) => list_prefixes(storage, tenant_path) + .await? + .into_iter() + .map(|s3_path| { + s3_path + .object_name() + .with_context(|| { + format!("Failed to get object name out of S3 path {s3_path:?}") + })? 
+ .parse() + .with_context(|| { + format!("failed to parse object name '{s3_path:?}' as timeline id") + }) + }) + .collect::>(), + } + .with_context(|| { + format!("Tenant {tenant_id} has at least one incorrect timeline subdirectory") + })?; + + if timeline_ids.is_empty() { + anyhow::bail!("no timelines found on the remote storage for tenant {tenant_id}") + } + + Ok(timeline_ids + .into_iter() + .map(|timeline_id| ZTenantTimelineId { + tenant_id, + timeline_id, + }) + .collect()) +} + +async fn list_prefixes(storage: &S, tenant_path: &Path) -> anyhow::Result> +where + P: std::fmt::Debug + Send + Sync + 'static, + S: RemoteStorage + Send + Sync + 'static, +{ + let tenant_storage_path = storage.remote_object_id(tenant_path).with_context(|| { + format!( + "Failed to get tenant storage path for local path '{}'", + tenant_path.display() + ) + })?; + + storage + .list_prefixes(Some(&tenant_storage_path)) + .await + .with_context(|| { + format!( + "Failed to list tenant storage path {tenant_storage_path:?} to get remote timelines to download" + ) + }) +} + async fn fsync_path(path: impl AsRef) -> Result<(), io::Error> { fs::File::open(path).await?.sync_all().await } @@ -461,10 +512,11 @@ mod tests { let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID); let layer_files = ["a", "b", "layer_to_skip", "layer_to_keep_locally"]; - let storage = LocalFs::new( - tempdir()?.path().to_path_buf(), + let storage = GenericRemoteStorage::Local(LocalFs::new( + tempdir()?.path().to_owned(), harness.conf.workdir.clone(), - )?; + )?); + let local_storage = storage.as_local().unwrap(); let current_retries = 3; let metadata = dummy_metadata(Lsn(0x30)); let local_timeline_path = harness.timeline_path(&TIMELINE_ID); @@ -472,7 +524,7 @@ mod tests { create_local_timeline(&harness, TIMELINE_ID, &layer_files, metadata.clone()).await?; for local_path in timeline_upload.layers_to_upload { - let remote_path = storage.remote_object_id(&local_path)?; + let remote_path = 
local_storage.remote_object_id(&local_path)?; let remote_parent_dir = remote_path.parent().unwrap(); if !remote_parent_dir.exists() { fs::create_dir_all(&remote_parent_dir).await?; @@ -558,7 +610,10 @@ mod tests { let harness = RepoHarness::create("download_timeline_negatives")?; let sync_queue = SyncQueue::new(NonZeroUsize::new(100).unwrap()); let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID); - let storage = LocalFs::new(tempdir()?.path().to_owned(), harness.conf.workdir.clone())?; + let storage = GenericRemoteStorage::Local(LocalFs::new( + tempdir()?.path().to_owned(), + harness.conf.workdir.clone(), + )?); let empty_remote_timeline_download = download_timeline_layers( harness.conf, @@ -614,10 +669,11 @@ mod tests { let harness = RepoHarness::create("test_download_index_part")?; let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID); - let storage = LocalFs::new( - tempdir()?.path().to_path_buf(), + let storage = GenericRemoteStorage::Local(LocalFs::new( + tempdir()?.path().to_owned(), harness.conf.workdir.clone(), - )?; + )?); + let local_storage = storage.as_local().unwrap(); let metadata = dummy_metadata(Lsn(0x30)); let local_timeline_path = harness.timeline_path(&TIMELINE_ID); @@ -638,7 +694,7 @@ mod tests { metadata_path(harness.conf, sync_id.timeline_id, sync_id.tenant_id) .with_file_name(IndexPart::FILE_NAME) .with_extension(IndexPart::FILE_EXTENSION); - let storage_path = storage.remote_object_id(&local_index_part_path)?; + let storage_path = local_storage.remote_object_id(&local_index_part_path)?; fs::create_dir_all(storage_path.parent().unwrap()).await?; fs::write(&storage_path, serde_json::to_vec(&index_part)?).await?; diff --git a/pageserver/src/storage_sync/upload.rs b/pageserver/src/storage_sync/upload.rs index 2acc935537..a8c768e0ae 100644 --- a/pageserver/src/storage_sync/upload.rs +++ b/pageserver/src/storage_sync/upload.rs @@ -1,11 +1,14 @@ //! 
Timeline synchronization logic to compress and upload to the remote storage all new timeline files from the checkpoints. -use std::{fmt::Debug, path::PathBuf}; +use std::{ + fmt::Debug, + path::{Path, PathBuf}, +}; use anyhow::Context; use futures::stream::{FuturesUnordered, StreamExt}; use once_cell::sync::Lazy; -use remote_storage::RemoteStorage; +use remote_storage::{GenericRemoteStorage, RemoteStorage}; use tokio::fs; use tracing::{debug, error, info, warn}; @@ -30,16 +33,12 @@ static NO_LAYERS_UPLOAD: Lazy = Lazy::new(|| { }); /// Serializes and uploads the given index part data to the remote storage. -pub(super) async fn upload_index_part( +pub(super) async fn upload_index_part( conf: &'static PageServerConf, - storage: &S, + storage: &GenericRemoteStorage, sync_id: ZTenantTimelineId, index_part: IndexPart, -) -> anyhow::Result<()> -where - P: Debug + Send + Sync + 'static, - S: RemoteStorage + Send + Sync + 'static, -{ +) -> anyhow::Result<()> { let index_part_bytes = serde_json::to_vec(&index_part) .context("Failed to serialize index part file into bytes")?; let index_part_size = index_part_bytes.len(); @@ -48,27 +47,9 @@ where let index_part_path = metadata_path(conf, sync_id.timeline_id, sync_id.tenant_id) .with_file_name(IndexPart::FILE_NAME) .with_extension(IndexPart::FILE_EXTENSION); - let index_part_storage_path = - storage - .remote_object_id(&index_part_path) - .with_context(|| { - format!( - "Failed to get the index part storage path for local path '{}'", - index_part_path.display() - ) - })?; - - storage - .upload( - index_part_bytes, - index_part_size, - &index_part_storage_path, - None, - ) + upload_storage_object(storage, index_part_bytes, index_part_size, &index_part_path) .await - .with_context(|| { - format!("Failed to upload index part to the storage path '{index_part_storage_path:?}'") - }) + .with_context(|| format!("Failed to upload index part for '{sync_id}'")) } /// Timeline upload result, with extra data, needed for uploading. 
@@ -84,17 +65,13 @@ pub(super) enum UploadedTimeline { /// No extra checks for overlapping files is made and any files that are already present remotely will be overwritten, if submitted during the upload. /// /// On an error, bumps the retries count and reschedules the entire task. -pub(super) async fn upload_timeline_layers<'a, P, S>( - storage: &'a S, +pub(super) async fn upload_timeline_layers<'a>( + storage: &'a GenericRemoteStorage, sync_queue: &SyncQueue, remote_timeline: Option<&'a RemoteTimeline>, sync_id: ZTenantTimelineId, mut upload_data: SyncData, -) -> UploadedTimeline -where - P: Debug + Send + Sync + 'static, - S: RemoteStorage + Send + Sync + 'static, -{ +) -> UploadedTimeline { let upload = &mut upload_data.data; let new_upload_lsn = upload .metadata @@ -132,16 +109,6 @@ where let mut upload_tasks = layers_to_upload .into_iter() .map(|source_path| async move { - let storage_path = storage - .remote_object_id(&source_path) - .with_context(|| { - format!( - "Failed to get the layer storage path for local path '{}'", - source_path.display() - ) - }) - .map_err(UploadError::Other)?; - let source_file = match fs::File::open(&source_path).await.with_context(|| { format!( "Failed to upen a source file for layer '{}'", @@ -164,15 +131,10 @@ where .map_err(UploadError::Other)? 
.len() as usize; - match storage - .upload(source_file, source_size, &storage_path, None) + match upload_storage_object(storage, source_file, source_size, &source_path) .await - .with_context(|| { - format!( - "Failed to upload a layer from local path '{}'", - source_path.display() - ) - }) { + .with_context(|| format!("Failed to upload layer file for {sync_id}")) + { Ok(()) => Ok(source_path), Err(e) => Err(UploadError::MissingLocalFile(source_path, e)), } @@ -231,6 +193,51 @@ where } } +async fn upload_storage_object( + storage: &GenericRemoteStorage, + from: impl tokio::io::AsyncRead + Unpin + Send + Sync + 'static, + from_size_bytes: usize, + from_path: &Path, +) -> anyhow::Result<()> { + async fn do_upload_storage_object( + storage: &S, + from: impl tokio::io::AsyncRead + Unpin + Send + Sync + 'static, + from_size_bytes: usize, + from_path: &Path, + ) -> anyhow::Result<()> + where + P: std::fmt::Debug + Send + Sync + 'static, + S: RemoteStorage + Send + Sync + 'static, + { + let target_storage_path = storage.remote_object_id(from_path).with_context(|| { + format!( + "Failed to get the storage path for source local path '{}'", + from_path.display() + ) + })?; + + storage + .upload(from, from_size_bytes, &target_storage_path, None) + .await + .with_context(|| { + format!( + "Failed to upload from '{}' to storage path '{:?}'", + from_path.display(), + target_storage_path + ) + }) + } + + match storage { + GenericRemoteStorage::Local(storage) => { + do_upload_storage_object(storage, from, from_size_bytes, from_path).await + } + GenericRemoteStorage::S3(storage) => { + do_upload_storage_object(storage, from, from_size_bytes, from_path).await + } + } +} + enum UploadError { MissingLocalFile(PathBuf, anyhow::Error), Other(anyhow::Error), @@ -243,7 +250,7 @@ mod tests { num::NonZeroUsize, }; - use remote_storage::LocalFs; + use remote_storage::{LocalFs, RemoteStorage}; use tempfile::tempdir; use utils::lsn::Lsn; @@ -264,10 +271,11 @@ mod tests { let sync_id = 
ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID); let layer_files = ["a", "b"]; - let storage = LocalFs::new( - tempdir()?.path().to_path_buf(), + let storage = GenericRemoteStorage::Local(LocalFs::new( + tempdir()?.path().to_owned(), harness.conf.workdir.clone(), - )?; + )?); + let local_storage = storage.as_local().unwrap(); let current_retries = 3; let metadata = dummy_metadata(Lsn(0x30)); let local_timeline_path = harness.timeline_path(&TIMELINE_ID); @@ -276,7 +284,7 @@ mod tests { timeline_upload.metadata = None; assert!( - storage.list().await?.is_empty(), + local_storage.list().await?.is_empty(), "Storage should be empty before any uploads are made" ); @@ -322,7 +330,7 @@ mod tests { "Successful upload without metadata should not have it returned either" ); - let storage_files = storage.list().await?; + let storage_files = local_storage.list().await?; assert_eq!( storage_files.len(), layer_files.len(), @@ -331,7 +339,7 @@ mod tests { assert_eq!( storage_files .into_iter() - .map(|storage_path| storage.local_path(&storage_path)) + .map(|storage_path| local_storage.local_path(&storage_path)) .collect::>>()?, layer_files .into_iter() @@ -351,7 +359,11 @@ mod tests { let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID); let layer_files = ["a1", "b1"]; - let storage = LocalFs::new(tempdir()?.path().to_owned(), harness.conf.workdir.clone())?; + let storage = GenericRemoteStorage::Local(LocalFs::new( + tempdir()?.path().to_owned(), + harness.conf.workdir.clone(), + )?); + let local_storage = storage.as_local().unwrap(); let current_retries = 5; let metadata = dummy_metadata(Lsn(0x40)); @@ -365,7 +377,7 @@ mod tests { create_local_timeline(&harness, TIMELINE_ID, &layers_to_upload, metadata.clone()) .await?; assert!( - storage.list().await?.is_empty(), + local_storage.list().await?.is_empty(), "Storage should be empty before any uploads are made" ); @@ -414,7 +426,7 @@ mod tests { "Successful upload should not change its metadata" ); - let 
storage_files = storage.list().await?; + let storage_files = local_storage.list().await?; assert_eq!( storage_files.len(), layer_files.len(), @@ -423,7 +435,7 @@ mod tests { assert_eq!( storage_files .into_iter() - .map(|storage_path| storage.local_path(&storage_path)) + .map(|storage_path| local_storage.local_path(&storage_path)) .collect::>>()?, layer_files .into_iter() @@ -440,7 +452,11 @@ mod tests { let harness = RepoHarness::create("test_upload_index_part")?; let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID); - let storage = LocalFs::new(tempdir()?.path().to_owned(), harness.conf.workdir.clone())?; + let storage = GenericRemoteStorage::Local(LocalFs::new( + tempdir()?.path().to_owned(), + harness.conf.workdir.clone(), + )?); + let local_storage = storage.as_local().unwrap(); let metadata = dummy_metadata(Lsn(0x40)); let local_timeline_path = harness.timeline_path(&TIMELINE_ID); @@ -458,12 +474,12 @@ mod tests { ); assert!( - storage.list().await?.is_empty(), + local_storage.list().await?.is_empty(), "Storage should be empty before any uploads are made" ); upload_index_part(harness.conf, &storage, sync_id, index_part.clone()).await?; - let storage_files = storage.list().await?; + let storage_files = local_storage.list().await?; assert_eq!( storage_files.len(), 1, diff --git a/pageserver/src/tenant_mgr.rs b/pageserver/src/tenant_mgr.rs index 921d973a41..4a907ac0e1 100644 --- a/pageserver/src/tenant_mgr.rs +++ b/pageserver/src/tenant_mgr.rs @@ -12,6 +12,7 @@ use crate::thread_mgr::ThreadKind; use crate::walredo::PostgresRedoManager; use crate::{thread_mgr, timelines, walreceiver}; use anyhow::Context; +use remote_storage::GenericRemoteStorage; use serde::{Deserialize, Serialize}; use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; @@ -131,7 +132,10 @@ impl fmt::Display for TenantState { /// Initialize repositories with locally available timelines. 
/// Timelines that are only partially available locally (remote storage has more data than this pageserver) /// are scheduled for download and added to the repository once download is completed. -pub fn init_tenant_mgr(conf: &'static PageServerConf) -> anyhow::Result { +pub fn init_tenant_mgr( + conf: &'static PageServerConf, + remote_storage: Option>, +) -> anyhow::Result { let (timeline_updates_sender, timeline_updates_receiver) = mpsc::unbounded_channel::(); tenants_state::set_timeline_update_sender(timeline_updates_sender)?; @@ -140,7 +144,7 @@ pub fn init_tenant_mgr(conf: &'static PageServerConf) -> anyhow::Result Date: Wed, 31 Aug 2022 14:36:24 +0200 Subject: [PATCH 60/63] Remove deprecated notification channel (#2330) Co-authored-by: Rory de Zoete --- .github/workflows/notifications.yml | 45 ----------------------------- 1 file changed, 45 deletions(-) delete mode 100644 .github/workflows/notifications.yml diff --git a/.github/workflows/notifications.yml b/.github/workflows/notifications.yml deleted file mode 100644 index 55dc979896..0000000000 --- a/.github/workflows/notifications.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: Send Notifications - -on: - push: - branches: [ main ] - -jobs: - send-notifications: - timeout-minutes: 30 - name: send commit notifications - runs-on: ubuntu-latest - - steps: - - - name: Checkout - uses: actions/checkout@v2 - with: - submodules: true - fetch-depth: 2 - - - name: Form variables for notification message - id: git_info_grab - run: | - git_stat=$(git show --stat=50) - git_stat="${git_stat//'%'/'%25'}" - git_stat="${git_stat//$'\n'/'%0A'}" - git_stat="${git_stat//$'\r'/'%0D'}" - git_stat="${git_stat// / }" # space -> 'Space En', as github tends to eat ordinary spaces - echo "::set-output name=git_stat::$git_stat" - echo "::set-output name=sha_short::$(git rev-parse --short HEAD)" - echo "##[set-output name=git_branch;]$(echo ${GITHUB_REF#refs/heads/})" - - - name: Send notification - uses: appleboy/telegram-action@master - 
with: - to: ${{ secrets.TELEGRAM_TO }} - token: ${{ secrets.TELEGRAM_TOKEN }} - format: markdown - args: | - *@${{ github.actor }} pushed to* [${{ github.repository }}:${{steps.git_info_grab.outputs.git_branch}}](github.com/${{ github.repository }}/commit/${{steps.git_info_grab.outputs.sha_short }}) - - ``` - ${{ steps.git_info_grab.outputs.git_stat }} - ``` - From d7c9cfe7bb30c0908a74dd5a80a437ec3ab36571 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Wed, 31 Aug 2022 16:15:26 +0100 Subject: [PATCH 61/63] Create Allure report for perf tests (#2326) --- .github/actions/allure-report/action.yml | 10 +- .../actions/run-python-test-set/action.yml | 39 +++-- .github/workflows/benchmarking.yml | 78 +++++---- .github/workflows/build_and_test.yml | 4 +- poetry.lock | 149 +++++++++--------- pyproject.toml | 2 +- test_runner/fixtures/neon_fixtures.py | 63 +++----- test_runner/fixtures/utils.py | 39 ++++- 8 files changed, 195 insertions(+), 189 deletions(-) diff --git a/.github/actions/allure-report/action.yml b/.github/actions/allure-report/action.yml index 2e52bd7695..34761f8df1 100644 --- a/.github/actions/allure-report/action.yml +++ b/.github/actions/allure-report/action.yml @@ -18,7 +18,7 @@ runs: - name: Validate input parameters shell: bash -euxo pipefail {0} run: | - if [ "${{ inputs.action }}" != "store"] && [ "${{ inputs.action }}" != "generate" ]; then + if [ "${{ inputs.action }}" != "store" ] && [ "${{ inputs.action }}" != "generate" ]; then echo 2>&1 "Unknown inputs.action type '${{ inputs.action }}'; allowed 'generate' or 'store' only" exit 1 fi @@ -41,7 +41,7 @@ runs: # Shortcut for a special branch key=main else - key=branch-$(echo ${GITHUB_REF#refs/heads/} | tr -cd "[:alnum:]._-") + key=branch-$(echo ${GITHUB_REF#refs/heads/} | tr -c "[:alnum:]._-" "-") fi echo "::set-output name=KEY::${key}" @@ -94,7 +94,7 @@ runs: BUILD_TYPE=${{ inputs.build_type }} EOF - ARCHIVE="${GITHUB_RUN_ID}-${{ inputs.test_selection }}-${GITHUB_RUN_ATTEMPT}.tar.zst" + 
ARCHIVE="${GITHUB_RUN_ID}-${{ inputs.test_selection }}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst" ZSTD_NBTHREADS=0 tar -C ${TEST_OUTPUT}/allure/results -cf ${ARCHIVE} --zstd . @@ -207,7 +207,7 @@ runs: script: | const { REPORT_URL, BUILD_TYPE, SHA } = process.env - result = await github.rest.repos.createCommitStatus({ + await github.rest.repos.createCommitStatus({ owner: context.repo.owner, repo: context.repo.repo, sha: `${SHA}`, @@ -215,5 +215,3 @@ runs: target_url: `${REPORT_URL}`, context: `Allure report / ${BUILD_TYPE}`, }) - - console.log(result); diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml index a4bcaff56d..1cc65b4286 100644 --- a/.github/actions/run-python-test-set/action.yml +++ b/.github/actions/run-python-test-set/action.yml @@ -3,11 +3,11 @@ description: 'Runs a Neon python test set, performing all the required preparati inputs: build_type: - description: 'Type of Rust (neon) and C (postgres) builds. Must be "release" or "debug".' + description: 'Type of Rust (neon) and C (postgres) builds. 
Must be "release" or "debug", or "remote" for the remote cluster' required: true rust_toolchain: description: 'Rust toolchain version to fetch the caches' - required: true + required: false test_selection: description: 'A python test suite to run' required: true @@ -52,6 +52,7 @@ runs: using: "composite" steps: - name: Get Neon artifact + if: inputs.build_type != 'remote' uses: ./.github/actions/download with: name: neon-${{ runner.os }}-${{ inputs.build_type }}-${{ inputs.rust_toolchain }}-artifact @@ -78,7 +79,6 @@ runs: - name: Run pytest env: NEON_BIN: /tmp/neon/bin - POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install TEST_OUTPUT: /tmp/test_output # this variable will be embedded in perf test report # and is needed to distinguish different environments @@ -88,6 +88,12 @@ runs: AWS_SECRET_ACCESS_KEY: ${{ inputs.real_s3_secret_access_key }} shell: bash -euxo pipefail {0} run: | + export POSTGRES_DISTRIB_DIR=${POSTGRES_DISTRIB_DIR:-/tmp/neon/pg_install} + + if [ "${BUILD_TYPE}" = "remote" ]; then + export REMOTE_ENV=1 + fi + PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)" rm -rf $PERF_REPORT_DIR @@ -119,6 +125,13 @@ runs: cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run) elif [[ "${{ inputs.build_type }}" == "release" ]]; then cov_prefix=() + else + cov_prefix=() + fi + + # Wake up the cluster if we use remote neon instance + if [ "${{ inputs.build_type }}" = "remote" ] && [ -n "${BENCHMARK_CONNSTR}" ]; then + ${POSTGRES_DISTRIB_DIR}/bin/psql ${BENCHMARK_CONNSTR} -c "SELECT version();" fi # Run the tests. 
@@ -137,7 +150,6 @@ runs: --alluredir=$TEST_OUTPUT/allure/results \ --tb=short \ --verbose \ - -m "not remote_cluster" \ -rA $TEST_SELECTION $EXTRA_PARAMS if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then @@ -148,25 +160,10 @@ runs: fi fi - - name: Upload Allure results - if: ${{ always() && (inputs.test_selection == 'regress') }} + - name: Create Allure report + if: always() uses: ./.github/actions/allure-report with: action: store build_type: ${{ inputs.build_type }} test_selection: ${{ inputs.test_selection }} - - - name: Delete all data but logs - shell: bash -euxo pipefail {0} - if: always() - run: | - du -sh /tmp/test_output/* - find /tmp/test_output -type f ! -name "*.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" ! -name "*.metrics" -delete - du -sh /tmp/test_output/* - - - name: Upload python test logs - if: always() - uses: ./.github/actions/upload - with: - name: python-test-${{ inputs.test_selection }}-${{ runner.os }}-${{ inputs.build_type }}-${{ inputs.rust_toolchain }}-logs - path: /tmp/test_output/ diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 4ed6ac80fd..1370917377 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -128,9 +128,9 @@ jobs: env: TEST_PG_BENCH_DURATIONS_MATRIX: "60m" TEST_PG_BENCH_SCALES_MATRIX: "10gb" - REMOTE_ENV: "1" POSTGRES_DISTRIB_DIR: /usr TEST_OUTPUT: /tmp/test_output + BUILD_TYPE: remote strategy: fail-fast: false @@ -138,23 +138,15 @@ jobs: connstr: [ BENCHMARK_CAPTEST_CONNSTR, BENCHMARK_RDS_CONNSTR ] runs-on: dev - container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2817580636 + container: + image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:pinned + options: --init timeout-minutes: 360 # 6h steps: - uses: actions/checkout@v3 - - name: Cache poetry deps - id: cache_poetry - uses: actions/cache@v3 - with: - path: 
~/.cache/pypoetry/virtualenvs - key: v2-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }} - - - name: Install Python deps - run: ./scripts/pysync - - name: Calculate platform id: calculate-platform env: @@ -173,50 +165,54 @@ jobs: - name: Install Deps run: | - echo "deb http://apt.postgresql.org/pub/repos/apt focal-pgdg main" | sudo tee /etc/apt/sources.list.d/pgdg.list - wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add - sudo apt -y update - sudo apt install -y postgresql-14 postgresql-client-14 + sudo apt install -y postgresql-14 - name: Benchmark init + uses: ./.github/actions/run-python-test-set + with: + build_type: ${{ env.BUILD_TYPE }} + test_selection: performance + run_in_parallel: false + save_perf_report: true + extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init env: PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }} BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }} - run: | - mkdir -p perf-report-captest - - psql $BENCHMARK_CONNSTR -c "SELECT 1;" - ./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_init -v -m "remote_cluster" --out-dir perf-report-captest --timeout 21600 - name: Benchmark simple-update + uses: ./.github/actions/run-python-test-set + with: + build_type: ${{ env.BUILD_TYPE }} + test_selection: performance + run_in_parallel: false + save_perf_report: true + extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update env: PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }} BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }} - run: | - psql $BENCHMARK_CONNSTR -c "SELECT 1;" - ./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_simple_update -v -m "remote_cluster" --out-dir perf-report-captest --timeout 21600 - - - name: Benchmark select-only - env: - PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }} - BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }} - run: | 
- psql $BENCHMARK_CONNSTR -c "SELECT 1;" - ./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_select_only -v -m "remote_cluster" --out-dir perf-report-captest --timeout 21600 - - - name: Submit result - env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" - run: | - REPORT_FROM=$(realpath perf-report-captest) REPORT_TO=staging scripts/generate_and_push_perf_report.sh - - name: Upload logs - if: always() - uses: ./.github/actions/upload + - name: Benchmark simple-update + uses: ./.github/actions/run-python-test-set with: - name: bench-captest-${{ steps.calculate-platform.outputs.PLATFORM }} - path: /tmp/test_output/ + build_type: ${{ env.BUILD_TYPE }} + test_selection: performance + run_in_parallel: false + save_perf_report: true + extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only + env: + PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }} + BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }} + VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" + PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" + + - name: Create Allure report + uses: ./.github/actions/allure-report + with: + action: generate + build_type: ${{ env.BUILD_TYPE }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 8b1dc3a9c4..a3314738fa 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -278,7 +278,7 @@ jobs: container: image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned options: --init - needs: [ regress-tests ] + needs: [ regress-tests, benchmarks ] if: always() strategy: fail-fast: false @@ -290,7 +290,7 @@ jobs: with: submodules: false - - name: Merge and Allure results + - name: Create Allure report uses: ./.github/actions/allure-report with: 
action: generate diff --git a/poetry.lock b/poetry.lock index 6bce17008e..2af0d97511 100644 --- a/poetry.lock +++ b/poetry.lock @@ -15,20 +15,20 @@ sa = ["sqlalchemy[postgresql_psycopg2binary] (>=1.3,<1.5)"] [[package]] name = "allure-pytest" -version = "2.9.45" +version = "2.10.0" description = "Allure pytest integration" category = "main" optional = false python-versions = "*" [package.dependencies] -allure-python-commons = "2.9.45" +allure-python-commons = "2.10.0" pytest = ">=4.5.0" six = ">=1.9.0" [[package]] name = "allure-python-commons" -version = "2.9.45" +version = "2.10.0" description = "Common module for integrate allure with python-based frameworks" category = "main" optional = false @@ -56,9 +56,9 @@ optional = false python-versions = ">=3.6.0" [package.extras] -dev = ["Cython (>=0.29.24,<0.30.0)", "pytest (>=6.0)", "Sphinx (>=4.1.2,<4.2.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "pycodestyle (>=2.7.0,<2.8.0)", "flake8 (>=3.9.2,<3.10.0)", "uvloop (>=0.15.3)"] -docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)"] -test = ["pycodestyle (>=2.7.0,<2.8.0)", "flake8 (>=3.9.2,<3.10.0)", "uvloop (>=0.15.3)"] +dev = ["Cython (>=0.29.24,<0.30.0)", "Sphinx (>=4.1.2,<4.2.0)", "flake8 (>=3.9.2,<3.10.0)", "pycodestyle (>=2.7.0,<2.8.0)", "pytest (>=6.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)", "uvloop (>=0.15.3)"] +docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] +test = ["flake8 (>=3.9.2,<3.10.0)", "pycodestyle (>=2.7.0,<2.8.0)", "uvloop (>=0.15.3)"] [[package]] name = "atomicwrites" @@ -77,10 +77,10 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" [package.extras] -dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", 
"sphinx-notfound-page", "pre-commit", "cloudpickle"] -docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] -tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "cloudpickle"] -tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "cloudpickle"] +dev = ["cloudpickle", "coverage[toml] (>=5.0.2)", "furo", "hypothesis", "mypy", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six", "sphinx", "sphinx-notfound-page", "zope.interface"] +docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"] +tests = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six", "zope.interface"] +tests_no_zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six"] [[package]] name = "aws-sam-translator" @@ -95,7 +95,7 @@ boto3 = ">=1.19.5,<2.0.0" jsonschema = ">=3.2,<4.0" [package.extras] -dev = ["coverage (>=5.3,<6.0)", "flake8 (>=3.8.4,<3.9.0)", "tox (>=3.24,<4.0)", "pytest-cov (>=2.10.1,<2.11.0)", "pytest-xdist (>=2.5,<3.0)", "pytest-env (>=0.6.2,<0.7.0)", "pylint (>=2.9.0,<2.10.0)", "pyyaml (>=5.4,<6.0)", "pytest (>=6.2.5,<6.3.0)", "parameterized (>=0.7.4,<0.8.0)", "click (>=7.1,<8.0)", "dateparser (>=0.7,<1.0)", "boto3 (>=1.23,<2)", "tenacity (>=7.0.0,<7.1.0)", "requests (>=2.24.0,<2.25.0)", "docopt (>=0.6.2,<0.7.0)", "black (==20.8b1)"] +dev = ["black (==20.8b1)", "boto3 (>=1.23,<2)", "click (>=7.1,<8.0)", "coverage (>=5.3,<6.0)", "dateparser (>=0.7,<1.0)", "docopt (>=0.6.2,<0.7.0)", "flake8 (>=3.8.4,<3.9.0)", "parameterized (>=0.7.4,<0.8.0)", "pylint (>=2.9.0,<2.10.0)", "pytest (>=6.2.5,<6.3.0)", "pytest-cov (>=2.10.1,<2.11.0)", "pytest-env (>=0.6.2,<0.7.0)", "pytest-xdist (>=2.5,<3.0)", "pyyaml (>=5.4,<6.0)", "requests (>=2.24.0,<2.25.0)", 
"tenacity (>=7.0.0,<7.1.0)", "tox (>=3.24,<4.0)"] [[package]] name = "aws-xray-sdk" @@ -157,8 +157,8 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "boto3-stubs" -version = "1.24.56" -description = "Type annotations for boto3 1.24.56 generated with mypy-boto3-builder 7.11.7" +version = "1.24.58" +description = "Type annotations for boto3 1.24.58 generated with mypy-boto3-builder 7.11.7" category = "main" optional = false python-versions = ">=3.7" @@ -175,7 +175,7 @@ account = ["mypy-boto3-account (>=1.24.0,<1.25.0)"] acm = ["mypy-boto3-acm (>=1.24.0,<1.25.0)"] acm-pca = ["mypy-boto3-acm-pca (>=1.24.0,<1.25.0)"] alexaforbusiness = ["mypy-boto3-alexaforbusiness (>=1.24.0,<1.25.0)"] -all = ["mypy-boto3-accessanalyzer (>=1.24.0,<1.25.0)", "mypy-boto3-account (>=1.24.0,<1.25.0)", "mypy-boto3-acm (>=1.24.0,<1.25.0)", "mypy-boto3-acm-pca (>=1.24.0,<1.25.0)", "mypy-boto3-alexaforbusiness (>=1.24.0,<1.25.0)", "mypy-boto3-amp (>=1.24.0,<1.25.0)", "mypy-boto3-amplify (>=1.24.0,<1.25.0)", "mypy-boto3-amplifybackend (>=1.24.0,<1.25.0)", "mypy-boto3-amplifyuibuilder (>=1.24.0,<1.25.0)", "mypy-boto3-apigateway (>=1.24.0,<1.25.0)", "mypy-boto3-apigatewaymanagementapi (>=1.24.0,<1.25.0)", "mypy-boto3-apigatewayv2 (>=1.24.0,<1.25.0)", "mypy-boto3-appconfig (>=1.24.0,<1.25.0)", "mypy-boto3-appconfigdata (>=1.24.0,<1.25.0)", "mypy-boto3-appflow (>=1.24.0,<1.25.0)", "mypy-boto3-appintegrations (>=1.24.0,<1.25.0)", "mypy-boto3-application-autoscaling (>=1.24.0,<1.25.0)", "mypy-boto3-application-insights (>=1.24.0,<1.25.0)", "mypy-boto3-applicationcostprofiler (>=1.24.0,<1.25.0)", "mypy-boto3-appmesh (>=1.24.0,<1.25.0)", "mypy-boto3-apprunner (>=1.24.0,<1.25.0)", "mypy-boto3-appstream (>=1.24.0,<1.25.0)", "mypy-boto3-appsync (>=1.24.0,<1.25.0)", "mypy-boto3-athena (>=1.24.0,<1.25.0)", "mypy-boto3-auditmanager (>=1.24.0,<1.25.0)", "mypy-boto3-autoscaling (>=1.24.0,<1.25.0)", "mypy-boto3-autoscaling-plans (>=1.24.0,<1.25.0)", "mypy-boto3-backup (>=1.24.0,<1.25.0)", 
"mypy-boto3-backup-gateway (>=1.24.0,<1.25.0)", "mypy-boto3-backupstorage (>=1.24.0,<1.25.0)", "mypy-boto3-batch (>=1.24.0,<1.25.0)", "mypy-boto3-billingconductor (>=1.24.0,<1.25.0)", "mypy-boto3-braket (>=1.24.0,<1.25.0)", "mypy-boto3-budgets (>=1.24.0,<1.25.0)", "mypy-boto3-ce (>=1.24.0,<1.25.0)", "mypy-boto3-chime (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-identity (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-media-pipelines (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-meetings (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-messaging (>=1.24.0,<1.25.0)", "mypy-boto3-cloud9 (>=1.24.0,<1.25.0)", "mypy-boto3-cloudcontrol (>=1.24.0,<1.25.0)", "mypy-boto3-clouddirectory (>=1.24.0,<1.25.0)", "mypy-boto3-cloudformation (>=1.24.0,<1.25.0)", "mypy-boto3-cloudfront (>=1.24.0,<1.25.0)", "mypy-boto3-cloudhsm (>=1.24.0,<1.25.0)", "mypy-boto3-cloudhsmv2 (>=1.24.0,<1.25.0)", "mypy-boto3-cloudsearch (>=1.24.0,<1.25.0)", "mypy-boto3-cloudsearchdomain (>=1.24.0,<1.25.0)", "mypy-boto3-cloudtrail (>=1.24.0,<1.25.0)", "mypy-boto3-cloudwatch (>=1.24.0,<1.25.0)", "mypy-boto3-codeartifact (>=1.24.0,<1.25.0)", "mypy-boto3-codebuild (>=1.24.0,<1.25.0)", "mypy-boto3-codecommit (>=1.24.0,<1.25.0)", "mypy-boto3-codedeploy (>=1.24.0,<1.25.0)", "mypy-boto3-codeguru-reviewer (>=1.24.0,<1.25.0)", "mypy-boto3-codeguruprofiler (>=1.24.0,<1.25.0)", "mypy-boto3-codepipeline (>=1.24.0,<1.25.0)", "mypy-boto3-codestar (>=1.24.0,<1.25.0)", "mypy-boto3-codestar-connections (>=1.24.0,<1.25.0)", "mypy-boto3-codestar-notifications (>=1.24.0,<1.25.0)", "mypy-boto3-cognito-identity (>=1.24.0,<1.25.0)", "mypy-boto3-cognito-idp (>=1.24.0,<1.25.0)", "mypy-boto3-cognito-sync (>=1.24.0,<1.25.0)", "mypy-boto3-comprehend (>=1.24.0,<1.25.0)", "mypy-boto3-comprehendmedical (>=1.24.0,<1.25.0)", "mypy-boto3-compute-optimizer (>=1.24.0,<1.25.0)", "mypy-boto3-config (>=1.24.0,<1.25.0)", "mypy-boto3-connect (>=1.24.0,<1.25.0)", "mypy-boto3-connect-contact-lens (>=1.24.0,<1.25.0)", "mypy-boto3-connectcampaigns (>=1.24.0,<1.25.0)", 
"mypy-boto3-connectparticipant (>=1.24.0,<1.25.0)", "mypy-boto3-cur (>=1.24.0,<1.25.0)", "mypy-boto3-customer-profiles (>=1.24.0,<1.25.0)", "mypy-boto3-databrew (>=1.24.0,<1.25.0)", "mypy-boto3-dataexchange (>=1.24.0,<1.25.0)", "mypy-boto3-datapipeline (>=1.24.0,<1.25.0)", "mypy-boto3-datasync (>=1.24.0,<1.25.0)", "mypy-boto3-dax (>=1.24.0,<1.25.0)", "mypy-boto3-detective (>=1.24.0,<1.25.0)", "mypy-boto3-devicefarm (>=1.24.0,<1.25.0)", "mypy-boto3-devops-guru (>=1.24.0,<1.25.0)", "mypy-boto3-directconnect (>=1.24.0,<1.25.0)", "mypy-boto3-discovery (>=1.24.0,<1.25.0)", "mypy-boto3-dlm (>=1.24.0,<1.25.0)", "mypy-boto3-dms (>=1.24.0,<1.25.0)", "mypy-boto3-docdb (>=1.24.0,<1.25.0)", "mypy-boto3-drs (>=1.24.0,<1.25.0)", "mypy-boto3-ds (>=1.24.0,<1.25.0)", "mypy-boto3-dynamodb (>=1.24.0,<1.25.0)", "mypy-boto3-dynamodbstreams (>=1.24.0,<1.25.0)", "mypy-boto3-ebs (>=1.24.0,<1.25.0)", "mypy-boto3-ec2 (>=1.24.0,<1.25.0)", "mypy-boto3-ec2-instance-connect (>=1.24.0,<1.25.0)", "mypy-boto3-ecr (>=1.24.0,<1.25.0)", "mypy-boto3-ecr-public (>=1.24.0,<1.25.0)", "mypy-boto3-ecs (>=1.24.0,<1.25.0)", "mypy-boto3-efs (>=1.24.0,<1.25.0)", "mypy-boto3-eks (>=1.24.0,<1.25.0)", "mypy-boto3-elastic-inference (>=1.24.0,<1.25.0)", "mypy-boto3-elasticache (>=1.24.0,<1.25.0)", "mypy-boto3-elasticbeanstalk (>=1.24.0,<1.25.0)", "mypy-boto3-elastictranscoder (>=1.24.0,<1.25.0)", "mypy-boto3-elb (>=1.24.0,<1.25.0)", "mypy-boto3-elbv2 (>=1.24.0,<1.25.0)", "mypy-boto3-emr (>=1.24.0,<1.25.0)", "mypy-boto3-emr-containers (>=1.24.0,<1.25.0)", "mypy-boto3-emr-serverless (>=1.24.0,<1.25.0)", "mypy-boto3-es (>=1.24.0,<1.25.0)", "mypy-boto3-events (>=1.24.0,<1.25.0)", "mypy-boto3-evidently (>=1.24.0,<1.25.0)", "mypy-boto3-finspace (>=1.24.0,<1.25.0)", "mypy-boto3-finspace-data (>=1.24.0,<1.25.0)", "mypy-boto3-firehose (>=1.24.0,<1.25.0)", "mypy-boto3-fis (>=1.24.0,<1.25.0)", "mypy-boto3-fms (>=1.24.0,<1.25.0)", "mypy-boto3-forecast (>=1.24.0,<1.25.0)", "mypy-boto3-forecastquery (>=1.24.0,<1.25.0)", 
"mypy-boto3-frauddetector (>=1.24.0,<1.25.0)", "mypy-boto3-fsx (>=1.24.0,<1.25.0)", "mypy-boto3-gamelift (>=1.24.0,<1.25.0)", "mypy-boto3-gamesparks (>=1.24.0,<1.25.0)", "mypy-boto3-glacier (>=1.24.0,<1.25.0)", "mypy-boto3-globalaccelerator (>=1.24.0,<1.25.0)", "mypy-boto3-glue (>=1.24.0,<1.25.0)", "mypy-boto3-grafana (>=1.24.0,<1.25.0)", "mypy-boto3-greengrass (>=1.24.0,<1.25.0)", "mypy-boto3-greengrassv2 (>=1.24.0,<1.25.0)", "mypy-boto3-groundstation (>=1.24.0,<1.25.0)", "mypy-boto3-guardduty (>=1.24.0,<1.25.0)", "mypy-boto3-health (>=1.24.0,<1.25.0)", "mypy-boto3-healthlake (>=1.24.0,<1.25.0)", "mypy-boto3-honeycode (>=1.24.0,<1.25.0)", "mypy-boto3-iam (>=1.24.0,<1.25.0)", "mypy-boto3-identitystore (>=1.24.0,<1.25.0)", "mypy-boto3-imagebuilder (>=1.24.0,<1.25.0)", "mypy-boto3-importexport (>=1.24.0,<1.25.0)", "mypy-boto3-inspector (>=1.24.0,<1.25.0)", "mypy-boto3-inspector2 (>=1.24.0,<1.25.0)", "mypy-boto3-iot (>=1.24.0,<1.25.0)", "mypy-boto3-iot-data (>=1.24.0,<1.25.0)", "mypy-boto3-iot-jobs-data (>=1.24.0,<1.25.0)", "mypy-boto3-iot1click-devices (>=1.24.0,<1.25.0)", "mypy-boto3-iot1click-projects (>=1.24.0,<1.25.0)", "mypy-boto3-iotanalytics (>=1.24.0,<1.25.0)", "mypy-boto3-iotdeviceadvisor (>=1.24.0,<1.25.0)", "mypy-boto3-iotevents (>=1.24.0,<1.25.0)", "mypy-boto3-iotevents-data (>=1.24.0,<1.25.0)", "mypy-boto3-iotfleethub (>=1.24.0,<1.25.0)", "mypy-boto3-iotsecuretunneling (>=1.24.0,<1.25.0)", "mypy-boto3-iotsitewise (>=1.24.0,<1.25.0)", "mypy-boto3-iotthingsgraph (>=1.24.0,<1.25.0)", "mypy-boto3-iottwinmaker (>=1.24.0,<1.25.0)", "mypy-boto3-iotwireless (>=1.24.0,<1.25.0)", "mypy-boto3-ivs (>=1.24.0,<1.25.0)", "mypy-boto3-ivschat (>=1.24.0,<1.25.0)", "mypy-boto3-kafka (>=1.24.0,<1.25.0)", "mypy-boto3-kafkaconnect (>=1.24.0,<1.25.0)", "mypy-boto3-kendra (>=1.24.0,<1.25.0)", "mypy-boto3-keyspaces (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis-video-archived-media (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis-video-media 
(>=1.24.0,<1.25.0)", "mypy-boto3-kinesis-video-signaling (>=1.24.0,<1.25.0)", "mypy-boto3-kinesisanalytics (>=1.24.0,<1.25.0)", "mypy-boto3-kinesisanalyticsv2 (>=1.24.0,<1.25.0)", "mypy-boto3-kinesisvideo (>=1.24.0,<1.25.0)", "mypy-boto3-kms (>=1.24.0,<1.25.0)", "mypy-boto3-lakeformation (>=1.24.0,<1.25.0)", "mypy-boto3-lambda (>=1.24.0,<1.25.0)", "mypy-boto3-lex-models (>=1.24.0,<1.25.0)", "mypy-boto3-lex-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-lexv2-models (>=1.24.0,<1.25.0)", "mypy-boto3-lexv2-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-license-manager (>=1.24.0,<1.25.0)", "mypy-boto3-license-manager-user-subscriptions (>=1.24.0,<1.25.0)", "mypy-boto3-lightsail (>=1.24.0,<1.25.0)", "mypy-boto3-location (>=1.24.0,<1.25.0)", "mypy-boto3-logs (>=1.24.0,<1.25.0)", "mypy-boto3-lookoutequipment (>=1.24.0,<1.25.0)", "mypy-boto3-lookoutmetrics (>=1.24.0,<1.25.0)", "mypy-boto3-lookoutvision (>=1.24.0,<1.25.0)", "mypy-boto3-m2 (>=1.24.0,<1.25.0)", "mypy-boto3-machinelearning (>=1.24.0,<1.25.0)", "mypy-boto3-macie (>=1.24.0,<1.25.0)", "mypy-boto3-macie2 (>=1.24.0,<1.25.0)", "mypy-boto3-managedblockchain (>=1.24.0,<1.25.0)", "mypy-boto3-marketplace-catalog (>=1.24.0,<1.25.0)", "mypy-boto3-marketplace-entitlement (>=1.24.0,<1.25.0)", "mypy-boto3-marketplacecommerceanalytics (>=1.24.0,<1.25.0)", "mypy-boto3-mediaconnect (>=1.24.0,<1.25.0)", "mypy-boto3-mediaconvert (>=1.24.0,<1.25.0)", "mypy-boto3-medialive (>=1.24.0,<1.25.0)", "mypy-boto3-mediapackage (>=1.24.0,<1.25.0)", "mypy-boto3-mediapackage-vod (>=1.24.0,<1.25.0)", "mypy-boto3-mediastore (>=1.24.0,<1.25.0)", "mypy-boto3-mediastore-data (>=1.24.0,<1.25.0)", "mypy-boto3-mediatailor (>=1.24.0,<1.25.0)", "mypy-boto3-memorydb (>=1.24.0,<1.25.0)", "mypy-boto3-meteringmarketplace (>=1.24.0,<1.25.0)", "mypy-boto3-mgh (>=1.24.0,<1.25.0)", "mypy-boto3-mgn (>=1.24.0,<1.25.0)", "mypy-boto3-migration-hub-refactor-spaces (>=1.24.0,<1.25.0)", "mypy-boto3-migrationhub-config (>=1.24.0,<1.25.0)", "mypy-boto3-migrationhubstrategy 
(>=1.24.0,<1.25.0)", "mypy-boto3-mobile (>=1.24.0,<1.25.0)", "mypy-boto3-mq (>=1.24.0,<1.25.0)", "mypy-boto3-mturk (>=1.24.0,<1.25.0)", "mypy-boto3-mwaa (>=1.24.0,<1.25.0)", "mypy-boto3-neptune (>=1.24.0,<1.25.0)", "mypy-boto3-network-firewall (>=1.24.0,<1.25.0)", "mypy-boto3-networkmanager (>=1.24.0,<1.25.0)", "mypy-boto3-nimble (>=1.24.0,<1.25.0)", "mypy-boto3-opensearch (>=1.24.0,<1.25.0)", "mypy-boto3-opsworks (>=1.24.0,<1.25.0)", "mypy-boto3-opsworkscm (>=1.24.0,<1.25.0)", "mypy-boto3-organizations (>=1.24.0,<1.25.0)", "mypy-boto3-outposts (>=1.24.0,<1.25.0)", "mypy-boto3-panorama (>=1.24.0,<1.25.0)", "mypy-boto3-personalize (>=1.24.0,<1.25.0)", "mypy-boto3-personalize-events (>=1.24.0,<1.25.0)", "mypy-boto3-personalize-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-pi (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint-email (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint-sms-voice (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint-sms-voice-v2 (>=1.24.0,<1.25.0)", "mypy-boto3-polly (>=1.24.0,<1.25.0)", "mypy-boto3-pricing (>=1.24.0,<1.25.0)", "mypy-boto3-privatenetworks (>=1.24.0,<1.25.0)", "mypy-boto3-proton (>=1.24.0,<1.25.0)", "mypy-boto3-qldb (>=1.24.0,<1.25.0)", "mypy-boto3-qldb-session (>=1.24.0,<1.25.0)", "mypy-boto3-quicksight (>=1.24.0,<1.25.0)", "mypy-boto3-ram (>=1.24.0,<1.25.0)", "mypy-boto3-rbin (>=1.24.0,<1.25.0)", "mypy-boto3-rds (>=1.24.0,<1.25.0)", "mypy-boto3-rds-data (>=1.24.0,<1.25.0)", "mypy-boto3-redshift (>=1.24.0,<1.25.0)", "mypy-boto3-redshift-data (>=1.24.0,<1.25.0)", "mypy-boto3-redshift-serverless (>=1.24.0,<1.25.0)", "mypy-boto3-rekognition (>=1.24.0,<1.25.0)", "mypy-boto3-resiliencehub (>=1.24.0,<1.25.0)", "mypy-boto3-resource-groups (>=1.24.0,<1.25.0)", "mypy-boto3-resourcegroupstaggingapi (>=1.24.0,<1.25.0)", "mypy-boto3-robomaker (>=1.24.0,<1.25.0)", "mypy-boto3-rolesanywhere (>=1.24.0,<1.25.0)", "mypy-boto3-route53 (>=1.24.0,<1.25.0)", "mypy-boto3-route53-recovery-cluster (>=1.24.0,<1.25.0)", 
"mypy-boto3-route53-recovery-control-config (>=1.24.0,<1.25.0)", "mypy-boto3-route53-recovery-readiness (>=1.24.0,<1.25.0)", "mypy-boto3-route53domains (>=1.24.0,<1.25.0)", "mypy-boto3-route53resolver (>=1.24.0,<1.25.0)", "mypy-boto3-rum (>=1.24.0,<1.25.0)", "mypy-boto3-s3 (>=1.24.0,<1.25.0)", "mypy-boto3-s3control (>=1.24.0,<1.25.0)", "mypy-boto3-s3outposts (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-a2i-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-edge (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-featurestore-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-savingsplans (>=1.24.0,<1.25.0)", "mypy-boto3-schemas (>=1.24.0,<1.25.0)", "mypy-boto3-sdb (>=1.24.0,<1.25.0)", "mypy-boto3-secretsmanager (>=1.24.0,<1.25.0)", "mypy-boto3-securityhub (>=1.24.0,<1.25.0)", "mypy-boto3-serverlessrepo (>=1.24.0,<1.25.0)", "mypy-boto3-service-quotas (>=1.24.0,<1.25.0)", "mypy-boto3-servicecatalog (>=1.24.0,<1.25.0)", "mypy-boto3-servicecatalog-appregistry (>=1.24.0,<1.25.0)", "mypy-boto3-servicediscovery (>=1.24.0,<1.25.0)", "mypy-boto3-ses (>=1.24.0,<1.25.0)", "mypy-boto3-sesv2 (>=1.24.0,<1.25.0)", "mypy-boto3-shield (>=1.24.0,<1.25.0)", "mypy-boto3-signer (>=1.24.0,<1.25.0)", "mypy-boto3-sms (>=1.24.0,<1.25.0)", "mypy-boto3-sms-voice (>=1.24.0,<1.25.0)", "mypy-boto3-snow-device-management (>=1.24.0,<1.25.0)", "mypy-boto3-snowball (>=1.24.0,<1.25.0)", "mypy-boto3-sns (>=1.24.0,<1.25.0)", "mypy-boto3-sqs (>=1.24.0,<1.25.0)", "mypy-boto3-ssm (>=1.24.0,<1.25.0)", "mypy-boto3-ssm-contacts (>=1.24.0,<1.25.0)", "mypy-boto3-ssm-incidents (>=1.24.0,<1.25.0)", "mypy-boto3-sso (>=1.24.0,<1.25.0)", "mypy-boto3-sso-admin (>=1.24.0,<1.25.0)", "mypy-boto3-sso-oidc (>=1.24.0,<1.25.0)", "mypy-boto3-stepfunctions (>=1.24.0,<1.25.0)", "mypy-boto3-storagegateway (>=1.24.0,<1.25.0)", "mypy-boto3-sts (>=1.24.0,<1.25.0)", "mypy-boto3-support (>=1.24.0,<1.25.0)", "mypy-boto3-swf (>=1.24.0,<1.25.0)", 
"mypy-boto3-synthetics (>=1.24.0,<1.25.0)", "mypy-boto3-textract (>=1.24.0,<1.25.0)", "mypy-boto3-timestream-query (>=1.24.0,<1.25.0)", "mypy-boto3-timestream-write (>=1.24.0,<1.25.0)", "mypy-boto3-transcribe (>=1.24.0,<1.25.0)", "mypy-boto3-transfer (>=1.24.0,<1.25.0)", "mypy-boto3-translate (>=1.24.0,<1.25.0)", "mypy-boto3-voice-id (>=1.24.0,<1.25.0)", "mypy-boto3-waf (>=1.24.0,<1.25.0)", "mypy-boto3-waf-regional (>=1.24.0,<1.25.0)", "mypy-boto3-wafv2 (>=1.24.0,<1.25.0)", "mypy-boto3-wellarchitected (>=1.24.0,<1.25.0)", "mypy-boto3-wisdom (>=1.24.0,<1.25.0)", "mypy-boto3-workdocs (>=1.24.0,<1.25.0)", "mypy-boto3-worklink (>=1.24.0,<1.25.0)", "mypy-boto3-workmail (>=1.24.0,<1.25.0)", "mypy-boto3-workmailmessageflow (>=1.24.0,<1.25.0)", "mypy-boto3-workspaces (>=1.24.0,<1.25.0)", "mypy-boto3-workspaces-web (>=1.24.0,<1.25.0)", "mypy-boto3-xray (>=1.24.0,<1.25.0)"] +all = ["mypy-boto3-accessanalyzer (>=1.24.0,<1.25.0)", "mypy-boto3-account (>=1.24.0,<1.25.0)", "mypy-boto3-acm (>=1.24.0,<1.25.0)", "mypy-boto3-acm-pca (>=1.24.0,<1.25.0)", "mypy-boto3-alexaforbusiness (>=1.24.0,<1.25.0)", "mypy-boto3-amp (>=1.24.0,<1.25.0)", "mypy-boto3-amplify (>=1.24.0,<1.25.0)", "mypy-boto3-amplifybackend (>=1.24.0,<1.25.0)", "mypy-boto3-amplifyuibuilder (>=1.24.0,<1.25.0)", "mypy-boto3-apigateway (>=1.24.0,<1.25.0)", "mypy-boto3-apigatewaymanagementapi (>=1.24.0,<1.25.0)", "mypy-boto3-apigatewayv2 (>=1.24.0,<1.25.0)", "mypy-boto3-appconfig (>=1.24.0,<1.25.0)", "mypy-boto3-appconfigdata (>=1.24.0,<1.25.0)", "mypy-boto3-appflow (>=1.24.0,<1.25.0)", "mypy-boto3-appintegrations (>=1.24.0,<1.25.0)", "mypy-boto3-application-autoscaling (>=1.24.0,<1.25.0)", "mypy-boto3-application-insights (>=1.24.0,<1.25.0)", "mypy-boto3-applicationcostprofiler (>=1.24.0,<1.25.0)", "mypy-boto3-appmesh (>=1.24.0,<1.25.0)", "mypy-boto3-apprunner (>=1.24.0,<1.25.0)", "mypy-boto3-appstream (>=1.24.0,<1.25.0)", "mypy-boto3-appsync (>=1.24.0,<1.25.0)", "mypy-boto3-athena (>=1.24.0,<1.25.0)", 
"mypy-boto3-auditmanager (>=1.24.0,<1.25.0)", "mypy-boto3-autoscaling (>=1.24.0,<1.25.0)", "mypy-boto3-autoscaling-plans (>=1.24.0,<1.25.0)", "mypy-boto3-backup (>=1.24.0,<1.25.0)", "mypy-boto3-backup-gateway (>=1.24.0,<1.25.0)", "mypy-boto3-backupstorage (>=1.24.0,<1.25.0)", "mypy-boto3-batch (>=1.24.0,<1.25.0)", "mypy-boto3-billingconductor (>=1.24.0,<1.25.0)", "mypy-boto3-braket (>=1.24.0,<1.25.0)", "mypy-boto3-budgets (>=1.24.0,<1.25.0)", "mypy-boto3-ce (>=1.24.0,<1.25.0)", "mypy-boto3-chime (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-identity (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-media-pipelines (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-meetings (>=1.24.0,<1.25.0)", "mypy-boto3-chime-sdk-messaging (>=1.24.0,<1.25.0)", "mypy-boto3-cloud9 (>=1.24.0,<1.25.0)", "mypy-boto3-cloudcontrol (>=1.24.0,<1.25.0)", "mypy-boto3-clouddirectory (>=1.24.0,<1.25.0)", "mypy-boto3-cloudformation (>=1.24.0,<1.25.0)", "mypy-boto3-cloudfront (>=1.24.0,<1.25.0)", "mypy-boto3-cloudhsm (>=1.24.0,<1.25.0)", "mypy-boto3-cloudhsmv2 (>=1.24.0,<1.25.0)", "mypy-boto3-cloudsearch (>=1.24.0,<1.25.0)", "mypy-boto3-cloudsearchdomain (>=1.24.0,<1.25.0)", "mypy-boto3-cloudtrail (>=1.24.0,<1.25.0)", "mypy-boto3-cloudwatch (>=1.24.0,<1.25.0)", "mypy-boto3-codeartifact (>=1.24.0,<1.25.0)", "mypy-boto3-codebuild (>=1.24.0,<1.25.0)", "mypy-boto3-codecommit (>=1.24.0,<1.25.0)", "mypy-boto3-codedeploy (>=1.24.0,<1.25.0)", "mypy-boto3-codeguru-reviewer (>=1.24.0,<1.25.0)", "mypy-boto3-codeguruprofiler (>=1.24.0,<1.25.0)", "mypy-boto3-codepipeline (>=1.24.0,<1.25.0)", "mypy-boto3-codestar (>=1.24.0,<1.25.0)", "mypy-boto3-codestar-connections (>=1.24.0,<1.25.0)", "mypy-boto3-codestar-notifications (>=1.24.0,<1.25.0)", "mypy-boto3-cognito-identity (>=1.24.0,<1.25.0)", "mypy-boto3-cognito-idp (>=1.24.0,<1.25.0)", "mypy-boto3-cognito-sync (>=1.24.0,<1.25.0)", "mypy-boto3-comprehend (>=1.24.0,<1.25.0)", "mypy-boto3-comprehendmedical (>=1.24.0,<1.25.0)", "mypy-boto3-compute-optimizer (>=1.24.0,<1.25.0)", 
"mypy-boto3-config (>=1.24.0,<1.25.0)", "mypy-boto3-connect (>=1.24.0,<1.25.0)", "mypy-boto3-connect-contact-lens (>=1.24.0,<1.25.0)", "mypy-boto3-connectcampaigns (>=1.24.0,<1.25.0)", "mypy-boto3-connectparticipant (>=1.24.0,<1.25.0)", "mypy-boto3-cur (>=1.24.0,<1.25.0)", "mypy-boto3-customer-profiles (>=1.24.0,<1.25.0)", "mypy-boto3-databrew (>=1.24.0,<1.25.0)", "mypy-boto3-dataexchange (>=1.24.0,<1.25.0)", "mypy-boto3-datapipeline (>=1.24.0,<1.25.0)", "mypy-boto3-datasync (>=1.24.0,<1.25.0)", "mypy-boto3-dax (>=1.24.0,<1.25.0)", "mypy-boto3-detective (>=1.24.0,<1.25.0)", "mypy-boto3-devicefarm (>=1.24.0,<1.25.0)", "mypy-boto3-devops-guru (>=1.24.0,<1.25.0)", "mypy-boto3-directconnect (>=1.24.0,<1.25.0)", "mypy-boto3-discovery (>=1.24.0,<1.25.0)", "mypy-boto3-dlm (>=1.24.0,<1.25.0)", "mypy-boto3-dms (>=1.24.0,<1.25.0)", "mypy-boto3-docdb (>=1.24.0,<1.25.0)", "mypy-boto3-drs (>=1.24.0,<1.25.0)", "mypy-boto3-ds (>=1.24.0,<1.25.0)", "mypy-boto3-dynamodb (>=1.24.0,<1.25.0)", "mypy-boto3-dynamodbstreams (>=1.24.0,<1.25.0)", "mypy-boto3-ebs (>=1.24.0,<1.25.0)", "mypy-boto3-ec2 (>=1.24.0,<1.25.0)", "mypy-boto3-ec2-instance-connect (>=1.24.0,<1.25.0)", "mypy-boto3-ecr (>=1.24.0,<1.25.0)", "mypy-boto3-ecr-public (>=1.24.0,<1.25.0)", "mypy-boto3-ecs (>=1.24.0,<1.25.0)", "mypy-boto3-efs (>=1.24.0,<1.25.0)", "mypy-boto3-eks (>=1.24.0,<1.25.0)", "mypy-boto3-elastic-inference (>=1.24.0,<1.25.0)", "mypy-boto3-elasticache (>=1.24.0,<1.25.0)", "mypy-boto3-elasticbeanstalk (>=1.24.0,<1.25.0)", "mypy-boto3-elastictranscoder (>=1.24.0,<1.25.0)", "mypy-boto3-elb (>=1.24.0,<1.25.0)", "mypy-boto3-elbv2 (>=1.24.0,<1.25.0)", "mypy-boto3-emr (>=1.24.0,<1.25.0)", "mypy-boto3-emr-containers (>=1.24.0,<1.25.0)", "mypy-boto3-emr-serverless (>=1.24.0,<1.25.0)", "mypy-boto3-es (>=1.24.0,<1.25.0)", "mypy-boto3-events (>=1.24.0,<1.25.0)", "mypy-boto3-evidently (>=1.24.0,<1.25.0)", "mypy-boto3-finspace (>=1.24.0,<1.25.0)", "mypy-boto3-finspace-data (>=1.24.0,<1.25.0)", "mypy-boto3-firehose 
(>=1.24.0,<1.25.0)", "mypy-boto3-fis (>=1.24.0,<1.25.0)", "mypy-boto3-fms (>=1.24.0,<1.25.0)", "mypy-boto3-forecast (>=1.24.0,<1.25.0)", "mypy-boto3-forecastquery (>=1.24.0,<1.25.0)", "mypy-boto3-frauddetector (>=1.24.0,<1.25.0)", "mypy-boto3-fsx (>=1.24.0,<1.25.0)", "mypy-boto3-gamelift (>=1.24.0,<1.25.0)", "mypy-boto3-gamesparks (>=1.24.0,<1.25.0)", "mypy-boto3-glacier (>=1.24.0,<1.25.0)", "mypy-boto3-globalaccelerator (>=1.24.0,<1.25.0)", "mypy-boto3-glue (>=1.24.0,<1.25.0)", "mypy-boto3-grafana (>=1.24.0,<1.25.0)", "mypy-boto3-greengrass (>=1.24.0,<1.25.0)", "mypy-boto3-greengrassv2 (>=1.24.0,<1.25.0)", "mypy-boto3-groundstation (>=1.24.0,<1.25.0)", "mypy-boto3-guardduty (>=1.24.0,<1.25.0)", "mypy-boto3-health (>=1.24.0,<1.25.0)", "mypy-boto3-healthlake (>=1.24.0,<1.25.0)", "mypy-boto3-honeycode (>=1.24.0,<1.25.0)", "mypy-boto3-iam (>=1.24.0,<1.25.0)", "mypy-boto3-identitystore (>=1.24.0,<1.25.0)", "mypy-boto3-imagebuilder (>=1.24.0,<1.25.0)", "mypy-boto3-importexport (>=1.24.0,<1.25.0)", "mypy-boto3-inspector (>=1.24.0,<1.25.0)", "mypy-boto3-inspector2 (>=1.24.0,<1.25.0)", "mypy-boto3-iot (>=1.24.0,<1.25.0)", "mypy-boto3-iot-data (>=1.24.0,<1.25.0)", "mypy-boto3-iot-jobs-data (>=1.24.0,<1.25.0)", "mypy-boto3-iot1click-devices (>=1.24.0,<1.25.0)", "mypy-boto3-iot1click-projects (>=1.24.0,<1.25.0)", "mypy-boto3-iotanalytics (>=1.24.0,<1.25.0)", "mypy-boto3-iotdeviceadvisor (>=1.24.0,<1.25.0)", "mypy-boto3-iotevents (>=1.24.0,<1.25.0)", "mypy-boto3-iotevents-data (>=1.24.0,<1.25.0)", "mypy-boto3-iotfleethub (>=1.24.0,<1.25.0)", "mypy-boto3-iotsecuretunneling (>=1.24.0,<1.25.0)", "mypy-boto3-iotsitewise (>=1.24.0,<1.25.0)", "mypy-boto3-iotthingsgraph (>=1.24.0,<1.25.0)", "mypy-boto3-iottwinmaker (>=1.24.0,<1.25.0)", "mypy-boto3-iotwireless (>=1.24.0,<1.25.0)", "mypy-boto3-ivs (>=1.24.0,<1.25.0)", "mypy-boto3-ivschat (>=1.24.0,<1.25.0)", "mypy-boto3-kafka (>=1.24.0,<1.25.0)", "mypy-boto3-kafkaconnect (>=1.24.0,<1.25.0)", "mypy-boto3-kendra (>=1.24.0,<1.25.0)", 
"mypy-boto3-keyspaces (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis-video-archived-media (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis-video-media (>=1.24.0,<1.25.0)", "mypy-boto3-kinesis-video-signaling (>=1.24.0,<1.25.0)", "mypy-boto3-kinesisanalytics (>=1.24.0,<1.25.0)", "mypy-boto3-kinesisanalyticsv2 (>=1.24.0,<1.25.0)", "mypy-boto3-kinesisvideo (>=1.24.0,<1.25.0)", "mypy-boto3-kms (>=1.24.0,<1.25.0)", "mypy-boto3-lakeformation (>=1.24.0,<1.25.0)", "mypy-boto3-lambda (>=1.24.0,<1.25.0)", "mypy-boto3-lex-models (>=1.24.0,<1.25.0)", "mypy-boto3-lex-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-lexv2-models (>=1.24.0,<1.25.0)", "mypy-boto3-lexv2-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-license-manager (>=1.24.0,<1.25.0)", "mypy-boto3-license-manager-user-subscriptions (>=1.24.0,<1.25.0)", "mypy-boto3-lightsail (>=1.24.0,<1.25.0)", "mypy-boto3-location (>=1.24.0,<1.25.0)", "mypy-boto3-logs (>=1.24.0,<1.25.0)", "mypy-boto3-lookoutequipment (>=1.24.0,<1.25.0)", "mypy-boto3-lookoutmetrics (>=1.24.0,<1.25.0)", "mypy-boto3-lookoutvision (>=1.24.0,<1.25.0)", "mypy-boto3-m2 (>=1.24.0,<1.25.0)", "mypy-boto3-machinelearning (>=1.24.0,<1.25.0)", "mypy-boto3-macie (>=1.24.0,<1.25.0)", "mypy-boto3-macie2 (>=1.24.0,<1.25.0)", "mypy-boto3-managedblockchain (>=1.24.0,<1.25.0)", "mypy-boto3-marketplace-catalog (>=1.24.0,<1.25.0)", "mypy-boto3-marketplace-entitlement (>=1.24.0,<1.25.0)", "mypy-boto3-marketplacecommerceanalytics (>=1.24.0,<1.25.0)", "mypy-boto3-mediaconnect (>=1.24.0,<1.25.0)", "mypy-boto3-mediaconvert (>=1.24.0,<1.25.0)", "mypy-boto3-medialive (>=1.24.0,<1.25.0)", "mypy-boto3-mediapackage (>=1.24.0,<1.25.0)", "mypy-boto3-mediapackage-vod (>=1.24.0,<1.25.0)", "mypy-boto3-mediastore (>=1.24.0,<1.25.0)", "mypy-boto3-mediastore-data (>=1.24.0,<1.25.0)", "mypy-boto3-mediatailor (>=1.24.0,<1.25.0)", "mypy-boto3-memorydb (>=1.24.0,<1.25.0)", "mypy-boto3-meteringmarketplace (>=1.24.0,<1.25.0)", "mypy-boto3-mgh (>=1.24.0,<1.25.0)", "mypy-boto3-mgn 
(>=1.24.0,<1.25.0)", "mypy-boto3-migration-hub-refactor-spaces (>=1.24.0,<1.25.0)", "mypy-boto3-migrationhub-config (>=1.24.0,<1.25.0)", "mypy-boto3-migrationhubstrategy (>=1.24.0,<1.25.0)", "mypy-boto3-mobile (>=1.24.0,<1.25.0)", "mypy-boto3-mq (>=1.24.0,<1.25.0)", "mypy-boto3-mturk (>=1.24.0,<1.25.0)", "mypy-boto3-mwaa (>=1.24.0,<1.25.0)", "mypy-boto3-neptune (>=1.24.0,<1.25.0)", "mypy-boto3-network-firewall (>=1.24.0,<1.25.0)", "mypy-boto3-networkmanager (>=1.24.0,<1.25.0)", "mypy-boto3-nimble (>=1.24.0,<1.25.0)", "mypy-boto3-opensearch (>=1.24.0,<1.25.0)", "mypy-boto3-opsworks (>=1.24.0,<1.25.0)", "mypy-boto3-opsworkscm (>=1.24.0,<1.25.0)", "mypy-boto3-organizations (>=1.24.0,<1.25.0)", "mypy-boto3-outposts (>=1.24.0,<1.25.0)", "mypy-boto3-panorama (>=1.24.0,<1.25.0)", "mypy-boto3-personalize (>=1.24.0,<1.25.0)", "mypy-boto3-personalize-events (>=1.24.0,<1.25.0)", "mypy-boto3-personalize-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-pi (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint-email (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint-sms-voice (>=1.24.0,<1.25.0)", "mypy-boto3-pinpoint-sms-voice-v2 (>=1.24.0,<1.25.0)", "mypy-boto3-polly (>=1.24.0,<1.25.0)", "mypy-boto3-pricing (>=1.24.0,<1.25.0)", "mypy-boto3-privatenetworks (>=1.24.0,<1.25.0)", "mypy-boto3-proton (>=1.24.0,<1.25.0)", "mypy-boto3-qldb (>=1.24.0,<1.25.0)", "mypy-boto3-qldb-session (>=1.24.0,<1.25.0)", "mypy-boto3-quicksight (>=1.24.0,<1.25.0)", "mypy-boto3-ram (>=1.24.0,<1.25.0)", "mypy-boto3-rbin (>=1.24.0,<1.25.0)", "mypy-boto3-rds (>=1.24.0,<1.25.0)", "mypy-boto3-rds-data (>=1.24.0,<1.25.0)", "mypy-boto3-redshift (>=1.24.0,<1.25.0)", "mypy-boto3-redshift-data (>=1.24.0,<1.25.0)", "mypy-boto3-redshift-serverless (>=1.24.0,<1.25.0)", "mypy-boto3-rekognition (>=1.24.0,<1.25.0)", "mypy-boto3-resiliencehub (>=1.24.0,<1.25.0)", "mypy-boto3-resource-groups (>=1.24.0,<1.25.0)", "mypy-boto3-resourcegroupstaggingapi (>=1.24.0,<1.25.0)", "mypy-boto3-robomaker 
(>=1.24.0,<1.25.0)", "mypy-boto3-rolesanywhere (>=1.24.0,<1.25.0)", "mypy-boto3-route53 (>=1.24.0,<1.25.0)", "mypy-boto3-route53-recovery-cluster (>=1.24.0,<1.25.0)", "mypy-boto3-route53-recovery-control-config (>=1.24.0,<1.25.0)", "mypy-boto3-route53-recovery-readiness (>=1.24.0,<1.25.0)", "mypy-boto3-route53domains (>=1.24.0,<1.25.0)", "mypy-boto3-route53resolver (>=1.24.0,<1.25.0)", "mypy-boto3-rum (>=1.24.0,<1.25.0)", "mypy-boto3-s3 (>=1.24.0,<1.25.0)", "mypy-boto3-s3control (>=1.24.0,<1.25.0)", "mypy-boto3-s3outposts (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-a2i-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-edge (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-featurestore-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-sagemaker-runtime (>=1.24.0,<1.25.0)", "mypy-boto3-savingsplans (>=1.24.0,<1.25.0)", "mypy-boto3-schemas (>=1.24.0,<1.25.0)", "mypy-boto3-sdb (>=1.24.0,<1.25.0)", "mypy-boto3-secretsmanager (>=1.24.0,<1.25.0)", "mypy-boto3-securityhub (>=1.24.0,<1.25.0)", "mypy-boto3-serverlessrepo (>=1.24.0,<1.25.0)", "mypy-boto3-service-quotas (>=1.24.0,<1.25.0)", "mypy-boto3-servicecatalog (>=1.24.0,<1.25.0)", "mypy-boto3-servicecatalog-appregistry (>=1.24.0,<1.25.0)", "mypy-boto3-servicediscovery (>=1.24.0,<1.25.0)", "mypy-boto3-ses (>=1.24.0,<1.25.0)", "mypy-boto3-sesv2 (>=1.24.0,<1.25.0)", "mypy-boto3-shield (>=1.24.0,<1.25.0)", "mypy-boto3-signer (>=1.24.0,<1.25.0)", "mypy-boto3-sms (>=1.24.0,<1.25.0)", "mypy-boto3-sms-voice (>=1.24.0,<1.25.0)", "mypy-boto3-snow-device-management (>=1.24.0,<1.25.0)", "mypy-boto3-snowball (>=1.24.0,<1.25.0)", "mypy-boto3-sns (>=1.24.0,<1.25.0)", "mypy-boto3-sqs (>=1.24.0,<1.25.0)", "mypy-boto3-ssm (>=1.24.0,<1.25.0)", "mypy-boto3-ssm-contacts (>=1.24.0,<1.25.0)", "mypy-boto3-ssm-incidents (>=1.24.0,<1.25.0)", "mypy-boto3-sso (>=1.24.0,<1.25.0)", "mypy-boto3-sso-admin (>=1.24.0,<1.25.0)", "mypy-boto3-sso-oidc (>=1.24.0,<1.25.0)", "mypy-boto3-stepfunctions (>=1.24.0,<1.25.0)", 
"mypy-boto3-storagegateway (>=1.24.0,<1.25.0)", "mypy-boto3-sts (>=1.24.0,<1.25.0)", "mypy-boto3-support (>=1.24.0,<1.25.0)", "mypy-boto3-support-app (>=1.24.0,<1.25.0)", "mypy-boto3-swf (>=1.24.0,<1.25.0)", "mypy-boto3-synthetics (>=1.24.0,<1.25.0)", "mypy-boto3-textract (>=1.24.0,<1.25.0)", "mypy-boto3-timestream-query (>=1.24.0,<1.25.0)", "mypy-boto3-timestream-write (>=1.24.0,<1.25.0)", "mypy-boto3-transcribe (>=1.24.0,<1.25.0)", "mypy-boto3-transfer (>=1.24.0,<1.25.0)", "mypy-boto3-translate (>=1.24.0,<1.25.0)", "mypy-boto3-voice-id (>=1.24.0,<1.25.0)", "mypy-boto3-waf (>=1.24.0,<1.25.0)", "mypy-boto3-waf-regional (>=1.24.0,<1.25.0)", "mypy-boto3-wafv2 (>=1.24.0,<1.25.0)", "mypy-boto3-wellarchitected (>=1.24.0,<1.25.0)", "mypy-boto3-wisdom (>=1.24.0,<1.25.0)", "mypy-boto3-workdocs (>=1.24.0,<1.25.0)", "mypy-boto3-worklink (>=1.24.0,<1.25.0)", "mypy-boto3-workmail (>=1.24.0,<1.25.0)", "mypy-boto3-workmailmessageflow (>=1.24.0,<1.25.0)", "mypy-boto3-workspaces (>=1.24.0,<1.25.0)", "mypy-boto3-workspaces-web (>=1.24.0,<1.25.0)", "mypy-boto3-xray (>=1.24.0,<1.25.0)"] amp = ["mypy-boto3-amp (>=1.24.0,<1.25.0)"] amplify = ["mypy-boto3-amplify (>=1.24.0,<1.25.0)"] amplifybackend = ["mypy-boto3-amplifybackend (>=1.24.0,<1.25.0)"] @@ -464,6 +464,7 @@ stepfunctions = ["mypy-boto3-stepfunctions (>=1.24.0,<1.25.0)"] storagegateway = ["mypy-boto3-storagegateway (>=1.24.0,<1.25.0)"] sts = ["mypy-boto3-sts (>=1.24.0,<1.25.0)"] support = ["mypy-boto3-support (>=1.24.0,<1.25.0)"] +support-app = ["mypy-boto3-support-app (>=1.24.0,<1.25.0)"] swf = ["mypy-boto3-swf (>=1.24.0,<1.25.0)"] synthetics = ["mypy-boto3-synthetics (>=1.24.0,<1.25.0)"] textract = ["mypy-boto3-textract (>=1.24.0,<1.25.0)"] @@ -601,11 +602,11 @@ cffi = ">=1.12" [package.extras] docs = ["sphinx (>=1.6.5,!=1.8.0,!=3.1.0,!=3.1.1)", "sphinx-rtd-theme"] -docstest = ["pyenchant (>=1.6.11)", "twine (>=1.12.0)", "sphinxcontrib-spelling (>=4.0.1)"] +docstest = ["pyenchant (>=1.6.11)", "sphinxcontrib-spelling 
(>=4.0.1)", "twine (>=1.12.0)"] pep8test = ["black", "flake8", "flake8-import-order", "pep8-naming"] sdist = ["setuptools_rust (>=0.11.4)"] ssh = ["bcrypt (>=3.1.5)"] -test = ["pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-subtests", "pytest-xdist", "pretend", "iso8601", "pytz", "hypothesis (>=1.11.4,!=3.79.2)"] +test = ["hypothesis (>=1.11.4,!=3.79.2)", "iso8601", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-subtests", "pytest-xdist", "pytz"] [[package]] name = "docker" @@ -622,8 +623,8 @@ six = ">=1.4.0" websocket-client = ">=0.32.0" [package.extras] -tls = ["idna (>=2.0.0)", "cryptography (>=1.3.4)", "pyOpenSSL (>=17.5.0)"] ssh = ["paramiko (>=2.4.2)"] +tls = ["cryptography (>=1.3.4)", "idna (>=2.0.0)", "pyOpenSSL (>=17.5.0)"] [[package]] name = "ecdsa" @@ -723,9 +724,9 @@ python-versions = ">=3.7" zipp = ">=0.5" [package.extras] -docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)"] +docs = ["jaraco.packaging (>=9)", "rst.linker (>=1.9)", "sphinx"] perf = ["ipython"] -testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.3)", "packaging", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "importlib-resources (>=1.3)"] +testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"] [[package]] name = "iniconfig" @@ -744,10 +745,10 @@ optional = false python-versions = ">=3.6.1,<4.0" [package.extras] -pipfile_deprecated_finder = ["pipreqs", "requirementslib"] -requirements_deprecated_finder = ["pipreqs", "pip-api"] colors = ["colorama (>=0.4.3,<0.5.0)"] +pipfile_deprecated_finder = ["pipreqs", "requirementslib"] plugins = ["setuptools"] +requirements_deprecated_finder = ["pip-api", "pipreqs"] [[package]] name = 
"itsdangerous" @@ -820,9 +821,9 @@ optional = false python-versions = ">=2.7" [package.extras] -testing = ["pytest-flake8 (>=1.1.1)", "jsonlib", "enum34", "pytest-flake8 (<1.1.0)", "sqlalchemy", "scikit-learn", "pymongo", "pandas", "numpy", "feedparser", "ecdsa", "pytest-cov", "pytest-black-multipy", "pytest-checkdocs (>=1.2.3)", "pytest (>=3.5,!=3.7.3)"] -"testing.libs" = ["yajl", "ujson", "simplejson"] -docs = ["rst.linker (>=1.9)", "jaraco.packaging (>=3.2)", "sphinx"] +docs = ["jaraco.packaging (>=3.2)", "rst.linker (>=1.9)", "sphinx"] +testing = ["ecdsa", "enum34", "feedparser", "jsonlib", "numpy", "pandas", "pymongo", "pytest (>=3.5,!=3.7.3)", "pytest-black-multipy", "pytest-checkdocs (>=1.2.3)", "pytest-cov", "pytest-flake8 (<1.1.0)", "pytest-flake8 (>=1.1.1)", "scikit-learn", "sqlalchemy"] +"testing.libs" = ["simplejson", "ujson", "yajl"] [[package]] name = "jsonpointer" @@ -847,7 +848,7 @@ six = ">=1.11.0" [package.extras] format = ["idna", "jsonpointer (>1.13)", "rfc3987", "strict-rfc3339", "webcolors"] -format_nongpl = ["idna", "jsonpointer (>1.13)", "webcolors", "rfc3986-validator (>0.1.0)", "rfc3339-validator"] +format_nongpl = ["idna", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "webcolors"] [[package]] name = "junit-xml" @@ -912,28 +913,28 @@ werkzeug = ">=0.5,<2.2.0" xmltodict = "*" [package.extras] -xray = ["setuptools", "aws-xray-sdk (>=0.93,!=0.96)"] -ssm = ["dataclasses", "PyYAML (>=5.1)"] -server = ["flask-cors", "flask (<2.2.0)", "setuptools", "openapi-spec-validator (>=0.2.8)", "pyparsing (>=3.0.7)", "sshpubkeys (>=3.1.0)", "cfn-lint (>=0.4.0)", "idna (>=2.5,<4)", "aws-xray-sdk (>=0.93,!=0.96)", "jsondiff (>=1.1.2)", "graphql-core", "docker (>=2.5.1)", "ecdsa (!=0.15)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "PyYAML (>=5.1)"] -s3 = ["PyYAML (>=5.1)"] -route53resolver = ["sshpubkeys (>=3.1.0)"] -iotdata = ["jsondiff (>=1.1.2)"] -glue = ["pyparsing (>=3.0.7)"] -efs = ["sshpubkeys (>=3.1.0)"] -ec2 = 
["sshpubkeys (>=3.1.0)"] -ebs = ["sshpubkeys (>=3.1.0)"] -dynamodbstreams = ["docker (>=2.5.1)"] -dynamodb2 = ["docker (>=2.5.1)"] -dynamodb = ["docker (>=2.5.1)"] -ds = ["sshpubkeys (>=3.1.0)"] -cognitoidp = ["ecdsa (!=0.15)", "python-jose[cryptography] (>=3.1.0,<4.0.0)"] -cloudformation = ["setuptools", "openapi-spec-validator (>=0.2.8)", "pyparsing (>=3.0.7)", "sshpubkeys (>=3.1.0)", "cfn-lint (>=0.4.0)", "idna (>=2.5,<4)", "aws-xray-sdk (>=0.93,!=0.96)", "jsondiff (>=1.1.2)", "graphql-core", "docker (>=2.5.1)", "ecdsa (!=0.15)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "PyYAML (>=5.1)"] -batch = ["docker (>=2.5.1)"] -awslambda = ["docker (>=2.5.1)"] -appsync = ["graphql-core"] +all = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.4.0)", "docker (>=2.5.1)", "ecdsa (!=0.15)", "graphql-core", "idna (>=2.5,<4)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.2.8)", "pyparsing (>=3.0.7)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "setuptools", "sshpubkeys (>=3.1.0)"] +apigateway = ["PyYAML (>=5.1)", "ecdsa (!=0.15)", "openapi-spec-validator (>=0.2.8)", "python-jose[cryptography] (>=3.1.0,<4.0.0)"] apigatewayv2 = ["PyYAML (>=5.1)"] -apigateway = ["openapi-spec-validator (>=0.2.8)", "ecdsa (!=0.15)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "PyYAML (>=5.1)"] -all = ["setuptools", "openapi-spec-validator (>=0.2.8)", "pyparsing (>=3.0.7)", "sshpubkeys (>=3.1.0)", "cfn-lint (>=0.4.0)", "idna (>=2.5,<4)", "aws-xray-sdk (>=0.93,!=0.96)", "jsondiff (>=1.1.2)", "graphql-core", "docker (>=2.5.1)", "ecdsa (!=0.15)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "PyYAML (>=5.1)"] +appsync = ["graphql-core"] +awslambda = ["docker (>=2.5.1)"] +batch = ["docker (>=2.5.1)"] +cloudformation = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.4.0)", "docker (>=2.5.1)", "ecdsa (!=0.15)", "graphql-core", "idna (>=2.5,<4)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.2.8)", "pyparsing (>=3.0.7)", "python-jose[cryptography] 
(>=3.1.0,<4.0.0)", "setuptools", "sshpubkeys (>=3.1.0)"] +cognitoidp = ["ecdsa (!=0.15)", "python-jose[cryptography] (>=3.1.0,<4.0.0)"] +ds = ["sshpubkeys (>=3.1.0)"] +dynamodb = ["docker (>=2.5.1)"] +dynamodb2 = ["docker (>=2.5.1)"] +dynamodbstreams = ["docker (>=2.5.1)"] +ebs = ["sshpubkeys (>=3.1.0)"] +ec2 = ["sshpubkeys (>=3.1.0)"] +efs = ["sshpubkeys (>=3.1.0)"] +glue = ["pyparsing (>=3.0.7)"] +iotdata = ["jsondiff (>=1.1.2)"] +route53resolver = ["sshpubkeys (>=3.1.0)"] +s3 = ["PyYAML (>=5.1)"] +server = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.4.0)", "docker (>=2.5.1)", "ecdsa (!=0.15)", "flask (<2.2.0)", "flask-cors", "graphql-core", "idna (>=2.5,<4)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.2.8)", "pyparsing (>=3.0.7)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "setuptools", "sshpubkeys (>=3.1.0)"] +ssm = ["PyYAML (>=5.1)", "dataclasses"] +xray = ["aws-xray-sdk (>=0.93,!=0.96)", "setuptools"] [[package]] name = "mypy" @@ -981,11 +982,11 @@ optional = false python-versions = ">=3.8" [package.extras] -default = ["numpy (>=1.19)", "scipy (>=1.8)", "matplotlib (>=3.4)", "pandas (>=1.3)"] -developer = ["pre-commit (>=2.19)", "mypy (>=0.960)"] -doc = ["sphinx (>=5)", "pydata-sphinx-theme (>=0.9)", "sphinx-gallery (>=0.10)", "numpydoc (>=1.4)", "pillow (>=9.1)", "nb2plots (>=0.6)", "texext (>=0.6.6)"] -extra = ["lxml (>=4.6)", "pygraphviz (>=1.9)", "pydot (>=1.4.2)", "sympy (>=1.10)"] -test = ["pytest (>=7.1)", "pytest-cov (>=3.0)", "codecov (>=2.1)"] +default = ["matplotlib (>=3.4)", "numpy (>=1.19)", "pandas (>=1.3)", "scipy (>=1.8)"] +developer = ["mypy (>=0.960)", "pre-commit (>=2.19)"] +doc = ["nb2plots (>=0.6)", "numpydoc (>=1.4)", "pillow (>=9.1)", "pydata-sphinx-theme (>=0.9)", "sphinx (>=5)", "sphinx-gallery (>=0.10)", "texext (>=0.6.6)"] +extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.9)", "sympy (>=1.10)"] +test = ["codecov (>=2.1)", "pytest (>=7.1)", "pytest-cov (>=3.0)"] [[package]] name = 
"openapi-schema-validator" @@ -1000,8 +1001,8 @@ jsonschema = ">=3.0.0,<5.0.0" [package.extras] isodate = ["isodate"] -strict-rfc3339 = ["strict-rfc3339"] rfc3339-validator = ["rfc3339-validator"] +strict-rfc3339 = ["strict-rfc3339"] [[package]] name = "openapi-spec-validator" @@ -1055,8 +1056,8 @@ optional = false python-versions = ">=3.7" [package.extras] -test = ["pytest (>=6)", "pytest-mock (>=3.6)", "pytest-cov (>=2.7)", "appdirs (==1.4.4)"] -docs = ["sphinx (>=4)", "sphinx-autodoc-typehints (>=1.12)", "proselint (>=0.10.2)", "furo (>=2021.7.5b38)"] +docs = ["furo (>=2021.7.5b38)", "proselint (>=0.10.2)", "sphinx (>=4)", "sphinx-autodoc-typehints (>=1.12)"] +test = ["appdirs (==1.4.4)", "pytest (>=6)", "pytest-cov (>=2.7)", "pytest-mock (>=3.6)"] [[package]] name = "pluggy" @@ -1067,8 +1068,8 @@ optional = false python-versions = ">=3.6" [package.extras] -testing = ["pytest-benchmark", "pytest"] -dev = ["tox", "pre-commit"] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] [[package]] name = "prometheus-client" @@ -1142,9 +1143,9 @@ cryptography = {version = ">=3.3.1", optional = true, markers = "extra == \"cryp [package.extras] crypto = ["cryptography (>=3.3.1)"] -dev = ["sphinx", "sphinx-rtd-theme", "zope.interface", "cryptography (>=3.3.1)", "pytest (>=6.0.0,<7.0.0)", "coverage[toml] (==5.0.4)", "mypy", "pre-commit"] +dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.3.1)", "mypy", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx", "sphinx-rtd-theme", "zope.interface"] docs = ["sphinx", "sphinx-rtd-theme", "zope.interface"] -tests = ["pytest (>=6.0.0,<7.0.0)", "coverage[toml] (==5.0.4)"] +tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] [[package]] name = "pyparsing" @@ -1155,7 +1156,7 @@ optional = false python-versions = ">=3.6.8" [package.extras] -diagrams = ["railroad-diagrams", "jinja2"] +diagrams = ["jinja2", "railroad-diagrams"] [[package]] name = "pypiwin32" @@ -1209,7 +1210,7 @@ python-versions = ">=3.7" 
pytest = ">=6.1.0" [package.extras] -testing = ["pytest-trio (>=0.7.0)", "mypy (>=0.931)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "coverage (>=6.2)"] +testing = ["coverage (>=6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy (>=0.931)", "pytest-trio (>=0.7.0)"] [[package]] name = "pytest-forked" @@ -1304,8 +1305,8 @@ rsa = "*" [package.extras] cryptography = ["cryptography (>=3.4.0)"] -pycrypto = ["pycrypto (>=2.6.0,<2.7.0)", "pyasn1"] -pycryptodome = ["pycryptodome (>=3.3.1,<4.0.0)", "pyasn1"] +pycrypto = ["pyasn1", "pycrypto (>=2.6.0,<2.7.0)"] +pycryptodome = ["pyasn1", "pycryptodome (>=3.3.1,<4.0.0)"] [[package]] name = "pytz" @@ -1362,7 +1363,7 @@ requests = ">=2.0,<3.0" urllib3 = ">=1.25.10" [package.extras] -tests = ["pytest (>=7.0.0)", "coverage (>=6.0.0)", "pytest-cov", "pytest-asyncio", "pytest-localserver", "flake8", "types-mock", "types-requests", "mypy"] +tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asyncio", "pytest-cov", "pytest-localserver", "types-mock", "types-requests"] [[package]] name = "rsa" @@ -1492,8 +1493,8 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, <4" [package.extras] -brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"] -secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] @@ -1545,13 +1546,13 @@ optional = false python-versions = ">=3.7" [package.extras] -docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "jaraco.tidelift (>=1.4)"] -testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.3)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"] 
+docs = ["jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx"] +testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "badfeff521c68277b10555ab4174847b7315d82818ef5841e600299fb6128698" +content-hash = "ead1495454ee6d880bb240447025db93a25ebe263c2709de5f144cc2d85dc975" [metadata.files] aiopg = [ @@ -1559,12 +1560,12 @@ aiopg = [ {file = "aiopg-1.3.4.tar.gz", hash = "sha256:23f9e4cd9f28e9d91a6de3b4fb517e8bed25511cd954acccba9fe3a702d9b7d0"}, ] allure-pytest = [ - {file = "allure-pytest-2.9.45.tar.gz", hash = "sha256:20620fde08a597578b157a60ff38bdcc300e312d12eaa38cf28e4a62e22bdaa3"}, - {file = "allure_pytest-2.9.45-py3-none-any.whl", hash = "sha256:9b0325e06f8f79cf03289d4f4d741e57607d0fa12d9c094e243cbb042283f083"}, + {file = "allure-pytest-2.10.0.tar.gz", hash = "sha256:3b2ab67629f4cbd8617abd817d2b22292c6eb7efd5584f992d1af8143aea6ee7"}, + {file = "allure_pytest-2.10.0-py3-none-any.whl", hash = "sha256:08274096594758447db54c3b2c382526ee04f1fe12119cdaee92d2d93c84b530"}, ] allure-python-commons = [ - {file = "allure-python-commons-2.9.45.tar.gz", hash = "sha256:c238d28aeac35e8c7c517d8a2327e25ae5bbf2c30b5e2313d20ef11d75f5549d"}, - {file = "allure_python_commons-2.9.45-py3-none-any.whl", hash = "sha256:3572f0526db3946fb14470c58b0b41d343483aad91d37d414e4641815e13691a"}, + {file = "allure-python-commons-2.10.0.tar.gz", hash = "sha256:d4d31344b0f0037a4a11e16b91b28cf0eeb23ffa0e50c27fcfc6aabe72212d3c"}, + {file = "allure_python_commons-2.10.0-py3-none-any.whl", hash = "sha256:2a717e8ca8d296bf89cd57f38fc3c21893bd7ea8cd02a6ae5420e6d1a6eda5d0"}, ] async-timeout = [ {file = "async-timeout-4.0.2.tar.gz", hash = "sha256:2163e1640ddb52b7a8c80d0a67a08587e5d245cc9c553a74a847056bc2976b15"}, @@ -1635,8 +1636,8 @@ boto3 = [ {file = 
"boto3-1.24.38.tar.gz", hash = "sha256:f4c6b025f392c934338c7f01badfddbd0d3cf2397ff5df35c31409798dce33f5"}, ] boto3-stubs = [ - {file = "boto3-stubs-1.24.56.tar.gz", hash = "sha256:02e11b3669481469b45eee53fa5e0b587e5710f86bb95bd40667d1353d1e4bf6"}, - {file = "boto3_stubs-1.24.56-py3-none-any.whl", hash = "sha256:e5df3a68ddb8299404f63d19decc1f706ebdac64f3133c1e1cab747820337a75"}, + {file = "boto3-stubs-1.24.58.tar.gz", hash = "sha256:95ab521a9a931cc21d48c97c5bd7de0e37370d9b6a298e3905ec621db9243897"}, + {file = "boto3_stubs-1.24.58-py3-none-any.whl", hash = "sha256:a16940df2a347f7890075af8c0b202b06057bc18ff4c640ef94e09ce4176adb9"}, ] botocore = [ {file = "botocore-1.27.38-py3-none-any.whl", hash = "sha256:46a0264ff3335496bd9cb404f83ec0d8eb7bfdef8f74a830c13e6a6b9612adea"}, diff --git a/pyproject.toml b/pyproject.toml index 2c9270934d..ec166ea7cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ prometheus-client = "^0.14.1" pytest-timeout = "^2.1.0" Werkzeug = "2.1.2" pytest-order = "^1.0.1" -allure-pytest = "^2.9.45" +allure-pytest = "^2.10.0" pytest-asyncio = "^0.19.0" [tool.poetry.dev-dependencies] diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 32fd6f19c3..bbc35736bc 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -6,12 +6,10 @@ import enum import filecmp import json import os -import pathlib import re import shutil import socket import subprocess -import tarfile import tempfile import textwrap import time @@ -22,7 +20,6 @@ from enum import Flag, auto from pathlib import Path from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, TypeVar, Union, cast -import allure # type: ignore import asyncpg import backoff # type: ignore import boto3 @@ -38,7 +35,14 @@ from psycopg2.extensions import connection as PgConnection from psycopg2.extensions import make_dsn, parse_dsn from typing_extensions import Literal -from .utils import etcd_path, 
get_self_dir, lsn_from_hex, lsn_to_hex, subprocess_capture +from .utils import ( + allure_attach_from_dir, + etcd_path, + get_self_dir, + lsn_from_hex, + lsn_to_hex, + subprocess_capture, +) """ This file contains pytest fixtures. A fixture is a test resource that can be @@ -99,7 +103,7 @@ def pytest_configure(config): top_output_dir = env_test_output else: top_output_dir = os.path.join(base_dir, DEFAULT_OUTPUT_DIR) - pathlib.Path(top_output_dir).mkdir(exist_ok=True) + Path(top_output_dir).mkdir(exist_ok=True) # Find the postgres installation. global pg_distrib_dir @@ -234,11 +238,12 @@ def default_broker(request: Any, port_distributor: PortDistributor): client_port = port_distributor.get_port() # multiple pytest sessions could get launched in parallel, get them different datadirs etcd_datadir = os.path.join(get_test_output_dir(request), f"etcd_datadir_{client_port}") - pathlib.Path(etcd_datadir).mkdir(exist_ok=True, parents=True) + Path(etcd_datadir).mkdir(exist_ok=True, parents=True) broker = Etcd(datadir=etcd_datadir, port=client_port, peer_port=port_distributor.get_port()) yield broker broker.stop() + allure_attach_from_dir(Path(etcd_datadir)) @pytest.fixture(scope="session") @@ -1882,7 +1887,7 @@ class Postgres(PgProtocol): self.env.neon_cli.pg_create( branch_name, node_name=self.node_name, tenant_id=self.tenant_id, lsn=lsn, port=self.port ) - path = pathlib.Path("pgdatadirs") / "tenants" / self.tenant_id.hex / self.node_name + path = Path("pgdatadirs") / "tenants" / self.tenant_id.hex / self.node_name self.pgdata_dir = os.path.join(self.env.repo_dir, path) if config_lines is None: @@ -1913,7 +1918,7 @@ class Postgres(PgProtocol): def pg_data_dir_path(self) -> str: """Path to data directory""" assert self.node_name - path = pathlib.Path("pgdatadirs") / "tenants" / self.tenant_id.hex / self.node_name + path = Path("pgdatadirs") / "tenants" / self.tenant_id.hex / self.node_name return os.path.join(self.env.repo_dir, path) def pg_xact_dir_path(self) -> str: @@ 
-2289,7 +2294,7 @@ class Etcd: log.debug(f"etcd is already running on port {self.port}") return - pathlib.Path(self.datadir).mkdir(exist_ok=True) + Path(self.datadir).mkdir(exist_ok=True) if not self.binary_path.is_file(): raise RuntimeError(f"etcd broker binary '{self.binary_path}' is not a file") @@ -2329,26 +2334,16 @@ class Etcd: self.handle.wait() -def get_test_output_dir(request: Any) -> pathlib.Path: +def get_test_output_dir(request: Any) -> Path: """Compute the working directory for an individual test.""" test_name = request.node.name - test_dir = pathlib.Path(top_output_dir) / test_name.replace("/", "-") + test_dir = Path(top_output_dir) / test_name.replace("/", "-") log.info(f"get_test_output_dir is {test_dir}") # make mypy happy - assert isinstance(test_dir, pathlib.Path) + assert isinstance(test_dir, Path) return test_dir -ATTACHMENT_SUFFIXES = frozenset( - ( - ".log", - ".stderr", - ".stdout", - ".diffs", - ) -) - - # This is autouse, so the test output directory always gets created, even # if a test doesn't put anything there. It also solves a problem with the # neon_simple_env fixture: if TEST_SHARED_FIXTURES is not set, it @@ -2359,7 +2354,7 @@ ATTACHMENT_SUFFIXES = frozenset( # this fixture ensures that the directory exists. That works because # 'autouse' fixtures are run before other fixtures. 
@pytest.fixture(scope="function", autouse=True) -def test_output_dir(request: Any) -> Iterator[pathlib.Path]: +def test_output_dir(request: Any) -> Iterator[Path]: """Create the working directory for an individual test.""" # one directory per test @@ -2370,23 +2365,7 @@ def test_output_dir(request: Any) -> Iterator[pathlib.Path]: yield test_dir - for attachment in test_dir.glob("**/*"): - if attachment.suffix in ATTACHMENT_SUFFIXES: - source = str(attachment) - name = str(attachment.relative_to(test_dir)) - attachment_type = "text/plain" - extension = attachment.suffix.removeprefix(".") - - # compress files larger than 1Mb, they're hardly readable in a browser - if attachment.stat().st_size > 1024 * 1024: - source = f"{attachment}.tar.gz" - with tarfile.open(source, "w:gz") as tar: - tar.add(attachment, arcname=attachment.name) - name = f"{name}.tar.gz" - attachment_type = "application/gzip" - extension = "tar.gz" - - allure.attach.file(source, name, attachment_type, extension) + allure_attach_from_dir(test_dir) SKIP_DIRS = frozenset( @@ -2439,7 +2418,7 @@ def should_skip_file(filename: str) -> bool: # # Test helpers # -def list_files_to_compare(pgdata_dir: pathlib.Path): +def list_files_to_compare(pgdata_dir: Path): pgdata_files = [] for root, _file, filenames in os.walk(pgdata_dir): for filename in filenames: @@ -2492,7 +2471,7 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, pg: Post # list files we're going to compare assert pg.pgdata_dir - pgdata_files = list_files_to_compare(pathlib.Path(pg.pgdata_dir)) + pgdata_files = list_files_to_compare(Path(pg.pgdata_dir)) restored_files = list_files_to_compare(restored_dir_path) # check that file sets are equal diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py index 324c62170b..88bf6d634d 100644 --- a/test_runner/fixtures/utils.py +++ b/test_runner/fixtures/utils.py @@ -1,11 +1,13 @@ import contextlib import os -import pathlib +import re import shutil import subprocess 
+import tarfile from pathlib import Path from typing import Any, List, Tuple +import allure # type: ignore from fixtures.log_helper import log from psycopg2.extensions import cursor @@ -116,7 +118,7 @@ def get_dir_size(path: str) -> int: return totalbytes -def get_timeline_dir_size(path: pathlib.Path) -> int: +def get_timeline_dir_size(path: Path) -> int: """Get the timeline directory's total size, which only counts the layer files' size.""" sz = 0 for dir_entry in path.iterdir(): @@ -161,3 +163,36 @@ def get_scale_for_db(size_mb: int) -> int: """ return round(0.06689 * size_mb - 0.5) + + +ATTACHMENT_NAME_REGEX = re.compile( + r".+\.log|.+\.stderr|.+\.stdout|.+\.filediff|.+\.metrics|flamegraph\.svg|regression\.diffs" +) + + +def allure_attach_from_dir(dir: Path): + """Attach all non-empty files from `dir` that matches `ATTACHMENT_NAME_REGEX` to Allure report""" + + for attachment in Path(dir).glob("**/*"): + if ATTACHMENT_NAME_REGEX.fullmatch(attachment.name) and attachment.stat().st_size > 0: + source = str(attachment) + name = str(attachment.relative_to(dir)) + + # compress files larger than 1Mb, they're hardly readable in a browser + if attachment.stat().st_size > 1024 * 1024: + source = f"{attachment}.tar.gz" + with tarfile.open(source, "w:gz") as tar: + tar.add(attachment, arcname=attachment.name) + name = f"{name}.tar.gz" + + if source.endswith(".tar.gz"): + attachment_type = "application/gzip" + extension = "tar.gz" + elif source.endswith(".svg"): + attachment_type = "image/svg+xml" + extension = "svg" + else: + attachment_type = "text/plain" + extension = attachment.suffix.removeprefix(".") + + allure.attach.file(source, name, attachment_type, extension) From 13beeb59cd0da7b9482a5631cfd74ab23c33c48c Mon Sep 17 00:00:00 2001 From: MMeent Date: Thu, 1 Sep 2022 12:53:17 +0200 Subject: [PATCH 62/63] Update extensions included in compute-node Update PLV8 to 3.1.4 - which is the latest release. 
Update PostGIS to 3.3.0 Remove PLV8 from the final image -- there is an issue we hit when installing PLV8, and we don't quite know what it is yet. --- Dockerfile.compute-node | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/Dockerfile.compute-node b/Dockerfile.compute-node index 950ec16016..2e031b17da 100644 --- a/Dockerfile.compute-node +++ b/Dockerfile.compute-node @@ -1,4 +1,7 @@ ARG TAG=pinned +# apparently, ARGs don't get replaced in RUN commands in kaniko +# ARG POSTGIS_VERSION=3.3.0 +# ARG PLV8_VERSION=3.1.4 FROM debian:bullseye-slim AS build-deps RUN apt update && \ @@ -24,9 +27,9 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN apt update && \ apt install -y gdal-bin libgdal-dev libprotobuf-c-dev protobuf-c-compiler xsltproc wget -RUN wget https://download.osgeo.org/postgis/source/postgis-3.2.3.tar.gz && \ - tar xvzf postgis-3.2.3.tar.gz && \ - cd postgis-3.2.3 && \ +RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.0.tar.gz && \ + tar xvzf postgis-3.3.0.tar.gz && \ + cd postgis-3.3.0 && \ ./autogen.sh && \ export PATH="/usr/local/pgsql/bin:$PATH" && \ ./configure && \ @@ -52,18 +55,18 @@ RUN echo "deb http://ftp.debian.org/debian testing main" >> /etc/apt/sources.lis apt update && \ apt install -y --no-install-recommends -t testing binutils -RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.3.tar.gz && \ - tar xvzf v3.1.3.tar.gz && \ - cd plv8-3.1.3 && \ +RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.4.tar.gz && \ + tar xvzf v3.1.4.tar.gz && \ + cd plv8-3.1.4 && \ export PATH="/usr/local/pgsql/bin:$PATH" && \ - make && \ - make install && \ + make -j $(getconf _NPROCESSORS_ONLN) && \ + make -j $(getconf _NPROCESSORS_ONLN) install && \ rm -rf /plv8-* && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control # compile neon extensions FROM build-deps AS neon-pg-ext-build -COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY 
--from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/ COPY pgxn/ pgxn/ RUN make -j $(getconf _NPROCESSORS_ONLN) \ From 46c8a93976873da6199c0c128969129e2751f9b6 Mon Sep 17 00:00:00 2001 From: Sergey Melnikov Date: Thu, 1 Sep 2022 15:06:52 +0300 Subject: [PATCH 63/63] Fix PERF_TEST_RESULT_CONNSTR for benchmark init (#2375) --- .github/actions/run-python-test-set/action.yml | 2 +- .github/workflows/benchmarking.yml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml index 1cc65b4286..2344fba13c 100644 --- a/.github/actions/run-python-test-set/action.yml +++ b/.github/actions/run-python-test-set/action.yml @@ -24,7 +24,7 @@ inputs: required: false default: 'true' save_perf_report: - description: 'Whether to upload the performance report' + description: 'Whether to upload the performance report, if true PERF_TEST_RESULT_CONNSTR env variable should be set' required: false default: 'false' run_with_real_s3: diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 1370917377..4c58dda6b6 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -179,6 +179,8 @@ jobs: env: PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }} BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }} + VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" + PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" - name: Benchmark simple-update uses: ./.github/actions/run-python-test-set