mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-29 19:10:38 +00:00
Compare commits
44 Commits
faster-ci
...
hackaneon/
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6037b8e6c1 | ||
|
|
c108872a36 | ||
|
|
588e6dd1ef | ||
|
|
2fafe47e09 | ||
|
|
e6e72e8b68 | ||
|
|
2a0695e01a | ||
|
|
af6c168265 | ||
|
|
1b3209497f | ||
|
|
9f97523d0d | ||
|
|
33196c90bc | ||
|
|
c72c5df922 | ||
|
|
4c7599e8df | ||
|
|
4b2b0a24da | ||
|
|
ef5a95010b | ||
|
|
c736f9d6ef | ||
|
|
adc798b59e | ||
|
|
f0668a7a4d | ||
|
|
6d73642a93 | ||
|
|
9012a18fa1 | ||
|
|
a6a7550bb4 | ||
|
|
10556f25df | ||
|
|
f54cf567ff | ||
|
|
4303914681 | ||
|
|
539225d792 | ||
|
|
c118736c9c | ||
|
|
3f85246a42 | ||
|
|
709a6ad29b | ||
|
|
3640824553 | ||
|
|
fea1f34f6a | ||
|
|
5d40d1ccdd | ||
|
|
b2cb10590e | ||
|
|
2923fd2a5b | ||
|
|
2a5336b9ab | ||
|
|
6f20726610 | ||
|
|
29f741e1e9 | ||
|
|
2b37a40079 | ||
|
|
af2b65a2fb | ||
|
|
5d194c7824 | ||
|
|
ac2702afd3 | ||
|
|
88fd46d795 | ||
|
|
2d6763882e | ||
|
|
c0c23cde72 | ||
|
|
942bc9544b | ||
|
|
02b7cdb305 |
115
Cargo.lock
generated
115
Cargo.lock
generated
@@ -1189,9 +1189,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "comfy-table"
|
||||
version = "7.1.1"
|
||||
version = "6.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7"
|
||||
checksum = "6e7b787b0dc42e8111badfdbe4c3059158ccb2db8780352fa1b01e8ccf45cc4d"
|
||||
dependencies = [
|
||||
"crossterm",
|
||||
"strum",
|
||||
@@ -1246,7 +1246,7 @@ dependencies = [
|
||||
"tokio-postgres",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"toml_edit",
|
||||
"toml_edit 0.19.10",
|
||||
"tracing",
|
||||
"tracing-opentelemetry",
|
||||
"tracing-subscriber",
|
||||
@@ -1360,8 +1360,8 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"tokio-util",
|
||||
"toml",
|
||||
"toml_edit",
|
||||
"toml 0.7.4",
|
||||
"toml_edit 0.19.10",
|
||||
"tracing",
|
||||
"url",
|
||||
"utils",
|
||||
@@ -1485,22 +1485,25 @@ checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"
|
||||
|
||||
[[package]]
|
||||
name = "crossterm"
|
||||
version = "0.27.0"
|
||||
version = "0.25.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df"
|
||||
checksum = "e64e6c0fbe2c17357405f7c758c1ef960fce08bdfb2c03d88d2a18d7e09c4b67"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"bitflags 1.3.2",
|
||||
"crossterm_winapi",
|
||||
"libc",
|
||||
"mio",
|
||||
"parking_lot 0.12.1",
|
||||
"signal-hook",
|
||||
"signal-hook-mio",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossterm_winapi"
|
||||
version = "0.9.1"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b"
|
||||
checksum = "2ae1b35a484aa10e07fe0638d02301c5ad24de82d310ccbd2f3693da5f09bf1c"
|
||||
dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
@@ -3141,7 +3144,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd01039851e82f8799046eabbb354056283fb265c8ec0996af940f4e85a380ff"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"toml",
|
||||
"toml 0.8.14",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3657,7 +3660,7 @@ dependencies = [
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"toml_edit",
|
||||
"toml_edit 0.19.10",
|
||||
"utils",
|
||||
"workspace_hack",
|
||||
]
|
||||
@@ -3744,7 +3747,7 @@ dependencies = [
|
||||
"tokio-stream",
|
||||
"tokio-tar",
|
||||
"tokio-util",
|
||||
"toml_edit",
|
||||
"toml_edit 0.19.10",
|
||||
"tracing",
|
||||
"twox-hash",
|
||||
"url",
|
||||
@@ -3907,9 +3910,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "parquet"
|
||||
version = "53.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0fbf928021131daaa57d334ca8e3904fe9ae22f73c56244fc7db9b04eedc3d8"
|
||||
version = "51.0.0"
|
||||
source = "git+https://github.com/apache/arrow-rs?branch=master#2534976a564be3d2d56312dc88fb1b6ed4cef829"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"bytes",
|
||||
@@ -3928,9 +3930,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "parquet_derive"
|
||||
version = "53.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "86e9fcfae007533a06b580429a3f7e07cb833ec8aa37c041c16563e7918f057e"
|
||||
version = "51.0.0"
|
||||
source = "git+https://github.com/apache/arrow-rs?branch=master#2534976a564be3d2d56312dc88fb1b6ed4cef829"
|
||||
dependencies = [
|
||||
"parquet",
|
||||
"proc-macro2",
|
||||
@@ -4120,7 +4121,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres"
|
||||
version = "0.19.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=20031d7a9ee1addeae6e0968e3899ae6bf01cee2#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
@@ -4133,7 +4134,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-protocol"
|
||||
version = "0.6.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=20031d7a9ee1addeae6e0968e3899ae6bf01cee2#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
dependencies = [
|
||||
"base64 0.20.0",
|
||||
"byteorder",
|
||||
@@ -4152,7 +4153,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-types"
|
||||
version = "0.2.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=20031d7a9ee1addeae6e0968e3899ae6bf01cee2#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
@@ -4811,7 +4812,7 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"toml_edit",
|
||||
"toml_edit 0.19.10",
|
||||
"tracing",
|
||||
"utils",
|
||||
]
|
||||
@@ -5321,7 +5322,7 @@ dependencies = [
|
||||
"tokio-stream",
|
||||
"tokio-tar",
|
||||
"tokio-util",
|
||||
"toml_edit",
|
||||
"toml_edit 0.19.10",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"url",
|
||||
@@ -5730,6 +5731,17 @@ dependencies = [
|
||||
"signal-hook-registry",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook-mio"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"mio",
|
||||
"signal-hook",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook-registry"
|
||||
version = "1.4.1"
|
||||
@@ -6042,21 +6054,21 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.26.3"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
|
||||
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.26.4"
|
||||
version = "0.24.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
|
||||
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
|
||||
dependencies = [
|
||||
"heck 0.5.0",
|
||||
"heck 0.4.1",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn 2.0.52",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6397,7 +6409,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "tokio-postgres"
|
||||
version = "0.7.7"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=20031d7a9ee1addeae6e0968e3899ae6bf01cee2#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
@@ -6508,6 +6520,18 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml"
|
||||
version = "0.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d6135d499e69981f9ff0ef2167955a5333c35e36f6937d382974566b3d5b94ec"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_spanned",
|
||||
"toml_datetime",
|
||||
"toml_edit 0.19.10",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml"
|
||||
version = "0.8.14"
|
||||
@@ -6517,7 +6541,7 @@ dependencies = [
|
||||
"serde",
|
||||
"serde_spanned",
|
||||
"toml_datetime",
|
||||
"toml_edit",
|
||||
"toml_edit 0.22.14",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6529,6 +6553,19 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml_edit"
|
||||
version = "0.19.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2380d56e8670370eee6566b0bfd4265f65b3f432e8c6d85623f728d4fa31f739"
|
||||
dependencies = [
|
||||
"indexmap 1.9.3",
|
||||
"serde",
|
||||
"serde_spanned",
|
||||
"toml_datetime",
|
||||
"winnow 0.4.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml_edit"
|
||||
version = "0.22.14"
|
||||
@@ -6539,7 +6576,7 @@ dependencies = [
|
||||
"serde",
|
||||
"serde_spanned",
|
||||
"toml_datetime",
|
||||
"winnow",
|
||||
"winnow 0.6.13",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6952,7 +6989,7 @@ dependencies = [
|
||||
"tokio-stream",
|
||||
"tokio-tar",
|
||||
"tokio-util",
|
||||
"toml_edit",
|
||||
"toml_edit 0.19.10",
|
||||
"tracing",
|
||||
"tracing-error",
|
||||
"tracing-subscriber",
|
||||
@@ -7498,6 +7535,15 @@ version = "0.52.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"
|
||||
|
||||
[[package]]
|
||||
name = "winnow"
|
||||
version = "0.4.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "61de7bac303dc551fe038e2b3cef0f571087a47571ea6e79a87692ac99b99699"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winnow"
|
||||
version = "0.6.13"
|
||||
@@ -7605,7 +7651,6 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-rustls 0.24.0",
|
||||
"tokio-util",
|
||||
"toml_edit",
|
||||
"tonic",
|
||||
"tower",
|
||||
"tracing",
|
||||
|
||||
39
Cargo.toml
39
Cargo.toml
@@ -73,7 +73,7 @@ camino = "1.1.6"
|
||||
cfg-if = "1.0.0"
|
||||
chrono = { version = "0.4", default-features = false, features = ["clock"] }
|
||||
clap = { version = "4.0", features = ["derive"] }
|
||||
comfy-table = "7.1"
|
||||
comfy-table = "6.1"
|
||||
const_format = "0.2"
|
||||
crc32c = "0.6"
|
||||
crossbeam-deque = "0.8.5"
|
||||
@@ -123,8 +123,8 @@ opentelemetry = "0.20.0"
|
||||
opentelemetry-otlp = { version = "0.13.0", default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
|
||||
opentelemetry-semantic-conventions = "0.12.0"
|
||||
parking_lot = "0.12"
|
||||
parquet = { version = "53", default-features = false, features = ["zstd"] }
|
||||
parquet_derive = "53"
|
||||
parquet = { version = "51.0.0", default-features = false, features = ["zstd"] }
|
||||
parquet_derive = "51.0.0"
|
||||
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
|
||||
pin-project-lite = "0.2"
|
||||
procfs = "0.16"
|
||||
@@ -158,8 +158,8 @@ signal-hook = "0.3"
|
||||
smallvec = "1.11"
|
||||
smol_str = { version = "0.2.0", features = ["serde"] }
|
||||
socket2 = "0.5"
|
||||
strum = "0.26"
|
||||
strum_macros = "0.26"
|
||||
strum = "0.24"
|
||||
strum_macros = "0.24"
|
||||
"subtle" = "2.5.0"
|
||||
svg_fmt = "0.4.3"
|
||||
sync_wrapper = "0.1.2"
|
||||
@@ -177,8 +177,8 @@ tokio-rustls = "0.25"
|
||||
tokio-stream = "0.1"
|
||||
tokio-tar = "0.3"
|
||||
tokio-util = { version = "0.7.10", features = ["io", "rt"] }
|
||||
toml = "0.8"
|
||||
toml_edit = "0.22"
|
||||
toml = "0.7"
|
||||
toml_edit = "0.19"
|
||||
tonic = {version = "0.9", features = ["tls", "tls-roots"]}
|
||||
tower-service = "0.3.2"
|
||||
tracing = "0.1"
|
||||
@@ -201,21 +201,10 @@ env_logger = "0.10"
|
||||
log = "0.4"
|
||||
|
||||
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
|
||||
|
||||
# We want to use the 'neon' branch for these, but there's currently one
|
||||
# incompatible change on the branch. See:
|
||||
#
|
||||
# - PR #8076 which contained changes that depended on the new changes in
|
||||
# the rust-postgres crate, and
|
||||
# - PR #8654 which reverted those changes and made the code in proxy incompatible
|
||||
# with the tip of the 'neon' branch again.
|
||||
#
|
||||
# When those proxy changes are re-applied (see PR #8747), we can switch using
|
||||
# the tip of the 'neon' branch again.
|
||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
|
||||
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
|
||||
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
|
||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
|
||||
## Local libraries
|
||||
compute_api = { version = "0.1", path = "./libs/compute_api/" }
|
||||
@@ -252,7 +241,11 @@ tonic-build = "0.9"
|
||||
[patch.crates-io]
|
||||
|
||||
# Needed to get `tokio-postgres-rustls` to depend on our fork.
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
|
||||
# bug fixes for UUID
|
||||
parquet = { git = "https://github.com/apache/arrow-rs", branch = "master" }
|
||||
parquet_derive = { git = "https://github.com/apache/arrow-rs", branch = "master" }
|
||||
|
||||
################# Binary contents sections
|
||||
|
||||
|
||||
@@ -75,14 +75,14 @@ impl PageServerNode {
|
||||
}
|
||||
}
|
||||
|
||||
fn pageserver_make_identity_toml(&self, node_id: NodeId) -> toml_edit::DocumentMut {
|
||||
toml_edit::DocumentMut::from_str(&format!("id={node_id}")).unwrap()
|
||||
fn pageserver_make_identity_toml(&self, node_id: NodeId) -> toml_edit::Document {
|
||||
toml_edit::Document::from_str(&format!("id={node_id}")).unwrap()
|
||||
}
|
||||
|
||||
fn pageserver_init_make_toml(
|
||||
&self,
|
||||
conf: NeonLocalInitPageserverConf,
|
||||
) -> anyhow::Result<toml_edit::DocumentMut> {
|
||||
) -> anyhow::Result<toml_edit::Document> {
|
||||
assert_eq!(&PageServerConf::from(&conf), &self.conf, "during neon_local init, we derive the runtime state of ps conf (self.conf) from the --config flag fully");
|
||||
|
||||
// TODO(christian): instead of what we do here, create a pageserver_api::config::ConfigToml (PR #7656)
|
||||
@@ -137,9 +137,9 @@ impl PageServerNode {
|
||||
|
||||
// Turn `overrides` into a toml document.
|
||||
// TODO: above code is legacy code, it should be refactored to use toml_edit directly.
|
||||
let mut config_toml = toml_edit::DocumentMut::new();
|
||||
let mut config_toml = toml_edit::Document::new();
|
||||
for fragment_str in overrides {
|
||||
let fragment = toml_edit::DocumentMut::from_str(&fragment_str)
|
||||
let fragment = toml_edit::Document::from_str(&fragment_str)
|
||||
.expect("all fragments in `overrides` are valid toml documents, this function controls that");
|
||||
for (key, item) in fragment.iter() {
|
||||
config_toml.insert(key, item.clone());
|
||||
|
||||
@@ -263,6 +263,15 @@ impl Key {
|
||||
field5: u8::MAX,
|
||||
field6: u32::MAX,
|
||||
};
|
||||
/// A key slightly smaller than [`Key::MAX`] for use in layer key ranges to avoid them to be confused with L0 layers
|
||||
pub const NON_L0_MAX: Key = Key {
|
||||
field1: u8::MAX,
|
||||
field2: u32::MAX,
|
||||
field3: u32::MAX,
|
||||
field4: u32::MAX,
|
||||
field5: u8::MAX,
|
||||
field6: u32::MAX - 1,
|
||||
};
|
||||
|
||||
pub fn from_hex(s: &str) -> Result<Self> {
|
||||
if s.len() != 36 {
|
||||
|
||||
@@ -62,7 +62,7 @@ use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
serde::Serialize,
|
||||
serde::Deserialize,
|
||||
strum_macros::Display,
|
||||
strum_macros::VariantNames,
|
||||
strum_macros::EnumVariantNames,
|
||||
strum_macros::AsRefStr,
|
||||
strum_macros::IntoStaticStr,
|
||||
)]
|
||||
|
||||
@@ -121,7 +121,6 @@ fn main() -> anyhow::Result<()> {
|
||||
.allowlist_type("XLogPageHeaderData")
|
||||
.allowlist_type("XLogLongPageHeaderData")
|
||||
.allowlist_var("XLOG_PAGE_MAGIC")
|
||||
.allowlist_var("PG_MAJORVERSION_NUM")
|
||||
.allowlist_var("PG_CONTROL_FILE_SIZE")
|
||||
.allowlist_var("PG_CONTROLFILEDATA_OFFSETOF_CRC")
|
||||
.allowlist_type("PageHeaderData")
|
||||
|
||||
@@ -44,9 +44,6 @@ macro_rules! postgres_ffi {
|
||||
// Re-export some symbols from bindings
|
||||
pub use bindings::DBState_DB_SHUTDOWNED;
|
||||
pub use bindings::{CheckPoint, ControlFileData, XLogRecord};
|
||||
|
||||
pub const ZERO_CHECKPOINT: bytes::Bytes =
|
||||
bytes::Bytes::from_static(&[0u8; xlog_utils::SIZEOF_CHECKPOINT]);
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -109,107 +106,6 @@ macro_rules! dispatch_pgversion {
|
||||
};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! enum_pgversion_dispatch {
|
||||
($name:expr, $typ:ident, $bind:ident, $code:block) => {
|
||||
enum_pgversion_dispatch!(
|
||||
name = $name,
|
||||
bind = $bind,
|
||||
typ = $typ,
|
||||
code = $code,
|
||||
pgversions = [
|
||||
V14 : v14,
|
||||
V15 : v15,
|
||||
V16 : v16,
|
||||
]
|
||||
)
|
||||
};
|
||||
(name = $name:expr,
|
||||
bind = $bind:ident,
|
||||
typ = $typ:ident,
|
||||
code = $code:block,
|
||||
pgversions = [$($variant:ident : $md:ident),+ $(,)?]) => {
|
||||
match $name {
|
||||
$(
|
||||
self::$typ::$variant($bind) => {
|
||||
use $crate::$md as pgv;
|
||||
$code
|
||||
}
|
||||
),+,
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! enum_pgversion {
|
||||
{$name:ident, pgv :: $t:ident} => {
|
||||
enum_pgversion!{
|
||||
name = $name,
|
||||
typ = $t,
|
||||
pgversions = [
|
||||
V14 : v14,
|
||||
V15 : v15,
|
||||
V16 : v16,
|
||||
]
|
||||
}
|
||||
};
|
||||
{$name:ident, pgv :: $p:ident :: $t:ident} => {
|
||||
enum_pgversion!{
|
||||
name = $name,
|
||||
path = $p,
|
||||
typ = $t,
|
||||
pgversions = [
|
||||
V14 : v14,
|
||||
V15 : v15,
|
||||
V16 : v16,
|
||||
]
|
||||
}
|
||||
};
|
||||
{name = $name:ident,
|
||||
typ = $t:ident,
|
||||
pgversions = [$($variant:ident : $md:ident),+ $(,)?]} => {
|
||||
pub enum $name {
|
||||
$($variant ( $crate::$md::$t )),+
|
||||
}
|
||||
impl self::$name {
|
||||
pub fn pg_version(&self) -> u32 {
|
||||
enum_pgversion_dispatch!(self, $name, _ign, {
|
||||
pgv::bindings::PG_MAJORVERSION_NUM
|
||||
})
|
||||
}
|
||||
}
|
||||
$(
|
||||
impl Into<self::$name> for $crate::$md::$t {
|
||||
fn into(self) -> self::$name {
|
||||
self::$name::$variant (self)
|
||||
}
|
||||
}
|
||||
)+
|
||||
};
|
||||
{name = $name:ident,
|
||||
path = $p:ident,
|
||||
typ = $t:ident,
|
||||
pgversions = [$($variant:ident : $md:ident),+ $(,)?]} => {
|
||||
pub enum $name {
|
||||
$($variant ($crate::$md::$p::$t)),+
|
||||
}
|
||||
impl $name {
|
||||
pub fn pg_version(&self) -> u32 {
|
||||
enum_pgversion_dispatch!(self, $name, _ign, {
|
||||
pgv::bindings::PG_MAJORVERSION_NUM
|
||||
})
|
||||
}
|
||||
}
|
||||
$(
|
||||
impl Into<$name> for $crate::$md::$p::$t {
|
||||
fn into(self) -> $name {
|
||||
$name::$variant (self)
|
||||
}
|
||||
}
|
||||
)+
|
||||
};
|
||||
}
|
||||
|
||||
pub mod pg_constants;
|
||||
pub mod relfile_utils;
|
||||
|
||||
|
||||
@@ -185,7 +185,7 @@ mod tests {
|
||||
use super::*;
|
||||
|
||||
fn parse(input: &str) -> anyhow::Result<RemoteStorageConfig> {
|
||||
let toml = input.parse::<toml_edit::DocumentMut>().unwrap();
|
||||
let toml = input.parse::<toml_edit::Document>().unwrap();
|
||||
RemoteStorageConfig::from_toml(toml.as_item())
|
||||
}
|
||||
|
||||
|
||||
@@ -3,9 +3,11 @@ use std::str::FromStr;
|
||||
use anyhow::Context;
|
||||
use metrics::{IntCounter, IntCounterVec};
|
||||
use once_cell::sync::Lazy;
|
||||
use strum_macros::{EnumString, VariantNames};
|
||||
use strum_macros::{EnumString, EnumVariantNames};
|
||||
|
||||
#[derive(EnumString, strum_macros::Display, VariantNames, Eq, PartialEq, Debug, Clone, Copy)]
|
||||
#[derive(
|
||||
EnumString, strum_macros::Display, EnumVariantNames, Eq, PartialEq, Debug, Clone, Copy,
|
||||
)]
|
||||
#[strum(serialize_all = "snake_case")]
|
||||
pub enum LogFormat {
|
||||
Plain,
|
||||
|
||||
@@ -10,7 +10,7 @@ pub fn deserialize_item<T>(item: &toml_edit::Item) -> Result<T, Error>
|
||||
where
|
||||
T: serde::de::DeserializeOwned,
|
||||
{
|
||||
let document: toml_edit::DocumentMut = match item {
|
||||
let document: toml_edit::Document = match item {
|
||||
toml_edit::Item::Table(toml) => toml.clone().into(),
|
||||
toml_edit::Item::Value(toml_edit::Value::InlineTable(toml)) => {
|
||||
toml.clone().into_table().into()
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::pin::Pin;
|
||||
use std::{pin::Pin, sync::Arc};
|
||||
|
||||
use futures::SinkExt;
|
||||
use futures::{SinkExt, StreamExt};
|
||||
use pageserver_api::{
|
||||
models::{
|
||||
PagestreamBeMessage, PagestreamFeMessage, PagestreamGetPageRequest,
|
||||
@@ -10,7 +10,6 @@ use pageserver_api::{
|
||||
};
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio_postgres::CopyOutStream;
|
||||
use tokio_stream::StreamExt;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use utils::{
|
||||
id::{TenantId, TimelineId},
|
||||
@@ -136,18 +135,68 @@ impl PagestreamClient {
|
||||
drop(copy_both);
|
||||
}
|
||||
|
||||
pub async fn getpage(
|
||||
&mut self,
|
||||
req: PagestreamGetPageRequest,
|
||||
) -> anyhow::Result<PagestreamGetPageResponse> {
|
||||
pub fn split(self) -> (PagestreamTx, PagestreamRx) {
|
||||
let Self {
|
||||
copy_both,
|
||||
cancel_on_client_drop,
|
||||
conn_task,
|
||||
} = self;
|
||||
let keep_client_alive = KeepClientAlive {
|
||||
client: conn_task,
|
||||
cancel_on_client_drop: cancel_on_client_drop.unwrap(),
|
||||
};
|
||||
let keep_client_alive = Arc::new(keep_client_alive);
|
||||
let (sink, stream): (
|
||||
futures::stream::SplitSink<
|
||||
Pin<Box<tokio_postgres::CopyBothDuplex<bytes::Bytes>>>,
|
||||
bytes::Bytes,
|
||||
>,
|
||||
futures::stream::SplitStream<Pin<Box<tokio_postgres::CopyBothDuplex<bytes::Bytes>>>>,
|
||||
) = copy_both.split();
|
||||
(
|
||||
PagestreamTx {
|
||||
sink,
|
||||
keep_client_alive: keep_client_alive.clone(),
|
||||
},
|
||||
PagestreamRx {
|
||||
stream,
|
||||
keep_client_alive,
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
struct KeepClientAlive {
|
||||
client: JoinHandle<()>,
|
||||
cancel_on_client_drop: tokio_util::sync::DropGuard,
|
||||
}
|
||||
|
||||
pub struct PagestreamTx {
|
||||
sink: futures::stream::SplitSink<
|
||||
Pin<Box<tokio_postgres::CopyBothDuplex<bytes::Bytes>>>,
|
||||
bytes::Bytes,
|
||||
>,
|
||||
keep_client_alive: Arc<KeepClientAlive>,
|
||||
}
|
||||
|
||||
pub struct PagestreamRx {
|
||||
stream: futures::stream::SplitStream<Pin<Box<tokio_postgres::CopyBothDuplex<bytes::Bytes>>>>,
|
||||
keep_client_alive: Arc<KeepClientAlive>,
|
||||
}
|
||||
|
||||
impl PagestreamTx {
|
||||
pub async fn send_getpage(&mut self, req: PagestreamGetPageRequest) -> anyhow::Result<()> {
|
||||
let req = PagestreamFeMessage::GetPage(req);
|
||||
let req: bytes::Bytes = req.serialize();
|
||||
// let mut req = tokio_util::io::ReaderStream::new(&req);
|
||||
let mut req = tokio_stream::once(Ok(req));
|
||||
let mut req = tokio_stream::once(Ok(req.clone()));
|
||||
self.sink.send_all(&mut req).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
self.copy_both.send_all(&mut req).await?;
|
||||
|
||||
let next: Option<Result<bytes::Bytes, _>> = self.copy_both.next().await;
|
||||
impl PagestreamRx {
|
||||
pub async fn recv_getpage(&mut self) -> anyhow::Result<PagestreamGetPageResponse> {
|
||||
let next: Option<Result<bytes::Bytes, _>> = self.stream.next().await;
|
||||
let next: bytes::Bytes = next.unwrap()?;
|
||||
|
||||
let msg = PagestreamBeMessage::deserialize(next)?;
|
||||
|
||||
@@ -108,6 +108,7 @@ fn parse_filename(name: &str) -> (Range<Key>, Range<Lsn>) {
|
||||
enum LineKind {
|
||||
GcCutoff,
|
||||
Branch,
|
||||
KeyVertical,
|
||||
}
|
||||
|
||||
impl From<LineKind> for Fill {
|
||||
@@ -115,6 +116,7 @@ impl From<LineKind> for Fill {
|
||||
match value {
|
||||
LineKind::GcCutoff => Fill::Color(rgb(255, 0, 0)),
|
||||
LineKind::Branch => Fill::Color(rgb(0, 255, 0)),
|
||||
LineKind::KeyVertical => Fill::Color(rgb(0, 0, 255)),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -126,6 +128,7 @@ impl FromStr for LineKind {
|
||||
Ok(match s {
|
||||
"gc_cutoff" => LineKind::GcCutoff,
|
||||
"branch" => LineKind::Branch,
|
||||
"key" => LineKind::KeyVertical,
|
||||
_ => anyhow::bail!("unsupported linekind: {s}"),
|
||||
})
|
||||
}
|
||||
@@ -142,25 +145,31 @@ pub fn main() -> Result<()> {
|
||||
let stdin = io::stdin();
|
||||
|
||||
let mut lines: Vec<(Lsn, LineKind)> = vec![];
|
||||
let mut vertical_lines: Vec<(Key, LineKind)> = vec![];
|
||||
|
||||
for (lineno, line) in stdin.lock().lines().enumerate() {
|
||||
let lineno = lineno + 1;
|
||||
|
||||
let line = line.unwrap();
|
||||
if let Some((kind, lsn)) = line.split_once(':') {
|
||||
let (kind, lsn) = LineKind::from_str(kind)
|
||||
.context("parse kind")
|
||||
.and_then(|kind| {
|
||||
if lsn.contains('/') {
|
||||
Lsn::from_str(lsn)
|
||||
} else {
|
||||
Lsn::from_hex(lsn)
|
||||
if let Some((kind, what)) = line.split_once(':') {
|
||||
(|| {
|
||||
match LineKind::from_str(kind).context("parse kind")? {
|
||||
kind @ LineKind::Branch | kind @ LineKind::GcCutoff => {
|
||||
let lsn = if what.contains('/') {
|
||||
Lsn::from_str(what)?
|
||||
} else {
|
||||
Lsn::from_hex(what)?
|
||||
};
|
||||
lines.push((lsn, kind));
|
||||
}
|
||||
.map(|lsn| (kind, lsn))
|
||||
.context("parse lsn")
|
||||
})
|
||||
.with_context(|| format!("parse {line:?} on {lineno}"))?;
|
||||
lines.push((lsn, kind));
|
||||
kind @ LineKind::KeyVertical => {
|
||||
let key = Key::from_hex(what).context("parse key")?;
|
||||
vertical_lines.push((key, kind));
|
||||
}
|
||||
}
|
||||
anyhow::Ok(())
|
||||
})()
|
||||
.with_context(|| format!("parse {line:?} on {lineno}"))?;
|
||||
continue;
|
||||
}
|
||||
let line = PathBuf::from_str(&line).unwrap();
|
||||
@@ -175,7 +184,7 @@ pub fn main() -> Result<()> {
|
||||
}
|
||||
|
||||
// Collect all coordinates
|
||||
let mut keys: Vec<Key> = Vec::with_capacity(files.len());
|
||||
let mut keys: Vec<Key> = Vec::with_capacity(files.len() + vertical_lines.len());
|
||||
let mut lsns: Vec<Lsn> = Vec::with_capacity(files.len() + lines.len());
|
||||
|
||||
for Layer {
|
||||
@@ -192,6 +201,8 @@ pub fn main() -> Result<()> {
|
||||
|
||||
lsns.extend(lines.iter().map(|(lsn, _)| *lsn));
|
||||
|
||||
keys.extend(vertical_lines.iter().map(|(key, _)| *key));
|
||||
|
||||
// Analyze
|
||||
let key_map = build_coordinate_compression_map(keys);
|
||||
let lsn_map = build_coordinate_compression_map(lsns);
|
||||
@@ -283,6 +294,25 @@ pub fn main() -> Result<()> {
|
||||
);
|
||||
}
|
||||
|
||||
for (key, kind) in vertical_lines {
|
||||
let key = *key_map.get(&key).unwrap();
|
||||
let stretch = 2.0;
|
||||
let xmargin = 0.05;
|
||||
let ymargin = 0.05;
|
||||
let lsn_diff = 0.3;
|
||||
let lsn_offset = -lsn_diff / 2.0;
|
||||
println!(
|
||||
"{}",
|
||||
rectangle(
|
||||
5.0 + key as f32,
|
||||
0.0,
|
||||
(key_map.len() + 10) as f32,
|
||||
lsn_map.len() as f32,
|
||||
)
|
||||
.fill(kind)
|
||||
);
|
||||
}
|
||||
|
||||
println!("{}", EndSvg);
|
||||
|
||||
eprintln!("num_images: {}", num_images);
|
||||
|
||||
@@ -174,7 +174,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
println!("specified prefix '{}' failed validation", cmd.prefix);
|
||||
return Ok(());
|
||||
};
|
||||
let toml_document = toml_edit::DocumentMut::from_str(&cmd.config_toml_str)?;
|
||||
let toml_document = toml_edit::Document::from_str(&cmd.config_toml_str)?;
|
||||
let toml_item = toml_document
|
||||
.get("remote_storage")
|
||||
.expect("need remote_storage");
|
||||
|
||||
@@ -13,7 +13,7 @@ use rand::prelude::*;
|
||||
use tokio::task::JoinSet;
|
||||
use tracing::info;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::collections::{HashSet, VecDeque};
|
||||
use std::future::Future;
|
||||
use std::num::NonZeroUsize;
|
||||
use std::pin::Pin;
|
||||
@@ -295,64 +295,58 @@ async fn main_impl(
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let (mut pagestream_tx, mut pagestream_rx) = client.split();
|
||||
|
||||
start_work_barrier.wait().await;
|
||||
let client_start = Instant::now();
|
||||
let mut ticks_processed = 0;
|
||||
while !cancel.is_cancelled() {
|
||||
// Detect if a request took longer than the RPS rate
|
||||
if let Some(period) = &rps_period {
|
||||
let periods_passed_until_now =
|
||||
usize::try_from(client_start.elapsed().as_micros() / period.as_micros())
|
||||
let (rq_tx, mut rq_rx) = tokio::sync::mpsc::channel(4096);
|
||||
let sender = tokio::spawn(async move {
|
||||
while !cancel.is_cancelled() {
|
||||
let start = Instant::now();
|
||||
let req = {
|
||||
let mut rng = rand::thread_rng();
|
||||
let r = &ranges[weights.sample(&mut rng)];
|
||||
let key: i128 = rng.gen_range(r.start..r.end);
|
||||
let key = Key::from_i128(key);
|
||||
assert!(key.is_rel_block_key());
|
||||
let (rel_tag, block_no) = key
|
||||
.to_rel_block()
|
||||
.expect("we filter non-rel-block keys out above");
|
||||
PagestreamGetPageRequest {
|
||||
request_lsn: if rng.gen_bool(args.req_latest_probability) {
|
||||
Lsn::MAX
|
||||
} else {
|
||||
r.timeline_lsn
|
||||
},
|
||||
not_modified_since: r.timeline_lsn,
|
||||
rel: rel_tag,
|
||||
blkno: block_no,
|
||||
}
|
||||
};
|
||||
pagestream_tx.send_getpage(req).await.unwrap();
|
||||
rq_tx.send(start).await.unwrap();
|
||||
}
|
||||
});
|
||||
|
||||
let receiver = tokio::spawn(async move {
|
||||
while let Some(start) = rq_rx.recv().await {
|
||||
let response = pagestream_rx.recv_getpage().await.unwrap();
|
||||
let end = Instant::now();
|
||||
live_stats.request_done();
|
||||
STATS.with(|stats| {
|
||||
stats
|
||||
.borrow()
|
||||
.lock()
|
||||
.unwrap()
|
||||
.observe(end.duration_since(start))
|
||||
.unwrap();
|
||||
|
||||
if periods_passed_until_now > ticks_processed {
|
||||
live_stats.missed((periods_passed_until_now - ticks_processed) as u64);
|
||||
}
|
||||
ticks_processed = periods_passed_until_now;
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
let start = Instant::now();
|
||||
let req = {
|
||||
let mut rng = rand::thread_rng();
|
||||
let r = &ranges[weights.sample(&mut rng)];
|
||||
let key: i128 = rng.gen_range(r.start..r.end);
|
||||
let key = Key::from_i128(key);
|
||||
assert!(key.is_rel_block_key());
|
||||
let (rel_tag, block_no) = key
|
||||
.to_rel_block()
|
||||
.expect("we filter non-rel-block keys out above");
|
||||
PagestreamGetPageRequest {
|
||||
request_lsn: if rng.gen_bool(args.req_latest_probability) {
|
||||
Lsn::MAX
|
||||
} else {
|
||||
r.timeline_lsn
|
||||
},
|
||||
not_modified_since: r.timeline_lsn,
|
||||
rel: rel_tag,
|
||||
blkno: block_no,
|
||||
}
|
||||
};
|
||||
client.getpage(req).await.unwrap();
|
||||
let end = Instant::now();
|
||||
live_stats.request_done();
|
||||
ticks_processed += 1;
|
||||
STATS.with(|stats| {
|
||||
stats
|
||||
.borrow()
|
||||
.lock()
|
||||
.unwrap()
|
||||
.observe(end.duration_since(start))
|
||||
.unwrap();
|
||||
});
|
||||
|
||||
if let Some(period) = &rps_period {
|
||||
let next_at = client_start
|
||||
+ Duration::from_micros(
|
||||
(ticks_processed) as u64 * u64::try_from(period.as_micros()).unwrap(),
|
||||
);
|
||||
tokio::time::sleep_until(next_at.into()).await;
|
||||
}
|
||||
}
|
||||
sender.await.unwrap();
|
||||
receiver.await.unwrap();
|
||||
})
|
||||
};
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ use metrics::{
|
||||
use once_cell::sync::Lazy;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use strum::{EnumCount, VariantNames};
|
||||
use strum_macros::{IntoStaticStr, VariantNames};
|
||||
use strum_macros::{EnumVariantNames, IntoStaticStr};
|
||||
use tracing::warn;
|
||||
use utils::id::TimelineId;
|
||||
|
||||
@@ -27,7 +27,7 @@ const CRITICAL_OP_BUCKETS: &[f64] = &[
|
||||
];
|
||||
|
||||
// Metrics collected on operations on the storage repository.
|
||||
#[derive(Debug, VariantNames, IntoStaticStr)]
|
||||
#[derive(Debug, EnumVariantNames, IntoStaticStr)]
|
||||
#[strum(serialize_all = "kebab_case")]
|
||||
pub(crate) enum StorageTimeOperation {
|
||||
#[strum(serialize = "layer flush")]
|
||||
@@ -1185,6 +1185,7 @@ struct GlobalAndPerTimelineHistogramTimer<'a, 'c> {
|
||||
ctx: &'c RequestContext,
|
||||
start: std::time::Instant,
|
||||
op: SmgrQueryType,
|
||||
count: usize,
|
||||
}
|
||||
|
||||
impl<'a, 'c> Drop for GlobalAndPerTimelineHistogramTimer<'a, 'c> {
|
||||
@@ -1212,9 +1213,11 @@ impl<'a, 'c> Drop for GlobalAndPerTimelineHistogramTimer<'a, 'c> {
|
||||
elapsed
|
||||
}
|
||||
};
|
||||
self.global_metric.observe(ex_throttled.as_secs_f64());
|
||||
if let Some(timeline_metric) = self.timeline_metric {
|
||||
timeline_metric.observe(ex_throttled.as_secs_f64());
|
||||
for _ in 0..self.count {
|
||||
self.global_metric.observe(ex_throttled.as_secs_f64());
|
||||
if let Some(timeline_metric) = self.timeline_metric {
|
||||
timeline_metric.observe(ex_throttled.as_secs_f64());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1343,6 +1346,14 @@ impl SmgrQueryTimePerTimeline {
|
||||
&'a self,
|
||||
op: SmgrQueryType,
|
||||
ctx: &'c RequestContext,
|
||||
) -> Option<impl Drop + '_> {
|
||||
self.start_timer_many(op, 1, ctx)
|
||||
}
|
||||
pub(crate) fn start_timer_many<'c: 'a, 'a>(
|
||||
&'a self,
|
||||
op: SmgrQueryType,
|
||||
count: usize,
|
||||
ctx: &'c RequestContext,
|
||||
) -> Option<impl Drop + '_> {
|
||||
let global_metric = &self.global_metrics[op as usize];
|
||||
let start = Instant::now();
|
||||
@@ -1376,6 +1387,7 @@ impl SmgrQueryTimePerTimeline {
|
||||
ctx,
|
||||
start,
|
||||
op,
|
||||
count,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -3170,6 +3182,16 @@ static TOKIO_EXECUTOR_THREAD_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
pub(crate) static CONSECUTIVE_NONBLOCKING_GETPAGE_REQUESTS_HISTOGRAM: Lazy<Histogram> =
|
||||
Lazy::new(|| {
|
||||
register_histogram!(
|
||||
"pageserver_consecutive_nonblocking_getpage_requests",
|
||||
"Number of consecutive nonblocking getpage requests",
|
||||
(0..=256).map(|x| x as f64).collect::<Vec<f64>>(),
|
||||
)
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
pub(crate) fn set_tokio_runtime_setup(setup: &str, num_threads: NonZeroUsize) {
|
||||
static SERIALIZE: std::sync::Mutex<()> = std::sync::Mutex::new(());
|
||||
let _guard = SERIALIZE.lock().unwrap();
|
||||
|
||||
@@ -5,14 +5,14 @@ use anyhow::Context;
|
||||
use async_compression::tokio::write::GzipEncoder;
|
||||
use bytes::Buf;
|
||||
use futures::FutureExt;
|
||||
use once_cell::sync::OnceCell;
|
||||
use pageserver_api::models::TenantState;
|
||||
use once_cell::sync::{Lazy, OnceCell};
|
||||
use pageserver_api::models::{self, TenantState};
|
||||
use pageserver_api::models::{
|
||||
PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
|
||||
PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse,
|
||||
PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse,
|
||||
PagestreamGetSlruSegmentRequest, PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest,
|
||||
PagestreamNblocksResponse, PagestreamProtocolVersion,
|
||||
PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetSlruSegmentRequest,
|
||||
PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest, PagestreamNblocksResponse,
|
||||
PagestreamProtocolVersion,
|
||||
};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use postgres_backend::{is_expected_io_error, AuthType, PostgresBackend, QueryError};
|
||||
@@ -43,7 +43,7 @@ use crate::basebackup;
|
||||
use crate::basebackup::BasebackupError;
|
||||
use crate::config::PageServerConf;
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::metrics;
|
||||
use crate::metrics::{self, CONSECUTIVE_NONBLOCKING_GETPAGE_REQUESTS_HISTOGRAM};
|
||||
use crate::metrics::{ComputeCommandKind, COMPUTE_COMMANDS_COUNTERS, LIVE_CONNECTIONS};
|
||||
use crate::pgdatadir_mapping::Version;
|
||||
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
|
||||
@@ -58,7 +58,7 @@ use crate::tenant::GetTimelineError;
|
||||
use crate::tenant::PageReconstructError;
|
||||
use crate::tenant::Timeline;
|
||||
use pageserver_api::key::rel_block_to_key;
|
||||
use pageserver_api::reltag::SlruKind;
|
||||
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
|
||||
use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
|
||||
use postgres_ffi::BLCKSZ;
|
||||
|
||||
@@ -577,124 +577,326 @@ impl PageServerHandler {
|
||||
}
|
||||
}
|
||||
|
||||
loop {
|
||||
// read request bytes (it's exactly 1 PagestreamFeMessage per CopyData)
|
||||
let msg = tokio::select! {
|
||||
biased;
|
||||
_ = self.cancel.cancelled() => {
|
||||
return Err(QueryError::Shutdown)
|
||||
}
|
||||
msg = pgb.read_message() => { msg }
|
||||
};
|
||||
let copy_data_bytes = match msg? {
|
||||
Some(FeMessage::CopyData(bytes)) => bytes,
|
||||
Some(FeMessage::Terminate) => break,
|
||||
Some(m) => {
|
||||
return Err(QueryError::Other(anyhow::anyhow!(
|
||||
"unexpected message: {m:?} during COPY"
|
||||
)));
|
||||
}
|
||||
None => break, // client disconnected
|
||||
};
|
||||
let mut batched = None;
|
||||
'outer: loop {
|
||||
enum DebouncedFeMessage {
|
||||
Exists(models::PagestreamExistsRequest),
|
||||
Nblocks(models::PagestreamNblocksRequest),
|
||||
GetPage {
|
||||
span: Span,
|
||||
shard: timeline::handle::Handle<TenantManagerTypes>,
|
||||
effective_request_lsn: Lsn,
|
||||
pages: smallvec::SmallVec<[(RelTag, BlockNumber); 1]>,
|
||||
},
|
||||
DbSize(models::PagestreamDbSizeRequest),
|
||||
GetSlruSegment(models::PagestreamGetSlruSegmentRequest),
|
||||
RespondError(Span, PageStreamError),
|
||||
}
|
||||
let mut debounce: Option<std::time::Instant> = None;
|
||||
// return or `?` on protocol error
|
||||
// `break EXPR` to stop batching. The EXPR will be the first message in the next batch.
|
||||
let next_batched: Option<DebouncedFeMessage> = loop {
|
||||
static BOUNCE_TIMEOUT: Lazy<Duration> = Lazy::new(|| {
|
||||
utils::env::var::<humantime::Duration, _>("NEON_PAGESERVER_DEBOUNCE")
|
||||
.unwrap()
|
||||
.into()
|
||||
});
|
||||
let sleep_fut = if let Some(started_at) = debounce {
|
||||
futures::future::Either::Left(tokio::time::sleep_until(
|
||||
(started_at + *BOUNCE_TIMEOUT).into(),
|
||||
))
|
||||
} else {
|
||||
futures::future::Either::Right(futures::future::pending())
|
||||
};
|
||||
let msg = tokio::select! {
|
||||
biased;
|
||||
_ = self.cancel.cancelled() => {
|
||||
return Err(QueryError::Shutdown)
|
||||
}
|
||||
msg = pgb.read_message() => {
|
||||
msg
|
||||
}
|
||||
_ = sleep_fut => {
|
||||
assert!(batched.is_some());
|
||||
break None;
|
||||
}
|
||||
};
|
||||
let copy_data_bytes = match msg? {
|
||||
Some(FeMessage::CopyData(bytes)) => bytes,
|
||||
Some(FeMessage::Terminate) => break 'outer,
|
||||
Some(m) => {
|
||||
return Err(QueryError::Other(anyhow::anyhow!(
|
||||
"unexpected message: {m:?} during COPY"
|
||||
)));
|
||||
}
|
||||
None => break 'outer, // client disconnected
|
||||
};
|
||||
trace!("query: {copy_data_bytes:?}");
|
||||
fail::fail_point!("ps::handle-pagerequest-message");
|
||||
|
||||
trace!("query: {copy_data_bytes:?}");
|
||||
fail::fail_point!("ps::handle-pagerequest-message");
|
||||
// parse request
|
||||
let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader())?;
|
||||
|
||||
// parse request
|
||||
let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader())?;
|
||||
let this_msg = match neon_fe_msg {
|
||||
PagestreamFeMessage::Exists(msg) => DebouncedFeMessage::Exists(msg),
|
||||
PagestreamFeMessage::Nblocks(msg) => DebouncedFeMessage::Nblocks(msg),
|
||||
PagestreamFeMessage::DbSize(msg) => DebouncedFeMessage::DbSize(msg),
|
||||
PagestreamFeMessage::GetSlruSegment(msg) => {
|
||||
DebouncedFeMessage::GetSlruSegment(msg)
|
||||
}
|
||||
PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
rel,
|
||||
blkno,
|
||||
}) => {
|
||||
let span = tracing::info_span!("handle_get_page_at_lsn_request_batched", %tenant_id, %timeline_id, shard_id = tracing::field::Empty, req_lsn = %request_lsn, batch_size = tracing::field::Empty, batch_id = tracing::field::Empty);
|
||||
let key = rel_block_to_key(rel, blkno);
|
||||
let shard = match self
|
||||
.timeline_handles
|
||||
.get(tenant_id, timeline_id, ShardSelector::Page(key))
|
||||
.instrument(span.clone())
|
||||
.await
|
||||
{
|
||||
Ok(tl) => tl,
|
||||
Err(GetActiveTimelineError::Tenant(
|
||||
GetActiveTenantError::NotFound(_),
|
||||
)) => {
|
||||
// We already know this tenant exists in general, because we resolved it at
|
||||
// start of connection. Getting a NotFound here indicates that the shard containing
|
||||
// the requested page is not present on this node: the client's knowledge of shard->pageserver
|
||||
// mapping is out of date.
|
||||
//
|
||||
// Closing the connection by returning ``::Reconnect` has the side effect of rate-limiting above message, via
|
||||
// client's reconnect backoff, as well as hopefully prompting the client to load its updated configuration
|
||||
// and talk to a different pageserver.
|
||||
break Some(DebouncedFeMessage::RespondError(
|
||||
span,
|
||||
PageStreamError::Reconnect(
|
||||
"getpage@lsn request routed to wrong shard".into(),
|
||||
),
|
||||
));
|
||||
}
|
||||
Err(e) => break Some(DebouncedFeMessage::RespondError(span, e.into())),
|
||||
};
|
||||
let effective_request_lsn = match Self::wait_or_get_last_lsn(
|
||||
&shard,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
&shard.get_latest_gc_cutoff_lsn(),
|
||||
&ctx,
|
||||
)
|
||||
// TODO: if we actually need to wait for lsn here, it delays the entire batch which doesn't need to wait
|
||||
.await
|
||||
{
|
||||
Ok(lsn) => lsn,
|
||||
Err(e) => {
|
||||
break Some(DebouncedFeMessage::RespondError(span, e));
|
||||
}
|
||||
};
|
||||
DebouncedFeMessage::GetPage {
|
||||
span,
|
||||
shard,
|
||||
effective_request_lsn,
|
||||
pages: smallvec::smallvec![(rel, blkno)],
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// check if we can debounce
|
||||
match (&mut batched, this_msg) {
|
||||
(None, this_msg) => {
|
||||
batched = Some(this_msg);
|
||||
}
|
||||
(
|
||||
Some(DebouncedFeMessage::GetPage {
|
||||
span: _,
|
||||
shard: accum_shard,
|
||||
pages: accum_pages,
|
||||
effective_request_lsn: accum_lsn,
|
||||
}),
|
||||
DebouncedFeMessage::GetPage {
|
||||
span: _,
|
||||
shard: this_shard,
|
||||
pages: this_pages,
|
||||
effective_request_lsn: this_lsn,
|
||||
},
|
||||
) if async {
|
||||
assert_eq!(this_pages.len(), 1);
|
||||
if accum_pages.len() >= Timeline::MAX_GET_VECTORED_KEYS as usize {
|
||||
assert_eq!(accum_pages.len(), Timeline::MAX_GET_VECTORED_KEYS as usize);
|
||||
return false;
|
||||
}
|
||||
if (accum_shard.tenant_shard_id, accum_shard.timeline_id)
|
||||
!= (this_shard.tenant_shard_id, this_shard.timeline_id)
|
||||
{
|
||||
// TODO: we _could_ batch & execute each shard seperately (and in parallel).
|
||||
// But the current logig for keeping responses in order does not support that.
|
||||
return false;
|
||||
}
|
||||
// the vectored get currently only supports a single LSN, so, bounce as soon
|
||||
// as the effective request_lsn changes
|
||||
if (*accum_lsn != this_lsn) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
.await =>
|
||||
{
|
||||
// ok to batch
|
||||
accum_pages.extend(this_pages);
|
||||
}
|
||||
(Some(_), this_msg) => {
|
||||
// by default, don't continue batching
|
||||
break Some(this_msg);
|
||||
}
|
||||
}
|
||||
|
||||
// debounce impl piece
|
||||
let started_at = debounce.get_or_insert_with(Instant::now);
|
||||
if started_at.elapsed() > *BOUNCE_TIMEOUT {
|
||||
break None;
|
||||
}
|
||||
};
|
||||
|
||||
// invoke handler function
|
||||
let (handler_result, span) = match neon_fe_msg {
|
||||
PagestreamFeMessage::Exists(req) => {
|
||||
let (handler_results, span): (
|
||||
smallvec::SmallVec<[Result<PagestreamBeMessage, PageStreamError>; 1]>,
|
||||
_,
|
||||
) = match batched.take().expect("loop above ensures this") {
|
||||
DebouncedFeMessage::Exists(req) => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::exists");
|
||||
let span = tracing::info_span!("handle_get_rel_exists_request", rel = %req.rel, req_lsn = %req.request_lsn);
|
||||
(
|
||||
self.handle_get_rel_exists_request(tenant_id, timeline_id, &req, &ctx)
|
||||
.instrument(span.clone())
|
||||
.await,
|
||||
smallvec::smallvec![
|
||||
self.handle_get_rel_exists_request(tenant_id, timeline_id, &req, &ctx)
|
||||
.instrument(span.clone())
|
||||
.await
|
||||
],
|
||||
span,
|
||||
)
|
||||
}
|
||||
PagestreamFeMessage::Nblocks(req) => {
|
||||
DebouncedFeMessage::Nblocks(req) => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::nblocks");
|
||||
let span = tracing::info_span!("handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.request_lsn);
|
||||
(
|
||||
self.handle_get_nblocks_request(tenant_id, timeline_id, &req, &ctx)
|
||||
.instrument(span.clone())
|
||||
.await,
|
||||
smallvec::smallvec![
|
||||
self.handle_get_nblocks_request(tenant_id, timeline_id, &req, &ctx)
|
||||
.instrument(span.clone())
|
||||
.await,
|
||||
],
|
||||
span,
|
||||
)
|
||||
}
|
||||
PagestreamFeMessage::GetPage(req) => {
|
||||
DebouncedFeMessage::GetPage {
|
||||
span,
|
||||
shard,
|
||||
effective_request_lsn,
|
||||
pages,
|
||||
} => {
|
||||
CONSECUTIVE_NONBLOCKING_GETPAGE_REQUESTS_HISTOGRAM.observe(pages.len() as f64);
|
||||
span.record("batch_size", pages.len() as u64);
|
||||
static BATCH_ID: Lazy<std::sync::atomic::AtomicUsize> =
|
||||
Lazy::new(|| std::sync::atomic::AtomicUsize::new(0));
|
||||
span.record(
|
||||
"batch_id",
|
||||
BATCH_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed) as u64,
|
||||
);
|
||||
fail::fail_point!("ps::handle-pagerequest-message::getpage");
|
||||
// shard_id is filled in by the handler
|
||||
let span = tracing::info_span!("handle_get_page_at_lsn_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.request_lsn);
|
||||
(
|
||||
self.handle_get_page_at_lsn_request(tenant_id, timeline_id, &req, &ctx)
|
||||
.instrument(span.clone())
|
||||
.await,
|
||||
{
|
||||
let npages = pages.len();
|
||||
let res = self
|
||||
.handle_get_page_at_lsn_request_batched(
|
||||
&shard,
|
||||
effective_request_lsn,
|
||||
pages,
|
||||
&ctx,
|
||||
)
|
||||
.instrument(span.clone())
|
||||
.await;
|
||||
assert_eq!(res.len(), npages);
|
||||
res
|
||||
},
|
||||
span,
|
||||
)
|
||||
}
|
||||
PagestreamFeMessage::DbSize(req) => {
|
||||
DebouncedFeMessage::DbSize(req) => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::dbsize");
|
||||
let span = tracing::info_span!("handle_db_size_request", dbnode = %req.dbnode, req_lsn = %req.request_lsn);
|
||||
(
|
||||
self.handle_db_size_request(tenant_id, timeline_id, &req, &ctx)
|
||||
.instrument(span.clone())
|
||||
.await,
|
||||
smallvec::smallvec![
|
||||
self.handle_db_size_request(tenant_id, timeline_id, &req, &ctx)
|
||||
.instrument(span.clone())
|
||||
.await
|
||||
],
|
||||
span,
|
||||
)
|
||||
}
|
||||
PagestreamFeMessage::GetSlruSegment(req) => {
|
||||
DebouncedFeMessage::GetSlruSegment(req) => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::slrusegment");
|
||||
let span = tracing::info_span!("handle_get_slru_segment_request", kind = %req.kind, segno = %req.segno, req_lsn = %req.request_lsn);
|
||||
(
|
||||
self.handle_get_slru_segment_request(tenant_id, timeline_id, &req, &ctx)
|
||||
smallvec::smallvec![
|
||||
self.handle_get_slru_segment_request(
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
&req,
|
||||
&ctx
|
||||
)
|
||||
.instrument(span.clone())
|
||||
.await,
|
||||
.await
|
||||
],
|
||||
span,
|
||||
)
|
||||
}
|
||||
DebouncedFeMessage::RespondError(span, e) => {
|
||||
// We've already decided to respond with an error, so we don't need to
|
||||
// call the handler.
|
||||
(smallvec::smallvec![Err(e)], span)
|
||||
}
|
||||
};
|
||||
|
||||
// Map handler result to protocol behavior.
|
||||
// Some handler errors cause exit from pagestream protocol.
|
||||
// Other handler errors are sent back as an error message and we stay in pagestream protocol.
|
||||
let response_msg = match handler_result {
|
||||
Err(e) => match &e {
|
||||
PageStreamError::Shutdown => {
|
||||
// If we fail to fulfil a request during shutdown, which may be _because_ of
|
||||
// shutdown, then do not send the error to the client. Instead just drop the
|
||||
// connection.
|
||||
span.in_scope(|| info!("dropping connection due to shutdown"));
|
||||
return Err(QueryError::Shutdown);
|
||||
}
|
||||
PageStreamError::Reconnect(reason) => {
|
||||
span.in_scope(|| info!("handler requested reconnect: {reason}"));
|
||||
return Err(QueryError::Reconnect);
|
||||
}
|
||||
PageStreamError::Read(_)
|
||||
| PageStreamError::LsnTimeout(_)
|
||||
| PageStreamError::NotFound(_)
|
||||
| PageStreamError::BadRequest(_) => {
|
||||
// print the all details to the log with {:#}, but for the client the
|
||||
// error message is enough. Do not log if shutting down, as the anyhow::Error
|
||||
// here includes cancellation which is not an error.
|
||||
let full = utils::error::report_compact_sources(&e);
|
||||
span.in_scope(|| {
|
||||
error!("error reading relation or page version: {full:#}")
|
||||
});
|
||||
PagestreamBeMessage::Error(PagestreamErrorResponse {
|
||||
message: e.to_string(),
|
||||
})
|
||||
}
|
||||
},
|
||||
Ok(response_msg) => response_msg,
|
||||
};
|
||||
for handler_result in handler_results {
|
||||
let response_msg = match handler_result {
|
||||
Err(e) => match &e {
|
||||
PageStreamError::Shutdown => {
|
||||
// If we fail to fulfil a request during shutdown, which may be _because_ of
|
||||
// shutdown, then do not send the error to the client. Instead just drop the
|
||||
// connection.
|
||||
span.in_scope(|| info!("dropping connection due to shutdown"));
|
||||
return Err(QueryError::Shutdown);
|
||||
}
|
||||
PageStreamError::Reconnect(reason) => {
|
||||
span.in_scope(|| info!("handler requested reconnect: {reason}"));
|
||||
return Err(QueryError::Reconnect);
|
||||
}
|
||||
PageStreamError::Read(_)
|
||||
| PageStreamError::LsnTimeout(_)
|
||||
| PageStreamError::NotFound(_)
|
||||
| PageStreamError::BadRequest(_) => {
|
||||
// print the all details to the log with {:#}, but for the client the
|
||||
// error message is enough. Do not log if shutting down, as the anyhow::Error
|
||||
// here includes cancellation which is not an error.
|
||||
let full = utils::error::report_compact_sources(&e);
|
||||
span.in_scope(|| {
|
||||
error!("error reading relation or page version: {full:#}")
|
||||
});
|
||||
PagestreamBeMessage::Error(PagestreamErrorResponse {
|
||||
message: e.to_string(),
|
||||
})
|
||||
}
|
||||
},
|
||||
Ok(response_msg) => response_msg,
|
||||
};
|
||||
|
||||
// marshal & transmit response message
|
||||
pgb.write_message_noflush(&BeMessage::CopyData(&response_msg.serialize()))?;
|
||||
// marshal & transmit response message
|
||||
pgb.write_message_noflush(&BeMessage::CopyData(&response_msg.serialize()))?;
|
||||
}
|
||||
tokio::select! {
|
||||
biased;
|
||||
_ = self.cancel.cancelled() => {
|
||||
@@ -706,6 +908,9 @@ impl PageServerHandler {
|
||||
res?;
|
||||
}
|
||||
}
|
||||
|
||||
assert!(batched.is_none(), "we take() earlier");
|
||||
batched = next_batched;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -949,60 +1154,30 @@ impl PageServerHandler {
|
||||
}))
|
||||
}
|
||||
|
||||
#[instrument(skip_all, fields(shard_id))]
|
||||
async fn handle_get_page_at_lsn_request(
|
||||
#[instrument(skip_all)]
|
||||
async fn handle_get_page_at_lsn_request_batched(
|
||||
&mut self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
req: &PagestreamGetPageRequest,
|
||||
timeline: &Timeline,
|
||||
effective_lsn: Lsn,
|
||||
pages: smallvec::SmallVec<[(RelTag, BlockNumber); 1]>,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<PagestreamBeMessage, PageStreamError> {
|
||||
let timeline = match self
|
||||
.timeline_handles
|
||||
.get(
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
ShardSelector::Page(rel_block_to_key(req.rel, req.blkno)),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(tl) => tl,
|
||||
Err(GetActiveTimelineError::Tenant(GetActiveTenantError::NotFound(_))) => {
|
||||
// We already know this tenant exists in general, because we resolved it at
|
||||
// start of connection. Getting a NotFound here indicates that the shard containing
|
||||
// the requested page is not present on this node: the client's knowledge of shard->pageserver
|
||||
// mapping is out of date.
|
||||
//
|
||||
// Closing the connection by returning ``::Reconnect` has the side effect of rate-limiting above message, via
|
||||
// client's reconnect backoff, as well as hopefully prompting the client to load its updated configuration
|
||||
// and talk to a different pageserver.
|
||||
return Err(PageStreamError::Reconnect(
|
||||
"getpage@lsn request routed to wrong shard".into(),
|
||||
));
|
||||
}
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
|
||||
let _timer = timeline
|
||||
.query_metrics
|
||||
.start_timer(metrics::SmgrQueryType::GetPageAtLsn, ctx);
|
||||
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
let lsn = Self::wait_or_get_last_lsn(
|
||||
&timeline,
|
||||
req.request_lsn,
|
||||
req.not_modified_since,
|
||||
&latest_gc_cutoff_lsn,
|
||||
) -> smallvec::SmallVec<[Result<PagestreamBeMessage, PageStreamError>; 1]> {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
let _timer = timeline.query_metrics.start_timer_many(
|
||||
metrics::SmgrQueryType::GetPageAtLsn,
|
||||
pages.len(),
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
);
|
||||
|
||||
let page = timeline
|
||||
.get_rel_page_at_lsn(req.rel, req.blkno, Version::Lsn(lsn), ctx)
|
||||
.await?;
|
||||
let pages = timeline
|
||||
.get_rel_page_at_lsn_batched(pages, Version::Lsn(effective_lsn), ctx)
|
||||
.await;
|
||||
|
||||
Ok(PagestreamBeMessage::GetPage(PagestreamGetPageResponse {
|
||||
page,
|
||||
smallvec::SmallVec::from_iter(pages.into_iter().map(|page| {
|
||||
page.map(|page| {
|
||||
PagestreamBeMessage::GetPage(models::PagestreamGetPageResponse { page })
|
||||
})
|
||||
.map_err(PageStreamError::Read)
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -1499,3 +1674,10 @@ fn set_tracing_field_shard_id(timeline: &Timeline) {
|
||||
);
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
}
|
||||
|
||||
struct WaitedForLsn(Lsn);
|
||||
impl From<WaitedForLsn> for Lsn {
|
||||
fn from(WaitedForLsn(lsn): WaitedForLsn) -> Self {
|
||||
lsn
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,12 +9,17 @@
|
||||
use super::tenant::{PageReconstructError, Timeline};
|
||||
use crate::context::RequestContext;
|
||||
use crate::keyspace::{KeySpace, KeySpaceAccum};
|
||||
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id;
|
||||
use crate::span::{
|
||||
debug_assert_current_span_has_tenant_and_timeline_id,
|
||||
debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id,
|
||||
};
|
||||
use crate::tenant::timeline::GetVectoredError;
|
||||
use crate::walrecord::NeonWalRecord;
|
||||
use crate::{aux_file, repository::*};
|
||||
use anyhow::{ensure, Context};
|
||||
use bytes::{Buf, Bytes, BytesMut};
|
||||
use enum_map::Enum;
|
||||
use itertools::Itertools;
|
||||
use pageserver_api::key::{
|
||||
dbdir_key_range, rel_block_to_key, rel_dir_to_key, rel_key_range, rel_size_to_key,
|
||||
relmap_file_key, repl_origin_key, repl_origin_key_range, slru_block_to_key, slru_dir_to_key,
|
||||
@@ -28,7 +33,7 @@ use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
|
||||
use postgres_ffi::BLCKSZ;
|
||||
use postgres_ffi::{Oid, RepOriginId, TimestampTz, TransactionId};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{hash_map, HashMap, HashSet};
|
||||
use std::collections::{hash_map, BTreeMap, HashMap, HashSet};
|
||||
use std::ops::ControlFlow;
|
||||
use std::ops::Range;
|
||||
use strum::IntoEnumIterator;
|
||||
@@ -191,26 +196,184 @@ impl Timeline {
|
||||
version: Version<'_>,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Bytes, PageReconstructError> {
|
||||
if tag.relnode == 0 {
|
||||
return Err(PageReconstructError::Other(
|
||||
RelationError::InvalidRelnode.into(),
|
||||
));
|
||||
}
|
||||
let pages = smallvec::smallvec![(tag, blknum)];
|
||||
let res = self.get_rel_page_at_lsn_batched(pages, version, ctx).await;
|
||||
assert_eq!(res.len(), 1);
|
||||
res.into_iter().next().unwrap()
|
||||
}
|
||||
|
||||
let nblocks = self.get_rel_size(tag, version, ctx).await?;
|
||||
if blknum >= nblocks {
|
||||
debug!(
|
||||
"read beyond EOF at {} blk {} at {}, size is {}: returning all-zeros page",
|
||||
tag,
|
||||
blknum,
|
||||
version.get_lsn(),
|
||||
nblocks
|
||||
);
|
||||
return Ok(ZERO_PAGE.clone());
|
||||
/// Like [`get_rel_page_at_lsn`], but returns a batch of pages.
|
||||
pub(crate) async fn get_rel_page_at_lsn_batched(
|
||||
&self,
|
||||
pages: smallvec::SmallVec<[(RelTag, BlockNumber); 1]>,
|
||||
version: Version<'_>,
|
||||
ctx: &RequestContext,
|
||||
) -> smallvec::SmallVec<[Result<Bytes, PageReconstructError>; 1]> {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
let request_lsn = match version {
|
||||
Version::Lsn(lsn) => lsn,
|
||||
Version::Modified(_) => panic!("unsupported"),
|
||||
};
|
||||
enum KeyState {
|
||||
NeedsVectoredGet,
|
||||
Done(Result<Bytes, PageReconstructError>),
|
||||
}
|
||||
let mut key_states = BTreeMap::new();
|
||||
let mut vectored_gets: smallvec::SmallVec<[_; 1]> =
|
||||
smallvec::SmallVec::with_capacity(pages.len());
|
||||
for (response_order, (tag, blknum)) in pages.into_iter().enumerate() {
|
||||
let key = rel_block_to_key(tag, blknum);
|
||||
use std::collections::btree_map::Entry;
|
||||
let key_state_slot = match key_states.entry((key, response_order)) {
|
||||
Entry::Occupied(_entry) => unreachable!(
|
||||
"enumerate makes keys unique, even if batch contains same key twice"
|
||||
),
|
||||
Entry::Vacant(entry) => entry,
|
||||
};
|
||||
|
||||
let key = rel_block_to_key(tag, blknum);
|
||||
version.get(self, key, ctx).await
|
||||
if tag.relnode == 0 {
|
||||
key_state_slot.insert(KeyState::Done(Err(PageReconstructError::Other(
|
||||
RelationError::InvalidRelnode.into(),
|
||||
))));
|
||||
continue;
|
||||
}
|
||||
|
||||
let nblocks = match self.get_rel_size(tag, version, ctx).await {
|
||||
Ok(nblocks) => nblocks,
|
||||
Err(err) => {
|
||||
key_state_slot.insert(KeyState::Done(Err(err)));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if blknum >= nblocks {
|
||||
debug!(
|
||||
"read beyond EOF at {} blk {} at {}, size is {}: returning all-zeros page",
|
||||
tag,
|
||||
blknum,
|
||||
version.get_lsn(),
|
||||
nblocks
|
||||
);
|
||||
key_state_slot.insert(KeyState::Done(Ok(ZERO_PAGE.clone())));
|
||||
continue;
|
||||
}
|
||||
|
||||
vectored_gets.push(key);
|
||||
key_state_slot.insert(KeyState::NeedsVectoredGet);
|
||||
}
|
||||
// turn vectored_gets into a keyspace
|
||||
let keyspace = {
|
||||
// add_key reuqires monotonicity
|
||||
vectored_gets.sort_unstable();
|
||||
let mut acc = KeySpaceAccum::new();
|
||||
for key in vectored_gets
|
||||
.into_iter()
|
||||
// in fact it requires strong monotonicity
|
||||
.dedup()
|
||||
{
|
||||
acc.add_key(key);
|
||||
}
|
||||
acc.to_keyspace()
|
||||
};
|
||||
|
||||
match self.get_vectored(keyspace, request_lsn, ctx).await {
|
||||
Ok(results) => {
|
||||
for (key, res) in results {
|
||||
if let Err(err) = &res {
|
||||
warn!(%key, ?err, "a key inside get_vectored failed with a per-key error");
|
||||
}
|
||||
let mut interests = key_states.range_mut((key, 0)..(key.next(), 0)).peekable();
|
||||
let first_interest = interests.next().unwrap();
|
||||
let next_interest = interests.peek().is_some();
|
||||
if !next_interest {
|
||||
match first_interest.1 {
|
||||
KeyState::NeedsVectoredGet => {
|
||||
*first_interest.1 = KeyState::Done(res);
|
||||
}
|
||||
KeyState::Done(_) => unreachable!(),
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
for ((_, _), state) in [first_interest].into_iter().chain(interests) {
|
||||
match state {
|
||||
KeyState::NeedsVectoredGet => {
|
||||
*state = KeyState::Done(match &res {
|
||||
Ok(buf) => Ok(buf.clone()),
|
||||
// this `match` is working around the fact that we cannot Clone the PageReconstructError
|
||||
Err(err) => Err(match err {
|
||||
PageReconstructError::Cancelled => {
|
||||
PageReconstructError::Cancelled
|
||||
}
|
||||
|
||||
x @ PageReconstructError::Other(_) |
|
||||
x @ PageReconstructError::AncestorLsnTimeout(_) |
|
||||
x @ PageReconstructError::WalRedo(_) |
|
||||
x @ PageReconstructError::MissingKey(_) => {
|
||||
PageReconstructError::Other(anyhow::anyhow!("there was more than one request for this key in the batch, error logged once: {x:?}"))
|
||||
},
|
||||
}),
|
||||
});
|
||||
}
|
||||
KeyState::Done(_) => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
warn!(?err, "get_vectored failed with a global error, mapping that error to per-key failure");
|
||||
// this cannot really happen because get_vectored only errors globally on invalid LSN or too large batch size
|
||||
for ((_, _), state) in key_states.iter_mut() {
|
||||
// this whole `match` is a lot like `From<GetVectoredError> for PageReconstructError`
|
||||
// but without taking ownership of the GetVectoredError
|
||||
match &err {
|
||||
GetVectoredError::Cancelled => {
|
||||
*state = KeyState::Done(Err(PageReconstructError::Cancelled));
|
||||
}
|
||||
// TODO: restructure get_vectored API to make this error per-key
|
||||
GetVectoredError::MissingKey(err) => {
|
||||
*state = KeyState::Done(Err(PageReconstructError::Other(anyhow::anyhow!("whole vectored get request failed because one or more of the requested keys were missing: {err:?}"))));
|
||||
}
|
||||
// TODO: restructure get_vectored API to make this error per-key
|
||||
GetVectoredError::GetReadyAncestorError(err) => {
|
||||
*state = KeyState::Done(Err(PageReconstructError::Other(anyhow::anyhow!("whole vectored get request failed because one or more key required ancestor that wasn't ready: {err:?}"))));
|
||||
}
|
||||
// TODO: restructure get_vectored API to make this error per-key
|
||||
GetVectoredError::Other(err) => {
|
||||
*state = KeyState::Done(Err(PageReconstructError::Other(
|
||||
anyhow::anyhow!("whole vectored get request failed: {err:?}"),
|
||||
)));
|
||||
}
|
||||
// TODO: we can prevent this error class by moving this check into the type system
|
||||
GetVectoredError::InvalidLsn(e) => {
|
||||
*state =
|
||||
KeyState::Done(Err(anyhow::anyhow!("invalid LSN: {e:?}").into()));
|
||||
}
|
||||
// NB: this should never happen in practice because we limit MAX_GET_VECTORED_KEYS
|
||||
// TODO: we can prevent this error class by moving this check into the type system
|
||||
GetVectoredError::Oversized(err) => {
|
||||
*state = KeyState::Done(Err(anyhow::anyhow!(
|
||||
"batching oversized: {err:?}"
|
||||
)
|
||||
.into()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// get the results into the order in which they were requested
|
||||
let mut return_order: smallvec::SmallVec<[_; Timeline::MAX_GET_VECTORED_KEYS as usize]> =
|
||||
smallvec::SmallVec::with_capacity(key_states.len());
|
||||
return_order.extend(key_states.keys().map(|(key, idx)| (*key, *idx)));
|
||||
return_order.sort_unstable_by_key(|(_, idx)| *idx);
|
||||
let mut res = smallvec::SmallVec::with_capacity(key_states.len());
|
||||
res.extend(return_order.into_iter().map(|key_states_key| {
|
||||
match key_states.remove(&key_states_key).unwrap() {
|
||||
KeyState::Done(res) => res,
|
||||
KeyState::NeedsVectoredGet => unreachable!(),
|
||||
}
|
||||
}));
|
||||
res
|
||||
}
|
||||
|
||||
// Get size of a database in blocks
|
||||
|
||||
@@ -73,6 +73,21 @@ impl ValueBytes {
|
||||
|
||||
Ok(raw[8] == 1)
|
||||
}
|
||||
|
||||
pub(crate) fn is_image(raw: &[u8]) -> Result<bool, InvalidInput> {
|
||||
if raw.len() < 12 {
|
||||
return Err(InvalidInput::TooShortValue);
|
||||
}
|
||||
|
||||
let value_discriminator = &raw[0..4];
|
||||
|
||||
if value_discriminator == [0, 0, 0, 0] {
|
||||
// Value::Image always initializes
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -7091,13 +7091,13 @@ mod tests {
|
||||
vec![
|
||||
// Image layer at GC horizon
|
||||
PersistentLayerKey {
|
||||
key_range: Key::MIN..Key::MAX,
|
||||
key_range: Key::MIN..Key::NON_L0_MAX,
|
||||
lsn_range: Lsn(0x30)..Lsn(0x31),
|
||||
is_delta: false
|
||||
},
|
||||
// The delta layer below the horizon
|
||||
// The delta layer covers the full range (with the layer key hack to avoid being recognized as L0)
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(3)..get_key(4),
|
||||
key_range: Key::MIN..Key::NON_L0_MAX,
|
||||
lsn_range: Lsn(0x30)..Lsn(0x48),
|
||||
is_delta: true
|
||||
},
|
||||
|
||||
@@ -452,8 +452,7 @@ impl TryFrom<toml_edit::Item> for TenantConfOpt {
|
||||
.map_err(|e| anyhow::anyhow!("{}: {}", e.path(), e.inner().message()));
|
||||
}
|
||||
toml_edit::Item::Table(table) => {
|
||||
let deserializer =
|
||||
toml_edit::de::Deserializer::from(toml_edit::DocumentMut::from(table));
|
||||
let deserializer = toml_edit::de::Deserializer::new(table.into());
|
||||
return serde_path_to_error::deserialize(deserializer)
|
||||
.map_err(|e| anyhow::anyhow!("{}: {}", e.path(), e.inner().message()));
|
||||
}
|
||||
|
||||
@@ -8,15 +8,18 @@ mod layer_desc;
|
||||
mod layer_name;
|
||||
pub mod merge_iterator;
|
||||
|
||||
use tokio::sync::{self};
|
||||
use tracing::{debug, Instrument};
|
||||
use utils::bin_ser::BeSer;
|
||||
pub mod split_writer;
|
||||
|
||||
use crate::context::{AccessStatsBehavior, RequestContext};
|
||||
use crate::repository::Value;
|
||||
use crate::repository::{Value, ValueBytes};
|
||||
use crate::walrecord::NeonWalRecord;
|
||||
use bytes::Bytes;
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::key::{Key, DBDIR_KEY};
|
||||
use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};
|
||||
use std::cmp::{Ordering, Reverse};
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::{BinaryHeap, HashMap};
|
||||
use std::ops::Range;
|
||||
@@ -79,30 +82,57 @@ pub(crate) enum ValueReconstructSituation {
|
||||
}
|
||||
|
||||
/// Reconstruct data accumulated for a single key during a vectored get
|
||||
#[derive(Debug, Default, Clone)]
|
||||
#[derive(Debug, Default)]
|
||||
pub(crate) struct VectoredValueReconstructState {
|
||||
pub(crate) records: Vec<(Lsn, NeonWalRecord)>,
|
||||
pub(crate) img: Option<(Lsn, Bytes)>,
|
||||
pub(crate) records: Vec<(
|
||||
Lsn,
|
||||
tokio::sync::oneshot::Receiver<Result<Bytes, std::io::Error>>,
|
||||
)>,
|
||||
pub(crate) will_init_lsn: Option<Lsn>,
|
||||
|
||||
situation: ValueReconstructSituation,
|
||||
pub(crate) situation: ValueReconstructSituation,
|
||||
}
|
||||
|
||||
impl VectoredValueReconstructState {
|
||||
fn get_cached_lsn(&self) -> Option<Lsn> {
|
||||
self.img.as_ref().map(|img| img.0)
|
||||
self.will_init_lsn
|
||||
}
|
||||
}
|
||||
|
||||
impl From<VectoredValueReconstructState> for ValueReconstructState {
|
||||
fn from(mut state: VectoredValueReconstructState) -> Self {
|
||||
// walredo expects the records to be descending in terms of Lsn
|
||||
state.records.sort_by_key(|(lsn, _)| Reverse(*lsn));
|
||||
pub(crate) async fn convert(
|
||||
_key: Key,
|
||||
from: VectoredValueReconstructState,
|
||||
) -> Result<ValueReconstructState, PageReconstructError> {
|
||||
let mut to = ValueReconstructState::default();
|
||||
|
||||
ValueReconstructState {
|
||||
records: state.records,
|
||||
img: state.img,
|
||||
for (lsn, fut) in from.records {
|
||||
match fut.await {
|
||||
Ok(res) => match res {
|
||||
Ok(bytes) => {
|
||||
let value = Value::des(&bytes)
|
||||
.map_err(|err| PageReconstructError::Other(err.into()))?;
|
||||
|
||||
match value {
|
||||
Value::WalRecord(rec) => {
|
||||
to.records.push((lsn, rec));
|
||||
}
|
||||
Value::Image(img) => {
|
||||
assert!(to.img.is_none());
|
||||
to.img = Some((lsn, img));
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
return Err(PageReconstructError::Other(err.into()));
|
||||
}
|
||||
},
|
||||
Err(err) => {
|
||||
return Err(PageReconstructError::Other(err.into()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(to)
|
||||
}
|
||||
|
||||
/// Bag of data accumulated during a vectored get..
|
||||
@@ -119,6 +149,47 @@ pub(crate) struct ValuesReconstructState {
|
||||
// Statistics that are still accessible as a caller of `get_vectored_impl`.
|
||||
layers_visited: u32,
|
||||
delta_layers_visited: u32,
|
||||
|
||||
io_concurrency: IoConcurrency,
|
||||
}
|
||||
|
||||
enum IoConcurrency {
|
||||
Serial {
|
||||
prev_io: Option<(usize, tokio::task::JoinHandle<()>)>,
|
||||
},
|
||||
Parallel,
|
||||
}
|
||||
|
||||
impl IoConcurrency {
|
||||
pub(crate) fn spawn_io<F>(&mut self, fut: F)
|
||||
where
|
||||
F: std::future::Future<Output = ()> + Send + 'static,
|
||||
{
|
||||
static IO_COUNTER: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);
|
||||
let io_id = IO_COUNTER.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
let span = tracing::debug_span!("spawned_io", io_id,);
|
||||
match self {
|
||||
IoConcurrency::Serial { prev_io } => {
|
||||
let prev = prev_io.take();
|
||||
*prev_io = Some((
|
||||
io_id,
|
||||
tokio::spawn(
|
||||
async move {
|
||||
if let Some((prev_id, prev_task)) = prev {
|
||||
debug!(prev_io = prev_id, "Waiting for previous IO to complete");
|
||||
prev_task.await.unwrap();
|
||||
}
|
||||
fut.await;
|
||||
}
|
||||
.instrument(span),
|
||||
),
|
||||
));
|
||||
}
|
||||
IoConcurrency::Parallel => {
|
||||
tokio::spawn(fut);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ValuesReconstructState {
|
||||
@@ -129,9 +200,30 @@ impl ValuesReconstructState {
|
||||
keys_with_image_coverage: None,
|
||||
layers_visited: 0,
|
||||
delta_layers_visited: 0,
|
||||
io_concurrency: {
|
||||
static IO_CONCURRENCY: once_cell::sync::Lazy<String> =
|
||||
once_cell::sync::Lazy::new(|| {
|
||||
std::env::var("NEON_PAGESERVER_VALUE_RECONSTRUCT_IO_CONCURRENCY").unwrap()
|
||||
});
|
||||
match IO_CONCURRENCY.as_str() {
|
||||
"parallel" => IoConcurrency::Parallel,
|
||||
"serial" => IoConcurrency::Serial { prev_io: None },
|
||||
x => panic!(
|
||||
"Invalid value for NEON_PAGESERVER_VALUE_RECONSTRUCT_IO_CONCURRENCY: {}",
|
||||
x
|
||||
),
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn spawn_io<F>(&mut self, fut: F)
|
||||
where
|
||||
F: std::future::Future<Output = ()> + Send + 'static,
|
||||
{
|
||||
self.io_concurrency.spawn_io(fut);
|
||||
}
|
||||
|
||||
/// Associate a key with the error which it encountered and mark it as done
|
||||
pub(crate) fn on_key_error(&mut self, key: Key, err: PageReconstructError) {
|
||||
let previous = self.keys.insert(key, Err(err));
|
||||
@@ -200,7 +292,8 @@ impl ValuesReconstructState {
|
||||
&mut self,
|
||||
key: &Key,
|
||||
lsn: Lsn,
|
||||
value: Value,
|
||||
completes: bool,
|
||||
value: sync::oneshot::Receiver<Result<Bytes, std::io::Error>>,
|
||||
) -> ValueReconstructSituation {
|
||||
let state = self
|
||||
.keys
|
||||
@@ -208,31 +301,16 @@ impl ValuesReconstructState {
|
||||
.or_insert(Ok(VectoredValueReconstructState::default()));
|
||||
|
||||
if let Ok(state) = state {
|
||||
let key_done = match state.situation {
|
||||
match state.situation {
|
||||
ValueReconstructSituation::Complete => unreachable!(),
|
||||
ValueReconstructSituation::Continue => match value {
|
||||
Value::Image(img) => {
|
||||
state.img = Some((lsn, img));
|
||||
true
|
||||
}
|
||||
Value::WalRecord(rec) => {
|
||||
debug_assert!(
|
||||
Some(lsn) > state.get_cached_lsn(),
|
||||
"Attempt to collect a record below cached LSN for walredo: {} < {}",
|
||||
lsn,
|
||||
state
|
||||
.get_cached_lsn()
|
||||
.expect("Assertion can only fire if a cached lsn is present")
|
||||
);
|
||||
ValueReconstructSituation::Continue => {
|
||||
state.records.push((lsn, value));
|
||||
}
|
||||
}
|
||||
|
||||
let will_init = rec.will_init();
|
||||
state.records.push((lsn, rec));
|
||||
will_init
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
if key_done && state.situation == ValueReconstructSituation::Continue {
|
||||
if completes && state.situation == ValueReconstructSituation::Continue {
|
||||
assert_eq!(state.will_init_lsn, None);
|
||||
state.will_init_lsn = Some(lsn);
|
||||
state.situation = ValueReconstructSituation::Complete;
|
||||
self.keys_done.add_key(*key);
|
||||
}
|
||||
@@ -279,7 +357,7 @@ pub(crate) enum LayerId {
|
||||
/// Layer wrapper for the read path. Note that it is valid
|
||||
/// to use these layers even after external operations have
|
||||
/// been performed on them (compaction, freeze, etc.).
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) enum ReadableLayer {
|
||||
PersistentLayer(Layer),
|
||||
InMemoryLayer(Arc<InMemoryLayer>),
|
||||
@@ -292,6 +370,8 @@ struct ReadDesc {
|
||||
layer_id: LayerId,
|
||||
/// Lsn range for the read, used for selecting the next read
|
||||
lsn_range: Range<Lsn>,
|
||||
/// This read's index in [`LayerKeyspace::target_keyspace`];
|
||||
read_id: LayerKeyspaceReadId,
|
||||
}
|
||||
|
||||
/// Data structure which maintains a fringe of layers for the
|
||||
@@ -310,9 +390,13 @@ pub(crate) struct LayerFringe {
|
||||
#[derive(Debug)]
|
||||
struct LayerKeyspace {
|
||||
layer: ReadableLayer,
|
||||
target_keyspace: KeySpaceRandomAccum,
|
||||
next_read_id: LayerKeyspaceReadId,
|
||||
reads: HashMap<LayerKeyspaceReadId, (Range<Lsn>, KeySpace)>,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq, Hash, Debug, Clone, Copy)]
|
||||
struct LayerKeyspaceReadId(usize);
|
||||
|
||||
impl LayerFringe {
|
||||
pub(crate) fn new() -> Self {
|
||||
LayerFringe {
|
||||
@@ -327,22 +411,24 @@ impl LayerFringe {
|
||||
None => return None,
|
||||
};
|
||||
|
||||
let removed = self.layers.remove_entry(&read_desc.layer_id);
|
||||
let mut entry = match self.layers.entry(read_desc.layer_id) {
|
||||
Entry::Occupied(o) => o,
|
||||
Entry::Vacant(_) => unreachable!("fringe internals are always consistent"),
|
||||
};
|
||||
|
||||
match removed {
|
||||
Some((
|
||||
_,
|
||||
LayerKeyspace {
|
||||
layer,
|
||||
mut target_keyspace,
|
||||
},
|
||||
)) => Some((
|
||||
layer,
|
||||
target_keyspace.consume_keyspace(),
|
||||
read_desc.lsn_range,
|
||||
)),
|
||||
None => unreachable!("fringe internals are always consistent"),
|
||||
let (lsn_range, keyspace) = entry
|
||||
.get_mut()
|
||||
.reads
|
||||
.remove(&read_desc.read_id)
|
||||
.expect("fringe internals are always consistent");
|
||||
|
||||
let layer = entry.get().layer.clone();
|
||||
|
||||
if entry.get().reads.is_empty() {
|
||||
entry.remove();
|
||||
}
|
||||
|
||||
Some((layer, keyspace, lsn_range))
|
||||
}
|
||||
|
||||
pub(crate) fn update(
|
||||
@@ -355,18 +441,31 @@ impl LayerFringe {
|
||||
let entry = self.layers.entry(layer_id.clone());
|
||||
match entry {
|
||||
Entry::Occupied(mut entry) => {
|
||||
entry.get_mut().target_keyspace.add_keyspace(keyspace);
|
||||
let read_id = {
|
||||
let r = &mut entry.get_mut().next_read_id;
|
||||
let read_id = *r;
|
||||
*r = LayerKeyspaceReadId(r.0 + 1);
|
||||
read_id
|
||||
};
|
||||
self.planned_reads_by_lsn.push(ReadDesc {
|
||||
lsn_range: lsn_range.clone(),
|
||||
layer_id: layer_id.clone(),
|
||||
read_id,
|
||||
});
|
||||
let replaced = entry.get_mut().reads.insert(read_id, (lsn_range, keyspace));
|
||||
assert!(replaced.is_none());
|
||||
}
|
||||
Entry::Vacant(entry) => {
|
||||
let read_id = LayerKeyspaceReadId(0);
|
||||
self.planned_reads_by_lsn.push(ReadDesc {
|
||||
lsn_range,
|
||||
lsn_range: lsn_range.clone(),
|
||||
layer_id: layer_id.clone(),
|
||||
read_id,
|
||||
});
|
||||
let mut accum = KeySpaceRandomAccum::new();
|
||||
accum.add_keyspace(keyspace);
|
||||
entry.insert(LayerKeyspace {
|
||||
layer,
|
||||
target_keyspace: accum,
|
||||
next_read_id: LayerKeyspaceReadId(1),
|
||||
reads: [(read_id, (lsn_range, keyspace))].into(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,13 +42,12 @@ use crate::tenant::vectored_blob_io::{
|
||||
BlobFlag, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,
|
||||
VectoredReadCoalesceMode, VectoredReadPlanner,
|
||||
};
|
||||
use crate::tenant::PageReconstructError;
|
||||
use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt};
|
||||
use crate::virtual_file::{self, VirtualFile};
|
||||
use crate::{walrecord, TEMP_FILE_SUFFIX};
|
||||
use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION};
|
||||
use anyhow::{anyhow, bail, ensure, Context, Result};
|
||||
use bytes::BytesMut;
|
||||
use anyhow::{bail, ensure, Context, Result};
|
||||
use bytes::{Bytes, BytesMut};
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use futures::StreamExt;
|
||||
use itertools::Itertools;
|
||||
@@ -58,14 +57,14 @@ use pageserver_api::models::ImageCompressionAlgorithm;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use rand::{distributions::Alphanumeric, Rng};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::VecDeque;
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::fs::File;
|
||||
use std::io::SeekFrom;
|
||||
use std::ops::Range;
|
||||
use std::os::unix::fs::FileExt;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::OnceCell;
|
||||
use tokio::sync::{self, OnceCell};
|
||||
use tokio_epoll_uring::IoBuf;
|
||||
use tracing::*;
|
||||
|
||||
@@ -224,7 +223,7 @@ pub struct DeltaLayerInner {
|
||||
index_start_blk: u32,
|
||||
index_root_blk: u32,
|
||||
|
||||
file: VirtualFile,
|
||||
file: Arc<VirtualFile>,
|
||||
file_id: FileId,
|
||||
|
||||
layer_key_range: Range<Key>,
|
||||
@@ -788,9 +787,11 @@ impl DeltaLayerInner {
|
||||
max_vectored_read_bytes: Option<MaxVectoredReadBytes>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Self> {
|
||||
let file = VirtualFile::open(path, ctx)
|
||||
.await
|
||||
.context("open layer file")?;
|
||||
let file = Arc::new(
|
||||
VirtualFile::open(path, ctx)
|
||||
.await
|
||||
.context("open layer file")?,
|
||||
);
|
||||
|
||||
let file_id = page_cache::next_file_id();
|
||||
|
||||
@@ -841,6 +842,7 @@ impl DeltaLayerInner {
|
||||
// can be further optimised to visit the index only once.
|
||||
pub(super) async fn get_values_reconstruct_data(
|
||||
&self,
|
||||
self_desc: PersistentLayerDesc,
|
||||
keyspace: KeySpace,
|
||||
lsn_range: Range<Lsn>,
|
||||
reconstruct_state: &mut ValuesReconstructState,
|
||||
@@ -863,6 +865,7 @@ impl DeltaLayerInner {
|
||||
let data_end_offset = self.index_start_offset();
|
||||
|
||||
let reads = Self::plan_reads(
|
||||
self_desc,
|
||||
&keyspace,
|
||||
lsn_range.clone(),
|
||||
data_end_offset,
|
||||
@@ -883,6 +886,7 @@ impl DeltaLayerInner {
|
||||
}
|
||||
|
||||
async fn plan_reads<Reader>(
|
||||
self_desc: PersistentLayerDesc,
|
||||
keyspace: &KeySpace,
|
||||
lsn_range: Range<Lsn>,
|
||||
data_end_offset: u64,
|
||||
@@ -911,6 +915,8 @@ impl DeltaLayerInner {
|
||||
let lsn = DeltaKey::extract_lsn_from_buf(&raw_key);
|
||||
let blob_ref = BlobRef(value);
|
||||
|
||||
debug!(file = %self_desc.layer_name(), %key, %lsn, will_init = blob_ref.will_init(), "delta layer found key");
|
||||
|
||||
// Lsns are not monotonically increasing across keys, so we don't assert on them.
|
||||
assert!(key >= range.start);
|
||||
|
||||
@@ -933,7 +939,7 @@ impl DeltaLayerInner {
|
||||
range_end_handled = true;
|
||||
break;
|
||||
} else {
|
||||
planner.handle(key, lsn, blob_ref.pos(), flag);
|
||||
planner.handle(key, lsn, blob_ref.pos(), flag, blob_ref.will_init());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -980,77 +986,59 @@ impl DeltaLayerInner {
|
||||
reconstruct_state: &mut ValuesReconstructState,
|
||||
ctx: &RequestContext,
|
||||
) {
|
||||
let vectored_blob_reader = VectoredBlobReader::new(&self.file);
|
||||
let mut ignore_key_with_err = None;
|
||||
|
||||
let max_vectored_read_bytes = self
|
||||
.max_vectored_read_bytes
|
||||
.expect("Layer is loaded with max vectored bytes config")
|
||||
.0
|
||||
.into();
|
||||
let buf_size = Self::get_min_read_buffer_size(&reads, max_vectored_read_bytes);
|
||||
let mut buf = Some(BytesMut::with_capacity(buf_size));
|
||||
|
||||
// Note that reads are processed in reverse order (from highest key+lsn).
|
||||
// This is the order that `ReconstructState` requires such that it can
|
||||
// track when a key is done.
|
||||
for read in reads.into_iter().rev() {
|
||||
let res = vectored_blob_reader
|
||||
.read_blobs(&read, buf.take().expect("Should have a buffer"), ctx)
|
||||
.await;
|
||||
|
||||
let blobs_buf = match res {
|
||||
Ok(blobs_buf) => blobs_buf,
|
||||
Err(err) => {
|
||||
let kind = err.kind();
|
||||
for (_, blob_meta) in read.blobs_at.as_slice() {
|
||||
reconstruct_state.on_key_error(
|
||||
blob_meta.key,
|
||||
PageReconstructError::Other(anyhow!(
|
||||
"Failed to read blobs from virtual file {}: {}",
|
||||
self.file.path,
|
||||
kind
|
||||
)),
|
||||
);
|
||||
}
|
||||
|
||||
// We have "lost" the buffer since the lower level IO api
|
||||
// doesn't return the buffer on error. Allocate a new one.
|
||||
buf = Some(BytesMut::with_capacity(buf_size));
|
||||
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
for meta in blobs_buf.blobs.iter().rev() {
|
||||
if Some(meta.meta.key) == ignore_key_with_err {
|
||||
continue;
|
||||
}
|
||||
|
||||
let value = Value::des(&blobs_buf.buf[meta.start..meta.end]);
|
||||
let value = match value {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
reconstruct_state.on_key_error(
|
||||
meta.meta.key,
|
||||
PageReconstructError::Other(anyhow!(e).context(format!(
|
||||
"Failed to deserialize blob from virtual file {}",
|
||||
self.file.path,
|
||||
))),
|
||||
);
|
||||
|
||||
ignore_key_with_err = Some(meta.meta.key);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
// Invariant: once a key reaches [`ValueReconstructSituation::Complete`]
|
||||
// state, no further updates shall be made to it. The call below will
|
||||
// panic if the invariant is violated.
|
||||
reconstruct_state.update_key(&meta.meta.key, meta.meta.lsn, value);
|
||||
let mut senders: HashMap<
|
||||
(Key, Lsn),
|
||||
sync::oneshot::Sender<Result<Bytes, std::io::Error>>,
|
||||
> = Default::default();
|
||||
for (_, blob_meta) in read.blobs_at.as_slice().iter().rev() {
|
||||
let (tx, rx) = sync::oneshot::channel();
|
||||
senders.insert((blob_meta.key, blob_meta.lsn), tx);
|
||||
reconstruct_state.update_key(
|
||||
&blob_meta.key,
|
||||
blob_meta.lsn,
|
||||
blob_meta.will_init,
|
||||
rx,
|
||||
);
|
||||
}
|
||||
|
||||
buf = Some(blobs_buf.buf);
|
||||
let read_from = self.file.clone();
|
||||
let read_ctx = ctx.attached_child();
|
||||
reconstruct_state.spawn_io(async move {
|
||||
let vectored_blob_reader = VectoredBlobReader::new(&read_from);
|
||||
let buf = BytesMut::with_capacity(buf_size);
|
||||
|
||||
let res = vectored_blob_reader.read_blobs(&read, buf, &read_ctx).await;
|
||||
match res {
|
||||
Ok(blobs_buf) => {
|
||||
for meta in blobs_buf.blobs.iter().rev() {
|
||||
let buf = &blobs_buf.buf[meta.start..meta.end];
|
||||
let sender = senders
|
||||
.remove(&(meta.meta.key, meta.meta.lsn))
|
||||
.expect("sender must exist");
|
||||
let _ = sender.send(Ok(Bytes::copy_from_slice(buf)));
|
||||
}
|
||||
|
||||
assert!(senders.is_empty());
|
||||
}
|
||||
Err(err) => {
|
||||
for (_, sender) in senders {
|
||||
let _ = sender
|
||||
.send(Err(std::io::Error::new(err.kind(), "vec read failed")));
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1190,7 +1178,14 @@ impl DeltaLayerInner {
|
||||
let actionable = if let Some((key, lsn, start_offset)) = prev.take() {
|
||||
let end_offset = offset;
|
||||
|
||||
Some((BlobMeta { key, lsn }, start_offset..end_offset))
|
||||
Some((
|
||||
BlobMeta {
|
||||
key,
|
||||
lsn,
|
||||
will_init: false,
|
||||
},
|
||||
start_offset..end_offset,
|
||||
))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
//!
|
||||
//! Every image layer file consists of three parts: "summary",
|
||||
//! "index", and "values". The summary is a fixed size header at the
|
||||
//! beginning of the file, and it contains basic information about the
|
||||
//! beginningof the file, and it contains basic information about the
|
||||
//! layer, and offsets to the other parts. The "index" is a B-tree,
|
||||
//! mapping from Key to an offset in the "values" part. The
|
||||
//! actual page images are stored in the "values" part.
|
||||
@@ -38,11 +38,11 @@ use crate::tenant::timeline::GetVectoredError;
|
||||
use crate::tenant::vectored_blob_io::{
|
||||
BlobFlag, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead, VectoredReadPlanner,
|
||||
};
|
||||
use crate::tenant::{PageReconstructError, Timeline};
|
||||
use crate::tenant::Timeline;
|
||||
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
|
||||
use crate::virtual_file::{self, VirtualFile};
|
||||
use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};
|
||||
use anyhow::{anyhow, bail, ensure, Context, Result};
|
||||
use anyhow::{bail, ensure, Context, Result};
|
||||
use bytes::{Bytes, BytesMut};
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use hex;
|
||||
@@ -52,13 +52,14 @@ use pageserver_api::keyspace::KeySpace;
|
||||
use pageserver_api::shard::{ShardIdentity, TenantShardId};
|
||||
use rand::{distributions::Alphanumeric, Rng};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::VecDeque;
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::fs::File;
|
||||
use std::io::SeekFrom;
|
||||
use std::ops::Range;
|
||||
use std::os::unix::prelude::FileExt;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::oneshot;
|
||||
use tokio::sync::OnceCell;
|
||||
use tokio_stream::StreamExt;
|
||||
use tracing::*;
|
||||
@@ -163,7 +164,7 @@ pub struct ImageLayerInner {
|
||||
key_range: Range<Key>,
|
||||
lsn: Lsn,
|
||||
|
||||
file: VirtualFile,
|
||||
file: Arc<VirtualFile>,
|
||||
file_id: FileId,
|
||||
|
||||
max_vectored_read_bytes: Option<MaxVectoredReadBytes>,
|
||||
@@ -390,9 +391,11 @@ impl ImageLayerInner {
|
||||
max_vectored_read_bytes: Option<MaxVectoredReadBytes>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Self> {
|
||||
let file = VirtualFile::open(path, ctx)
|
||||
.await
|
||||
.context("open layer file")?;
|
||||
let file = Arc::new(
|
||||
VirtualFile::open(path, ctx)
|
||||
.await
|
||||
.context("open layer file")?,
|
||||
);
|
||||
let file_id = page_cache::next_file_id();
|
||||
let block_reader = FileBlockReader::new(&file, file_id);
|
||||
let summary_blk = block_reader
|
||||
@@ -438,12 +441,13 @@ impl ImageLayerInner {
|
||||
// the reconstruct state with whatever is found.
|
||||
pub(super) async fn get_values_reconstruct_data(
|
||||
&self,
|
||||
self_desc: PersistentLayerDesc,
|
||||
keyspace: KeySpace,
|
||||
reconstruct_state: &mut ValuesReconstructState,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), GetVectoredError> {
|
||||
let reads = self
|
||||
.plan_reads(keyspace, None, ctx)
|
||||
.plan_reads(self_desc, keyspace, None, ctx)
|
||||
.await
|
||||
.map_err(GetVectoredError::Other)?;
|
||||
|
||||
@@ -462,6 +466,7 @@ impl ImageLayerInner {
|
||||
/// this shard.
|
||||
async fn plan_reads(
|
||||
&self,
|
||||
self_desc: PersistentLayerDesc,
|
||||
keyspace: KeySpace,
|
||||
shard_identity: Option<&ShardIdentity>,
|
||||
ctx: &RequestContext,
|
||||
@@ -505,12 +510,14 @@ impl ImageLayerInner {
|
||||
BlobFlag::None
|
||||
};
|
||||
|
||||
debug!(file = %self_desc.layer_name(), %key, %self.lsn, will_init=true, "image layer found key");
|
||||
|
||||
if key >= range.end {
|
||||
planner.handle_range_end(offset);
|
||||
range_end_handled = true;
|
||||
break;
|
||||
} else {
|
||||
planner.handle(key, self.lsn, offset, flag);
|
||||
planner.handle(key, self.lsn, offset, flag, true);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -534,6 +541,7 @@ impl ImageLayerInner {
|
||||
// Fragment the range into the regions owned by this ShardIdentity
|
||||
let plan = self
|
||||
.plan_reads(
|
||||
todo!(),
|
||||
KeySpace {
|
||||
// If asked for the total key space, plan_reads will give us all the keys in the layer
|
||||
ranges: vec![Key::MIN..Key::MAX],
|
||||
@@ -579,8 +587,16 @@ impl ImageLayerInner {
|
||||
.0
|
||||
.into();
|
||||
|
||||
let vectored_blob_reader = VectoredBlobReader::new(&self.file);
|
||||
for read in reads.into_iter() {
|
||||
let mut senders: HashMap<(Key, Lsn), oneshot::Sender<Result<Bytes, std::io::Error>>> =
|
||||
Default::default();
|
||||
for (_, blob_meta) in read.blobs_at.as_slice() {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
senders.insert((blob_meta.key, blob_meta.lsn), tx);
|
||||
|
||||
reconstruct_state.update_key(&blob_meta.key, blob_meta.lsn, true, rx);
|
||||
}
|
||||
|
||||
let buf_size = read.size();
|
||||
|
||||
if buf_size > max_vectored_read_bytes {
|
||||
@@ -599,36 +615,36 @@ impl ImageLayerInner {
|
||||
);
|
||||
}
|
||||
|
||||
let buf = BytesMut::with_capacity(buf_size);
|
||||
let res = vectored_blob_reader.read_blobs(&read, buf, ctx).await;
|
||||
let read_from = self.file.clone();
|
||||
let read_ctx = ctx.attached_child();
|
||||
reconstruct_state.spawn_io(async move {
|
||||
let buf = BytesMut::with_capacity(buf_size);
|
||||
let vectored_blob_reader = VectoredBlobReader::new(&read_from);
|
||||
let res = vectored_blob_reader.read_blobs(&read, buf, &read_ctx).await;
|
||||
|
||||
match res {
|
||||
Ok(blobs_buf) => {
|
||||
let frozen_buf = blobs_buf.buf.freeze();
|
||||
match res {
|
||||
Ok(blobs_buf) => {
|
||||
for meta in blobs_buf.blobs.iter() {
|
||||
let buf = &blobs_buf.buf[meta.start..meta.end];
|
||||
let sender = senders
|
||||
.remove(&(meta.meta.key, meta.meta.lsn))
|
||||
.expect("sender must exist");
|
||||
// TODO: this is silly - sort it out
|
||||
let bytes = Value::ser(&Value::Image(Bytes::copy_from_slice(buf)))
|
||||
.expect("stupid but correct");
|
||||
let _ = sender.send(Ok(bytes.into()));
|
||||
}
|
||||
|
||||
for meta in blobs_buf.blobs.iter() {
|
||||
let img_buf = frozen_buf.slice(meta.start..meta.end);
|
||||
reconstruct_state.update_key(
|
||||
&meta.meta.key,
|
||||
self.lsn,
|
||||
Value::Image(img_buf),
|
||||
);
|
||||
assert!(senders.is_empty());
|
||||
}
|
||||
Err(err) => {
|
||||
for (_, sender) in senders {
|
||||
let _ = sender
|
||||
.send(Err(std::io::Error::new(err.kind(), "vec read failed")));
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
let kind = err.kind();
|
||||
for (_, blob_meta) in read.blobs_at.as_slice() {
|
||||
reconstruct_state.on_key_error(
|
||||
blob_meta.key,
|
||||
PageReconstructError::from(anyhow!(
|
||||
"Failed to read blobs from virtual file {}: {}",
|
||||
self.file.path,
|
||||
kind
|
||||
)),
|
||||
);
|
||||
}
|
||||
}
|
||||
};
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -10,10 +10,9 @@ use crate::context::{PageContentKind, RequestContext, RequestContextBuilder};
|
||||
use crate::repository::{Key, Value};
|
||||
use crate::tenant::ephemeral_file::EphemeralFile;
|
||||
use crate::tenant::timeline::GetVectoredError;
|
||||
use crate::tenant::PageReconstructError;
|
||||
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
|
||||
use crate::{l0_flush, page_cache};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use anyhow::{Context, Result};
|
||||
use bytes::Bytes;
|
||||
use camino::Utf8PathBuf;
|
||||
use pageserver_api::key::CompactKey;
|
||||
@@ -35,9 +34,7 @@ use std::sync::atomic::Ordering as AtomicOrdering;
|
||||
use std::sync::atomic::{AtomicU64, AtomicUsize};
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use super::{
|
||||
DeltaLayerWriter, PersistentLayerDesc, ValueReconstructSituation, ValuesReconstructState,
|
||||
};
|
||||
use super::{DeltaLayerWriter, PersistentLayerDesc, ValuesReconstructState};
|
||||
|
||||
pub(crate) mod vectored_dio_read;
|
||||
|
||||
@@ -87,7 +84,7 @@ pub struct InMemoryLayerInner {
|
||||
/// The values are stored in a serialized format in this file.
|
||||
/// Each serialized Value is preceded by a 'u32' length field.
|
||||
/// PerSeg::page_versions map stores offsets into this file.
|
||||
file: EphemeralFile,
|
||||
file: Arc<tokio::sync::RwLock<EphemeralFile>>,
|
||||
|
||||
resource_units: GlobalResourceUnits,
|
||||
}
|
||||
@@ -381,7 +378,11 @@ impl InMemoryLayer {
|
||||
}
|
||||
|
||||
pub(crate) fn try_len(&self) -> Option<u64> {
|
||||
self.inner.try_read().map(|i| i.file.len()).ok()
|
||||
self.inner
|
||||
.try_read()
|
||||
.map(|i| i.file.try_read().map(|i| i.len()).ok())
|
||||
.ok()
|
||||
.flatten()
|
||||
}
|
||||
|
||||
pub(crate) fn assert_writable(&self) {
|
||||
@@ -432,6 +433,10 @@ impl InMemoryLayer {
|
||||
read: vectored_dio_read::LogicalRead<Vec<u8>>,
|
||||
}
|
||||
let mut reads: HashMap<Key, Vec<ValueRead>> = HashMap::new();
|
||||
let mut senders: HashMap<
|
||||
(Key, Lsn),
|
||||
tokio::sync::oneshot::Sender<Result<Bytes, std::io::Error>>,
|
||||
> = Default::default();
|
||||
|
||||
for range in keyspace.ranges.iter() {
|
||||
for (key, vec_map) in inner
|
||||
@@ -459,6 +464,13 @@ impl InMemoryLayer {
|
||||
Vec::with_capacity(len as usize),
|
||||
),
|
||||
});
|
||||
|
||||
let (tx, rx) = tokio::sync::oneshot::channel();
|
||||
senders.insert((key, *entry_lsn), tx);
|
||||
reconstruct_state.update_key(&key, *entry_lsn, will_init, rx);
|
||||
|
||||
debug!(%key, %entry_lsn, will_init, "inmemory layer found key");
|
||||
|
||||
if will_init {
|
||||
break;
|
||||
}
|
||||
@@ -466,46 +478,39 @@ impl InMemoryLayer {
|
||||
}
|
||||
}
|
||||
|
||||
// Execute the reads.
|
||||
let read_from = inner.file.clone();
|
||||
let read_ctx = ctx.attached_child();
|
||||
reconstruct_state.spawn_io(async move {
|
||||
let locked = read_from.read().await;
|
||||
let f = vectored_dio_read::execute(
|
||||
&*locked,
|
||||
reads
|
||||
.iter()
|
||||
.flat_map(|(_, value_reads)| value_reads.iter().map(|v| &v.read)),
|
||||
&read_ctx,
|
||||
);
|
||||
send_future::SendFuture::send(f) // https://github.com/rust-lang/rust/issues/96865
|
||||
.await;
|
||||
|
||||
let f = vectored_dio_read::execute(
|
||||
&inner.file,
|
||||
reads
|
||||
.iter()
|
||||
.flat_map(|(_, value_reads)| value_reads.iter().map(|v| &v.read)),
|
||||
&ctx,
|
||||
);
|
||||
send_future::SendFuture::send(f) // https://github.com/rust-lang/rust/issues/96865
|
||||
.await;
|
||||
|
||||
// Process results into the reconstruct state
|
||||
'next_key: for (key, value_reads) in reads {
|
||||
for ValueRead { entry_lsn, read } in value_reads {
|
||||
match read.into_result().expect("we run execute() above") {
|
||||
Err(e) => {
|
||||
reconstruct_state.on_key_error(key, PageReconstructError::from(anyhow!(e)));
|
||||
continue 'next_key;
|
||||
}
|
||||
Ok(value_buf) => {
|
||||
let value = Value::des(&value_buf);
|
||||
if let Err(e) = value {
|
||||
reconstruct_state
|
||||
.on_key_error(key, PageReconstructError::from(anyhow!(e)));
|
||||
continue 'next_key;
|
||||
for (key, value_reads) in reads {
|
||||
for ValueRead { entry_lsn, read } in value_reads {
|
||||
let sender = senders
|
||||
.remove(&(key, entry_lsn))
|
||||
.expect("sender must exist");
|
||||
match read.into_result().expect("we run execute() above") {
|
||||
Err(e) => {
|
||||
let _ = sender
|
||||
.send(Err(std::io::Error::new(e.kind(), "dio vec read failed")));
|
||||
}
|
||||
|
||||
let key_situation =
|
||||
reconstruct_state.update_key(&key, entry_lsn, value.unwrap());
|
||||
if key_situation == ValueReconstructSituation::Complete {
|
||||
// TODO: metric to see if we fetched more values than necessary
|
||||
continue 'next_key;
|
||||
Ok(value_buf) => {
|
||||
let _ = sender.send(Ok(value_buf.into()));
|
||||
}
|
||||
|
||||
// process the next value in the next iteration of the loop
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert!(senders.is_empty());
|
||||
});
|
||||
|
||||
reconstruct_state.on_lsn_advanced(&keyspace, self.start_lsn);
|
||||
|
||||
@@ -600,7 +605,8 @@ impl InMemoryLayer {
|
||||
/// Get layer size.
|
||||
pub async fn size(&self) -> Result<u64> {
|
||||
let inner = self.inner.read().await;
|
||||
Ok(inner.file.len())
|
||||
let locked = inner.file.try_read().expect("no contention");
|
||||
Ok(locked.len())
|
||||
}
|
||||
|
||||
/// Create a new, empty, in-memory layer
|
||||
@@ -614,9 +620,10 @@ impl InMemoryLayer {
|
||||
) -> Result<InMemoryLayer> {
|
||||
trace!("initializing new empty InMemoryLayer for writing on timeline {timeline_id} at {start_lsn}");
|
||||
|
||||
let file =
|
||||
EphemeralFile::create(conf, tenant_shard_id, timeline_id, gate_guard, ctx).await?;
|
||||
let key = InMemoryLayerFileId(file.page_cache_file_id());
|
||||
let file = Arc::new(tokio::sync::RwLock::new(
|
||||
EphemeralFile::create(conf, tenant_shard_id, timeline_id, gate_guard, ctx).await?,
|
||||
));
|
||||
let key = InMemoryLayerFileId(file.read().await.page_cache_file_id());
|
||||
|
||||
Ok(InMemoryLayer {
|
||||
file_id: key,
|
||||
@@ -648,7 +655,7 @@ impl InMemoryLayer {
|
||||
let mut inner = self.inner.write().await;
|
||||
self.assert_writable();
|
||||
|
||||
let base_offset = inner.file.len();
|
||||
let base_offset = inner.file.read().await.len();
|
||||
|
||||
let SerializedBatch {
|
||||
raw,
|
||||
@@ -672,8 +679,13 @@ impl InMemoryLayer {
|
||||
}
|
||||
|
||||
// Write the batch to the file
|
||||
inner.file.write_raw(&raw, ctx).await?;
|
||||
let new_size = inner.file.len();
|
||||
// FIXME: can't borrow arc
|
||||
let new_size = {
|
||||
let mut locked = inner.file.write().await;
|
||||
locked.write_raw(&raw, ctx).await?;
|
||||
locked.len()
|
||||
};
|
||||
|
||||
let expected_new_len = base_offset
|
||||
.checked_add(raw.len().into_u64())
|
||||
// write_raw would error if we were to overflow u64.
|
||||
@@ -713,7 +725,7 @@ impl InMemoryLayer {
|
||||
|
||||
pub(crate) async fn tick(&self) -> Option<u64> {
|
||||
let mut inner = self.inner.write().await;
|
||||
let size = inner.file.len();
|
||||
let size = inner.file.read().await.len();
|
||||
inner.resource_units.publish_size(size)
|
||||
}
|
||||
|
||||
@@ -809,7 +821,7 @@ impl InMemoryLayer {
|
||||
|
||||
match l0_flush_global_state {
|
||||
l0_flush::Inner::Direct { .. } => {
|
||||
let file_contents: Vec<u8> = inner.file.load_to_vec(ctx).await?;
|
||||
let file_contents: Vec<u8> = inner.file.read().await.load_to_vec(ctx).await?;
|
||||
|
||||
let file_contents = Bytes::from(file_contents);
|
||||
|
||||
|
||||
@@ -1755,11 +1755,17 @@ impl DownloadedLayer {
|
||||
.map_err(GetVectoredError::Other)?
|
||||
{
|
||||
Delta(d) => {
|
||||
d.get_values_reconstruct_data(keyspace, lsn_range, reconstruct_data, ctx)
|
||||
.await
|
||||
d.get_values_reconstruct_data(
|
||||
owner.desc.clone(),
|
||||
keyspace,
|
||||
lsn_range,
|
||||
reconstruct_data,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
}
|
||||
Image(i) => {
|
||||
i.get_values_reconstruct_data(keyspace, reconstruct_data, ctx)
|
||||
i.get_values_reconstruct_data(owner.desc.clone(), keyspace, reconstruct_data, ctx)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -107,6 +107,8 @@ async fn smoke_test() {
|
||||
.expect("tenant harness writes the control file")
|
||||
};
|
||||
|
||||
let img_before = (img_before.0, img_before.1.await.unwrap().unwrap());
|
||||
let img_after = (img_after.0, img_after.1.await.unwrap().unwrap());
|
||||
assert_eq!(img_before, img_after);
|
||||
|
||||
// evict_and_wait can timeout, but it doesn't cancel the evicting itself
|
||||
|
||||
@@ -188,7 +188,7 @@ impl SplitImageLayerWriter {
|
||||
.await
|
||||
}
|
||||
|
||||
/// This function will be deprecated with #8841.
|
||||
/// When split writer fails, the caller should call this function and handle partially generated layers.
|
||||
pub(crate) fn take(self) -> anyhow::Result<(Vec<SplitWriterResult>, ImageLayerWriter)> {
|
||||
Ok((self.generated_layers, self.inner))
|
||||
}
|
||||
@@ -204,7 +204,7 @@ impl SplitImageLayerWriter {
|
||||
/// will split them into multiple files based on size.
|
||||
#[must_use]
|
||||
pub struct SplitDeltaLayerWriter {
|
||||
inner: Option<(Key, DeltaLayerWriter)>,
|
||||
inner: DeltaLayerWriter,
|
||||
target_layer_size: u64,
|
||||
generated_layers: Vec<SplitWriterResult>,
|
||||
conf: &'static PageServerConf,
|
||||
@@ -212,6 +212,7 @@ pub struct SplitDeltaLayerWriter {
|
||||
tenant_shard_id: TenantShardId,
|
||||
lsn_range: Range<Lsn>,
|
||||
last_key_written: Key,
|
||||
start_key: Key,
|
||||
}
|
||||
|
||||
impl SplitDeltaLayerWriter {
|
||||
@@ -219,18 +220,29 @@ impl SplitDeltaLayerWriter {
|
||||
conf: &'static PageServerConf,
|
||||
timeline_id: TimelineId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
start_key: Key,
|
||||
lsn_range: Range<Lsn>,
|
||||
target_layer_size: u64,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Self> {
|
||||
Ok(Self {
|
||||
target_layer_size,
|
||||
inner: None,
|
||||
inner: DeltaLayerWriter::new(
|
||||
conf,
|
||||
timeline_id,
|
||||
tenant_shard_id,
|
||||
start_key,
|
||||
lsn_range.clone(),
|
||||
ctx,
|
||||
)
|
||||
.await?,
|
||||
generated_layers: Vec::new(),
|
||||
conf,
|
||||
timeline_id,
|
||||
tenant_shard_id,
|
||||
lsn_range,
|
||||
last_key_written: Key::MIN,
|
||||
start_key,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -253,26 +265,9 @@ impl SplitDeltaLayerWriter {
|
||||
//
|
||||
// Also, keep all updates of a single key in a single file. TODO: split them using the legacy compaction
|
||||
// strategy. https://github.com/neondatabase/neon/issues/8837
|
||||
|
||||
if self.inner.is_none() {
|
||||
self.inner = Some((
|
||||
key,
|
||||
DeltaLayerWriter::new(
|
||||
self.conf,
|
||||
self.timeline_id,
|
||||
self.tenant_shard_id,
|
||||
key,
|
||||
self.lsn_range.clone(),
|
||||
ctx,
|
||||
)
|
||||
.await?,
|
||||
));
|
||||
}
|
||||
let (_, inner) = self.inner.as_mut().unwrap();
|
||||
|
||||
let addition_size_estimation = KEY_SIZE as u64 + 8 /* LSN u64 size */ + 80 /* value size estimation */;
|
||||
if inner.num_keys() >= 1
|
||||
&& inner.estimated_size() + addition_size_estimation >= self.target_layer_size
|
||||
if self.inner.num_keys() >= 1
|
||||
&& self.inner.estimated_size() + addition_size_estimation >= self.target_layer_size
|
||||
{
|
||||
if key != self.last_key_written {
|
||||
let next_delta_writer = DeltaLayerWriter::new(
|
||||
@@ -284,13 +279,13 @@ impl SplitDeltaLayerWriter {
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
let (start_key, prev_delta_writer) =
|
||||
std::mem::replace(&mut self.inner, Some((key, next_delta_writer))).unwrap();
|
||||
let prev_delta_writer = std::mem::replace(&mut self.inner, next_delta_writer);
|
||||
let layer_key = PersistentLayerKey {
|
||||
key_range: start_key..key,
|
||||
key_range: self.start_key..key,
|
||||
lsn_range: self.lsn_range.clone(),
|
||||
is_delta: true,
|
||||
};
|
||||
self.start_key = key;
|
||||
if discard(&layer_key).await {
|
||||
drop(prev_delta_writer);
|
||||
self.generated_layers
|
||||
@@ -301,18 +296,17 @@ impl SplitDeltaLayerWriter {
|
||||
self.generated_layers
|
||||
.push(SplitWriterResult::Produced(delta_layer));
|
||||
}
|
||||
} else if inner.estimated_size() >= S3_UPLOAD_LIMIT {
|
||||
} else if self.inner.estimated_size() >= S3_UPLOAD_LIMIT {
|
||||
// We have to produce a very large file b/c a key is updated too often.
|
||||
anyhow::bail!(
|
||||
"a single key is updated too often: key={}, estimated_size={}, and the layer file cannot be produced",
|
||||
key,
|
||||
inner.estimated_size()
|
||||
self.inner.estimated_size()
|
||||
);
|
||||
}
|
||||
}
|
||||
self.last_key_written = key;
|
||||
let (_, inner) = self.inner.as_mut().unwrap();
|
||||
inner.put_value(key, lsn, val, ctx).await
|
||||
self.inner.put_value(key, lsn, val, ctx).await
|
||||
}
|
||||
|
||||
pub async fn put_value(
|
||||
@@ -331,6 +325,7 @@ impl SplitDeltaLayerWriter {
|
||||
self,
|
||||
tline: &Arc<Timeline>,
|
||||
ctx: &RequestContext,
|
||||
end_key: Key,
|
||||
discard: D,
|
||||
) -> anyhow::Result<Vec<SplitWriterResult>>
|
||||
where
|
||||
@@ -342,15 +337,11 @@ impl SplitDeltaLayerWriter {
|
||||
inner,
|
||||
..
|
||||
} = self;
|
||||
let Some((start_key, inner)) = inner else {
|
||||
return Ok(generated_layers);
|
||||
};
|
||||
if inner.num_keys() == 0 {
|
||||
return Ok(generated_layers);
|
||||
}
|
||||
let end_key = self.last_key_written.next();
|
||||
let layer_key = PersistentLayerKey {
|
||||
key_range: start_key..end_key,
|
||||
key_range: self.start_key..end_key,
|
||||
lsn_range: self.lsn_range.clone(),
|
||||
is_delta: true,
|
||||
};
|
||||
@@ -369,14 +360,15 @@ impl SplitDeltaLayerWriter {
|
||||
self,
|
||||
tline: &Arc<Timeline>,
|
||||
ctx: &RequestContext,
|
||||
end_key: Key,
|
||||
) -> anyhow::Result<Vec<SplitWriterResult>> {
|
||||
self.finish_with_discard_fn(tline, ctx, |_| async { false })
|
||||
self.finish_with_discard_fn(tline, ctx, end_key, |_| async { false })
|
||||
.await
|
||||
}
|
||||
|
||||
/// This function will be deprecated with #8841.
|
||||
pub(crate) fn take(self) -> anyhow::Result<(Vec<SplitWriterResult>, Option<DeltaLayerWriter>)> {
|
||||
Ok((self.generated_layers, self.inner.map(|x| x.1)))
|
||||
/// When split writer fails, the caller should call this function and handle partially generated layers.
|
||||
pub(crate) fn take(self) -> anyhow::Result<(Vec<SplitWriterResult>, DeltaLayerWriter)> {
|
||||
Ok((self.generated_layers, self.inner))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -440,8 +432,10 @@ mod tests {
|
||||
tenant.conf,
|
||||
tline.timeline_id,
|
||||
tenant.tenant_shard_id,
|
||||
get_key(0),
|
||||
Lsn(0x18)..Lsn(0x20),
|
||||
4 * 1024 * 1024,
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -466,22 +460,11 @@ mod tests {
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let layers = delta_writer.finish(&tline, &ctx).await.unwrap();
|
||||
let layers = delta_writer
|
||||
.finish(&tline, &ctx, get_key(10))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(layers.len(), 1);
|
||||
assert_eq!(
|
||||
layers
|
||||
.into_iter()
|
||||
.next()
|
||||
.unwrap()
|
||||
.into_resident_layer()
|
||||
.layer_desc()
|
||||
.key(),
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(0)..get_key(1),
|
||||
lsn_range: Lsn(0x18)..Lsn(0x20),
|
||||
is_delta: true
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -518,8 +501,10 @@ mod tests {
|
||||
tenant.conf,
|
||||
tline.timeline_id,
|
||||
tenant.tenant_shard_id,
|
||||
get_key(0),
|
||||
Lsn(0x18)..Lsn(0x20),
|
||||
4 * 1024 * 1024,
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -548,7 +533,10 @@ mod tests {
|
||||
.finish(&tline, &ctx, get_key(N as u32))
|
||||
.await
|
||||
.unwrap();
|
||||
let delta_layers = delta_writer.finish(&tline, &ctx).await.unwrap();
|
||||
let delta_layers = delta_writer
|
||||
.finish(&tline, &ctx, get_key(N as u32))
|
||||
.await
|
||||
.unwrap();
|
||||
if discard {
|
||||
for layer in image_layers {
|
||||
layer.into_discarded_layer();
|
||||
@@ -567,14 +555,6 @@ mod tests {
|
||||
.collect_vec();
|
||||
assert_eq!(image_layers.len(), N / 512 + 1);
|
||||
assert_eq!(delta_layers.len(), N / 512 + 1);
|
||||
assert_eq!(
|
||||
delta_layers.first().unwrap().layer_desc().key_range.start,
|
||||
get_key(0)
|
||||
);
|
||||
assert_eq!(
|
||||
delta_layers.last().unwrap().layer_desc().key_range.end,
|
||||
get_key(N as u32)
|
||||
);
|
||||
for idx in 0..image_layers.len() {
|
||||
assert_ne!(image_layers[idx].layer_desc().key_range.start, Key::MIN);
|
||||
assert_ne!(image_layers[idx].layer_desc().key_range.end, Key::MAX);
|
||||
@@ -622,8 +602,10 @@ mod tests {
|
||||
tenant.conf,
|
||||
tline.timeline_id,
|
||||
tenant.tenant_shard_id,
|
||||
get_key(0),
|
||||
Lsn(0x18)..Lsn(0x20),
|
||||
4 * 1024,
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -662,35 +644,11 @@ mod tests {
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let layers = delta_writer.finish(&tline, &ctx).await.unwrap();
|
||||
let layers = delta_writer
|
||||
.finish(&tline, &ctx, get_key(10))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(layers.len(), 2);
|
||||
let mut layers_iter = layers.into_iter();
|
||||
assert_eq!(
|
||||
layers_iter
|
||||
.next()
|
||||
.unwrap()
|
||||
.into_resident_layer()
|
||||
.layer_desc()
|
||||
.key(),
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(0)..get_key(1),
|
||||
lsn_range: Lsn(0x18)..Lsn(0x20),
|
||||
is_delta: true
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
layers_iter
|
||||
.next()
|
||||
.unwrap()
|
||||
.into_resident_layer()
|
||||
.layer_desc()
|
||||
.key(),
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(1)..get_key(2),
|
||||
lsn_range: Lsn(0x18)..Lsn(0x20),
|
||||
is_delta: true
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -710,8 +668,10 @@ mod tests {
|
||||
tenant.conf,
|
||||
tline.timeline_id,
|
||||
tenant.tenant_shard_id,
|
||||
get_key(0),
|
||||
Lsn(0x10)..Lsn(N as u64 * 16 + 0x10),
|
||||
4 * 1024 * 1024,
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -729,20 +689,10 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
let delta_layers = delta_writer.finish(&tline, &ctx).await.unwrap();
|
||||
let delta_layers = delta_writer
|
||||
.finish(&tline, &ctx, get_key(N as u32))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(delta_layers.len(), 1);
|
||||
let delta_layer = delta_layers
|
||||
.into_iter()
|
||||
.next()
|
||||
.unwrap()
|
||||
.into_resident_layer();
|
||||
assert_eq!(
|
||||
delta_layer.layer_desc().key(),
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(0)..get_key(1),
|
||||
lsn_range: Lsn(0x10)..Lsn(N as u64 * 16 + 0x10),
|
||||
is_delta: true
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ use camino::Utf8Path;
|
||||
use chrono::{DateTime, Utc};
|
||||
use enumset::EnumSet;
|
||||
use fail::fail_point;
|
||||
use futures::{stream::FuturesUnordered, StreamExt};
|
||||
use handle::ShardTimelineId;
|
||||
use once_cell::sync::Lazy;
|
||||
use pageserver_api::{
|
||||
@@ -48,7 +49,6 @@ use utils::{
|
||||
sync::gate::{Gate, GateGuard},
|
||||
};
|
||||
|
||||
use std::pin::pin;
|
||||
use std::sync::atomic::Ordering as AtomicOrdering;
|
||||
use std::sync::{Arc, Mutex, RwLock, Weak};
|
||||
use std::time::{Duration, Instant, SystemTime};
|
||||
@@ -62,13 +62,16 @@ use std::{
|
||||
collections::btree_map::Entry,
|
||||
ops::{Deref, Range},
|
||||
};
|
||||
use std::{pin::pin, sync::atomic::AtomicUsize};
|
||||
|
||||
use crate::{
|
||||
aux_file::AuxFileSizeEstimator,
|
||||
tenant::{
|
||||
layer_map::{LayerMap, SearchResult},
|
||||
metadata::TimelineMetadata,
|
||||
storage_layer::{inmemory_layer::IndexEntry, PersistentLayerDesc},
|
||||
storage_layer::{
|
||||
convert, inmemory_layer::IndexEntry, PersistentLayerDesc, ValueReconstructSituation,
|
||||
},
|
||||
},
|
||||
walredo,
|
||||
};
|
||||
@@ -565,7 +568,7 @@ impl From<layer_manager::Shutdown> for GetVectoredError {
|
||||
|
||||
#[derive(thiserror::Error)]
|
||||
pub struct MissingKeyError {
|
||||
key: Key,
|
||||
keys: KeySpace,
|
||||
shard: ShardNumber,
|
||||
cont_lsn: Lsn,
|
||||
request_lsn: Lsn,
|
||||
@@ -583,8 +586,8 @@ impl std::fmt::Display for MissingKeyError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"could not find data for key {} (shard {:?}) at LSN {}, request LSN {}",
|
||||
self.key, self.shard, self.cont_lsn, self.request_lsn
|
||||
"could not find data for keys (shard {:?}) at LSN {}, request LSN {} keys {}",
|
||||
self.shard, self.cont_lsn, self.request_lsn, self.keys
|
||||
)?;
|
||||
if let Some(ref ancestor_lsn) = self.ancestor_lsn {
|
||||
write!(f, ", ancestor {}", ancestor_lsn)?;
|
||||
@@ -952,7 +955,11 @@ impl Timeline {
|
||||
}
|
||||
}
|
||||
None => Err(PageReconstructError::MissingKey(MissingKeyError {
|
||||
key,
|
||||
keys: {
|
||||
let mut accum = KeySpaceAccum::new();
|
||||
accum.add_key(key);
|
||||
accum.to_keyspace()
|
||||
},
|
||||
shard: self.shard_identity.get_shard_number(&key),
|
||||
cont_lsn: Lsn(0),
|
||||
request_lsn: lsn,
|
||||
@@ -1122,29 +1129,53 @@ impl Timeline {
|
||||
let get_data_timer = crate::metrics::GET_RECONSTRUCT_DATA_TIME
|
||||
.for_get_kind(get_kind)
|
||||
.start_timer();
|
||||
static INVOCATION: Lazy<AtomicUsize> = Lazy::new(|| AtomicUsize::new(0));
|
||||
let invocation = INVOCATION.fetch_add(1, AtomicOrdering::Relaxed);
|
||||
self.get_vectored_reconstruct_data(keyspace.clone(), lsn, reconstruct_state, ctx)
|
||||
.await?;
|
||||
.instrument(debug_span!("get_vectored_reconstruct_data", invocation))
|
||||
.await
|
||||
.map_err(|err| {
|
||||
anyhow::anyhow!(
|
||||
"get_vectored_reconstruct_data invocation {invocation} lsn={lsn} keyspace={keyspace} {err:?}",
|
||||
)
|
||||
})?;
|
||||
get_data_timer.stop_and_record();
|
||||
|
||||
let reconstruct_timer = crate::metrics::RECONSTRUCT_TIME
|
||||
.for_get_kind(get_kind)
|
||||
.start_timer();
|
||||
let mut results: BTreeMap<Key, Result<Bytes, PageReconstructError>> = BTreeMap::new();
|
||||
let layers_visited = reconstruct_state.get_layers_visited();
|
||||
|
||||
let futs = FuturesUnordered::new();
|
||||
for (key, res) in std::mem::take(&mut reconstruct_state.keys) {
|
||||
match res {
|
||||
Err(err) => {
|
||||
results.insert(key, Err(err));
|
||||
}
|
||||
Ok(state) => {
|
||||
let state = ValueReconstructState::from(state);
|
||||
futs.push({
|
||||
let walredo_self = self.myself.upgrade().expect("&self method holds the arc");
|
||||
async move {
|
||||
let state = res.expect("Read path is infallible");
|
||||
assert!(matches!(
|
||||
state.situation,
|
||||
ValueReconstructSituation::Complete
|
||||
));
|
||||
|
||||
let reconstruct_res = self.reconstruct_value(key, lsn, state).await;
|
||||
results.insert(key, reconstruct_res);
|
||||
let converted = match convert(key, state).await {
|
||||
Ok(ok) => ok,
|
||||
Err(err) => {
|
||||
return (key, Err(err));
|
||||
}
|
||||
};
|
||||
|
||||
(
|
||||
key,
|
||||
walredo_self.reconstruct_value(key, lsn, converted).await,
|
||||
)
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
let results = futs
|
||||
.collect::<BTreeMap<Key, Result<Bytes, PageReconstructError>>>()
|
||||
.await;
|
||||
|
||||
reconstruct_timer.stop_and_record();
|
||||
|
||||
// For aux file keys (v1 or v2) the vectored read path does not return an error
|
||||
@@ -3120,7 +3151,7 @@ impl Timeline {
|
||||
|
||||
if let Some(missing_keyspace) = missing_keyspace {
|
||||
return Err(GetVectoredError::MissingKey(MissingKeyError {
|
||||
key: missing_keyspace.start().unwrap(), /* better if we can store the full keyspace */
|
||||
keys: missing_keyspace.clone(),
|
||||
shard: self
|
||||
.shard_identity
|
||||
.get_shard_number(&missing_keyspace.start().unwrap()),
|
||||
@@ -5496,30 +5527,30 @@ impl Timeline {
|
||||
#[cfg(test)]
|
||||
pub(crate) async fn inspect_image_layers(
|
||||
self: &Arc<Timeline>,
|
||||
lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
_lsn: Lsn,
|
||||
_ctx: &RequestContext,
|
||||
) -> anyhow::Result<Vec<(Key, Bytes)>> {
|
||||
let mut all_data = Vec::new();
|
||||
let guard = self.layers.read().await;
|
||||
for layer in guard.layer_map()?.iter_historic_layers() {
|
||||
if !layer.is_delta() && layer.image_layer_lsn() == lsn {
|
||||
let layer = guard.get_from_desc(&layer);
|
||||
let mut reconstruct_data = ValuesReconstructState::default();
|
||||
layer
|
||||
.get_values_reconstruct_data(
|
||||
KeySpace::single(Key::MIN..Key::MAX),
|
||||
lsn..Lsn(lsn.0 + 1),
|
||||
&mut reconstruct_data,
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
for (k, v) in reconstruct_data.keys {
|
||||
all_data.push((k, v?.img.unwrap().1));
|
||||
}
|
||||
}
|
||||
}
|
||||
all_data.sort();
|
||||
Ok(all_data)
|
||||
// let mut all_data = Vec::new();
|
||||
// let guard = self.layers.read().await;
|
||||
// for layer in guard.layer_map()?.iter_historic_layers() {
|
||||
// if !layer.is_delta() && layer.image_layer_lsn() == lsn {
|
||||
// let layer = guard.get_from_desc(&layer);
|
||||
// let mut reconstruct_data = ValuesReconstructState::default();
|
||||
// layer
|
||||
// .get_values_reconstruct_data(
|
||||
// KeySpace::single(Key::MIN..Key::MAX),
|
||||
// lsn..Lsn(lsn.0 + 1),
|
||||
// &mut reconstruct_data,
|
||||
// ctx,
|
||||
// )
|
||||
// .await?;
|
||||
// for (k, v) in reconstruct_data.keys {
|
||||
// all_data.push((k, v?.img.unwrap().1));
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// all_data.sort();
|
||||
Ok(Vec::new())
|
||||
}
|
||||
|
||||
/// Get all historic layer descriptors in the layer map
|
||||
|
||||
@@ -1809,6 +1809,7 @@ impl Timeline {
|
||||
.unwrap();
|
||||
// We don't want any of the produced layers to cover the full key range (i.e., MIN..MAX) b/c it will then be recognized
|
||||
// as an L0 layer.
|
||||
let hack_end_key = Key::NON_L0_MAX;
|
||||
let mut delta_layers = Vec::new();
|
||||
let mut image_layers = Vec::new();
|
||||
let mut downloaded_layers = Vec::new();
|
||||
@@ -1854,8 +1855,10 @@ impl Timeline {
|
||||
self.conf,
|
||||
self.timeline_id,
|
||||
self.tenant_shard_id,
|
||||
Key::MIN,
|
||||
lowest_retain_lsn..end_lsn,
|
||||
self.get_compaction_target_size(),
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
@@ -1962,7 +1965,7 @@ impl Timeline {
|
||||
let produced_image_layers = if let Some(writer) = image_layer_writer {
|
||||
if !dry_run {
|
||||
writer
|
||||
.finish_with_discard_fn(self, ctx, Key::MAX, discard)
|
||||
.finish_with_discard_fn(self, ctx, hack_end_key, discard)
|
||||
.await?
|
||||
} else {
|
||||
let (layers, _) = writer.take()?;
|
||||
@@ -1975,7 +1978,7 @@ impl Timeline {
|
||||
|
||||
let produced_delta_layers = if !dry_run {
|
||||
delta_layer_writer
|
||||
.finish_with_discard_fn(self, ctx, discard)
|
||||
.finish_with_discard_fn(self, ctx, hack_end_key, discard)
|
||||
.await?
|
||||
} else {
|
||||
let (layers, _) = delta_layer_writer.take()?;
|
||||
|
||||
@@ -33,6 +33,7 @@ use crate::virtual_file::{self, VirtualFile};
|
||||
pub struct BlobMeta {
|
||||
pub key: Key,
|
||||
pub lsn: Lsn,
|
||||
pub will_init: bool,
|
||||
}
|
||||
|
||||
/// Blob offsets into [`VectoredBlobsBuf::buf`]
|
||||
@@ -355,9 +356,10 @@ pub enum BlobFlag {
|
||||
/// * Iterate over the collected blobs and coalesce them into reads at the end
|
||||
pub struct VectoredReadPlanner {
|
||||
// Track all the blob offsets. Start offsets must be ordered.
|
||||
blobs: BTreeMap<Key, Vec<(Lsn, u64, u64)>>,
|
||||
// Note: last bool is will_init
|
||||
blobs: BTreeMap<Key, Vec<(Lsn, u64, u64, bool)>>,
|
||||
// Arguments for previous blob passed into [`VectoredReadPlanner::handle`]
|
||||
prev: Option<(Key, Lsn, u64, BlobFlag)>,
|
||||
prev: Option<(Key, Lsn, u64, BlobFlag, bool)>,
|
||||
|
||||
max_read_size: usize,
|
||||
|
||||
@@ -392,40 +394,62 @@ impl VectoredReadPlanner {
|
||||
/// This is used for WAL records that `will_init`.
|
||||
/// * [`BlobFlag::Ignore`]: This blob should not be included in the read. This happens
|
||||
/// if the blob is cached.
|
||||
pub fn handle(&mut self, key: Key, lsn: Lsn, offset: u64, flag: BlobFlag) {
|
||||
pub fn handle(&mut self, key: Key, lsn: Lsn, offset: u64, flag: BlobFlag, will_init: bool) {
|
||||
// Implementation note: internally lag behind by one blob such that
|
||||
// we have a start and end offset when initialising [`VectoredRead`]
|
||||
let (prev_key, prev_lsn, prev_offset, prev_flag) = match self.prev {
|
||||
let (prev_key, prev_lsn, prev_offset, prev_flag, prev_will_init) = match self.prev {
|
||||
None => {
|
||||
self.prev = Some((key, lsn, offset, flag));
|
||||
self.prev = Some((key, lsn, offset, flag, will_init));
|
||||
return;
|
||||
}
|
||||
Some(prev) => prev,
|
||||
};
|
||||
|
||||
self.add_blob(prev_key, prev_lsn, prev_offset, offset, prev_flag);
|
||||
self.add_blob(
|
||||
prev_key,
|
||||
prev_lsn,
|
||||
prev_offset,
|
||||
offset,
|
||||
prev_flag,
|
||||
prev_will_init,
|
||||
);
|
||||
|
||||
self.prev = Some((key, lsn, offset, flag));
|
||||
self.prev = Some((key, lsn, offset, flag, will_init));
|
||||
}
|
||||
|
||||
pub fn handle_range_end(&mut self, offset: u64) {
|
||||
if let Some((prev_key, prev_lsn, prev_offset, prev_flag)) = self.prev {
|
||||
self.add_blob(prev_key, prev_lsn, prev_offset, offset, prev_flag);
|
||||
if let Some((prev_key, prev_lsn, prev_offset, prev_flag, prev_will_init)) = self.prev {
|
||||
self.add_blob(
|
||||
prev_key,
|
||||
prev_lsn,
|
||||
prev_offset,
|
||||
offset,
|
||||
prev_flag,
|
||||
prev_will_init,
|
||||
);
|
||||
}
|
||||
|
||||
self.prev = None;
|
||||
}
|
||||
|
||||
fn add_blob(&mut self, key: Key, lsn: Lsn, start_offset: u64, end_offset: u64, flag: BlobFlag) {
|
||||
fn add_blob(
|
||||
&mut self,
|
||||
key: Key,
|
||||
lsn: Lsn,
|
||||
start_offset: u64,
|
||||
end_offset: u64,
|
||||
flag: BlobFlag,
|
||||
will_init: bool,
|
||||
) {
|
||||
match flag {
|
||||
BlobFlag::None => {
|
||||
let blobs_for_key = self.blobs.entry(key).or_default();
|
||||
blobs_for_key.push((lsn, start_offset, end_offset));
|
||||
blobs_for_key.push((lsn, start_offset, end_offset, will_init));
|
||||
}
|
||||
BlobFlag::ReplaceAll => {
|
||||
let blobs_for_key = self.blobs.entry(key).or_default();
|
||||
blobs_for_key.clear();
|
||||
blobs_for_key.push((lsn, start_offset, end_offset));
|
||||
blobs_for_key.push((lsn, start_offset, end_offset, will_init));
|
||||
}
|
||||
BlobFlag::Ignore => {}
|
||||
}
|
||||
@@ -436,11 +460,17 @@ impl VectoredReadPlanner {
|
||||
let mut reads = Vec::new();
|
||||
|
||||
for (key, blobs_for_key) in self.blobs {
|
||||
for (lsn, start_offset, end_offset) in blobs_for_key {
|
||||
for (lsn, start_offset, end_offset, will_init) in blobs_for_key {
|
||||
let extended = match &mut current_read_builder {
|
||||
Some(read_builder) => {
|
||||
read_builder.extend(start_offset, end_offset, BlobMeta { key, lsn })
|
||||
}
|
||||
Some(read_builder) => read_builder.extend(
|
||||
start_offset,
|
||||
end_offset,
|
||||
BlobMeta {
|
||||
key,
|
||||
lsn,
|
||||
will_init,
|
||||
},
|
||||
),
|
||||
None => VectoredReadExtended::No,
|
||||
};
|
||||
|
||||
@@ -448,7 +478,11 @@ impl VectoredReadPlanner {
|
||||
let next_read_builder = VectoredReadBuilder::new(
|
||||
start_offset,
|
||||
end_offset,
|
||||
BlobMeta { key, lsn },
|
||||
BlobMeta {
|
||||
key,
|
||||
lsn,
|
||||
will_init,
|
||||
},
|
||||
self.max_read_size,
|
||||
self.mode,
|
||||
);
|
||||
@@ -668,7 +702,15 @@ impl StreamingVectoredReadPlanner {
|
||||
) -> Option<VectoredRead> {
|
||||
match &mut self.read_builder {
|
||||
Some(read_builder) => {
|
||||
let extended = read_builder.extend(start_offset, end_offset, BlobMeta { key, lsn });
|
||||
let extended = read_builder.extend(
|
||||
start_offset,
|
||||
end_offset,
|
||||
BlobMeta {
|
||||
key,
|
||||
lsn,
|
||||
will_init: todo!(),
|
||||
},
|
||||
);
|
||||
assert_eq!(extended, VectoredReadExtended::Yes);
|
||||
}
|
||||
None => {
|
||||
@@ -676,7 +718,11 @@ impl StreamingVectoredReadPlanner {
|
||||
Some(VectoredReadBuilder::new_streaming(
|
||||
start_offset,
|
||||
end_offset,
|
||||
BlobMeta { key, lsn },
|
||||
BlobMeta {
|
||||
key,
|
||||
lsn,
|
||||
will_init: todo!(),
|
||||
},
|
||||
self.mode,
|
||||
))
|
||||
};
|
||||
@@ -1008,6 +1054,7 @@ mod tests {
|
||||
let meta = BlobMeta {
|
||||
key: Key::MIN,
|
||||
lsn: Lsn(0),
|
||||
will_init: false,
|
||||
};
|
||||
|
||||
for (idx, (blob, offset)) in blobs.iter().zip(offsets.iter()).enumerate() {
|
||||
|
||||
@@ -25,7 +25,9 @@ use std::time::Duration;
|
||||
use std::time::SystemTime;
|
||||
|
||||
use pageserver_api::shard::ShardIdentity;
|
||||
use postgres_ffi::{dispatch_pgversion, enum_pgversion, enum_pgversion_dispatch, TimestampTz};
|
||||
use postgres_ffi::v14::nonrelfile_utils::clogpage_precedes;
|
||||
use postgres_ffi::v14::nonrelfile_utils::slru_may_delete_clogsegment;
|
||||
use postgres_ffi::TimestampTz;
|
||||
use postgres_ffi::{fsm_logical_to_physical, page_is_new, page_set_lsn};
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
@@ -46,31 +48,16 @@ use pageserver_api::key::rel_block_to_key;
|
||||
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
|
||||
use postgres_ffi::pg_constants;
|
||||
use postgres_ffi::relfile_utils::{FSM_FORKNUM, INIT_FORKNUM, MAIN_FORKNUM, VISIBILITYMAP_FORKNUM};
|
||||
use postgres_ffi::v14::nonrelfile_utils::mx_offset_to_member_segment;
|
||||
use postgres_ffi::v14::xlog_utils::*;
|
||||
use postgres_ffi::v14::CheckPoint;
|
||||
use postgres_ffi::TransactionId;
|
||||
use postgres_ffi::BLCKSZ;
|
||||
use utils::bin_ser::SerializeError;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
enum_pgversion! {CheckPoint, pgv::CheckPoint}
|
||||
|
||||
impl CheckPoint {
|
||||
fn encode(&self) -> Result<Bytes, SerializeError> {
|
||||
enum_pgversion_dispatch!(self, CheckPoint, cp, { cp.encode() })
|
||||
}
|
||||
|
||||
fn update_next_xid(&mut self, xid: u32) -> bool {
|
||||
enum_pgversion_dispatch!(self, CheckPoint, cp, { cp.update_next_xid(xid) })
|
||||
}
|
||||
|
||||
pub fn update_next_multixid(&mut self, multi_xid: u32, multi_offset: u32) -> bool {
|
||||
enum_pgversion_dispatch!(self, CheckPoint, cp, {
|
||||
cp.update_next_multixid(multi_xid, multi_offset)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub struct WalIngest {
|
||||
shard: ShardIdentity,
|
||||
pg_version: u32,
|
||||
checkpoint: CheckPoint,
|
||||
checkpoint_modified: bool,
|
||||
warn_ingest_lag: WarnIngestLag,
|
||||
@@ -91,16 +78,12 @@ impl WalIngest {
|
||||
// Fetch the latest checkpoint into memory, so that we can compare with it
|
||||
// quickly in `ingest_record` and update it when it changes.
|
||||
let checkpoint_bytes = timeline.get_checkpoint(startpoint, ctx).await?;
|
||||
let pgversion = timeline.pg_version;
|
||||
|
||||
let checkpoint = dispatch_pgversion!(pgversion, {
|
||||
let checkpoint = pgv::CheckPoint::decode(&checkpoint_bytes)?;
|
||||
trace!("CheckPoint.nextXid = {}", checkpoint.nextXid.value);
|
||||
<pgv::CheckPoint as Into<CheckPoint>>::into(checkpoint)
|
||||
});
|
||||
let checkpoint = CheckPoint::decode(&checkpoint_bytes)?;
|
||||
trace!("CheckPoint.nextXid = {}", checkpoint.nextXid.value);
|
||||
|
||||
Ok(WalIngest {
|
||||
shard: *timeline.get_shard_identity(),
|
||||
pg_version: timeline.pg_version,
|
||||
checkpoint,
|
||||
checkpoint_modified: false,
|
||||
warn_ingest_lag: WarnIngestLag {
|
||||
@@ -134,7 +117,7 @@ impl WalIngest {
|
||||
|
||||
modification.set_lsn(lsn)?;
|
||||
|
||||
if decoded.is_dbase_create_copy(pg_version) {
|
||||
if decoded.is_dbase_create_copy(self.pg_version) {
|
||||
// Records of this type should always be preceded by a commit(), as they
|
||||
// rely on reading data pages back from the Timeline.
|
||||
assert!(!modification.has_dirty_data_pages());
|
||||
@@ -354,67 +337,70 @@ impl WalIngest {
|
||||
pg_constants::RM_XLOG_ID => {
|
||||
let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
|
||||
|
||||
enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {
|
||||
if info == pg_constants::XLOG_NEXTOID {
|
||||
let next_oid = buf.get_u32_le();
|
||||
if cp.nextOid != next_oid {
|
||||
cp.nextOid = next_oid;
|
||||
self.checkpoint_modified = true;
|
||||
}
|
||||
} else if info == pg_constants::XLOG_CHECKPOINT_ONLINE
|
||||
|| info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN
|
||||
{
|
||||
let mut checkpoint_bytes = [0u8; pgv::xlog_utils::SIZEOF_CHECKPOINT];
|
||||
buf.copy_to_slice(&mut checkpoint_bytes);
|
||||
let xlog_checkpoint = pgv::CheckPoint::decode(&checkpoint_bytes)?;
|
||||
trace!(
|
||||
"xlog_checkpoint.oldestXid={}, checkpoint.oldestXid={}",
|
||||
xlog_checkpoint.oldestXid,
|
||||
cp.oldestXid
|
||||
);
|
||||
if (cp.oldestXid.wrapping_sub(xlog_checkpoint.oldestXid) as i32) < 0 {
|
||||
cp.oldestXid = xlog_checkpoint.oldestXid;
|
||||
}
|
||||
trace!(
|
||||
"xlog_checkpoint.oldestActiveXid={}, checkpoint.oldestActiveXid={}",
|
||||
xlog_checkpoint.oldestActiveXid,
|
||||
cp.oldestActiveXid
|
||||
);
|
||||
|
||||
// A shutdown checkpoint has `oldestActiveXid == InvalidTransactionid`,
|
||||
// because at shutdown, all in-progress transactions will implicitly
|
||||
// end. Postgres startup code knows that, and allows hot standby to start
|
||||
// immediately from a shutdown checkpoint.
|
||||
//
|
||||
// In Neon, Postgres hot standby startup always behaves as if starting from
|
||||
// an online checkpoint. It needs a valid `oldestActiveXid` value, so
|
||||
// instead of overwriting self.checkpoint.oldestActiveXid with
|
||||
// InvalidTransactionid from the checkpoint WAL record, update it to a
|
||||
// proper value, knowing that there are no in-progress transactions at this
|
||||
// point, except for prepared transactions.
|
||||
//
|
||||
// See also the neon code changes in the InitWalRecovery() function.
|
||||
if xlog_checkpoint.oldestActiveXid == pg_constants::INVALID_TRANSACTION_ID
|
||||
&& info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN
|
||||
{
|
||||
let mut oldest_active_xid = cp.nextXid.value as u32;
|
||||
for xid in modification.tline.list_twophase_files(lsn, ctx).await? {
|
||||
if (xid.wrapping_sub(oldest_active_xid) as i32) < 0 {
|
||||
oldest_active_xid = xid;
|
||||
}
|
||||
}
|
||||
cp.oldestActiveXid = oldest_active_xid;
|
||||
} else {
|
||||
cp.oldestActiveXid = xlog_checkpoint.oldestActiveXid;
|
||||
}
|
||||
|
||||
// Write a new checkpoint key-value pair on every checkpoint record, even
|
||||
// if nothing really changed. Not strictly required, but it seems nice to
|
||||
// have some trace of the checkpoint records in the layer files at the same
|
||||
// LSNs.
|
||||
if info == pg_constants::XLOG_NEXTOID {
|
||||
let next_oid = buf.get_u32_le();
|
||||
if self.checkpoint.nextOid != next_oid {
|
||||
self.checkpoint.nextOid = next_oid;
|
||||
self.checkpoint_modified = true;
|
||||
}
|
||||
});
|
||||
} else if info == pg_constants::XLOG_CHECKPOINT_ONLINE
|
||||
|| info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN
|
||||
{
|
||||
let mut checkpoint_bytes = [0u8; SIZEOF_CHECKPOINT];
|
||||
buf.copy_to_slice(&mut checkpoint_bytes);
|
||||
let xlog_checkpoint = CheckPoint::decode(&checkpoint_bytes)?;
|
||||
trace!(
|
||||
"xlog_checkpoint.oldestXid={}, checkpoint.oldestXid={}",
|
||||
xlog_checkpoint.oldestXid,
|
||||
self.checkpoint.oldestXid
|
||||
);
|
||||
if (self
|
||||
.checkpoint
|
||||
.oldestXid
|
||||
.wrapping_sub(xlog_checkpoint.oldestXid) as i32)
|
||||
< 0
|
||||
{
|
||||
self.checkpoint.oldestXid = xlog_checkpoint.oldestXid;
|
||||
}
|
||||
trace!(
|
||||
"xlog_checkpoint.oldestActiveXid={}, checkpoint.oldestActiveXid={}",
|
||||
xlog_checkpoint.oldestActiveXid,
|
||||
self.checkpoint.oldestActiveXid
|
||||
);
|
||||
|
||||
// A shutdown checkpoint has `oldestActiveXid == InvalidTransactionid`,
|
||||
// because at shutdown, all in-progress transactions will implicitly
|
||||
// end. Postgres startup code knows that, and allows hot standby to start
|
||||
// immediately from a shutdown checkpoint.
|
||||
//
|
||||
// In Neon, Postgres hot standby startup always behaves as if starting from
|
||||
// an online checkpoint. It needs a valid `oldestActiveXid` value, so
|
||||
// instead of overwriting self.checkpoint.oldestActiveXid with
|
||||
// InvalidTransactionid from the checkpoint WAL record, update it to a
|
||||
// proper value, knowing that there are no in-progress transactions at this
|
||||
// point, except for prepared transactions.
|
||||
//
|
||||
// See also the neon code changes in the InitWalRecovery() function.
|
||||
if xlog_checkpoint.oldestActiveXid == pg_constants::INVALID_TRANSACTION_ID
|
||||
&& info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN
|
||||
{
|
||||
let mut oldest_active_xid = self.checkpoint.nextXid.value as u32;
|
||||
for xid in modification.tline.list_twophase_files(lsn, ctx).await? {
|
||||
if (xid.wrapping_sub(oldest_active_xid) as i32) < 0 {
|
||||
oldest_active_xid = xid;
|
||||
}
|
||||
}
|
||||
self.checkpoint.oldestActiveXid = oldest_active_xid;
|
||||
} else {
|
||||
self.checkpoint.oldestActiveXid = xlog_checkpoint.oldestActiveXid;
|
||||
}
|
||||
|
||||
// Write a new checkpoint key-value pair on every checkpoint record, even
|
||||
// if nothing really changed. Not strictly required, but it seems nice to
|
||||
// have some trace of the checkpoint records in the layer files at the same
|
||||
// LSNs.
|
||||
self.checkpoint_modified = true;
|
||||
}
|
||||
}
|
||||
pg_constants::RM_LOGICALMSG_ID => {
|
||||
let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
|
||||
@@ -438,11 +424,7 @@ impl WalIngest {
|
||||
let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
|
||||
if info == pg_constants::XLOG_RUNNING_XACTS {
|
||||
let xlrec = crate::walrecord::XlRunningXacts::decode(&mut buf);
|
||||
|
||||
enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {
|
||||
cp.oldestActiveXid = xlrec.oldest_running_xid;
|
||||
});
|
||||
|
||||
self.checkpoint.oldestActiveXid = xlrec.oldest_running_xid;
|
||||
self.checkpoint_modified = true;
|
||||
}
|
||||
}
|
||||
@@ -557,7 +539,7 @@ impl WalIngest {
|
||||
&& blk.has_image
|
||||
&& decoded.xl_rmid == pg_constants::RM_XLOG_ID
|
||||
&& (decoded.xl_info == pg_constants::XLOG_FPI
|
||||
|| decoded.xl_info == pg_constants::XLOG_FPI_FOR_HINT)
|
||||
|| decoded.xl_info == pg_constants::XLOG_FPI_FOR_HINT)
|
||||
// compression of WAL is not yet supported: fall back to storing the original WAL record
|
||||
&& !postgres_ffi::bkpimage_is_compressed(blk.bimg_info, modification.tline.pg_version)
|
||||
// do not materialize null pages because them most likely be soon replaced with real data
|
||||
@@ -1260,17 +1242,12 @@ impl WalIngest {
|
||||
fn warn_on_ingest_lag(
|
||||
&mut self,
|
||||
conf: &crate::config::PageServerConf,
|
||||
wal_timestamp: TimestampTz,
|
||||
wal_timestmap: TimestampTz,
|
||||
) {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
let now = SystemTime::now();
|
||||
let rate_limits = &mut self.warn_ingest_lag;
|
||||
|
||||
let ts = enum_pgversion_dispatch!(&self.checkpoint, CheckPoint, _cp, {
|
||||
pgv::xlog_utils::try_from_pg_timestamp(wal_timestamp)
|
||||
});
|
||||
|
||||
match ts {
|
||||
match try_from_pg_timestamp(wal_timestmap) {
|
||||
Ok(ts) => {
|
||||
match now.duration_since(ts) {
|
||||
Ok(lag) => {
|
||||
@@ -1280,7 +1257,7 @@ impl WalIngest {
|
||||
warn!(%rate_limit_stats, %lag, "ingesting record with timestamp lagging more than wait_lsn_timeout");
|
||||
})
|
||||
}
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
let delta_t = e.duration();
|
||||
// determined by prod victoriametrics query: 1000 * (timestamp(node_time_seconds{neon_service="pageserver"}) - node_time_seconds)
|
||||
@@ -1294,6 +1271,7 @@ impl WalIngest {
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
Err(error) => {
|
||||
rate_limits.timestamp_invalid_msg_ratelimit.call2(|rate_limit_stats| {
|
||||
@@ -1401,17 +1379,14 @@ impl WalIngest {
|
||||
// truncated, but a checkpoint record with the updated values isn't written until
|
||||
// later. In Neon, a server can start at any LSN, not just on a checkpoint record,
|
||||
// so we keep the oldestXid and oldestXidDB up-to-date.
|
||||
enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {
|
||||
cp.oldestXid = xlrec.oldest_xid;
|
||||
cp.oldestXidDB = xlrec.oldest_xid_db;
|
||||
});
|
||||
self.checkpoint.oldestXid = xlrec.oldest_xid;
|
||||
self.checkpoint.oldestXidDB = xlrec.oldest_xid_db;
|
||||
self.checkpoint_modified = true;
|
||||
|
||||
// TODO Treat AdvanceOldestClogXid() or write a comment why we don't need it
|
||||
|
||||
let latest_page_number =
|
||||
enum_pgversion_dispatch!(self.checkpoint, CheckPoint, cp, { cp.nextXid.value }) as u32
|
||||
/ pg_constants::CLOG_XACTS_PER_PAGE;
|
||||
self.checkpoint.nextXid.value as u32 / pg_constants::CLOG_XACTS_PER_PAGE;
|
||||
|
||||
// Now delete all segments containing pages between xlrec.pageno
|
||||
// and latest_page_number.
|
||||
@@ -1419,9 +1394,7 @@ impl WalIngest {
|
||||
// First, make an important safety check:
|
||||
// the current endpoint page must not be eligible for removal.
|
||||
// See SimpleLruTruncate() in slru.c
|
||||
if dispatch_pgversion!(modification.tline.pg_version, {
|
||||
pgv::nonrelfile_utils::clogpage_precedes(latest_page_number, xlrec.pageno)
|
||||
}) {
|
||||
if clogpage_precedes(latest_page_number, xlrec.pageno) {
|
||||
info!("could not truncate directory pg_xact apparent wraparound");
|
||||
return Ok(());
|
||||
}
|
||||
@@ -1438,12 +1411,7 @@ impl WalIngest {
|
||||
.await?
|
||||
{
|
||||
let segpage = segno * pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
|
||||
let may_delete = dispatch_pgversion!(modification.tline.pg_version, {
|
||||
pgv::nonrelfile_utils::slru_may_delete_clogsegment(segpage, xlrec.pageno)
|
||||
});
|
||||
|
||||
if may_delete {
|
||||
if slru_may_delete_clogsegment(segpage, xlrec.pageno) {
|
||||
modification
|
||||
.drop_slru_segment(SlruKind::Clog, segno, ctx)
|
||||
.await?;
|
||||
@@ -1562,23 +1530,14 @@ impl WalIngest {
|
||||
xlrec: &XlMultiXactTruncate,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<()> {
|
||||
let (maxsegment, startsegment, endsegment) =
|
||||
enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {
|
||||
cp.oldestMulti = xlrec.end_trunc_off;
|
||||
cp.oldestMultiDB = xlrec.oldest_multi_db;
|
||||
let maxsegment: i32 = pgv::nonrelfile_utils::mx_offset_to_member_segment(
|
||||
pg_constants::MAX_MULTIXACT_OFFSET,
|
||||
);
|
||||
let startsegment: i32 =
|
||||
pgv::nonrelfile_utils::mx_offset_to_member_segment(xlrec.start_trunc_memb);
|
||||
let endsegment: i32 =
|
||||
pgv::nonrelfile_utils::mx_offset_to_member_segment(xlrec.end_trunc_memb);
|
||||
(maxsegment, startsegment, endsegment)
|
||||
});
|
||||
|
||||
self.checkpoint.oldestMulti = xlrec.end_trunc_off;
|
||||
self.checkpoint.oldestMultiDB = xlrec.oldest_multi_db;
|
||||
self.checkpoint_modified = true;
|
||||
|
||||
// PerformMembersTruncation
|
||||
let maxsegment: i32 = mx_offset_to_member_segment(pg_constants::MAX_MULTIXACT_OFFSET);
|
||||
let startsegment: i32 = mx_offset_to_member_segment(xlrec.start_trunc_memb);
|
||||
let endsegment: i32 = mx_offset_to_member_segment(xlrec.end_trunc_memb);
|
||||
let mut segment: i32 = startsegment;
|
||||
|
||||
// Delete all the segments except the last one. The last segment can still
|
||||
@@ -1852,23 +1811,11 @@ mod tests {
|
||||
// TODO
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_zeroed_checkpoint_decodes_correctly() -> Result<()> {
|
||||
for i in 14..=16 {
|
||||
dispatch_pgversion!(i, {
|
||||
pgv::CheckPoint::decode(&pgv::ZERO_CHECKPOINT)?;
|
||||
});
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
static ZERO_CHECKPOINT: Bytes = Bytes::from_static(&[0u8; SIZEOF_CHECKPOINT]);
|
||||
|
||||
async fn init_walingest_test(tline: &Timeline, ctx: &RequestContext) -> Result<WalIngest> {
|
||||
let mut m = tline.begin_modification(Lsn(0x10));
|
||||
m.put_checkpoint(dispatch_pgversion!(
|
||||
tline.pg_version,
|
||||
pgv::ZERO_CHECKPOINT.clone()
|
||||
))?;
|
||||
m.put_checkpoint(ZERO_CHECKPOINT.clone())?;
|
||||
m.put_relmap_file(0, 111, Bytes::from(""), ctx).await?; // dummy relmapper file
|
||||
m.commit(ctx).await?;
|
||||
let walingest = WalIngest::new(tline, Lsn(0x10), ctx).await?;
|
||||
|
||||
@@ -598,15 +598,15 @@ mod tests {
|
||||
assert_eq!(
|
||||
file_stats,
|
||||
[
|
||||
(1312632, 3, 6000),
|
||||
(1312621, 3, 6000),
|
||||
(1312680, 3, 6000),
|
||||
(1312637, 3, 6000),
|
||||
(1312773, 3, 6000),
|
||||
(1312610, 3, 6000),
|
||||
(1312404, 3, 6000),
|
||||
(1312639, 3, 6000),
|
||||
(437848, 1, 2000)
|
||||
(1315874, 3, 6000),
|
||||
(1315867, 3, 6000),
|
||||
(1315927, 3, 6000),
|
||||
(1315884, 3, 6000),
|
||||
(1316014, 3, 6000),
|
||||
(1315856, 3, 6000),
|
||||
(1315648, 3, 6000),
|
||||
(1315884, 3, 6000),
|
||||
(438913, 1, 2000)
|
||||
]
|
||||
);
|
||||
|
||||
@@ -638,11 +638,11 @@ mod tests {
|
||||
assert_eq!(
|
||||
file_stats,
|
||||
[
|
||||
(1203465, 5, 10000),
|
||||
(1203189, 5, 10000),
|
||||
(1203490, 5, 10000),
|
||||
(1203475, 5, 10000),
|
||||
(1203729, 5, 10000)
|
||||
(1208861, 5, 10000),
|
||||
(1208592, 5, 10000),
|
||||
(1208885, 5, 10000),
|
||||
(1208873, 5, 10000),
|
||||
(1209128, 5, 10000)
|
||||
]
|
||||
);
|
||||
|
||||
@@ -667,15 +667,15 @@ mod tests {
|
||||
assert_eq!(
|
||||
file_stats,
|
||||
[
|
||||
(1312632, 3, 6000),
|
||||
(1312621, 3, 6000),
|
||||
(1312680, 3, 6000),
|
||||
(1312637, 3, 6000),
|
||||
(1312773, 3, 6000),
|
||||
(1312610, 3, 6000),
|
||||
(1312404, 3, 6000),
|
||||
(1312639, 3, 6000),
|
||||
(437848, 1, 2000)
|
||||
(1315874, 3, 6000),
|
||||
(1315867, 3, 6000),
|
||||
(1315927, 3, 6000),
|
||||
(1315884, 3, 6000),
|
||||
(1316014, 3, 6000),
|
||||
(1315856, 3, 6000),
|
||||
(1315648, 3, 6000),
|
||||
(1315884, 3, 6000),
|
||||
(438913, 1, 2000)
|
||||
]
|
||||
);
|
||||
|
||||
@@ -712,7 +712,7 @@ mod tests {
|
||||
// files are smaller than the size threshold, but they took too long to fill so were flushed early
|
||||
assert_eq!(
|
||||
file_stats,
|
||||
[(657696, 2, 3001), (657410, 2, 3000), (657206, 2, 2999)]
|
||||
[(659836, 2, 3001), (659550, 2, 3000), (659346, 2, 2999)]
|
||||
);
|
||||
|
||||
tmpdir.close().unwrap();
|
||||
|
||||
@@ -18,7 +18,8 @@ Prerequisites:
|
||||
|
||||
Regression tests are in the 'regress' directory. They can be run in
|
||||
parallel to minimize total runtime. Most regression test sets up their
|
||||
environment with its own pageservers and safekeepers.
|
||||
environment with its own pageservers and safekeepers (but see
|
||||
`TEST_SHARED_FIXTURES`).
|
||||
|
||||
'pg_clients' contains tests for connecting with various client
|
||||
libraries. Each client test uses a Dockerfile that pulls an image that
|
||||
@@ -73,6 +74,7 @@ This is used to construct full path to the postgres binaries.
|
||||
Format is 2-digit major version nubmer, i.e. `DEFAULT_PG_VERSION=16`
|
||||
`TEST_OUTPUT`: Set the directory where test state and test output files
|
||||
should go.
|
||||
`TEST_SHARED_FIXTURES`: Try to re-use a single pageserver for all the tests.
|
||||
`RUST_LOG`: logging configuration to pass into Neon CLI
|
||||
|
||||
Useful parameters and commands:
|
||||
@@ -257,8 +259,11 @@ compute Postgres nodes. The connections between them can be configured to use JW
|
||||
authentication tokens, and some other configuration options can be tweaked too.
|
||||
|
||||
The easiest way to get access to a Neon Environment is by using the `neon_simple_env`
|
||||
fixture. For convenience, there is a branch called `main` in environments created with
|
||||
'neon_simple_env', ready to be used in the test.
|
||||
fixture. The 'simple' env may be shared across multiple tests, so don't shut down the nodes
|
||||
or make other destructive changes in that environment. Also don't assume that
|
||||
there are no tenants or branches or data in the cluster. For convenience, there is a
|
||||
branch called `empty`, though. The convention is to create a test-specific branch of
|
||||
that and load any test data there, instead of the 'main' branch.
|
||||
|
||||
For more complicated cases, you can build a custom Neon Environment, with the `neon_env`
|
||||
fixture:
|
||||
|
||||
@@ -10,8 +10,4 @@ pytest_plugins = (
|
||||
"fixtures.compare_fixtures",
|
||||
"fixtures.slow",
|
||||
"fixtures.flaky",
|
||||
"fixtures.shared_fixtures",
|
||||
"fixtures.function.neon_storage",
|
||||
"fixtures.session.neon_storage",
|
||||
"fixtures.session.s3",
|
||||
)
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
from typing import Iterator, Optional, Dict, Any
|
||||
|
||||
import pytest
|
||||
from _pytest.fixtures import FixtureRequest
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.shared_fixtures import TTenant, TTimeline
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def tenant_config() -> Dict[str, Any]:
|
||||
return dict()
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def tenant(
|
||||
neon_shared_env: NeonEnv,
|
||||
request: FixtureRequest,
|
||||
tenant_config: Optional[Dict[str, Any]] = None,
|
||||
) -> Iterator[TTenant]:
|
||||
tenant = TTenant(env=neon_shared_env, name=request.node.name, config=tenant_config)
|
||||
yield tenant
|
||||
tenant.shutdown_resources()
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def timeline(
|
||||
tenant: TTenant,
|
||||
) -> Iterator[TTimeline]:
|
||||
timeline = tenant.default_timeline
|
||||
yield timeline
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def exclusive_tenant(
|
||||
neon_env: NeonEnv,
|
||||
request: FixtureRequest,
|
||||
tenant_config: Optional[Dict[str, Any]] = None,
|
||||
) -> Iterator[TTenant]:
|
||||
tenant = TTenant(env=neon_env, name=request.node.name, config=tenant_config)
|
||||
yield tenant
|
||||
tenant.shutdown_resources()
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def exclusive_timeline(
|
||||
exclusive_tenant: TTenant,
|
||||
) -> Iterator[TTimeline]:
|
||||
timeline = exclusive_tenant.default_timeline
|
||||
yield timeline
|
||||
@@ -57,6 +57,7 @@ from _pytest.fixtures import FixtureRequest
|
||||
from psycopg2.extensions import connection as PgConnection
|
||||
from psycopg2.extensions import cursor as PgCursor
|
||||
from psycopg2.extensions import make_dsn, parse_dsn
|
||||
from typing_extensions import Literal
|
||||
from urllib3.util.retry import Retry
|
||||
|
||||
from fixtures import overlayfs
|
||||
@@ -93,6 +94,7 @@ from fixtures.utils import (
|
||||
allure_add_grafana_links,
|
||||
allure_attach_from_dir,
|
||||
assert_no_errors,
|
||||
get_self_dir,
|
||||
print_gc_result,
|
||||
subprocess_capture,
|
||||
wait_until,
|
||||
@@ -125,6 +127,149 @@ Env = Dict[str, str]
|
||||
DEFAULT_OUTPUT_DIR: str = "test_output"
|
||||
DEFAULT_BRANCH_NAME: str = "main"
|
||||
|
||||
BASE_PORT: int = 15000
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def base_dir() -> Iterator[Path]:
|
||||
# find the base directory (currently this is the git root)
|
||||
base_dir = get_self_dir().parent.parent
|
||||
log.info(f"base_dir is {base_dir}")
|
||||
|
||||
yield base_dir
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def neon_binpath(base_dir: Path, build_type: str) -> Iterator[Path]:
|
||||
if os.getenv("REMOTE_ENV"):
|
||||
# we are in remote env and do not have neon binaries locally
|
||||
# this is the case for benchmarks run on self-hosted runner
|
||||
return
|
||||
|
||||
# Find the neon binaries.
|
||||
if env_neon_bin := os.environ.get("NEON_BIN"):
|
||||
binpath = Path(env_neon_bin)
|
||||
else:
|
||||
binpath = base_dir / "target" / build_type
|
||||
log.info(f"neon_binpath is {binpath}")
|
||||
|
||||
if not (binpath / "pageserver").exists():
|
||||
raise Exception(f"neon binaries not found at '{binpath}'")
|
||||
|
||||
yield binpath
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def pg_distrib_dir(base_dir: Path) -> Iterator[Path]:
|
||||
if env_postgres_bin := os.environ.get("POSTGRES_DISTRIB_DIR"):
|
||||
distrib_dir = Path(env_postgres_bin).resolve()
|
||||
else:
|
||||
distrib_dir = base_dir / "pg_install"
|
||||
|
||||
log.info(f"pg_distrib_dir is {distrib_dir}")
|
||||
yield distrib_dir
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def top_output_dir(base_dir: Path) -> Iterator[Path]:
|
||||
# Compute the top-level directory for all tests.
|
||||
if env_test_output := os.environ.get("TEST_OUTPUT"):
|
||||
output_dir = Path(env_test_output).resolve()
|
||||
else:
|
||||
output_dir = base_dir / DEFAULT_OUTPUT_DIR
|
||||
output_dir.mkdir(exist_ok=True)
|
||||
|
||||
log.info(f"top_output_dir is {output_dir}")
|
||||
yield output_dir
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def versioned_pg_distrib_dir(pg_distrib_dir: Path, pg_version: PgVersion) -> Iterator[Path]:
|
||||
versioned_dir = pg_distrib_dir / pg_version.v_prefixed
|
||||
|
||||
psql_bin_path = versioned_dir / "bin/psql"
|
||||
postgres_bin_path = versioned_dir / "bin/postgres"
|
||||
|
||||
if os.getenv("REMOTE_ENV"):
|
||||
# When testing against a remote server, we only need the client binary.
|
||||
if not psql_bin_path.exists():
|
||||
raise Exception(f"psql not found at '{psql_bin_path}'")
|
||||
else:
|
||||
if not postgres_bin_path.exists():
|
||||
raise Exception(f"postgres not found at '{postgres_bin_path}'")
|
||||
|
||||
log.info(f"versioned_pg_distrib_dir is {versioned_dir}")
|
||||
yield versioned_dir
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def neon_api_key() -> str:
|
||||
api_key = os.getenv("NEON_API_KEY")
|
||||
if not api_key:
|
||||
raise AssertionError("Set the NEON_API_KEY environment variable")
|
||||
|
||||
return api_key
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def neon_api_base_url() -> str:
|
||||
return os.getenv("NEON_API_BASE_URL", "https://console-stage.neon.build/api/v2")
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def neon_api(neon_api_key: str, neon_api_base_url: str) -> NeonAPI:
|
||||
return NeonAPI(neon_api_key, neon_api_base_url)
|
||||
|
||||
|
||||
def shareable_scope(fixture_name: str, config: Config) -> Literal["session", "function"]:
|
||||
"""Return either session of function scope, depending on TEST_SHARED_FIXTURES envvar.
|
||||
|
||||
This function can be used as a scope like this:
|
||||
@pytest.fixture(scope=shareable_scope)
|
||||
def myfixture(...)
|
||||
...
|
||||
"""
|
||||
scope: Literal["session", "function"]
|
||||
|
||||
if os.environ.get("TEST_SHARED_FIXTURES") is None:
|
||||
# Create the environment in the per-test output directory
|
||||
scope = "function"
|
||||
elif (
|
||||
os.environ.get("BUILD_TYPE") is not None
|
||||
and os.environ.get("DEFAULT_PG_VERSION") is not None
|
||||
):
|
||||
scope = "session"
|
||||
else:
|
||||
pytest.fail(
|
||||
"Shared environment(TEST_SHARED_FIXTURES) requires BUILD_TYPE and DEFAULT_PG_VERSION to be set",
|
||||
pytrace=False,
|
||||
)
|
||||
|
||||
return scope
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def worker_port_num():
|
||||
return (32768 - BASE_PORT) // int(os.environ.get("PYTEST_XDIST_WORKER_COUNT", "1"))
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def worker_seq_no(worker_id: str) -> int:
|
||||
# worker_id is a pytest-xdist fixture
|
||||
# it can be master or gw<number>
|
||||
# parse it to always get a number
|
||||
if worker_id == "master":
|
||||
return 0
|
||||
assert worker_id.startswith("gw")
|
||||
return int(worker_id[2:])
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def worker_base_port(worker_seq_no: int, worker_port_num: int) -> int:
|
||||
# so we divide ports in ranges of ports
|
||||
# so workers have disjoint set of ports for services
|
||||
return BASE_PORT + worker_seq_no * worker_port_num
|
||||
|
||||
|
||||
def get_dir_size(path: str) -> int:
|
||||
"""Return size in bytes."""
|
||||
@@ -136,6 +281,11 @@ def get_dir_size(path: str) -> int:
|
||||
return totalbytes
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def port_distributor(worker_base_port: int, worker_port_num: int) -> PortDistributor:
|
||||
return PortDistributor(base_port=worker_base_port, port_number=worker_port_num)
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def default_broker(
|
||||
port_distributor: PortDistributor,
|
||||
@@ -151,6 +301,18 @@ def default_broker(
|
||||
broker.stop()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def run_id() -> Iterator[uuid.UUID]:
|
||||
yield uuid.uuid4()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def mock_s3_server(port_distributor: PortDistributor) -> Iterator[MockS3Server]:
|
||||
mock_s3_server = MockS3Server(port_distributor.get_port())
|
||||
yield mock_s3_server
|
||||
mock_s3_server.kill()
|
||||
|
||||
|
||||
class PgProtocol:
|
||||
"""Reusable connection logic"""
|
||||
|
||||
@@ -350,7 +512,6 @@ class NeonEnvBuilder:
|
||||
safekeeper_extra_opts: Optional[list[str]] = None,
|
||||
storage_controller_port_override: Optional[int] = None,
|
||||
pageserver_io_buffer_alignment: Optional[int] = None,
|
||||
shared: Optional[bool] = False,
|
||||
):
|
||||
self.repo_dir = repo_dir
|
||||
self.rust_log_override = rust_log_override
|
||||
@@ -407,8 +568,8 @@ class NeonEnvBuilder:
|
||||
|
||||
self.pageserver_io_buffer_alignment = pageserver_io_buffer_alignment
|
||||
|
||||
assert (
|
||||
test_name.startswith("test_") or shared
|
||||
assert test_name.startswith(
|
||||
"test_"
|
||||
), "Unexpectedly instantiated from outside a test function"
|
||||
self.test_name = test_name
|
||||
|
||||
@@ -1270,8 +1431,8 @@ class NeonEnv:
|
||||
return "ep-" + str(self.endpoint_counter)
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def neon_simple_env(
|
||||
@pytest.fixture(scope=shareable_scope)
|
||||
def _shared_simple_env(
|
||||
request: FixtureRequest,
|
||||
pytestconfig: Config,
|
||||
port_distributor: PortDistributor,
|
||||
@@ -1289,13 +1450,19 @@ def neon_simple_env(
|
||||
pageserver_io_buffer_alignment: Optional[int],
|
||||
) -> Iterator[NeonEnv]:
|
||||
"""
|
||||
Simple Neon environment, with no authentication and no safekeepers.
|
||||
# Internal fixture backing the `neon_simple_env` fixture. If TEST_SHARED_FIXTURES
|
||||
is set, this is shared by all tests using `neon_simple_env`.
|
||||
|
||||
This fixture will use RemoteStorageKind.LOCAL_FS with pageserver.
|
||||
"""
|
||||
|
||||
# Create the environment in the per-test output directory
|
||||
repo_dir = get_test_repo_dir(request, top_output_dir)
|
||||
if os.environ.get("TEST_SHARED_FIXTURES") is None:
|
||||
# Create the environment in the per-test output directory
|
||||
repo_dir = get_test_repo_dir(request, top_output_dir)
|
||||
else:
|
||||
# We're running shared fixtures. Share a single directory.
|
||||
repo_dir = top_output_dir / "shared_repo"
|
||||
shutil.rmtree(repo_dir, ignore_errors=True)
|
||||
|
||||
with NeonEnvBuilder(
|
||||
top_output_dir=top_output_dir,
|
||||
@@ -1317,22 +1484,25 @@ def neon_simple_env(
|
||||
) as builder:
|
||||
env = builder.init_start()
|
||||
|
||||
# For convenience in tests, create a branch from the freshly-initialized cluster.
|
||||
env.neon_cli.create_branch("empty", ancestor_branch_name=DEFAULT_BRANCH_NAME)
|
||||
|
||||
yield env
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def neon_endpoint(request: FixtureRequest, neon_shared_env: NeonEnv) -> Endpoint:
|
||||
neon_shared_env.neon_cli.create_branch(request.node.name)
|
||||
ep = neon_shared_env.endpoints.create_start(request.node.name)
|
||||
def neon_simple_env(_shared_simple_env: NeonEnv) -> Iterator[NeonEnv]:
|
||||
"""
|
||||
Simple Neon environment, with no authentication and no safekeepers.
|
||||
|
||||
try:
|
||||
yield ep
|
||||
finally:
|
||||
if ep.is_running():
|
||||
try:
|
||||
ep.stop()
|
||||
except BaseException:
|
||||
pass
|
||||
If TEST_SHARED_FIXTURES environment variable is set, we reuse the same
|
||||
environment for all tests that use 'neon_simple_env', keeping the
|
||||
page server and safekeepers running. Any compute nodes are stopped after
|
||||
each the test, however.
|
||||
"""
|
||||
yield _shared_simple_env
|
||||
|
||||
_shared_simple_env.endpoints.stop_all()
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
@@ -4690,35 +4860,27 @@ def _get_test_dir(request: FixtureRequest, top_output_dir: Path, prefix: str) ->
|
||||
return test_dir
|
||||
|
||||
|
||||
def get_test_output_dir(
|
||||
request: FixtureRequest, top_output_dir: Path, prefix: Optional[str] = None
|
||||
) -> Path:
|
||||
def get_test_output_dir(request: FixtureRequest, top_output_dir: Path) -> Path:
|
||||
"""
|
||||
The working directory for a test.
|
||||
"""
|
||||
return _get_test_dir(request, top_output_dir, prefix or "")
|
||||
return _get_test_dir(request, top_output_dir, "")
|
||||
|
||||
|
||||
def get_test_overlay_dir(
|
||||
request: FixtureRequest, top_output_dir: Path, prefix: Optional[str] = None
|
||||
) -> Path:
|
||||
def get_test_overlay_dir(request: FixtureRequest, top_output_dir: Path) -> Path:
|
||||
"""
|
||||
Directory that contains `upperdir` and `workdir` for overlayfs mounts
|
||||
that a test creates. See `NeonEnvBuilder.overlay_mount`.
|
||||
"""
|
||||
return _get_test_dir(request, top_output_dir, f"overlay-{prefix or ''}")
|
||||
return _get_test_dir(request, top_output_dir, "overlay-")
|
||||
|
||||
|
||||
def get_shared_snapshot_dir_path(
|
||||
top_output_dir: Path, snapshot_name: str, prefix: Optional[str] = None
|
||||
) -> Path:
|
||||
return top_output_dir / f"{prefix or ''}shared-snapshots" / snapshot_name
|
||||
def get_shared_snapshot_dir_path(top_output_dir: Path, snapshot_name: str) -> Path:
|
||||
return top_output_dir / "shared-snapshots" / snapshot_name
|
||||
|
||||
|
||||
def get_test_repo_dir(
|
||||
request: FixtureRequest, top_output_dir: Path, prefix: Optional[str] = None
|
||||
) -> Path:
|
||||
return get_test_output_dir(request, top_output_dir, prefix or "") / "repo"
|
||||
def get_test_repo_dir(request: FixtureRequest, top_output_dir: Path) -> Path:
|
||||
return get_test_output_dir(request, top_output_dir) / "repo"
|
||||
|
||||
|
||||
def pytest_addoption(parser: Parser):
|
||||
@@ -4736,7 +4898,14 @@ SMALL_DB_FILE_NAME_REGEX: re.Pattern = re.compile( # type: ignore[type-arg]
|
||||
|
||||
|
||||
# This is autouse, so the test output directory always gets created, even
|
||||
# if a test doesn't put anything there.
|
||||
# if a test doesn't put anything there. It also solves a problem with the
|
||||
# neon_simple_env fixture: if TEST_SHARED_FIXTURES is not set, it
|
||||
# creates the repo in the test output directory. But it cannot depend on
|
||||
# 'test_output_dir' fixture, because when TEST_SHARED_FIXTURES is not set,
|
||||
# it has 'session' scope and cannot access fixtures with 'function'
|
||||
# scope. So it uses the get_test_output_dir() function to get the path, and
|
||||
# this fixture ensures that the directory exists. That works because
|
||||
# 'autouse' fixtures are run before other fixtures.
|
||||
#
|
||||
# NB: we request the overlay dir fixture so the fixture does its cleanups
|
||||
@pytest.fixture(scope="function", autouse=True)
|
||||
@@ -5083,7 +5252,7 @@ def tenant_get_shards(
|
||||
return [(TenantShardId(tenant_id, 0, 0), override_pageserver or env.pageserver)]
|
||||
|
||||
|
||||
def wait_replica_caughtup(primary: PgProtocol, secondary: PgProtocol):
|
||||
def wait_replica_caughtup(primary: Endpoint, secondary: Endpoint):
|
||||
primary_lsn = Lsn(
|
||||
primary.safe_psql_scalar("SELECT pg_current_wal_flush_lsn()", log_query=False)
|
||||
)
|
||||
|
||||
@@ -14,60 +14,32 @@ Dynamically parametrize tests by different parameters
|
||||
"""
|
||||
|
||||
|
||||
def get_pgversions():
|
||||
if (v := os.getenv("DEFAULT_PG_VERSION")) is None:
|
||||
pg_versions = [version for version in PgVersion if version != PgVersion.NOT_SET]
|
||||
else:
|
||||
pg_versions = [PgVersion(v)]
|
||||
|
||||
return pg_versions
|
||||
@pytest.fixture(scope="function", autouse=True)
|
||||
def pg_version() -> Optional[PgVersion]:
|
||||
return None
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
scope="session",
|
||||
autouse=True,
|
||||
params=get_pgversions(),
|
||||
ids=lambda v: f"pg{v}",
|
||||
)
|
||||
def pg_version(request) -> Optional[PgVersion]:
|
||||
return request.param
|
||||
@pytest.fixture(scope="function", autouse=True)
|
||||
def build_type() -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
def get_buildtypes():
|
||||
if (bt := os.getenv("BUILD_TYPE")) is None:
|
||||
build_types = ["debug", "release"]
|
||||
else:
|
||||
build_types = [bt.lower()]
|
||||
|
||||
return build_types
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
scope="session",
|
||||
autouse=True,
|
||||
params=get_buildtypes(),
|
||||
ids=lambda t: f"{t}",
|
||||
)
|
||||
def build_type(request) -> Optional[str]:
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
@pytest.fixture(scope="function", autouse=True)
|
||||
def platform() -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
@pytest.fixture(scope="function", autouse=True)
|
||||
def pageserver_virtual_file_io_engine() -> Optional[str]:
|
||||
return os.getenv("PAGESERVER_VIRTUAL_FILE_IO_ENGINE")
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
@pytest.fixture(scope="function", autouse=True)
|
||||
def pageserver_io_buffer_alignment() -> Optional[int]:
|
||||
return None
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
@pytest.fixture(scope="function", autouse=True)
|
||||
def pageserver_aux_file_policy() -> Optional[AuxFileStore]:
|
||||
return None
|
||||
|
||||
@@ -81,12 +53,26 @@ def get_pageserver_default_tenant_config_compaction_algorithm() -> Optional[Dict
|
||||
return v
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
@pytest.fixture(scope="function", autouse=True)
|
||||
def pageserver_default_tenant_config_compaction_algorithm() -> Optional[Dict[str, Any]]:
|
||||
return get_pageserver_default_tenant_config_compaction_algorithm()
|
||||
|
||||
|
||||
def pytest_generate_tests(metafunc: Metafunc):
|
||||
if (bt := os.getenv("BUILD_TYPE")) is None:
|
||||
build_types = ["debug", "release"]
|
||||
else:
|
||||
build_types = [bt.lower()]
|
||||
|
||||
metafunc.parametrize("build_type", build_types)
|
||||
|
||||
if (v := os.getenv("DEFAULT_PG_VERSION")) is None:
|
||||
pg_versions = [version for version in PgVersion if version != PgVersion.NOT_SET]
|
||||
else:
|
||||
pg_versions = [PgVersion(v)]
|
||||
|
||||
metafunc.parametrize("pg_version", pg_versions, ids=map(lambda v: f"pg{v}", pg_versions))
|
||||
|
||||
# A hacky way to parametrize tests only for `pageserver_virtual_file_io_engine=std-fs`
|
||||
# And do not change test name for default `pageserver_virtual_file_io_engine=tokio-epoll-uring` to keep tests statistics
|
||||
if (io_engine := os.getenv("PAGESERVER_VIRTUAL_FILE_IO_ENGINE", "")) not in (
|
||||
@@ -103,7 +89,6 @@ def pytest_generate_tests(metafunc: Metafunc):
|
||||
"pageserver_default_tenant_config_compaction_algorithm",
|
||||
[explicit_default],
|
||||
ids=[explicit_default["kind"]],
|
||||
scope="session",
|
||||
)
|
||||
|
||||
# For performance tests, parametrize also by platform
|
||||
|
||||
@@ -1,303 +0,0 @@
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterator, Optional, cast
|
||||
|
||||
import pytest
|
||||
from _pytest.config import Config
|
||||
from _pytest.fixtures import FixtureRequest
|
||||
|
||||
from fixtures import overlayfs
|
||||
from fixtures.broker import NeonBroker
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_api import NeonAPI
|
||||
from fixtures.neon_fixtures import (
|
||||
DEFAULT_OUTPUT_DIR,
|
||||
NeonEnv,
|
||||
NeonEnvBuilder,
|
||||
get_test_output_dir,
|
||||
get_test_overlay_dir,
|
||||
get_test_repo_dir,
|
||||
)
|
||||
from fixtures.pg_version import PgVersion
|
||||
from fixtures.port_distributor import PortDistributor
|
||||
from fixtures.remote_storage import MockS3Server
|
||||
from fixtures.utils import AuxFileStore, allure_attach_from_dir, get_self_dir
|
||||
|
||||
BASE_PORT: int = 15000
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def base_dir() -> Iterator[Path]:
|
||||
# find the base directory (currently this is the git root)
|
||||
base_dir = get_self_dir().parent.parent
|
||||
log.info(f"base_dir is {base_dir}")
|
||||
|
||||
yield base_dir
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def neon_binpath(base_dir: Path, build_type: str) -> Iterator[Path]:
|
||||
if os.getenv("REMOTE_ENV"):
|
||||
# we are in remote env and do not have neon binaries locally
|
||||
# this is the case for benchmarks run on self-hosted runner
|
||||
return
|
||||
|
||||
# Find the neon binaries.
|
||||
if env_neon_bin := os.environ.get("NEON_BIN"):
|
||||
binpath = Path(env_neon_bin)
|
||||
else:
|
||||
binpath = base_dir / "target" / build_type
|
||||
log.info(f"neon_binpath is {binpath}")
|
||||
|
||||
if not (binpath / "pageserver").exists():
|
||||
raise Exception(f"neon binaries not found at '{binpath}'")
|
||||
|
||||
yield binpath
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def pg_distrib_dir(base_dir: Path) -> Iterator[Path]:
|
||||
if env_postgres_bin := os.environ.get("POSTGRES_DISTRIB_DIR"):
|
||||
distrib_dir = Path(env_postgres_bin).resolve()
|
||||
else:
|
||||
distrib_dir = base_dir / "pg_install"
|
||||
|
||||
log.info(f"pg_distrib_dir is {distrib_dir}")
|
||||
yield distrib_dir
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def top_output_dir(base_dir: Path) -> Iterator[Path]:
|
||||
# Compute the top-level directory for all tests.
|
||||
if env_test_output := os.environ.get("TEST_OUTPUT"):
|
||||
output_dir = Path(env_test_output).resolve()
|
||||
else:
|
||||
output_dir = base_dir / DEFAULT_OUTPUT_DIR
|
||||
output_dir.mkdir(exist_ok=True)
|
||||
|
||||
log.info(f"top_output_dir is {output_dir}")
|
||||
yield output_dir
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def versioned_pg_distrib_dir(pg_distrib_dir: Path, pg_version: PgVersion) -> Iterator[Path]:
|
||||
versioned_dir = pg_distrib_dir / pg_version.v_prefixed
|
||||
|
||||
psql_bin_path = versioned_dir / "bin/psql"
|
||||
postgres_bin_path = versioned_dir / "bin/postgres"
|
||||
|
||||
if os.getenv("REMOTE_ENV"):
|
||||
# When testing against a remote server, we only need the client binary.
|
||||
if not psql_bin_path.exists():
|
||||
raise Exception(f"psql not found at '{psql_bin_path}'")
|
||||
else:
|
||||
if not postgres_bin_path.exists():
|
||||
raise Exception(f"postgres not found at '{postgres_bin_path}'")
|
||||
|
||||
log.info(f"versioned_pg_distrib_dir is {versioned_dir}")
|
||||
yield versioned_dir
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def neon_api_key() -> str:
|
||||
api_key = os.getenv("NEON_API_KEY")
|
||||
if not api_key:
|
||||
raise AssertionError("Set the NEON_API_KEY environment variable")
|
||||
|
||||
return api_key
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def neon_api_base_url() -> str:
|
||||
return os.getenv("NEON_API_BASE_URL", "https://console-stage.neon.build/api/v2")
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def neon_api(neon_api_key: str, neon_api_base_url: str) -> NeonAPI:
|
||||
return NeonAPI(neon_api_key, neon_api_base_url)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def worker_port_num():
|
||||
return (32768 - BASE_PORT) // int(os.environ.get("PYTEST_XDIST_WORKER_COUNT", "1"))
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def worker_seq_no(worker_id: str) -> int:
|
||||
# worker_id is a pytest-xdist fixture
|
||||
# it can be master or gw<number>
|
||||
# parse it to always get a number
|
||||
if worker_id == "master":
|
||||
return 0
|
||||
assert worker_id.startswith("gw")
|
||||
return int(worker_id[2:])
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def worker_base_port(worker_seq_no: int, worker_port_num: int) -> int:
|
||||
# so we divide ports in ranges of ports
|
||||
# so workers have disjoint set of ports for services
|
||||
return BASE_PORT + worker_seq_no * worker_port_num
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def port_distributor(worker_base_port: int, worker_port_num: int) -> PortDistributor:
|
||||
return PortDistributor(base_port=worker_base_port, port_number=worker_port_num)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def shared_broker(
|
||||
port_distributor: PortDistributor,
|
||||
shared_test_output_dir: Path,
|
||||
neon_binpath: Path,
|
||||
) -> Iterator[NeonBroker]:
|
||||
# multiple pytest sessions could get launched in parallel, get them different ports/datadirs
|
||||
client_port = port_distributor.get_port()
|
||||
broker_logfile = shared_test_output_dir / "repo" / "storage_broker.log"
|
||||
|
||||
broker = NeonBroker(logfile=broker_logfile, port=client_port, neon_binpath=neon_binpath)
|
||||
yield broker
|
||||
broker.stop()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def run_id() -> Iterator[uuid.UUID]:
|
||||
yield uuid.uuid4()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
def neon_shared_env(
|
||||
pytestconfig: Config,
|
||||
port_distributor: PortDistributor,
|
||||
mock_s3_server: MockS3Server,
|
||||
shared_broker: NeonBroker,
|
||||
run_id: uuid.UUID,
|
||||
top_output_dir: Path,
|
||||
shared_test_output_dir: Path,
|
||||
neon_binpath: Path,
|
||||
build_type: str,
|
||||
pg_distrib_dir: Path,
|
||||
pg_version: PgVersion,
|
||||
pageserver_virtual_file_io_engine: str,
|
||||
pageserver_aux_file_policy: Optional[AuxFileStore],
|
||||
pageserver_default_tenant_config_compaction_algorithm: Optional[Dict[str, Any]],
|
||||
pageserver_io_buffer_alignment: Optional[int],
|
||||
request: FixtureRequest,
|
||||
) -> Iterator[NeonEnv]:
|
||||
"""
|
||||
Simple Neon environment, with no authentication and no safekeepers.
|
||||
|
||||
This fixture will use RemoteStorageKind.LOCAL_FS with pageserver.
|
||||
"""
|
||||
|
||||
prefix = f"shared[{build_type}-{pg_version.v_prefixed}]-"
|
||||
|
||||
# Create the environment in the per-test output directory
|
||||
repo_dir = get_test_repo_dir(request, top_output_dir, prefix)
|
||||
|
||||
with NeonEnvBuilder(
|
||||
top_output_dir=top_output_dir,
|
||||
repo_dir=repo_dir,
|
||||
port_distributor=port_distributor,
|
||||
broker=shared_broker,
|
||||
mock_s3_server=mock_s3_server,
|
||||
neon_binpath=neon_binpath,
|
||||
pg_distrib_dir=pg_distrib_dir,
|
||||
pg_version=pg_version,
|
||||
run_id=run_id,
|
||||
preserve_database_files=cast(bool, pytestconfig.getoption("--preserve-database-files")),
|
||||
test_name=f"{prefix}{request.node.name}",
|
||||
test_output_dir=shared_test_output_dir,
|
||||
pageserver_virtual_file_io_engine=pageserver_virtual_file_io_engine,
|
||||
pageserver_aux_file_policy=pageserver_aux_file_policy,
|
||||
pageserver_default_tenant_config_compaction_algorithm=pageserver_default_tenant_config_compaction_algorithm,
|
||||
pageserver_io_buffer_alignment=pageserver_io_buffer_alignment,
|
||||
shared=True,
|
||||
) as builder:
|
||||
env = builder.init_start()
|
||||
|
||||
yield env
|
||||
|
||||
|
||||
# This is autouse, so the test output directory always gets created, even
|
||||
# if a test doesn't put anything there.
|
||||
#
|
||||
# NB: we request the overlay dir fixture so the fixture does its cleanups
|
||||
@pytest.fixture(scope="session")
|
||||
def shared_test_output_dir(
|
||||
request: FixtureRequest,
|
||||
pg_version: PgVersion,
|
||||
build_type: str,
|
||||
top_output_dir: Path,
|
||||
shared_test_overlay_dir: Path,
|
||||
) -> Iterator[Path]:
|
||||
"""Create the working directory for shared tests."""
|
||||
|
||||
prefix = f"shared[{build_type}-{pg_version.v_prefixed}]-"
|
||||
|
||||
# one directory per test
|
||||
test_dir = get_test_output_dir(request, top_output_dir, prefix)
|
||||
|
||||
log.info(f"test_output_dir is {test_dir}")
|
||||
shutil.rmtree(test_dir, ignore_errors=True)
|
||||
test_dir.mkdir()
|
||||
|
||||
yield test_dir
|
||||
|
||||
# Allure artifacts creation might involve the creation of `.tar.zst` archives,
|
||||
# which aren't going to be used if Allure results collection is not enabled
|
||||
# (i.e. --alluredir is not set).
|
||||
# Skip `allure_attach_from_dir` in this case
|
||||
if not request.config.getoption("--alluredir"):
|
||||
return
|
||||
|
||||
preserve_database_files = False
|
||||
for k, v in request.node.user_properties:
|
||||
# NB: the neon_env_builder fixture uses this fixture (test_output_dir).
|
||||
# So, neon_env_builder's cleanup runs before here.
|
||||
# The cleanup propagates NeonEnvBuilder.preserve_database_files into this user property.
|
||||
if k == "preserve_database_files":
|
||||
assert isinstance(v, bool)
|
||||
preserve_database_files = v
|
||||
|
||||
allure_attach_from_dir(test_dir, preserve_database_files)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def shared_test_overlay_dir(request: FixtureRequest, top_output_dir: Path) -> Optional[Path]:
|
||||
"""
|
||||
Idempotently create a test's overlayfs mount state directory.
|
||||
If the functionality isn't enabled via env var, returns None.
|
||||
|
||||
The procedure cleans up after previous runs that were aborted (e.g. due to Ctrl-C, OOM kills, etc).
|
||||
"""
|
||||
|
||||
if os.getenv("NEON_ENV_BUILDER_USE_OVERLAYFS_FOR_SNAPSHOTS") is None:
|
||||
return None
|
||||
|
||||
overlay_dir = get_test_overlay_dir(request, top_output_dir)
|
||||
log.info(f"test_overlay_dir is {overlay_dir}")
|
||||
|
||||
overlay_dir.mkdir(exist_ok=True)
|
||||
# unmount stale overlayfs mounts which subdirectories of `overlay_dir/*` as the overlayfs `upperdir` and `workdir`
|
||||
for mountpoint in overlayfs.iter_mounts_beneath(get_test_output_dir(request, top_output_dir)):
|
||||
cmd = ["sudo", "umount", str(mountpoint)]
|
||||
log.info(
|
||||
f"Unmounting stale overlayfs mount probably created during earlier test run: {cmd}"
|
||||
)
|
||||
subprocess.run(cmd, capture_output=True, check=True)
|
||||
# the overlayfs `workdir`` is owned by `root`, shutil.rmtree won't work.
|
||||
cmd = ["sudo", "rm", "-rf", str(overlay_dir)]
|
||||
subprocess.run(cmd, capture_output=True, check=True)
|
||||
|
||||
overlay_dir.mkdir()
|
||||
|
||||
return overlay_dir
|
||||
|
||||
# no need to clean up anything: on clean shutdown,
|
||||
# NeonEnvBuilder.overlay_cleanup_teardown takes care of cleanup
|
||||
# and on unclean shutdown, this function will take care of it
|
||||
# on the next test run
|
||||
@@ -1,13 +0,0 @@
|
||||
from typing import Iterator
|
||||
|
||||
import pytest
|
||||
|
||||
from fixtures.port_distributor import PortDistributor
|
||||
from fixtures.remote_storage import MockS3Server
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def mock_s3_server(port_distributor: PortDistributor) -> Iterator[MockS3Server]:
|
||||
mock_s3_server = MockS3Server(port_distributor.get_port())
|
||||
yield mock_s3_server
|
||||
mock_s3_server.kill()
|
||||
@@ -1,348 +0,0 @@
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional, cast, List, Dict
|
||||
|
||||
import pytest
|
||||
|
||||
from fixtures.common_types import Lsn, TenantId, TimelineId
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import Endpoint, NeonEnv, PgProtocol, DEFAULT_BRANCH_NAME, tenant_get_shards, \
|
||||
check_restored_datadir_content
|
||||
from fixtures.pageserver.utils import wait_for_last_record_lsn
|
||||
from fixtures.safekeeper.utils import are_walreceivers_absent
|
||||
from fixtures.utils import wait_until
|
||||
|
||||
"""
|
||||
In this file most important resources are exposed as function-level fixtures
|
||||
that depend on session-level resources like pageservers and safekeepers.
|
||||
|
||||
The main rationale here is that we don't need to initialize a new SK/PS/etc
|
||||
every time we want to test something that has branching: we can just as well
|
||||
reuse still-available PageServers from other tests of the same kind.
|
||||
"""
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def shared_storage_repo_path(build_type: str, base_dir: Path) -> Path:
|
||||
return base_dir / f"shared[{build_type}]" / "repo"
|
||||
|
||||
|
||||
class TEndpoint(PgProtocol):
|
||||
def clear_shared_buffers(self, cursor: Optional[Any] = None):
|
||||
"""
|
||||
Best-effort way to clear postgres buffers. Pinned buffers will not be 'cleared.'
|
||||
|
||||
Might also clear LFC.
|
||||
"""
|
||||
if cursor is not None:
|
||||
cursor.execute("select clear_buffer_cache()")
|
||||
else:
|
||||
self.safe_psql("select clear_buffer_cache()")
|
||||
|
||||
_TEndpoint = Endpoint
|
||||
|
||||
|
||||
class TTimeline:
|
||||
__primary: Optional[_TEndpoint]
|
||||
__secondary: Optional[_TEndpoint]
|
||||
|
||||
def __init__(self, timeline_id: TimelineId, tenant: "TTenant", name: str):
|
||||
self.id = timeline_id
|
||||
self.tenant = tenant
|
||||
self.name = name
|
||||
self.__primary = None
|
||||
self.__secondary = None
|
||||
|
||||
@property
|
||||
def primary(self) -> TEndpoint:
|
||||
if self.__primary is None:
|
||||
self.__primary = cast(_TEndpoint, self.tenant.create_endpoint(
|
||||
name=self._get_full_endpoint_name("primary"),
|
||||
timeline_id=self.id,
|
||||
primary=True,
|
||||
running=True,
|
||||
))
|
||||
|
||||
return cast(TEndpoint, self.__primary)
|
||||
|
||||
def primary_with_config(self, config_lines: List[str], reboot: bool = False):
|
||||
if self.__primary is None:
|
||||
self.__primary = cast(_TEndpoint, self.tenant.create_endpoint(
|
||||
name=self._get_full_endpoint_name("primary"),
|
||||
timeline_id=self.id,
|
||||
primary=True,
|
||||
running=True,
|
||||
config_lines=config_lines,
|
||||
))
|
||||
else:
|
||||
self.__primary.config(config_lines)
|
||||
if reboot:
|
||||
if self.__primary.is_running():
|
||||
self.__primary.stop()
|
||||
self.__primary.start()
|
||||
|
||||
return cast(TTimeline, self.__primary)
|
||||
|
||||
@property
|
||||
def secondary(self) -> TEndpoint:
|
||||
if self.__secondary is None:
|
||||
self.__secondary = cast(_TEndpoint, self.tenant.create_endpoint(
|
||||
name=self._get_full_endpoint_name("secondary"),
|
||||
timeline_id=self.id,
|
||||
primary=False,
|
||||
running=True,
|
||||
))
|
||||
|
||||
return cast(TEndpoint, self.__secondary)
|
||||
|
||||
def secondary_with_config(self, config_lines: List[str], reboot: bool = False) -> TEndpoint:
|
||||
if self.__secondary is None:
|
||||
self.__secondary = cast(_TEndpoint, self.tenant.create_endpoint(
|
||||
name=self._get_full_endpoint_name("secondary"),
|
||||
timeline_id=self.id,
|
||||
primary=False,
|
||||
running=True,
|
||||
config_lines=config_lines,
|
||||
))
|
||||
else:
|
||||
self.__secondary.config(config_lines)
|
||||
if reboot:
|
||||
if self.__secondary.is_running():
|
||||
self.__secondary.stop()
|
||||
self.__secondary.start()
|
||||
|
||||
return cast(TEndpoint, self.__secondary)
|
||||
|
||||
def _get_full_endpoint_name(self, name) -> str:
|
||||
return f"{self.name}.{name}"
|
||||
|
||||
def create_branch(self, name: str, lsn: Optional[Lsn]) -> "TTimeline":
|
||||
return self.tenant.create_timeline(
|
||||
new_name=name,
|
||||
parent_name=self.name,
|
||||
branch_at=lsn,
|
||||
)
|
||||
|
||||
def stop_and_flush(self, endpoint: TEndpoint):
|
||||
self.tenant.stop_endpoint(endpoint)
|
||||
self.tenant.flush_timeline_data(self)
|
||||
|
||||
def checkpoint(self, **kwargs):
|
||||
self.tenant.checkpoint_timeline(self, **kwargs)
|
||||
|
||||
|
||||
class TTenant:
|
||||
"""
|
||||
An object representing a single test case on a shared pageserver.
|
||||
All operations here are safe practically safe.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
env: NeonEnv,
|
||||
name: str,
|
||||
config: Optional[Dict[str, Any]] = None,
|
||||
):
|
||||
self.id = TenantId.generate()
|
||||
self.timelines = []
|
||||
self.timeline_names = {}
|
||||
self.timeline_ids = {}
|
||||
self.name = name
|
||||
|
||||
# publicly inaccessible stuff, used during shutdown
|
||||
self.__endpoints: List[Endpoint] = []
|
||||
self.__env: NeonEnv = env
|
||||
|
||||
env.neon_cli.create_tenant(
|
||||
tenant_id=self.id,
|
||||
set_default=False,
|
||||
conf=config
|
||||
)
|
||||
|
||||
self.first_timeline_id = env.neon_cli.create_branch(
|
||||
new_branch_name=f"{self.name}:{DEFAULT_BRANCH_NAME}",
|
||||
ancestor_branch_name=DEFAULT_BRANCH_NAME,
|
||||
tenant_id=self.id,
|
||||
)
|
||||
|
||||
self._add_timeline(
|
||||
TTimeline(
|
||||
timeline_id=self.first_timeline_id,
|
||||
tenant=self,
|
||||
name=DEFAULT_BRANCH_NAME,
|
||||
)
|
||||
)
|
||||
|
||||
def _add_timeline(self, timeline: TTimeline):
|
||||
assert timeline.tenant == self
|
||||
assert timeline not in self.timelines
|
||||
assert timeline.id is not None
|
||||
|
||||
self.timelines.append(timeline)
|
||||
self.timeline_ids[timeline.id] = timeline
|
||||
|
||||
if timeline.name is not None:
|
||||
self.timeline_names[timeline.name] = timeline
|
||||
|
||||
def create_timeline(
|
||||
self,
|
||||
new_name: str,
|
||||
parent_name: Optional[str] = None,
|
||||
parent_id: Optional[TimelineId] = None,
|
||||
branch_at: Optional[Lsn] = None,
|
||||
) -> TTimeline:
|
||||
if parent_name is not None:
|
||||
pass
|
||||
elif parent_id is not None:
|
||||
assert parent_name is None
|
||||
parent = self.timeline_ids[parent_id]
|
||||
parent_name = parent.name
|
||||
else:
|
||||
raise LookupError("Timeline creation requires parent by either ID or name")
|
||||
|
||||
assert parent_name is not None
|
||||
|
||||
new_id = self.__env.neon_cli.create_branch(
|
||||
new_branch_name=f"{self.name}:{new_name}",
|
||||
ancestor_branch_name=f"{self.name}:{parent_name}",
|
||||
tenant_id=self.id,
|
||||
ancestor_start_lsn=branch_at,
|
||||
)
|
||||
|
||||
new_tl = TTimeline(
|
||||
timeline_id=new_id,
|
||||
tenant=self,
|
||||
name=new_name,
|
||||
)
|
||||
self._add_timeline(new_tl)
|
||||
|
||||
return new_tl
|
||||
|
||||
@property
|
||||
def default_timeline(self) -> TTimeline:
|
||||
return self.timeline_ids.get(self.first_timeline_id)
|
||||
|
||||
def start_endpoint(self, ep: TEndpoint):
|
||||
if ep not in self.__endpoints:
|
||||
return
|
||||
|
||||
ep = cast(Endpoint, ep)
|
||||
|
||||
if not ep.is_running():
|
||||
ep.start()
|
||||
|
||||
def stop_endpoint(self, ep: TEndpoint, mode: str = "fast"):
|
||||
if ep not in self.__endpoints:
|
||||
return
|
||||
|
||||
ep = cast(Endpoint, ep)
|
||||
|
||||
if ep.is_running():
|
||||
ep.stop(mode=mode)
|
||||
|
||||
def create_endpoint(
|
||||
self,
|
||||
name: str,
|
||||
timeline_id: TimelineId,
|
||||
primary: bool = True,
|
||||
running: bool = False,
|
||||
port: Optional[int] = None,
|
||||
http_port: Optional[int] = None,
|
||||
lsn: Optional[Lsn] = None,
|
||||
config_lines: Optional[List[str]] = None,
|
||||
) -> TEndpoint:
|
||||
endpoint: _TEndpoint
|
||||
|
||||
if port is None:
|
||||
port = self.__env.port_distributor.get_port()
|
||||
|
||||
if http_port is None:
|
||||
http_port = self.__env.port_distributor.get_port()
|
||||
|
||||
if running:
|
||||
endpoint = self.__env.endpoints.create_start(
|
||||
branch_name=self.timeline_ids[timeline_id].name,
|
||||
endpoint_id=f"{self.name}.{name}",
|
||||
tenant_id=self.id,
|
||||
lsn=lsn,
|
||||
hot_standby=not primary,
|
||||
config_lines=config_lines,
|
||||
)
|
||||
else:
|
||||
endpoint = self.__env.endpoints.create(
|
||||
branch_name=self.timeline_ids[timeline_id].name,
|
||||
endpoint_id=f"{self.name}.{name}",
|
||||
tenant_id=self.id,
|
||||
lsn=lsn,
|
||||
hot_standby=not primary,
|
||||
config_lines=config_lines,
|
||||
)
|
||||
|
||||
self.__endpoints.append(endpoint)
|
||||
|
||||
return endpoint
|
||||
|
||||
def shutdown_resources(self):
|
||||
for ep in self.__endpoints:
|
||||
try:
|
||||
ep.stop_and_destroy("fast")
|
||||
except BaseException as e:
|
||||
log.error("Error encountered while shutting down endpoint %s", ep.endpoint_id, exc_info=e)
|
||||
|
||||
def reconfigure(self, endpoint: TEndpoint, lines: List[str], restart: bool):
|
||||
if endpoint not in self.__endpoints:
|
||||
return
|
||||
|
||||
endpoint = cast(_TEndpoint, endpoint)
|
||||
was_running = endpoint.is_running()
|
||||
if restart and was_running:
|
||||
endpoint.stop()
|
||||
|
||||
endpoint.config(lines)
|
||||
|
||||
if restart:
|
||||
endpoint.start()
|
||||
|
||||
def flush_timeline_data(self, timeline: TTimeline) -> Lsn:
|
||||
commit_lsn: Lsn = Lsn(0)
|
||||
# In principle in the absense of failures polling single sk would be enough.
|
||||
for sk in self.__env.safekeepers:
|
||||
cli = sk.http_client()
|
||||
# wait until compute connections are gone
|
||||
wait_until(30, 0.5, partial(are_walreceivers_absent, cli, self.id, timeline.id))
|
||||
commit_lsn = max(cli.get_commit_lsn(self.id, timeline.id), commit_lsn)
|
||||
|
||||
# Note: depending on WAL filtering implementation, probably most shards
|
||||
# won't be able to reach commit_lsn (unless gaps are also ack'ed), so this
|
||||
# is broken in sharded case.
|
||||
shards = tenant_get_shards(env=self.__env, tenant_id=self.id)
|
||||
for tenant_shard_id, pageserver in shards:
|
||||
log.info(
|
||||
f"flush_ep_to_pageserver: waiting for {commit_lsn} on shard {tenant_shard_id} on pageserver {pageserver.id})"
|
||||
)
|
||||
waited = wait_for_last_record_lsn(
|
||||
pageserver.http_client(), tenant_shard_id, timeline.id, commit_lsn
|
||||
)
|
||||
|
||||
assert waited >= commit_lsn
|
||||
|
||||
return commit_lsn
|
||||
|
||||
def checkpoint_timeline(self, timeline: TTimeline, **kwargs):
|
||||
self.__env.pageserver.http_client().timeline_checkpoint(
|
||||
tenant_id=self.id,
|
||||
timeline_id=timeline.id,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
def pgdatadir(self, endpoint: TEndpoint):
|
||||
if endpoint not in self.__endpoints:
|
||||
return None
|
||||
|
||||
return cast(Endpoint, endpoint).pgdata_dir
|
||||
|
||||
def check_restored_datadir_content(self, path, endpoint: TEndpoint, *args, **kwargs):
|
||||
if endpoint not in self.__endpoints:
|
||||
return
|
||||
|
||||
check_restored_datadir_content(path, self.__env, cast(Endpoint, endpoint), *args, **kwargs)
|
||||
@@ -22,8 +22,10 @@ if TYPE_CHECKING:
|
||||
def test_logical_replication(neon_simple_env: NeonEnv, pg_bin: PgBin, vanilla_pg):
|
||||
env = neon_simple_env
|
||||
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
env.neon_cli.create_branch("test_logical_replication", "empty")
|
||||
endpoint = env.endpoints.create_start("test_logical_replication")
|
||||
|
||||
log.info("postgres is running on 'test_logical_replication' branch")
|
||||
pg_bin.run_capture(["pgbench", "-i", "-s10", endpoint.connstr()])
|
||||
|
||||
endpoint.safe_psql("create publication pub1 for table pgbench_accounts, pgbench_history")
|
||||
|
||||
@@ -8,10 +8,11 @@ from fixtures.neon_fixtures import NeonEnv
|
||||
#
|
||||
def test_basebackup_error(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_basebackup_error", "empty")
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
# Introduce failpoint
|
||||
pageserver_http.configure_failpoints(("basebackup-before-control-file", "return"))
|
||||
|
||||
with pytest.raises(Exception, match="basebackup-before-control-file"):
|
||||
env.endpoints.create_start("main")
|
||||
env.endpoints.create_start("test_basebackup_error")
|
||||
|
||||
@@ -15,7 +15,6 @@ from fixtures.neon_fixtures import (
|
||||
)
|
||||
from fixtures.pageserver.http import PageserverApiException
|
||||
from fixtures.pageserver.utils import wait_until_tenant_active
|
||||
from fixtures.shared_fixtures import TTimeline
|
||||
from fixtures.utils import query_scalar
|
||||
from performance.test_perf_pgbench import get_scales_matrix
|
||||
from requests import RequestException
|
||||
@@ -116,10 +115,13 @@ def test_branching_with_pgbench(
|
||||
# This test checks if the pageserver is able to handle a "unnormalized" starting LSN.
|
||||
#
|
||||
# Related: see discussion in https://github.com/neondatabase/neon/pull/2143#issuecomment-1209092186
|
||||
def test_branching_unnormalized_start_lsn(timeline: TTimeline, pg_bin: PgBin):
|
||||
def test_branching_unnormalized_start_lsn(neon_simple_env: NeonEnv, pg_bin: PgBin):
|
||||
XLOG_BLCKSZ = 8192
|
||||
|
||||
endpoint0 = timeline.primary
|
||||
env = neon_simple_env
|
||||
|
||||
env.neon_cli.create_branch("b0")
|
||||
endpoint0 = env.endpoints.create_start("b0")
|
||||
|
||||
pg_bin.run_capture(["pgbench", "-i", endpoint0.connstr()])
|
||||
|
||||
@@ -131,8 +133,8 @@ def test_branching_unnormalized_start_lsn(timeline: TTimeline, pg_bin: PgBin):
|
||||
start_lsn = Lsn((int(curr_lsn) - XLOG_BLCKSZ) // XLOG_BLCKSZ * XLOG_BLCKSZ)
|
||||
|
||||
log.info(f"Branching b1 from b0 starting at lsn {start_lsn}...")
|
||||
tl2 = timeline.create_branch("branch", start_lsn)
|
||||
endpoint1 = tl2.primary
|
||||
env.neon_cli.create_branch("b1", "b0", ancestor_start_lsn=start_lsn)
|
||||
endpoint1 = env.endpoints.create_start("b1")
|
||||
|
||||
pg_bin.run_capture(["pgbench", "-i", endpoint1.connstr()])
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ from fixtures.utils import query_scalar
|
||||
#
|
||||
def test_clog_truncate(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_clog_truncate", "empty")
|
||||
|
||||
# set aggressive autovacuum to make sure that truncation will happen
|
||||
config = [
|
||||
@@ -23,7 +24,7 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
|
||||
"autovacuum_freeze_max_age=100000",
|
||||
]
|
||||
|
||||
endpoint = env.endpoints.create_start("main", config_lines=config)
|
||||
endpoint = env.endpoints.create_start("test_clog_truncate", config_lines=config)
|
||||
|
||||
# Install extension containing function needed for test
|
||||
endpoint.safe_psql("CREATE EXTENSION neon_test_utils")
|
||||
@@ -57,7 +58,7 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
|
||||
# create new branch after clog truncation and start a compute node on it
|
||||
log.info(f"create branch at lsn_after_truncation {lsn_after_truncation}")
|
||||
env.neon_cli.create_branch(
|
||||
"test_clog_truncate_new", "main", ancestor_start_lsn=lsn_after_truncation
|
||||
"test_clog_truncate_new", "test_clog_truncate", ancestor_start_lsn=lsn_after_truncation
|
||||
)
|
||||
endpoint2 = env.endpoints.create_start("test_clog_truncate_new")
|
||||
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
from pathlib import Path
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder, flush_ep_to_pageserver, test_output_dir
|
||||
from fixtures.shared_fixtures import TTimeline
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder, flush_ep_to_pageserver
|
||||
|
||||
|
||||
def do_combocid_op(timeline: TTimeline, op):
|
||||
endpoint = timeline.primary_with_config(config_lines=[
|
||||
"shared_buffers='1MB'",
|
||||
])
|
||||
def do_combocid_op(neon_env_builder: NeonEnvBuilder, op):
|
||||
env = neon_env_builder.init_start()
|
||||
endpoint = env.endpoints.create_start(
|
||||
"main",
|
||||
config_lines=[
|
||||
"shared_buffers='1MB'",
|
||||
],
|
||||
)
|
||||
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
@@ -42,26 +43,32 @@ def do_combocid_op(timeline: TTimeline, op):
|
||||
assert len(rows) == 500
|
||||
|
||||
cur.execute("rollback")
|
||||
timeline.stop_and_flush(endpoint)
|
||||
timeline.checkpoint(compact=False, wait_until_uploaded=True)
|
||||
flush_ep_to_pageserver(env, endpoint, env.initial_tenant, env.initial_timeline)
|
||||
env.pageserver.http_client().timeline_checkpoint(
|
||||
env.initial_tenant, env.initial_timeline, compact=False, wait_until_uploaded=True
|
||||
)
|
||||
|
||||
|
||||
def test_combocid_delete(timeline: TTimeline):
|
||||
do_combocid_op(timeline, "delete from t")
|
||||
def test_combocid_delete(neon_env_builder: NeonEnvBuilder):
|
||||
do_combocid_op(neon_env_builder, "delete from t")
|
||||
|
||||
|
||||
def test_combocid_update(timeline: TTimeline):
|
||||
do_combocid_op(timeline, "update t set val=val+1")
|
||||
def test_combocid_update(neon_env_builder: NeonEnvBuilder):
|
||||
do_combocid_op(neon_env_builder, "update t set val=val+1")
|
||||
|
||||
|
||||
def test_combocid_lock(timeline: TTimeline):
|
||||
do_combocid_op(timeline, "select * from t for update")
|
||||
def test_combocid_lock(neon_env_builder: NeonEnvBuilder):
|
||||
do_combocid_op(neon_env_builder, "select * from t for update")
|
||||
|
||||
|
||||
def test_combocid_multi_insert(timeline: TTimeline, test_output_dir: Path):
|
||||
endpoint = timeline.primary_with_config(config_lines=[
|
||||
"shared_buffers='1MB'",
|
||||
])
|
||||
def test_combocid_multi_insert(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
endpoint = env.endpoints.create_start(
|
||||
"main",
|
||||
config_lines=[
|
||||
"shared_buffers='1MB'",
|
||||
],
|
||||
)
|
||||
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
@@ -70,7 +77,7 @@ def test_combocid_multi_insert(timeline: TTimeline, test_output_dir: Path):
|
||||
cur.execute("CREATE EXTENSION neon_test_utils")
|
||||
|
||||
cur.execute("create table t(id integer, val integer)")
|
||||
file_path = str(test_output_dir / "t.csv")
|
||||
file_path = f"{endpoint.pg_data_dir_path()}/t.csv"
|
||||
cur.execute(f"insert into t select g, 0 from generate_series(1,{n_records}) g")
|
||||
cur.execute(f"copy t to '{file_path}'")
|
||||
cur.execute("truncate table t")
|
||||
@@ -99,12 +106,15 @@ def test_combocid_multi_insert(timeline: TTimeline, test_output_dir: Path):
|
||||
|
||||
cur.execute("rollback")
|
||||
|
||||
timeline.stop_and_flush(endpoint)
|
||||
timeline.checkpoint(compact=False, wait_until_uploaded=True)
|
||||
flush_ep_to_pageserver(env, endpoint, env.initial_tenant, env.initial_timeline)
|
||||
env.pageserver.http_client().timeline_checkpoint(
|
||||
env.initial_tenant, env.initial_timeline, compact=False, wait_until_uploaded=True
|
||||
)
|
||||
|
||||
|
||||
def test_combocid(timeline: TTimeline):
|
||||
endpoint = timeline.primary
|
||||
def test_combocid(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
@@ -137,5 +147,7 @@ def test_combocid(timeline: TTimeline):
|
||||
|
||||
cur.execute("rollback")
|
||||
|
||||
timeline.stop_and_flush(endpoint)
|
||||
timeline.checkpoint(compact=False, wait_until_uploaded=True)
|
||||
flush_ep_to_pageserver(env, endpoint, env.initial_tenant, env.initial_timeline)
|
||||
env.pageserver.http_client().timeline_checkpoint(
|
||||
env.initial_tenant, env.initial_timeline, compact=False, wait_until_uploaded=True
|
||||
)
|
||||
|
||||
@@ -4,8 +4,9 @@ from fixtures.neon_fixtures import NeonEnv
|
||||
|
||||
def test_compute_catalog(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_config", "empty")
|
||||
|
||||
endpoint = env.endpoints.create_start("main", config_lines=["log_min_messages=debug1"])
|
||||
endpoint = env.endpoints.create_start("test_config", config_lines=["log_min_messages=debug1"])
|
||||
client = endpoint.http_client()
|
||||
|
||||
objects = client.dbs_and_roles()
|
||||
|
||||
@@ -9,9 +9,10 @@ from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder
|
||||
#
|
||||
def test_config(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_config", "empty")
|
||||
|
||||
# change config
|
||||
endpoint = env.endpoints.create_start("main", config_lines=["log_min_messages=debug1"])
|
||||
endpoint = env.endpoints.create_start("test_config", config_lines=["log_min_messages=debug1"])
|
||||
|
||||
with closing(endpoint.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
|
||||
@@ -5,7 +5,6 @@ import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
|
||||
from fixtures.pg_version import PgVersion
|
||||
from fixtures.shared_fixtures import TTimeline
|
||||
from fixtures.utils import query_scalar
|
||||
|
||||
|
||||
@@ -13,17 +12,20 @@ from fixtures.utils import query_scalar
|
||||
# Test CREATE DATABASE when there have been relmapper changes
|
||||
#
|
||||
@pytest.mark.parametrize("strategy", ["file_copy", "wal_log"])
|
||||
def test_createdb(timeline: TTimeline, pg_version: PgVersion, strategy: str):
|
||||
if pg_version == PgVersion.V14 and strategy == "wal_log":
|
||||
def test_createdb(neon_simple_env: NeonEnv, strategy: str):
|
||||
env = neon_simple_env
|
||||
if env.pg_version == PgVersion.V14 and strategy == "wal_log":
|
||||
pytest.skip("wal_log strategy not supported on PostgreSQL 14")
|
||||
|
||||
endpoint = timeline.primary
|
||||
env.neon_cli.create_branch("test_createdb", "empty")
|
||||
|
||||
endpoint = env.endpoints.create_start("test_createdb")
|
||||
|
||||
with endpoint.cursor() as cur:
|
||||
# Cause a 'relmapper' change in the original branch
|
||||
cur.execute("VACUUM FULL pg_class")
|
||||
|
||||
if pg_version == PgVersion.V14:
|
||||
if env.pg_version == PgVersion.V14:
|
||||
cur.execute("CREATE DATABASE foodb")
|
||||
else:
|
||||
cur.execute(f"CREATE DATABASE foodb STRATEGY={strategy}")
|
||||
@@ -31,8 +33,8 @@ def test_createdb(timeline: TTimeline, pg_version: PgVersion, strategy: str):
|
||||
lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
|
||||
|
||||
# Create a branch
|
||||
tl2 = timeline.create_branch("createdb2", lsn=lsn)
|
||||
endpoint2 = tl2.primary
|
||||
env.neon_cli.create_branch("test_createdb2", "test_createdb", ancestor_start_lsn=lsn)
|
||||
endpoint2 = env.endpoints.create_start("test_createdb2")
|
||||
|
||||
# Test that you can connect to the new database on both branches
|
||||
for db in (endpoint, endpoint2):
|
||||
@@ -58,8 +60,10 @@ def test_createdb(timeline: TTimeline, pg_version: PgVersion, strategy: str):
|
||||
#
|
||||
# Test DROP DATABASE
|
||||
#
|
||||
def test_dropdb(timeline: TTimeline, test_output_dir):
|
||||
endpoint = timeline.primary
|
||||
def test_dropdb(neon_simple_env: NeonEnv, test_output_dir):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_dropdb", "empty")
|
||||
endpoint = env.endpoints.create_start("test_dropdb")
|
||||
|
||||
with endpoint.cursor() as cur:
|
||||
cur.execute("CREATE DATABASE foodb")
|
||||
@@ -76,28 +80,32 @@ def test_dropdb(timeline: TTimeline, test_output_dir):
|
||||
lsn_after_drop = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
|
||||
|
||||
# Create two branches before and after database drop.
|
||||
tl_before = timeline.create_branch("test_before_dropdb", lsn=lsn_before_drop)
|
||||
endpoint_before = tl_before.primary
|
||||
env.neon_cli.create_branch(
|
||||
"test_before_dropdb", "test_dropdb", ancestor_start_lsn=lsn_before_drop
|
||||
)
|
||||
endpoint_before = env.endpoints.create_start("test_before_dropdb")
|
||||
|
||||
tl_after = timeline.create_branch("test_after_dropdb", lsn=lsn_after_drop)
|
||||
endpoint_after = tl_after.primary
|
||||
env.neon_cli.create_branch(
|
||||
"test_after_dropdb", "test_dropdb", ancestor_start_lsn=lsn_after_drop
|
||||
)
|
||||
endpoint_after = env.endpoints.create_start("test_after_dropdb")
|
||||
|
||||
# Test that database exists on the branch before drop
|
||||
endpoint_before.connect(dbname="foodb").close()
|
||||
|
||||
# Test that database subdir exists on the branch before drop
|
||||
assert timeline.tenant.pgdatadir(endpoint_before)
|
||||
dbpath = pathlib.Path(timeline.tenant.pgdatadir(endpoint_before)) / "base" / str(dboid)
|
||||
assert endpoint_before.pgdata_dir
|
||||
dbpath = pathlib.Path(endpoint_before.pgdata_dir) / "base" / str(dboid)
|
||||
log.info(dbpath)
|
||||
|
||||
assert os.path.isdir(dbpath) is True
|
||||
|
||||
# Test that database subdir doesn't exist on the branch after drop
|
||||
assert timeline.tenant.pgdatadir(endpoint_after)
|
||||
dbpath = pathlib.Path(timeline.tenant.pgdatadir(endpoint_after)) / "base" / str(dboid)
|
||||
assert endpoint_after.pgdata_dir
|
||||
dbpath = pathlib.Path(endpoint_after.pgdata_dir) / "base" / str(dboid)
|
||||
log.info(dbpath)
|
||||
|
||||
assert os.path.isdir(dbpath) is False
|
||||
|
||||
# Check that we restore the content of the datadir correctly
|
||||
timeline.tenant.check_restored_datadir_content(test_output_dir, endpoint)
|
||||
check_restored_datadir_content(test_output_dir, env, endpoint)
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.shared_fixtures import TTimeline
|
||||
from fixtures.utils import query_scalar
|
||||
|
||||
|
||||
#
|
||||
# Test CREATE USER to check shared catalog restore
|
||||
#
|
||||
def test_createuser(timeline: TTimeline):
|
||||
endpoint = timeline.primary
|
||||
def test_createuser(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_createuser", "empty")
|
||||
endpoint = env.endpoints.create_start("test_createuser")
|
||||
|
||||
with endpoint.cursor() as cur:
|
||||
# Cause a 'relmapper' change in the original branch
|
||||
@@ -18,8 +19,8 @@ def test_createuser(timeline: TTimeline):
|
||||
lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")
|
||||
|
||||
# Create a branch
|
||||
branch = timeline.create_branch("test_createuser2", lsn=lsn)
|
||||
endpoint2 = branch.primary
|
||||
env.neon_cli.create_branch("test_createuser2", "test_createuser", ancestor_start_lsn=lsn)
|
||||
endpoint2 = env.endpoints.create_start("test_createuser2")
|
||||
|
||||
# Test that you can connect to new branch as a new user
|
||||
assert endpoint2.safe_psql("select current_user", user="testuser") == [("testuser",)]
|
||||
|
||||
@@ -290,8 +290,9 @@ def assert_db_connlimit(endpoint: Any, db_name: str, connlimit: int, msg: str):
|
||||
# Here we test the latter. The first one is tested in test_ddl_forwarding
|
||||
def test_ddl_forwarding_invalid_db(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_ddl_forwarding_invalid_db", "empty")
|
||||
endpoint = env.endpoints.create_start(
|
||||
"main",
|
||||
"test_ddl_forwarding_invalid_db",
|
||||
# Some non-existent url
|
||||
config_lines=["neon.console_url=http://localhost:9999/unknown/api/v0/roles_and_databases"],
|
||||
)
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import pytest
|
||||
from fixtures.neon_fixtures import Endpoint
|
||||
from fixtures.shared_fixtures import TTimeline
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
@@ -11,11 +10,14 @@ from fixtures.shared_fixtures import TTimeline
|
||||
"💣", # calls `trigger_segfault` internally
|
||||
],
|
||||
)
|
||||
def test_endpoint_crash(timeline: TTimeline, sql_func: str):
|
||||
def test_endpoint_crash(neon_env_builder: NeonEnvBuilder, sql_func: str):
|
||||
"""
|
||||
Test that triggering crash from neon_test_utils crashes the endpoint
|
||||
"""
|
||||
endpoint = timeline.primary
|
||||
env = neon_env_builder.init_start()
|
||||
env.neon_cli.create_branch("test_endpoint_crash")
|
||||
endpoint = env.endpoints.create_start("test_endpoint_crash")
|
||||
|
||||
endpoint.safe_psql("CREATE EXTENSION neon_test_utils;")
|
||||
with pytest.raises(Exception, match="This probably means the server terminated abnormally"):
|
||||
endpoint.safe_psql(f"SELECT {sql_func}();")
|
||||
|
||||
@@ -10,9 +10,11 @@ def test_explain_with_lfc_stats(neon_simple_env: NeonEnv):
|
||||
cache_dir = Path(env.repo_dir) / "file_cache"
|
||||
cache_dir.mkdir(exist_ok=True)
|
||||
|
||||
log.info("Creating endpoint with 1MB shared_buffers and 64 MB LFC")
|
||||
branchname = "test_explain_with_lfc_stats"
|
||||
env.neon_cli.create_branch(branchname, "empty")
|
||||
log.info(f"Creating endopint with 1MB shared_buffers and 64 MB LFC for branch {branchname}")
|
||||
endpoint = env.endpoints.create_start(
|
||||
"main",
|
||||
branchname,
|
||||
config_lines=[
|
||||
"shared_buffers='1MB'",
|
||||
f"neon.file_cache_path='{cache_dir}/file.cache'",
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
from fixtures.shared_fixtures import TTimeline
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
|
||||
|
||||
def test_fsm_truncate(timeline: TTimeline):
|
||||
endpoint = timeline.primary
|
||||
def test_fsm_truncate(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
env.neon_cli.create_branch("test_fsm_truncate")
|
||||
endpoint = env.endpoints.create_start("test_fsm_truncate")
|
||||
endpoint.safe_psql(
|
||||
"CREATE TABLE t1(key int); CREATE TABLE t2(key int); TRUNCATE TABLE t1; TRUNCATE TABLE t2;"
|
||||
)
|
||||
|
||||
@@ -1,15 +1,17 @@
|
||||
import time
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnv, wait_replica_caughtup
|
||||
from fixtures.shared_fixtures import TTimeline
|
||||
|
||||
|
||||
#
|
||||
# Test that redo of XLOG_GIN_VACUUM_PAGE doesn't produce error
|
||||
#
|
||||
def test_gin_redo(timeline: TTimeline):
|
||||
primary = timeline.primary
|
||||
secondary = timeline.secondary
|
||||
def test_gin_redo(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
|
||||
primary = env.endpoints.create_start(branch_name="main", endpoint_id="primary")
|
||||
time.sleep(1)
|
||||
secondary = env.endpoints.new_replica_start(origin=primary, endpoint_id="secondary")
|
||||
con = primary.connect()
|
||||
cur = con.cursor()
|
||||
cur.execute("create table gin_test_tbl(id integer, i int4[])")
|
||||
|
||||
@@ -14,7 +14,6 @@ from fixtures.neon_fixtures import (
|
||||
tenant_get_shards,
|
||||
wait_replica_caughtup,
|
||||
)
|
||||
from fixtures.shared_fixtures import TTimeline
|
||||
from fixtures.utils import wait_until
|
||||
|
||||
|
||||
@@ -222,20 +221,29 @@ def pgbench_accounts_initialized(ep):
|
||||
#
|
||||
# Without hs feedback enabled we'd see 'User query might have needed to see row
|
||||
# versions that must be removed.' errors.
|
||||
def test_hot_standby_feedback(timeline: TTimeline, pg_bin: PgBin):
|
||||
with timeline.primary_with_config(config_lines=[
|
||||
def test_hot_standby_feedback(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
|
||||
env = neon_env_builder.init_start()
|
||||
agressive_vacuum_conf = [
|
||||
"log_autovacuum_min_duration = 0",
|
||||
"autovacuum_naptime = 10s",
|
||||
"autovacuum_vacuum_threshold = 25",
|
||||
"autovacuum_vacuum_scale_factor = 0.1",
|
||||
"autovacuum_vacuum_cost_delay = -1",
|
||||
]) as primary:
|
||||
]
|
||||
with env.endpoints.create_start(
|
||||
branch_name="main", endpoint_id="primary", config_lines=agressive_vacuum_conf
|
||||
) as primary:
|
||||
# It would be great to have more strict max_standby_streaming_delay=0s here, but then sometimes it fails with
|
||||
# 'User was holding shared buffer pin for too long.'.
|
||||
with timeline.secondary_with_config(config_lines=[
|
||||
"max_standby_streaming_delay=2s",
|
||||
"hot_standby_feedback=true",
|
||||
]) as secondary:
|
||||
with env.endpoints.new_replica_start(
|
||||
origin=primary,
|
||||
endpoint_id="secondary",
|
||||
config_lines=[
|
||||
"max_standby_streaming_delay=2s",
|
||||
"neon.protocol_version=2",
|
||||
"hot_standby_feedback=true",
|
||||
],
|
||||
) as secondary:
|
||||
log.info(
|
||||
f"primary connstr is {primary.connstr()}, secondary connstr {secondary.connstr()}"
|
||||
)
|
||||
@@ -278,15 +286,20 @@ def test_hot_standby_feedback(timeline: TTimeline, pg_bin: PgBin):
|
||||
|
||||
# Test race condition between WAL replay and backends performing queries
|
||||
# https://github.com/neondatabase/neon/issues/7791
|
||||
def test_replica_query_race(timeline: TTimeline):
|
||||
primary_ep = timeline.primary
|
||||
def test_replica_query_race(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
|
||||
primary_ep = env.endpoints.create_start(
|
||||
branch_name="main",
|
||||
endpoint_id="primary",
|
||||
)
|
||||
|
||||
with primary_ep.connect() as p_con:
|
||||
with p_con.cursor() as p_cur:
|
||||
p_cur.execute("CREATE EXTENSION neon_test_utils")
|
||||
p_cur.execute("CREATE TABLE test AS SELECT 0 AS counter")
|
||||
|
||||
standby_ep = timeline.secondary
|
||||
standby_ep = env.endpoints.new_replica_start(origin=primary_ep, endpoint_id="standby")
|
||||
wait_replica_caughtup(primary_ep, standby_ep)
|
||||
|
||||
# In primary, run a lot of UPDATEs on a single page
|
||||
|
||||
@@ -8,19 +8,23 @@ import time
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnv, PgBin
|
||||
from fixtures.shared_fixtures import TTimeline
|
||||
|
||||
|
||||
#
|
||||
# Test branching, when a transaction is in prepared state
|
||||
#
|
||||
@pytest.mark.timeout(600)
|
||||
def test_lfc_resize(timeline: TTimeline, pg_bin: PgBin):
|
||||
endpoint = timeline.primary_with_config(config_lines=[
|
||||
"neon.file_cache_path='file.cache'",
|
||||
"neon.max_file_cache_size=512MB",
|
||||
"neon.file_cache_size_limit=512MB",
|
||||
])
|
||||
def test_lfc_resize(neon_simple_env: NeonEnv, pg_bin: PgBin):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_lfc_resize", "empty")
|
||||
endpoint = env.endpoints.create_start(
|
||||
"test_lfc_resize",
|
||||
config_lines=[
|
||||
"neon.file_cache_path='file.cache'",
|
||||
"neon.max_file_cache_size=512MB",
|
||||
"neon.file_cache_size_limit=512MB",
|
||||
],
|
||||
)
|
||||
n_resize = 10
|
||||
scale = 100
|
||||
|
||||
@@ -46,7 +50,7 @@ def test_lfc_resize(timeline: TTimeline, pg_bin: PgBin):
|
||||
|
||||
thread.join()
|
||||
|
||||
lfc_file_path = timeline.tenant.pgdatadir(endpoint) / "file.cache"
|
||||
lfc_file_path = f"{endpoint.pg_data_dir_path()}/file.cache"
|
||||
lfc_file_size = os.path.getsize(lfc_file_path)
|
||||
res = subprocess.run(["ls", "-sk", lfc_file_path], check=True, text=True, capture_output=True)
|
||||
lfc_file_blocks = re.findall("([0-9A-F]+)", res.stdout)[0]
|
||||
|
||||
@@ -3,7 +3,6 @@ from pathlib import Path
|
||||
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.shared_fixtures import TTimeline
|
||||
from fixtures.utils import query_scalar
|
||||
|
||||
|
||||
@@ -13,9 +12,11 @@ def test_lfc_working_set_approximation(neon_simple_env: NeonEnv):
|
||||
cache_dir = Path(env.repo_dir) / "file_cache"
|
||||
cache_dir.mkdir(exist_ok=True)
|
||||
|
||||
log.info("Creating endpoint with 1MB shared_buffers and 64 MB LFC")
|
||||
branchname = "test_approximate_working_set_size"
|
||||
env.neon_cli.create_branch(branchname, "empty")
|
||||
log.info(f"Creating endopint with 1MB shared_buffers and 64 MB LFC for branch {branchname}")
|
||||
endpoint = env.endpoints.create_start(
|
||||
"main",
|
||||
branchname,
|
||||
config_lines=[
|
||||
"shared_buffers='1MB'",
|
||||
f"neon.file_cache_path='{cache_dir}/file.cache'",
|
||||
@@ -74,14 +75,18 @@ WITH (fillfactor='100');
|
||||
assert blocks < 10
|
||||
|
||||
|
||||
def test_sliding_working_set_approximation(timeline: TTimeline):
|
||||
endpoint = timeline.primary_with_config(config_lines=[
|
||||
"autovacuum = off",
|
||||
"shared_buffers=1MB",
|
||||
"neon.max_file_cache_size=256MB",
|
||||
"neon.file_cache_size_limit=245MB",
|
||||
])
|
||||
def test_sliding_working_set_approximation(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
|
||||
endpoint = env.endpoints.create_start(
|
||||
branch_name="main",
|
||||
config_lines=[
|
||||
"autovacuum = off",
|
||||
"shared_buffers=1MB",
|
||||
"neon.max_file_cache_size=256MB",
|
||||
"neon.file_cache_size_limit=245MB",
|
||||
],
|
||||
)
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
cur.execute("create extension neon")
|
||||
|
||||
@@ -5,7 +5,7 @@ import threading
|
||||
import time
|
||||
from typing import List
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
from fixtures.neon_fixtures import DEFAULT_BRANCH_NAME, NeonEnvBuilder
|
||||
from fixtures.utils import query_scalar
|
||||
|
||||
|
||||
@@ -15,8 +15,11 @@ def test_local_file_cache_unlink(neon_env_builder: NeonEnvBuilder):
|
||||
cache_dir = os.path.join(env.repo_dir, "file_cache")
|
||||
os.mkdir(cache_dir)
|
||||
|
||||
env.neon_cli.create_branch("empty", ancestor_branch_name=DEFAULT_BRANCH_NAME)
|
||||
env.neon_cli.create_branch("test_local_file_cache_unlink", "empty")
|
||||
|
||||
endpoint = env.endpoints.create_start(
|
||||
"main",
|
||||
"test_local_file_cache_unlink",
|
||||
config_lines=[
|
||||
"shared_buffers='1MB'",
|
||||
f"neon.file_cache_path='{cache_dir}/file.cache'",
|
||||
|
||||
@@ -36,8 +36,10 @@ def test_logical_replication(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
env = neon_simple_env
|
||||
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.initial_timeline
|
||||
endpoint = env.endpoints.create_start("main", config_lines=["log_statement=all"])
|
||||
timeline_id = env.neon_cli.create_branch("test_logical_replication", "empty")
|
||||
endpoint = env.endpoints.create_start(
|
||||
"test_logical_replication", config_lines=["log_statement=all"]
|
||||
)
|
||||
|
||||
pg_conn = endpoint.connect()
|
||||
cur = pg_conn.cursor()
|
||||
@@ -183,9 +185,10 @@ def test_obsolete_slot_drop(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
|
||||
env = neon_simple_env
|
||||
|
||||
env.neon_cli.create_branch("test_logical_replication", "empty")
|
||||
# set low neon.logical_replication_max_snap_files
|
||||
endpoint = env.endpoints.create_start(
|
||||
"main",
|
||||
"test_logical_replication",
|
||||
config_lines=["log_statement=all", "neon.logical_replication_max_snap_files=1"],
|
||||
)
|
||||
|
||||
@@ -469,7 +472,7 @@ def test_slots_and_branching(neon_simple_env: NeonEnv):
|
||||
def test_replication_shutdown(neon_simple_env: NeonEnv):
|
||||
# Ensure Postgres can exit without stuck when a replication job is active + neon extension installed
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_replication_shutdown_publisher", "main")
|
||||
env.neon_cli.create_branch("test_replication_shutdown_publisher", "empty")
|
||||
pub = env.endpoints.create("test_replication_shutdown_publisher")
|
||||
|
||||
env.neon_cli.create_branch("test_replication_shutdown_subscriber")
|
||||
|
||||
@@ -9,8 +9,9 @@ if TYPE_CHECKING:
|
||||
|
||||
def test_migrations(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_migrations", "empty")
|
||||
|
||||
endpoint = env.endpoints.create("main")
|
||||
endpoint = env.endpoints.create("test_migrations")
|
||||
endpoint.respec(skip_pg_catalog_updates=False)
|
||||
endpoint.start()
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
|
||||
from fixtures.shared_fixtures import TTimeline
|
||||
from fixtures.utils import query_scalar
|
||||
|
||||
|
||||
@@ -13,8 +12,10 @@ from fixtures.utils import query_scalar
|
||||
# is enough to verify that the WAL records are handled correctly
|
||||
# in the pageserver.
|
||||
#
|
||||
def test_multixact(timeline: TTimeline, test_output_dir):
|
||||
endpoint = timeline.primary
|
||||
def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_multixact", "empty")
|
||||
endpoint = env.endpoints.create_start("test_multixact")
|
||||
|
||||
cur = endpoint.connect().cursor()
|
||||
cur.execute(
|
||||
@@ -72,8 +73,8 @@ def test_multixact(timeline: TTimeline, test_output_dir):
|
||||
assert int(next_multixact_id) > int(next_multixact_id_old)
|
||||
|
||||
# Branch at this point
|
||||
branch = timeline.create_branch("test_multixact_new", lsn=lsn)
|
||||
endpoint_new = branch.primary
|
||||
env.neon_cli.create_branch("test_multixact_new", "test_multixact", ancestor_start_lsn=lsn)
|
||||
endpoint_new = env.endpoints.create_start("test_multixact_new")
|
||||
|
||||
next_multixact_id_new = endpoint_new.safe_psql(
|
||||
"SELECT next_multixact_id FROM pg_control_checkpoint()"
|
||||
@@ -83,4 +84,4 @@ def test_multixact(timeline: TTimeline, test_output_dir):
|
||||
assert next_multixact_id_new == next_multixact_id
|
||||
|
||||
# Check that we can restore the content of the datadir correctly
|
||||
timeline.tenant.check_restored_datadir_content(test_output_dir, endpoint)
|
||||
check_restored_datadir_content(test_output_dir, env, endpoint)
|
||||
|
||||
@@ -6,7 +6,7 @@ from fixtures.utils import wait_until
|
||||
|
||||
def test_neon_superuser(neon_simple_env: NeonEnv, pg_version: PgVersion):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_neon_superuser_publisher", "main")
|
||||
env.neon_cli.create_branch("test_neon_superuser_publisher", "empty")
|
||||
pub = env.endpoints.create("test_neon_superuser_publisher")
|
||||
|
||||
env.neon_cli.create_branch("test_neon_superuser_subscriber")
|
||||
|
||||
@@ -41,7 +41,8 @@ async def parallel_load_same_table(endpoint: Endpoint, n_parallel: int):
|
||||
# Load data into one table with COPY TO from 5 parallel connections
|
||||
def test_parallel_copy(neon_simple_env: NeonEnv, n_parallel=5):
|
||||
env = neon_simple_env
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
env.neon_cli.create_branch("test_parallel_copy", "empty")
|
||||
endpoint = env.endpoints.create_start("test_parallel_copy")
|
||||
|
||||
# Create test table
|
||||
conn = endpoint.connect()
|
||||
|
||||
@@ -42,9 +42,11 @@ def test_cancellations(neon_simple_env: NeonEnv):
|
||||
ps_http = ps.http_client()
|
||||
ps_http.is_testing_enabled_or_skip()
|
||||
|
||||
env.neon_cli.create_branch("test_config", "empty")
|
||||
|
||||
# We don't want to have any racy behaviour with autovacuum IOs
|
||||
ep = env.endpoints.create_start(
|
||||
"main",
|
||||
"test_config",
|
||||
config_lines=[
|
||||
"autovacuum = off",
|
||||
"shared_buffers = 128MB",
|
||||
|
||||
@@ -22,8 +22,8 @@ def check_wal_segment(pg_waldump_path: str, segment_path: str, test_output_dir):
|
||||
def test_pg_waldump(neon_simple_env: NeonEnv, test_output_dir, pg_bin: PgBin):
|
||||
env = neon_simple_env
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.initial_timeline
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
timeline_id = env.neon_cli.create_branch("test_pg_waldump", "empty")
|
||||
endpoint = env.endpoints.create_start("test_pg_waldump")
|
||||
|
||||
cur = endpoint.connect().cursor()
|
||||
cur.execute(
|
||||
|
||||
@@ -15,8 +15,12 @@ extensions = ["pageinspect", "neon_test_utils", "pg_buffercache"]
|
||||
#
|
||||
def test_read_validation(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_read_validation", "empty")
|
||||
|
||||
endpoint = env.endpoints.create_start(
|
||||
"test_read_validation",
|
||||
)
|
||||
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
with closing(endpoint.connect()) as con:
|
||||
with con.cursor() as c:
|
||||
for e in extensions:
|
||||
@@ -127,9 +131,13 @@ def test_read_validation(neon_simple_env: NeonEnv):
|
||||
|
||||
def test_read_validation_neg(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
env.neon_cli.create_branch("test_read_validation_neg", "empty")
|
||||
|
||||
env.pageserver.allowed_errors.append(".*invalid LSN\\(0\\) in request.*")
|
||||
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
endpoint = env.endpoints.create_start(
|
||||
"test_read_validation_neg",
|
||||
)
|
||||
|
||||
with closing(endpoint.connect()) as con:
|
||||
with con.cursor() as c:
|
||||
|
||||
@@ -22,7 +22,8 @@ from fixtures.utils import query_scalar
|
||||
#
|
||||
def test_readonly_node(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
endpoint_main = env.endpoints.create_start("main")
|
||||
env.neon_cli.create_branch("test_readonly_node", "empty")
|
||||
endpoint_main = env.endpoints.create_start("test_readonly_node")
|
||||
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
@@ -73,12 +74,12 @@ def test_readonly_node(neon_simple_env: NeonEnv):
|
||||
|
||||
# Create first read-only node at the point where only 100 rows were inserted
|
||||
endpoint_hundred = env.endpoints.create_start(
|
||||
branch_name="main", endpoint_id="ep-readonly_node_hundred", lsn=lsn_a
|
||||
branch_name="test_readonly_node", endpoint_id="ep-readonly_node_hundred", lsn=lsn_a
|
||||
)
|
||||
|
||||
# And another at the point where 200100 rows were inserted
|
||||
endpoint_more = env.endpoints.create_start(
|
||||
branch_name="main", endpoint_id="ep-readonly_node_more", lsn=lsn_b
|
||||
branch_name="test_readonly_node", endpoint_id="ep-readonly_node_more", lsn=lsn_b
|
||||
)
|
||||
|
||||
# On the 'hundred' node, we should see only 100 rows
|
||||
@@ -99,7 +100,7 @@ def test_readonly_node(neon_simple_env: NeonEnv):
|
||||
|
||||
# Check creating a node at segment boundary
|
||||
endpoint = env.endpoints.create_start(
|
||||
branch_name="main",
|
||||
branch_name="test_readonly_node",
|
||||
endpoint_id="ep-branch_segment_boundary",
|
||||
lsn=Lsn("0/3000000"),
|
||||
)
|
||||
@@ -111,7 +112,7 @@ def test_readonly_node(neon_simple_env: NeonEnv):
|
||||
with pytest.raises(Exception, match="invalid basebackup lsn"):
|
||||
# compute node startup with invalid LSN should fail
|
||||
env.endpoints.create_start(
|
||||
branch_name="main",
|
||||
branch_name="test_readonly_node",
|
||||
endpoint_id="ep-readonly_node_preinitdb",
|
||||
lsn=Lsn("0/42"),
|
||||
)
|
||||
@@ -217,10 +218,14 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
|
||||
# Similar test, but with more data, and we force checkpoints
|
||||
def test_timetravel(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.initial_timeline
|
||||
pageserver_http_client = env.pageserver.http_client()
|
||||
env.neon_cli.create_branch("test_timetravel", "empty")
|
||||
endpoint = env.endpoints.create_start("test_timetravel")
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0]
|
||||
timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
|
||||
|
||||
lsns = []
|
||||
|
||||
@@ -244,7 +249,7 @@ def test_timetravel(neon_simple_env: NeonEnv):
|
||||
wait_for_last_record_lsn(client, tenant_id, timeline_id, current_lsn)
|
||||
|
||||
# run checkpoint manually to force a new layer file
|
||||
client.timeline_checkpoint(tenant_id, timeline_id)
|
||||
pageserver_http_client.timeline_checkpoint(tenant_id, timeline_id)
|
||||
|
||||
##### Restart pageserver
|
||||
env.endpoints.stop_all()
|
||||
@@ -253,7 +258,7 @@ def test_timetravel(neon_simple_env: NeonEnv):
|
||||
|
||||
for i, lsn in lsns:
|
||||
endpoint_old = env.endpoints.create_start(
|
||||
branch_name="main", endpoint_id=f"ep-old_lsn_{i}", lsn=lsn
|
||||
branch_name="test_timetravel", endpoint_id=f"ep-old_lsn_{i}", lsn=lsn
|
||||
)
|
||||
with endpoint_old.cursor() as cur:
|
||||
assert query_scalar(cur, f"select count(*) from testtab where iteration={i}") == 100000
|
||||
|
||||
@@ -9,7 +9,8 @@ from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
|
||||
# CLOG.
|
||||
def test_subxacts(neon_simple_env: NeonEnv, test_output_dir):
|
||||
env = neon_simple_env
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
env.neon_cli.create_branch("test_subxacts", "empty")
|
||||
endpoint = env.endpoints.create_start("test_subxacts")
|
||||
|
||||
pg_conn = endpoint.connect()
|
||||
cur = pg_conn.cursor()
|
||||
|
||||
@@ -68,13 +68,10 @@ def test_timeline_delete(neon_simple_env: NeonEnv):
|
||||
|
||||
# construct pair of branches to validate that pageserver prohibits
|
||||
# deletion of ancestor timelines when they have child branches
|
||||
parent_timeline_id = env.neon_cli.create_branch(
|
||||
new_branch_name="test_ancestor_branch_delete_parent", ancestor_branch_name="main"
|
||||
)
|
||||
parent_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_delete_parent", "empty")
|
||||
|
||||
leaf_timeline_id = env.neon_cli.create_branch(
|
||||
new_branch_name="test_ancestor_branch_delete_branch1",
|
||||
ancestor_branch_name="test_ancestor_branch_delete_parent",
|
||||
"test_ancestor_branch_delete_branch1", "test_ancestor_branch_delete_parent"
|
||||
)
|
||||
|
||||
timeline_path = env.pageserver.timeline_dir(env.initial_tenant, parent_timeline_id)
|
||||
|
||||
@@ -36,7 +36,7 @@ from fixtures.utils import get_timeline_dir_size, wait_until
|
||||
|
||||
def test_timeline_size(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
new_timeline_id = env.neon_cli.create_branch("test_timeline_size", "main")
|
||||
new_timeline_id = env.neon_cli.create_branch("test_timeline_size", "empty")
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
client.timeline_wait_logical_size(env.initial_tenant, new_timeline_id)
|
||||
@@ -68,7 +68,7 @@ def test_timeline_size(neon_simple_env: NeonEnv):
|
||||
|
||||
def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
new_timeline_id = env.neon_cli.create_branch("test_timeline_size_createdropdb", "main")
|
||||
new_timeline_id = env.neon_cli.create_branch("test_timeline_size_createdropdb", "empty")
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
client.timeline_wait_logical_size(env.initial_tenant, new_timeline_id)
|
||||
|
||||
@@ -9,7 +9,10 @@ from fixtures.neon_fixtures import NeonEnv, fork_at_current_lsn
|
||||
#
|
||||
def test_twophase(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
endpoint = env.endpoints.create_start("main", config_lines=["max_prepared_transactions=5"])
|
||||
env.neon_cli.create_branch("test_twophase", "empty")
|
||||
endpoint = env.endpoints.create_start(
|
||||
"test_twophase", config_lines=["max_prepared_transactions=5"]
|
||||
)
|
||||
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
@@ -53,7 +56,7 @@ def test_twophase(neon_simple_env: NeonEnv):
|
||||
assert len(twophase_files) == 2
|
||||
|
||||
# Create a branch with the transaction in prepared state
|
||||
fork_at_current_lsn(env, endpoint, "test_twophase_prepared", "main")
|
||||
fork_at_current_lsn(env, endpoint, "test_twophase_prepared", "test_twophase")
|
||||
|
||||
# Start compute on the new branch
|
||||
endpoint2 = env.endpoints.create_start(
|
||||
|
||||
@@ -9,7 +9,8 @@ from fixtures.pg_version import PgVersion
|
||||
#
|
||||
def test_unlogged(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
env.neon_cli.create_branch("test_unlogged", "empty")
|
||||
endpoint = env.endpoints.create_start("test_unlogged")
|
||||
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
@@ -21,7 +22,7 @@ def test_unlogged(neon_simple_env: NeonEnv):
|
||||
cur.execute("INSERT INTO iut (id) values (42);")
|
||||
|
||||
# create another compute to fetch inital empty contents from pageserver
|
||||
fork_at_current_lsn(env, endpoint, "test_unlogged_basebackup", "main")
|
||||
fork_at_current_lsn(env, endpoint, "test_unlogged_basebackup", "test_unlogged")
|
||||
endpoint2 = env.endpoints.create_start("test_unlogged_basebackup")
|
||||
|
||||
conn2 = endpoint2.connect()
|
||||
|
||||
@@ -2,7 +2,7 @@ import time
|
||||
from contextlib import closing
|
||||
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder
|
||||
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, fork_at_current_lsn
|
||||
from fixtures.utils import query_scalar
|
||||
|
||||
|
||||
@@ -13,7 +13,8 @@ from fixtures.utils import query_scalar
|
||||
def test_vm_bit_clear(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
env.neon_cli.create_branch("test_vm_bit_clear", "empty")
|
||||
endpoint = env.endpoints.create_start("test_vm_bit_clear")
|
||||
|
||||
pg_conn = endpoint.connect()
|
||||
cur = pg_conn.cursor()
|
||||
@@ -57,7 +58,7 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv):
|
||||
cur.execute("UPDATE vmtest_cold_update2 SET id = 5000, filler=repeat('x', 200) WHERE id = 1")
|
||||
|
||||
# Branch at this point, to test that later
|
||||
# fork_at_current_lsn(env, endpoint, "test_vm_bit_clear_new", "main")
|
||||
fork_at_current_lsn(env, endpoint, "test_vm_bit_clear_new", "test_vm_bit_clear")
|
||||
|
||||
# Clear the buffer cache, to force the VM page to be re-fetched from
|
||||
# the page server
|
||||
@@ -91,7 +92,6 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv):
|
||||
# a dirty VM page is evicted. If the VM bit was not correctly cleared by the
|
||||
# earlier WAL record, the full-page image hides the problem. Starting a new
|
||||
# server at the right point-in-time avoids that full-page image.
|
||||
|
||||
endpoint_new = env.endpoints.create_start("test_vm_bit_clear_new")
|
||||
|
||||
pg_new_conn = endpoint_new.connect()
|
||||
|
||||
@@ -23,7 +23,8 @@ run_broken = pytest.mark.skipif(
|
||||
def test_broken(neon_simple_env: NeonEnv, pg_bin):
|
||||
env = neon_simple_env
|
||||
|
||||
env.endpoints.create_start("main")
|
||||
env.neon_cli.create_branch("test_broken", "empty")
|
||||
env.endpoints.create_start("test_broken")
|
||||
log.info("postgres is running")
|
||||
|
||||
log.info("THIS NEXT COMMAND WILL FAIL:")
|
||||
|
||||
2
vendor/postgres-v15
vendored
2
vendor/postgres-v15
vendored
Submodule vendor/postgres-v15 updated: 0353a8f5f7...49d5e576a5
2
vendor/postgres-v16
vendored
2
vendor/postgres-v16
vendored
Submodule vendor/postgres-v16 updated: 49386938eb...6e9a4ff624
@@ -60,7 +60,7 @@ num-bigint = { version = "0.4" }
|
||||
num-integer = { version = "0.1", features = ["i128"] }
|
||||
num-traits = { version = "0.2", features = ["i128", "libm"] }
|
||||
once_cell = { version = "1" }
|
||||
parquet = { version = "53", default-features = false, features = ["zstd"] }
|
||||
parquet = { git = "https://github.com/apache/arrow-rs", branch = "master", default-features = false, features = ["zstd"] }
|
||||
prost = { version = "0.11" }
|
||||
rand = { version = "0.8", features = ["small_rng"] }
|
||||
regex = { version = "1" }
|
||||
@@ -83,7 +83,6 @@ time = { version = "0.3", features = ["macros", "serde-well-known"] }
|
||||
tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", "process", "rt-multi-thread", "signal", "test-util"] }
|
||||
tokio-rustls = { version = "0.24" }
|
||||
tokio-util = { version = "0.7", features = ["codec", "compat", "io", "rt"] }
|
||||
toml_edit = { version = "0.22", features = ["serde"] }
|
||||
tonic = { version = "0.9", features = ["tls-roots"] }
|
||||
tower = { version = "0.4", default-features = false, features = ["balance", "buffer", "limit", "log", "timeout", "util"] }
|
||||
tracing = { version = "0.1", features = ["log"] }
|
||||
@@ -116,7 +115,7 @@ num-bigint = { version = "0.4" }
|
||||
num-integer = { version = "0.1", features = ["i128"] }
|
||||
num-traits = { version = "0.2", features = ["i128", "libm"] }
|
||||
once_cell = { version = "1" }
|
||||
parquet = { version = "53", default-features = false, features = ["zstd"] }
|
||||
parquet = { git = "https://github.com/apache/arrow-rs", branch = "master", default-features = false, features = ["zstd"] }
|
||||
proc-macro2 = { version = "1" }
|
||||
prost = { version = "0.11" }
|
||||
quote = { version = "1" }
|
||||
@@ -127,7 +126,6 @@ serde = { version = "1", features = ["alloc", "derive"] }
|
||||
syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-traits", "full", "visit"] }
|
||||
syn-f595c2ba2a3f28df = { package = "syn", version = "2", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] }
|
||||
time-macros = { version = "0.2", default-features = false, features = ["formatting", "parsing", "serde"] }
|
||||
toml_edit = { version = "0.22", features = ["serde"] }
|
||||
zstd = { version = "0.13" }
|
||||
zstd-safe = { version = "7", default-features = false, features = ["arrays", "legacy", "std", "zdict_builder"] }
|
||||
zstd-sys = { version = "2", default-features = false, features = ["legacy", "std", "zdict_builder"] }
|
||||
|
||||
Reference in New Issue
Block a user