Mirror of https://github.com/neondatabase/neon.git, synced 2026-01-31 17:20:37 +00:00.

Comparing commits: jcsp/delet ... release-38 (176 commits)
Commits in this range (SHA1):

1804111a02 43bb8bfdbb 300a5aa05e b9c111962f 83ae2bd82c f2c21447ce 93dcdb293a a93274b389
a7c0e4dcd0 3b81e0c86d e5a397cf96 cd0178efed 333574be57 79a799a143 9da06af6c9 ce1753d036
67db8432b4 4e2e44e524 ed786104f3 84b74f2bd1 fec2ad6283 98eebd4682 2f74287c9b aee1bf95e3
b9de9d75ff 7943b709e6 d7d066d493 e78ac22107 76a8f2bb44 8d59a8581f b1ddd01289 6eae4fc9aa
765455bca2 4204960942 67345d66ea 2266ee5971 b58445d855 36050e7f3d 33360ed96d 39a28d1108
efa6aa134f 2c724e56e2 feff887c6f 353d915fcf 2e38098cbc a6fe5ea1ac 05b0aed0c1 cd1705357d
6bc7561290 fbd3ac14b5 e437787c8f 3460dbf90b 6b89d99677 6cc8ea86e4 e62a492d6f a475cdf642
7002c79a47 ee6cf357b4 e5c2086b5f 5f1208296a 88e8e473cd b0a77844f6 1baf464307 e9b8e81cea
85d6194aa4 333a7a68ef 6aa4e41bee 840183e51f cbccc94b03 fce227df22 bd787e800f 4a7704b4a3
ff1119da66 4c3ba1627b 1407174fb2 ec9dcb1889 d11d781afc 4e44565b71 4ed51ad33b 1c1ebe5537
c19cb7f386 4b97d31b16 923ade3dd7 b04e711975 afd0a6b39a 99752286d8 15df93363c bc0ab741af
51d9dfeaa3 f63cb18155 0de603d88e 240913912a 91a4ea0de2 8608704f49 efef68ce99 8daefd24da
46cc8b7982 38cd90dd0c a51b269f15 43bf6d0a0f 15273a9b66 78aca668d0 acbf4148ea 6508540561
a41b5244a8 2b3189be95 248563c595 14cd6ca933 eb36403e71 3c6f779698 f67f0c1c11 edb02d3299
664a69e65b 478322ebf9 802f174072 47f9890bae 262265daad 300da5b872 7b22b5c433 ffca97bc1e
cb356f3259 c85374295f 4992160677 bd535b3371 d90c5a03af 2d02cc9079 49ad94b99f 948a217398
125381eae7 cd01bbc715 d8b5e3b88d 06d25f2186 f759b561f3 ece0555600 73ea0a0b01 d8f6d6fd6f
d24de169a7 0816168296 277b44d57a 68c2c3880e 49da498f65 2c76ba3dd7 dbe3dc69ad 8e5bb3ed49
ab0be7b8da b4c55f5d24 ede70d833c 70c3d18bb0 7a491f52c4 323c4ecb4f 3d2466607e ed478b39f4
91585a558d 93467eae1f f3aac81d19 979ad60c19 9316cb1b1f e7939a527a 36d26665e1 873347f977
e814ac16f9 ad3055d386 94e03eb452 380f26ef79 3c5b7f59d7 fee89f80b5 41cce8eaf1 f88fe0218d
cc856eca85 cf350c6002 0ce6b6a0a3 73f247d537 960be82183 806e5a6c19 8d5df07cce df7a9d1407
Cargo.lock (generated, 423 changed lines)

@@ -221,9 +221,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
 
 [[package]]
 name = "aws-config"
-version = "0.55.3"
+version = "0.56.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bcdcf0d683fe9c23d32cf5b53c9918ea0a500375a9fb20109802552658e576c9"
+checksum = "de3d533e0263bf453cc80af4c8bcc4d64e2aca293bd16f81633a36f1bf4a97cb"
 dependencies = [
  "aws-credential-types",
  "aws-http",
@@ -236,7 +236,7 @@ dependencies = [
  "aws-smithy-types",
  "aws-types",
  "bytes",
- "fastrand",
+ "fastrand 2.0.0",
  "http",
  "hyper",
  "time",
@@ -247,37 +247,23 @@ dependencies = [
 
 [[package]]
 name = "aws-credential-types"
-version = "0.55.3"
+version = "0.56.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1fcdb2f7acbc076ff5ad05e7864bdb191ca70a6fd07668dc3a1a8bcd051de5ae"
+checksum = "e4834ba01c5ad1ed9740aa222de62190e3c565d11ab7e72cc68314a258994567"
 dependencies = [
  "aws-smithy-async",
  "aws-smithy-types",
- "fastrand",
+ "fastrand 2.0.0",
  "tokio",
  "tracing",
  "zeroize",
 ]
 
-[[package]]
-name = "aws-endpoint"
-version = "0.55.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8cce1c41a6cfaa726adee9ebb9a56fcd2bbfd8be49fd8a04c5e20fd968330b04"
-dependencies = [
- "aws-smithy-http",
- "aws-smithy-types",
- "aws-types",
- "http",
- "regex",
- "tracing",
-]
-
 [[package]]
 name = "aws-http"
-version = "0.55.3"
+version = "0.56.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aadbc44e7a8f3e71c8b374e03ecd972869eb91dd2bc89ed018954a52ba84bc44"
+checksum = "72badf9de83cc7d66b21b004f09241836823b8302afb25a24708769e576a8d8f"
 dependencies = [
  "aws-credential-types",
  "aws-smithy-http",
@@ -293,23 +279,45 @@ dependencies = [
 ]
 
 [[package]]
-name = "aws-sdk-s3"
-version = "0.27.0"
+name = "aws-runtime"
+version = "0.56.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "37c77060408d653d3efa6ea7b66c1389bc35a0342352984c8bf8bcb814a8fc27"
+checksum = "cf832f522111225c02547e1e1c28137e840e4b082399d93a236e4b29193a4667"
 dependencies = [
  "aws-credential-types",
- "aws-endpoint",
  "aws-http",
- "aws-sig-auth",
  "aws-sigv4",
  "aws-smithy-async",
  "aws-smithy-eventstream",
  "aws-smithy-http",
+ "aws-smithy-runtime-api",
  "aws-smithy-types",
  "aws-types",
+ "fastrand 2.0.0",
  "http",
  "percent-encoding",
  "tracing",
+ "uuid",
 ]
 
+[[package]]
+name = "aws-sdk-s3"
+version = "0.29.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e30370b61599168d38190ad272bb91842cd81870a6ca035c05dd5726d22832c"
+dependencies = [
+ "aws-credential-types",
+ "aws-http",
+ "aws-runtime",
+ "aws-sigv4",
+ "aws-smithy-async",
+ "aws-smithy-checksums",
+ "aws-smithy-client",
+ "aws-smithy-eventstream",
+ "aws-smithy-http",
+ "aws-smithy-http-tower",
+ "aws-smithy-json",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-smithy-xml",
+ "aws-types",
@@ -320,57 +328,39 @@ dependencies = [
  "percent-encoding",
  "regex",
  "tokio-stream",
  "tower",
  "tracing",
  "url",
 ]
 
 [[package]]
 name = "aws-sdk-sts"
-version = "0.28.0"
+version = "0.29.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "265fac131fbfc188e5c3d96652ea90ecc676a934e3174eaaee523c6cec040b3b"
+checksum = "79e21aa1a5b0853969a1ef96ccfaa8ff5d57c761549786a4d5f86c1902b2586a"
 dependencies = [
  "aws-credential-types",
- "aws-endpoint",
  "aws-http",
- "aws-sig-auth",
+ "aws-runtime",
  "aws-smithy-async",
  "aws-smithy-client",
  "aws-smithy-http",
  "aws-smithy-http-tower",
  "aws-smithy-json",
  "aws-smithy-query",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
  "aws-smithy-types",
  "aws-smithy-xml",
  "aws-types",
  "bytes",
  "http",
  "regex",
  "tower",
  "tracing",
 ]
 
-[[package]]
-name = "aws-sig-auth"
-version = "0.55.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3b94acb10af0c879ecd5c7bdf51cda6679a0a4f4643ce630905a77673bfa3c61"
-dependencies = [
- "aws-credential-types",
- "aws-sigv4",
- "aws-smithy-eventstream",
- "aws-smithy-http",
- "aws-types",
- "http",
- "tracing",
-]
-
 [[package]]
 name = "aws-sigv4"
-version = "0.55.3"
+version = "0.56.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9d2ce6f507be68e968a33485ced670111d1cbad161ddbbab1e313c03d37d8f4c"
+checksum = "2cb40a93429794065f41f0581734fc56a345f6a38d8e2e3c25c7448d930cd132"
 dependencies = [
  "aws-smithy-eventstream",
  "aws-smithy-http",
@@ -389,9 +379,9 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-async"
-version = "0.55.3"
+version = "0.56.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "13bda3996044c202d75b91afeb11a9afae9db9a721c6a7a427410018e286b880"
+checksum = "6ee6d17d487c8b579423067718b3580c0908d0f01d7461813f94ec4323bad623"
 dependencies = [
  "futures-util",
  "pin-project-lite",
@@ -401,9 +391,9 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-checksums"
-version = "0.55.3"
+version = "0.56.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "07ed8b96d95402f3f6b8b57eb4e0e45ee365f78b1a924faf20ff6e97abf1eae6"
+checksum = "0d1849fd5916904513fb0862543b36f8faab43c07984dbc476132b7da1aed056"
 dependencies = [
  "aws-smithy-http",
  "aws-smithy-types",
@@ -422,23 +412,23 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-client"
-version = "0.55.3"
+version = "0.56.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0a86aa6e21e86c4252ad6a0e3e74da9617295d8d6e374d552be7d3059c41cedd"
+checksum = "bdbe0a3ad15283cc5f863a68cb6adc8e256e7c109c43c01bdd09be407219a1e9"
 dependencies = [
  "aws-smithy-async",
  "aws-smithy-http",
  "aws-smithy-http-tower",
  "aws-smithy-types",
  "bytes",
- "fastrand",
+ "fastrand 2.0.0",
  "http",
  "http-body",
  "hyper",
- "hyper-rustls 0.23.2",
+ "hyper-rustls",
  "lazy_static",
  "pin-project-lite",
- "rustls 0.20.8",
+ "rustls",
  "tokio",
  "tower",
  "tracing",
@@ -446,9 +436,9 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-eventstream"
-version = "0.55.3"
+version = "0.56.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "460c8da5110835e3d9a717c61f5556b20d03c32a1dec57f8fc559b360f733bb8"
+checksum = "a56afef1aa766f512b4970b4c3150b9bf2df8035939723830df4b30267e2d7cb"
 dependencies = [
  "aws-smithy-types",
  "bytes",
@@ -457,9 +447,9 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-http"
-version = "0.55.3"
+version = "0.56.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2b3b693869133551f135e1f2c77cb0b8277d9e3e17feaf2213f735857c4f0d28"
+checksum = "34dc313472d727f5ef44fdda93e668ebfe17380c99dee512c403e3ca51863bb9"
 dependencies = [
  "aws-smithy-eventstream",
  "aws-smithy-types",
@@ -480,9 +470,9 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-http-tower"
-version = "0.55.3"
+version = "0.56.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ae4f6c5798a247fac98a867698197d9ac22643596dc3777f0c76b91917616b9"
+checksum = "1dd50fca5a4ea4ec3771689ee93bf06b32de02a80af01ed93a8f8a4ed90e8483"
 dependencies = [
  "aws-smithy-http",
  "aws-smithy-types",
@@ -496,50 +486,88 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-json"
-version = "0.55.3"
+version = "0.56.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "23f9f42fbfa96d095194a632fbac19f60077748eba536eb0b9fecc28659807f8"
+checksum = "3591dd7c2fe01ab8025e4847a0a0f6d0c2b2269714688ffb856f9cf6c6d465cf"
 dependencies = [
  "aws-smithy-types",
 ]
 
 [[package]]
 name = "aws-smithy-query"
-version = "0.55.3"
+version = "0.56.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "98819eb0b04020a1c791903533b638534ae6c12e2aceda3e6e6fba015608d51d"
+checksum = "dbabb1145e65dd57ae72d91a2619d3f5fba40b68a5f40ba009c30571dfd60aff"
 dependencies = [
  "aws-smithy-types",
  "urlencoding",
 ]
 
 [[package]]
-name = "aws-smithy-types"
-version = "0.55.3"
+name = "aws-smithy-runtime"
+version = "0.56.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "16a3d0bf4f324f4ef9793b86a1701d9700fbcdbd12a846da45eed104c634c6e8"
+checksum = "3687fb838d4ad1c883b62eb59115bc9fb02c4f308aac49a7df89627067f6eb0d"
 dependencies = [
+ "aws-smithy-async",
+ "aws-smithy-client",
+ "aws-smithy-http",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "bytes",
+ "fastrand 2.0.0",
+ "http",
+ "http-body",
+ "once_cell",
+ "pin-project-lite",
+ "pin-utils",
+ "tokio",
+ "tracing",
+]
+
+[[package]]
+name = "aws-smithy-runtime-api"
+version = "0.56.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5cfbf1e5c2108b41f5ca607cde40dd5109fecc448f5d30c8e614b61f36dce704"
+dependencies = [
+ "aws-smithy-async",
+ "aws-smithy-http",
+ "aws-smithy-types",
+ "bytes",
+ "http",
+ "tokio",
+ "tracing",
+]
+
+[[package]]
+name = "aws-smithy-types"
+version = "0.56.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eed0a94eefd845a2a78677f1b72f02fa75802d38f7f59be675add140279aa8bf"
+dependencies = [
  "base64-simd",
  "itoa",
  "num-integer",
  "ryu",
  "serde",
  "time",
 ]
 
 [[package]]
 name = "aws-smithy-xml"
-version = "0.55.3"
+version = "0.56.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b1b9d12875731bd07e767be7baad95700c3137b56730ec9ddeedb52a5e5ca63b"
+checksum = "c88052c812f696143ad7ba729c63535209ff0e0f49e31a6d2b1205208ea6ea79"
 dependencies = [
  "xmlparser",
 ]
 
 [[package]]
 name = "aws-types"
-version = "0.55.3"
+version = "0.56.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6dd209616cc8d7bfb82f87811a5c655dc97537f592689b18743bddf5dc5c4829"
+checksum = "6bceb8cf724ad057ad7f327d0d256d7147b3eac777b39849a26189e003dc9782"
 dependencies = [
  "aws-credential-types",
  "aws-smithy-async",
@@ -1402,6 +1430,12 @@ dependencies = [
  "instant",
 ]
 
+[[package]]
+name = "fastrand"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764"
+
 [[package]]
 name = "filetime"
 version = "0.2.21"
@@ -1837,21 +1871,6 @@ dependencies = [
  "want",
 ]
 
-[[package]]
-name = "hyper-rustls"
-version = "0.23.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1788965e61b367cd03a62950836d5cd41560c3577d90e40e0819373194d1661c"
-dependencies = [
- "http",
- "hyper",
- "log",
- "rustls 0.20.8",
- "rustls-native-certs",
- "tokio",
- "tokio-rustls 0.23.4",
-]
-
 [[package]]
 name = "hyper-rustls"
 version = "0.24.0"
@@ -1860,9 +1879,11 @@ checksum = "0646026eb1b3eea4cd9ba47912ea5ce9cc07713d105b1a14698f4e6433d348b7"
 dependencies = [
  "http",
  "hyper",
- "rustls 0.21.1",
+ "log",
+ "rustls",
  "rustls-native-certs",
  "tokio",
- "tokio-rustls 0.24.0",
+ "tokio-rustls",
 ]
 
 [[package]]
@@ -2054,7 +2075,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6971da4d9c3aa03c3d8f3ff0f4155b534aad021292003895a469716b2a230378"
 dependencies = [
  "base64 0.21.1",
- "pem",
+ "pem 1.1.1",
  "ring",
  "serde",
  "serde_json",
@@ -2780,6 +2801,16 @@ dependencies = [
  "base64 0.13.1",
 ]
 
+[[package]]
+name = "pem"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6b13fe415cdf3c8e44518e18a7c95a13431d9bdf6d15367d82b23c377fdd441a"
+dependencies = [
+ "base64 0.21.1",
+ "serde",
+]
+
 [[package]]
 name = "percent-encoding"
 version = "2.2.0"
@@ -2942,14 +2973,14 @@ dependencies = [
  "futures",
  "once_cell",
  "pq_proto",
- "rustls 0.20.8",
+ "rustls",
  "rustls-pemfile",
  "serde",
  "thiserror",
  "tokio",
  "tokio-postgres",
  "tokio-postgres-rustls",
- "tokio-rustls 0.23.4",
+ "tokio-rustls",
  "tracing",
  "workspace_hack",
 ]
@@ -3174,7 +3205,7 @@ dependencies = [
  "reqwest-tracing",
  "routerify",
  "rstest",
- "rustls 0.20.8",
+ "rustls",
  "rustls-pemfile",
  "scopeguard",
  "serde",
@@ -3187,7 +3218,7 @@ dependencies = [
  "tokio",
  "tokio-postgres",
  "tokio-postgres-rustls",
- "tokio-rustls 0.23.4",
+ "tokio-rustls",
  "tokio-util",
  "tracing",
  "tracing-opentelemetry",
@@ -3196,7 +3227,7 @@ dependencies = [
  "url",
  "utils",
  "uuid",
- "webpki-roots 0.23.0",
+ "webpki-roots 0.25.2",
  "workspace_hack",
  "x509-parser",
 ]
@@ -3264,11 +3295,11 @@ dependencies = [
 
 [[package]]
 name = "rcgen"
-version = "0.10.0"
+version = "0.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ffbe84efe2f38dea12e9bfc1f65377fdf03e53a18cb3b995faedf7934c7e785b"
+checksum = "4954fbc00dcd4d8282c987710e50ba513d351400dbdd00e803a05172a90d8976"
 dependencies = [
- "pem",
+ "pem 2.0.1",
  "ring",
  "time",
  "yasna",
@@ -3324,6 +3355,12 @@ version = "0.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78"
 
+[[package]]
+name = "relative-path"
+version = "1.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c707298afce11da2efef2f600116fa93ffa7a032b5d7b628aa17711ec81383ca"
+
 [[package]]
 name = "remote_storage"
 version = "0.1.0"
@@ -3354,9 +3391,9 @@ dependencies = [
 
 [[package]]
 name = "reqwest"
-version = "0.11.18"
+version = "0.11.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55"
+checksum = "20b9b67e2ca7dd9e9f9285b759de30ff538aab981abaaf7bc9bd90b84a0126c3"
 dependencies = [
  "base64 0.21.1",
  "bytes",
@@ -3367,7 +3404,7 @@ dependencies = [
  "http",
  "http-body",
  "hyper",
- "hyper-rustls 0.24.0",
+ "hyper-rustls",
  "ipnet",
  "js-sys",
  "log",
@@ -3376,19 +3413,19 @@ dependencies = [
  "once_cell",
  "percent-encoding",
  "pin-project-lite",
- "rustls 0.21.1",
+ "rustls",
  "rustls-pemfile",
  "serde",
  "serde_json",
  "serde_urlencoded",
  "tokio",
- "tokio-rustls 0.24.0",
+ "tokio-rustls",
  "tower-service",
  "url",
  "wasm-bindgen",
  "wasm-bindgen-futures",
  "web-sys",
- "webpki-roots 0.22.6",
+ "webpki-roots 0.25.2",
  "winreg",
 ]
@@ -3498,9 +3535,9 @@ dependencies = [
 
 [[package]]
 name = "rstest"
-version = "0.17.0"
+version = "0.18.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "de1bb486a691878cd320c2f0d319ba91eeaa2e894066d8b5f8f117c000e9d962"
+checksum = "97eeab2f3c0a199bc4be135c36c924b6590b88c377d416494288c14f2db30199"
 dependencies = [
  "futures",
  "futures-timer",
@@ -3510,15 +3547,18 @@ dependencies = [
 
 [[package]]
 name = "rstest_macros"
-version = "0.17.0"
+version = "0.18.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "290ca1a1c8ca7edb7c3283bd44dc35dd54fdec6253a3912e201ba1072018fca8"
+checksum = "d428f8247852f894ee1be110b375111b586d4fa431f6c46e64ba5a0dcccbe605"
 dependencies = [
  "cfg-if",
+ "glob",
  "proc-macro2",
  "quote",
+ "regex",
+ "relative-path",
  "rustc_version",
- "syn 1.0.109",
+ "syn 2.0.28",
  "unicode-ident",
 ]
@@ -3582,25 +3622,13 @@ dependencies = [
 
 [[package]]
 name = "rustls"
-version = "0.20.8"
+version = "0.21.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fff78fc74d175294f4e83b28343315ffcfb114b156f0185e9741cb5570f50e2f"
+checksum = "1d1feddffcfcc0b33f5c6ce9a29e341e4cd59c3f78e7ee45f4a40c038b1d6cbb"
 dependencies = [
  "log",
  "ring",
- "sct",
- "webpki",
-]
-
-[[package]]
-name = "rustls"
-version = "0.21.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c911ba11bc8433e811ce56fde130ccf32f5127cab0e0194e9c68c5a5b671791e"
-dependencies = [
- "log",
- "ring",
- "rustls-webpki",
+ "rustls-webpki 0.101.4",
  "sct",
 ]
@@ -3635,6 +3663,16 @@ dependencies = [
  "untrusted",
 ]
 
+[[package]]
+name = "rustls-webpki"
+version = "0.101.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d93931baf2d282fff8d3a532bbfd7653f734643161b87e3e01e59a04439bf0d"
+dependencies = [
+ "ring",
+ "untrusted",
+]
+
 [[package]]
 name = "rustversion"
 version = "1.0.12"
@@ -3772,27 +3810,28 @@ checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed"
 
 [[package]]
 name = "sentry"
-version = "0.30.0"
+version = "0.31.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b5ce6d3512e2617c209ec1e86b0ca2fea06454cd34653c91092bf0f3ec41f8e3"
+checksum = "2e95efd0cefa32028cdb9766c96de71d96671072f9fb494dc9fb84c0ef93e52b"
 dependencies = [
  "httpdate",
  "reqwest",
- "rustls 0.20.8",
+ "rustls",
  "sentry-backtrace",
  "sentry-contexts",
  "sentry-core",
  "sentry-panic",
+ "sentry-tracing",
  "tokio",
  "ureq",
- "webpki-roots 0.22.6",
+ "webpki-roots 0.25.2",
 ]
 
 [[package]]
 name = "sentry-backtrace"
-version = "0.30.0"
+version = "0.31.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e7fe408d4d1f8de188a9309916e02e129cbe51ca19e55badea5a64899399b1a"
+checksum = "6ac2bac6f310c4c4c4bb094d1541d32ae497f8c5c23405e85492cefdfe0971a9"
 dependencies = [
  "backtrace",
  "once_cell",
@@ -3802,9 +3841,9 @@ dependencies = [
 
 [[package]]
 name = "sentry-contexts"
-version = "0.30.0"
+version = "0.31.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5695096a059a89973ec541062d331ff4c9aeef9c2951416c894f0fff76340e7d"
+checksum = "6c3e17295cecdbacf66c5bd38d6e1147e09e1e9d824d2d5341f76638eda02a3a"
 dependencies = [
  "hostname",
  "libc",
@@ -3816,9 +3855,9 @@ dependencies = [
 
 [[package]]
 name = "sentry-core"
-version = "0.30.0"
+version = "0.31.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b22828bfd118a7b660cf7a155002a494755c0424cebb7061e4743ecde9c7dbc"
+checksum = "8339474f587f36cb110fa1ed1b64229eea6d47b0b886375579297b7e47aeb055"
 dependencies = [
  "once_cell",
  "rand",
@@ -3829,19 +3868,31 @@ dependencies = [
 
 [[package]]
 name = "sentry-panic"
-version = "0.30.0"
+version = "0.31.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1f4ced2a7a8c14899d58eec402d946f69d5ed26a3fc363a7e8b1e5cb88473a01"
+checksum = "875b69f506da75bd664029eafb05f8934297d2990192896d17325f066bd665b7"
 dependencies = [
  "sentry-backtrace",
  "sentry-core",
 ]
 
 [[package]]
-name = "sentry-types"
-version = "0.30.0"
+name = "sentry-tracing"
+version = "0.31.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "360ee3270f7a4a1eee6c667f7d38360b995431598a73b740dfe420da548d9cc9"
+checksum = "89feead9bdd116f8035e89567651340fc382db29240b6c55ef412078b08d1aa3"
 dependencies = [
+ "sentry-backtrace",
+ "sentry-core",
+ "tracing-core",
+ "tracing-subscriber",
+]
+
+[[package]]
+name = "sentry-types"
+version = "0.31.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "99dc599bd6646884fc403d593cdcb9816dd67c50cff3271c01ff123617908dcd"
+dependencies = [
  "debugid",
  "getrandom",
@@ -4248,7 +4299,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998"
 dependencies = [
  "cfg-if",
- "fastrand",
+ "fastrand 1.9.0",
  "redox_syscall 0.3.5",
  "rustix 0.37.19",
  "windows-sys 0.45.0",
@@ -4378,16 +4429,16 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 
 [[package]]
 name = "tls-listener"
-version = "0.6.0"
+version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "97abcaa5d5850d3b469898d1e0939b57c3afb4475122e792cdd1c82b07f5de06"
+checksum = "81294c017957a1a69794f506723519255879e15a870507faf45dfed288b763dd"
 dependencies = [
  "futures-util",
  "hyper",
  "pin-project-lite",
  "thiserror",
  "tokio",
- "tokio-rustls 0.23.4",
+ "tokio-rustls",
 ]
 
 [[package]]
@@ -4464,27 +4515,16 @@ dependencies = [
 
 [[package]]
 name = "tokio-postgres-rustls"
-version = "0.9.0"
+version = "0.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "606f2b73660439474394432239c82249c0d45eb5f23d91f401be1e33590444a7"
+checksum = "dd5831152cb0d3f79ef5523b357319ba154795d64c7078b2daa95a803b54057f"
 dependencies = [
  "futures",
  "ring",
- "rustls 0.20.8",
+ "rustls",
  "tokio",
  "tokio-postgres",
- "tokio-rustls 0.23.4",
-]
-
-[[package]]
-name = "tokio-rustls"
-version = "0.23.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59"
-dependencies = [
- "rustls 0.20.8",
- "tokio",
- "webpki",
+ "tokio-rustls",
 ]
 
 [[package]]
@@ -4493,7 +4533,7 @@ version = "0.24.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e0d409377ff5b1e3ca6437aa86c1eb7d40c134bfec254e44c830defa92669db5"
 dependencies = [
- "rustls 0.21.1",
+ "rustls",
  "tokio",
 ]
@@ -4651,7 +4691,7 @@ dependencies = [
  "rustls-native-certs",
  "rustls-pemfile",
  "tokio",
- "tokio-rustls 0.24.0",
+ "tokio-rustls",
  "tokio-stream",
  "tower",
  "tower-layer",
@@ -4949,17 +4989,17 @@ checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"
 
 [[package]]
 name = "ureq"
-version = "2.6.2"
+version = "2.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "338b31dd1314f68f3aabf3ed57ab922df95ffcd902476ca7ba3c4ce7b908c46d"
+checksum = "0b11c96ac7ee530603dcdf68ed1557050f374ce55a5a07193ebf8cbc9f8927e9"
 dependencies = [
- "base64 0.13.1",
+ "base64 0.21.1",
  "log",
  "once_cell",
- "rustls 0.20.8",
+ "rustls",
+ "rustls-webpki 0.100.2",
  "url",
- "webpki",
- "webpki-roots 0.22.6",
+ "webpki-roots 0.23.1",
 ]
 
 [[package]]
@@ -5230,32 +5270,19 @@ dependencies = [
 ]
 
 [[package]]
-name = "webpki"
-version = "0.22.0"
+name = "webpki-roots"
+version = "0.23.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f095d78192e208183081cc07bc5515ef55216397af48b873e5edcd72637fa1bd"
+checksum = "b03058f88386e5ff5310d9111d53f48b17d732b401aeb83a8d5190f2ac459338"
 dependencies = [
- "ring",
- "untrusted",
+ "rustls-webpki 0.100.2",
 ]
 
 [[package]]
 name = "webpki-roots"
-version = "0.22.6"
+version = "0.25.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b6c71e40d7d2c34a5106301fb632274ca37242cd0c9d3e64dbece371a40a2d87"
-dependencies = [
- "webpki",
-]
-
-[[package]]
-name = "webpki-roots"
-version = "0.23.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aa54963694b65584e170cf5dc46aeb4dcaa5584e652ff5f3952e56d66aff0125"
-dependencies = [
- "rustls-webpki",
-]
+checksum = "14247bb57be4f377dfb94c72830b8ce8fc6beac03cf4bf7b9732eadd414123fc"
 
 [[package]]
 name = "which"
@@ -5466,11 +5493,12 @@ dependencies = [
 
 [[package]]
 name = "winreg"
-version = "0.10.1"
+version = "0.50.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d"
+checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1"
 dependencies = [
- "winapi",
+ "cfg-if",
+ "windows-sys 0.48.0",
 ]
 
 [[package]]
@@ -5479,6 +5507,7 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
+ "axum",
  "base64 0.21.1",
  "bytes",
  "cc",
  "chrono",
@@ -5509,7 +5538,7 @@ dependencies = [
  "regex-syntax 0.7.2",
  "reqwest",
  "ring",
- "rustls 0.20.8",
+ "rustls",
  "scopeguard",
  "serde",
  "serde_json",
@@ -5518,7 +5547,7 @@ dependencies = [
  "syn 1.0.109",
  "syn 2.0.28",
  "tokio",
- "tokio-rustls 0.23.4",
+ "tokio-rustls",
  "tokio-util",
  "toml_datetime",
  "toml_edit",
Cargo.toml (26 changed lines)

@@ -37,11 +37,11 @@ async-compression = { version = "0.4.0", features = ["tokio", "gzip"] }
 flate2 = "1.0.26"
 async-stream = "0.3"
 async-trait = "0.1"
-aws-config = { version = "0.55", default-features = false, features=["rustls"] }
-aws-sdk-s3 = "0.27"
-aws-smithy-http = "0.55"
-aws-credential-types = "0.55"
-aws-types = "0.55"
+aws-config = { version = "0.56", default-features = false, features=["rustls"] }
+aws-sdk-s3 = "0.29"
+aws-smithy-http = "0.56"
+aws-credential-types = "0.56"
+aws-types = "0.56"
 axum = { version = "0.6.20", features = ["ws"] }
 base64 = "0.13.0"
 bincode = "1.3"
@@ -105,12 +105,12 @@ reqwest-middleware = "0.2.0"
 reqwest-retry = "0.2.2"
 routerify = "3"
 rpds = "0.13"
-rustls = "0.20"
+rustls = "0.21"
 rustls-pemfile = "1"
 rustls-split = "0.3"
 scopeguard = "1.1"
 sysinfo = "0.29.2"
-sentry = { version = "0.30", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
+sentry = { version = "0.31", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
 serde_with = "2.0"
@@ -125,11 +125,11 @@ sync_wrapper = "0.1.2"
 tar = "0.4"
 test-context = "0.1"
 thiserror = "1.0"
-tls-listener = { version = "0.6", features = ["rustls", "hyper-h1"] }
+tls-listener = { version = "0.7", features = ["rustls", "hyper-h1"] }
 tokio = { version = "1.17", features = ["macros"] }
 tokio-io-timeout = "1.2.0"
-tokio-postgres-rustls = "0.9.0"
-tokio-rustls = "0.23"
+tokio-postgres-rustls = "0.10.0"
+tokio-rustls = "0.24"
 tokio-stream = "0.1"
 tokio-tar = "0.3"
 tokio-util = { version = "0.7", features = ["io"] }
@@ -143,7 +143,7 @@ tracing-subscriber = { version = "0.3", default_features = false, features = ["s
 url = "2.2"
 uuid = { version = "1.2", features = ["v4", "serde"] }
 walkdir = "2.3.2"
-webpki-roots = "0.23"
+webpki-roots = "0.25"
 x509-parser = "0.15"
 
 ## TODO replace this with tracing
@@ -182,8 +182,8 @@ workspace_hack = { version = "0.1", path = "./workspace_hack/" }
 
 ## Build dependencies
 criterion = "0.5.1"
-rcgen = "0.10"
-rstest = "0.17"
+rcgen = "0.11"
+rstest = "0.18"
 tempfile = "3.4"
 tonic-build = "0.9"
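The rustls bump from 0.20 to 0.21 ripples through most of the TLS pins above (tokio-rustls, tls-listener, tokio-postgres-rustls, webpki-roots). A minimal sketch of what callers typically change when taking this bump, assuming the public rustls 0.21 and webpki-roots 0.25 APIs (this is not code from this repository):

```rust
use std::sync::Arc;

use rustls::{ClientConfig, OwnedTrustAnchor, RootCertStore};

// Sketch: assemble a rustls 0.21 ClientConfig from the webpki-roots 0.25
// trust anchors. In 0.25 the roots are a plain slice of TrustAnchor values
// rather than the old TLSServerTrustAnchors wrapper backed by the (now
// removed) `webpki` crate.
fn client_config() -> Arc<ClientConfig> {
    let mut roots = RootCertStore::empty();
    roots.add_trust_anchors(webpki_roots::TLS_SERVER_ROOTS.iter().map(|ta| {
        OwnedTrustAnchor::from_subject_spki_name_constraints(
            ta.subject,
            ta.spki,
            ta.name_constraints,
        )
    }));

    Arc::new(
        ClientConfig::builder()
            .with_safe_defaults()
            .with_root_certificates(roots)
            .with_no_client_auth(),
    )
}
```

The notable difference visible in the lockfile is the same one this sketch reflects: rustls 0.21 validates certificates through `rustls-webpki` instead of `webpki`, which is why the `webpki` entry disappears from Cargo.lock above.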
@@ -1,12 +1,39 @@
-use anyhow::{anyhow, Result};
+use anyhow::{anyhow, Ok, Result};
 use postgres::Client;
 use tokio_postgres::NoTls;
 use tracing::{error, instrument};
 
 use crate::compute::ComputeNode;
 
+/// Create a special service table for availability checks
+/// only if it does not exist already.
+pub fn create_availability_check_data(client: &mut Client) -> Result<()> {
+    let query = "
+        DO $$
+        BEGIN
+            IF NOT EXISTS(
+                SELECT 1
+                FROM pg_catalog.pg_tables
+                WHERE tablename = 'health_check'
+            )
+            THEN
+                CREATE TABLE health_check (
+                    id serial primary key,
+                    updated_at timestamptz default now()
+                );
+                INSERT INTO health_check VALUES (1, now())
+                    ON CONFLICT (id) DO UPDATE
+                    SET updated_at = now();
+            END IF;
+        END
+        $$;";
+    client.execute(query, &[])?;
+
+    Ok(())
+}
+
 /// Update timestamp in a row in a special service table to check
 /// that we can actually write some data in this particular timeline.
 /// Create table if it's missing.
 #[instrument(skip_all)]
 pub async fn check_writability(compute: &ComputeNode) -> Result<()> {
     // Connect to the database.
@@ -24,19 +51,15 @@ pub async fn check_writability(compute: &ComputeNode) -> Result<()> {
     });
 
     let query = "
-        CREATE TABLE IF NOT EXISTS health_check (
-            id serial primary key,
-            updated_at timestamptz default now()
-        );
         INSERT INTO health_check VALUES (1, now())
             ON CONFLICT (id) DO UPDATE
             SET updated_at = now();";
 
     let result = client.simple_query(query).await?;
 
-    if result.len() != 2 {
+    if result.len() != 1 {
         return Err(anyhow::format_err!(
-            "expected 2 query results, but got {}",
+            "expected 1 query result, but got {}",
             result.len()
         ));
     }
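With table creation moved into `create_availability_check_data`, the writability probe issues a single INSERT ... ON CONFLICT statement, so `simple_query` should report exactly one result instead of two. A standalone sketch of that invariant, assuming a reachable Postgres with the `health_check` table already created (the connection string is illustrative):

```rust
use tokio_postgres::{NoTls, SimpleQueryMessage};

// Sketch only: runs the same single-statement upsert the new
// check_writability uses and confirms exactly one command completion
// comes back; the old two-statement query produced two.
async fn probe(connstr: &str) -> anyhow::Result<()> {
    let (client, conn) = tokio_postgres::connect(connstr, NoTls).await?;
    tokio::spawn(conn); // drive the connection in the background

    let messages = client
        .simple_query(
            "INSERT INTO health_check VALUES (1, now())
             ON CONFLICT (id) DO UPDATE SET updated_at = now();",
        )
        .await?;

    // simple_query yields one CommandComplete per SQL statement executed.
    let completions = messages
        .iter()
        .filter(|m| matches!(m, SimpleQueryMessage::CommandComplete(_)))
        .count();
    anyhow::ensure!(completions == 1, "expected 1 result, got {completions}");
    Ok(())
}
```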
@@ -27,6 +27,7 @@ use utils::measured_stream::MeasuredReader;
 
 use remote_storage::{DownloadError, GenericRemoteStorage, RemotePath};
 
+use crate::checker::create_availability_check_data;
 use crate::pg_helpers::*;
 use crate::spec::*;
 use crate::sync_sk::{check_if_synced, ping_safekeeper};
@@ -696,6 +697,7 @@ impl ComputeNode {
         handle_role_deletions(spec, self.connstr.as_str(), &mut client)?;
         handle_grants(spec, self.connstr.as_str())?;
         handle_extensions(spec, &mut client)?;
+        create_availability_check_data(&mut client)?;
 
         // 'Close' connection
         drop(client);
@@ -1078,7 +1080,8 @@ LIMIT 100",
 
         let mut download_tasks = Vec::new();
         for library in &libs_vec {
-            let (ext_name, ext_path) = remote_extensions.get_ext(library, true)?;
+            let (ext_name, ext_path) =
+                remote_extensions.get_ext(library, true, &self.build_tag, &self.pgversion)?;
             download_tasks.push(self.download_extension(ext_name, ext_path));
         }
         let results = join_all(download_tasks).await;
@@ -180,7 +180,19 @@ pub async fn download_extension(
 // Create extension control files from spec
 pub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) {
     let local_sharedir = Path::new(&get_pg_config("--sharedir", pgbin)).join("extension");
-    for ext_data in remote_extensions.extension_data.values() {
+    for (ext_name, ext_data) in remote_extensions.extension_data.iter() {
+        // Check if extension is present in public or custom.
+        // If not, then it is not allowed to be used by this compute.
+        if let Some(public_extensions) = &remote_extensions.public_extensions {
+            if !public_extensions.contains(ext_name) {
+                if let Some(custom_extensions) = &remote_extensions.custom_extensions {
+                    if !custom_extensions.contains(ext_name) {
+                        continue; // skip this extension, it is not allowed
+                    }
+                }
+            }
+        }
+
         for (control_name, control_content) in &ext_data.control_data {
             let control_path = local_sharedir.join(control_name);
             if !control_path.exists() {
@@ -169,7 +169,12 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
             }
         };
 
-        remote_extensions.get_ext(&filename, is_library)
+        remote_extensions.get_ext(
+            &filename,
+            is_library,
+            &compute.build_tag,
+            &compute.pgversion,
+        )
     };
 
     match ext {
@@ -138,7 +138,13 @@ impl ComputeControlPlane {
             mode,
             tenant_id,
             pg_version,
-            skip_pg_catalog_updates: false,
+            // We don't setup roles and databases in the spec locally, so we don't need to
+            // do catalog updates. Catalog updates also include check availability
+            // data creation. Yet, we have tests that check that size and db dump
+            // before and after start are the same. So, skip catalog updates,
+            // with this we basically test a case of waking up an idle compute, where
+            // we also skip catalog updates in the cloud.
+            skip_pg_catalog_updates: true,
         });
 
         ep.create_endpoint_dir()?;
@@ -152,7 +158,7 @@ impl ComputeControlPlane {
                 http_port,
                 pg_port,
                 pg_version,
-                skip_pg_catalog_updates: false,
+                skip_pg_catalog_updates: true,
             })?,
         )?;
         std::fs::write(
@@ -89,6 +89,8 @@ impl RemoteExtSpec {
         &self,
         ext_name: &str,
         is_library: bool,
+        build_tag: &str,
+        pg_major_version: &str,
     ) -> anyhow::Result<(String, RemotePath)> {
         let mut real_ext_name = ext_name;
         if is_library {
@@ -104,11 +106,32 @@ impl RemoteExtSpec {
                 .ok_or(anyhow::anyhow!("library {} is not found", lib_raw_name))?;
         }
 
+        // Check if extension is present in public or custom.
+        // If not, then it is not allowed to be used by this compute.
+        if let Some(public_extensions) = &self.public_extensions {
+            if !public_extensions.contains(&real_ext_name.to_string()) {
+                if let Some(custom_extensions) = &self.custom_extensions {
+                    if !custom_extensions.contains(&real_ext_name.to_string()) {
+                        return Err(anyhow::anyhow!("extension {} is not found", real_ext_name));
+                    }
+                }
+            }
+        }
+
         match self.extension_data.get(real_ext_name) {
-            Some(ext_data) => Ok((
-                real_ext_name.to_string(),
-                RemotePath::from_string(&ext_data.archive_path)?,
-            )),
+            Some(_ext_data) => {
+                // Construct the path to the extension archive
+                // BUILD_TAG/PG_MAJOR_VERSION/extensions/EXTENSION_NAME.tar.zst
+                //
+                // Keep it in sync with path generation in
+                // https://github.com/neondatabase/build-custom-extensions/tree/main
+                let archive_path_str =
+                    format!("{build_tag}/{pg_major_version}/extensions/{real_ext_name}.tar.zst");
+                Ok((
+                    real_ext_name.to_string(),
+                    RemotePath::from_string(&archive_path_str)?,
+                ))
+            }
            None => Err(anyhow::anyhow!(
                "real_ext_name {} is not found",
                real_ext_name
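`get_ext` now derives the archive location deterministically from the build tag and Postgres major version instead of trusting a path stored in the spec. A standalone sketch of the resulting key layout (the argument values are made up for illustration):

```rust
// Sketch: the remote key layout get_ext constructs,
// BUILD_TAG/PG_MAJOR_VERSION/extensions/EXTENSION_NAME.tar.zst.
fn archive_key(build_tag: &str, pg_major_version: &str, ext: &str) -> String {
    format!("{build_tag}/{pg_major_version}/extensions/{ext}.tar.zst")
}

fn main() {
    // Illustrative values only; real tags and versions come from the compute.
    assert_eq!(
        archive_key("release-1234", "v15", "pg_embedding"),
        "release-1234/v15/extensions/pg_embedding.tar.zst"
    );
}
```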
@@ -13,14 +13,13 @@ use std::{
     collections::HashMap,
     fmt::Debug,
     num::{NonZeroU32, NonZeroUsize},
-    path::{Path, PathBuf, StripPrefixError},
+    path::{Path, PathBuf},
     pin::Pin,
     sync::Arc,
 };
 
 use anyhow::{bail, Context};
-
 use serde::{Deserialize, Serialize};
 use tokio::io;
 use toml_edit::Item;
 use tracing::info;
@@ -45,34 +44,12 @@ pub const DEFAULT_MAX_KEYS_PER_LIST_RESPONSE: Option<i32> = None;
 
 const REMOTE_STORAGE_PREFIX_SEPARATOR: char = '/';
 
-// From the S3 spec
 pub const MAX_KEYS_PER_DELETE: usize = 1000;
 
 /// Path on the remote storage, relative to some inner prefix.
 /// The prefix is an implementation detail, that allows representing local paths
 /// as the remote ones, stripping the local storage prefix away.
 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct RemotePath(PathBuf);
 
-impl Serialize for RemotePath {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        serializer.collect_str(self)
-    }
-}
-
-impl<'de> Deserialize<'de> for RemotePath {
-    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
-    where
-        D: serde::Deserializer<'de>,
-    {
-        let str = String::deserialize(deserializer)?;
-        Ok(Self(PathBuf::from(&str)))
-    }
-}
-
 impl std::fmt::Display for RemotePath {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         write!(f, "{}", self.0.display())
@@ -111,10 +88,6 @@ impl RemotePath {
     pub fn extension(&self) -> Option<&str> {
         self.0.extension()?.to_str()
     }
-
-    pub fn strip_prefix(&self, p: &RemotePath) -> Result<&Path, StripPrefixError> {
-        self.0.strip_prefix(&p.0)
-    }
 }
 
 /// Storage (potentially remote) API to manage its state.
@@ -193,8 +166,6 @@ pub enum DownloadError {
     BadInput(anyhow::Error),
     /// The file was not found in the remote storage.
     NotFound,
-    /// The client was shut down
-    Shutdown,
     /// The file was found in the remote storage, but the download failed.
     Other(anyhow::Error),
 }
@@ -206,7 +177,6 @@ impl std::fmt::Display for DownloadError {
             write!(f, "Failed to download a remote file due to user input: {e}")
         }
         DownloadError::NotFound => write!(f, "No file found for the remote object id given"),
-        DownloadError::Shutdown => write!(f, "Client shutting down"),
         DownloadError::Other(e) => write!(f, "Failed to download a remote file: {e:?}"),
     }
 }
@@ -271,18 +241,6 @@ impl GenericRemoteStorage {
         }
     }
 
-    /// For small, simple downloads where caller doesn't want to handle the streaming: return the full body
-    pub async fn download_all(&self, from: &RemotePath) -> Result<Vec<u8>, DownloadError> {
-        let mut download = self.download(from).await?;
-
-        let mut bytes = Vec::new();
-        tokio::io::copy(&mut download.download_stream, &mut bytes)
-            .await
-            .with_context(|| format!("Failed to download body from {from}"))
-            .map_err(DownloadError::Other)?;
-        Ok(bytes)
-    }
-
     pub async fn download_byte_range(
         &self,
         from: &RemotePath,
@@ -22,7 +22,7 @@ use aws_sdk_s3::{
     Client,
 };
 use aws_smithy_http::body::SdkBody;
-use hyper::{Body, StatusCode};
+use hyper::Body;
 use scopeguard::ScopeGuard;
 use tokio::{
     io::{self, AsyncRead},
@@ -529,16 +529,7 @@ impl RemoteStorage for S3Bucket {
                 }
             }
             Err(e) => {
-                if let Some(r) = e.raw_response() {
-                    if r.http().status() == StatusCode::NOT_FOUND {
-                        // 404 is acceptable for deletions. AWS S3 does not return this, but
-                        // some other implementations might (e.g. GCS XML API returns 404 on DeleteObject
-                        // to a missing key)
-                        continue;
-                    } else {
-                        return Err(anyhow::format_err!("DeleteObjects response error: {e}"));
-                    }
-                }
                 return Err(e.into());
             }
         }
     }
libs/utils/src/generation.rs (new file, 113 lines)

@@ -0,0 +1,113 @@
+use std::fmt::Debug;
+
+use serde::{Deserialize, Serialize};
+
+/// Tenant generations are used to provide split-brain safety and allow
+/// multiple pageservers to attach the same tenant concurrently.
+///
+/// See docs/rfcs/025-generation-numbers.md for detail on how generation
+/// numbers are used.
+#[derive(Copy, Clone, Eq, PartialEq, PartialOrd, Ord)]
+pub enum Generation {
+    // Generations with this magic value will not add a suffix to S3 keys, and will not
+    // be included in persisted index_part.json. This value is only to be used
+    // during migration from pre-generation metadata to generation-aware metadata,
+    // and should eventually go away.
+    //
+    // A special Generation is used rather than always wrapping Generation in an Option,
+    // so that code handling generations doesn't have to be aware of the legacy
+    // case everywhere it touches a generation.
+    None,
+    // Generations with this magic value may never be used to construct S3 keys:
+    // we will panic if someone tries to. This is for Tenants in the "Broken" state,
+    // so that we can satisfy their constructor with a Generation without risking
+    // a code bug using it in an S3 write (broken tenants should never write)
+    Broken,
+    Valid(u32),
+}
+
+/// The Generation type represents a number associated with a Tenant, which
+/// increments every time the tenant is attached to a new pageserver, or
+/// an attached pageserver restarts.
+///
+/// It is included as a suffix in S3 keys, as a protection against split-brain
+/// scenarios where pageservers might otherwise issue conflicting writes to
+/// remote storage
+impl Generation {
+    /// Create a new Generation that represents a legacy key format with
+    /// no generation suffix
+    pub fn none() -> Self {
+        Self::None
+    }
+
+    // Create a new generation that will panic if you try to use get_suffix
+    pub fn broken() -> Self {
+        Self::Broken
+    }
+
+    pub fn new(v: u32) -> Self {
+        Self::Valid(v)
+    }
+
+    pub fn is_none(&self) -> bool {
+        matches!(self, Self::None)
+    }
+
+    pub fn get_suffix(&self) -> String {
+        match self {
+            Self::Valid(v) => {
+                format!("-{:08x}", v)
+            }
+            Self::None => "".into(),
+            Self::Broken => {
+                panic!("Tried to use a broken generation");
+            }
+        }
+    }
+}
+
+impl Serialize for Generation {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        if let Self::Valid(v) = self {
+            v.serialize(serializer)
+        } else {
+            // We should never be asked to serialize a None or Broken. Structures
+            // that include an optional generation should convert None to an
+            // Option<Generation>::None
+            Err(serde::ser::Error::custom(
+                "Tried to serialize invalid generation ({self})",
+            ))
+        }
+    }
+}
+
+impl<'de> Deserialize<'de> for Generation {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        Ok(Self::Valid(u32::deserialize(deserializer)?))
+    }
+}
+
+// We intentionally do not implement Display for Generation, to reduce the
+// risk of a bug where the generation is used in a format!() string directly
+// instead of using get_suffix().
+impl Debug for Generation {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Valid(v) => {
+                write!(f, "{:08x}", v)
+            }
+            Self::None => {
+                write!(f, "<none>")
+            }
+            Self::Broken => {
+                write!(f, "<broken>")
+            }
+        }
+    }
+}
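A short standalone illustration of how the suffix and Debug formatting defined above behave for the three variants (this is a sketch, not a test from the repository):

```rust
// Sketch of Generation's observable behavior, mirroring the definitions
// above: Valid renders as 8 hex digits, None yields an empty suffix,
// and Broken panics if asked for a suffix.
fn main() {
    let g = utils::generation::Generation::new(5);
    assert_eq!(g.get_suffix(), "-00000005");
    assert_eq!(format!("{g:?}"), "00000005");

    let legacy = utils::generation::Generation::none();
    assert_eq!(legacy.get_suffix(), "");
    assert!(legacy.is_none());

    // utils::generation::Generation::broken().get_suffix() would panic by design.
}
```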
@@ -24,9 +24,6 @@ pub enum ApiError {
     #[error("Precondition failed: {0}")]
     PreconditionFailed(Box<str>),
 
-    #[error("Shutting down")]
-    ShuttingDown,
-
     #[error(transparent)]
     InternalServerError(anyhow::Error),
 }
@@ -55,10 +52,6 @@ impl ApiError {
             self.to_string(),
             StatusCode::PRECONDITION_FAILED,
         ),
-        ApiError::ShuttingDown => HttpErrorBody::response_from_msg_and_status(
-            "Shutting down".to_string(),
-            StatusCode::SERVICE_UNAVAILABLE,
-        ),
         ApiError::InternalServerError(err) => HttpErrorBody::response_from_msg_and_status(
             err.to_string(),
             StatusCode::INTERNAL_SERVER_ERROR,
@@ -244,13 +244,13 @@ id_newtype!(TenantId);
 /// NOTE: It (de)serializes as an array of hex bytes, so the string representation would look
 /// like `[173,80,132,115,129,226,72,254,170,201,135,108,199,26,228,24]`.
 /// See [`Id`] for alternative ways to serialize it.
-#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
 pub struct ConnectionId(Id);
 
 id_newtype!(ConnectionId);
 
 // A pair uniquely identifying Neon instance.
-#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
+#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash, Serialize, Deserialize)]
 pub struct TenantTimelineId {
     pub tenant_id: TenantId,
     pub timeline_id: TimelineId,
@@ -273,36 +273,6 @@ impl TenantTimelineId {
     }
 }
 
-impl Serialize for TenantTimelineId {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        serializer.collect_str(self)
-    }
-}
-
-impl<'de> Deserialize<'de> for TenantTimelineId {
-    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
-    where
-        D: serde::Deserializer<'de>,
-    {
-        let str = String::deserialize(deserializer)?;
-        if let Some((tenant_part, timeline_part)) = str.split_once('/') {
-            Ok(Self {
-                tenant_id: TenantId(Id::from_hex(tenant_part).map_err(|e| {
-                    serde::de::Error::custom(format!("Malformed tenant in TenantTimelineId: {e}"))
-                })?),
-                timeline_id: TimelineId(Id::from_hex(timeline_part).map_err(|e| {
-                    serde::de::Error::custom(format!("Malformed timeline in TenantTimelineId {e}"))
-                })?),
-            })
-        } else {
-            Err(serde::de::Error::custom("Malformed TenantTimelineId"))
-        }
-    }
-}
-
 impl fmt::Display for TenantTimelineId {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(f, "{}/{}", self.tenant_id, self.timeline_id)
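Replacing the handwritten impls with derives changes the wire shape: the removed impl serialized the pair as a single `tenant/timeline` string, while the derive emits a two-field structure. A standalone sketch with hypothetical stand-in types (the real fields wrap `Id` newtypes, not strings):

```rust
use serde::{Deserialize, Serialize};

// Hypothetical stand-in for TenantTimelineId, just to show the shapes.
#[derive(Serialize, Deserialize)]
struct Pair {
    tenant_id: String,
    timeline_id: String,
}

fn main() {
    let p = Pair { tenant_id: "ad50".into(), timeline_id: "81e2".into() };
    // With the derive, the pair serializes as a structured value:
    assert_eq!(
        serde_json::to_string(&p).unwrap(),
        r#"{"tenant_id":"ad50","timeline_id":"81e2"}"#
    );
    // The removed manual impl instead produced one "tenant/timeline" string
    // via serializer.collect_str on the Display implementation.
}
```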
@@ -27,6 +27,9 @@ pub mod id;
 // http endpoint utils
 pub mod http;
 
+// definition of the Generation type for pageserver attachment APIs
+pub mod generation;
+
 // common log initialisation routine
 pub mod logging;
@@ -2,14 +2,12 @@
 
 use std::env::{var, VarError};
 use std::sync::Arc;
-use std::time::Duration;
 use std::{env, ops::ControlFlow, path::Path, str::FromStr};
 
 use anyhow::{anyhow, Context};
 use clap::{Arg, ArgAction, Command};
 
 use metrics::launch_timestamp::{set_launch_timestamp_metric, LaunchTimestamp};
-use pageserver::deletion_queue::{DeletionQueue, DeletionQueueError};
 use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
 use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING};
 use pageserver::task_mgr::WALRECEIVER_RUNTIME;
@@ -351,35 +349,6 @@ fn start_pageserver(
     // Set up remote storage client
     let remote_storage = create_remote_storage_client(conf)?;
 
-    // Set up deletion queue
-    let deletion_queue_cancel = tokio_util::sync::CancellationToken::new();
-    let (deletion_queue, deletion_frontend, deletion_backend, deletion_executor) =
-        DeletionQueue::new(remote_storage.clone(), conf, deletion_queue_cancel.clone());
-    if let Some(mut deletion_frontend) = deletion_frontend {
-        BACKGROUND_RUNTIME.spawn(async move {
-            deletion_frontend
-                .background()
-                .instrument(info_span!(parent:None, "deletion frontend"))
-                .await
-        });
-    }
-    if let Some(mut deletion_backend) = deletion_backend {
-        BACKGROUND_RUNTIME.spawn(async move {
-            deletion_backend
-                .background()
-                .instrument(info_span!(parent: None, "deletion backend"))
-                .await
-        });
-    }
-    if let Some(mut deletion_executor) = deletion_executor {
-        BACKGROUND_RUNTIME.spawn(async move {
-            deletion_executor
-                .background()
-                .instrument(info_span!(parent: None, "deletion executor"))
-                .await
-        });
-    }
-
     // Up to this point no significant I/O has been done: this should have been fast. Record
     // duration prior to starting I/O intensive phase of startup.
     startup_checkpoint("initial", "Starting loading tenants");
@@ -417,7 +386,6 @@ fn start_pageserver(
         TenantSharedResources {
             broker_client: broker_client.clone(),
             remote_storage: remote_storage.clone(),
-            deletion_queue_client: deletion_queue.new_client(),
         },
         order,
     ))?;
@@ -514,7 +482,6 @@ fn start_pageserver(
             http_auth,
             broker_client.clone(),
             remote_storage,
-            deletion_queue.clone(),
             disk_usage_eviction_state,
         )?
         .build()
@@ -637,36 +604,6 @@ fn start_pageserver(
         // The plan is to change that over time.
         shutdown_pageserver.take();
         BACKGROUND_RUNTIME.block_on(pageserver::shutdown_pageserver(0));
-
-        // Best effort to persist any outstanding deletions, to avoid leaking objects
-        let dq = deletion_queue.clone();
-        BACKGROUND_RUNTIME.block_on(async move {
-            match tokio::time::timeout(Duration::from_secs(5), dq.new_client().flush()).await {
-                Ok(flush_r) => {
-                    match flush_r {
-                        Ok(()) => {
-                            info!("Deletion queue flushed successfully on shutdown")
-                        }
-                        Err(e) => {
-                            match e {
-                                DeletionQueueError::ShuttingDown => {
-                                    // This is not harmful for correctness, but is unexpected: the deletion
-                                    // queue's workers should stay alive as long as there are any client handles instantiated.
-                                    warn!("Deletion queue stopped prematurely");
-                                }
-                            }
-                        }
-                    }
-                }
-                Err(e) => {
-                    warn!("Timed out flushing deletion queue on shutdown ({e})")
-                }
-            }
-        });
-
-        // Clean shutdown of deletion queue workers
-        deletion_queue_cancel.cancel();
 
         unreachable!()
     }
 })
@@ -566,27 +566,6 @@ impl PageServerConf {
        self.workdir.join("tenants")
    }

    pub fn deletion_prefix(&self) -> PathBuf {
        self.workdir.join("deletion")
    }

    pub fn deletion_list_path(&self, sequence: u64) -> PathBuf {
        // Encode a version in the filename, so that if we ever switch away from JSON we can
        // increment this.
        const VERSION: u8 = 1;

        self.deletion_prefix()
            .join(format!("{sequence:016x}-{VERSION:02x}.list"))
    }

    pub fn deletion_header_path(&self) -> PathBuf {
        // Encode a version in the filename, so that if we ever switch away from JSON we can
        // increment this.
        const VERSION: u8 = 1;

        self.deletion_prefix().join(format!("header-{VERSION:02x}"))
    }

    pub fn tenant_path(&self, tenant_id: &TenantId) -> PathBuf {
        self.tenants_path().join(tenant_id.to_string())
    }
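For concreteness, a minimal sketch of the on-disk naming these two helpers produce; the workdir value here is hypothetical. Sequence 1 under format version 1 becomes `0000000000000001-01.list`, which is exactly the filename the tests further down assert on.

// Sketch only: `workdir` is a made-up value, not a real config.
let workdir = std::path::PathBuf::from("/data/pageserver");
let sequence: u64 = 1;
const VERSION: u8 = 1;
let list_path = workdir
    .join("deletion")
    .join(format!("{sequence:016x}-{VERSION:02x}.list"));
assert_eq!(
    list_path.to_str(),
    Some("/data/pageserver/deletion/0000000000000001-01.list")
);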
@@ -664,23 +643,6 @@ impl PageServerConf {
            .join(METADATA_FILE_NAME)
    }

    /// Files on the remote storage are stored with paths relative to the workdir.
    /// That path includes both the tenant and timeline ids, making the remote storage path unique.
    ///
    /// Errors if the path provided does not start from the pageserver's workdir.
    pub fn remote_path(&self, local_path: &Path) -> anyhow::Result<RemotePath> {
        local_path
            .strip_prefix(&self.workdir)
            .context("Failed to strip workdir prefix")
            .and_then(RemotePath::new)
            .with_context(|| {
                format!(
                    "Failed to resolve remote part of path {:?} for base {:?}",
                    local_path, self.workdir
                )
            })
    }

    /// Turns the remote path of a file into its local path.
    pub fn local_path(&self, remote_path: &RemotePath) -> PathBuf {
        remote_path.with_base(&self.workdir)
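A worked example of the round trip, with a made-up workdir; it relies only on std path operations plus the RemotePath behaviour documented above.

// Hypothetical paths for illustration.
let workdir = std::path::Path::new("/data/pageserver");
let local = std::path::Path::new("/data/pageserver/tenants/t1/timelines/tl1/layer_0");
let relative = local.strip_prefix(workdir).unwrap();
assert_eq!(relative, std::path::Path::new("tenants/t1/timelines/tl1/layer_0"));
// remote_path() wraps `relative` in a RemotePath; local_path() applies
// with_base(&workdir) to reverse the transformation.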
@@ -1,782 +0,0 @@
mod backend;
mod executor;
mod frontend;

use std::collections::HashMap;

use crate::metrics::DELETION_QUEUE_SUBMITTED;
use remote_storage::{GenericRemoteStorage, RemotePath};
use serde::Deserialize;
use serde::Serialize;
use serde_with::serde_as;
use thiserror::Error;
use tokio;
use tokio_util::sync::CancellationToken;
use tracing::{self, debug, error};
use utils::id::{TenantId, TenantTimelineId, TimelineId};

pub(crate) use self::backend::BackendQueueWorker;
use self::executor::ExecutorWorker;
use self::frontend::DeletionOp;
pub(crate) use self::frontend::FrontendQueueWorker;
use backend::BackendQueueMessage;
use executor::ExecutorMessage;
use frontend::FrontendQueueMessage;

use crate::{config::PageServerConf, tenant::storage_layer::LayerFileName};

// TODO: administrative "panic button" config property to disable all deletions
// TODO: configurable delay before executing deletions

/// We aggregate object deletions from many tenants in one place, for several reasons:
/// - Coalesce deletions into fewer DeleteObjects calls
/// - Enable Tenant/Timeline lifetimes to be shorter than the time it takes
///   to flush any outstanding deletions.
/// - Globally control throughput of deletions, as these are a low priority task: do
///   not compete with the same S3 clients/connections used for higher priority uploads.
/// - Future: enable validating that we may do deletions in a multi-attached scenario,
///   via generation numbers (see https://github.com/neondatabase/neon/pull/4919)
///
/// There are two kinds of deletion: deferred and immediate. A deferred deletion
/// may be intentionally delayed to protect passive readers of S3 data, and may
/// be subject to a generation number validation step. An immediate deletion is
/// ready to execute immediately, and is only queued up so that it can be coalesced
/// with other deletions in flight.
///
/// Deferred deletions pass through three steps:
/// - Frontend: accumulate deletion requests from Timelines, and batch them up into
///   DeletionLists, which are persisted to S3.
/// - Backend: accumulate deletion lists, and validate them en masse prior to passing
///   the keys in the list onward for actual deletion
/// - Executor: accumulate object keys that the backend has validated for immediate
///   deletion, and execute them in batches of 1000 keys via DeleteObjects.
///
/// Non-deferred deletions, such as during timeline deletion, bypass the first
/// two stages and are passed straight into the Executor.
///
/// Internally, each stage is joined by a channel to the next. In S3, there is only
/// one queue (of DeletionLists), which is written by the frontend and consumed
/// by the backend.
#[derive(Clone)]
pub struct DeletionQueue {
    client: DeletionQueueClient,
}
#[derive(Debug)]
struct FlushOp {
    tx: tokio::sync::oneshot::Sender<()>,
}

impl FlushOp {
    fn fire(self) {
        if self.tx.send(()).is_err() {
            // oneshot channel closed. This is legal: a client could be destroyed while waiting for a flush.
            debug!("deletion queue flush from dropped client");
        };
    }
}

#[derive(Clone)]
pub struct DeletionQueueClient {
    tx: tokio::sync::mpsc::Sender<FrontendQueueMessage>,
    executor_tx: tokio::sync::mpsc::Sender<ExecutorMessage>,
}

#[derive(Debug, Serialize, Deserialize)]
struct TimelineDeletionList {
    objects: Vec<RemotePath>,
    // TODO: Tenant attachment generation will go here
    // (see https://github.com/neondatabase/neon/pull/4919)
    // attach_gen: u32,
}

#[serde_as]
#[derive(Debug, Serialize, Deserialize)]
struct DeletionList {
    /// Serialization version, for future use
    version: u8,

    /// Used for constructing a unique key for each deletion list we write out.
    sequence: u64,

    /// To avoid repeating tenant/timeline IDs in every key, we store keys in
    /// nested HashMaps by TenantTimelineId
    objects: HashMap<TenantTimelineId, TimelineDeletionList>,
    // TODO: Node generation will go here
    // (see https://github.com/neondatabase/neon/pull/4919)
    // node_gen: u32,
}

#[serde_as]
#[derive(Debug, Serialize, Deserialize)]
struct DeletionHeader {
    /// Serialization version, for future use
    version: u8,

    /// Enable determining the next sequence number even if there are no deletion lists present.
    /// If there _are_ deletion lists present, then their sequence numbers take precedence over
    /// this.
    last_deleted_list_seq: u64,
    // TODO: this is where we will track a 'clean' sequence number that indicates all deletion
    // lists <= that sequence have had their generations validated with the control plane
    // and are OK to execute.
}

impl DeletionHeader {
    const VERSION_LATEST: u8 = 1;

    fn new(last_deleted_list_seq: u64) -> Self {
        Self {
            version: Self::VERSION_LATEST,
            last_deleted_list_seq,
        }
    }
}

impl DeletionList {
    const VERSION_LATEST: u8 = 1;
    fn new(sequence: u64) -> Self {
        Self {
            version: Self::VERSION_LATEST,
            sequence,
            objects: HashMap::new(),
        }
    }

    fn is_empty(&self) -> bool {
        self.objects.is_empty()
    }

    fn len(&self) -> usize {
        self.objects.values().map(|v| v.objects.len()).sum()
    }

    fn push(&mut self, tenant: &TenantId, timeline: &TimelineId, mut objects: Vec<RemotePath>) {
        if objects.is_empty() {
            // Avoid inserting an empty TimelineDeletionList: this preserves the property
            // that if we have no keys, then self.objects is empty (used in Self::is_empty)
            return;
        }

        let key = TenantTimelineId::new(*tenant, *timeline);
        let entry = self
            .objects
            .entry(key)
            .or_insert_with(|| TimelineDeletionList {
                objects: Vec::new(),
            });
        entry.objects.append(&mut objects)
    }

    fn take_paths(&mut self) -> Vec<RemotePath> {
        self.objects
            .drain()
            .flat_map(|(_k, v)| v.objects.into_iter())
            .collect()
    }
}

#[derive(Error, Debug)]
pub enum DeletionQueueError {
    #[error("Deletion queue unavailable during shutdown")]
    ShuttingDown,
}
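To make the nested layout concrete, a small sketch of how a list accumulates keys; the tenant/timeline IDs and layer paths are invented for illustration (constructed the same way as in the tests below), and this assumes an enclosing function that can propagate the `?` from RemotePath::new.

// Hypothetical IDs and paths, for illustration only.
let tenant = TenantId::from_array(hex!("22222222222222222222222222222222"));
let timeline = TimelineId::from_array(hex!("11223344556677881122334455667788"));
let path_a = RemotePath::new(std::path::Path::new("tenants/x/timelines/y/layer_a"))?;
let path_b = RemotePath::new(std::path::Path::new("tenants/x/timelines/y/layer_b"))?;

let mut list = DeletionList::new(1);
assert!(list.is_empty());
// Both pushes land under one TenantTimelineId key, so the serialized JSON
// spells out the tenant/timeline pair only once.
list.push(&tenant, &timeline, vec![path_a]);
list.push(&tenant, &timeline, vec![path_b]);
assert_eq!(list.len(), 2);

// take_paths() flattens the nested map back into a plain Vec<RemotePath>.
let all = list.take_paths();
assert_eq!(all.len(), 2);
assert!(list.is_empty());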
impl DeletionQueueClient {
    async fn do_push(&self, msg: FrontendQueueMessage) -> Result<(), DeletionQueueError> {
        match self.tx.send(msg).await {
            Ok(_) => Ok(()),
            Err(e) => {
                // This shouldn't happen, we should shut down all tenants before
                // we shut down the global delete queue. If we encounter a bug like this,
                // we may leak objects as deletions won't be processed.
                error!("Deletion queue closed while pushing, shutting down? ({e})");
                Err(DeletionQueueError::ShuttingDown)
            }
        }
    }

    /// Submit a list of layers for deletion: this function will return before the deletion is
    /// persistent, but the deletion may be executed at any time after this function is entered: do not push
    /// layers until you're sure they can be deleted safely (i.e. remote metadata no longer
    /// references them).
    pub(crate) async fn push_layers(
        &self,
        tenant_id: TenantId,
        timeline_id: TimelineId,
        layers: Vec<LayerFileName>,
    ) -> Result<(), DeletionQueueError> {
        DELETION_QUEUE_SUBMITTED.inc_by(layers.len() as u64);
        self.do_push(FrontendQueueMessage::Delete(DeletionOp {
            tenant_id,
            timeline_id,
            layers,
            objects: Vec::new(),
        }))
        .await
    }

    async fn do_flush(
        &self,
        msg: FrontendQueueMessage,
        rx: tokio::sync::oneshot::Receiver<()>,
    ) -> Result<(), DeletionQueueError> {
        self.do_push(msg).await?;
        if rx.await.is_err() {
            // This shouldn't happen if tenants are shut down before the deletion queue. If we
            // encounter a bug like this, then a flusher will incorrectly believe it has flushed
            // when it hasn't, possibly leading to leaking objects.
            error!("Deletion queue dropped flush op while client was still waiting");
            Err(DeletionQueueError::ShuttingDown)
        } else {
            Ok(())
        }
    }

    /// Wait until all previous deletions are persistent (either executed, or written to a DeletionList)
    pub async fn flush(&self) -> Result<(), DeletionQueueError> {
        let (tx, rx) = tokio::sync::oneshot::channel::<()>();
        self.do_flush(FrontendQueueMessage::Flush(FlushOp { tx }), rx)
            .await
    }

    /// Wait until all previous deletions are executed
    pub(crate) async fn flush_execute(&self) -> Result<(), DeletionQueueError> {
        debug!("flush_execute: flushing to deletion lists...");
        // Flush any buffered work to deletion lists
        self.flush().await?;

        // Flush execution of deletion lists
        let (tx, rx) = tokio::sync::oneshot::channel::<()>();
        debug!("flush_execute: flushing execution...");
        self.do_flush(FrontendQueueMessage::FlushExecute(FlushOp { tx }), rx)
            .await?;
        debug!("flush_execute: finished flushing execution");
        Ok(())
    }

    /// This interface bypasses the persistent deletion queue, and any validation
    /// that this pageserver is still eligible to execute the deletions. It is for
    /// use in timeline deletions, where the control plane is telling us we may
    /// delete everything in the timeline.
    ///
    /// DO NOT USE THIS FROM GC OR COMPACTION CODE. Use the regular `push_layers`.
    pub(crate) async fn push_immediate(
        &self,
        objects: Vec<RemotePath>,
    ) -> Result<(), DeletionQueueError> {
        self.executor_tx
            .send(ExecutorMessage::Delete(objects))
            .await
            .map_err(|_| DeletionQueueError::ShuttingDown)
    }

    /// Companion to push_immediate. When this returns Ok, all prior objects sent
    /// into push_immediate have been deleted from remote storage.
    pub(crate) async fn flush_immediate(&self) -> Result<(), DeletionQueueError> {
        let (tx, rx) = tokio::sync::oneshot::channel::<()>();
        self.executor_tx
            .send(ExecutorMessage::Flush(FlushOp { tx }))
            .await
            .map_err(|_| DeletionQueueError::ShuttingDown)?;

        rx.await.map_err(|_| DeletionQueueError::ShuttingDown)
    }
}
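The three deferred-path client calls form a ladder of increasingly strong guarantees; a sketch of the intended ordering, with the identifiers assumed to be in scope:

// 1. Enqueued: accepted by the frontend, but not yet durable anywhere.
client.push_layers(tenant_id, timeline_id, layer_names).await?;
// 2. Durable: everything pushed so far is in a persisted DeletionList (or already executed).
client.flush().await?;
// 3. Executed: everything pushed so far has been issued to remote storage via DeleteObjects.
client.flush_execute().await?;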
impl DeletionQueue {
    pub fn new_client(&self) -> DeletionQueueClient {
        self.client.clone()
    }

    /// Caller may use the returned object to construct clients with new_client.
    /// Caller should tokio::spawn the background() members of the three worker objects returned:
    /// we don't spawn those inside new() so that the caller can use their runtime/spans of choice.
    ///
    /// If remote_storage is None, then the returned workers will also be None.
    pub fn new(
        remote_storage: Option<GenericRemoteStorage>,
        conf: &'static PageServerConf,
        cancel: CancellationToken,
    ) -> (
        Self,
        Option<FrontendQueueWorker>,
        Option<BackendQueueWorker>,
        Option<ExecutorWorker>,
    ) {
        // Deep channel: it consumes deletions from all timelines and we do not want to block them
        let (tx, rx) = tokio::sync::mpsc::channel(16384);

        // Shallow channel: it carries DeletionLists which each contain up to thousands of deletions
        let (backend_tx, backend_rx) = tokio::sync::mpsc::channel(16);

        // Shallow channel: it carries lists of paths, and we expect the main queueing to
        // happen in the backend (persistent), not in this queue.
        let (executor_tx, executor_rx) = tokio::sync::mpsc::channel(16);

        let remote_storage = match remote_storage {
            None => {
                return (
                    Self {
                        client: DeletionQueueClient { tx, executor_tx },
                    },
                    None,
                    None,
                    None,
                )
            }
            Some(r) => r,
        };

        (
            Self {
                client: DeletionQueueClient {
                    tx,
                    executor_tx: executor_tx.clone(),
                },
            },
            Some(FrontendQueueWorker::new(
                conf,
                rx,
                backend_tx,
                cancel.clone(),
            )),
            Some(BackendQueueWorker::new(conf, backend_rx, executor_tx)),
            Some(ExecutorWorker::new(
                remote_storage,
                executor_rx,
                cancel.clone(),
            )),
        )
    }
}
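A minimal sketch of the wiring a caller is expected to do (the production startup code earlier in this diff uses BACKGROUND_RUNTIME and attaches spans; this assumes a plain tokio context with `storage`, `conf` and `cancel` in scope):

let (queue, fe, be, ex) = DeletionQueue::new(Some(storage), conf, cancel.clone());
if let Some(mut fe) = fe {
    tokio::spawn(async move { fe.background().await });
}
if let Some(mut be) = be {
    tokio::spawn(async move { be.background().await });
}
if let Some(mut ex) = ex {
    // ExecutorWorker::background() returns Result; its ShuttingDown error is expected at exit.
    tokio::spawn(async move { ex.background().await });
}
let client = queue.new_client();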
#[cfg(test)]
mod test {
    use hex_literal::hex;
    use std::{
        io::ErrorKind,
        path::{Path, PathBuf},
    };
    use tracing::info;

    use remote_storage::{RemoteStorageConfig, RemoteStorageKind};
    use tokio::{runtime::EnterGuard, task::JoinHandle};

    use crate::tenant::harness::TenantHarness;

    use super::*;
    pub const TIMELINE_ID: TimelineId =
        TimelineId::from_array(hex!("11223344556677881122334455667788"));

    struct TestSetup {
        runtime: &'static tokio::runtime::Runtime,
        _entered_runtime: EnterGuard<'static>,
        harness: TenantHarness,
        remote_fs_dir: PathBuf,
        storage: GenericRemoteStorage,
        deletion_queue: DeletionQueue,
        fe_worker: JoinHandle<()>,
        be_worker: JoinHandle<()>,
        ex_worker: JoinHandle<()>,
    }

    impl TestSetup {
        /// Simulate a pageserver restart by destroying and recreating the deletion queue
        fn restart(&mut self) {
            let (deletion_queue, fe_worker, be_worker, ex_worker) = DeletionQueue::new(
                Some(self.storage.clone()),
                self.harness.conf,
                CancellationToken::new(),
            );

            self.deletion_queue = deletion_queue;

            let mut fe_worker = fe_worker.unwrap();
            let mut be_worker = be_worker.unwrap();
            let mut ex_worker = ex_worker.unwrap();
            let mut fe_worker = self
                .runtime
                .spawn(async move { fe_worker.background().await });
            let mut be_worker = self
                .runtime
                .spawn(async move { be_worker.background().await });
            let mut ex_worker = self.runtime.spawn(async move {
                drop(ex_worker.background().await);
            });
            std::mem::swap(&mut self.fe_worker, &mut fe_worker);
            std::mem::swap(&mut self.be_worker, &mut be_worker);
            std::mem::swap(&mut self.ex_worker, &mut ex_worker);

            // Join the old workers
            self.runtime.block_on(fe_worker).unwrap();
            self.runtime.block_on(be_worker).unwrap();
            self.runtime.block_on(ex_worker).unwrap();
        }
    }
    fn setup(test_name: &str) -> anyhow::Result<TestSetup> {
        let test_name = Box::leak(Box::new(format!("deletion_queue__{test_name}")));
        let harness = TenantHarness::create(test_name)?;

        // We do not load() the harness: we only need its config and remote_storage

        // Set up a GenericRemoteStorage targeting a directory
        let remote_fs_dir = harness.conf.workdir.join("remote_fs");
        std::fs::create_dir_all(remote_fs_dir)?;
        let remote_fs_dir = std::fs::canonicalize(harness.conf.workdir.join("remote_fs"))?;
        let storage_config = RemoteStorageConfig {
            max_concurrent_syncs: std::num::NonZeroUsize::new(
                remote_storage::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS,
            )
            .unwrap(),
            max_sync_errors: std::num::NonZeroU32::new(
                remote_storage::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS,
            )
            .unwrap(),
            storage: RemoteStorageKind::LocalFs(remote_fs_dir.clone()),
        };
        let storage = GenericRemoteStorage::from_config(&storage_config).unwrap();

        let runtime = Box::leak(Box::new(
            tokio::runtime::Builder::new_current_thread()
                .enable_all()
                .build()?,
        ));
        let entered_runtime = runtime.enter();

        let (deletion_queue, fe_worker, be_worker, ex_worker) = DeletionQueue::new(
            Some(storage.clone()),
            harness.conf,
            CancellationToken::new(),
        );

        let mut fe_worker = fe_worker.unwrap();
        let mut be_worker = be_worker.unwrap();
        let mut ex_worker = ex_worker.unwrap();
        let fe_worker_join = runtime.spawn(async move { fe_worker.background().await });
        let be_worker_join = runtime.spawn(async move { be_worker.background().await });
        let ex_worker_join = runtime.spawn(async move {
            drop(ex_worker.background().await);
        });

        Ok(TestSetup {
            runtime,
            _entered_runtime: entered_runtime,
            harness,
            remote_fs_dir,
            storage,
            deletion_queue,
            fe_worker: fe_worker_join,
            be_worker: be_worker_join,
            ex_worker: ex_worker_join,
        })
    }
    // TODO: put this in a common location so that we can share with remote_timeline_client's tests
    fn assert_remote_files(expected: &[&str], remote_path: &Path) {
        let mut expected: Vec<String> = expected.iter().map(|x| String::from(*x)).collect();
        expected.sort();

        let mut found: Vec<String> = Vec::new();
        let dir = match std::fs::read_dir(remote_path) {
            Ok(d) => d,
            Err(e) => {
                if e.kind() == ErrorKind::NotFound {
                    if expected.is_empty() {
                        // We are asserting prefix is empty: it is expected that the dir is missing
                        return;
                    } else {
                        assert_eq!(expected, Vec::<String>::new());
                        unreachable!();
                    }
                } else {
                    panic!(
                        "Unexpected error listing {0}: {e}",
                        remote_path.to_string_lossy()
                    );
                }
            }
        };

        for entry in dir.flatten() {
            let entry_name = entry.file_name();
            let fname = entry_name.to_str().unwrap();
            found.push(String::from(fname));
        }
        found.sort();

        assert_eq!(expected, found);
    }

    fn assert_local_files(expected: &[&str], directory: &Path) {
        let mut dir = match std::fs::read_dir(directory) {
            Ok(d) => d,
            Err(_) => {
                assert_eq!(expected, &Vec::<String>::new());
                return;
            }
        };
        let mut found = Vec::new();
        while let Some(dentry) = dir.next() {
            let dentry = dentry.unwrap();
            let file_name = dentry.file_name();
            let file_name_str = file_name.to_string_lossy();
            found.push(file_name_str.to_string());
        }
        found.sort();
        assert_eq!(expected, found);
    }
    #[test]
    fn deletion_queue_smoke() -> anyhow::Result<()> {
        // Basic test that the deletion queue processes the deletions we pass into it
        let ctx = setup("deletion_queue_smoke").expect("Failed test setup");
        let client = ctx.deletion_queue.new_client();

        let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap();
        let tenant_id = ctx.harness.tenant_id;

        let content: Vec<u8> = "victim1 contents".into();
        let relative_remote_path = ctx
            .harness
            .conf
            .remote_path(&ctx.harness.timeline_path(&TIMELINE_ID))
            .expect("Failed to construct remote path");
        let remote_timeline_path = ctx.remote_fs_dir.join(relative_remote_path.get_path());
        let deletion_prefix = ctx.harness.conf.deletion_prefix();

        // Inject a victim file to remote storage
        info!("Writing");
        std::fs::create_dir_all(&remote_timeline_path)?;
        std::fs::write(
            remote_timeline_path.join(layer_file_name_1.to_string()),
            content,
        )?;
        assert_remote_files(&[&layer_file_name_1.file_name()], &remote_timeline_path);

        // File should still be there after we push it to the queue (we haven't pushed enough to flush anything)
        info!("Pushing");
        ctx.runtime.block_on(client.push_layers(
            tenant_id,
            TIMELINE_ID,
            [layer_file_name_1.clone()].to_vec(),
        ))?;
        assert_remote_files(&[&layer_file_name_1.file_name()], &remote_timeline_path);

        assert_local_files(&[], &deletion_prefix);

        // File should still be there after we write a deletion list (we haven't pushed enough to execute anything)
        info!("Flushing");
        ctx.runtime.block_on(client.flush())?;
        assert_remote_files(&[&layer_file_name_1.file_name()], &remote_timeline_path);
        assert_local_files(&["0000000000000001-01.list"], &deletion_prefix);

        // File should go away when we execute
        info!("Flush-executing");
        ctx.runtime.block_on(client.flush_execute())?;
        assert_remote_files(&[], &remote_timeline_path);
        assert_local_files(&["header-01"], &deletion_prefix);

        // Flushing on an empty queue should succeed immediately, and not write any lists
        info!("Flush-executing on empty");
        ctx.runtime.block_on(client.flush_execute())?;
        assert_local_files(&["header-01"], &deletion_prefix);

        Ok(())
    }
    #[test]
    fn deletion_queue_recovery() -> anyhow::Result<()> {
        // Test that deletion lists written before a restart are recovered and executed afterwards
        let mut ctx = setup("deletion_queue_recovery").expect("Failed test setup");
        let client = ctx.deletion_queue.new_client();

        let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap();
        let tenant_id = ctx.harness.tenant_id;

        let content: Vec<u8> = "victim1 contents".into();
        let relative_remote_path = ctx
            .harness
            .conf
            .remote_path(&ctx.harness.timeline_path(&TIMELINE_ID))
            .expect("Failed to construct remote path");
        let remote_timeline_path = ctx.remote_fs_dir.join(relative_remote_path.get_path());
        let deletion_prefix = ctx.harness.conf.deletion_prefix();

        // Inject a file, delete it, and flush to a deletion list
        std::fs::create_dir_all(&remote_timeline_path)?;
        std::fs::write(
            remote_timeline_path.join(layer_file_name_1.to_string()),
            content,
        )?;
        ctx.runtime.block_on(client.push_layers(
            tenant_id,
            TIMELINE_ID,
            [layer_file_name_1.clone()].to_vec(),
        ))?;
        ctx.runtime.block_on(client.flush())?;
        assert_local_files(&["0000000000000001-01.list"], &deletion_prefix);

        // Restart the deletion queue
        drop(client);
        ctx.restart();
        let client = ctx.deletion_queue.new_client();

        // If we have recovered the deletion list properly, then executing after restart should purge it
        info!("Flush-executing");
        ctx.runtime.block_on(client.flush_execute())?;
        assert_remote_files(&[], &remote_timeline_path);
        assert_local_files(&["header-01"], &deletion_prefix);
        Ok(())
    }
}
/// A lightweight queue which can issue ordinary DeletionQueueClient objects, but doesn't do any persistence
/// or coalescing, and doesn't actually execute any deletions unless you call pump() to kick it.
#[cfg(test)]
pub mod mock {
    use tracing::info;

    use super::*;
    use std::sync::{
        atomic::{AtomicUsize, Ordering},
        Arc,
    };

    pub struct MockDeletionQueue {
        tx: tokio::sync::mpsc::Sender<FrontendQueueMessage>,
        executor_tx: tokio::sync::mpsc::Sender<ExecutorMessage>,
        tx_pump: tokio::sync::mpsc::Sender<FlushOp>,
        executed: Arc<AtomicUsize>,
    }

    impl MockDeletionQueue {
        pub fn new(
            remote_storage: Option<GenericRemoteStorage>,
            conf: &'static PageServerConf,
        ) -> Self {
            let (tx, mut rx) = tokio::sync::mpsc::channel(16384);
            let (tx_pump, mut rx_pump) = tokio::sync::mpsc::channel::<FlushOp>(1);
            let (executor_tx, mut executor_rx) = tokio::sync::mpsc::channel(16384);

            let executed = Arc::new(AtomicUsize::new(0));
            let executed_bg = executed.clone();

            tokio::spawn(async move {
                let remote_storage = match &remote_storage {
                    Some(rs) => rs,
                    None => {
                        info!("No remote storage configured, deletion queue will not run");
                        return;
                    }
                };
                info!("Running mock deletion queue");
                // Each time we are asked to pump, drain the queue of deletions
                while let Some(flush_op) = rx_pump.recv().await {
                    info!("Executing all pending deletions");

                    // Transform all executor messages to generic frontend messages
                    while let Ok(msg) = executor_rx.try_recv() {
                        match msg {
                            ExecutorMessage::Delete(objects) => {
                                for path in objects {
                                    match remote_storage.delete(&path).await {
                                        Ok(_) => {
                                            debug!("Deleted {path}");
                                        }
                                        Err(e) => {
                                            error!("Failed to delete {path}, leaking object! ({e})");
                                        }
                                    }
                                    executed_bg.fetch_add(1, Ordering::Relaxed);
                                }
                            }
                            ExecutorMessage::Flush(flush_op) => {
                                flush_op.fire();
                            }
                        }
                    }

                    while let Ok(msg) = rx.try_recv() {
                        match msg {
                            FrontendQueueMessage::Delete(op) => {
                                let timeline_path =
                                    conf.timeline_path(&op.tenant_id, &op.timeline_id);

                                let mut objects = op.objects;
                                for layer in op.layers {
                                    let local_path = timeline_path.join(layer.file_name());
                                    let path = match conf.remote_path(&local_path) {
                                        Ok(p) => p,
                                        Err(e) => {
                                            panic!("Can't make a timeline path! {e}");
                                        }
                                    };
                                    objects.push(path);
                                }

                                for path in objects {
                                    info!("Executing deletion {path}");
                                    match remote_storage.delete(&path).await {
                                        Ok(_) => {
                                            debug!("Deleted {path}");
                                        }
                                        Err(e) => {
                                            error!("Failed to delete {path}, leaking object! ({e})");
                                        }
                                    }
                                    executed_bg.fetch_add(1, Ordering::Relaxed);
                                }
                            }
                            FrontendQueueMessage::Flush(op) => {
                                op.fire();
                            }
                            FrontendQueueMessage::FlushExecute(op) => {
                                // We have already executed all prior deletions because the mock does them inline
                                op.fire();
                            }
                        }
                        info!("All pending deletions have been executed");
                    }
                    flush_op
                        .tx
                        .send(())
                        .expect("Test called flush but dropped before finishing");
                }
            });

            Self {
                tx,
                tx_pump,
                executor_tx,
                executed,
            }
        }

        pub fn get_executed(&self) -> usize {
            self.executed.load(Ordering::Relaxed)
        }

        pub async fn pump(&self) {
            let (tx, rx) = tokio::sync::oneshot::channel();
            self.tx_pump
                .send(FlushOp { tx })
                .await
                .expect("pump called after deletion queue loop stopped");
            rx.await
                .expect("Mock delete queue shutdown while waiting to pump");
        }

        pub(crate) fn new_client(&self) -> DeletionQueueClient {
            DeletionQueueClient {
                tx: self.tx.clone(),
                executor_tx: self.executor_tx.clone(),
            }
        }
    }
}
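A sketch of how a test might drive the mock; `storage`, `conf` and `objects` are assumed to be in scope. Nothing is deleted until the test pumps explicitly, which is what makes assertions deterministic:

let mock = MockDeletionQueue::new(Some(storage), conf);
let client = mock.new_client();
client.push_immediate(objects).await?;
assert_eq!(mock.get_executed(), 0); // not executed yet: the mock is idle
mock.pump().await;                  // drain and execute everything queued so far
assert!(mock.get_executed() > 0);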
@@ -1,181 +0,0 @@
use std::time::Duration;

use tracing::debug;
use tracing::info;
use tracing::warn;

use crate::config::PageServerConf;
use crate::metrics::DELETION_QUEUE_ERRORS;

use super::executor::ExecutorMessage;
use super::DeletionHeader;
use super::DeletionList;
use super::FlushOp;

// After this length of time, execute deletions which are eligible to run,
// even if we haven't accumulated enough for a full-sized DeleteObjects
const EXECUTE_IDLE_DEADLINE: Duration = Duration::from_secs(60);

// If we have received this number of keys, proceed with attempting to execute
const AUTOFLUSH_KEY_COUNT: usize = 16384;

#[derive(Debug)]
pub(super) enum BackendQueueMessage {
    Delete(DeletionList),
    Flush(FlushOp),
}
pub struct BackendQueueWorker {
    conf: &'static PageServerConf,
    rx: tokio::sync::mpsc::Receiver<BackendQueueMessage>,
    tx: tokio::sync::mpsc::Sender<ExecutorMessage>,

    // Accumulate some lists to execute in a batch.
    // The purpose of this accumulation is to implement batched validation of
    // attachment generations, when split-brain protection is implemented.
    // (see https://github.com/neondatabase/neon/pull/4919)
    pending_lists: Vec<DeletionList>,

    // Sum of all the lengths of lists in pending_lists
    pending_key_count: usize,

    // DeletionLists we have fully executed, which may be deleted
    // from remote storage.
    executed_lists: Vec<DeletionList>,
}

impl BackendQueueWorker {
    pub(super) fn new(
        conf: &'static PageServerConf,
        rx: tokio::sync::mpsc::Receiver<BackendQueueMessage>,
        tx: tokio::sync::mpsc::Sender<ExecutorMessage>,
    ) -> Self {
        Self {
            conf,
            rx,
            tx,
            pending_lists: Vec::new(),
            pending_key_count: 0,
            executed_lists: Vec::new(),
        }
    }

    async fn cleanup_lists(&mut self) {
        debug!(
            "cleanup_lists: {0} executed lists, {1} pending lists",
            self.executed_lists.len(),
            self.pending_lists.len()
        );

        // Lists are always pushed into the queues + executed list in sequence order, so
        // no sort is required: we can find the highest sequence number by peeking at the last element
        let max_executed_seq = match self.executed_lists.last() {
            Some(v) => v.sequence,
            None => {
                // No executed lists, nothing to clean up.
                return;
            }
        };

        // In case this is the last list, write a header out first so that
        // we don't risk losing our knowledge of the sequence number (on replay, our
        // next sequence number is the highest list seen + 1, or read from the header
        // if there are no lists)
        let header = DeletionHeader::new(max_executed_seq);
        debug!("Writing header {:?}", header);
        let header_bytes =
            serde_json::to_vec(&header).expect("Failed to serialize deletion header");
        let header_path = self.conf.deletion_header_path();

        if let Err(e) = tokio::fs::write(&header_path, header_bytes).await {
            warn!("Failed to write deletion queue header: {e:#}");
            DELETION_QUEUE_ERRORS
                .with_label_values(&["put_header"])
                .inc();
            return;
        }

        while let Some(list) = self.executed_lists.pop() {
            let list_path = self.conf.deletion_list_path(list.sequence);
            if let Err(e) = tokio::fs::remove_file(&list_path).await {
                // Unexpected: we should have permissions and nothing else should
                // be touching these files
                tracing::error!("Failed to delete {0}: {e:#}", list_path.display());
                self.executed_lists.push(list);
                break;
            }
        }
    }

    pub async fn flush(&mut self) {
        self.pending_key_count = 0;

        // Submit all keys from pending DeletionLists into the executor
        for list in &mut self.pending_lists {
            let objects = list.take_paths();
            if let Err(_e) = self.tx.send(ExecutorMessage::Delete(objects)).await {
                warn!("Shutting down");
                return;
            };
        }

        // Flush the executor to ensure all the operations we just submitted have been executed
        let (tx, rx) = tokio::sync::oneshot::channel::<()>();
        let flush_op = FlushOp { tx };
        if let Err(_e) = self.tx.send(ExecutorMessage::Flush(flush_op)).await {
            warn!("Shutting down");
            return;
        };
        if rx.await.is_err() {
            warn!("Shutting down");
            return;
        }

        // After the flush, we are assured that all contents of the pending lists
        // have been executed
        self.executed_lists.append(&mut self.pending_lists);

        // Erase the lists we executed
        self.cleanup_lists().await;
    }

    pub async fn background(&mut self) {
        // TODO: if we would like to be able to defer deletions while a Layer still has
        // refs (but it will be eligible for deletion after the process ends), then we may
        // add an ephemeral part to BackendQueueMessage::Delete that tracks which keys
        // in the deletion list may not be deleted yet, with guards to block on while
        // we wait to proceed.

        loop {
            let msg = match tokio::time::timeout(EXECUTE_IDLE_DEADLINE, self.rx.recv()).await {
                Ok(Some(m)) => m,
                Ok(None) => {
                    // All queue senders closed
                    info!("Shutting down");
                    break;
                }
                Err(_) => {
                    // Timeout: we hit the deadline to execute whatever we have in hand. These functions will
                    // return immediately if no work is pending.
                    self.flush().await;

                    continue;
                }
            };

            match msg {
                BackendQueueMessage::Delete(list) => {
                    // Count keys across all timelines in the list, not top-level map entries
                    self.pending_key_count += list.len();
                    self.pending_lists.push(list);

                    if self.pending_key_count > AUTOFLUSH_KEY_COUNT {
                        self.flush().await;
                    }
                }
                BackendQueueMessage::Flush(op) => {
                    self.flush().await;
                    op.fire();
                }
            }
        }
    }
}
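The write-header-before-remove ordering in cleanup_lists() is what keeps sequence numbers monotonic across restarts. A worked sketch with invented numbers: suppose lists 1..=3 were executed, the header recorded last_deleted_list_seq = 3, and then the list files were removed.

// Replay rule applied in recover() (frontend.rs), with hypothetical inputs.
let list_seqs_on_disk: Vec<u64> = vec![]; // all executed lists were removed
let header_last_deleted: u64 = 3;         // header was written before removal
let next_seq = list_seqs_on_disk
    .iter()
    .max()
    .map(|s| s + 1)
    .unwrap_or(1)
    .max(header_last_deleted + 1);
assert_eq!(next_seq, 4); // sequence 4 is never re-used, even with no list files present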
@@ -1,143 +0,0 @@
use remote_storage::GenericRemoteStorage;
use remote_storage::RemotePath;
use remote_storage::MAX_KEYS_PER_DELETE;
use std::time::Duration;
use tokio_util::sync::CancellationToken;
use tracing::info;
use tracing::warn;

use crate::metrics::DELETION_QUEUE_ERRORS;
use crate::metrics::DELETION_QUEUE_EXECUTED;

use super::DeletionQueueError;
use super::FlushOp;

const AUTOFLUSH_INTERVAL: Duration = Duration::from_secs(10);

pub(super) enum ExecutorMessage {
    Delete(Vec<RemotePath>),
    Flush(FlushOp),
}

/// Non-persistent deletion queue, for coalescing multiple object deletes into
/// larger DeleteObjects requests.
pub struct ExecutorWorker {
    // Accumulate up to 1000 keys for the next deletion operation
    accumulator: Vec<RemotePath>,

    rx: tokio::sync::mpsc::Receiver<ExecutorMessage>,

    cancel: CancellationToken,
    remote_storage: GenericRemoteStorage,
}

impl ExecutorWorker {
    pub(super) fn new(
        remote_storage: GenericRemoteStorage,
        rx: tokio::sync::mpsc::Receiver<ExecutorMessage>,
        cancel: CancellationToken,
    ) -> Self {
        Self {
            remote_storage,
            rx,
            cancel,
            accumulator: Vec::new(),
        }
    }

    /// Wrap the remote `delete_objects` with a failpoint
    pub async fn remote_delete(&self) -> Result<(), anyhow::Error> {
        fail::fail_point!("deletion-queue-before-execute", |_| {
            info!("Skipping execution, failpoint set");
            DELETION_QUEUE_ERRORS
                .with_label_values(&["failpoint"])
                .inc();
            Err(anyhow::anyhow!("failpoint hit"))
        });

        self.remote_storage.delete_objects(&self.accumulator).await
    }

    /// Block until everything in the accumulator has been executed
    pub async fn flush(&mut self) -> Result<(), DeletionQueueError> {
        while !self.accumulator.is_empty() && !self.cancel.is_cancelled() {
            match self.remote_delete().await {
                Ok(()) => {
                    // Note: we assume that the remote storage layer returns Ok(()) if some
                    // or all of the deleted objects were already gone.
                    DELETION_QUEUE_EXECUTED.inc_by(self.accumulator.len() as u64);
                    info!(
                        "Executed deletion batch {}..{}",
                        self.accumulator
                            .first()
                            .expect("accumulator should be non-empty"),
                        self.accumulator
                            .last()
                            .expect("accumulator should be non-empty"),
                    );
                    self.accumulator.clear();
                }
                Err(e) => {
                    warn!("DeleteObjects request failed: {e:#}, will retry");
                    DELETION_QUEUE_ERRORS.with_label_values(&["execute"]).inc();
                }
            };
        }
        if self.cancel.is_cancelled() {
            // Expose an error because we may not have actually flushed everything
            Err(DeletionQueueError::ShuttingDown)
        } else {
            Ok(())
        }
    }

    pub async fn background(&mut self) -> Result<(), DeletionQueueError> {
        self.accumulator.reserve(MAX_KEYS_PER_DELETE);

        loop {
            if self.cancel.is_cancelled() {
                return Err(DeletionQueueError::ShuttingDown);
            }

            let msg = match tokio::time::timeout(AUTOFLUSH_INTERVAL, self.rx.recv()).await {
                Ok(Some(m)) => m,
                Ok(None) => {
                    // All queue senders closed
                    info!("Shutting down");
                    return Err(DeletionQueueError::ShuttingDown);
                }
                Err(_) => {
                    // Timeout: we hit the deadline to execute whatever we have in hand. These functions will
                    // return immediately if no work is pending.
                    self.flush().await?;

                    continue;
                }
            };

            match msg {
                ExecutorMessage::Delete(mut list) => {
                    while !list.is_empty() || self.accumulator.len() == MAX_KEYS_PER_DELETE {
                        if self.accumulator.len() == MAX_KEYS_PER_DELETE {
                            self.flush().await?;
                            // A successful flush fully drains the accumulator
                            assert_eq!(self.accumulator.len(), 0);
                        }

                        let available_slots = MAX_KEYS_PER_DELETE - self.accumulator.len();
                        let take_count = std::cmp::min(available_slots, list.len());
                        for path in list.drain(list.len() - take_count..) {
                            self.accumulator.push(path);
                        }
                    }
                }
                ExecutorMessage::Flush(flush_op) => {
                    // If flush() errors, we drop the flush_op and the caller will get
                    // an error recv()'ing their oneshot channel.
                    self.flush().await?;
                    flush_op.fire();
                }
            }
        }
    }
}
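A worked example of the accumulator arithmetic in the Delete arm above, using MAX_KEYS_PER_DELETE = 1000 (the S3 DeleteObjects limit; the real constant lives in remote_storage) and invented counts:

let max = 1000usize;          // stand-in for MAX_KEYS_PER_DELETE
let mut acc = 300usize;       // keys already buffered in the accumulator
let mut incoming = 1500usize; // keys in one ExecutorMessage::Delete

// Iteration 1: 700 free slots, take 700; accumulator reaches 1000 and is flushed.
let take = (max - acc).min(incoming);
assert_eq!(take, 700);
incoming -= take;
acc = 0; // flush() drained the full batch of 1000

// Iteration 2: take the remaining 800; they wait for AUTOFLUSH_INTERVAL or a Flush.
let take = (max - acc).min(incoming);
assert_eq!(take, 800);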
@@ -1,357 +0,0 @@
use super::BackendQueueMessage;
use super::DeletionHeader;
use super::DeletionList;
use super::FlushOp;

use std::fs::create_dir_all;
use std::time::Duration;

use regex::Regex;
use remote_storage::RemotePath;
use tokio_util::sync::CancellationToken;
use tracing::debug;
use tracing::info;
use tracing::warn;
use utils::id::TenantId;
use utils::id::TimelineId;

use crate::config::PageServerConf;
use crate::metrics::DELETION_QUEUE_ERRORS;
use crate::metrics::DELETION_QUEUE_SUBMITTED;
use crate::tenant::storage_layer::LayerFileName;

// The number of keys in a DeletionList before we will proactively persist it
// (without reaching a flush deadline). This aims to deliver lists on the order
// of 1MB in size when we are under heavy delete load.
const DELETION_LIST_TARGET_SIZE: usize = 16384;

// Ordinarily, we only flush to a DeletionList periodically, to bound the window during
// which we might leak objects from not flushing a DeletionList after
// the objects are already unlinked from timeline metadata.
const FRONTEND_DEFAULT_TIMEOUT: Duration = Duration::from_millis(10000);

// If someone is waiting for a flush to DeletionList, only delay a little to accumulate
// more objects before doing the flush.
const FRONTEND_FLUSHING_TIMEOUT: Duration = Duration::from_millis(100);

#[derive(Debug)]
pub(super) struct DeletionOp {
    pub(super) tenant_id: TenantId,
    pub(super) timeline_id: TimelineId,
    // `layers` and `objects` are both just lists of objects. `layers` is used if you do not
    // have a config object handy to project it to a remote key, and need the consuming worker
    // to do it for you.
    pub(super) layers: Vec<LayerFileName>,
    pub(super) objects: Vec<RemotePath>,
}

#[derive(Debug)]
pub(super) enum FrontendQueueMessage {
    Delete(DeletionOp),
    // Wait until all prior deletions make it into a persistent DeletionList
    Flush(FlushOp),
    // Wait until all prior deletions have been executed (i.e. objects are actually deleted)
    FlushExecute(FlushOp),
}

pub struct FrontendQueueWorker {
    conf: &'static PageServerConf,

    // Incoming frontend requests to delete some keys
    rx: tokio::sync::mpsc::Receiver<FrontendQueueMessage>,

    // Outbound requests to the backend to execute deletion lists we have composed.
    tx: tokio::sync::mpsc::Sender<BackendQueueMessage>,

    // The list we are currently building: contains a buffer of keys to delete
    // and our next sequence number
    pending: DeletionList,

    // These FlushOps should fire the next time we flush
    pending_flushes: Vec<FlushOp>,

    // Worker loop is torn down when this fires.
    cancel: CancellationToken,
}

impl FrontendQueueWorker {
    pub(super) fn new(
        conf: &'static PageServerConf,
        rx: tokio::sync::mpsc::Receiver<FrontendQueueMessage>,
        tx: tokio::sync::mpsc::Sender<BackendQueueMessage>,
        cancel: CancellationToken,
    ) -> Self {
        Self {
            pending: DeletionList::new(1),
            conf,
            rx,
            tx,
            pending_flushes: Vec::new(),
            cancel,
        }
    }
    async fn upload_pending_list(&mut self) -> anyhow::Result<()> {
        let path = self.conf.deletion_list_path(self.pending.sequence);

        let bytes = serde_json::to_vec(&self.pending).expect("Failed to serialize deletion list");
        tokio::fs::write(&path, &bytes).await?;
        tokio::fs::File::open(&path).await?.sync_all().await?;
        Ok(())
    }

    /// Try to flush `self.pending` to persistent storage
    ///
    /// This does not return errors, because on failure to flush we do not lose
    /// any state: flushing will be retried implicitly on the next deadline
    async fn flush(&mut self) {
        if self.pending.is_empty() {
            for f in self.pending_flushes.drain(..) {
                f.fire();
            }
            return;
        }

        match self.upload_pending_list().await {
            Ok(_) => {
                info!(sequence = self.pending.sequence, "Stored deletion list");

                for f in self.pending_flushes.drain(..) {
                    f.fire();
                }

                let mut onward_list = DeletionList::new(self.pending.sequence);
                std::mem::swap(&mut onward_list.objects, &mut self.pending.objects);

                // We have consumed out of pending: reset it for the next incoming deletions to accumulate there
                self.pending = DeletionList::new(self.pending.sequence + 1);

                if let Err(e) = self.tx.send(BackendQueueMessage::Delete(onward_list)).await {
                    // This is allowed to fail: it will only happen if the backend worker is shut down,
                    // so we can just drop this on the floor.
                    info!("Deletion list dropped, this is normal during shutdown ({e:#})");
                }
            }
            Err(e) => {
                DELETION_QUEUE_ERRORS.with_label_values(&["put_list"]).inc();
                warn!(
                    sequence = self.pending.sequence,
                    "Failed to write deletion list, will retry later ({e:#})"
                );
            }
        }
    }

    async fn recover(&mut self) -> Result<(), anyhow::Error> {
        // Load header: this is not required to be present, e.g. when a pageserver first runs
        let header_path = self.conf.deletion_header_path();

        // Synchronous, but we only do it once per process lifetime so it's tolerable
        create_dir_all(&self.conf.deletion_prefix())?;

        let header_bytes = match tokio::fs::read(&header_path).await {
            Ok(h) => Ok(Some(h)),
            Err(e) => {
                if e.kind() == std::io::ErrorKind::NotFound {
                    debug!(
                        "Deletion header {0} not found, first start?",
                        header_path.display()
                    );
                    Ok(None)
                } else {
                    Err(e)
                }
            }
        }?;

        if let Some(header_bytes) = header_bytes {
            if let Some(header) = match serde_json::from_slice::<DeletionHeader>(&header_bytes) {
                Ok(h) => Some(h),
                Err(e) => {
                    warn!(
                        "Failed to deserialize deletion header, ignoring {0}: {e:#}",
                        header_path.display()
                    );
                    // This should never happen unless we make a mistake with our serialization.
                    // Ignoring a deletion header is not consequential for correctness because all deletions
                    // are ultimately allowed to fail: worst case we leak some objects for the scrubber to clean up.
                    None
                }
            } {
                self.pending.sequence =
                    std::cmp::max(self.pending.sequence, header.last_deleted_list_seq + 1);
            };
        };

        let mut dir = match tokio::fs::read_dir(&self.conf.deletion_prefix()).await {
            Ok(d) => d,
            Err(e) => {
                warn!(
                    "Failed to open deletion list directory {0}: {e:#}",
                    header_path.display()
                );

                // Give up: if we can't read the deletion list directory, we probably can't
                // write lists into it later, so the queue won't work.
                return Err(e.into());
            }
        };

        let list_name_pattern = Regex::new("([a-zA-Z0-9]{16})-([a-zA-Z0-9]{2}).list").unwrap();

        let mut seqs: Vec<u64> = Vec::new();
        while let Some(dentry) = dir.next_entry().await? {
            let file_name = dentry.file_name().to_owned();
            let basename = file_name.to_string_lossy();
            let seq_part = if let Some(m) = list_name_pattern.captures(&basename) {
                m.get(1)
                    .expect("Non-optional group should be present")
                    .as_str()
            } else {
                warn!("Unexpected key in deletion queue: {basename}");
                continue;
            };

            let seq: u64 = match u64::from_str_radix(seq_part, 16) {
                Ok(s) => s,
                Err(e) => {
                    warn!("Malformed key '{basename}': {e}");
                    continue;
                }
            };
            seqs.push(seq);
        }
        seqs.sort();

        // Initialize the next sequence number in the frontend based on the maximum of the highest list we see,
        // and the last list that was deleted according to the header. Combined with writing out the header
        // prior to deletions, this guarantees no re-use of sequence numbers.
        if let Some(max_list_seq) = seqs.last() {
            self.pending.sequence = std::cmp::max(self.pending.sequence, max_list_seq + 1);
        }

        for s in seqs {
            let list_path = self.conf.deletion_list_path(s);
            let list_bytes = tokio::fs::read(&list_path).await?;

            let deletion_list = match serde_json::from_slice::<DeletionList>(&list_bytes) {
                Ok(l) => l,
                Err(e) => {
                    // Drop the list on the floor: any objects it referenced will be left behind
                    // for scrubbing to clean up. This should never happen unless we have a serialization bug.
                    warn!(sequence = s, "Failed to deserialize deletion list: {e}");
                    continue;
                }
            };

            // We will drop out of recovery if this fails: it indicates that we are shutting down
            // or the backend has panicked
            DELETION_QUEUE_SUBMITTED.inc_by(deletion_list.len() as u64);
            self.tx
                .send(BackendQueueMessage::Delete(deletion_list))
                .await?;
        }

        info!(next_sequence = self.pending.sequence, "Replay complete");

        Ok(())
    }

    /// This is the front-end ingest, where we bundle up deletion requests into DeletionLists
    /// and write them out, for later execution
    pub async fn background(&mut self) {
        info!("Started deletion frontend worker");

        let mut recovered: bool = false;

        while !self.cancel.is_cancelled() {
            let timeout = if self.pending_flushes.is_empty() {
                FRONTEND_DEFAULT_TIMEOUT
            } else {
                FRONTEND_FLUSHING_TIMEOUT
            };

            let msg = match tokio::time::timeout(timeout, self.rx.recv()).await {
                Ok(Some(msg)) => msg,
                Ok(None) => {
                    // Queue sender destroyed, shutting down
                    break;
                }
                Err(_) => {
                    // Hit deadline, flush.
                    self.flush().await;
                    continue;
                }
            };

            // On the first message, do recovery. This avoids unnecessary recovery very
            // early in startup, and simplifies testing by avoiding a 404 reading the
            // header on every first pageserver startup.
            if !recovered {
                // Before accepting any input from this pageserver lifetime, recover all deletion lists that are in S3
                if let Err(e) = self.recover().await {
                    // This should only happen in truly unrecoverable cases, like the recovery finding that the backend
                    // queue receiver has been dropped.
                    info!("Deletion queue recovery aborted, deletion queue will not proceed ({e})");
                    return;
                } else {
                    recovered = true;
                }
            }

            match msg {
                FrontendQueueMessage::Delete(op) => {
                    debug!(
                        "Delete: ingesting {0} layers, {1} other objects",
                        op.layers.len(),
                        op.objects.len()
                    );

                    let timeline_path = self.conf.timeline_path(&op.tenant_id, &op.timeline_id);
                    let mut layer_paths = Vec::new();
                    for layer in op.layers {
                        // TODO go directly to the remote path without composing a local path
                        let local_path = timeline_path.join(layer.file_name());
                        let path = match self.conf.remote_path(&local_path) {
                            Ok(p) => p,
                            Err(e) => {
                                panic!("Can't make a timeline path! {e}");
                            }
                        };
                        layer_paths.push(path);
                    }

                    self.pending
                        .push(&op.tenant_id, &op.timeline_id, layer_paths);
                    self.pending
                        .push(&op.tenant_id, &op.timeline_id, op.objects);
                }
                FrontendQueueMessage::Flush(op) => {
                    if self.pending.objects.is_empty() {
                        // Execute immediately
                        debug!("Flush: No pending objects, flushing immediately");
                        op.fire()
                    } else {
                        // Execute next time we flush
                        debug!("Flush: adding to pending flush list for next deadline flush");
                        self.pending_flushes.push(op);
                    }
                }
                FrontendQueueMessage::FlushExecute(op) => {
                    debug!("FlushExecute: passing through to backend");
                    // We do not flush to a deletion list here: the client sends a Flush before the FlushExecute
                    if let Err(e) = self.tx.send(BackendQueueMessage::Flush(op)).await {
                        info!("Can't flush, shutting down ({e})");
                        // Caller will get an error when their oneshot sender is dropped.
                    }
                }
            }

            // Compare key count (not the number of timeline entries) against the target size
            if self.pending.len() > DELETION_LIST_TARGET_SIZE
                || !self.pending_flushes.is_empty()
            {
                self.flush().await;
            }
        }
        info!("Deletion queue shut down.");
    }
}
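A small sketch of the filename parsing that recover() relies on, matching the naming produced by deletion_list_path():

let pattern = regex::Regex::new("([a-zA-Z0-9]{16})-([a-zA-Z0-9]{2}).list").unwrap();
let caps = pattern
    .captures("0000000000000001-01.list")
    .expect("well-formed list name");
let seq = u64::from_str_radix(caps.get(1).unwrap().as_str(), 16).unwrap();
assert_eq!(seq, 1);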
@@ -52,29 +52,6 @@ paths:
          schema:
            type: object

  /v1/deletion_queue/flush:
    parameters:
      - name: execute
        in: query
        required: false
        schema:
          type: boolean
        description:
          If true, attempt to execute deletions. If false, just flush deletions to persistent deletion lists.
    put:
      description: Execute any deletions currently enqueued
      security: []
      responses:
        "200":
          description: |
            Flush completed: if execute was true, then enqueued deletions have been completed. If execute was false,
            then enqueued deletions have been persisted to deletion lists, and may have been completed.
          content:
            application/json:
              schema:
                type: object

  /v1/tenant/{tenant_id}:
    parameters:
      - name: tenant_id
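For illustration, a hedged sketch of calling this endpoint from Rust; the host and port are hypothetical, no auth is assumed, and reqwest is just one possible HTTP client:

// Hypothetical: pageserver management API listening on localhost:9898.
let resp = reqwest::Client::new()
    .put("http://localhost:9898/v1/deletion_queue/flush?execute=true")
    .send()
    .await?;
assert!(resp.status().is_success()); // enqueued deletions have been executed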
@@ -23,7 +23,6 @@ use super::models::{
|
||||
TimelineCreateRequest, TimelineGcRequest, TimelineInfo,
|
||||
};
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::deletion_queue::{DeletionQueue, DeletionQueueError};
|
||||
use crate::metrics::{StorageTimeOperation, STORAGE_TIME_GLOBAL};
|
||||
use crate::pgdatadir_mapping::LsnForTimestamp;
|
||||
use crate::task_mgr::TaskKind;
|
||||
@@ -57,7 +56,6 @@ struct State {
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
allowlist_routes: Vec<Uri>,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
deletion_queue: DeletionQueue,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
|
||||
}
|
||||
@@ -67,7 +65,6 @@ impl State {
|
||||
conf: &'static PageServerConf,
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
deletion_queue: DeletionQueue,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
|
||||
) -> anyhow::Result<Self> {
|
||||
@@ -81,7 +78,6 @@ impl State {
|
||||
allowlist_routes,
|
||||
remote_storage,
|
||||
broker_client,
|
||||
deletion_queue,
|
||||
disk_usage_eviction_state,
|
||||
})
|
||||
}
|
||||
@@ -494,7 +490,6 @@ async fn tenant_attach_handler(
|
||||
tenant_conf,
|
||||
state.broker_client.clone(),
|
||||
remote_storage.clone(),
|
||||
&state.deletion_queue,
|
||||
&ctx,
|
||||
)
|
||||
.instrument(info_span!("tenant_attach", %tenant_id))
|
||||
@@ -557,7 +552,6 @@ async fn tenant_load_handler(
|
||||
tenant_id,
|
||||
state.broker_client.clone(),
|
||||
state.remote_storage.clone(),
|
||||
&state.deletion_queue,
|
||||
&ctx,
|
||||
)
|
||||
.instrument(info_span!("load", %tenant_id))
|
||||
@@ -883,7 +877,6 @@ async fn tenant_create_handler(
|
||||
target_tenant_id,
|
||||
state.broker_client.clone(),
|
||||
state.remote_storage.clone(),
|
||||
&state.deletion_queue,
|
||||
&ctx,
|
||||
)
|
||||
.instrument(info_span!("tenant_create", tenant_id = %target_tenant_id))
|
||||
@@ -1124,48 +1117,6 @@ async fn always_panic_handler(
|
||||
json_response(StatusCode::NO_CONTENT, ())
|
||||
}
|
||||
|
||||
async fn deletion_queue_flush(
|
||||
r: Request<Body>,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let state = get_state(&r);
|
||||
|
||||
if state.remote_storage.is_none() {
|
||||
// Nothing to do if remote storage is disabled.
|
||||
return json_response(StatusCode::OK, ());
|
||||
}
|
||||
|
||||
let execute = parse_query_param(&r, "execute")?.unwrap_or(false);
|
||||
|
||||
let queue_client = state.deletion_queue.new_client();
|
||||
|
||||
tokio::select! {
|
||||
flush_result = async {
|
||||
if execute {
|
||||
queue_client.flush_execute().await
|
||||
} else {
|
||||
queue_client.flush().await
|
||||
}
|
||||
} => {
|
||||
match flush_result {
|
||||
Ok(())=> {
|
||||
json_response(StatusCode::OK, ())
|
||||
},
|
||||
Err(e) => {
|
||||
match e {
|
||||
DeletionQueueError::ShuttingDown => {
|
||||
Err(ApiError::ShuttingDown)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
_ = cancel.cancelled() => {
|
||||
Err(ApiError::ShuttingDown)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
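The handler above races the flush future against request cancellation. The same pattern in isolation, as a runnable toy where a sleep stands in for the flush (assumes the tokio and tokio-util crates; this is not pageserver code):

use tokio_util::sync::CancellationToken;

// Minimal, self-contained sketch of the select!-against-cancellation pattern
// used by deletion_queue_flush above.
#[tokio::main]
async fn main() {
    let cancel = CancellationToken::new();

    let result = tokio::select! {
        _ = tokio::time::sleep(std::time::Duration::from_millis(10)) => "completed",
        _ = cancel.cancelled() => "shutting down",
    };
    println!("{result}");
}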
async fn disk_usage_eviction_run(
    mut r: Request<Body>,
    _cancel: CancellationToken,
@@ -1375,7 +1326,6 @@ pub fn make_router(
    auth: Option<Arc<JwtAuth>>,
    broker_client: BrokerClientChannel,
    remote_storage: Option<GenericRemoteStorage>,
    deletion_queue: DeletionQueue,
    disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
) -> anyhow::Result<RouterBuilder<hyper::Body, ApiError>> {
    let spec = include_bytes!("openapi_spec.yml");
@@ -1405,7 +1355,6 @@ pub fn make_router(
        conf,
        auth,
        remote_storage,
        deletion_queue,
        broker_client,
        disk_usage_eviction_state,
    )
@@ -1490,9 +1439,6 @@ pub fn make_router(
        .put("/v1/disk_usage_eviction/run", |r| {
            api_handler(r, disk_usage_eviction_run)
        })
        .put("/v1/deletion_queue/flush", |r| {
            api_handler(r, deletion_queue_flush)
        })
        .put("/v1/tenant/:tenant_id/break", |r| {
            testing_api_handler("set tenant state to broken", r, handle_tenant_break)
        })

@@ -3,7 +3,6 @@ pub mod basebackup;
pub mod config;
pub mod consumption_metrics;
pub mod context;
pub mod deletion_queue;
pub mod disk_usage_eviction_task;
pub mod http;
pub mod import_datadir;

@@ -795,31 +795,6 @@ static REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST: Lazy<HistogramVec> = Lazy::new
    .expect("failed to define a metric")
});

pub(crate) static DELETION_QUEUE_SUBMITTED: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!(
        "pageserver_deletion_queue_submitted_total",
        "Number of objects submitted for deletion"
    )
    .expect("failed to define a metric")
});

pub(crate) static DELETION_QUEUE_EXECUTED: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!(
        "pageserver_deletion_queue_executed_total",
        "Number of objects deleted"
    )
    .expect("failed to define a metric")
});

pub(crate) static DELETION_QUEUE_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "pageserver_deletion_queue_errors_total",
        "Incremented on retryable remote I/O errors writing deletion lists or executing deletions.",
        &["op_kind"],
    )
    .expect("failed to define a metric")
});
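A usage sketch for these counters, following the standard prometheus-crate pattern. The call sites are assumptions for illustration; the diff does not show where the pageserver actually increments them, and this assumes the statics above are in scope.

// Hypothetical call sites for the metrics defined above.
fn record_batch_submitted(n: u64) {
    DELETION_QUEUE_SUBMITTED.inc_by(n);
}

fn record_executed(n: u64) {
    DELETION_QUEUE_EXECUTED.inc_by(n);
}

fn record_remote_error() {
    // "op_kind" distinguishes deletion-list writes from deletion execution.
    DELETION_QUEUE_ERRORS.with_label_values(&["execute"]).inc();
}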
static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "pageserver_remote_timeline_client_bytes_started",

@@ -59,7 +59,6 @@ use self::timeline::EvictionTaskTenantState;
use self::timeline::TimelineResources;
use crate::config::PageServerConf;
use crate::context::{DownloadBehavior, RequestContext};
use crate::deletion_queue::DeletionQueueClient;
use crate::import_datadir;
use crate::is_uninit_mark;
use crate::metrics::TENANT_ACTIVATION;
@@ -86,6 +85,7 @@ pub use pageserver_api::models::TenantState;
use toml_edit;
use utils::{
    crashsafe,
    generation::Generation,
    id::{TenantId, TimelineId},
    lsn::{Lsn, RecordLsn},
};
@@ -157,7 +157,6 @@ pub const TENANT_DELETED_MARKER_FILE_NAME: &str = "deleted";
pub struct TenantSharedResources {
    pub broker_client: storage_broker::BrokerClientChannel,
    pub remote_storage: Option<GenericRemoteStorage>,
    pub deletion_queue_client: DeletionQueueClient,
}

///
@@ -180,6 +179,11 @@ pub struct Tenant {
    tenant_conf: Arc<RwLock<TenantConfOpt>>,

    tenant_id: TenantId,

    /// The remote storage generation, used to protect S3 objects from split-brain.
    /// Does not change over the lifetime of the [`Tenant`] object.
    generation: Generation,

    timelines: Mutex<HashMap<TimelineId, Arc<Timeline>>>,
    // This mutex prevents creation of new timelines during GC.
    // Adding yet another mutex (in addition to `timelines`) is needed because holding
@@ -193,9 +197,6 @@ pub struct Tenant {
    // provides access to timeline data sitting in the remote storage
    remote_storage: Option<GenericRemoteStorage>,

    // Access to the global deletion queue, for when this tenant wants to schedule a deletion
    deletion_queue_client: Option<DeletionQueueClient>,

    /// Cached logical sizes, updated on each [`Tenant::gather_size_inputs`].
    cached_logical_sizes: tokio::sync::Mutex<HashMap<(TimelineId, Lsn), u64>>,
    cached_synthetic_tenant_size: Arc<AtomicU64>,
@@ -527,10 +528,10 @@ impl Tenant {
    pub(crate) fn spawn_attach(
        conf: &'static PageServerConf,
        tenant_id: TenantId,
        generation: Generation,
        broker_client: storage_broker::BrokerClientChannel,
        tenants: &'static tokio::sync::RwLock<TenantsMap>,
        remote_storage: GenericRemoteStorage,
        deletion_queue_client: DeletionQueueClient,
        ctx: &RequestContext,
    ) -> anyhow::Result<Arc<Tenant>> {
        // TODO dedup with spawn_load
@@ -544,8 +545,8 @@ impl Tenant {
            tenant_conf,
            wal_redo_manager,
            tenant_id,
            generation,
            Some(remote_storage.clone()),
            Some(deletion_queue_client),
        ));

        // Do all the hard work in the background
@@ -655,12 +656,8 @@ impl Tenant {
            .as_ref()
            .ok_or_else(|| anyhow::anyhow!("cannot attach without remote storage"))?;

        let remote_timeline_ids = remote_timeline_client::list_remote_timelines(
            remote_storage,
            self.conf,
            self.tenant_id,
        )
        .await?;
        let remote_timeline_ids =
            remote_timeline_client::list_remote_timelines(remote_storage, self.tenant_id).await?;

        info!("found {} timelines", remote_timeline_ids.len());

@@ -672,6 +669,7 @@ impl Tenant {
                self.conf,
                self.tenant_id,
                timeline_id,
                self.generation,
            );
            part_downloads.spawn(
                async move {
@@ -734,7 +732,6 @@ impl Tenant {
                remote_metadata,
                TimelineResources {
                    remote_client: Some(remote_client),
                    deletion_queue_client: self.deletion_queue_client.clone(),
                },
                ctx,
            )
@@ -759,7 +756,6 @@ impl Tenant {
            timeline_id,
            &index_part.metadata,
            Some(remote_timeline_client),
            self.deletion_queue_client.clone(),
            None,
        )
        .await
@@ -860,7 +856,7 @@ impl Tenant {
            TenantConfOpt::default(),
            wal_redo_manager,
            tenant_id,
            None,
            Generation::broken(),
            None,
        ))
    }
@@ -878,6 +874,7 @@ impl Tenant {
    pub(crate) fn spawn_load(
        conf: &'static PageServerConf,
        tenant_id: TenantId,
        generation: Generation,
        resources: TenantSharedResources,
        init_order: Option<InitializationOrder>,
        tenants: &'static tokio::sync::RwLock<TenantsMap>,
@@ -895,7 +892,6 @@ impl Tenant {

        let broker_client = resources.broker_client;
        let remote_storage = resources.remote_storage;
        let deletion_queue_client = resources.deletion_queue_client;

        let wal_redo_manager = Arc::new(PostgresRedoManager::new(conf, tenant_id));
        let tenant = Tenant::new(
@@ -904,8 +900,8 @@ impl Tenant {
            tenant_conf,
            wal_redo_manager,
            tenant_id,
            generation,
            remote_storage.clone(),
            Some(deletion_queue_client),
        );
        let tenant = Arc::new(tenant);

@@ -1313,7 +1309,6 @@ impl Tenant {
            timeline_id,
            &local_metadata,
            Some(remote_client),
            self.deletion_queue_client.clone(),
            init_order,
        )
        .await
@@ -1363,7 +1358,6 @@ impl Tenant {
            timeline_id,
            &local_metadata,
            None,
            None,
            init_order,
        )
        .await
@@ -2288,6 +2282,7 @@ impl Tenant {
            ancestor,
            new_timeline_id,
            self.tenant_id,
            self.generation,
            Arc::clone(&self.walredo_mgr),
            resources,
            pg_version,
@@ -2305,17 +2300,9 @@ impl Tenant {
        tenant_conf: TenantConfOpt,
        walredo_mgr: Arc<dyn WalRedoManager + Send + Sync>,
        tenant_id: TenantId,
        generation: Generation,
        remote_storage: Option<GenericRemoteStorage>,
        deletion_queue_client: Option<DeletionQueueClient>,
    ) -> Tenant {
        #[cfg(not(test))]
        match state {
            TenantState::Broken { .. } => {}
            _ => {
                // Non-broken tenants must be constructed with a deletion queue
                assert!(deletion_queue_client.is_some());
            }
        }
        let (state, mut rx) = watch::channel(state);

        tokio::spawn(async move {
@@ -2372,6 +2359,7 @@ impl Tenant {

        Tenant {
            tenant_id,
            generation,
            conf,
            // using "now" here is a good enough approximation to catch tenants with really long
            // activation times.
@@ -2381,7 +2369,6 @@ impl Tenant {
            gc_cs: tokio::sync::Mutex::new(()),
            walredo_mgr,
            remote_storage,
            deletion_queue_client,
            state,
            cached_logical_sizes: tokio::sync::Mutex::new(HashMap::new()),
            cached_synthetic_tenant_size: Arc::new(AtomicU64::new(0)),
@@ -2955,16 +2942,14 @@ impl Tenant {
            self.conf,
            self.tenant_id,
            timeline_id,
            self.generation,
        );
            Some(remote_client)
        } else {
            None
        };

        TimelineResources {
            remote_client,
            deletion_queue_client: self.deletion_queue_client.clone(),
        }
        TimelineResources { remote_client }
    }

    /// Creates intermediate timeline structure and its files.
@@ -3481,6 +3466,7 @@ pub mod harness {
        pub conf: &'static PageServerConf,
        pub tenant_conf: TenantConf,
        pub tenant_id: TenantId,
        pub generation: Generation,
    }

    static LOG_HANDLE: OnceCell<()> = OnceCell::new();
@@ -3522,13 +3508,14 @@ pub mod harness {
                conf,
                tenant_conf,
                tenant_id,
                generation: Generation::new(0xdeadbeef),
            })
        }

        pub async fn load(&self) -> (Arc<Tenant>, RequestContext) {
            let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);
            (
                self.try_load(&ctx, None, None)
                self.try_load(&ctx, None)
                    .await
                    .expect("failed to load test tenant"),
                ctx,
@@ -3539,7 +3526,6 @@ pub mod harness {
            &self,
            ctx: &RequestContext,
            remote_storage: Option<remote_storage::GenericRemoteStorage>,
            deletion_queue_client: Option<DeletionQueueClient>,
        ) -> anyhow::Result<Arc<Tenant>> {
            let walredo_mgr = Arc::new(TestRedoManager);

@@ -3549,8 +3535,8 @@ pub mod harness {
                TenantConfOpt::from(self.tenant_conf),
                walredo_mgr,
                self.tenant_id,
                self.generation,
                remote_storage,
                deletion_queue_client,
            ));
            tenant
                .load(None, ctx)
@@ -4115,7 +4101,7 @@ mod tests {
        std::fs::write(metadata_path, metadata_bytes)?;

        let err = harness
            .try_load(&ctx, None, None)
            .try_load(&ctx, None)
            .await
            .err()
            .expect("should fail");

@@ -18,7 +18,6 @@ use utils::crashsafe;

use crate::config::PageServerConf;
use crate::context::{DownloadBehavior, RequestContext};
use crate::deletion_queue::DeletionQueue;
use crate::task_mgr::{self, TaskKind};
use crate::tenant::config::TenantConfOpt;
use crate::tenant::delete::DeleteTenantFlow;
@@ -26,6 +25,7 @@ use crate::tenant::{create_tenant_files, CreateTenantFilesMode, Tenant, TenantSt
use crate::{InitializationOrder, IGNORED_TENANT_FILE_NAME};

use utils::fs_ext::PathExt;
use utils::generation::Generation;
use utils::id::{TenantId, TimelineId};

use super::delete::DeleteTenantError;
@@ -203,10 +203,10 @@ pub(crate) fn schedule_local_tenant_processing(
        match Tenant::spawn_attach(
            conf,
            tenant_id,
            Generation::none(),
            resources.broker_client,
            tenants,
            remote_storage,
            resources.deletion_queue_client,
            ctx,
        ) {
            Ok(tenant) => tenant,
@@ -226,7 +226,15 @@ pub(crate) fn schedule_local_tenant_processing(
    } else {
        info!("tenant {tenant_id} is assumed to be loadable, starting load operation");
        // Start loading the tenant into memory. It will initially be in Loading state.
        Tenant::spawn_load(conf, tenant_id, resources, init_order, tenants, ctx)
        Tenant::spawn_load(
            conf,
            tenant_id,
            Generation::none(),
            resources,
            init_order,
            tenants,
            ctx,
        )
    };
    Ok(tenant)
}
@@ -351,7 +359,6 @@ pub async fn create_tenant(
    tenant_id: TenantId,
    broker_client: storage_broker::BrokerClientChannel,
    remote_storage: Option<GenericRemoteStorage>,
    deletion_queue: &DeletionQueue,
    ctx: &RequestContext,
) -> Result<Arc<Tenant>, TenantMapInsertError> {
    tenant_map_insert(tenant_id, || {
@@ -365,7 +372,6 @@ pub async fn create_tenant(
        let tenant_resources = TenantSharedResources {
            broker_client,
            remote_storage,
            deletion_queue_client: deletion_queue.new_client(),
        };
        let created_tenant =
            schedule_local_tenant_processing(conf, &tenant_directory, tenant_resources, None, &TENANTS, ctx)?;
@@ -517,7 +523,6 @@ pub async fn load_tenant(
    tenant_id: TenantId,
    broker_client: storage_broker::BrokerClientChannel,
    remote_storage: Option<GenericRemoteStorage>,
    deletion_queue: &DeletionQueue,
    ctx: &RequestContext,
) -> Result<(), TenantMapInsertError> {
    tenant_map_insert(tenant_id, || {
@@ -531,7 +536,6 @@ pub async fn load_tenant(
        let resources = TenantSharedResources {
            broker_client,
            remote_storage,
            deletion_queue_client: deletion_queue.new_client(),
        };
        let new_tenant = schedule_local_tenant_processing(conf, &tenant_path, resources, None, &TENANTS, ctx)
            .with_context(|| {
@@ -600,7 +604,6 @@ pub async fn attach_tenant(
    tenant_conf: TenantConfOpt,
    broker_client: storage_broker::BrokerClientChannel,
    remote_storage: GenericRemoteStorage,
    deletion_queue: &DeletionQueue,
    ctx: &RequestContext,
) -> Result<(), TenantMapInsertError> {
    tenant_map_insert(tenant_id, || {
@@ -618,7 +621,6 @@ pub async fn attach_tenant(
        let resources = TenantSharedResources {
            broker_client,
            remote_storage: Some(remote_storage),
            deletion_queue_client: deletion_queue.new_client(),
        };
        let attached_tenant = schedule_local_tenant_processing(conf, &tenant_dir, resources, None, &TENANTS, ctx)?;
        // TODO: tenant object & its background loops remain, untracked in tenant map, if we fail here.

@@ -56,11 +56,9 @@
//! # Consistency
//!
//! To have a consistent remote structure, it's important that uploads and
//! deletions are performed in the right order. For example:
//! - the index file contains a list of layer files, so it must not be uploaded
//!   until all the layer files that are in its list have been successfully uploaded.
//! - objects must be removed from the index before being deleted, and that updated
//!   index must be written to remote storage before deleting the objects from remote storage.
//! deletions are performed in the right order. For example, the index file
//! contains a list of layer files, so it must not be uploaded until all the
//! layer files that are in its list have been successfully uploaded.
//!
//! The contract between the client and its user is that the user is responsible for
//! scheduling operations in an order that keeps the remote consistent as
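A toy illustration of the ordering rule described in the comment block above: an index must never reference a layer that has not been uploaded yet. The types and names here are invented for the example and are not the pageserver's.

use std::collections::HashSet;

struct ToyRemote {
    layers: HashSet<String>,
    index: Vec<String>,
}

impl ToyRemote {
    fn upload_layer(&mut self, name: &str) {
        self.layers.insert(name.to_string());
    }

    // Uploading the index is only legal once every referenced layer exists.
    fn upload_index(&mut self, refs: &[&str]) {
        assert!(
            refs.iter().all(|r| self.layers.contains(*r)),
            "index must not reference un-uploaded layers"
        );
        self.index = refs.iter().map(|r| r.to_string()).collect();
    }
}

fn main() {
    let mut remote = ToyRemote { layers: HashSet::new(), index: Vec::new() };
    remote.upload_layer("layer-a");
    remote.upload_index(&["layer-a"]); // correct order: layer first, then index
}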
@@ -72,12 +70,10 @@
//! correct order, and the client will parallelize the operations in a way that
//! is safe.
//!
//! The caller should be careful with deletion, though:
//! - they should not delete local files that have been scheduled for upload but
//!   not yet finished uploading. Otherwise the upload will fail. To wait for an
//!   upload to finish, use the 'wait_completion' function (more on that later.)
//! - they should not do remote deletions via DeletionQueue without waiting for
//!   the latest metadata to upload via RemoteTimelineClient.
//! The caller should be careful with deletion, though. They should not delete
//! local files that have been scheduled for upload but not yet finished uploading.
//! Otherwise the upload will fail. To wait for an upload to finish, use
//! the 'wait_completion' function (more on that later.)
//!
//! All of this relies on the following invariants:
//!
@@ -204,11 +200,12 @@
//! [`Tenant::timeline_init_and_sync`]: super::Tenant::timeline_init_and_sync
//! [`Timeline::load_layer_map`]: super::Timeline::load_layer_map

mod delete;
mod download;
pub mod index;
mod upload;

use anyhow::{bail, Context};
use anyhow::Context;
use chrono::{NaiveDateTime, Utc};
// re-export these
pub use download::{is_temp_download_file, list_remote_timelines};
@@ -219,7 +216,7 @@ use utils::backoff::{
};

use std::collections::{HashMap, VecDeque};
use std::path::Path;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, Mutex};

@@ -229,7 +226,6 @@ use tracing::{debug, error, info, instrument, warn};
use tracing::{info_span, Instrument};
use utils::lsn::Lsn;

use crate::deletion_queue::DeletionQueueClient;
use crate::metrics::{
    MeasureRemoteOp, RemoteOpFileKind, RemoteOpKind, RemoteTimelineClientMetrics,
    RemoteTimelineClientMetricsCallTrackSize, REMOTE_ONDEMAND_DOWNLOADED_BYTES,
@@ -238,6 +234,8 @@ use crate::metrics::{
use crate::task_mgr::shutdown_token;
use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id;
use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
use crate::tenant::upload_queue::Delete;
use crate::tenant::TIMELINES_SEGMENT_NAME;
use crate::{
    config::PageServerConf,
    task_mgr,
@@ -255,6 +253,7 @@ use self::index::IndexPart;

use super::storage_layer::LayerFileName;
use super::upload_queue::SetDeletedFlagProgress;
use super::Generation;

// Occasional network issues and such can cause remote operations to fail, and
// that's expected. If a download fails, we log it at info-level, and retry.
@@ -318,6 +317,7 @@ pub struct RemoteTimelineClient {

    tenant_id: TenantId,
    timeline_id: TimelineId,
    generation: Generation,

    upload_queue: Mutex<UploadQueue>,

@@ -338,12 +338,14 @@ impl RemoteTimelineClient {
        conf: &'static PageServerConf,
        tenant_id: TenantId,
        timeline_id: TimelineId,
        generation: Generation,
    ) -> RemoteTimelineClient {
        RemoteTimelineClient {
            conf,
            runtime: BACKGROUND_RUNTIME.handle().to_owned(),
            tenant_id,
            timeline_id,
            generation,
            storage_impl: remote_storage,
            upload_queue: Mutex::new(UploadQueue::Uninitialized),
            metrics: Arc::new(RemoteTimelineClientMetrics::new(&tenant_id, &timeline_id)),
@@ -452,10 +454,10 @@ impl RemoteTimelineClient {
        );

        let index_part = download::download_index_part(
            self.conf,
            &self.storage_impl,
            &self.tenant_id,
            &self.timeline_id,
            self.generation,
        )
        .measure_remote_op(
            self.tenant_id,
@@ -634,46 +636,70 @@ impl RemoteTimelineClient {
    /// deletion won't actually be performed, until any previously scheduled
    /// upload operations, and the index file upload, have completed
    /// successfully.
    pub async fn schedule_layer_file_deletion(
    pub fn schedule_layer_file_deletion(
        self: &Arc<Self>,
        names: &[LayerFileName],
        deletion_queue_client: &DeletionQueueClient,
    ) -> anyhow::Result<()> {
        // Synchronous update of upload queues under mutex
        {
            let mut guard = self.upload_queue.lock().unwrap();
            let upload_queue = guard.initialized_mut()?;
        let mut guard = self.upload_queue.lock().unwrap();
        let upload_queue = guard.initialized_mut()?;

            // Deleting layers doesn't affect the values stored in TimelineMetadata,
            // so we don't need to update it. Just serialize it.
            let metadata = upload_queue.latest_metadata.clone();
        // Deleting layers doesn't affect the values stored in TimelineMetadata,
        // so we don't need to update it. Just serialize it.
        let metadata = upload_queue.latest_metadata.clone();

            // Update the remote index file, removing the to-be-deleted files from the index,
            // before deleting the actual files.
            for name in names {
                if upload_queue.latest_files.remove(name).is_some() {
                    upload_queue.latest_files_changes_since_metadata_upload_scheduled += 1;
                }
            }
        // Update the remote index file, removing the to-be-deleted files from the index,
        // before deleting the actual files.
        //
        // Once we start removing files from upload_queue.latest_files, there's
        // no going back! Otherwise, some of the files would already be removed
        // from latest_files, but not yet scheduled for deletion. Use a closure
        // to syntactically forbid ? or bail! calls here.
        let no_bail_here = || {
            // Decorate our list of names with each name's generation, dropping
            // names that are unexpectedly missing from our metadata.
            let with_generations: Vec<_> = names
                .iter()
                .filter_map(|name| {
                    // Remove from latest_files, learning the file's remote generation in the process
                    let meta = upload_queue.latest_files.remove(name);

                    if let Some(meta) = meta {
                        upload_queue.latest_files_changes_since_metadata_upload_scheduled += 1;
                        Some((name, meta.generation))
                    } else {
                        // This can only happen if we forgot to schedule the file upload
                        // before scheduling the delete. Log it because it is a rare/strange
                        // situation, and in case something is misbehaving, we'd like to know which
                        // layers experienced this.
                        info!(
                            "Deleting layer {name} not found in latest_files list, never uploaded?"
                        );
                        None
                    }
                })
                .collect();

            if upload_queue.latest_files_changes_since_metadata_upload_scheduled > 0 {
                self.schedule_index_upload(upload_queue, metadata);
            }
        }

        // Barrier: we must ensure all prior uploads and index writes have landed in S3
        // before emitting deletions.
        if let Err(e) = self.wait_completion().await {
            // This can only fail if the upload queue is shut down: if this happens, we do
            // not emit any deletions. In this condition (remote client is shut down
            // during compaction or GC) we may leak some objects.
            bail!("Cannot complete layer file deletions during shutdown ({e})");
        }
            // schedule the actual deletions
            for (name, generation) in with_generations {
                let op = UploadOp::Delete(Delete {
                    file_kind: RemoteOpFileKind::Layer,
                    layer_file_name: name.clone(),
                    scheduled_from_timeline_delete: false,
                    generation,
                });
                self.calls_unfinished_metric_begin(&op);
                upload_queue.queued_operations.push_back(op);
                info!("scheduled layer file deletion {name}");
            }

        // Enqueue deletions
        deletion_queue_client
            .push_layers(self.tenant_id, self.timeline_id, names.to_vec())
            .await?;
            // Launch the tasks immediately, if possible
            self.launch_queued_tasks(upload_queue);
        };
        no_bail_here();
        Ok(())
    }
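With this change the call is synchronous and no longer takes a queue client. A hedged call-site sketch, within this module's scope; the function name and variables around the call are assumed for illustration:

// Hypothetical call site: `client` is an Arc<RemoteTimelineClient> and
// `doomed` a slice of LayerFileName values obtained elsewhere (e.g. from
// compaction or GC).
fn example(
    client: &std::sync::Arc<RemoteTimelineClient>,
    doomed: &[LayerFileName],
) -> anyhow::Result<()> {
    // Updates the in-memory index, schedules the index upload, and enqueues
    // generation-aware Delete operations, all without awaiting.
    client.schedule_layer_file_deletion(doomed)?;
    Ok(())
}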
@@ -759,10 +785,10 @@ impl RemoteTimelineClient {
        backoff::retry(
            || {
                upload::upload_index_part(
                    self.conf,
                    &self.storage_impl,
                    &self.tenant_id,
                    &self.timeline_id,
                    self.generation,
                    &index_part_with_deleted_at,
                )
            },
@@ -799,13 +825,12 @@ impl RemoteTimelineClient {
    /// Prerequisites: UploadQueue should be in stopped state and deleted_at should be successfully set.
    /// The function deletes layer files one by one, then lists the prefix to see if we leaked something,
    /// deletes leaked files if any, and proceeds with deletion of the index file at the end.
    pub(crate) async fn delete_all(
        self: &Arc<Self>,
        deletion_queue: &DeletionQueueClient,
    ) -> anyhow::Result<()> {
    pub(crate) async fn delete_all(self: &Arc<Self>) -> anyhow::Result<()> {
        debug_assert_current_span_has_tenant_and_timeline_id();

        let layers: Vec<LayerFileName> = {
        let (mut receiver, deletions_queued) = {
            let mut deletions_queued = 0;

            let mut locked = self.upload_queue.lock().unwrap();
            let stopped = locked.stopped_mut()?;

@@ -817,34 +842,41 @@ impl RemoteTimelineClient {

            stopped
                .upload_queue_for_deletion
                .latest_files
                .drain()
                .map(|kv| kv.0)
                .collect()
                .queued_operations
                .reserve(stopped.upload_queue_for_deletion.latest_files.len());

            // schedule the actual deletions
            for (name, meta) in &stopped.upload_queue_for_deletion.latest_files {
                let op = UploadOp::Delete(Delete {
                    file_kind: RemoteOpFileKind::Layer,
                    layer_file_name: name.clone(),
                    scheduled_from_timeline_delete: true,
                    generation: meta.generation,
                });

                self.calls_unfinished_metric_begin(&op);
                stopped
                    .upload_queue_for_deletion
                    .queued_operations
                    .push_back(op);

                info!("scheduled layer file deletion {name}");
                deletions_queued += 1;
            }

            self.launch_queued_tasks(&mut stopped.upload_queue_for_deletion);

            (
                self.schedule_barrier(&mut stopped.upload_queue_for_deletion),
                deletions_queued,
            )
        };

        let layer_deletion_count = layers.len();

        let timeline_path = self.conf.timeline_path(&self.tenant_id, &self.timeline_id);
        let layer_paths = layers
            .into_iter()
            .map(|l| {
                let local_path = timeline_path.join(l.file_name());
                self.conf
                    .remote_path(&local_path)
                    .expect("Timeline path should always convert to remote")
            })
            .collect();
        deletion_queue.push_immediate(layer_paths).await?;
        receiver.changed().await.context("upload queue shut down")?;

        // Do not delete the index part yet, it is needed for a possible retry. If we remove it first
        // and the retry arrives at a different pageserver, there won't be any traces of it on remote storage
        let timeline_path = self.conf.timeline_path(&self.tenant_id, &self.timeline_id);
        let timeline_storage_path = self.conf.remote_path(&timeline_path)?;

        // Execute all pending deletions, so that when we proceed to do a list_prefixes below, we aren't
        // taking the burden of listing all the layers that we already know we should delete.
        deletion_queue.flush_immediate().await?;
        let timeline_storage_path = remote_timeline_path(&self.tenant_id, &self.timeline_id);

        let remaining = backoff::retry(
            || async {
@@ -873,9 +905,17 @@ impl RemoteTimelineClient {
            })
            .collect();

        let not_referenced_count = remaining.len();
        if !remaining.is_empty() {
            deletion_queue.push_immediate(remaining).await?;
            backoff::retry(
                || async { self.storage_impl.delete_objects(&remaining).await },
                |_e| false,
                FAILED_UPLOAD_WARN_THRESHOLD,
                FAILED_REMOTE_OP_RETRIES,
                "delete_objects",
                backoff::Cancel::new(shutdown_token(), || anyhow::anyhow!("Cancelled!")),
            )
            .await
            .context("delete_objects")?;
        }

        fail::fail_point!("timeline-delete-before-index-delete", |_| {
@@ -886,14 +926,18 @@ impl RemoteTimelineClient {

        let index_file_path = timeline_storage_path.join(Path::new(IndexPart::FILE_NAME));

        debug!("enqueuing index part deletion");
        deletion_queue
            .push_immediate([index_file_path].to_vec())
            .await?;
        debug!("deleting index part");

        // Timeline deletion is rare and we have probably emitted a reasonable number of objects: wait
        // for a flush to a persistent deletion list so that we may be sure deletion will occur.
        deletion_queue.flush_immediate().await?;
        backoff::retry(
            || async { self.storage_impl.delete(&index_file_path).await },
            |_e| false,
            FAILED_UPLOAD_WARN_THRESHOLD,
            FAILED_REMOTE_OP_RETRIES,
            "delete_index",
            backoff::Cancel::new(shutdown_token(), || anyhow::anyhow!("Cancelled")),
        )
        .await
        .context("delete_index")?;

        fail::fail_point!("timeline-delete-after-index-delete", |_| {
            Err(anyhow::anyhow!(
@@ -901,7 +945,7 @@ impl RemoteTimelineClient {
            ))?
        });

        info!(prefix=%timeline_storage_path, referenced=layer_deletion_count, not_referenced=%not_referenced_count, "done deleting in timeline prefix, including index_part.json");
        info!(prefix=%timeline_storage_path, referenced=deletions_queued, not_referenced=%remaining.len(), "done deleting in timeline prefix, including index_part.json");

        Ok(())
    }
@@ -924,6 +968,10 @@ impl RemoteTimelineClient {
                // have finished.
                upload_queue.inprogress_tasks.is_empty()
            }
            UploadOp::Delete(_) => {
                // Wait for preceding uploads to finish. Concurrent deletions are OK, though.
                upload_queue.num_inprogress_deletions == upload_queue.inprogress_tasks.len()
            }

            UploadOp::Barrier(_) => upload_queue.inprogress_tasks.is_empty(),
        };
@@ -951,6 +999,9 @@ impl RemoteTimelineClient {
            UploadOp::UploadMetadata(_, _) => {
                upload_queue.num_inprogress_metadata_uploads += 1;
            }
            UploadOp::Delete(_) => {
                upload_queue.num_inprogress_deletions += 1;
            }
            UploadOp::Barrier(sender) => {
                sender.send_replace(());
                continue;
@@ -1029,15 +1080,17 @@ impl RemoteTimelineClient {

        let upload_result: anyhow::Result<()> = match &task.op {
            UploadOp::UploadLayer(ref layer_file_name, ref layer_metadata) => {
                let path = &self
                let path = self
                    .conf
                    .timeline_path(&self.tenant_id, &self.timeline_id)
                    .join(layer_file_name.file_name());

                upload::upload_timeline_layer(
                    self.conf,
                    &self.storage_impl,
                    path,
                    &path,
                    layer_metadata,
                    self.generation,
                )
                .measure_remote_op(
                    self.tenant_id,
@@ -1059,10 +1112,10 @@ impl RemoteTimelineClient {
                };

                let res = upload::upload_index_part(
                    self.conf,
                    &self.storage_impl,
                    &self.tenant_id,
                    &self.timeline_id,
                    self.generation,
                    index_part,
                )
                .measure_remote_op(
@@ -1082,6 +1135,21 @@ impl RemoteTimelineClient {
                }
                res
            }
            UploadOp::Delete(delete) => {
                let path = &self
                    .conf
                    .timeline_path(&self.tenant_id, &self.timeline_id)
                    .join(delete.layer_file_name.file_name());
                delete::delete_layer(self.conf, &self.storage_impl, path, delete.generation)
                    .measure_remote_op(
                        self.tenant_id,
                        self.timeline_id,
                        delete.file_kind,
                        RemoteOpKind::Delete,
                        Arc::clone(&self.metrics),
                    )
                    .await
            }
            UploadOp::Barrier(_) => {
                // unreachable. Barrier operations are handled synchronously in
                // launch_queued_tasks
@@ -1141,7 +1209,15 @@ impl RemoteTimelineClient {
        let mut upload_queue_guard = self.upload_queue.lock().unwrap();
        let upload_queue = match upload_queue_guard.deref_mut() {
            UploadQueue::Uninitialized => panic!("callers are responsible for ensuring this is only called on an initialized queue"),
            UploadQueue::Stopped(_) => { None }
            UploadQueue::Stopped(stopped) => {
                // Special care is needed for deletions: if it was an ordinary deletion (not scheduled
                // from timeline deletion) then stop() took care of it, so we just return.
                // For deletions that come from delete_all we still want to maintain metrics, launch following tasks, etc.
                match &task.op {
                    UploadOp::Delete(delete) if delete.scheduled_from_timeline_delete => Some(&mut stopped.upload_queue_for_deletion),
                    _ => None
                }
            },
            UploadQueue::Initialized(qi) => { Some(qi) }
        };

@@ -1163,6 +1239,9 @@ impl RemoteTimelineClient {
                upload_queue.num_inprogress_metadata_uploads -= 1;
                upload_queue.last_uploaded_consistent_lsn = lsn; // XXX monotonicity check?
            }
            UploadOp::Delete(_) => {
                upload_queue.num_inprogress_deletions -= 1;
            }
            UploadOp::Barrier(_) => unreachable!(),
        };

@@ -1194,6 +1273,13 @@ impl RemoteTimelineClient {
                    reason: "metadata uploads are tiny",
                },
            ),
            UploadOp::Delete(delete) => (
                delete.file_kind,
                RemoteOpKind::Delete,
                DontTrackSize {
                    reason: "should we track deletes? positive or negative sign?",
                },
            ),
            UploadOp::Barrier(_) => {
                // we do not account these
                return None;
@@ -1253,6 +1339,7 @@ impl RemoteTimelineClient {
            last_uploaded_consistent_lsn: initialized.last_uploaded_consistent_lsn,
            num_inprogress_layer_uploads: 0,
            num_inprogress_metadata_uploads: 0,
            num_inprogress_deletions: 0,
            inprogress_tasks: HashMap::default(),
            queued_operations: VecDeque::default(),
        };
@@ -1273,7 +1360,9 @@ impl RemoteTimelineClient {

        // consistency check
        assert_eq!(
            qi.num_inprogress_layer_uploads + qi.num_inprogress_metadata_uploads,
            qi.num_inprogress_layer_uploads
                + qi.num_inprogress_metadata_uploads
                + qi.num_inprogress_deletions,
            qi.inprogress_tasks.len()
        );

@@ -1298,15 +1387,79 @@ impl RemoteTimelineClient {
    }
}

pub fn remote_timelines_path(tenant_id: &TenantId) -> RemotePath {
    let path = format!("tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}");
    RemotePath::from_string(&path).expect("Failed to construct path")
}

pub fn remote_timeline_path(tenant_id: &TenantId, timeline_id: &TimelineId) -> RemotePath {
    remote_timelines_path(tenant_id).join(&PathBuf::from(timeline_id.to_string()))
}

pub fn remote_layer_path(
    tenant_id: &TenantId,
    timeline_id: &TimelineId,
    layer_file_name: &LayerFileName,
    layer_meta: &LayerFileMetadata,
) -> RemotePath {
    // Generation-aware key format
    let path = format!(
        "tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{0}{1}",
        layer_file_name.file_name(),
        layer_meta.generation.get_suffix()
    );

    RemotePath::from_string(&path).expect("Failed to construct path")
}

pub fn remote_index_path(
    tenant_id: &TenantId,
    timeline_id: &TimelineId,
    generation: Generation,
) -> RemotePath {
    RemotePath::from_string(&format!(
        "tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{0}{1}",
        IndexPart::FILE_NAME,
        generation.get_suffix()
    ))
    .expect("Failed to construct path")
}
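For intuition, the key layout these helpers produce is summarized below. This is a hedged sketch: the placeholder IDs are not real (tenant/timeline IDs are long hex strings in practice), and the exact generation suffix is owned by Generation::get_suffix, which is defined elsewhere and may render empty for a "none" generation.

// Illustrative key shapes only (not pageserver code):
//
//   remote_timelines_path(t)            tenants/<tenant_id>/timelines
//   remote_timeline_path(t, tl)         tenants/<tenant_id>/timelines/<timeline_id>
//   remote_layer_path(t, tl, name, m)   tenants/<tenant_id>/timelines/<timeline_id>/<layer_file_name><generation_suffix>
//   remote_index_path(t, tl, g)         tenants/<tenant_id>/timelines/<timeline_id>/index_part.json<generation_suffix>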
/// Files on the remote storage are stored with paths, relative to the workdir.
/// That path itself includes both tenant and timeline ids, giving a unique remote storage path.
///
/// Errors if the path provided does not start from the pageserver's workdir.
pub fn remote_path(
    conf: &PageServerConf,
    local_path: &Path,
    generation: Generation,
) -> anyhow::Result<RemotePath> {
    let stripped = local_path
        .strip_prefix(&conf.workdir)
        .context("Failed to strip workdir prefix")?;

    let suffixed = format!(
        "{0}{1}",
        stripped.to_string_lossy(),
        generation.get_suffix()
    );

    RemotePath::new(&PathBuf::from(suffixed)).with_context(|| {
        format!(
            "to resolve remote part of path {:?} for base {:?}",
            local_path, conf.workdir
        )
    })
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        context::RequestContext,
        deletion_queue::mock::MockDeletionQueue,
        tenant::{
            harness::{TenantHarness, TIMELINE_ID},
            Tenant, Timeline,
            Generation, Tenant, Timeline,
        },
        DEFAULT_PG_VERSION,
    };
@@ -1348,8 +1501,11 @@ mod tests {
        assert_eq!(avec, bvec);
    }

    fn assert_remote_files(expected: &[&str], remote_path: &Path) {
        let mut expected: Vec<String> = expected.iter().map(|x| String::from(*x)).collect();
    fn assert_remote_files(expected: &[&str], remote_path: &Path, generation: Generation) {
        let mut expected: Vec<String> = expected
            .iter()
            .map(|x| format!("{}{}", x, generation.get_suffix()))
            .collect();
        expected.sort();

        let mut found: Vec<String> = Vec::new();
@@ -1370,7 +1526,6 @@ mod tests {
        tenant_ctx: RequestContext,
        remote_fs_dir: PathBuf,
        client: Arc<RemoteTimelineClient>,
        deletion_queue: MockDeletionQueue,
    }

    impl TestSetup {
@@ -1401,6 +1556,8 @@ mod tests {
            storage: RemoteStorageKind::LocalFs(remote_fs_dir.clone()),
        };

        let generation = Generation::new(0xdeadbeef);

        let storage = GenericRemoteStorage::from_config(&storage_config).unwrap();

        let client = Arc::new(RemoteTimelineClient {
@@ -1408,7 +1565,8 @@ mod tests {
            runtime: tokio::runtime::Handle::current(),
            tenant_id: harness.tenant_id,
            timeline_id: TIMELINE_ID,
            storage_impl: storage.clone(),
            generation,
            storage_impl: storage,
            upload_queue: Mutex::new(UploadQueue::Uninitialized),
            metrics: Arc::new(RemoteTimelineClientMetrics::new(
                &harness.tenant_id,
@@ -1416,8 +1574,6 @@ mod tests {
            )),
        });

        let deletion_queue = MockDeletionQueue::new(Some(storage), harness.conf);

        Ok(Self {
            harness,
            tenant,
@@ -1425,7 +1581,6 @@ mod tests {
            tenant_ctx: ctx,
            remote_fs_dir,
            client,
            deletion_queue,
        })
    }
}
@@ -1454,7 +1609,6 @@ mod tests {
        tenant_ctx: _tenant_ctx,
        remote_fs_dir,
        client,
        deletion_queue,
    } = TestSetup::new("upload_scheduling").await.unwrap();

    let timeline_path = harness.timeline_path(&TIMELINE_ID);
@@ -1470,6 +1624,8 @@ mod tests {
        .init_upload_queue_for_empty_remote(&metadata)
        .unwrap();

    let generation = Generation::new(0xdeadbeef);

    // Create a couple of dummy files, schedule upload for them
    let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap();
    let layer_file_name_2: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D9-00000000016B5A52".parse().unwrap();
@@ -1489,13 +1645,13 @@ mod tests {
    client
        .schedule_layer_file_upload(
            &layer_file_name_1,
            &LayerFileMetadata::new(content_1.len() as u64),
            &LayerFileMetadata::new(content_1.len() as u64, generation),
        )
        .unwrap();
    client
        .schedule_layer_file_upload(
            &layer_file_name_2,
            &LayerFileMetadata::new(content_2.len() as u64),
            &LayerFileMetadata::new(content_2.len() as u64, generation),
        )
        .unwrap();

@@ -1559,66 +1715,35 @@ mod tests {
    client
        .schedule_layer_file_upload(
            &layer_file_name_3,
            &LayerFileMetadata::new(content_3.len() as u64),
            &LayerFileMetadata::new(content_3.len() as u64, generation),
        )
        .unwrap();

    {
        let mut guard = client.upload_queue.lock().unwrap();
        let upload_queue = guard.initialized_mut().unwrap();
        assert_eq!(upload_queue.queued_operations.len(), 0);
        assert_eq!(upload_queue.num_inprogress_layer_uploads, 1);
    }

    assert_remote_files(
        &[
            &layer_file_name_1.file_name(),
            &layer_file_name_2.file_name(),
            "index_part.json",
        ],
        &remote_timeline_dir,
    );

    client
        .schedule_layer_file_deletion(
            &[layer_file_name_1.clone()],
            &deletion_queue.new_client(),
        )
        .await
        .schedule_layer_file_deletion(&[layer_file_name_1.clone()])
        .unwrap();

    {
        let mut guard = client.upload_queue.lock().unwrap();
        let upload_queue = guard.initialized_mut().unwrap();

        // Deletion schedules upload of the index file via RemoteTimelineClient, and
        // deletion of layer files via DeletionQueue. The uploads have all been flushed
        // because schedule_layer_file_deletion does a wait_completion before pushing
        // to the deletion_queue
        assert_eq!(upload_queue.queued_operations.len(), 0);
        assert_eq!(upload_queue.inprogress_tasks.len(), 0);
        assert_eq!(upload_queue.num_inprogress_layer_uploads, 0);
        assert_eq!(
            upload_queue.latest_files_changes_since_metadata_upload_scheduled,
            0
        );
        // Deletion schedules upload of the index file, and the file deletion itself
        assert!(upload_queue.queued_operations.len() == 2);
        assert!(upload_queue.inprogress_tasks.len() == 1);
        assert!(upload_queue.num_inprogress_layer_uploads == 1);
        assert!(upload_queue.num_inprogress_deletions == 0);
        assert!(upload_queue.latest_files_changes_since_metadata_upload_scheduled == 0);
    }
    assert_remote_files(
        &[
            &layer_file_name_1.file_name(),
            &layer_file_name_2.file_name(),
            &layer_file_name_3.file_name(),
            "index_part.json",
        ],
        &remote_timeline_dir,
        generation,
    );

    // Finish uploads and deletions
    // Finish them
    client.wait_completion().await.unwrap();
    deletion_queue.pump().await;

    // 1 layer was deleted
    assert_eq!(deletion_queue.get_executed(), 1);

    assert_remote_files(
        &[
@@ -1627,6 +1752,7 @@ mod tests {
            "index_part.json",
        ],
        &remote_timeline_dir,
        generation,
    );
}

@@ -1679,12 +1805,14 @@ mod tests {

    // Test

    let generation = Generation::new(0xdeadbeef);

    let init = get_bytes_started_stopped();

    client
        .schedule_layer_file_upload(
            &layer_file_name_1,
            &LayerFileMetadata::new(content_1.len() as u64),
            &LayerFileMetadata::new(content_1.len() as u64, generation),
        )
        .unwrap();

pageserver/src/tenant/remote_timeline_client/delete.rs (new file, 34 lines)
@@ -0,0 +1,34 @@
//! Helper functions to delete files from remote storage with a RemoteStorage
use anyhow::Context;
use std::path::Path;
use tracing::debug;

use remote_storage::GenericRemoteStorage;

use crate::{
    config::PageServerConf,
    tenant::{remote_timeline_client::remote_path, Generation},
};

pub(super) async fn delete_layer<'a>(
    conf: &'static PageServerConf,
    storage: &'a GenericRemoteStorage,
    local_layer_path: &'a Path,
    generation: Generation,
) -> anyhow::Result<()> {
    fail::fail_point!("before-delete-layer", |_| {
        anyhow::bail!("failpoint before-delete-layer")
    });
    debug!("Deleting layer from remote storage: {local_layer_path:?}",);

    let path_to_delete = remote_path(conf, local_layer_path, generation)?;

    // We don't want to print an error if the delete fails because the file has
    // already been deleted. Thankfully, in this situation S3 already
    // does not yield an error. While OS-provided local file system APIs do yield
    // errors, we avoid them in the `LocalFs` wrapper.
    storage
        .delete(&path_to_delete)
        .await
        .with_context(|| format!("delete remote layer from storage at {path_to_delete:?}"))
}
@@ -15,14 +15,16 @@ use tokio_util::sync::CancellationToken;
use utils::{backoff, crashsafe};

use crate::config::PageServerConf;
use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_path};
use crate::tenant::storage_layer::LayerFileName;
use crate::tenant::timeline::span::debug_assert_current_span_has_tenant_and_timeline_id;
use crate::tenant::Generation;
use remote_storage::{DownloadError, GenericRemoteStorage};
use utils::crashsafe::path_with_suffix_extension;
use utils::id::{TenantId, TimelineId};

use super::index::{IndexPart, LayerFileMetadata};
use super::{FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES};
use super::{remote_index_path, FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES};

static MAX_DOWNLOAD_DURATION: Duration = Duration::from_secs(120);

@@ -41,13 +43,11 @@ pub async fn download_layer_file<'a>(
) -> Result<u64, DownloadError> {
    debug_assert_current_span_has_tenant_and_timeline_id();

    let timeline_path = conf.timeline_path(&tenant_id, &timeline_id);
    let local_path = conf
        .timeline_path(&tenant_id, &timeline_id)
        .join(layer_file_name.file_name());

    let local_path = timeline_path.join(layer_file_name.file_name());

    let remote_path = conf
        .remote_path(&local_path)
        .map_err(DownloadError::Other)?;
    let remote_path = remote_layer_path(&tenant_id, &timeline_id, layer_file_name, layer_metadata);

    // Perform a rename inspired by durable_rename from file_utils.c.
    // The sequence:
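The comment above refers to PostgreSQL's durable_rename: write to a temporary file, fsync it, rename into place, then fsync again so the result survives a crash. A self-contained sketch of that sequence using std; the real code below uses tokio's fs and crashsafe::fsync_async instead, and full durability also requires fsyncing the containing directory, which is omitted here.

use std::fs::{self, File};
use std::io::Write;
use std::path::Path;

// Minimal durable_rename-style write; error handling kept short for illustration.
fn durable_write(target: &Path, data: &[u8]) -> std::io::Result<()> {
    let tmp = target.with_extension("temp");
    let mut f = File::create(&tmp)?;
    f.write_all(data)?;
    f.sync_all()?; // fsync the temp file before it becomes visible
    fs::rename(&tmp, target)?;
    File::open(target)?.sync_all()?; // fsync the renamed file
    Ok(())
}

fn main() -> std::io::Result<()> {
    durable_write(Path::new("/tmp/layer.example"), b"payload")
}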
@@ -64,33 +64,43 @@ pub async fn download_layer_file<'a>(
|
||||
let (mut destination_file, bytes_amount) = download_retry(
|
||||
|| async {
|
||||
// TODO: this doesn't use the cached fd for some reason?
|
||||
let mut destination_file = fs::File::create(&temp_file_path).await.with_context(|| {
|
||||
format!(
|
||||
"create a destination file for layer '{}'",
|
||||
temp_file_path.display()
|
||||
)
|
||||
})
|
||||
.map_err(DownloadError::Other)?;
|
||||
let mut download = storage.download(&remote_path).await.with_context(|| {
|
||||
format!(
|
||||
let mut destination_file = fs::File::create(&temp_file_path)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"create a destination file for layer '{}'",
|
||||
temp_file_path.display()
|
||||
)
|
||||
})
|
||||
.map_err(DownloadError::Other)?;
|
||||
let mut download = storage
|
||||
.download(&remote_path)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"open a download stream for layer with remote storage path '{remote_path:?}'"
|
||||
)
|
||||
})
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
let bytes_amount = tokio::time::timeout(MAX_DOWNLOAD_DURATION, tokio::io::copy(&mut download.download_stream, &mut destination_file))
|
||||
.await
|
||||
.map_err(|e| DownloadError::Other(anyhow::anyhow!("Timed out {:?}", e)))?
|
||||
.with_context(|| {
|
||||
format!("Failed to download layer with remote storage path '{remote_path:?}' into file {temp_file_path:?}")
|
||||
})
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
Ok((destination_file, bytes_amount))
|
||||
let bytes_amount = tokio::time::timeout(
|
||||
MAX_DOWNLOAD_DURATION,
|
||||
tokio::io::copy(&mut download.download_stream, &mut destination_file),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| DownloadError::Other(anyhow::anyhow!("Timed out {:?}", e)))?
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"download layer at remote path '{remote_path:?}' into file {temp_file_path:?}"
|
||||
)
|
||||
})
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
Ok((destination_file, bytes_amount))
|
||||
},
|
||||
&format!("download {remote_path:?}"),
|
||||
).await?;
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Tokio doc here: https://docs.rs/tokio/1.17.0/tokio/fs/struct.File.html states that:
|
||||
// A file will not be closed immediately when it goes out of scope if there are any IO operations
|
||||
@@ -103,12 +113,7 @@ pub async fn download_layer_file<'a>(
|
||||
destination_file
|
||||
.flush()
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"failed to flush source file at {}",
|
||||
temp_file_path.display()
|
||||
)
|
||||
})
|
||||
.with_context(|| format!("flush source file at {}", temp_file_path.display()))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
let expected = layer_metadata.file_size();
|
||||
@@ -139,17 +144,12 @@ pub async fn download_layer_file<'a>(
|
||||
|
||||
fs::rename(&temp_file_path, &local_path)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Could not rename download layer file to {}",
|
||||
local_path.display(),
|
||||
)
|
||||
})
|
||||
.with_context(|| format!("rename download layer file to {}", local_path.display(),))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
crashsafe::fsync_async(&local_path)
|
||||
.await
|
||||
.with_context(|| format!("Could not fsync layer file {}", local_path.display(),))
|
||||
.with_context(|| format!("fsync layer file {}", local_path.display(),))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
tracing::debug!("download complete: {}", local_path.display());

@@ -173,21 +173,19 @@ pub fn is_temp_download_file(path: &Path) -> bool {
}

/// List timelines of given tenant in remote storage
pub async fn list_remote_timelines<'a>(
storage: &'a GenericRemoteStorage,
conf: &'static PageServerConf,
pub async fn list_remote_timelines(
storage: &GenericRemoteStorage,
tenant_id: TenantId,
) -> anyhow::Result<HashSet<TimelineId>> {
let tenant_path = conf.timelines_path(&tenant_id);
let tenant_storage_path = conf.remote_path(&tenant_path)?;
let remote_path = remote_timelines_path(&tenant_id);

fail::fail_point!("storage-sync-list-remote-timelines", |_| {
anyhow::bail!("storage-sync-list-remote-timelines");
});

let timelines = download_retry(
|| storage.list_prefixes(Some(&tenant_storage_path)),
&format!("list prefixes for {tenant_path:?}"),
|| storage.list_prefixes(Some(&remote_path)),
&format!("list prefixes for {tenant_id}"),
)
.await?;

@@ -202,9 +200,9 @@ pub async fn list_remote_timelines<'a>(
anyhow::anyhow!("failed to get timeline id for remote tenant {tenant_id}")
})?;

let timeline_id: TimelineId = object_name.parse().with_context(|| {
format!("failed to parse object name into timeline id '{object_name}'")
})?;
let timeline_id: TimelineId = object_name
.parse()
.with_context(|| format!("parse object name into timeline id '{object_name}'"))?;

// list_prefixes is assumed to return unique names. Ensure this here.
// NB: it's safer to bail out than warn-log this because the pageserver
@@ -222,28 +220,33 @@ pub async fn list_remote_timelines<'a>(
}

pub(super) async fn download_index_part(
conf: &'static PageServerConf,
storage: &GenericRemoteStorage,
tenant_id: &TenantId,
timeline_id: &TimelineId,
generation: Generation,
) -> Result<IndexPart, DownloadError> {
let index_part_path = conf
.metadata_path(tenant_id, timeline_id)
.with_file_name(IndexPart::FILE_NAME);
let part_storage_path = conf
.remote_path(&index_part_path)
.map_err(DownloadError::BadInput)?;
let remote_path = remote_index_path(tenant_id, timeline_id, generation);

let index_part_bytes = download_retry(
|| storage.download_all(&part_storage_path),
&format!("download {part_storage_path:?}"),
|| async {
let mut index_part_download = storage.download(&remote_path).await?;

let mut index_part_bytes = Vec::new();
tokio::io::copy(
&mut index_part_download.download_stream,
&mut index_part_bytes,
)
.await
.with_context(|| format!("download index part at {remote_path:?}"))
.map_err(DownloadError::Other)?;
Ok(index_part_bytes)
},
&format!("download {remote_path:?}"),
)
.await?;

let index_part: IndexPart = serde_json::from_slice(&index_part_bytes)
.with_context(|| {
format!("Failed to deserialize index part file into file {index_part_path:?}")
})
.with_context(|| format!("download index part file at {remote_path:?}"))
.map_err(DownloadError::Other)?;

Ok(index_part)
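
Both call sites above funnel through `download_retry`. A rough sketch of such a wrapper, generic over the error type, assuming a capped exponential backoff (the actual retry policy in the crate may differ):

async fn download_retry<T, E, O, F>(mut op: O, description: &str) -> Result<T, E>
where
    E: std::fmt::Display,
    O: FnMut() -> F,
    F: std::future::Future<Output = Result<T, E>>,
{
    let mut attempt: u32 = 0;
    loop {
        match op().await {
            Ok(value) => return Ok(value),
            Err(e) if attempt < 5 => {
                attempt += 1;
                tracing::info!("failed to {description}, retrying: {e}");
                // capped exponential backoff between attempts (assumed policy)
                tokio::time::sleep(std::time::Duration::from_millis(100u64 << attempt)).await;
            }
            Err(e) => return Err(e),
        }
    }
}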

@@ -2,7 +2,7 @@
//! Able to restore itself from the storage index parts, which are located in every timeline's remote directory and contain all data about
//! remote timeline layers and their metadata.

use std::collections::{HashMap, HashSet};
use std::collections::HashMap;

use chrono::NaiveDateTime;
use serde::{Deserialize, Serialize};
@@ -12,6 +12,7 @@ use utils::bin_ser::SerializeError;
use crate::tenant::metadata::TimelineMetadata;
use crate::tenant::storage_layer::LayerFileName;
use crate::tenant::upload_queue::UploadQueueInitialized;
use crate::tenant::Generation;

use utils::lsn::Lsn;

@@ -20,22 +21,28 @@ use utils::lsn::Lsn;
/// Fields have to be `Option`s because remote [`IndexPart`]s can be from different versions, which
/// might have less or more metadata depending on whether we are upgrading or rolling back an upgrade.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(test, derive(Default))]
//#[cfg_attr(test, derive(Default))]
pub struct LayerFileMetadata {
file_size: u64,

pub(crate) generation: Generation,
}

impl From<&'_ IndexLayerMetadata> for LayerFileMetadata {
fn from(other: &IndexLayerMetadata) -> Self {
LayerFileMetadata {
file_size: other.file_size,
generation: other.generation,
}
}
}

impl LayerFileMetadata {
pub fn new(file_size: u64) -> Self {
LayerFileMetadata { file_size }
pub fn new(file_size: u64, generation: Generation) -> Self {
LayerFileMetadata {
file_size,
generation,
}
}

pub fn file_size(&self) -> u64 {
@@ -62,10 +69,6 @@ pub struct IndexPart {
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_at: Option<NaiveDateTime>,

/// Legacy field: equal to the keys of `layer_metadata`, only written out for forward compat
#[serde(default, skip_deserializing)]
timeline_layers: HashSet<LayerFileName>,

/// Per layer file name metadata, which can be present for a present or missing layer file.
///
/// Older versions of `IndexPart` will not have this property or have only a part of metadata
@@ -91,7 +94,8 @@ impl IndexPart {
/// - 2: added `deleted_at`
/// - 3: no longer deserialize `timeline_layers` (serialized format is the same, but timeline_layers
/// is always generated from the keys of `layer_metadata`)
const LATEST_VERSION: usize = 3;
/// - 4: timeline_layers is fully removed.
const LATEST_VERSION: usize = 4;
pub const FILE_NAME: &'static str = "index_part.json";

pub fn new(
@@ -99,18 +103,14 @@ impl IndexPart {
disk_consistent_lsn: Lsn,
metadata: TimelineMetadata,
) -> Self {
let mut timeline_layers = HashSet::with_capacity(layers_and_metadata.len());
let mut layer_metadata = HashMap::with_capacity(layers_and_metadata.len());

for (remote_name, metadata) in &layers_and_metadata {
timeline_layers.insert(remote_name.to_owned());
let metadata = IndexLayerMetadata::from(metadata);
layer_metadata.insert(remote_name.to_owned(), metadata);
}
// Transform LayerFileMetadata into IndexLayerMetadata
let layer_metadata = layers_and_metadata
.into_iter()
.map(|(k, v)| (k, IndexLayerMetadata::from(v)))
.collect();

Self {
version: Self::LATEST_VERSION,
timeline_layers,
layer_metadata,
disk_consistent_lsn,
metadata,
@@ -135,15 +135,20 @@ impl TryFrom<&UploadQueueInitialized> for IndexPart {
}

/// Serialized form of [`LayerFileMetadata`].
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Default)]
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub struct IndexLayerMetadata {
pub(super) file_size: u64,

#[serde(default = "Generation::none")]
#[serde(skip_serializing_if = "Generation::is_none")]
pub(super) generation: Generation,
}

impl From<&'_ LayerFileMetadata> for IndexLayerMetadata {
fn from(other: &'_ LayerFileMetadata) -> Self {
impl From<LayerFileMetadata> for IndexLayerMetadata {
fn from(other: LayerFileMetadata) -> Self {
IndexLayerMetadata {
file_size: other.file_size,
generation: other.generation,
}
}
}
@@ -168,15 +173,16 @@ mod tests {
let expected = IndexPart {
// note this is not verified, could be anything, but exists for humans debugging.. could be the git version instead?
version: 1,
timeline_layers: HashSet::new(),
layer_metadata: HashMap::from([
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
file_size: 25600000,
generation: Generation::none()
}),
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata {
// serde_json should always parse this but this might be a double with jq for
// example.
file_size: 9007199254741001,
generation: Generation::none()
})
]),
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
@@ -205,15 +211,16 @@ mod tests {
let expected = IndexPart {
// note this is not verified, could be anything, but exists for humans debugging.. could be the git version instead?
version: 1,
timeline_layers: HashSet::new(),
layer_metadata: HashMap::from([
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
file_size: 25600000,
generation: Generation::none()
}),
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata {
// serde_json should always parse this but this might be a double with jq for
// example.
file_size: 9007199254741001,
generation: Generation::none()
})
]),
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
@@ -243,15 +250,16 @@ mod tests {
let expected = IndexPart {
// note this is not verified, could be anything, but exists for humans debugging.. could be the git version instead?
version: 2,
timeline_layers: HashSet::new(),
layer_metadata: HashMap::from([
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
file_size: 25600000,
generation: Generation::none()
}),
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata {
// serde_json should always parse this but this might be a double with jq for
// example.
file_size: 9007199254741001,
generation: Generation::none()
})
]),
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
@@ -276,7 +284,6 @@ mod tests {

let expected = IndexPart {
version: 1,
timeline_layers: HashSet::new(),
layer_metadata: HashMap::new(),
disk_consistent_lsn: "0/2532648".parse::<Lsn>().unwrap(),
metadata: TimelineMetadata::from_bytes(&[
@@ -309,4 +316,41 @@ mod tests {

assert_eq!(empty_layers_parsed, expected);
}

#[test]
fn v4_indexpart_is_parsed() {
let example = r#"{
"version":4,
"layer_metadata":{
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51": { "file_size": 9007199254741001 }
},
"disk_consistent_lsn":"0/16960E8",
"metadata_bytes":[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
"deleted_at": "2023-07-31T09:00:00.123"
}"#;

let expected = IndexPart {
version: 4,
layer_metadata: HashMap::from([
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
file_size: 25600000,
generation: Generation::none()
}),
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata {
// serde_json should always parse this but this might be a double with jq for
// example.
file_size: 9007199254741001,
generation: Generation::none()
})
]),
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
metadata: TimelineMetadata::from_bytes(&[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(),
deleted_at: Some(chrono::NaiveDateTime::parse_from_str(
"2023-07-31T09:00:00.123000000", "%Y-%m-%dT%H:%M:%S.%f").unwrap())
};

let part = serde_json::from_str::<IndexPart>(example).unwrap();
assert_eq!(part, expected);
}
}
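
The `#[serde(default = "Generation::none")]` / `skip_serializing_if` pair above is what keeps pre-generation index files readable. A self-contained sketch of the pattern, with a stand-in `Gen` type rather than the pageserver's `Generation`:

use serde::{Deserialize, Serialize};

#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct Gen(Option<u32>);

impl Gen {
    fn none() -> Self { Gen(None) }
    fn is_none(&self) -> bool { self.0.is_none() }
}

#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct Meta {
    file_size: u64,
    #[serde(default = "Gen::none", skip_serializing_if = "Gen::is_none")]
    generation: Gen,
}

fn main() {
    // Old JSON without the field still parses, defaulting to the sentinel...
    let old: Meta = serde_json::from_str(r#"{"file_size":1}"#).unwrap();
    assert_eq!(old.generation, Gen::none());
    // ...and re-serializing omits the sentinel, so old readers see the same shape.
    assert_eq!(serde_json::to_string(&old).unwrap(), r#"{"file_size":1}"#);
}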

@@ -5,7 +5,11 @@ use fail::fail_point;
use std::{io::ErrorKind, path::Path};
use tokio::fs;

use crate::{config::PageServerConf, tenant::remote_timeline_client::index::IndexPart};
use super::Generation;
use crate::{
config::PageServerConf,
tenant::remote_timeline_client::{index::IndexPart, remote_index_path, remote_path},
};
use remote_storage::GenericRemoteStorage;
use utils::id::{TenantId, TimelineId};

@@ -15,10 +19,10 @@ use tracing::info;

/// Serializes and uploads the given index part data to the remote storage.
pub(super) async fn upload_index_part<'a>(
conf: &'static PageServerConf,
storage: &'a GenericRemoteStorage,
tenant_id: &TenantId,
timeline_id: &TimelineId,
generation: Generation,
index_part: &'a IndexPart,
) -> anyhow::Result<()> {
tracing::trace!("uploading new index part");
@@ -27,20 +31,16 @@ pub(super) async fn upload_index_part<'a>(
bail!("failpoint before-upload-index")
});

let index_part_bytes = serde_json::to_vec(&index_part)
.context("Failed to serialize index part file into bytes")?;
let index_part_bytes =
serde_json::to_vec(&index_part).context("serialize index part file into bytes")?;
let index_part_size = index_part_bytes.len();
let index_part_bytes = tokio::io::BufReader::new(std::io::Cursor::new(index_part_bytes));

let index_part_path = conf
.metadata_path(tenant_id, timeline_id)
.with_file_name(IndexPart::FILE_NAME);
let storage_path = conf.remote_path(&index_part_path)?;

let remote_path = remote_index_path(tenant_id, timeline_id, generation);
storage
.upload_storage_object(Box::new(index_part_bytes), index_part_size, &storage_path)
.upload_storage_object(Box::new(index_part_bytes), index_part_size, &remote_path)
.await
.with_context(|| format!("Failed to upload index part for '{tenant_id} / {timeline_id}'"))
.with_context(|| format!("upload index part for '{tenant_id} / {timeline_id}'"))
}
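
`remote_index_path` replaces the old local-path round-trip with a directly composed remote key. A hypothetical sketch of how a generation-suffixed key could be built; the actual key layout and suffix format in the crate are assumptions here, for illustration only:

#[derive(Clone, Copy)]
struct Generation(Option<u32>);

impl Generation {
    // Suffix format is illustrative, not the real encoding.
    fn suffix(&self) -> String {
        match self.0 {
            Some(g) => format!("-{g:08x}"),
            None => String::new(),
        }
    }
}

fn remote_index_key(tenant_id: &str, timeline_id: &str, generation: Generation) -> String {
    // Two pageservers in different generations write different keys, so a
    // stale node can never clobber the index written by its successor.
    format!(
        "tenants/{tenant_id}/timelines/{timeline_id}/index_part.json{}",
        generation.suffix()
    )
}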

/// Attempts to upload given layer files.
@@ -52,12 +52,13 @@ pub(super) async fn upload_timeline_layer<'a>(
storage: &'a GenericRemoteStorage,
source_path: &'a Path,
known_metadata: &'a LayerFileMetadata,
generation: Generation,
) -> anyhow::Result<()> {
fail_point!("before-upload-layer", |_| {
bail!("failpoint before-upload-layer")
});
let storage_path = conf.remote_path(source_path)?;

let storage_path = remote_path(conf, source_path, generation)?;
let source_file_res = fs::File::open(&source_path).await;
let source_file = match source_file_res {
Ok(source_file) => source_file,
@@ -70,16 +71,15 @@ pub(super) async fn upload_timeline_layer<'a>(
info!(path = %source_path.display(), "File to upload doesn't exist. Likely the file has been deleted and an upload is not required any more.");
return Ok(());
}
Err(e) => Err(e)
.with_context(|| format!("Failed to open a source file for layer {source_path:?}"))?,
Err(e) => {
Err(e).with_context(|| format!("open a source file for layer {source_path:?}"))?
}
};

let fs_size = source_file
.metadata()
.await
.with_context(|| {
format!("Failed to get the source file metadata for layer {source_path:?}")
})?
.with_context(|| format!("get the source file metadata for layer {source_path:?}"))?
.len();

let metadata_size = known_metadata.file_size();
@@ -87,19 +87,13 @@ pub(super) async fn upload_timeline_layer<'a>(
bail!("File {source_path:?} has its current FS size {fs_size} different from initially determined {metadata_size}");
}

let fs_size = usize::try_from(fs_size).with_context(|| {
format!("File {source_path:?} size {fs_size} could not be converted to usize")
})?;
let fs_size = usize::try_from(fs_size)
.with_context(|| format!("convert {source_path:?} size {fs_size} to usize"))?;

storage
.upload(source_file, fs_size, &storage_path, None)
.await
.with_context(|| {
format!(
"Failed to upload a layer from local path '{}'",
source_path.display()
)
})?;
.with_context(|| format!("upload layer from local path '{}'", source_path.display()))?;

Ok(())
}
@@ -38,7 +38,6 @@ use std::time::{Duration, Instant, SystemTime};
use crate::context::{
AccessStatsBehavior, DownloadBehavior, RequestContext, RequestContextBuilder,
};
use crate::deletion_queue::DeletionQueueClient;
use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
use crate::tenant::storage_layer::delta_layer::DeltaEntry;
use crate::tenant::storage_layer::{
@@ -68,6 +67,7 @@ use postgres_connection::PgConnectionConfig;
use postgres_ffi::to_pg_timestamp;
use utils::{
completion,
generation::Generation,
id::{TenantId, TimelineId},
lsn::{AtomicLsn, Lsn, RecordLsn},
seqwait::SeqWait,
@@ -142,7 +142,6 @@ fn drop_wlock<T>(rlock: tokio::sync::RwLockWriteGuard<'_, T>) {
/// The outward-facing resources required to build a Timeline
pub struct TimelineResources {
pub remote_client: Option<RemoteTimelineClient>,
pub deletion_queue_client: Option<DeletionQueueClient>,
}

pub struct Timeline {
@@ -154,6 +153,10 @@ pub struct Timeline {
pub tenant_id: TenantId,
pub timeline_id: TimelineId,

/// The generation of the tenant that instantiated us: this is used for safety when writing remote objects.
/// Never changes for the lifetime of this [`Timeline`] object.
generation: Generation,

pub pg_version: u32,

/// The tuple has two elements.
@@ -197,9 +200,6 @@ pub struct Timeline {
/// See [`remote_timeline_client`](super::remote_timeline_client) module comment for details.
pub remote_client: Option<Arc<RemoteTimelineClient>>,

/// Deletion queue: a global queue, separate from the remote storage queue
deletion_queue_client: Option<Arc<DeletionQueueClient>>,

// What page versions do we hold in the repository? If we get a
// request > last_record_lsn, we need to wait until we receive all
// the WAL up to the request. The SeqWait provides functions for
@@ -1204,7 +1204,7 @@ impl Timeline {
Ok(delta) => Some(delta),
};

let layer_metadata = LayerFileMetadata::new(layer_file_size);
let layer_metadata = LayerFileMetadata::new(layer_file_size, self.generation);

let new_remote_layer = Arc::new(match local_layer.filename() {
LayerFileName::Image(image_name) => RemoteLayer::new_img(
@@ -1267,18 +1267,6 @@ impl Timeline {

Ok(())
}

async fn delete_all_remote(&self) -> anyhow::Result<()> {
if let Some(remote_client) = &self.remote_client {
if let Some(deletion_queue_client) = &self.deletion_queue_client {
remote_client.delete_all(deletion_queue_client).await
} else {
Ok(())
}
} else {
Ok(())
}
}
}
#[derive(Debug, thiserror::Error)]
@@ -1394,6 +1382,7 @@ impl Timeline {
ancestor: Option<Arc<Timeline>>,
timeline_id: TimelineId,
tenant_id: TenantId,
generation: Generation,
walredo_mgr: Arc<dyn WalRedoManager + Send + Sync>,
resources: TimelineResources,
pg_version: u32,
@@ -1423,6 +1412,7 @@ impl Timeline {
myself: myself.clone(),
timeline_id,
tenant_id,
generation,
pg_version,
layers: Arc::new(tokio::sync::RwLock::new(LayerManager::create())),
wanted_image_layers: Mutex::new(None),
@@ -1431,7 +1421,6 @@ impl Timeline {
walreceiver: Mutex::new(None),

remote_client: resources.remote_client.map(Arc::new),
deletion_queue_client: resources.deletion_queue_client.map(Arc::new),

// initialize in-memory 'last_record_lsn' from 'disk_consistent_lsn'.
last_record_lsn: SeqWait::new(RecordLsn {
@@ -1633,6 +1622,9 @@ impl Timeline {
let (conf, tenant_id, timeline_id) = (self.conf, self.tenant_id, self.timeline_id);
let span = tracing::Span::current();

// Copy to move into the task we're about to spawn
let generation = self.generation;

let (loaded_layers, to_sync, total_physical_size) = tokio::task::spawn_blocking({
move || {
let _g = span.entered();
@@ -1674,8 +1666,12 @@ impl Timeline {
);
}

let decided =
init::reconcile(discovered_layers, index_part.as_ref(), disk_consistent_lsn);
let decided = init::reconcile(
discovered_layers,
index_part.as_ref(),
disk_consistent_lsn,
generation,
);

let mut loaded_layers = Vec::new();
let mut needs_upload = Vec::new();
@@ -1768,15 +1764,11 @@ impl Timeline {
guard.initialize_local_layers(loaded_layers, disk_consistent_lsn + 1);

if let Some(rtc) = self.remote_client.as_ref() {
// Deletion queue client is always Some if remote_client is Some
let deletion_queue_client = self.deletion_queue_client.as_ref().unwrap();

let (needs_upload, needs_cleanup) = to_sync;
for (layer, m) in needs_upload {
rtc.schedule_layer_file_upload(&layer.layer_desc().filename(), &m)?;
}
rtc.schedule_layer_file_deletion(&needs_cleanup, deletion_queue_client)
.await?;
rtc.schedule_layer_file_deletion(&needs_cleanup)?;
rtc.schedule_index_upload_for_file_changes()?;
// Tenant::create_timeline will wait for these uploads to happen before returning, or
// on retry.
@@ -2691,7 +2683,7 @@ impl Timeline {
(
HashMap::from([(
layer.filename(),
LayerFileMetadata::new(layer.layer_desc().file_size),
LayerFileMetadata::new(layer.layer_desc().file_size, self.generation),
)]),
Some(layer),
)
@@ -3087,7 +3079,10 @@ impl Timeline {
.metadata()
.with_context(|| format!("reading metadata of layer file {}", path.file_name()))?;

layer_paths_to_upload.insert(path, LayerFileMetadata::new(metadata.len()));
layer_paths_to_upload.insert(
path,
LayerFileMetadata::new(metadata.len(), self.generation),
);

self.metrics
.resident_physical_size_gauge
@@ -3762,7 +3757,7 @@ impl Timeline {
if let Some(remote_client) = &self.remote_client {
remote_client.schedule_layer_file_upload(
&l.filename(),
&LayerFileMetadata::new(metadata.len()),
&LayerFileMetadata::new(metadata.len(), self.generation),
)?;
}

@@ -3771,7 +3766,10 @@ impl Timeline {
.resident_physical_size_gauge
.add(metadata.len());

new_layer_paths.insert(new_delta_path, LayerFileMetadata::new(metadata.len()));
new_layer_paths.insert(
new_delta_path,
LayerFileMetadata::new(metadata.len(), self.generation),
);
l.access_stats().record_residence_event(
LayerResidenceStatus::Resident,
LayerResidenceEventReason::LayerCreate,
@@ -3811,13 +3809,7 @@ impl Timeline {

// Also schedule the deletions in remote storage
if let Some(remote_client) = &self.remote_client {
let deletion_queue = self
.deletion_queue_client
.as_ref()
.ok_or_else(|| anyhow::anyhow!("Remote storage enabled without deletion queue"))?;
remote_client
.schedule_layer_file_deletion(&layer_names_to_delete, deletion_queue)
.await?;
remote_client.schedule_layer_file_deletion(&layer_names_to_delete)?;
}

Ok(())
@@ -4151,15 +4143,7 @@ impl Timeline {
}

if let Some(remote_client) = &self.remote_client {
// Remote metadata upload was scheduled in `update_metadata_file`: wait
// for completion before scheduling any deletions.
remote_client.wait_completion().await?;
let deletion_queue = self.deletion_queue_client.as_ref().ok_or_else(|| {
anyhow::anyhow!("Remote storage enabled without deletion queue")
})?;
remote_client
.schedule_layer_file_deletion(&layer_names_to_delete, deletion_queue)
.await?;
remote_client.schedule_layer_file_deletion(&layer_names_to_delete)?;
}

apply.flush();
@@ -4749,7 +4733,6 @@ mod tests {

use utils::{id::TimelineId, lsn::Lsn};

use crate::deletion_queue::mock::MockDeletionQueue;
use crate::tenant::{harness::TenantHarness, storage_layer::PersistentLayer};

use super::{EvictionError, Timeline};
@@ -4772,17 +4755,9 @@ mod tests {
};
GenericRemoteStorage::from_config(&config).unwrap()
};
let deletion_queue = MockDeletionQueue::new(Some(remote_storage.clone()), harness.conf);

let ctx = any_context();
let tenant = harness
.try_load(
&ctx,
Some(remote_storage),
Some(deletion_queue.new_client()),
)
.await
.unwrap();
let tenant = harness.try_load(&ctx, Some(remote_storage)).await.unwrap();
let timeline = tenant
.create_test_timeline(TimelineId::generate(), Lsn(0x10), 14, &ctx)
.await
@@ -4845,17 +4820,9 @@ mod tests {
};
GenericRemoteStorage::from_config(&config).unwrap()
};
let deletion_queue = MockDeletionQueue::new(Some(remote_storage.clone()), harness.conf);

let ctx = any_context();
let tenant = harness
.try_load(
&ctx,
Some(remote_storage),
Some(deletion_queue.new_client()),
)
.await
.unwrap();
let tenant = harness.try_load(&ctx, Some(remote_storage)).await.unwrap();
let timeline = tenant
.create_test_timeline(TimelineId::generate(), Lsn(0x10), 14, &ctx)
.await
@@ -14,7 +14,6 @@ use utils::{

use crate::{
config::PageServerConf,
deletion_queue::DeletionQueueClient,
task_mgr::{self, TaskKind},
tenant::{
metadata::TimelineMetadata,
@@ -239,6 +238,15 @@ async fn delete_local_layer_files(
Ok(())
}

/// Removes remote layers and an index file after them.
async fn delete_remote_layers_and_index(timeline: &Timeline) -> anyhow::Result<()> {
if let Some(remote_client) = &timeline.remote_client {
remote_client.delete_all().await.context("delete_all")?
};

Ok(())
}

// This function removes remaining traces of a timeline on disk.
// Namely: metadata file, timeline directory, delete mark.
// Note: io::ErrorKind::NotFound are ignored for metadata and timeline dir.
@@ -399,7 +407,6 @@ impl DeleteTimelineFlow {
timeline_id: TimelineId,
local_metadata: &TimelineMetadata,
remote_client: Option<RemoteTimelineClient>,
deletion_queue_client: Option<DeletionQueueClient>,
init_order: Option<&InitializationOrder>,
) -> anyhow::Result<()> {
// Note: here we even skip populating layer map. Timeline is essentially uninitialized.
@@ -409,10 +416,7 @@ impl DeleteTimelineFlow {
timeline_id,
local_metadata,
None, // Ancestor is not needed for deletion.
TimelineResources {
remote_client,
deletion_queue_client,
},
TimelineResources { remote_client },
init_order,
// Important. We don't pass ancestor above because it can be missing.
// Thus we need to skip the validation here.
@@ -555,7 +559,7 @@ impl DeleteTimelineFlow {
) -> Result<(), DeleteTimelineError> {
delete_local_layer_files(conf, tenant.tenant_id, timeline).await?;

timeline.delete_all_remote().await?;
delete_remote_layers_and_index(timeline).await?;

pausable_failpoint!("in_progress_delete");
@@ -7,6 +7,7 @@ use crate::{
index::{IndexPart, LayerFileMetadata},
},
storage_layer::LayerFileName,
Generation,
},
METADATA_FILE_NAME,
};
@@ -104,6 +105,7 @@ pub(super) fn reconcile(
discovered: Vec<(LayerFileName, u64)>,
index_part: Option<&IndexPart>,
disk_consistent_lsn: Lsn,
generation: Generation,
) -> Vec<(LayerFileName, Result<Decision, FutureLayer>)> {
use Decision::*;

@@ -112,7 +114,15 @@ pub(super) fn reconcile(

let mut discovered = discovered
.into_iter()
.map(|(name, file_size)| (name, (Some(LayerFileMetadata::new(file_size)), None)))
.map(|(name, file_size)| {
(
name,
// The generation here will be corrected to match IndexPart in the merge below, unless
// it is not in IndexPart, in which case using our current generation makes sense
// because it will be uploaded in this generation.
(Some(LayerFileMetadata::new(file_size, generation)), None),
)
})
.collect::<Collected>();

// merge any index_part information, when available
@@ -137,7 +147,11 @@ pub(super) fn reconcile(
Err(FutureLayer { local })
} else {
Ok(match (local, remote) {
(Some(local), Some(remote)) if local != remote => UseRemote { local, remote },
(Some(local), Some(remote)) if local != remote => {
assert_eq!(local.generation, remote.generation);

UseRemote { local, remote }
}
(Some(x), Some(_)) => UseLocal(x),
(None, Some(x)) => Evicted(x),
(Some(x), None) => NeedsUpload(x),
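
Condensed, the local/remote merge above implements this decision table. A sketch with abridged types, not the actual `Decision` enum; the `FutureLayer` / `disk_consistent_lsn` check is elided:

enum Decision<M> {
    UseRemote { local: M, remote: M },
    UseLocal(M),
    Evicted(M),
    NeedsUpload(M),
}

fn decide<M: PartialEq>(local: Option<M>, remote: Option<M>) -> Option<Decision<M>> {
    match (local, remote) {
        // both sides know the layer but disagree: trust the remote metadata
        (Some(l), Some(r)) if l != r => Some(Decision::UseRemote { local: l, remote: r }),
        // both sides agree: the local copy is usable as-is
        (Some(l), Some(_)) => Some(Decision::UseLocal(l)),
        // only remote knows it: the layer was evicted locally
        (None, Some(r)) => Some(Decision::Evicted(r)),
        // only local knows it: it was never uploaded
        (Some(l), None) => Some(Decision::NeedsUpload(l)),
        (None, None) => None,
    }
}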

@@ -1,4 +1,7 @@
use crate::metrics::RemoteOpFileKind;

use super::storage_layer::LayerFileName;
use super::Generation;
use crate::tenant::metadata::TimelineMetadata;
use crate::tenant::remote_timeline_client::index::IndexPart;
use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
@@ -60,6 +63,7 @@ pub(crate) struct UploadQueueInitialized {
// Breakdown of different kinds of tasks currently in-progress
pub(crate) num_inprogress_layer_uploads: usize,
pub(crate) num_inprogress_metadata_uploads: usize,
pub(crate) num_inprogress_deletions: usize,

/// Tasks that are currently in-progress. In-progress means that a tokio Task
/// has been launched for it. An in-progress task can be busy uploading, but it can
@@ -117,6 +121,7 @@ impl UploadQueue {
task_counter: 0,
num_inprogress_layer_uploads: 0,
num_inprogress_metadata_uploads: 0,
num_inprogress_deletions: 0,
inprogress_tasks: HashMap::new(),
queued_operations: VecDeque::new(),
};
@@ -158,6 +163,7 @@ impl UploadQueue {
task_counter: 0,
num_inprogress_layer_uploads: 0,
num_inprogress_metadata_uploads: 0,
num_inprogress_deletions: 0,
inprogress_tasks: HashMap::new(),
queued_operations: VecDeque::new(),
};
@@ -195,6 +201,14 @@ pub(crate) struct UploadTask {
pub(crate) op: UploadOp,
}

#[derive(Debug)]
pub(crate) struct Delete {
pub(crate) file_kind: RemoteOpFileKind,
pub(crate) layer_file_name: LayerFileName,
pub(crate) scheduled_from_timeline_delete: bool,
pub(crate) generation: Generation,
}

#[derive(Debug)]
pub(crate) enum UploadOp {
/// Upload a layer file
@@ -203,6 +217,9 @@ pub(crate) enum UploadOp {
/// Upload the metadata file
UploadMetadata(IndexPart, Lsn),

/// Delete a layer file
Delete(Delete),

/// Barrier. When the barrier operation is reached,
Barrier(tokio::sync::watch::Sender<()>),
}
@@ -213,12 +230,22 @@ impl std::fmt::Display for UploadOp {
UploadOp::UploadLayer(path, metadata) => {
write!(
f,
"UploadLayer({}, size={:?})",
"UploadLayer({}, size={:?}, gen={:?})",
path.file_name(),
metadata.file_size()
metadata.file_size(),
metadata.generation,
)
}
UploadOp::UploadMetadata(_, lsn) => write!(f, "UploadMetadata(lsn: {})", lsn),
UploadOp::UploadMetadata(_, lsn) => {
write!(f, "UploadMetadata(lsn: {})", lsn)
}
UploadOp::Delete(delete) => write!(
f,
"Delete(path: {}, scheduled_from_timeline_delete: {}, gen: {:?})",
delete.layer_file_name.file_name(),
delete.scheduled_from_timeline_delete,
delete.generation
),
UploadOp::Barrier(_) => write!(f, "Barrier"),
}
}
@@ -12,13 +12,19 @@ pub struct PasswordHackPayload {

impl PasswordHackPayload {
pub fn parse(bytes: &[u8]) -> Option<Self> {
// The format is `project=<utf-8>;<password-bytes>`.
let mut iter = bytes.splitn_str(2, ";");
let endpoint = iter.next()?.to_str().ok()?;
let endpoint = parse_endpoint_param(endpoint)?.to_owned();
let password = iter.next()?.to_owned();
// The format is `project=<utf-8>;<password-bytes>` or `project=<utf-8>$<password-bytes>`.
let separators = [";", "$"];
for sep in separators {
if let Some((endpoint, password)) = bytes.split_once_str(sep) {
let endpoint = endpoint.to_str().ok()?;
return Some(Self {
endpoint: parse_endpoint_param(endpoint)?.to_owned(),
password: password.to_owned(),
});
}
}

Some(Self { endpoint, password })
None
}
}
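
For reference, a dependency-free sketch of the same two-separator scan (`split_once_str` above comes from the `bstr` crate); the `project=`/`endpoint=` stripping stands in for `parse_endpoint_param`, whose exact behavior is an assumption here:

fn split_once_byte(bytes: &[u8], sep: u8) -> Option<(&[u8], &[u8])> {
    let i = bytes.iter().position(|&b| b == sep)?;
    Some((&bytes[..i], &bytes[i + 1..]))
}

fn parse(bytes: &[u8]) -> Option<(String, Vec<u8>)> {
    // Try ';' first, then '$', mirroring the separator order above; the
    // password keeps any further separator bytes untouched.
    for sep in [b';', b'$'] {
        if let Some((endpoint, password)) = split_once_byte(bytes, sep) {
            let endpoint = std::str::from_utf8(endpoint).ok()?;
            let endpoint = endpoint
                .strip_prefix("project=")
                .or_else(|| endpoint.strip_prefix("endpoint="))?;
            return Some((endpoint.to_owned(), password.to_vec()));
        }
    }
    None
}

fn main() {
    let (endpoint, password) = parse(b"endpoint=foobar$pass$word").unwrap();
    assert_eq!(endpoint, "foobar");
    assert_eq!(password, b"pass$word".to_vec());
}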

@@ -91,4 +97,23 @@ mod tests {
assert_eq!(payload.endpoint, "foobar");
assert_eq!(payload.password, b"pass;word");
}

#[test]
fn parse_password_hack_payload_dollar() {
let bytes = b"";
assert!(PasswordHackPayload::parse(bytes).is_none());

let bytes = b"endpoint=";
assert!(PasswordHackPayload::parse(bytes).is_none());

let bytes = b"endpoint=$";
let payload = PasswordHackPayload::parse(bytes).expect("parsing failed");
assert_eq!(payload.endpoint, "");
assert_eq!(payload.password, b"");

let bytes = b"endpoint=foobar$pass$word";
let payload = PasswordHackPayload::parse(bytes).expect("parsing failed");
assert_eq!(payload.endpoint, "foobar");
assert_eq!(payload.password, b"pass$word");
}
}

@@ -16,12 +16,21 @@ use tracing::{error, info, info_span, warn, Instrument};
pub struct Api {
endpoint: http::Endpoint,
caches: &'static ApiCaches,
jwt: String,
}

impl Api {
/// Construct an API object containing the auth parameters.
pub fn new(endpoint: http::Endpoint, caches: &'static ApiCaches) -> Self {
Self { endpoint, caches }
let jwt: String = match std::env::var("NEON_PROXY_TO_CONTROLPLANE_TOKEN") {
Ok(v) => v,
Err(_) => "".to_string(),
};
Self {
endpoint,
caches,
jwt,
}
}
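
The `match` on `std::env::var` above is equivalent to this shorter form (sketch):

fn control_plane_jwt() -> String {
    // A missing NEON_PROXY_TO_CONTROLPLANE_TOKEN means "no token": the same
    // behavior as the match above, since String::default() is "".
    std::env::var("NEON_PROXY_TO_CONTROLPLANE_TOKEN").unwrap_or_default()
}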

pub fn url(&self) -> &str {
@@ -39,6 +48,7 @@ impl Api {
.endpoint
.get("proxy_get_role_secret")
.header("X-Request-ID", &request_id)
.header("Authorization", &self.jwt)
.query(&[("session_id", extra.session_id)])
.query(&[
("application_name", extra.application_name),
@@ -83,6 +93,7 @@ impl Api {
.endpoint
.get("proxy_wake_compute")
.header("X-Request-ID", &request_id)
.header("Authorization", &self.jwt)
.query(&[("session_id", extra.session_id)])
.query(&[
("application_name", extra.application_name),

@@ -304,7 +304,7 @@ pub async fn task_main(

let make_svc =
hyper::service::make_service_fn(|stream: &tokio_rustls::server::TlsStream<AddrStream>| {
let sni_name = stream.get_ref().1.sni_hostname().map(|s| s.to_string());
let sni_name = stream.get_ref().1.server_name().map(|s| s.to_string());
let conn_pool = conn_pool.clone();

async move {

@@ -141,7 +141,7 @@ impl<S> Stream<S> {
pub fn sni_hostname(&self) -> Option<&str> {
match self {
Stream::Raw { .. } => None,
Stream::Tls { tls } => tls.get_ref().1.sni_hostname(),
Stream::Tls { tls } => tls.get_ref().1.server_name(),
}
}
}
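
Both call-site changes track the rustls 0.21 API, where `ServerConnection::sni_hostname()` was renamed to `server_name()`; the matching rustls 0.21 / tokio-rustls 0.24 version bumps appear in the Cargo section further below.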

@@ -613,8 +613,3 @@ class PageserverHttpClient(requests.Session):
},
)
self.verbose_error(res)

def deletion_queue_flush(self, execute: bool = False):
self.put(
f"http://localhost:{self.port}/v1/deletion_queue/flush?execute={'true' if execute else 'false'}"
).raise_for_status()

@@ -12,10 +12,7 @@ from typing import Dict, List, Optional, Tuple
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnv,
NeonEnvBuilder,
PgBin,
last_flush_lsn_upload,
wait_for_last_flush_lsn,
)
from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient
@@ -253,20 +250,35 @@ def test_remote_storage_upload_queue_retries(

client = env.pageserver.http_client()

def configure_storage_write_failpoints(action):
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)

endpoint.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)")

def configure_storage_sync_failpoints(action):
client.configure_failpoints(
[
("before-upload-layer", action),
("before-upload-index", action),
("before-delete-layer", action),
]
)

def configure_storage_delete_failpoints(action):
client.configure_failpoints(
def overwrite_data_and_wait_for_it_to_arrive_at_pageserver(data):
# create initial set of layers & upload them with failpoints configured
endpoint.safe_psql_many(
[
("deletion-queue-before-execute", action),
f"""
INSERT INTO foo (id, val)
SELECT g, '{data}'
FROM generate_series(1, 20000) g
ON CONFLICT (id) DO UPDATE
SET val = EXCLUDED.val
""",
# to ensure that GC can actually remove some layers
"VACUUM foo",
]
)
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)

def get_queued_count(file_kind, op_kind):
val = client.get_remote_timeline_client_metric(
@@ -279,52 +291,23 @@ def test_remote_storage_upload_queue_retries(
assert val is not None, "expecting metric to be present"
return int(val)

def get_deletions_executed() -> int:
executed = client.get_metric_value("pageserver_deletion_queue_executed_total")
if executed is None:
return 0
else:
return int(executed)
# create some layers & wait for uploads to finish
overwrite_data_and_wait_for_it_to_arrive_at_pageserver("a")
client.timeline_checkpoint(tenant_id, timeline_id)
client.timeline_compact(tenant_id, timeline_id)
overwrite_data_and_wait_for_it_to_arrive_at_pageserver("b")
client.timeline_checkpoint(tenant_id, timeline_id)
client.timeline_compact(tenant_id, timeline_id)
gc_result = client.timeline_gc(tenant_id, timeline_id, 0)
print_gc_result(gc_result)
assert gc_result["layers_removed"] > 0

def get_deletion_errors(op_type) -> int:
executed = client.get_metric_value(
"pageserver_deletion_queue_errors_total", {"op_kind": op_type}
)
if executed is None:
return 0
else:
return int(executed)

def assert_queued_count(file_kind: str, op_kind: str, fn):
v = get_queued_count(file_kind=file_kind, op_kind=op_kind)
log.info(f"queue count: {file_kind} {op_kind} {v}")
assert fn(v)

# Push some uploads into the remote_timeline_client queues, before failpoints
# are enabled: these should execute and the queue should revert to zero depth
generate_uploads_and_deletions(env, tenant_id=tenant_id, timeline_id=timeline_id)

wait_until(2, 1, lambda: assert_queued_count("layer", "upload", lambda v: v == 0))
wait_until(2, 1, lambda: assert_queued_count("index", "upload", lambda v: v == 0))

# Wait for some deletions to happen in the above compactions, assert that
# our metrics of interest exist
wait_until(2, 1, lambda: assert_deletion_queue(client, lambda v: v is not None))

# Before enabling failpoints, flushing deletions through should work
client.deletion_queue_flush(execute=True)
executed = client.get_metric_value("pageserver_deletion_queue_executed_total")
assert executed is not None
assert executed > 0
wait_until(2, 1, lambda: get_queued_count(file_kind="layer", op_kind="upload") == 0)
wait_until(2, 1, lambda: get_queued_count(file_kind="index", op_kind="upload") == 0)
wait_until(2, 1, lambda: get_queued_count(file_kind="layer", op_kind="delete") == 0)

# let all future operations queue up
configure_storage_write_failpoints("return")
configure_storage_delete_failpoints("return")

# Snapshot of executed deletions: should not increment while failpoint is enabled
deletions_executed_pre_failpoint = client.get_metric_value(
"pageserver_deletion_queue_executed_total"
)
configure_storage_sync_failpoints("return")

# Create more churn to generate all upload ops.
# The checkpoint / compact / gc ops will block because they call remote_client.wait_completion().
@@ -332,77 +315,38 @@ def test_remote_storage_upload_queue_retries(
churn_thread_result = [False]

def churn_while_failpoints_active(result):
generate_uploads_and_deletions(
env, init=False, tenant_id=tenant_id, timeline_id=timeline_id, data="d"
)
overwrite_data_and_wait_for_it_to_arrive_at_pageserver("c")
client.timeline_checkpoint(tenant_id, timeline_id)
client.timeline_compact(tenant_id, timeline_id)
overwrite_data_and_wait_for_it_to_arrive_at_pageserver("d")
client.timeline_checkpoint(tenant_id, timeline_id)
client.timeline_compact(tenant_id, timeline_id)
gc_result = client.timeline_gc(tenant_id, timeline_id, 0)
print_gc_result(gc_result)
assert gc_result["layers_removed"] > 0
result[0] = True

churn_while_failpoints_active_thread = threading.Thread(
target=churn_while_failpoints_active, args=[churn_thread_result]
)
log.info("Entered churn phase")
churn_while_failpoints_active_thread.start()

try:
# wait for churn thread's data to get stuck in the upload queue
wait_until(10, 0.1, lambda: assert_queued_count("layer", "upload", lambda v: v > 0))
wait_until(10, 0.1, lambda: assert_queued_count("index", "upload", lambda v: v >= 2))
# wait for churn thread's data to get stuck in the upload queue
wait_until(10, 0.1, lambda: get_queued_count(file_kind="layer", op_kind="upload") > 0)
wait_until(10, 0.1, lambda: get_queued_count(file_kind="index", op_kind="upload") >= 2)
wait_until(10, 0.1, lambda: get_queued_count(file_kind="layer", op_kind="delete") > 0)

# Deletion queue should not grow, because deletions wait for upload of
# metadata, and we blocked that upload.
wait_until(10, 0.5, lambda: assert_deletion_queue(client, lambda v: v == 0))
# unblock churn operations
configure_storage_sync_failpoints("off")

# No more deletions should have executed
assert get_deletions_executed() == deletions_executed_pre_failpoint

# unblock write operations
log.info("Unblocking remote writes")
configure_storage_write_failpoints("off")

# ... and wait for them to finish. Exponential back-off in upload queue, so, gracious timeouts.
wait_until(30, 1, lambda: assert_queued_count("layer", "upload", lambda v: v == 0))
wait_until(30, 1, lambda: assert_queued_count("index", "upload", lambda v: v == 0))

# Deletions should have been enqueued now that index uploads proceeded
log.info("Waiting to see deletions enqueued")
wait_until(10, 1, lambda: assert_deletion_queue(client, lambda v: v > 0))

# Run flush in the background because it will block on the failpoint
class background_flush(threading.Thread):
def run(self):
client.deletion_queue_flush(execute=True)

flusher = background_flush()
flusher.start()

def assert_failpoint_hit():
assert get_deletion_errors("failpoint") > 0

# Our background flush thread should induce us to hit the failpoint
wait_until(20, 0.25, assert_failpoint_hit)

# Deletions should not have been executed while failpoint is still active.
assert get_deletion_queue_depth(client) is not None
assert get_deletion_queue_depth(client) > 0
assert get_deletions_executed() == deletions_executed_pre_failpoint

log.info("Unblocking remote deletes")
configure_storage_delete_failpoints("off")

# An API flush should now complete
flusher.join()

# Queue should drain, which should involve executing some deletions
wait_until(2, 1, lambda: assert_deletion_queue(client, lambda v: v == 0))
assert get_deletions_executed() > deletions_executed_pre_failpoint

finally:
# The churn thread doesn't make progress once it blocks on the first wait_completion() call,
# so, give it some time to wrap up.
log.info("Joining churn workload")
churn_while_failpoints_active_thread.join(30)
log.info("Joined churn workload")
# ... and wait for them to finish. Exponential back-off in upload queue, so, gracious timeouts.
wait_until(30, 1, lambda: get_queued_count(file_kind="layer", op_kind="upload") == 0)
wait_until(30, 1, lambda: get_queued_count(file_kind="index", op_kind="upload") == 0)
wait_until(30, 1, lambda: get_queued_count(file_kind="layer", op_kind="delete") == 0)

# The churn thread doesn't make progress once it blocks on the first wait_completion() call,
# so, give it some time to wrap up.
churn_while_failpoints_active_thread.join(30)
assert not churn_while_failpoints_active_thread.is_alive()
assert churn_thread_result[0]

@@ -488,6 +432,7 @@ def test_remote_timeline_client_calls_started_metric(
calls_started: Dict[Tuple[str, str], List[int]] = {
("layer", "upload"): [0],
("index", "upload"): [0],
("layer", "delete"): [0],
}

def fetch_calls_started():
@@ -659,6 +604,7 @@ def test_timeline_deletion_with_files_stuck_in_upload_queue(
checkpoint_allowed_to_fail.set()
env.pageserver.allowed_errors.append(
".* ERROR .*Error processing HTTP request: InternalServerError\\(timeline is Stopping"
".* ERROR .*[Cc]ould not flush frozen layer.*"
)

# Generous timeout, because currently deletions can get blocked waiting for compaction
@@ -985,154 +931,4 @@ def assert_nothing_to_upload(
assert Lsn(detail["last_record_lsn"]) == Lsn(detail["remote_consistent_lsn"])


def get_deletion_queue_depth(ps_http) -> int:
"""
Queue depth if at least one deletion has been submitted, else None
"""
submitted = ps_http.get_metric_value("pageserver_deletion_queue_submitted_total")

if submitted is None:
return 0

executed = ps_http.get_metric_value("pageserver_deletion_queue_executed_total")
executed = 0 if executed is None else executed

depth = submitted - executed
assert depth >= 0

log.info(f"get_deletion_queue_depth: {depth} ({submitted} - {executed})")
return int(depth)


def assert_deletion_queue(ps_http, size_fn) -> None:
v = get_deletion_queue_depth(ps_http)
assert v is not None
assert size_fn(v) is True
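
The helper treats queue depth as submitted minus executed and insists it never goes negative. The same invariant as a Rust sketch of the counter pair (names are assumptions, not the pageserver's metric types):

struct DeletionQueueCounters {
    submitted: u64,
    executed: u64,
}

impl DeletionQueueCounters {
    // Depth is the backlog of submitted-but-not-yet-executed deletions;
    // executed overtaking submitted would indicate a bookkeeping bug.
    fn depth(&self) -> u64 {
        self.submitted
            .checked_sub(self.executed)
            .expect("executed must never exceed submitted")
    }
}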


# TODO Test that we correctly handle GC of files that are stuck in upload queue.


def generate_uploads_and_deletions(
env: NeonEnv,
*,
init: bool = True,
tenant_id: Optional[TenantId] = None,
timeline_id: Optional[TimelineId] = None,
data: Optional[str] = None,
):
"""
Using the environment's default tenant + timeline, generate a load pattern
that results in some uploads and some deletions to remote storage.
"""

if tenant_id is None:
tenant_id = env.initial_tenant
assert tenant_id is not None

if timeline_id is None:
timeline_id = env.initial_timeline
assert timeline_id is not None

ps_http = env.pageserver.http_client()

with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
if init:
endpoint.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)")
last_flush_lsn_upload(env, endpoint, tenant_id, timeline_id)

def churn(data):
endpoint.safe_psql_many(
[
f"""
INSERT INTO foo (id, val)
SELECT g, '{data}'
FROM generate_series(1, 20000) g
ON CONFLICT (id) DO UPDATE
SET val = EXCLUDED.val
""",
# to ensure that GC can actually remove some layers
"VACUUM foo",
]
)
assert tenant_id is not None
assert timeline_id is not None
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
ps_http.timeline_checkpoint(tenant_id, timeline_id)

# Compaction should generate some GC-eligible layers
for i in range(0, 2):
churn(f"{i if data is None else data}")

gc_result = ps_http.timeline_gc(tenant_id, timeline_id, 0)
print_gc_result(gc_result)
assert gc_result["layers_removed"] > 0


@pytest.mark.parametrize("remote_storage_kind", [RemoteStorageKind.LOCAL_FS])
def test_deletion_queue_recovery(
neon_env_builder: NeonEnvBuilder,
remote_storage_kind: RemoteStorageKind,
pg_bin: PgBin,
):
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storage_kind,
test_name="test_deletion_queue_recovery",
)

env = neon_env_builder.init_start(
initial_tenant_conf={
# small checkpointing and compaction targets to ensure we generate many upload operations
"checkpoint_distance": f"{128 * 1024}",
"compaction_threshold": "1",
"compaction_target_size": f"{128 * 1024}",
# no PITR horizon, we specify the horizon when we request on-demand GC
"pitr_interval": "0s",
# disable background compaction and GC. We invoke it manually when we want it to happen.
"gc_period": "0s",
"compaction_period": "0s",
# create image layers eagerly, so that GC can remove some layers
"image_creation_threshold": "1",
}
)

ps_http = env.pageserver.http_client()

# Prevent deletion lists from being executed, to build up some backlog of deletions
ps_http.configure_failpoints(
[
("deletion-queue-before-execute", "return"),
]
)

generate_uploads_and_deletions(env)

# There should be entries in the deletion queue
assert_deletion_queue(ps_http, lambda n: n > 0)
ps_http.deletion_queue_flush()
before_restart_depth = get_deletion_queue_depth(ps_http)

log.info(f"Restarting pageserver with {before_restart_depth} deletions enqueued")
env.pageserver.stop(immediate=True)
env.pageserver.start()

def assert_deletions_submitted(n: int):
assert ps_http.get_metric_value("pageserver_deletion_queue_submitted_total") == n

# After restart, issue a flush to kick the deletion frontend to do recovery.
# It should recover all the operations we submitted before the restart.
ps_http.deletion_queue_flush(execute=False)
wait_until(20, 0.25, lambda: assert_deletions_submitted(before_restart_depth))

# The queue should drain through completely if we flush it
ps_http.deletion_queue_flush(execute=True)
wait_until(10, 1, lambda: assert_deletion_queue(ps_http, lambda n: n == 0))

# Restart again
env.pageserver.stop(immediate=True)
env.pageserver.start()

# No deletion lists should be recovered: this demonstrates that deletion lists
# were cleaned up after being executed.
time.sleep(1)
assert_deletion_queue(ps_http, lambda n: n == 0)

@@ -47,15 +47,6 @@ def test_tenant_delete_smoke(
)

env = neon_env_builder.init_start()
env.pageserver.allowed_errors.extend(
[
# The deletion queue will complain when it encounters simulated S3 errors
".*deletion frontend: Failed to write deletion list.*",
".*deletion backend: Failed to delete deletion list.*",
".*deletion executor: DeleteObjects request failed.*",
".*deletion backend: Failed to upload deletion queue header.*",
]
)

# lucky race with stopping from flushing a layer we fail to schedule any uploads
env.pageserver.allowed_errors.append(
@@ -100,9 +91,7 @@ def test_tenant_delete_smoke(

iterations = poll_for_remote_storage_iterations(remote_storage_kind)

# We are running with failures enabled, so this may take some time to make
# it through all the remote storage operations required to complete
tenant_delete_wait_completed(ps_http, tenant_id, iterations * 10)
tenant_delete_wait_completed(ps_http, tenant_id, iterations)

tenant_path = env.tenant_dir(tenant_id=tenant_id)
assert not tenant_path.exists()
@@ -212,17 +201,6 @@ def test_delete_tenant_exercise_crash_safety_failpoints(
]
)

if simulate_failures:
env.pageserver.allowed_errors.extend(
[
# The deletion queue will complain when it encounters simulated S3 errors
".*deletion frontend: Failed to write deletion list.*",
".*deletion backend: Failed to delete deletion list.*",
".*deletion executor: DeleteObjects request failed.*",
".*deletion backend: Failed to upload deletion queue header.*",
]
)

ps_http = env.pageserver.http_client()

timeline_id = env.neon_cli.create_timeline("delete", tenant_id=tenant_id)

@@ -488,14 +488,7 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild
# Wait for tenant to finish loading.
wait_until_tenant_active(ps_http, tenant_id=env.initial_tenant, iterations=10, period=1)

# Timeline deletion takes some finite time after startup
wait_timeline_detail_404(
ps_http,
tenant_id=env.initial_tenant,
timeline_id=leaf_timeline_id,
iterations=20,
interval=0.5,
)
wait_timeline_detail_404(ps_http, env.initial_tenant, leaf_timeline_id, iterations=4)

assert (
not leaf_timeline_path.exists()
@@ -541,7 +534,7 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild
wait_until(
2,
0.5,
lambda: assert_prefix_empty(neon_env_builder, prefix="/tenants"),
lambda: assert_prefix_empty(neon_env_builder),
)


@@ -695,7 +688,7 @@ def test_delete_timeline_client_hangup(neon_env_builder: NeonEnvBuilder):
wait_until(50, 0.1, first_request_finished)

# check that the timeline is gone
wait_timeline_detail_404(ps_http, env.initial_tenant, child_timeline_id, iterations=4)
wait_timeline_detail_404(ps_http, env.initial_tenant, child_timeline_id, iterations=2)


@pytest.mark.parametrize(
@@ -779,11 +772,7 @@ def test_timeline_delete_works_for_remote_smoke(

# for some reason the check above doesn't immediately take effect for the below.
# Assume it is mock server inconsistency and check twice.
wait_until(
2,
0.5,
lambda: assert_prefix_empty(neon_env_builder, "/tenants"),
)
wait_until(2, 0.5, lambda: assert_prefix_empty(neon_env_builder))


def test_delete_orphaned_objects(
@@ -838,8 +827,6 @@ def test_delete_orphaned_objects(
reason = timeline_info["state"]["Broken"]["reason"]
assert reason.endswith(f"failpoint: {failpoint}"), reason

ps_http.deletion_queue_flush(execute=True)

for orphan in orphans:
assert not orphan.exists()
assert env.pageserver.log_contains(
|
||||
|
||||
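wait_timeline_detail_404 in the hunks above (and tenant_delete_wait_completed in the earlier ones) follows the same retry shape: poll a pageserver HTTP endpoint through wait_until until the object stops being reported. Here is a hedged sketch of the timeline variant, assuming a ps_http.timeline_detail(tenant_id, timeline_id) client method whose error exposes an HTTP status code; the names and the status_code attribute are illustrative, not necessarily the fixtures' exact API:

def wait_timeline_detail_404(ps_http, tenant_id, timeline_id, iterations, interval=0.25):
    def timeline_is_missing():
        try:
            ps_http.timeline_detail(tenant_id, timeline_id)  # assumed client call
        except Exception as e:
            # assumed attribute: the client error carries the HTTP status
            if getattr(e, "status_code", None) == 404:
                return  # timeline is gone, which is what the test expects
            raise
        raise AssertionError(f"timeline {timeline_id} still exists")

    wait_until(iterations, interval, timeline_is_missing)

Under that reading, the edits above are pure retry-budget tuning: iterations=20 with interval=0.5 becomes iterations=4, and iterations=4 becomes iterations=2, trimming the worst-case wait in these tests.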
@@ -15,6 +15,7 @@ publish = false
 [dependencies]
 anyhow = { version = "1", features = ["backtrace"] }
+axum = { version = "0.6", features = ["ws"] }
 base64 = { version = "0.21", features = ["alloc"] }
 bytes = { version = "1", features = ["serde"] }
 chrono = { version = "0.4", default-features = false, features = ["clock", "serde"] }
 clap = { version = "4", features = ["derive", "string"] }
@@ -44,14 +45,14 @@ regex = { version = "1" }
 regex-syntax = { version = "0.7" }
 reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "multipart", "rustls-tls"] }
 ring = { version = "0.16", features = ["std"] }
-rustls = { version = "0.20", features = ["dangerous_configuration"] }
+rustls = { version = "0.21", features = ["dangerous_configuration"] }
 scopeguard = { version = "1" }
 serde = { version = "1", features = ["alloc", "derive"] }
 serde_json = { version = "1", features = ["raw_value"] }
 smallvec = { version = "1", default-features = false, features = ["write"] }
 socket2 = { version = "0.4", default-features = false, features = ["all"] }
 tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", "process", "rt-multi-thread", "signal", "test-util"] }
-tokio-rustls = { version = "0.23" }
+tokio-rustls = { version = "0.24" }
 tokio-util = { version = "0.7", features = ["codec", "io"] }
 toml_datetime = { version = "0.6", default-features = false, features = ["serde"] }
 toml_edit = { version = "0.19", features = ["serde"] }
@@ -74,7 +75,7 @@ prost = { version = "0.11" }
 regex = { version = "1" }
 regex-syntax = { version = "0.7" }
 serde = { version = "1", features = ["alloc", "derive"] }
-syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-traits", "full", "visit", "visit-mut"] }
-syn-f595c2ba2a3f28df = { package = "syn", version = "2", features = ["extra-traits", "full", "visit-mut"] }
+syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-traits", "full", "visit"] }
+syn-f595c2ba2a3f28df = { package = "syn", version = "2", features = ["extra-traits", "full", "visit", "visit-mut"] }
 
 ### END HAKARI SECTION