Compare commits

..

4 Commits

Author SHA1 Message Date
Conrad Ludgate
87c793f58c log spec json and return parse error
dont cancel e2e test early
2024-10-04 11:37:48 +01:00
Conrad Ludgate
52a7d780ad test 2024-10-04 10:32:18 +01:00
Conrad Ludgate
2255a8ebac minor changes to local_proxy 2024-10-04 09:18:43 +01:00
Conrad Ludgate
e109d5aac0 add local_proxy to computespec 2024-10-04 09:16:21 +01:00
311 changed files with 3113 additions and 4821 deletions

View File

@@ -5,7 +5,9 @@
!Cargo.toml !Cargo.toml
!Makefile !Makefile
!rust-toolchain.toml !rust-toolchain.toml
!scripts/combine_control_files.py
!scripts/ninstall.sh !scripts/ninstall.sh
!vm-cgconfig.conf
!docker-compose/run-tests.sh !docker-compose/run-tests.sh
# Directories # Directories
@@ -15,12 +17,15 @@
!compute_tools/ !compute_tools/
!control_plane/ !control_plane/
!libs/ !libs/
!neon_local/
!pageserver/ !pageserver/
!patches/
!pgxn/ !pgxn/
!proxy/ !proxy/
!storage_scrubber/ !storage_scrubber/
!safekeeper/ !safekeeper/
!storage_broker/ !storage_broker/
!storage_controller/ !storage_controller/
!trace/
!vendor/postgres-*/ !vendor/postgres-*/
!workspace_hack/ !workspace_hack/

View File

@@ -33,7 +33,7 @@ jobs:
github-event-name: ${{ github.event_name }} github-event-name: ${{ github.event_name }}
cancel-previous-e2e-tests: cancel-previous-e2e-tests:
needs: [ check-permissions ] needs: [ check-permissions, promote-images, tag ]
if: github.event_name == 'pull_request' if: github.event_name == 'pull_request'
runs-on: ubuntu-22.04 runs-on: ubuntu-22.04
@@ -518,7 +518,7 @@ jobs:
trigger-e2e-tests: trigger-e2e-tests:
if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' }} if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' }}
needs: [ check-permissions, promote-images, tag ] needs: [ check-permissions, promote-images, tag, cancel-previous-e2e-tests ]
uses: ./.github/workflows/trigger-e2e-tests.yml uses: ./.github/workflows/trigger-e2e-tests.yml
secrets: inherit secrets: inherit

272
Cargo.lock generated
View File

@@ -666,6 +666,34 @@ dependencies = [
"tracing", "tracing",
] ]
[[package]]
name = "axum"
version = "0.6.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b829e4e32b91e643de6eafe82b1d90675f5874230191a4ffbc1b336dec4d6bf"
dependencies = [
"async-trait",
"axum-core 0.3.4",
"bitflags 1.3.2",
"bytes",
"futures-util",
"http 0.2.9",
"http-body 0.4.5",
"hyper 0.14.30",
"itoa",
"matchit 0.7.0",
"memchr",
"mime",
"percent-encoding",
"pin-project-lite",
"rustversion",
"serde",
"sync_wrapper 0.1.2",
"tower",
"tower-layer",
"tower-service",
]
[[package]] [[package]]
name = "axum" name = "axum"
version = "0.7.5" version = "0.7.5"
@@ -673,7 +701,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf" checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
dependencies = [ dependencies = [
"async-trait", "async-trait",
"axum-core", "axum-core 0.4.5",
"base64 0.21.1", "base64 0.21.1",
"bytes", "bytes",
"futures-util", "futures-util",
@@ -703,6 +731,23 @@ dependencies = [
"tracing", "tracing",
] ]
[[package]]
name = "axum-core"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c"
dependencies = [
"async-trait",
"bytes",
"futures-util",
"http 0.2.9",
"http-body 0.4.5",
"mime",
"rustversion",
"tower-layer",
"tower-service",
]
[[package]] [[package]]
name = "axum-core" name = "axum-core"
version = "0.4.5" version = "0.4.5"
@@ -926,7 +971,7 @@ dependencies = [
"clang-sys", "clang-sys",
"itertools 0.12.1", "itertools 0.12.1",
"log", "log",
"prettyplease", "prettyplease 0.2.17",
"proc-macro2", "proc-macro2",
"quote", "quote",
"regex", "regex",
@@ -1220,7 +1265,6 @@ version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bytes", "bytes",
"camino",
"cfg-if", "cfg-if",
"chrono", "chrono",
"clap", "clap",
@@ -2409,6 +2453,15 @@ dependencies = [
"digest", "digest",
] ]
[[package]]
name = "home"
version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
dependencies = [
"windows-sys 0.52.0",
]
[[package]] [[package]]
name = "hostname" name = "hostname"
version = "0.4.0" version = "0.4.0"
@@ -2603,15 +2656,14 @@ dependencies = [
[[package]] [[package]]
name = "hyper-timeout" name = "hyper-timeout"
version = "0.5.1" version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3203a961e5c83b6f5498933e78b6b263e208c197b63e9c6c53cc82ffd3f63793" checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1"
dependencies = [ dependencies = [
"hyper 1.4.1", "hyper 0.14.30",
"hyper-util",
"pin-project-lite", "pin-project-lite",
"tokio", "tokio",
"tower-service", "tokio-io-timeout",
] ]
[[package]] [[package]]
@@ -3417,7 +3469,7 @@ dependencies = [
"opentelemetry-http", "opentelemetry-http",
"opentelemetry-proto", "opentelemetry-proto",
"opentelemetry_sdk", "opentelemetry_sdk",
"prost", "prost 0.13.3",
"reqwest 0.12.4", "reqwest 0.12.4",
"thiserror", "thiserror",
] ]
@@ -3430,8 +3482,8 @@ checksum = "30ee9f20bff9c984511a02f082dc8ede839e4a9bf15cc2487c8d6fea5ad850d9"
dependencies = [ dependencies = [
"opentelemetry", "opentelemetry",
"opentelemetry_sdk", "opentelemetry_sdk",
"prost", "prost 0.13.3",
"tonic", "tonic 0.12.3",
] ]
[[package]] [[package]]
@@ -4125,6 +4177,16 @@ dependencies = [
"tokio", "tokio",
] ]
[[package]]
name = "prettyplease"
version = "0.1.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c8646e95016a7a6c4adea95bafa8a16baab64b583356217f2c85db4a39d9a86"
dependencies = [
"proc-macro2",
"syn 1.0.109",
]
[[package]] [[package]]
name = "prettyplease" name = "prettyplease"
version = "0.2.17" version = "0.2.17"
@@ -4195,6 +4257,16 @@ dependencies = [
"thiserror", "thiserror",
] ]
[[package]]
name = "prost"
version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd"
dependencies = [
"bytes",
"prost-derive 0.11.9",
]
[[package]] [[package]]
name = "prost" name = "prost"
version = "0.13.3" version = "0.13.3"
@@ -4202,28 +4274,42 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f"
dependencies = [ dependencies = [
"bytes", "bytes",
"prost-derive", "prost-derive 0.13.3",
] ]
[[package]] [[package]]
name = "prost-build" name = "prost-build"
version = "0.13.3" version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270"
dependencies = [ dependencies = [
"bytes", "bytes",
"heck 0.5.0", "heck 0.4.1",
"itertools 0.12.1", "itertools 0.10.5",
"lazy_static",
"log", "log",
"multimap", "multimap",
"once_cell",
"petgraph", "petgraph",
"prettyplease", "prettyplease 0.1.25",
"prost", "prost 0.11.9",
"prost-types", "prost-types",
"regex", "regex",
"syn 2.0.52", "syn 1.0.109",
"tempfile", "tempfile",
"which",
]
[[package]]
name = "prost-derive"
version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4"
dependencies = [
"anyhow",
"itertools 0.10.5",
"proc-macro2",
"quote",
"syn 1.0.109",
] ]
[[package]] [[package]]
@@ -4241,11 +4327,11 @@ dependencies = [
[[package]] [[package]]
name = "prost-types" name = "prost-types"
version = "0.13.3" version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" checksum = "213622a1460818959ac1181aaeb2dc9c7f63df720db7d788b3e24eacd1983e13"
dependencies = [ dependencies = [
"prost", "prost 0.11.9",
] ]
[[package]] [[package]]
@@ -5007,21 +5093,6 @@ dependencies = [
"zeroize", "zeroize",
] ]
[[package]]
name = "rustls"
version = "0.23.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebbbdb961df0ad3f2652da8f3fdc4b36122f568f968f45ad3316f26c025c677b"
dependencies = [
"log",
"once_cell",
"ring",
"rustls-pki-types",
"rustls-webpki 0.102.2",
"subtle",
"zeroize",
]
[[package]] [[package]]
name = "rustls-native-certs" name = "rustls-native-certs"
version = "0.6.2" version = "0.6.2"
@@ -5047,19 +5118,6 @@ dependencies = [
"security-framework", "security-framework",
] ]
[[package]]
name = "rustls-native-certs"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcaf18a4f2be7326cd874a5fa579fae794320a0f388d365dca7e480e55f83f8a"
dependencies = [
"openssl-probe",
"rustls-pemfile 2.1.1",
"rustls-pki-types",
"schannel",
"security-framework",
]
[[package]] [[package]]
name = "rustls-pemfile" name = "rustls-pemfile"
version = "1.0.2" version = "1.0.2"
@@ -5135,7 +5193,6 @@ dependencies = [
"fail", "fail",
"futures", "futures",
"hex", "hex",
"http 1.1.0",
"humantime", "humantime",
"hyper 0.14.30", "hyper 0.14.30",
"metrics", "metrics",
@@ -5692,22 +5749,19 @@ version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-stream", "async-stream",
"bytes",
"clap", "clap",
"const_format", "const_format",
"futures", "futures",
"futures-core", "futures-core",
"futures-util", "futures-util",
"http-body-util",
"humantime", "humantime",
"hyper 1.4.1", "hyper 0.14.30",
"hyper-util",
"metrics", "metrics",
"once_cell", "once_cell",
"parking_lot 0.12.1", "parking_lot 0.12.1",
"prost", "prost 0.11.9",
"tokio", "tokio",
"tonic", "tonic 0.9.2",
"tonic-build", "tonic-build",
"tracing", "tracing",
"utils", "utils",
@@ -6251,17 +6305,6 @@ dependencies = [
"tokio", "tokio",
] ]
[[package]]
name = "tokio-rustls"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4"
dependencies = [
"rustls 0.23.7",
"rustls-pki-types",
"tokio",
]
[[package]] [[package]]
name = "tokio-stream" name = "tokio-stream"
version = "0.1.16" version = "0.1.16"
@@ -6353,30 +6396,29 @@ dependencies = [
[[package]] [[package]]
name = "tonic" name = "tonic"
version = "0.12.3" version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" checksum = "3082666a3a6433f7f511c7192923fa1fe07c69332d3c6a2e6bb040b569199d5a"
dependencies = [ dependencies = [
"async-stream", "async-stream",
"async-trait", "async-trait",
"axum", "axum 0.6.20",
"base64 0.22.1", "base64 0.21.1",
"bytes", "bytes",
"h2 0.4.4", "futures-core",
"http 1.1.0", "futures-util",
"http-body 1.0.0", "h2 0.3.26",
"http-body-util", "http 0.2.9",
"hyper 1.4.1", "http-body 0.4.5",
"hyper 0.14.30",
"hyper-timeout", "hyper-timeout",
"hyper-util",
"percent-encoding", "percent-encoding",
"pin-project", "pin-project",
"prost", "prost 0.11.9",
"rustls-native-certs 0.8.0", "rustls-native-certs 0.6.2",
"rustls-pemfile 2.1.1", "rustls-pemfile 1.0.2",
"socket2",
"tokio", "tokio",
"tokio-rustls 0.26.0", "tokio-rustls 0.24.0",
"tokio-stream", "tokio-stream",
"tower", "tower",
"tower-layer", "tower-layer",
@@ -6385,17 +6427,37 @@ dependencies = [
] ]
[[package]] [[package]]
name = "tonic-build" name = "tonic"
version = "0.12.3" version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11" checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
dependencies = [ dependencies = [
"prettyplease", "async-trait",
"base64 0.22.1",
"bytes",
"http 1.1.0",
"http-body 1.0.0",
"http-body-util",
"percent-encoding",
"pin-project",
"prost 0.13.3",
"tokio-stream",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "tonic-build"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6fdaae4c2c638bb70fe42803a26fbd6fc6ac8c72f5c59f67ecc2a2dcabf4b07"
dependencies = [
"prettyplease 0.1.25",
"proc-macro2", "proc-macro2",
"prost-build", "prost-build",
"prost-types",
"quote", "quote",
"syn 2.0.52", "syn 1.0.109",
] ]
[[package]] [[package]]
@@ -6801,7 +6863,7 @@ name = "vm_monitor"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"axum", "axum 0.7.5",
"cgroups-rs", "cgroups-rs",
"clap", "clap",
"futures", "futures",
@@ -7032,6 +7094,18 @@ dependencies = [
"rustls-pki-types", "rustls-pki-types",
] ]
[[package]]
name = "which"
version = "4.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7"
dependencies = [
"either",
"home",
"once_cell",
"rustix",
]
[[package]] [[package]]
name = "whoami" name = "whoami"
version = "1.5.1" version = "1.5.1"
@@ -7260,10 +7334,15 @@ version = "0.1.0"
dependencies = [ dependencies = [
"ahash", "ahash",
"anyhow", "anyhow",
"axum", "aws-config",
"axum-core", "aws-runtime",
"aws-sigv4",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-types",
"base64 0.21.1", "base64 0.21.1",
"base64ct", "base64ct",
"bitflags 2.4.1",
"bytes", "bytes",
"camino", "camino",
"cc", "cc",
@@ -7291,6 +7370,7 @@ dependencies = [
"hyper 1.4.1", "hyper 1.4.1",
"hyper-util", "hyper-util",
"indexmap 1.9.3", "indexmap 1.9.3",
"itertools 0.10.5",
"itertools 0.12.1", "itertools 0.12.1",
"lazy_static", "lazy_static",
"libc", "libc",
@@ -7302,15 +7382,15 @@ dependencies = [
"num-traits", "num-traits",
"once_cell", "once_cell",
"parquet", "parquet",
"prettyplease",
"proc-macro2", "proc-macro2",
"prost", "prost 0.11.9",
"quote", "quote",
"rand 0.8.5", "rand 0.8.5",
"regex", "regex",
"regex-automata 0.4.3", "regex-automata 0.4.3",
"regex-syntax 0.8.2", "regex-syntax 0.8.2",
"reqwest 0.12.4", "reqwest 0.12.4",
"rustls 0.21.11",
"scopeguard", "scopeguard",
"serde", "serde",
"serde_json", "serde_json",
@@ -7326,14 +7406,14 @@ dependencies = [
"time", "time",
"time-macros", "time-macros",
"tokio", "tokio",
"tokio-stream", "tokio-rustls 0.24.0",
"tokio-util", "tokio-util",
"toml_edit", "toml_edit",
"tonic",
"tower", "tower",
"tracing", "tracing",
"tracing-core", "tracing-core",
"url", "url",
"uuid",
"zeroize", "zeroize",
"zstd", "zstd",
"zstd-safe", "zstd-safe",

View File

@@ -53,7 +53,7 @@ azure_storage_blobs = { version = "0.19", default-features = false, features = [
flate2 = "1.0.26" flate2 = "1.0.26"
async-stream = "0.3" async-stream = "0.3"
async-trait = "0.1" async-trait = "0.1"
aws-config = { version = "1.5", default-features = false, features=["rustls", "sso"] } aws-config = { version = "1.5", default-features = false, features=["rustls"] }
aws-sdk-s3 = "1.52" aws-sdk-s3 = "1.52"
aws-sdk-iam = "1.46.0" aws-sdk-iam = "1.46.0"
aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] } aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] }
@@ -130,7 +130,7 @@ pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
pin-project-lite = "0.2" pin-project-lite = "0.2"
procfs = "0.16" procfs = "0.16"
prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
prost = "0.13" prost = "0.11"
rand = "0.8" rand = "0.8"
redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] } redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] }
regex = "1.10.2" regex = "1.10.2"
@@ -178,7 +178,7 @@ tokio-tar = "0.3"
tokio-util = { version = "0.7.10", features = ["io", "rt"] } tokio-util = { version = "0.7.10", features = ["io", "rt"] }
toml = "0.8" toml = "0.8"
toml_edit = "0.22" toml_edit = "0.22"
tonic = {version = "0.12.3", features = ["tls", "tls-roots"]} tonic = {version = "0.9", features = ["tls", "tls-roots"]}
tower-service = "0.3.2" tower-service = "0.3.2"
tracing = "0.1" tracing = "0.1"
tracing-error = "0.2" tracing-error = "0.2"
@@ -246,7 +246,7 @@ criterion = "0.5.1"
rcgen = "0.12" rcgen = "0.12"
rstest = "0.18" rstest = "0.18"
camino-tempfile = "1.0.2" camino-tempfile = "1.0.2"
tonic-build = "0.12" tonic-build = "0.9"
[patch.crates-io] [patch.crates-io]

View File

@@ -168,27 +168,27 @@ postgres-check-%: postgres-%
neon-pg-ext-%: postgres-% neon-pg-ext-%: postgres-%
+@echo "Compiling neon $*" +@echo "Compiling neon $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-$* mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \ $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \ -C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install -f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install
+@echo "Compiling neon_walredo $*" +@echo "Compiling neon_walredo $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \ $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \ -C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install -f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install
+@echo "Compiling neon_rmgr $*" +@echo "Compiling neon_rmgr $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$* mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \ $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$* \ -C $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_rmgr/Makefile install -f $(ROOT_PROJECT_DIR)/pgxn/neon_rmgr/Makefile install
+@echo "Compiling neon_test_utils $*" +@echo "Compiling neon_test_utils $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \ $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \ -C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install -f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install
+@echo "Compiling neon_utils $*" +@echo "Compiling neon_utils $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-utils-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \ $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \ -C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install -f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install
@@ -220,7 +220,7 @@ neon-pg-clean-ext-%:
walproposer-lib: neon-pg-ext-v17 walproposer-lib: neon-pg-ext-v17
+@echo "Compiling walproposer-lib" +@echo "Compiling walproposer-lib"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/walproposer-lib mkdir -p $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \ $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \ -C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib -f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
@@ -333,7 +333,7 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
# Indent pxgn/neon. # Indent pxgn/neon.
.PHONY: neon-pgindent .PHONY: neon-pgindent
neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17 neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \ $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \ FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
INDENT=$(POSTGRES_INSTALL_DIR)/build/v17/src/tools/pg_bsd_indent/pg_bsd_indent \ INDENT=$(POSTGRES_INSTALL_DIR)/build/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \ PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \

View File

@@ -1075,20 +1075,6 @@ RUN set -e \
&& make -j $(nproc) dist_man_MANS= \ && make -j $(nproc) dist_man_MANS= \
&& make install dist_man_MANS= && make install dist_man_MANS=
#########################################################################################
#
# Compile the Neon-specific `local_proxy` binary
#
#########################################################################################
FROM $REPOSITORY/$IMAGE:$TAG AS local_proxy
ARG BUILD_TAG
ENV BUILD_TAG=$BUILD_TAG
USER nonroot
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
COPY --chown=nonroot . .
RUN mold -run cargo build --locked --profile release-line-debug-size-lto --bin local_proxy
######################################################################################### #########################################################################################
# #
# Layers "postgres-exporter" and "sql-exporter" # Layers "postgres-exporter" and "sql-exporter"
@@ -1227,10 +1213,6 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer
COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini
# local_proxy and its config
COPY --from=local_proxy --chown=postgres /home/nonroot/target/release-line-debug-size-lto/local_proxy /usr/local/bin/local_proxy
RUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy
# Metrics exporter binaries and configuration files # Metrics exporter binaries and configuration files
COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter
COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter

View File

@@ -19,10 +19,6 @@ commands:
user: postgres user: postgres
sysvInitAction: respawn sysvInitAction: respawn
shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini' shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini'
- name: local_proxy
user: postgres
sysvInitAction: respawn
shell: '/usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
- name: postgres-exporter - name: postgres-exporter
user: nobody user: nobody
sysvInitAction: respawn sysvInitAction: respawn

View File

@@ -11,7 +11,7 @@ testing = []
[dependencies] [dependencies]
anyhow.workspace = true anyhow.workspace = true
camino.workspace = true # camino.workspace = true
chrono.workspace = true chrono.workspace = true
cfg-if.workspace = true cfg-if.workspace = true
clap.workspace = true clap.workspace = true

View File

@@ -402,7 +402,8 @@ fn start_postgres(
) -> Result<(Option<PostgresHandle>, StartPostgresResult)> { ) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
// We got all we need, update the state. // We got all we need, update the state.
let mut state = compute.state.lock().unwrap(); let mut state = compute.state.lock().unwrap();
state.set_status(ComputeStatus::Init, &compute.state_changed); state.status = ComputeStatus::Init;
compute.state_changed.notify_all();
info!( info!(
"running compute with features: {:?}", "running compute with features: {:?}",

View File

@@ -34,7 +34,6 @@ use nix::sys::signal::{kill, Signal};
use remote_storage::{DownloadError, RemotePath}; use remote_storage::{DownloadError, RemotePath};
use crate::checker::create_availability_check_data; use crate::checker::create_availability_check_data;
use crate::local_proxy;
use crate::logger::inlinify; use crate::logger::inlinify;
use crate::pg_helpers::*; use crate::pg_helpers::*;
use crate::spec::*; use crate::spec::*;
@@ -109,18 +108,6 @@ impl ComputeState {
metrics: ComputeMetrics::default(), metrics: ComputeMetrics::default(),
} }
} }
pub fn set_status(&mut self, status: ComputeStatus, state_changed: &Condvar) {
let prev = self.status;
info!("Changing compute status from {} to {}", prev, status);
self.status = status;
state_changed.notify_all();
}
pub fn set_failed_status(&mut self, err: anyhow::Error, state_changed: &Condvar) {
self.error = Some(format!("{err:?}"));
self.set_status(ComputeStatus::Failed, state_changed);
}
} }
impl Default for ComputeState { impl Default for ComputeState {
@@ -315,12 +302,15 @@ impl ComputeNode {
pub fn set_status(&self, status: ComputeStatus) { pub fn set_status(&self, status: ComputeStatus) {
let mut state = self.state.lock().unwrap(); let mut state = self.state.lock().unwrap();
state.set_status(status, &self.state_changed); state.status = status;
self.state_changed.notify_all();
} }
pub fn set_failed_status(&self, err: anyhow::Error) { pub fn set_failed_status(&self, err: anyhow::Error) {
let mut state = self.state.lock().unwrap(); let mut state = self.state.lock().unwrap();
state.set_failed_status(err, &self.state_changed); state.error = Some(format!("{err:?}"));
state.status = ComputeStatus::Failed;
self.state_changed.notify_all();
} }
pub fn get_status(&self) -> ComputeStatus { pub fn get_status(&self) -> ComputeStatus {
@@ -890,19 +880,12 @@ impl ComputeNode {
.context("apply_config handle_grants")?; .context("apply_config handle_grants")?;
handle_extensions(spec, &mut client).context("apply_config handle_extensions")?; handle_extensions(spec, &mut client).context("apply_config handle_extensions")?;
handle_extension_neon(&mut client).context("apply_config handle_extension_neon")?; handle_extension_neon(&mut client).context("apply_config handle_extension_neon")?;
handle_jwt_extension(spec, &mut client, connstr.as_str())
.context("apply_config handle_jwt_extension")?;
create_availability_check_data(&mut client) create_availability_check_data(&mut client)
.context("apply_config create_availability_check_data")?; .context("apply_config create_availability_check_data")?;
// 'Close' connection // 'Close' connection
drop(client); drop(client);
if let Some(ref local_proxy) = spec.local_proxy_config {
info!("configuring local_proxy");
local_proxy::configure(local_proxy).context("apply_config local_proxy")?;
}
// Run migrations separately to not hold up cold starts // Run migrations separately to not hold up cold starts
thread::spawn(move || { thread::spawn(move || {
let mut connstr = connstr.clone(); let mut connstr = connstr.clone();
@@ -953,19 +936,6 @@ impl ComputeNode {
}); });
} }
if let Some(ref local_proxy) = spec.local_proxy_config {
info!("configuring local_proxy");
// Spawn a thread to do the configuration,
// so that we don't block the main thread that starts Postgres.
let local_proxy = local_proxy.clone();
let _handle = Some(thread::spawn(move || {
if let Err(err) = local_proxy::configure(&local_proxy) {
error!("error while configuring local_proxy: {err:?}");
}
}));
}
// Write new config // Write new config
let pgdata_path = Path::new(&self.pgdata); let pgdata_path = Path::new(&self.pgdata);
let postgresql_conf_path = pgdata_path.join("postgresql.conf"); let postgresql_conf_path = pgdata_path.join("postgresql.conf");
@@ -994,7 +964,6 @@ impl ComputeNode {
)?; )?;
handle_extensions(&spec, &mut client)?; handle_extensions(&spec, &mut client)?;
handle_extension_neon(&mut client)?; handle_extension_neon(&mut client)?;
handle_jwt_extension(&spec, &mut client, self.connstr.as_str())?;
// We can skip handle_migrations here because a new migration can only appear // We can skip handle_migrations here because a new migration can only appear
// if we have a new version of the compute_ctl binary, which can only happen // if we have a new version of the compute_ctl binary, which can only happen
// if compute got restarted, in which case we'll end up inside of apply_config // if compute got restarted, in which case we'll end up inside of apply_config
@@ -1054,19 +1023,6 @@ impl ComputeNode {
}); });
} }
if let Some(local_proxy) = &pspec.spec.local_proxy_config {
info!("configuring local_proxy");
// Spawn a thread to do the configuration,
// so that we don't block the main thread that starts Postgres.
let local_proxy = local_proxy.clone();
let _handle = thread::spawn(move || {
if let Err(err) = local_proxy::configure(&local_proxy) {
error!("error while configuring local_proxy: {err:?}");
}
});
}
info!( info!(
"start_compute spec.remote_extensions {:?}", "start_compute spec.remote_extensions {:?}",
pspec.spec.remote_extensions pspec.spec.remote_extensions

View File

@@ -24,7 +24,8 @@ fn configurator_main_loop(compute: &Arc<ComputeNode>) {
// Re-check the status after waking up // Re-check the status after waking up
if state.status == ComputeStatus::ConfigurationPending { if state.status == ComputeStatus::ConfigurationPending {
info!("got configuration request"); info!("got configuration request");
state.set_status(ComputeStatus::Configuration, &compute.state_changed); state.status = ComputeStatus::Configuration;
compute.state_changed.notify_all();
drop(state); drop(state);
let mut new_status = ComputeStatus::Failed; let mut new_status = ComputeStatus::Failed;

View File

@@ -264,7 +264,8 @@ async fn handle_configure_request(
let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap(); let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap();
let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap(); let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap();
if let Ok(request) = serde_json::from_str::<ConfigurationRequest>(&spec_raw) { match serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
Ok(request) => {
let spec = request.spec; let spec = request.spec;
let parsed_spec = match ParsedSpec::try_from(spec) { let parsed_spec = match ParsedSpec::try_from(spec) {
@@ -288,7 +289,8 @@ async fn handle_configure_request(
return Err((msg, StatusCode::PRECONDITION_FAILED)); return Err((msg, StatusCode::PRECONDITION_FAILED));
} }
state.pspec = Some(parsed_spec); state.pspec = Some(parsed_spec);
state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed); state.status = ComputeStatus::ConfigurationPending;
compute.state_changed.notify_all();
drop(state); drop(state);
info!("set new spec and notified waiters"); info!("set new spec and notified waiters");
} }
@@ -323,8 +325,11 @@ async fn handle_configure_request(
let state = compute.state.lock().unwrap().clone(); let state = compute.state.lock().unwrap().clone();
let status_response = status_response_from_state(&state); let status_response = status_response_from_state(&state);
Ok(serde_json::to_string(&status_response).unwrap()) Ok(serde_json::to_string(&status_response).unwrap())
} else { }
Err(("invalid spec".to_string(), StatusCode::BAD_REQUEST)) Err(err) => {
error!("could not parse spec: {spec_raw}");
Err((format!("invalid spec: {err:?}"), StatusCode::BAD_REQUEST))
}
} }
} }
@@ -361,15 +366,15 @@ async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (Str
} }
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running { if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
let msg = format!( let msg = format!(
"invalid compute status for termination request: {}", "invalid compute status for termination request: {:?}",
state.status state.status.clone()
); );
return Err((msg, StatusCode::PRECONDITION_FAILED)); return Err((msg, StatusCode::PRECONDITION_FAILED));
} }
state.set_status(ComputeStatus::TerminationPending, &compute.state_changed); state.status = ComputeStatus::TerminationPending;
compute.state_changed.notify_all();
drop(state); drop(state);
} }
forward_termination_signal(); forward_termination_signal();
info!("sent signal and notified waiters"); info!("sent signal and notified waiters");
@@ -383,8 +388,7 @@ async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (Str
while state.status != ComputeStatus::Terminated { while state.status != ComputeStatus::Terminated {
state = c.state_changed.wait(state).unwrap(); state = c.state_changed.wait(state).unwrap();
info!( info!(
"waiting for compute to become {}, current status: {:?}", "waiting for compute to become Terminated, current status: {:?}",
ComputeStatus::Terminated,
state.status state.status
); );
} }

View File

@@ -15,7 +15,7 @@ pub mod catalog;
pub mod compute; pub mod compute;
pub mod disk_quota; pub mod disk_quota;
pub mod extension_server; pub mod extension_server;
pub mod local_proxy; // pub mod local_proxy;
pub mod lsn_lease; pub mod lsn_lease;
mod migration; mod migration;
pub mod monitor; pub mod monitor;

View File

@@ -1,4 +1,3 @@
use std::collections::HashSet;
use std::fs::File; use std::fs::File;
use std::path::Path; use std::path::Path;
use std::str::FromStr; use std::str::FromStr;
@@ -190,15 +189,6 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
let mut xact = client.transaction()?; let mut xact = client.transaction()?;
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?; let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
let mut jwks_roles = HashSet::new();
if let Some(local_proxy) = &spec.local_proxy_config {
for jwks_setting in local_proxy.jwks.iter().flatten() {
for role_name in &jwks_setting.role_names {
jwks_roles.insert(role_name.clone());
}
}
}
// Print a list of existing Postgres roles (only in debug mode) // Print a list of existing Postgres roles (only in debug mode)
if span_enabled!(Level::INFO) { if span_enabled!(Level::INFO) {
let mut vec = Vec::new(); let mut vec = Vec::new();
@@ -318,9 +308,6 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser", "CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser",
name.pg_quote() name.pg_quote()
); );
if jwks_roles.contains(name.as_str()) {
query = format!("CREATE ROLE {}", name.pg_quote());
}
info!("running role create query: '{}'", &query); info!("running role create query: '{}'", &query);
query.push_str(&role.to_pg_options()); query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?; xact.execute(query.as_str(), &[])?;
@@ -731,48 +718,7 @@ pub fn handle_extensions(spec: &ComputeSpec, client: &mut Client) -> Result<()>
client.simple_query(query)?; client.simple_query(query)?;
} }
} }
Ok(())
}
/// Create pg_session_jwt in all databases if configured
#[instrument(skip_all)]
pub fn handle_jwt_extension(spec: &ComputeSpec, client: &mut Client, connstr: &str) -> Result<()> {
if let Some(local_proxy) = &spec.local_proxy_config {
if let Some(jwks_list) = &local_proxy.jwks {
if !jwks_list.is_empty() {
info!("enabling pg_session_jwt extension");
let existing_dbs = get_existing_dbs(client)?;
for db in &spec.cluster.databases {
match existing_dbs.get(&db.name) {
Some(pg_db) => {
if pg_db.restrict_conn || pg_db.invalid {
info!(
"skipping extension for db {} (invalid: {}, connections not allowed: {})",
db.name, pg_db.invalid, pg_db.restrict_conn
);
continue;
}
}
None => {
bail!(
"database {} doesn't exist in Postgres after handle_databases()",
db.name
);
}
}
let mut conf = Config::from_str(connstr)?;
conf.dbname(&db.name);
let mut db_client = conf.connect(NoTls)?;
let query = "CREATE EXTENSION IF NOT EXISTS pg_session_jwt";
info!("creating pg_session_jwt extension with query: {}", query);
db_client.simple_query(query)?;
}
}
}
}
Ok(()) Ok(())
} }

View File

@@ -1,7 +1,5 @@
//! Structs representing the JSON formats used in the compute_ctl's HTTP API. //! Structs representing the JSON formats used in the compute_ctl's HTTP API.
use std::fmt::Display;
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize, Serializer}; use serde::{Deserialize, Serialize, Serializer};
@@ -60,21 +58,6 @@ pub enum ComputeStatus {
Terminated, Terminated,
} }
/// Human-readable rendering of a compute status as a lowercase,
/// hyphen-separated name (e.g. `configuration-pending`).
impl Display for ComputeStatus {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the label first, then emit it with a single write.
        let label = match self {
            ComputeStatus::Empty => "empty",
            ComputeStatus::ConfigurationPending => "configuration-pending",
            ComputeStatus::Init => "init",
            ComputeStatus::Running => "running",
            ComputeStatus::Configuration => "configuration",
            ComputeStatus::Failed => "failed",
            ComputeStatus::TerminationPending => "termination-pending",
            ComputeStatus::Terminated => "terminated",
        };
        f.write_str(label)
    }
}
fn rfc3339_serialize<S>(x: &Option<DateTime<Utc>>, s: S) -> Result<S::Ok, S::Error> fn rfc3339_serialize<S>(x: &Option<DateTime<Utc>>, s: S) -> Result<S::Ok, S::Error>
where where
S: Serializer, S: Serializer,

View File

@@ -109,6 +109,7 @@ pub struct ComputeSpec {
/// Local Proxy configuration used for JWT authentication /// Local Proxy configuration used for JWT authentication
#[serde(default)] #[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub local_proxy_config: Option<LocalProxySpec>, pub local_proxy_config: Option<LocalProxySpec>,
} }
@@ -282,13 +283,11 @@ pub struct GenericOption {
/// declare a `trait` on it. /// declare a `trait` on it.
pub type GenericOptions = Option<Vec<GenericOption>>; pub type GenericOptions = Option<Vec<GenericOption>>;
/// Configured the local_proxy application with the relevant JWKS and roles it should /// Configured the local-proxy application with the relevant JWKS and roles it should
/// use for authorizing connect requests using JWT. /// use for authorizing connect requests using JWT.
#[derive(Clone, Debug, Deserialize, Serialize)] #[derive(Clone, Debug, Deserialize, Serialize)]
pub struct LocalProxySpec { pub struct LocalProxySpec {
#[serde(default)] pub jwks: Vec<JwksSettings>,
#[serde(skip_serializing_if = "Option::is_none")]
pub jwks: Option<Vec<JwksSettings>>,
} }
#[derive(Clone, Debug, Deserialize, Serialize)] #[derive(Clone, Debug, Deserialize, Serialize)]

View File

@@ -14,7 +14,7 @@ use std::time::SystemTime;
use super::REMOTE_STORAGE_PREFIX_SEPARATOR; use super::REMOTE_STORAGE_PREFIX_SEPARATOR;
use anyhow::Result; use anyhow::Result;
use azure_core::request_options::{IfMatchCondition, MaxResults, Metadata, Range}; use azure_core::request_options::{MaxResults, Metadata, Range};
use azure_core::{Continuable, RetryOptions}; use azure_core::{Continuable, RetryOptions};
use azure_identity::DefaultAzureCredential; use azure_identity::DefaultAzureCredential;
use azure_storage::StorageCredentials; use azure_storage::StorageCredentials;
@@ -33,10 +33,10 @@ use tracing::debug;
use utils::backoff; use utils::backoff;
use crate::metrics::{start_measuring_requests, AttemptOutcome, RequestKind}; use crate::metrics::{start_measuring_requests, AttemptOutcome, RequestKind};
use crate::ListingObject;
use crate::{ use crate::{
config::AzureConfig, error::Cancelled, ConcurrencyLimiter, Download, DownloadError, config::AzureConfig, error::Cancelled, ConcurrencyLimiter, Download, DownloadError, Listing,
DownloadOpts, Listing, ListingMode, ListingObject, RemotePath, RemoteStorage, StorageMetadata, ListingMode, RemotePath, RemoteStorage, StorageMetadata, TimeTravelError, TimeoutOrCancel,
TimeTravelError, TimeoutOrCancel,
}; };
pub struct AzureBlobStorage { pub struct AzureBlobStorage {
@@ -259,7 +259,6 @@ fn to_download_error(error: azure_core::Error) -> DownloadError {
if let Some(http_err) = error.as_http_error() { if let Some(http_err) = error.as_http_error() {
match http_err.status() { match http_err.status() {
StatusCode::NotFound => DownloadError::NotFound, StatusCode::NotFound => DownloadError::NotFound,
StatusCode::NotModified => DownloadError::Unmodified,
StatusCode::BadRequest => DownloadError::BadInput(anyhow::Error::new(error)), StatusCode::BadRequest => DownloadError::BadInput(anyhow::Error::new(error)),
_ => DownloadError::Other(anyhow::Error::new(error)), _ => DownloadError::Other(anyhow::Error::new(error)),
} }
@@ -485,23 +484,32 @@ impl RemoteStorage for AzureBlobStorage {
async fn download( async fn download(
&self, &self,
from: &RemotePath, from: &RemotePath,
opts: &DownloadOpts, cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
let blob_client = self.client.blob_client(self.relative_path_to_name(from));
let builder = blob_client.get();
self.download_for_builder(builder, cancel).await
}
async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken, cancel: &CancellationToken,
) -> Result<Download, DownloadError> { ) -> Result<Download, DownloadError> {
let blob_client = self.client.blob_client(self.relative_path_to_name(from)); let blob_client = self.client.blob_client(self.relative_path_to_name(from));
let mut builder = blob_client.get(); let mut builder = blob_client.get();
if let Some(ref etag) = opts.etag { let range: Range = if let Some(end_exclusive) = end_exclusive {
builder = builder.if_match(IfMatchCondition::NotMatch(etag.to_string())) (start_inclusive..end_exclusive).into()
} } else {
(start_inclusive..).into()
if let Some((start, end)) = opts.byte_range() { };
builder = builder.range(match end { builder = builder.range(range);
Some(end) => Range::Range(start..end),
None => Range::RangeFrom(start..),
});
}
self.download_for_builder(builder, cancel).await self.download_for_builder(builder, cancel).await
} }

View File

@@ -5,8 +5,6 @@ pub enum DownloadError {
BadInput(anyhow::Error), BadInput(anyhow::Error),
/// The file was not found in the remote storage. /// The file was not found in the remote storage.
NotFound, NotFound,
/// The caller provided an ETag, and the file was not modified.
Unmodified,
/// A cancellation token aborted the download, typically during /// A cancellation token aborted the download, typically during
/// tenant detach or process shutdown. /// tenant detach or process shutdown.
Cancelled, Cancelled,
@@ -26,7 +24,6 @@ impl std::fmt::Display for DownloadError {
write!(f, "Failed to download a remote file due to user input: {e}") write!(f, "Failed to download a remote file due to user input: {e}")
} }
DownloadError::NotFound => write!(f, "No file found for the remote object id given"), DownloadError::NotFound => write!(f, "No file found for the remote object id given"),
DownloadError::Unmodified => write!(f, "File was not modified"),
DownloadError::Cancelled => write!(f, "Cancelled, shutting down"), DownloadError::Cancelled => write!(f, "Cancelled, shutting down"),
DownloadError::Timeout => write!(f, "timeout"), DownloadError::Timeout => write!(f, "timeout"),
DownloadError::Other(e) => write!(f, "Failed to download a remote file: {e:?}"), DownloadError::Other(e) => write!(f, "Failed to download a remote file: {e:?}"),
@@ -41,7 +38,7 @@ impl DownloadError {
pub fn is_permanent(&self) -> bool { pub fn is_permanent(&self) -> bool {
use DownloadError::*; use DownloadError::*;
match self { match self {
BadInput(_) | NotFound | Unmodified | Cancelled => true, BadInput(_) | NotFound | Cancelled => true,
Timeout | Other(_) => false, Timeout | Other(_) => false,
} }
} }

View File

@@ -19,8 +19,7 @@ mod simulate_failures;
mod support; mod support;
use std::{ use std::{
collections::HashMap, fmt::Debug, num::NonZeroU32, ops::Bound, pin::Pin, sync::Arc, collections::HashMap, fmt::Debug, num::NonZeroU32, pin::Pin, sync::Arc, time::SystemTime,
time::SystemTime,
}; };
use anyhow::Context; use anyhow::Context;
@@ -162,63 +161,6 @@ pub struct Listing {
pub keys: Vec<ListingObject>, pub keys: Vec<ListingObject>,
} }
/// Options for downloads. The default value is a plain GET.
///
/// `byte_start` and `byte_end` together select the byte range to fetch; when
/// both are unbounded, [`DownloadOpts::byte_range`] reports no range at all.
pub struct DownloadOpts {
/// If given, returns [`DownloadError::Unmodified`] if the object still has
/// the same ETag (using If-None-Match).
pub etag: Option<Etag>,
/// The start of the byte range to download, or unbounded. An unbounded start
/// combined with a bounded end is treated as starting at offset 0.
pub byte_start: Bound<u64>,
/// The end of the byte range to download, or unbounded. Must be after the
/// start bound; [`DownloadOpts::byte_range`] panics otherwise.
pub byte_end: Bound<u64>,
}
impl Default for DownloadOpts {
    /// Options describing a plain GET of the whole object: no ETag condition
    /// and no restriction on either end of the byte range.
    fn default() -> Self {
        Self {
            etag: None,
            byte_start: Bound::Unbounded,
            byte_end: Bound::Unbounded,
        }
    }
}
impl DownloadOpts {
    /// Normalizes the configured bounds into `(start, end)` where `start` is
    /// inclusive and `end`, if present, is exclusive.
    ///
    /// Returns `None` when neither bound is set, i.e. the whole object is wanted.
    ///
    /// # Panics
    ///
    /// Panics if a bounded end is at or before the start (empty or inverted range).
    pub fn byte_range(&self) -> Option<(u64, Option<u64>)> {
        // Fully unbounded means "no range"; otherwise resolve the inclusive start.
        let start = match (self.byte_start, self.byte_end) {
            (Bound::Unbounded, Bound::Unbounded) => return None,
            (Bound::Unbounded, _) => 0,
            (Bound::Included(first), _) => first,
            (Bound::Excluded(before_first), _) => before_first + 1,
        };

        // Resolve the exclusive end, if there is one.
        let end = match self.byte_end {
            Bound::Unbounded => None,
            Bound::Included(last) => Some(last + 1),
            Bound::Excluded(limit) => Some(limit),
        };

        if let Some(end) = end {
            assert!(start < end, "range end {end} at or before start {start}");
        }
        Some((start, end))
    }

    /// Formats the byte range as an RFC 2616 `Range` header value, which uses
    /// inclusive bounds on both sides. Returns `None` if the range is unbounded.
    pub fn byte_range_header(&self) -> Option<String> {
        let (start, end_exclusive) = self.byte_range()?;
        let header = match end_exclusive {
            Some(end_exclusive) => {
                // The header wants an inclusive end; byte_range() guarantees end > start >= 0.
                let end = end_exclusive - 1;
                format!("bytes={start}-{end}")
            }
            None => format!("bytes={start}-"),
        };
        Some(header)
    }
}
/// Storage (potentially remote) API to manage its state. /// Storage (potentially remote) API to manage its state.
/// This storage tries to be unaware of any layered repository context, /// This storage tries to be unaware of any layered repository context,
/// providing basic CRUD operations for storage files. /// providing basic CRUD operations for storage files.
@@ -303,7 +245,21 @@ pub trait RemoteStorage: Send + Sync + 'static {
async fn download( async fn download(
&self, &self,
from: &RemotePath, from: &RemotePath,
opts: &DownloadOpts, cancel: &CancellationToken,
) -> Result<Download, DownloadError>;
/// Streams a given byte range of the remote storage entry contents.
///
/// The returned download stream will obey initial timeout and cancellation signal by erroring
/// on whichever happens first. Only one of the reasons will fail the stream, which is usually
/// enough for `tokio::io::copy_buf` usage. If needed the error can be filtered out.
///
/// Returns the metadata, if any was stored with the file previously.
async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken, cancel: &CancellationToken,
) -> Result<Download, DownloadError>; ) -> Result<Download, DownloadError>;
@@ -445,18 +401,43 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
} }
} }
/// See [`RemoteStorage::download`]
pub async fn download( pub async fn download(
&self, &self,
from: &RemotePath, from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken, cancel: &CancellationToken,
) -> Result<Download, DownloadError> { ) -> Result<Download, DownloadError> {
match self { match self {
Self::LocalFs(s) => s.download(from, opts, cancel).await, Self::LocalFs(s) => s.download(from, cancel).await,
Self::AwsS3(s) => s.download(from, opts, cancel).await, Self::AwsS3(s) => s.download(from, cancel).await,
Self::AzureBlob(s) => s.download(from, opts, cancel).await, Self::AzureBlob(s) => s.download(from, cancel).await,
Self::Unreliable(s) => s.download(from, opts, cancel).await, Self::Unreliable(s) => s.download(from, cancel).await,
}
}
pub async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
match self {
Self::LocalFs(s) => {
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
.await
}
Self::AwsS3(s) => {
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
.await
}
Self::AzureBlob(s) => {
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
.await
}
Self::Unreliable(s) => {
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
.await
}
} }
} }
@@ -581,6 +562,20 @@ impl GenericRemoteStorage {
}) })
} }
/// Fetches the storage object at `from`, either in full or restricted to a byte range.
///
/// When `byte_range` is `Some((start, end))`, the request is forwarded to
/// `download_byte_range` with `start` as the inclusive start and `end` as the
/// optional exclusive end. When it is `None`, the whole object is requested
/// via `download`.
pub async fn download_storage_object(
    &self,
    byte_range: Option<(u64, Option<u64>)>,
    from: &RemotePath,
    cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
    if let Some((range_start, range_end)) = byte_range {
        self.download_byte_range(from, range_start, range_end, cancel)
            .await
    } else {
        self.download(from, cancel).await
    }
}
/// The name of the bucket/container/etc. /// The name of the bucket/container/etc.
pub fn bucket_name(&self) -> Option<&str> { pub fn bucket_name(&self) -> Option<&str> {
match self { match self {
@@ -654,76 +649,6 @@ impl ConcurrencyLimiter {
mod tests { mod tests {
use super::*; use super::*;
/// Table-driven check that DownloadOpts::byte_range() yields
/// (inclusive start, optional exclusive end) pairs, or None when both bounds
/// are unbounded, and that byte_range_header() renders the same range with
/// an inclusive end.
#[test]
fn download_opts_byte_range() {
    // Consider using test_case or a similar table-driven test framework.
    // Each row is (byte_start, byte_end, expected byte_range() result).
    let table = [
        (Bound::Unbounded, Bound::Unbounded, None),
        (Bound::Unbounded, Bound::Included(7), Some((0, Some(8)))),
        (Bound::Unbounded, Bound::Excluded(7), Some((0, Some(7)))),
        (Bound::Included(3), Bound::Unbounded, Some((3, None))),
        (Bound::Included(3), Bound::Included(7), Some((3, Some(8)))),
        (Bound::Included(3), Bound::Excluded(7), Some((3, Some(7)))),
        (Bound::Excluded(3), Bound::Unbounded, Some((4, None))),
        (Bound::Excluded(3), Bound::Included(7), Some((4, Some(8)))),
        (Bound::Excluded(3), Bound::Excluded(7), Some((4, Some(7)))),
        // 1-sized ranges are fine, 0 aren't and will panic (separate test).
        (Bound::Included(3), Bound::Included(3), Some((3, Some(4)))),
        (Bound::Included(3), Bound::Excluded(4), Some((3, Some(4)))),
    ];

    for (byte_start, byte_end, want) in table {
        let opts = DownloadOpts {
            byte_start,
            byte_end,
            ..Default::default()
        };

        let got = opts.byte_range();
        assert_eq!(got, want, "byte_start={byte_start:?} byte_end={byte_end:?}");

        // The HTTP header form uses an inclusive end, so shift bounded ends down by one.
        let want_header = want.map(|(start, end)| {
            if let Some(end) = end {
                format!("bytes={start}-{}", end - 1)
            } else {
                format!("bytes={start}-")
            }
        });
        let got_header = opts.byte_range_header();
        assert_eq!(
            got_header, want_header,
            "byte_start={byte_start:?} byte_end={byte_end:?}"
        );
    }
}
/// An empty range (start 3 inclusive, end 3 exclusive) must make
/// DownloadOpts::byte_range() panic.
#[test]
#[should_panic]
fn download_opts_byte_range_zero() {
    let empty_range = DownloadOpts {
        byte_start: Bound::Included(3),
        byte_end: Bound::Excluded(3),
        ..Default::default()
    };
    empty_range.byte_range();
}
/// An inverted range (inclusive end 2 before inclusive start 3) must make
/// DownloadOpts::byte_range() panic.
#[test]
#[should_panic]
fn download_opts_byte_range_negative() {
    let inverted_range = DownloadOpts {
        byte_start: Bound::Included(3),
        byte_end: Bound::Included(2),
        ..Default::default()
    };
    inverted_range.byte_range();
}
#[test] #[test]
fn test_object_name() { fn test_object_name() {
let k = RemotePath::new(Utf8Path::new("a/b/c")).unwrap(); let k = RemotePath::new(Utf8Path::new("a/b/c")).unwrap();

View File

@@ -23,8 +23,8 @@ use tokio_util::{io::ReaderStream, sync::CancellationToken};
use utils::crashsafe::path_with_suffix_extension; use utils::crashsafe::path_with_suffix_extension;
use crate::{ use crate::{
Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject, RemotePath, Download, DownloadError, Listing, ListingMode, ListingObject, RemotePath, TimeTravelError,
TimeTravelError, TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR, TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR,
}; };
use super::{RemoteStorage, StorageMetadata}; use super::{RemoteStorage, StorageMetadata};
@@ -494,41 +494,22 @@ impl RemoteStorage for LocalFs {
async fn download( async fn download(
&self, &self,
from: &RemotePath, from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken, cancel: &CancellationToken,
) -> Result<Download, DownloadError> { ) -> Result<Download, DownloadError> {
let target_path = from.with_base(&self.storage_root); let target_path = from.with_base(&self.storage_root);
let file_metadata = file_metadata(&target_path).await?; let file_metadata = file_metadata(&target_path).await?;
let etag = mock_etag(&file_metadata);
if opts.etag.as_ref() == Some(&etag) { let source = ReaderStream::new(
return Err(DownloadError::Unmodified); fs::OpenOptions::new()
}
let mut file = fs::OpenOptions::new()
.read(true) .read(true)
.open(&target_path) .open(&target_path)
.await .await
.with_context(|| { .with_context(|| {
format!("Failed to open source file {target_path:?} to use in the download") format!("Failed to open source file {target_path:?} to use in the download")
}) })
.map_err(DownloadError::Other)?; .map_err(DownloadError::Other)?,
);
let mut take = file_metadata.len();
if let Some((start, end)) = opts.byte_range() {
if start > 0 {
file.seek(io::SeekFrom::Start(start))
.await
.context("Failed to seek to the range start in a local storage file")
.map_err(DownloadError::Other)?;
}
if let Some(end) = end {
take = end - start;
}
}
let source = ReaderStream::new(file.take(take));
let metadata = self let metadata = self
.read_storage_metadata(&target_path) .read_storage_metadata(&target_path)
@@ -538,6 +519,69 @@ impl RemoteStorage for LocalFs {
let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone()); let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone());
let source = crate::support::DownloadStream::new(cancel_or_timeout, source); let source = crate::support::DownloadStream::new(cancel_or_timeout, source);
let etag = mock_etag(&file_metadata);
Ok(Download {
metadata,
last_modified: file_metadata
.modified()
.map_err(|e| DownloadError::Other(anyhow::anyhow!(e).context("Reading mtime")))?,
etag,
download_stream: Box::pin(source),
})
}
async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
if let Some(end_exclusive) = end_exclusive {
if end_exclusive <= start_inclusive {
return Err(DownloadError::Other(anyhow::anyhow!("Invalid range, start ({start_inclusive}) is not less than end_exclusive ({end_exclusive:?})")));
};
if start_inclusive == end_exclusive.saturating_sub(1) {
return Err(DownloadError::Other(anyhow::anyhow!("Invalid range, start ({start_inclusive}) and end_exclusive ({end_exclusive:?}) difference is zero bytes")));
}
}
let target_path = from.with_base(&self.storage_root);
let file_metadata = file_metadata(&target_path).await?;
let mut source = tokio::fs::OpenOptions::new()
.read(true)
.open(&target_path)
.await
.with_context(|| {
format!("Failed to open source file {target_path:?} to use in the download")
})
.map_err(DownloadError::Other)?;
let len = source
.metadata()
.await
.context("query file length")
.map_err(DownloadError::Other)?
.len();
source
.seek(io::SeekFrom::Start(start_inclusive))
.await
.context("Failed to seek to the range start in a local storage file")
.map_err(DownloadError::Other)?;
let metadata = self
.read_storage_metadata(&target_path)
.await
.map_err(DownloadError::Other)?;
let source = source.take(end_exclusive.unwrap_or(len) - start_inclusive);
let source = ReaderStream::new(source);
let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone());
let source = crate::support::DownloadStream::new(cancel_or_timeout, source);
let etag = mock_etag(&file_metadata);
Ok(Download { Ok(Download {
metadata, metadata,
last_modified: file_metadata last_modified: file_metadata
@@ -639,7 +683,7 @@ mod fs_tests {
use super::*; use super::*;
use camino_tempfile::tempdir; use camino_tempfile::tempdir;
use std::{collections::HashMap, io::Write, ops::Bound}; use std::{collections::HashMap, io::Write};
async fn read_and_check_metadata( async fn read_and_check_metadata(
storage: &LocalFs, storage: &LocalFs,
@@ -648,7 +692,7 @@ mod fs_tests {
) -> anyhow::Result<String> { ) -> anyhow::Result<String> {
let cancel = CancellationToken::new(); let cancel = CancellationToken::new();
let download = storage let download = storage
.download(remote_storage_path, &DownloadOpts::default(), &cancel) .download(remote_storage_path, &cancel)
.await .await
.map_err(|e| anyhow::anyhow!("Download failed: {e}"))?; .map_err(|e| anyhow::anyhow!("Download failed: {e}"))?;
ensure!( ensure!(
@@ -729,8 +773,8 @@ mod fs_tests {
"We should upload and download the same contents" "We should upload and download the same contents"
); );
let non_existing_path = RemotePath::new(Utf8Path::new("somewhere/else"))?; let non_existing_path = "somewhere/else";
match storage.download(&non_existing_path, &DownloadOpts::default(), &cancel).await { match storage.download(&RemotePath::new(Utf8Path::new(non_existing_path))?, &cancel).await {
Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys
other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"), other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"),
} }
@@ -755,12 +799,10 @@ mod fs_tests {
let (first_part_local, second_part_local) = uploaded_bytes.split_at(3); let (first_part_local, second_part_local) = uploaded_bytes.split_at(3);
let first_part_download = storage let first_part_download = storage
.download( .download_byte_range(
&upload_target, &upload_target,
&DownloadOpts { 0,
byte_end: Bound::Excluded(first_part_local.len() as u64), Some(first_part_local.len() as u64),
..Default::default()
},
&cancel, &cancel,
) )
.await?; .await?;
@@ -776,15 +818,10 @@ mod fs_tests {
); );
let second_part_download = storage let second_part_download = storage
.download( .download_byte_range(
&upload_target, &upload_target,
&DownloadOpts { first_part_local.len() as u64,
byte_start: Bound::Included(first_part_local.len() as u64), Some((first_part_local.len() + second_part_local.len()) as u64),
byte_end: Bound::Excluded(
(first_part_local.len() + second_part_local.len()) as u64,
),
..Default::default()
},
&cancel, &cancel,
) )
.await?; .await?;
@@ -800,14 +837,7 @@ mod fs_tests {
); );
let suffix_bytes = storage let suffix_bytes = storage
.download( .download_byte_range(&upload_target, 13, None, &cancel)
&upload_target,
&DownloadOpts {
byte_start: Bound::Included(13),
..Default::default()
},
&cancel,
)
.await? .await?
.download_stream; .download_stream;
let suffix_bytes = aggregate(suffix_bytes).await?; let suffix_bytes = aggregate(suffix_bytes).await?;
@@ -815,7 +845,7 @@ mod fs_tests {
assert_eq!(upload_name, suffix); assert_eq!(upload_name, suffix);
let all_bytes = storage let all_bytes = storage
.download(&upload_target, &DownloadOpts::default(), &cancel) .download_byte_range(&upload_target, 0, None, &cancel)
.await? .await?
.download_stream; .download_stream;
let all_bytes = aggregate(all_bytes).await?; let all_bytes = aggregate(all_bytes).await?;
@@ -826,26 +856,48 @@ mod fs_tests {
} }
#[tokio::test] #[tokio::test]
#[should_panic(expected = "at or before start")] async fn download_file_range_negative() -> anyhow::Result<()> {
async fn download_file_range_negative() { let (storage, cancel) = create_storage()?;
let (storage, cancel) = create_storage().unwrap();
let upload_name = "upload_1"; let upload_name = "upload_1";
let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel) let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel).await?;
.await
.unwrap();
storage let start = 1_000_000_000;
.download( let end = start + 1;
match storage
.download_byte_range(
&upload_target, &upload_target,
&DownloadOpts { start,
byte_start: Bound::Included(10), Some(end), // exclusive end
byte_end: Bound::Excluded(10),
..Default::default()
},
&cancel, &cancel,
) )
.await .await
.unwrap(); {
Ok(_) => panic!("Should not allow downloading wrong ranges"),
Err(e) => {
let error_string = e.to_string();
assert!(error_string.contains("zero bytes"));
assert!(error_string.contains(&start.to_string()));
assert!(error_string.contains(&end.to_string()));
}
}
let start = 10000;
let end = 234;
assert!(start > end, "Should test an incorrect range");
match storage
.download_byte_range(&upload_target, start, Some(end), &cancel)
.await
{
Ok(_) => panic!("Should not allow downloading wrong ranges"),
Err(e) => {
let error_string = e.to_string();
assert!(error_string.contains("Invalid range"));
assert!(error_string.contains(&start.to_string()));
assert!(error_string.contains(&end.to_string()));
}
}
Ok(())
} }
#[tokio::test] #[tokio::test]
@@ -888,12 +940,10 @@ mod fs_tests {
let (first_part_local, _) = uploaded_bytes.split_at(3); let (first_part_local, _) = uploaded_bytes.split_at(3);
let partial_download_with_metadata = storage let partial_download_with_metadata = storage
.download( .download_byte_range(
&upload_target, &upload_target,
&DownloadOpts { 0,
byte_end: Bound::Excluded(first_part_local.len() as u64), Some(first_part_local.len() as u64),
..Default::default()
},
&cancel, &cancel,
) )
.await?; .await?;
@@ -1051,13 +1101,7 @@ mod fs_tests {
storage.upload(body, len, &path, None, &cancel).await?; storage.upload(body, len, &path, None, &cancel).await?;
} }
let read = aggregate( let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
storage
.download(&path, &DownloadOpts::default(), &cancel)
.await?
.download_stream,
)
.await?;
assert_eq!(body, read); assert_eq!(body, read);
let shorter = Bytes::from_static(b"shorter body"); let shorter = Bytes::from_static(b"shorter body");
@@ -1068,13 +1112,7 @@ mod fs_tests {
storage.upload(body, len, &path, None, &cancel).await?; storage.upload(body, len, &path, None, &cancel).await?;
} }
let read = aggregate( let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
storage
.download(&path, &DownloadOpts::default(), &cancel)
.await?
.download_stream,
)
.await?;
assert_eq!(shorter, read); assert_eq!(shorter, read);
Ok(()) Ok(())
} }
@@ -1107,13 +1145,7 @@ mod fs_tests {
storage.upload(body, len, &path, None, &cancel).await?; storage.upload(body, len, &path, None, &cancel).await?;
} }
let read = aggregate( let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
storage
.download(&path, &DownloadOpts::default(), &cancel)
.await?
.download_stream,
)
.await?;
assert_eq!(body, read); assert_eq!(body, read);
Ok(()) Ok(())

View File

@@ -28,7 +28,6 @@ use aws_sdk_s3::{
Client, Client,
}; };
use aws_smithy_async::rt::sleep::TokioSleep; use aws_smithy_async::rt::sleep::TokioSleep;
use http_types::StatusCode;
use aws_smithy_types::{body::SdkBody, DateTime}; use aws_smithy_types::{body::SdkBody, DateTime};
use aws_smithy_types::{byte_stream::ByteStream, date_time::ConversionError}; use aws_smithy_types::{byte_stream::ByteStream, date_time::ConversionError};
@@ -45,8 +44,8 @@ use crate::{
error::Cancelled, error::Cancelled,
metrics::{start_counting_cancelled_wait, start_measuring_requests}, metrics::{start_counting_cancelled_wait, start_measuring_requests},
support::PermitCarrying, support::PermitCarrying,
ConcurrencyLimiter, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject, ConcurrencyLimiter, Download, DownloadError, Listing, ListingMode, ListingObject, RemotePath,
RemotePath, RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE, RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE,
REMOTE_STORAGE_PREFIX_SEPARATOR, REMOTE_STORAGE_PREFIX_SEPARATOR,
}; };
@@ -68,7 +67,6 @@ pub struct S3Bucket {
struct GetObjectRequest { struct GetObjectRequest {
bucket: String, bucket: String,
key: String, key: String,
etag: Option<String>,
range: Option<String>, range: Option<String>,
} }
impl S3Bucket { impl S3Bucket {
@@ -250,18 +248,13 @@ impl S3Bucket {
let started_at = start_measuring_requests(kind); let started_at = start_measuring_requests(kind);
let mut builder = self let get_object = self
.client .client
.get_object() .get_object()
.bucket(request.bucket) .bucket(request.bucket)
.key(request.key) .key(request.key)
.set_range(request.range); .set_range(request.range)
.send();
if let Some(etag) = request.etag {
builder = builder.if_none_match(etag);
}
let get_object = builder.send();
let get_object = tokio::select! { let get_object = tokio::select! {
res = get_object => res, res = get_object => res,
@@ -284,20 +277,6 @@ impl S3Bucket {
); );
return Err(DownloadError::NotFound); return Err(DownloadError::NotFound);
} }
Err(SdkError::ServiceError(e))
// aws_smithy_runtime_api::http::response::StatusCode isn't
// re-exported by any aws crates, so just check the numeric
// status against http_types::StatusCode instead of pulling it.
if e.raw().status().as_u16() == StatusCode::NotModified =>
{
// Count an unmodified file as a success.
crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
kind,
AttemptOutcome::Ok,
started_at,
);
return Err(DownloadError::Unmodified);
}
Err(e) => { Err(e) => {
crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed( crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
kind, kind,
@@ -794,7 +773,6 @@ impl RemoteStorage for S3Bucket {
async fn download( async fn download(
&self, &self,
from: &RemotePath, from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken, cancel: &CancellationToken,
) -> Result<Download, DownloadError> { ) -> Result<Download, DownloadError> {
// if prefix is not none then download file `prefix/from` // if prefix is not none then download file `prefix/from`
@@ -803,8 +781,33 @@ impl RemoteStorage for S3Bucket {
GetObjectRequest { GetObjectRequest {
bucket: self.bucket_name.clone(), bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from), key: self.relative_path_to_s3_object(from),
etag: opts.etag.as_ref().map(|e| e.to_string()), range: None,
range: opts.byte_range_header(), },
cancel,
)
.await
}
async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
// S3 accepts ranges as https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
// and needs both ends to be inclusive
let end_inclusive = end_exclusive.map(|end| end.saturating_sub(1));
let range = Some(match end_inclusive {
Some(end_inclusive) => format!("bytes={start_inclusive}-{end_inclusive}"),
None => format!("bytes={start_inclusive}-"),
});
self.download_object(
GetObjectRequest {
bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from),
range,
}, },
cancel, cancel,
) )

View File

@@ -12,8 +12,8 @@ use std::{collections::hash_map::Entry, sync::Arc};
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use crate::{ use crate::{
Download, DownloadError, DownloadOpts, GenericRemoteStorage, Listing, ListingMode, RemotePath, Download, DownloadError, GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorage,
RemoteStorage, StorageMetadata, TimeTravelError, StorageMetadata, TimeTravelError,
}; };
pub struct UnreliableWrapper { pub struct UnreliableWrapper {
@@ -167,14 +167,28 @@ impl RemoteStorage for UnreliableWrapper {
async fn download( async fn download(
&self, &self,
from: &RemotePath, from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken, cancel: &CancellationToken,
) -> Result<Download, DownloadError> { ) -> Result<Download, DownloadError> {
// Note: We treat any byte range as an "attempt" of the same operation.
// We don't pay attention to the ranges. That's good enough for now.
self.attempt(RemoteOp::Download(from.clone())) self.attempt(RemoteOp::Download(from.clone()))
.map_err(DownloadError::Other)?; .map_err(DownloadError::Other)?;
self.inner.download(from, opts, cancel).await self.inner.download(from, cancel).await
}
async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
// Note: We treat any download_byte_range as an "attempt" of the same
// operation. We don't pay attention to the ranges. That's good enough
// for now.
self.attempt(RemoteOp::Download(from.clone()))
.map_err(DownloadError::Other)?;
self.inner
.download_byte_range(from, start_inclusive, end_exclusive, cancel)
.await
} }
async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()> { async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()> {

View File

@@ -1,8 +1,8 @@
use anyhow::Context; use anyhow::Context;
use camino::Utf8Path; use camino::Utf8Path;
use futures::StreamExt; use futures::StreamExt;
use remote_storage::{DownloadError, DownloadOpts, ListingMode, ListingObject, RemotePath}; use remote_storage::ListingMode;
use std::ops::Bound; use remote_storage::RemotePath;
use std::sync::Arc; use std::sync::Arc;
use std::{collections::HashSet, num::NonZeroU32}; use std::{collections::HashSet, num::NonZeroU32};
use test_context::test_context; use test_context::test_context;
@@ -284,25 +284,14 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
ctx.client.upload(data, len, &path, None, &cancel).await?; ctx.client.upload(data, len, &path, None, &cancel).await?;
// Normal download request // Normal download request
let dl = ctx let dl = ctx.client.download(&path, &cancel).await?;
.client
.download(&path, &DownloadOpts::default(), &cancel)
.await?;
let buf = download_to_vec(dl).await?; let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig); assert_eq!(&buf, &orig);
// Full range (end specified) // Full range (end specified)
let dl = ctx let dl = ctx
.client .client
.download( .download_byte_range(&path, 0, Some(len as u64), &cancel)
&path,
&DownloadOpts {
byte_start: Bound::Included(0),
byte_end: Bound::Excluded(len as u64),
..Default::default()
},
&cancel,
)
.await?; .await?;
let buf = download_to_vec(dl).await?; let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig); assert_eq!(&buf, &orig);
@@ -310,15 +299,7 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
// partial range (end specified) // partial range (end specified)
let dl = ctx let dl = ctx
.client .client
.download( .download_byte_range(&path, 4, Some(10), &cancel)
&path,
&DownloadOpts {
byte_start: Bound::Included(4),
byte_end: Bound::Excluded(10),
..Default::default()
},
&cancel,
)
.await?; .await?;
let buf = download_to_vec(dl).await?; let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[4..10]); assert_eq!(&buf, &orig[4..10]);
@@ -326,15 +307,7 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
// partial range (end beyond real end) // partial range (end beyond real end)
let dl = ctx let dl = ctx
.client .client
.download( .download_byte_range(&path, 8, Some(len as u64 * 100), &cancel)
&path,
&DownloadOpts {
byte_start: Bound::Included(8),
byte_end: Bound::Excluded(len as u64 * 100),
..Default::default()
},
&cancel,
)
.await?; .await?;
let buf = download_to_vec(dl).await?; let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[8..]); assert_eq!(&buf, &orig[8..]);
@@ -342,14 +315,7 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
// Partial range (end unspecified) // Partial range (end unspecified)
let dl = ctx let dl = ctx
.client .client
.download( .download_byte_range(&path, 4, None, &cancel)
&path,
&DownloadOpts {
byte_start: Bound::Included(4),
..Default::default()
},
&cancel,
)
.await?; .await?;
let buf = download_to_vec(dl).await?; let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[4..]); assert_eq!(&buf, &orig[4..]);
@@ -357,14 +323,7 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
// Full range (end unspecified) // Full range (end unspecified)
let dl = ctx let dl = ctx
.client .client
.download( .download_byte_range(&path, 0, None, &cancel)
&path,
&DownloadOpts {
byte_start: Bound::Included(0),
..Default::default()
},
&cancel,
)
.await?; .await?;
let buf = download_to_vec(dl).await?; let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig); assert_eq!(&buf, &orig);
@@ -378,54 +337,6 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
Ok(()) Ok(())
} }
/// Tests that conditional downloads work properly, by returning
/// DownloadError::Unmodified when the object ETag matches the given ETag.
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn download_conditional(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
let MaybeEnabledStorage::Enabled(ctx) = ctx else {
return Ok(());
};
let cancel = CancellationToken::new();
// Create a file.
let path = RemotePath::new(Utf8Path::new(format!("{}/file", ctx.base_prefix).as_str()))?;
let data = bytes::Bytes::from_static("foo".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
// Download it to obtain its etag.
let mut opts = DownloadOpts::default();
let download = ctx.client.download(&path, &opts, &cancel).await?;
// Download with the etag yields DownloadError::Unmodified.
opts.etag = Some(download.etag);
let result = ctx.client.download(&path, &opts, &cancel).await;
assert!(
matches!(result, Err(DownloadError::Unmodified)),
"expected DownloadError::Unmodified, got {result:?}"
);
// Replace the file contents.
let data = bytes::Bytes::from_static("bar".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
// A download with the old etag should yield the new file.
let download = ctx.client.download(&path, &opts, &cancel).await?;
assert_ne!(download.etag, opts.etag.unwrap(), "ETag did not change");
// A download with the new etag should yield Unmodified again.
opts.etag = Some(download.etag);
let result = ctx.client.download(&path, &opts, &cancel).await;
assert!(
matches!(result, Err(DownloadError::Unmodified)),
"expected DownloadError::Unmodified, got {result:?}"
);
Ok(())
}
#[test_context(MaybeEnabledStorage)] #[test_context(MaybeEnabledStorage)]
#[tokio::test] #[tokio::test]
async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> { async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
@@ -453,10 +364,7 @@ async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
// Normal download request // Normal download request
ctx.client.copy_object(&path, &path_dest, &cancel).await?; ctx.client.copy_object(&path, &path_dest, &cancel).await?;
let dl = ctx let dl = ctx.client.download(&path_dest, &cancel).await?;
.client
.download(&path_dest, &DownloadOpts::default(), &cancel)
.await?;
let buf = download_to_vec(dl).await?; let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig); assert_eq!(&buf, &orig);
@@ -468,56 +376,3 @@ async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
Ok(()) Ok(())
} }
/// Tests that head_object works properly.
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn head_object(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
let MaybeEnabledStorage::Enabled(ctx) = ctx else {
return Ok(());
};
let cancel = CancellationToken::new();
let path = RemotePath::new(Utf8Path::new(format!("{}/file", ctx.base_prefix).as_str()))?;
// Errors on missing file.
let result = ctx.client.head_object(&path, &cancel).await;
assert!(
matches!(result, Err(DownloadError::NotFound)),
"expected NotFound, got {result:?}"
);
// Create the file.
let data = bytes::Bytes::from_static("foo".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
// Fetch the head metadata.
let object = ctx.client.head_object(&path, &cancel).await?;
assert_eq!(
object,
ListingObject {
key: path.clone(),
last_modified: object.last_modified, // ignore
size: 3
}
);
// Wait for a couple of seconds, and then update the file to check the last
// modified timestamp.
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
let data = bytes::Bytes::from_static("bar".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
let new = ctx.client.head_object(&path, &cancel).await?;
assert!(
!new.last_modified
.duration_since(object.last_modified)?
.is_zero(),
"last_modified did not advance"
);
Ok(())
}

View File

@@ -12,8 +12,8 @@ use anyhow::Context;
use camino::Utf8Path; use camino::Utf8Path;
use futures_util::StreamExt; use futures_util::StreamExt;
use remote_storage::{ use remote_storage::{
DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath, DownloadError, GenericRemoteStorage, ListingMode, RemotePath, RemoteStorageConfig,
RemoteStorageConfig, RemoteStorageKind, S3Config, RemoteStorageKind, S3Config,
}; };
use test_context::test_context; use test_context::test_context;
use test_context::AsyncTestContext; use test_context::AsyncTestContext;
@@ -121,8 +121,7 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
// A little check to ensure that our clock is not too far off from the S3 clock // A little check to ensure that our clock is not too far off from the S3 clock
{ {
let opts = DownloadOpts::default(); let dl = retry(|| ctx.client.download(&path2, &cancel)).await?;
let dl = retry(|| ctx.client.download(&path2, &opts, &cancel)).await?;
let last_modified = dl.last_modified; let last_modified = dl.last_modified;
let half_wt = WAIT_TIME.mul_f32(0.5); let half_wt = WAIT_TIME.mul_f32(0.5);
let t0_hwt = t0 + half_wt; let t0_hwt = t0 + half_wt;
@@ -160,12 +159,7 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
let t2_files_recovered = list_files(&ctx.client, &cancel).await?; let t2_files_recovered = list_files(&ctx.client, &cancel).await?;
println!("after recovery to t2: {t2_files_recovered:?}"); println!("after recovery to t2: {t2_files_recovered:?}");
assert_eq!(t2_files, t2_files_recovered); assert_eq!(t2_files, t2_files_recovered);
let path2_recovered_t2 = download_to_vec( let path2_recovered_t2 = download_to_vec(ctx.client.download(&path2, &cancel).await?).await?;
ctx.client
.download(&path2, &DownloadOpts::default(), &cancel)
.await?,
)
.await?;
assert_eq!(path2_recovered_t2, new_data.as_bytes()); assert_eq!(path2_recovered_t2, new_data.as_bytes());
// after recovery to t1: path1 is back, path2 has the old content // after recovery to t1: path1 is back, path2 has the old content
@@ -176,12 +170,7 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
let t1_files_recovered = list_files(&ctx.client, &cancel).await?; let t1_files_recovered = list_files(&ctx.client, &cancel).await?;
println!("after recovery to t1: {t1_files_recovered:?}"); println!("after recovery to t1: {t1_files_recovered:?}");
assert_eq!(t1_files, t1_files_recovered); assert_eq!(t1_files, t1_files_recovered);
let path2_recovered_t1 = download_to_vec( let path2_recovered_t1 = download_to_vec(ctx.client.download(&path2, &cancel).await?).await?;
ctx.client
.download(&path2, &DownloadOpts::default(), &cancel)
.await?,
)
.await?;
assert_eq!(path2_recovered_t1, old_data.as_bytes()); assert_eq!(path2_recovered_t1, old_data.as_bytes());
// after recovery to t0: everything is gone except for path1 // after recovery to t0: everything is gone except for path1
@@ -427,7 +416,7 @@ async fn download_is_timeouted(ctx: &mut MaybeEnabledStorage) {
let started_at = std::time::Instant::now(); let started_at = std::time::Instant::now();
let mut stream = ctx let mut stream = ctx
.client .client
.download(&path, &DownloadOpts::default(), &cancel) .download(&path, &cancel)
.await .await
.expect("download succeeds") .expect("download succeeds")
.download_stream; .download_stream;
@@ -502,7 +491,7 @@ async fn download_is_cancelled(ctx: &mut MaybeEnabledStorage) {
{ {
let stream = ctx let stream = ctx
.client .client
.download(&path, &DownloadOpts::default(), &cancel) .download(&path, &cancel)
.await .await
.expect("download succeeds") .expect("download succeeds")
.download_stream; .download_stream;

View File

@@ -79,7 +79,8 @@ pub struct Config {
/// memory. /// memory.
/// ///
/// The default value of `0.15` means that we *guarantee* sending upscale requests if the /// The default value of `0.15` means that we *guarantee* sending upscale requests if the
/// cgroup is using more than 85% of total memory. /// cgroup is using more than 85% of total memory (even if we're *not* separately reserving
/// memory for the file cache).
cgroup_min_overhead_fraction: f64, cgroup_min_overhead_fraction: f64,
cgroup_downscale_threshold_buffer_bytes: u64, cgroup_downscale_threshold_buffer_bytes: u64,
@@ -96,12 +97,24 @@ impl Default for Config {
} }
impl Config { impl Config {
fn cgroup_threshold(&self, total_mem: u64) -> u64 { fn cgroup_threshold(&self, total_mem: u64, file_cache_disk_size: u64) -> u64 {
// We want our threshold to be met gracefully instead of letting postgres get OOM-killed // If the file cache is in tmpfs, then it will count towards shmem usage of the cgroup,
// (or if there's room, spilling to swap). // and thus be non-reclaimable, so we should allow for additional memory usage.
//
// If the file cache sits on disk, our desired stable system state is for it to be fully
// page cached (its contents should only be paged to/from disk in situations where we can't
// upscale fast enough). Page-cached memory is reclaimable, so we need to lower the
// threshold for non-reclaimable memory so we scale up *before* the kernel starts paging
// out the file cache.
let memory_remaining_for_cgroup = total_mem.saturating_sub(file_cache_disk_size);
// Even if we're not separately making room for the file cache (if it's in tmpfs), we still
// want our threshold to be met gracefully instead of letting postgres get OOM-killed.
// So we guarantee that there's at least `cgroup_min_overhead_fraction` of total memory // So we guarantee that there's at least `cgroup_min_overhead_fraction` of total memory
// remaining above the threshold. // remaining above the threshold.
(total_mem as f64 * (1.0 - self.cgroup_min_overhead_fraction)) as u64 let max_threshold = (total_mem as f64 * (1.0 - self.cgroup_min_overhead_fraction)) as u64;
memory_remaining_for_cgroup.min(max_threshold)
} }
} }
@@ -136,6 +149,11 @@ impl Runner {
let mem = get_total_system_memory(); let mem = get_total_system_memory();
let mut file_cache_disk_size = 0;
// We need to process file cache initialization before cgroup initialization, so that the memory
// allocated to the file cache is appropriately taken into account when we decide the cgroup's
// memory limits.
if let Some(connstr) = &args.pgconnstr { if let Some(connstr) = &args.pgconnstr {
info!("initializing file cache"); info!("initializing file cache");
let config = FileCacheConfig::default(); let config = FileCacheConfig::default();
@@ -166,6 +184,7 @@ impl Runner {
info!("file cache size actually got set to {actual_size}") info!("file cache size actually got set to {actual_size}")
} }
file_cache_disk_size = actual_size;
state.filecache = Some(file_cache); state.filecache = Some(file_cache);
} }
@@ -188,7 +207,7 @@ impl Runner {
cgroup.watch(hist_tx).await cgroup.watch(hist_tx).await
}); });
let threshold = state.config.cgroup_threshold(mem); let threshold = state.config.cgroup_threshold(mem, file_cache_disk_size);
info!(threshold, "set initial cgroup threshold",); info!(threshold, "set initial cgroup threshold",);
state.cgroup = Some(CgroupState { state.cgroup = Some(CgroupState {
@@ -240,7 +259,9 @@ impl Runner {
return Ok((false, status.to_owned())); return Ok((false, status.to_owned()));
} }
let new_threshold = self.config.cgroup_threshold(usable_system_memory); let new_threshold = self
.config
.cgroup_threshold(usable_system_memory, expected_file_cache_size);
let current = last_history.avg_non_reclaimable; let current = last_history.avg_non_reclaimable;
@@ -261,11 +282,13 @@ impl Runner {
// The downscaling has been approved. Downscale the file cache, then the cgroup. // The downscaling has been approved. Downscale the file cache, then the cgroup.
let mut status = vec![]; let mut status = vec![];
let mut file_cache_disk_size = 0;
if let Some(file_cache) = &mut self.filecache { if let Some(file_cache) = &mut self.filecache {
let actual_usage = file_cache let actual_usage = file_cache
.set_file_cache_size(expected_file_cache_size) .set_file_cache_size(expected_file_cache_size)
.await .await
.context("failed to set file cache size")?; .context("failed to set file cache size")?;
file_cache_disk_size = actual_usage;
let message = format!( let message = format!(
"set file cache size to {} MiB", "set file cache size to {} MiB",
bytes_to_mebibytes(actual_usage), bytes_to_mebibytes(actual_usage),
@@ -275,7 +298,9 @@ impl Runner {
} }
if let Some(cgroup) = &mut self.cgroup { if let Some(cgroup) = &mut self.cgroup {
let new_threshold = self.config.cgroup_threshold(usable_system_memory); let new_threshold = self
.config
.cgroup_threshold(usable_system_memory, file_cache_disk_size);
let message = format!( let message = format!(
"set cgroup memory threshold from {} MiB to {} MiB, of new total {} MiB", "set cgroup memory threshold from {} MiB to {} MiB, of new total {} MiB",
@@ -304,6 +329,7 @@ impl Runner {
let new_mem = resources.mem; let new_mem = resources.mem;
let usable_system_memory = new_mem.saturating_sub(self.config.sys_buffer_bytes); let usable_system_memory = new_mem.saturating_sub(self.config.sys_buffer_bytes);
let mut file_cache_disk_size = 0;
if let Some(file_cache) = &mut self.filecache { if let Some(file_cache) = &mut self.filecache {
let expected_usage = file_cache.config.calculate_cache_size(usable_system_memory); let expected_usage = file_cache.config.calculate_cache_size(usable_system_memory);
info!( info!(
@@ -316,6 +342,7 @@ impl Runner {
.set_file_cache_size(expected_usage) .set_file_cache_size(expected_usage)
.await .await
.context("failed to set file cache size")?; .context("failed to set file cache size")?;
file_cache_disk_size = actual_usage;
if actual_usage != expected_usage { if actual_usage != expected_usage {
warn!( warn!(
@@ -327,7 +354,9 @@ impl Runner {
} }
if let Some(cgroup) = &mut self.cgroup { if let Some(cgroup) = &mut self.cgroup {
let new_threshold = self.config.cgroup_threshold(usable_system_memory); let new_threshold = self
.config
.cgroup_threshold(usable_system_memory, file_cache_disk_size);
info!( info!(
"set cgroup memory threshold from {} MiB to {} MiB of new total {} MiB", "set cgroup memory threshold from {} MiB to {} MiB of new total {} MiB",

View File

@@ -703,8 +703,6 @@ async fn timeline_archival_config_handler(
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?; let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
let request_data: TimelineArchivalConfigRequest = json_request(&mut request).await?; let request_data: TimelineArchivalConfigRequest = json_request(&mut request).await?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?; check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let state = get_state(&request); let state = get_state(&request);
@@ -715,7 +713,7 @@ async fn timeline_archival_config_handler(
.get_attached_tenant_shard(tenant_shard_id)?; .get_attached_tenant_shard(tenant_shard_id)?;
tenant tenant
.apply_timeline_archival_config(timeline_id, request_data.state, ctx) .apply_timeline_archival_config(timeline_id, request_data.state)
.await?; .await?;
Ok::<_, ApiError>(()) Ok::<_, ApiError>(())
} }

View File

@@ -38,7 +38,6 @@ use std::future::Future;
use std::sync::Weak; use std::sync::Weak;
use std::time::SystemTime; use std::time::SystemTime;
use storage_broker::BrokerClientChannel; use storage_broker::BrokerClientChannel;
use timeline::offload::offload_timeline;
use tokio::io::BufReader; use tokio::io::BufReader;
use tokio::sync::watch; use tokio::sync::watch;
use tokio::task::JoinSet; use tokio::task::JoinSet;
@@ -288,13 +287,9 @@ pub struct Tenant {
/// During timeline creation, we first insert the TimelineId to the /// During timeline creation, we first insert the TimelineId to the
/// creating map, then `timelines`, then remove it from the creating map. /// creating map, then `timelines`, then remove it from the creating map.
/// **Lock order**: if acquiring both, acquire `timelines` before `timelines_creating` /// **Lock order**: if acquiring both, acquire `timelines` before `timelines_creating`
timelines_creating: std::sync::Mutex<HashSet<TimelineId>>, timelines_creating: std::sync::Mutex<HashSet<TimelineId>>,
/// Possibly offloaded and archived timelines
/// **Lock order**: if acquiring both, acquire`timelines` before `timelines_offloaded`
timelines_offloaded: Mutex<HashMap<TimelineId, Arc<OffloadedTimeline>>>,
// This mutex prevents creation of new timelines during GC. // This mutex prevents creation of new timelines during GC.
// Adding yet another mutex (in addition to `timelines`) is needed because holding // Adding yet another mutex (in addition to `timelines`) is needed because holding
// `timelines` mutex during all GC iteration // `timelines` mutex during all GC iteration
@@ -489,65 +484,6 @@ impl WalRedoManager {
} }
} }
pub struct OffloadedTimeline {
pub tenant_shard_id: TenantShardId,
pub timeline_id: TimelineId,
pub ancestor_timeline_id: Option<TimelineId>,
// TODO: once we persist offloaded state, make this lazily constructed
pub remote_client: Arc<RemoteTimelineClient>,
/// Prevent two tasks from deleting the timeline at the same time. If held, the
/// timeline is being deleted. If 'true', the timeline has already been deleted.
pub delete_progress: Arc<tokio::sync::Mutex<DeleteTimelineFlow>>,
}
impl OffloadedTimeline {
fn from_timeline(timeline: &Timeline) -> Self {
Self {
tenant_shard_id: timeline.tenant_shard_id,
timeline_id: timeline.timeline_id,
ancestor_timeline_id: timeline.get_ancestor_timeline_id(),
remote_client: timeline.remote_client.clone(),
delete_progress: timeline.delete_progress.clone(),
}
}
}
#[derive(Clone)]
pub enum TimelineOrOffloaded {
Timeline(Arc<Timeline>),
Offloaded(Arc<OffloadedTimeline>),
}
impl TimelineOrOffloaded {
pub fn tenant_shard_id(&self) -> TenantShardId {
match self {
TimelineOrOffloaded::Timeline(timeline) => timeline.tenant_shard_id,
TimelineOrOffloaded::Offloaded(offloaded) => offloaded.tenant_shard_id,
}
}
pub fn timeline_id(&self) -> TimelineId {
match self {
TimelineOrOffloaded::Timeline(timeline) => timeline.timeline_id,
TimelineOrOffloaded::Offloaded(offloaded) => offloaded.timeline_id,
}
}
pub fn delete_progress(&self) -> &Arc<tokio::sync::Mutex<DeleteTimelineFlow>> {
match self {
TimelineOrOffloaded::Timeline(timeline) => &timeline.delete_progress,
TimelineOrOffloaded::Offloaded(offloaded) => &offloaded.delete_progress,
}
}
pub fn remote_client(&self) -> &Arc<RemoteTimelineClient> {
match self {
TimelineOrOffloaded::Timeline(timeline) => &timeline.remote_client,
TimelineOrOffloaded::Offloaded(offloaded) => &offloaded.remote_client,
}
}
}
#[derive(Debug, thiserror::Error, PartialEq, Eq)] #[derive(Debug, thiserror::Error, PartialEq, Eq)]
pub enum GetTimelineError { pub enum GetTimelineError {
#[error("Timeline is shutting down")] #[error("Timeline is shutting down")]
@@ -1470,10 +1406,30 @@ impl Tenant {
} }
} }
fn check_to_be_archived_has_no_unarchived_children( pub(crate) async fn apply_timeline_archival_config(
&self,
timeline_id: TimelineId, timeline_id: TimelineId,
timelines: &std::sync::MutexGuard<'_, HashMap<TimelineId, Arc<Timeline>>>, state: TimelineArchivalState,
) -> Result<(), TimelineArchivalError> { ) -> Result<(), TimelineArchivalError> {
info!("setting timeline archival config");
let timeline = {
let timelines = self.timelines.lock().unwrap();
let Some(timeline) = timelines.get(&timeline_id) else {
return Err(TimelineArchivalError::NotFound);
};
if state == TimelineArchivalState::Unarchived {
if let Some(ancestor_timeline) = timeline.ancestor_timeline() {
if ancestor_timeline.is_archived() == Some(true) {
return Err(TimelineArchivalError::HasArchivedParent(
ancestor_timeline.timeline_id,
));
}
}
}
// Ensure that there are no non-archived child timelines
let children: Vec<TimelineId> = timelines let children: Vec<TimelineId> = timelines
.iter() .iter()
.filter_map(|(id, entry)| { .filter_map(|(id, entry)| {
@@ -1487,175 +1443,15 @@ impl Tenant {
}) })
.collect(); .collect();
if !children.is_empty() { if !children.is_empty() && state == TimelineArchivalState::Archived {
return Err(TimelineArchivalError::HasUnarchivedChildren(children)); return Err(TimelineArchivalError::HasUnarchivedChildren(children));
} }
Ok(()) Arc::clone(timeline)
}
fn check_ancestor_of_to_be_unarchived_is_not_archived(
ancestor_timeline_id: TimelineId,
timelines: &std::sync::MutexGuard<'_, HashMap<TimelineId, Arc<Timeline>>>,
offloaded_timelines: &std::sync::MutexGuard<
'_,
HashMap<TimelineId, Arc<OffloadedTimeline>>,
>,
) -> Result<(), TimelineArchivalError> {
let has_archived_parent =
if let Some(ancestor_timeline) = timelines.get(&ancestor_timeline_id) {
ancestor_timeline.is_archived() == Some(true)
} else if offloaded_timelines.contains_key(&ancestor_timeline_id) {
true
} else {
error!("ancestor timeline {ancestor_timeline_id} not found");
if cfg!(debug_assertions) {
panic!("ancestor timeline {ancestor_timeline_id} not found");
}
return Err(TimelineArchivalError::NotFound);
};
if has_archived_parent {
return Err(TimelineArchivalError::HasArchivedParent(
ancestor_timeline_id,
));
}
Ok(())
}
fn check_to_be_unarchived_timeline_has_no_archived_parent(
timeline: &Arc<Timeline>,
) -> Result<(), TimelineArchivalError> {
if let Some(ancestor_timeline) = timeline.ancestor_timeline() {
if ancestor_timeline.is_archived() == Some(true) {
return Err(TimelineArchivalError::HasArchivedParent(
ancestor_timeline.timeline_id,
));
}
}
Ok(())
}
/// Loads the specified (offloaded) timeline from S3 and attaches it as a loaded timeline
///
/// Steps, in order:
/// 1. download the timeline's index part via `load_timeline_metadata`,
/// 2. load the timeline from it via `load_remote_timeline`,
/// 3. remove the id from `timelines_offloaded` and return the now-loaded timeline.
///
/// # Errors
///
/// * [`TimelineArchivalError::NotFound`] if the index part is missing on the remote, or if
///   it marks the timeline as deleted.
/// * [`TimelineArchivalError::Other`] for any other index part download error, or if the
///   timeline is absent from `self.timelines` after loading.
/// * A failure of `load_remote_timeline` is propagated through `?` with added context.
async fn unoffload_timeline(
self: &Arc<Self>,
timeline_id: TimelineId,
ctx: RequestContext,
) -> Result<Arc<Timeline>, TimelineArchivalError> {
let cancel = self.cancel.clone();
// Fetch the remote metadata first; nothing is loaded locally until it is validated below.
let timeline_preload = self
.load_timeline_metadata(timeline_id, self.remote_storage.clone(), cancel)
.await;
let index_part = match timeline_preload.index_part {
Ok(index_part) => {
debug!("remote index part exists for timeline {timeline_id}");
index_part
}
Err(DownloadError::NotFound) => {
error!(%timeline_id, "index_part not found on remote");
return Err(TimelineArchivalError::NotFound);
}
Err(e) => {
// Some (possibly ephemeral) error happened during index_part download.
warn!(%timeline_id, "Failed to load index_part from remote storage, failed creation? ({e})");
return Err(TimelineArchivalError::Other(
anyhow::Error::new(e).context("downloading index_part from remote storage"),
));
}
};
// An index part flagged as deleted is reported to the caller the same way as a missing one.
let index_part = match index_part {
MaybeDeletedIndexPart::IndexPart(index_part) => index_part,
MaybeDeletedIndexPart::Deleted(_index_part) => {
info!("timeline is deleted according to index_part.json");
return Err(TimelineArchivalError::NotFound);
}
};
// Clone the metadata before `index_part` is moved into `load_remote_timeline`.
let remote_metadata = index_part.metadata.clone();
let timeline_resources = self.build_timeline_resources(timeline_id);
self.load_remote_timeline(
timeline_id,
index_part,
remote_metadata,
timeline_resources,
&ctx,
)
.await
.with_context(|| {
format!(
"failed to load remote timeline {} for tenant {}",
timeline_id, self.tenant_shard_id
)
})?;
// `timelines` is locked before `timelines_offloaded` here; both guards are only taken
// after the last `.await` above.
let timelines = self.timelines.lock().unwrap();
if let Some(timeline) = timelines.get(&timeline_id) {
let mut offloaded_timelines = self.timelines_offloaded.lock().unwrap();
// A missing offloaded entry is only logged; the loaded timeline is still returned.
if offloaded_timelines.remove(&timeline_id).is_none() {
warn!("timeline already removed from offloaded timelines");
}
Ok(Arc::clone(timeline))
} else {
// NOTE(review): presumably `load_remote_timeline` is expected to have inserted the
// timeline into `self.timelines` — confirm against its implementation.
warn!("timeline not available directly after attach");
Err(TimelineArchivalError::Other(anyhow::anyhow!(
"timeline not available directly after attach"
)))
}
}
pub(crate) async fn apply_timeline_archival_config(
self: &Arc<Self>,
timeline_id: TimelineId,
new_state: TimelineArchivalState,
ctx: RequestContext,
) -> Result<(), TimelineArchivalError> {
info!("setting timeline archival config");
// First part: figure out what is needed to do, and do validation
let timeline_or_unarchive_offloaded = 'outer: {
let timelines = self.timelines.lock().unwrap();
let Some(timeline) = timelines.get(&timeline_id) else {
let offloaded_timelines = self.timelines_offloaded.lock().unwrap();
let Some(offloaded) = offloaded_timelines.get(&timeline_id) else {
return Err(TimelineArchivalError::NotFound);
};
if new_state == TimelineArchivalState::Archived {
// It's offloaded already, so nothing to do
return Ok(());
}
if let Some(ancestor_timeline_id) = offloaded.ancestor_timeline_id {
Self::check_ancestor_of_to_be_unarchived_is_not_archived(
ancestor_timeline_id,
&timelines,
&offloaded_timelines,
)?;
}
break 'outer None;
}; };
// Do some validation. We release the timelines lock below, so there is potential
// for race conditions: these checks are more present to prevent misunderstandings of
// the API's capabilities, instead of serving as the sole way to defend their invariants.
match new_state {
TimelineArchivalState::Unarchived => {
Self::check_to_be_unarchived_timeline_has_no_archived_parent(timeline)?
}
TimelineArchivalState::Archived => {
Self::check_to_be_archived_has_no_unarchived_children(timeline_id, &timelines)?
}
}
Some(Arc::clone(timeline))
};
// Second part: unarchive timeline (if needed)
let timeline = if let Some(timeline) = timeline_or_unarchive_offloaded {
timeline
} else {
// Turn offloaded timeline into a non-offloaded one
self.unoffload_timeline(timeline_id, ctx).await?
};
// Third part: upload new timeline archival state and block until it is present in S3
let upload_needed = timeline let upload_needed = timeline
.remote_client .remote_client
.schedule_index_upload_for_timeline_archival_state(new_state)?; .schedule_index_upload_for_timeline_archival_state(state)?;
if upload_needed { if upload_needed {
info!("Uploading new state"); info!("Uploading new state");
@@ -2088,7 +1884,7 @@ impl Tenant {
/// ///
/// Returns whether we have pending compaction task. /// Returns whether we have pending compaction task.
async fn compaction_iteration( async fn compaction_iteration(
self: &Arc<Self>, &self,
cancel: &CancellationToken, cancel: &CancellationToken,
ctx: &RequestContext, ctx: &RequestContext,
) -> Result<bool, timeline::CompactionError> { ) -> Result<bool, timeline::CompactionError> {
@@ -2109,28 +1905,21 @@ impl Tenant {
// while holding the lock. Then drop the lock and actually perform the // while holding the lock. Then drop the lock and actually perform the
// compactions. We don't want to block everything else while the // compactions. We don't want to block everything else while the
// compaction runs. // compaction runs.
let timelines_to_compact_or_offload; let timelines_to_compact = {
{
let timelines = self.timelines.lock().unwrap(); let timelines = self.timelines.lock().unwrap();
timelines_to_compact_or_offload = timelines let timelines_to_compact = timelines
.iter() .iter()
.filter_map(|(timeline_id, timeline)| { .filter_map(|(timeline_id, timeline)| {
let (is_active, can_offload) = (timeline.is_active(), timeline.can_offload()); if timeline.is_active() {
let has_no_unoffloaded_children = { Some((*timeline_id, timeline.clone()))
!timelines
.iter()
.any(|(_id, tl)| tl.get_ancestor_timeline_id() == Some(*timeline_id))
};
let can_offload = can_offload && has_no_unoffloaded_children;
if (is_active, can_offload) == (false, false) {
None
} else { } else {
Some((*timeline_id, timeline.clone(), (is_active, can_offload))) None
} }
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
drop(timelines); drop(timelines);
} timelines_to_compact
};
// Before doing any I/O work, check our circuit breaker // Before doing any I/O work, check our circuit breaker
if self.compaction_circuit_breaker.lock().unwrap().is_broken() { if self.compaction_circuit_breaker.lock().unwrap().is_broken() {
@@ -2140,11 +1929,8 @@ impl Tenant {
let mut has_pending_task = false; let mut has_pending_task = false;
for (timeline_id, timeline, (can_compact, can_offload)) in &timelines_to_compact_or_offload for (timeline_id, timeline) in &timelines_to_compact {
{ has_pending_task |= timeline
let pending_task_left = if *can_compact {
Some(
timeline
.compact(cancel, EnumSet::empty(), ctx) .compact(cancel, EnumSet::empty(), ctx)
.instrument(info_span!("compact_timeline", %timeline_id)) .instrument(info_span!("compact_timeline", %timeline_id))
.await .await
@@ -2156,18 +1942,7 @@ impl Tenant {
.unwrap() .unwrap()
.fail(&CIRCUIT_BREAKERS_BROKEN, e); .fail(&CIRCUIT_BREAKERS_BROKEN, e);
} }
})?, })?;
)
} else {
None
};
has_pending_task |= pending_task_left.unwrap_or(false);
if pending_task_left == Some(false) && *can_offload {
offload_timeline(self, timeline)
.instrument(info_span!("offload_timeline", %timeline_id))
.await
.map_err(timeline::CompactionError::Other)?;
}
} }
self.compaction_circuit_breaker self.compaction_circuit_breaker
@@ -3077,7 +2852,6 @@ impl Tenant {
constructed_at: Instant::now(), constructed_at: Instant::now(),
timelines: Mutex::new(HashMap::new()), timelines: Mutex::new(HashMap::new()),
timelines_creating: Mutex::new(HashSet::new()), timelines_creating: Mutex::new(HashSet::new()),
timelines_offloaded: Mutex::new(HashMap::new()),
gc_cs: tokio::sync::Mutex::new(()), gc_cs: tokio::sync::Mutex::new(()),
walredo_mgr, walredo_mgr,
remote_storage, remote_storage,

View File

@@ -141,14 +141,14 @@ impl GcBlock {
Ok(()) Ok(())
} }
pub(crate) fn before_delete(&self, timeline_id: &super::TimelineId) { pub(crate) fn before_delete(&self, timeline: &super::Timeline) {
let unblocked = { let unblocked = {
let mut g = self.reasons.lock().unwrap(); let mut g = self.reasons.lock().unwrap();
if g.is_empty() { if g.is_empty() {
return; return;
} }
g.remove(timeline_id); g.remove(&timeline.timeline_id);
BlockingReasons::clean_and_summarize(g).is_none() BlockingReasons::clean_and_summarize(g).is_none()
}; };

View File

@@ -27,7 +27,7 @@ use crate::tenant::Generation;
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt; use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile}; use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile};
use crate::TEMP_FILE_SUFFIX; use crate::TEMP_FILE_SUFFIX;
use remote_storage::{DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath}; use remote_storage::{DownloadError, GenericRemoteStorage, ListingMode, RemotePath};
use utils::crashsafe::path_with_suffix_extension; use utils::crashsafe::path_with_suffix_extension;
use utils::id::{TenantId, TimelineId}; use utils::id::{TenantId, TimelineId};
use utils::pausable_failpoint; use utils::pausable_failpoint;
@@ -153,9 +153,7 @@ async fn download_object<'a>(
.with_context(|| format!("create a destination file for layer '{dst_path}'")) .with_context(|| format!("create a destination file for layer '{dst_path}'"))
.map_err(DownloadError::Other)?; .map_err(DownloadError::Other)?;
let download = storage let download = storage.download(src_path, cancel).await?;
.download(src_path, &DownloadOpts::default(), cancel)
.await?;
pausable_failpoint!("before-downloading-layer-stream-pausable"); pausable_failpoint!("before-downloading-layer-stream-pausable");
@@ -206,9 +204,7 @@ async fn download_object<'a>(
.with_context(|| format!("create a destination file for layer '{dst_path}'")) .with_context(|| format!("create a destination file for layer '{dst_path}'"))
.map_err(DownloadError::Other)?; .map_err(DownloadError::Other)?;
let mut download = storage let mut download = storage.download(src_path, cancel).await?;
.download(src_path, &DownloadOpts::default(), cancel)
.await?;
pausable_failpoint!("before-downloading-layer-stream-pausable"); pausable_failpoint!("before-downloading-layer-stream-pausable");
@@ -348,9 +344,7 @@ async fn do_download_index_part(
let index_part_bytes = download_retry_forever( let index_part_bytes = download_retry_forever(
|| async { || async {
let download = storage let download = storage.download(&remote_path, cancel).await?;
.download(&remote_path, &DownloadOpts::default(), cancel)
.await?;
let mut bytes = Vec::new(); let mut bytes = Vec::new();
@@ -532,15 +526,10 @@ pub(crate) async fn download_initdb_tar_zst(
.with_context(|| format!("tempfile creation {temp_path}")) .with_context(|| format!("tempfile creation {temp_path}"))
.map_err(DownloadError::Other)?; .map_err(DownloadError::Other)?;
let download = match storage let download = match storage.download(&remote_path, cancel).await {
.download(&remote_path, &DownloadOpts::default(), cancel)
.await
{
Ok(dl) => dl, Ok(dl) => dl,
Err(DownloadError::NotFound) => { Err(DownloadError::NotFound) => {
storage storage.download(&remote_preserved_path, cancel).await?
.download(&remote_preserved_path, &DownloadOpts::default(), cancel)
.await?
} }
Err(other) => Err(other)?, Err(other) => Err(other)?,
}; };

View File

@@ -49,7 +49,7 @@ use futures::Future;
use metrics::UIntGauge; use metrics::UIntGauge;
use pageserver_api::models::SecondaryProgress; use pageserver_api::models::SecondaryProgress;
use pageserver_api::shard::TenantShardId; use pageserver_api::shard::TenantShardId;
use remote_storage::{DownloadError, DownloadOpts, Etag, GenericRemoteStorage}; use remote_storage::{DownloadError, Etag, GenericRemoteStorage};
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use tracing::{info_span, instrument, warn, Instrument}; use tracing::{info_span, instrument, warn, Instrument};
@@ -944,27 +944,27 @@ impl<'a> TenantDownloader<'a> {
) -> Result<HeatMapDownload, UpdateError> { ) -> Result<HeatMapDownload, UpdateError> {
debug_assert_current_span_has_tenant_id(); debug_assert_current_span_has_tenant_id();
let tenant_shard_id = self.secondary_state.get_tenant_shard_id(); let tenant_shard_id = self.secondary_state.get_tenant_shard_id();
// TODO: pull up etag check into the request, to do a conditional GET rather than
// issuing a GET and then maybe ignoring the response body
// (https://github.com/neondatabase/neon/issues/6199)
tracing::debug!("Downloading heatmap for secondary tenant",); tracing::debug!("Downloading heatmap for secondary tenant",);
let heatmap_path = remote_heatmap_path(tenant_shard_id); let heatmap_path = remote_heatmap_path(tenant_shard_id);
let cancel = &self.secondary_state.cancel; let cancel = &self.secondary_state.cancel;
let opts = DownloadOpts {
etag: prev_etag.cloned(),
..Default::default()
};
backoff::retry( backoff::retry(
|| async { || async {
let download = match self let download = self
.remote_storage .remote_storage
.download(&heatmap_path, &opts, cancel) .download(&heatmap_path, cancel)
.await .await
{ .map_err(UpdateError::from)?;
Ok(download) => download,
Err(DownloadError::Unmodified) => return Ok(HeatMapDownload::Unmodified),
Err(err) => return Err(err.into()),
};
SECONDARY_MODE.download_heatmap.inc();
if Some(&download.etag) == prev_etag {
Ok(HeatMapDownload::Unmodified)
} else {
let mut heatmap_bytes = Vec::new(); let mut heatmap_bytes = Vec::new();
let mut body = tokio_util::io::StreamReader::new(download.download_stream); let mut body = tokio_util::io::StreamReader::new(download.download_stream);
let _size = tokio::io::copy_buf(&mut body, &mut heatmap_bytes).await?; let _size = tokio::io::copy_buf(&mut body, &mut heatmap_bytes).await?;
@@ -973,6 +973,7 @@ impl<'a> TenantDownloader<'a> {
last_modified: download.last_modified, last_modified: download.last_modified,
bytes: heatmap_bytes, bytes: heatmap_bytes,
})) }))
}
}, },
|e| matches!(e, UpdateError::NoData | UpdateError::Cancelled), |e| matches!(e, UpdateError::NoData | UpdateError::Cancelled),
FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_DOWNLOAD_WARN_THRESHOLD,
@@ -983,7 +984,6 @@ impl<'a> TenantDownloader<'a> {
.await .await
.ok_or_else(|| UpdateError::Cancelled) .ok_or_else(|| UpdateError::Cancelled)
.and_then(|x| x) .and_then(|x| x)
.inspect(|_| SECONDARY_MODE.download_heatmap.inc())
} }
/// Download heatmap layers that are not present on local disk, or update their /// Download heatmap layers that are not present on local disk, or update their

View File

@@ -7,7 +7,6 @@ pub(crate) mod handle;
mod init; mod init;
pub mod layer_manager; pub mod layer_manager;
pub(crate) mod logical_size; pub(crate) mod logical_size;
pub mod offload;
pub mod span; pub mod span;
pub mod uninit; pub mod uninit;
mod walreceiver; mod walreceiver;
@@ -1557,17 +1556,6 @@ impl Timeline {
} }
} }
/// Reports whether this timeline's own state allows it to be offloaded, which is the case
/// exactly when its remote client reports it as archived (`Some(true)`).
///
/// This is necessary but not sufficient for offloading: the timeline might still have
/// child timelines that are not offloaded yet, which the caller has to check separately.
pub(crate) fn can_offload(&self) -> bool {
    matches!(self.remote_client.is_archived(), Some(true))
}
/// Outermost timeline compaction operation; downloads needed layers. Returns whether we have pending /// Outermost timeline compaction operation; downloads needed layers. Returns whether we have pending
/// compaction tasks. /// compaction tasks.
pub(crate) async fn compact( pub(crate) async fn compact(
@@ -1830,6 +1818,7 @@ impl Timeline {
self.current_state() == TimelineState::Active self.current_state() == TimelineState::Active
} }
#[allow(unused)]
pub(crate) fn is_archived(&self) -> Option<bool> { pub(crate) fn is_archived(&self) -> Option<bool> {
self.remote_client.is_archived() self.remote_client.is_archived()
} }

View File

@@ -15,7 +15,7 @@ use crate::{
tenant::{ tenant::{
metadata::TimelineMetadata, metadata::TimelineMetadata,
remote_timeline_client::{PersistIndexPartWithDeletedFlagError, RemoteTimelineClient}, remote_timeline_client::{PersistIndexPartWithDeletedFlagError, RemoteTimelineClient},
CreateTimelineCause, DeleteTimelineError, Tenant, TimelineOrOffloaded, CreateTimelineCause, DeleteTimelineError, Tenant,
}, },
}; };
@@ -24,14 +24,12 @@ use super::{Timeline, TimelineResources};
/// Mark timeline as deleted in S3 so we won't pick it up next time /// Mark timeline as deleted in S3 so we won't pick it up next time
/// during attach or pageserver restart. /// during attach or pageserver restart.
/// See comment in persist_index_part_with_deleted_flag. /// See comment in persist_index_part_with_deleted_flag.
async fn set_deleted_in_remote_index( async fn set_deleted_in_remote_index(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
timeline: &TimelineOrOffloaded, match timeline
) -> Result<(), DeleteTimelineError> { .remote_client
let res = timeline
.remote_client()
.persist_index_part_with_deleted_flag() .persist_index_part_with_deleted_flag()
.await; .await
match res { {
// If we (now, or already) marked it successfully as deleted, we can proceed // If we (now, or already) marked it successfully as deleted, we can proceed
Ok(()) | Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(_)) => (), Ok(()) | Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(_)) => (),
// Bail out otherwise // Bail out otherwise
@@ -129,9 +127,9 @@ pub(super) async fn delete_local_timeline_directory(
} }
/// Removes remote layers and an index file after them. /// Removes remote layers and an index file after them.
async fn delete_remote_layers_and_index(timeline: &TimelineOrOffloaded) -> anyhow::Result<()> { async fn delete_remote_layers_and_index(timeline: &Timeline) -> anyhow::Result<()> {
timeline timeline
.remote_client() .remote_client
.delete_all() .delete_all()
.await .await
.context("delete_all") .context("delete_all")
@@ -139,41 +137,27 @@ async fn delete_remote_layers_and_index(timeline: &TimelineOrOffloaded) -> anyho
/// It is important that this gets called when DeletionGuard is being held. /// It is important that this gets called when DeletionGuard is being held.
/// For more context see comments in [`DeleteTimelineFlow::prepare`] /// For more context see comments in [`DeleteTimelineFlow::prepare`]
async fn remove_maybe_offloaded_timeline_from_tenant( async fn remove_timeline_from_tenant(
tenant: &Tenant, tenant: &Tenant,
timeline: &TimelineOrOffloaded, timeline: &Timeline,
_: &DeletionGuard, // using it as a witness _: &DeletionGuard, // using it as a witness
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
// Remove the timeline from the map. // Remove the timeline from the map.
// This observes the locking order between timelines and timelines_offloaded
let mut timelines = tenant.timelines.lock().unwrap(); let mut timelines = tenant.timelines.lock().unwrap();
let mut timelines_offloaded = tenant.timelines_offloaded.lock().unwrap();
let offloaded_children_exist = timelines_offloaded
.iter()
.any(|(_, entry)| entry.ancestor_timeline_id == Some(timeline.timeline_id()));
let children_exist = timelines let children_exist = timelines
.iter() .iter()
.any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id())); .any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id));
// XXX this can happen because of race conditions with branch creation. // XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`.
// We already deleted the remote layer files, so it's probably best to panic. // We already deleted the layer files, so it's probably best to panic.
if children_exist || offloaded_children_exist { // (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart)
if children_exist {
panic!("Timeline grew children while we removed layer files"); panic!("Timeline grew children while we removed layer files");
} }
match timeline { timelines
TimelineOrOffloaded::Timeline(timeline) => {
timelines.remove(&timeline.timeline_id).expect(
"timeline that we were deleting was concurrently removed from 'timelines' map",
);
}
TimelineOrOffloaded::Offloaded(timeline) => {
timelines_offloaded
.remove(&timeline.timeline_id) .remove(&timeline.timeline_id)
.expect("timeline that we were deleting was concurrently removed from 'timelines_offloaded' map"); .expect("timeline that we were deleting was concurrently removed from 'timelines' map");
}
}
drop(timelines_offloaded);
drop(timelines); drop(timelines);
Ok(()) Ok(())
@@ -223,11 +207,9 @@ impl DeleteTimelineFlow {
guard.mark_in_progress()?; guard.mark_in_progress()?;
// Now that the Timeline is in Stopping state, request all the related tasks to shut down. // Now that the Timeline is in Stopping state, request all the related tasks to shut down.
if let TimelineOrOffloaded::Timeline(timeline) = &timeline {
timeline.shutdown(super::ShutdownMode::Hard).await; timeline.shutdown(super::ShutdownMode::Hard).await;
}
tenant.gc_block.before_delete(&timeline.timeline_id()); tenant.gc_block.before_delete(&timeline);
fail::fail_point!("timeline-delete-before-index-deleted-at", |_| { fail::fail_point!("timeline-delete-before-index-deleted-at", |_| {
Err(anyhow::anyhow!( Err(anyhow::anyhow!(
@@ -303,16 +285,15 @@ impl DeleteTimelineFlow {
guard.mark_in_progress()?; guard.mark_in_progress()?;
let timeline = TimelineOrOffloaded::Timeline(timeline);
Self::schedule_background(guard, tenant.conf, tenant, timeline); Self::schedule_background(guard, tenant.conf, tenant, timeline);
Ok(()) Ok(())
} }
pub(super) fn prepare( fn prepare(
tenant: &Tenant, tenant: &Tenant,
timeline_id: TimelineId, timeline_id: TimelineId,
) -> Result<(TimelineOrOffloaded, DeletionGuard), DeleteTimelineError> { ) -> Result<(Arc<Timeline>, DeletionGuard), DeleteTimelineError> {
// Note the interaction between this guard and deletion guard. // Note the interaction between this guard and deletion guard.
// Here we attempt to lock deletion guard when we're holding a lock on timelines. // Here we attempt to lock deletion guard when we're holding a lock on timelines.
// This is important because when you take into account `remove_timeline_from_tenant` // This is important because when you take into account `remove_timeline_from_tenant`
@@ -326,14 +307,8 @@ impl DeleteTimelineFlow {
let timelines = tenant.timelines.lock().unwrap(); let timelines = tenant.timelines.lock().unwrap();
let timeline = match timelines.get(&timeline_id) { let timeline = match timelines.get(&timeline_id) {
Some(t) => TimelineOrOffloaded::Timeline(Arc::clone(t)), Some(t) => t,
None => {
let offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
match offloaded_timelines.get(&timeline_id) {
Some(t) => TimelineOrOffloaded::Offloaded(Arc::clone(t)),
None => return Err(DeleteTimelineError::NotFound), None => return Err(DeleteTimelineError::NotFound),
}
}
}; };
// Ensure that there are no child timelines **attached to that pageserver**, // Ensure that there are no child timelines **attached to that pageserver**,
@@ -359,32 +334,30 @@ impl DeleteTimelineFlow {
// to remove the timeline from it. // to remove the timeline from it.
// Always if you have two locks that are taken in different order this can result in a deadlock. // Always if you have two locks that are taken in different order this can result in a deadlock.
let delete_progress = Arc::clone(timeline.delete_progress()); let delete_progress = Arc::clone(&timeline.delete_progress);
let delete_lock_guard = match delete_progress.try_lock_owned() { let delete_lock_guard = match delete_progress.try_lock_owned() {
Ok(guard) => DeletionGuard(guard), Ok(guard) => DeletionGuard(guard),
Err(_) => { Err(_) => {
// Unfortunately if lock fails arc is consumed. // Unfortunately if lock fails arc is consumed.
return Err(DeleteTimelineError::AlreadyInProgress(Arc::clone( return Err(DeleteTimelineError::AlreadyInProgress(Arc::clone(
timeline.delete_progress(), &timeline.delete_progress,
))); )));
} }
}; };
if let TimelineOrOffloaded::Timeline(timeline) = &timeline {
timeline.set_state(TimelineState::Stopping); timeline.set_state(TimelineState::Stopping);
}
Ok((timeline, delete_lock_guard)) Ok((Arc::clone(timeline), delete_lock_guard))
} }
fn schedule_background( fn schedule_background(
guard: DeletionGuard, guard: DeletionGuard,
conf: &'static PageServerConf, conf: &'static PageServerConf,
tenant: Arc<Tenant>, tenant: Arc<Tenant>,
timeline: TimelineOrOffloaded, timeline: Arc<Timeline>,
) { ) {
let tenant_shard_id = timeline.tenant_shard_id(); let tenant_shard_id = timeline.tenant_shard_id;
let timeline_id = timeline.timeline_id(); let timeline_id = timeline.timeline_id;
task_mgr::spawn( task_mgr::spawn(
task_mgr::BACKGROUND_RUNTIME.handle(), task_mgr::BACKGROUND_RUNTIME.handle(),
@@ -395,9 +368,7 @@ impl DeleteTimelineFlow {
async move { async move {
if let Err(err) = Self::background(guard, conf, &tenant, &timeline).await { if let Err(err) = Self::background(guard, conf, &tenant, &timeline).await {
error!("Error: {err:#}"); error!("Error: {err:#}");
if let TimelineOrOffloaded::Timeline(timeline) = timeline {
timeline.set_broken(format!("{err:#}")) timeline.set_broken(format!("{err:#}"))
}
}; };
Ok(()) Ok(())
} }
@@ -409,19 +380,15 @@ impl DeleteTimelineFlow {
mut guard: DeletionGuard, mut guard: DeletionGuard,
conf: &PageServerConf, conf: &PageServerConf,
tenant: &Tenant, tenant: &Tenant,
timeline: &TimelineOrOffloaded, timeline: &Timeline,
) -> Result<(), DeleteTimelineError> { ) -> Result<(), DeleteTimelineError> {
// Offloaded timelines have no local state
// TODO: once we persist offloaded information, delete the timeline from there, too
if let TimelineOrOffloaded::Timeline(timeline) = timeline {
delete_local_timeline_directory(conf, tenant.tenant_shard_id, timeline).await?; delete_local_timeline_directory(conf, tenant.tenant_shard_id, timeline).await?;
}
delete_remote_layers_and_index(timeline).await?; delete_remote_layers_and_index(timeline).await?;
pausable_failpoint!("in_progress_delete"); pausable_failpoint!("in_progress_delete");
remove_maybe_offloaded_timeline_from_tenant(tenant, timeline, &guard).await?; remove_timeline_from_tenant(tenant, timeline, &guard).await?;
*guard = Self::Finished; *guard = Self::Finished;
@@ -433,7 +400,7 @@ impl DeleteTimelineFlow {
} }
} }
pub(super) struct DeletionGuard(OwnedMutexGuard<DeleteTimelineFlow>); struct DeletionGuard(OwnedMutexGuard<DeleteTimelineFlow>);
impl Deref for DeletionGuard { impl Deref for DeletionGuard {
type Target = DeleteTimelineFlow; type Target = DeleteTimelineFlow;

View File

@@ -1,69 +0,0 @@
use std::sync::Arc;
use crate::tenant::{OffloadedTimeline, Tenant, TimelineOrOffloaded};
use super::{
delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard},
Timeline,
};
/// Offloads an archived timeline: deletes its local timeline directory, removes it from the
/// tenant's `timelines` map and records an [`OffloadedTimeline`] for it in
/// `timelines_offloaded`.
///
/// Returns `Ok(())` without doing anything if `DeleteTimelineFlow::prepare` reports the
/// timeline as already offloaded. Errors from `prepare`, from deleting the local directory
/// and from removing the timeline from the tenant are propagated.
pub(crate) async fn offload_timeline(
tenant: &Tenant,
timeline: &Arc<Timeline>,
) -> anyhow::Result<()> {
tracing::info!("offloading archived timeline");
// Reuses the deletion flow's `prepare` step to obtain the timeline together with its
// deletion guard; the guard is passed as a witness to `remove_timeline_from_tenant` below.
let (timeline, guard) = DeleteTimelineFlow::prepare(tenant, timeline.timeline_id)?;

// Shadows the `timeline` parameter with the loaded timeline returned by `prepare`.
let TimelineOrOffloaded::Timeline(timeline) = timeline else {
tracing::error!("timeline already offloaded, but given timeline object");
return Ok(());
};

// TODO extend guard mechanism above with method
// to make deletions possible while offloading is in progress

// TODO mark timeline as offloaded in S3

let conf = &tenant.conf;
delete_local_timeline_directory(conf, tenant.tenant_shard_id, &timeline).await?;

remove_timeline_from_tenant(tenant, &timeline, &guard).await?;

// The timeline is inserted into the offloaded map only after it has been removed from
// `timelines`; the two maps are not updated under a common lock here.
{
let mut offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
offloaded_timelines.insert(
timeline.timeline_id,
Arc::new(OffloadedTimeline::from_timeline(&timeline)),
);
}

Ok(())
}
/// Drops `timeline` from the tenant's `timelines` map.
///
/// Must only be called while the [`DeletionGuard`] is held; the guard parameter exists purely
/// as proof of that. See the comments in [`DeleteTimelineFlow::prepare`] for more context.
///
/// # Panics
///
/// Panics if another timeline in the map names this one as its ancestor, or if the timeline
/// is no longer present in the map.
async fn remove_timeline_from_tenant(
    tenant: &Tenant,
    timeline: &Timeline,
    _: &DeletionGuard, // using it as a witness
) -> anyhow::Result<()> {
    let target_id = timeline.timeline_id;

    // Hold the map lock across both the child check and the removal.
    let mut timelines = tenant.timelines.lock().unwrap();

    // XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`.
    // We already deleted the layer files, so it's probably best to panic.
    // (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart)
    let has_children = timelines
        .values()
        .any(|candidate| candidate.get_ancestor_timeline_id() == Some(target_id));
    if has_children {
        panic!("Timeline grew children while we removed layer files");
    }

    timelines
        .remove(&target_id)
        .expect("timeline that we were deleting was concurrently removed from 'timelines' map");

    drop(timelines);

    Ok(())
}

View File

@@ -146,8 +146,6 @@ ConstructDeltaMessage()
if (RootTable.role_table) if (RootTable.role_table)
{ {
JsonbValue roles; JsonbValue roles;
HASH_SEQ_STATUS status;
RoleEntry *entry;
roles.type = jbvString; roles.type = jbvString;
roles.val.string.val = "roles"; roles.val.string.val = "roles";
@@ -155,6 +153,9 @@ ConstructDeltaMessage()
pushJsonbValue(&state, WJB_KEY, &roles); pushJsonbValue(&state, WJB_KEY, &roles);
pushJsonbValue(&state, WJB_BEGIN_ARRAY, NULL); pushJsonbValue(&state, WJB_BEGIN_ARRAY, NULL);
HASH_SEQ_STATUS status;
RoleEntry *entry;
hash_seq_init(&status, RootTable.role_table); hash_seq_init(&status, RootTable.role_table);
while ((entry = hash_seq_search(&status)) != NULL) while ((entry = hash_seq_search(&status)) != NULL)
{ {
@@ -189,12 +190,10 @@ ConstructDeltaMessage()
} }
pushJsonbValue(&state, WJB_END_ARRAY, NULL); pushJsonbValue(&state, WJB_END_ARRAY, NULL);
} }
{
JsonbValue *result = pushJsonbValue(&state, WJB_END_OBJECT, NULL); JsonbValue *result = pushJsonbValue(&state, WJB_END_OBJECT, NULL);
Jsonb *jsonb = JsonbValueToJsonb(result); Jsonb *jsonb = JsonbValueToJsonb(result);
return JsonbToCString(NULL, &jsonb->root, 0 /* estimated_len */ ); return JsonbToCString(NULL, &jsonb->root, 0 /* estimated_len */ );
}
} }
#define ERROR_SIZE 1024 #define ERROR_SIZE 1024
@@ -273,18 +272,17 @@ SendDeltasToControlPlane()
curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, ErrorWriteCallback); curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, ErrorWriteCallback);
} }
{
char *message = ConstructDeltaMessage(); char *message = ConstructDeltaMessage();
ErrorString str; ErrorString str;
const int num_retries = 5;
CURLcode curl_status;
long response_code;
str.size = 0; str.size = 0;
curl_easy_setopt(handle, CURLOPT_POSTFIELDS, message); curl_easy_setopt(handle, CURLOPT_POSTFIELDS, message);
curl_easy_setopt(handle, CURLOPT_WRITEDATA, &str); curl_easy_setopt(handle, CURLOPT_WRITEDATA, &str);
const int num_retries = 5;
CURLcode curl_status;
for (int i = 0; i < num_retries; i++) for (int i = 0; i < num_retries; i++)
{ {
if ((curl_status = curl_easy_perform(handle)) == 0) if ((curl_status = curl_easy_perform(handle)) == 0)
@@ -293,7 +291,12 @@ SendDeltasToControlPlane()
pg_usleep(1000 * 1000); pg_usleep(1000 * 1000);
} }
if (curl_status != CURLE_OK) if (curl_status != CURLE_OK)
{
elog(ERROR, "Failed to perform curl request: %s", CurlErrorBuf); elog(ERROR, "Failed to perform curl request: %s", CurlErrorBuf);
}
else
{
long response_code;
if (curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &response_code) != CURLE_UNKNOWN_OPTION) if (curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &response_code) != CURLE_UNKNOWN_OPTION)
{ {
@@ -373,11 +376,10 @@ MergeTable()
if (old_table->db_table) if (old_table->db_table)
{ {
InitDbTableIfNeeded();
DbEntry *entry; DbEntry *entry;
HASH_SEQ_STATUS status; HASH_SEQ_STATUS status;
InitDbTableIfNeeded();
hash_seq_init(&status, old_table->db_table); hash_seq_init(&status, old_table->db_table);
while ((entry = hash_seq_search(&status)) != NULL) while ((entry = hash_seq_search(&status)) != NULL)
{ {
@@ -419,11 +421,10 @@ MergeTable()
if (old_table->role_table) if (old_table->role_table)
{ {
InitRoleTableIfNeeded();
RoleEntry *entry; RoleEntry *entry;
HASH_SEQ_STATUS status; HASH_SEQ_STATUS status;
InitRoleTableIfNeeded();
hash_seq_init(&status, old_table->role_table); hash_seq_init(&status, old_table->role_table);
while ((entry = hash_seq_search(&status)) != NULL) while ((entry = hash_seq_search(&status)) != NULL)
{ {
@@ -514,12 +515,9 @@ RoleIsNeonSuperuser(const char *role_name)
static void static void
HandleCreateDb(CreatedbStmt *stmt) HandleCreateDb(CreatedbStmt *stmt)
{ {
InitDbTableIfNeeded();
DefElem *downer = NULL; DefElem *downer = NULL;
ListCell *option; ListCell *option;
bool found = false;
DbEntry *entry;
InitDbTableIfNeeded();
foreach(option, stmt->options) foreach(option, stmt->options)
{ {
@@ -528,11 +526,13 @@ HandleCreateDb(CreatedbStmt *stmt)
if (strcmp(defel->defname, "owner") == 0) if (strcmp(defel->defname, "owner") == 0)
downer = defel; downer = defel;
} }
bool found = false;
entry = hash_search(CurrentDdlTable->db_table, DbEntry *entry = hash_search(
CurrentDdlTable->db_table,
stmt->dbname, stmt->dbname,
HASH_ENTER, HASH_ENTER,
&found); &found);
if (!found) if (!found)
memset(entry->old_name, 0, sizeof(entry->old_name)); memset(entry->old_name, 0, sizeof(entry->old_name));
@@ -554,24 +554,21 @@ HandleCreateDb(CreatedbStmt *stmt)
static void static void
HandleAlterOwner(AlterOwnerStmt *stmt) HandleAlterOwner(AlterOwnerStmt *stmt)
{ {
const char *name;
bool found = false;
DbEntry *entry;
const char *new_owner;
if (stmt->objectType != OBJECT_DATABASE) if (stmt->objectType != OBJECT_DATABASE)
return; return;
InitDbTableIfNeeded(); InitDbTableIfNeeded();
const char *name = strVal(stmt->object);
name = strVal(stmt->object); bool found = false;
entry = hash_search(CurrentDdlTable->db_table, DbEntry *entry = hash_search(
CurrentDdlTable->db_table,
name, name,
HASH_ENTER, HASH_ENTER,
&found); &found);
if (!found) if (!found)
memset(entry->old_name, 0, sizeof(entry->old_name)); memset(entry->old_name, 0, sizeof(entry->old_name));
const char *new_owner = get_rolespec_name(stmt->newowner);
new_owner = get_rolespec_name(stmt->newowner);
if (RoleIsNeonSuperuser(new_owner)) if (RoleIsNeonSuperuser(new_owner))
elog(ERROR, "can't alter owner to neon_superuser"); elog(ERROR, "can't alter owner to neon_superuser");
entry->owner = get_role_oid(new_owner, false); entry->owner = get_role_oid(new_owner, false);
@@ -581,23 +578,21 @@ HandleAlterOwner(AlterOwnerStmt *stmt)
static void static void
HandleDbRename(RenameStmt *stmt) HandleDbRename(RenameStmt *stmt)
{ {
bool found = false;
DbEntry *entry;
DbEntry *entry_for_new_name;
Assert(stmt->renameType == OBJECT_DATABASE); Assert(stmt->renameType == OBJECT_DATABASE);
InitDbTableIfNeeded(); InitDbTableIfNeeded();
entry = hash_search(CurrentDdlTable->db_table, bool found = false;
DbEntry *entry = hash_search(
CurrentDdlTable->db_table,
stmt->subname, stmt->subname,
HASH_FIND, HASH_FIND,
&found); &found);
DbEntry *entry_for_new_name = hash_search(
entry_for_new_name = hash_search(CurrentDdlTable->db_table, CurrentDdlTable->db_table,
stmt->newname, stmt->newname,
HASH_ENTER, HASH_ENTER,
NULL); NULL);
entry_for_new_name->type = Op_Set;
entry_for_new_name->type = Op_Set;
if (found) if (found)
{ {
if (entry->old_name[0] != '\0') if (entry->old_name[0] != '\0')
@@ -605,7 +600,8 @@ HandleDbRename(RenameStmt *stmt)
else else
strlcpy(entry_for_new_name->old_name, entry->name, NAMEDATALEN); strlcpy(entry_for_new_name->old_name, entry->name, NAMEDATALEN);
entry_for_new_name->owner = entry->owner; entry_for_new_name->owner = entry->owner;
hash_search(CurrentDdlTable->db_table, hash_search(
CurrentDdlTable->db_table,
stmt->subname, stmt->subname,
HASH_REMOVE, HASH_REMOVE,
NULL); NULL);
@@ -620,15 +616,14 @@ HandleDbRename(RenameStmt *stmt)
static void static void
HandleDropDb(DropdbStmt *stmt) HandleDropDb(DropdbStmt *stmt)
{ {
bool found = false;
DbEntry *entry;
InitDbTableIfNeeded(); InitDbTableIfNeeded();
bool found = false;
entry = hash_search(CurrentDdlTable->db_table, DbEntry *entry = hash_search(
CurrentDdlTable->db_table,
stmt->dbname, stmt->dbname,
HASH_ENTER, HASH_ENTER,
&found); &found);
entry->type = Op_Delete; entry->type = Op_Delete;
entry->owner = InvalidOid; entry->owner = InvalidOid;
if (!found) if (!found)
@@ -638,14 +633,16 @@ HandleDropDb(DropdbStmt *stmt)
static void static void
HandleCreateRole(CreateRoleStmt *stmt) HandleCreateRole(CreateRoleStmt *stmt)
{ {
InitRoleTableIfNeeded();
bool found = false; bool found = false;
RoleEntry *entry; RoleEntry *entry = hash_search(
DefElem *dpass; CurrentDdlTable->role_table,
stmt->role,
HASH_ENTER,
&found);
DefElem *dpass = NULL;
ListCell *option; ListCell *option;
InitRoleTableIfNeeded();
dpass = NULL;
foreach(option, stmt->options) foreach(option, stmt->options)
{ {
DefElem *defel = lfirst(option); DefElem *defel = lfirst(option);
@@ -653,11 +650,6 @@ HandleCreateRole(CreateRoleStmt *stmt)
if (strcmp(defel->defname, "password") == 0) if (strcmp(defel->defname, "password") == 0)
dpass = defel; dpass = defel;
} }
entry = hash_search(CurrentDdlTable->role_table,
stmt->role,
HASH_ENTER,
&found);
if (!found) if (!found)
memset(entry->old_name, 0, sizeof(entry->old_name)); memset(entry->old_name, 0, sizeof(entry->old_name));
if (dpass && dpass->arg) if (dpass && dpass->arg)
@@ -670,18 +662,14 @@ HandleCreateRole(CreateRoleStmt *stmt)
static void static void
HandleAlterRole(AlterRoleStmt *stmt) HandleAlterRole(AlterRoleStmt *stmt)
{ {
const char *role_name = stmt->role->rolename;
DefElem *dpass;
ListCell *option;
bool found = false;
RoleEntry *entry;
InitRoleTableIfNeeded(); InitRoleTableIfNeeded();
DefElem *dpass = NULL;
ListCell *option;
const char *role_name = stmt->role->rolename;
if (RoleIsNeonSuperuser(role_name) && !superuser()) if (RoleIsNeonSuperuser(role_name) && !superuser())
elog(ERROR, "can't ALTER neon_superuser"); elog(ERROR, "can't ALTER neon_superuser");
dpass = NULL;
foreach(option, stmt->options) foreach(option, stmt->options)
{ {
DefElem *defel = lfirst(option); DefElem *defel = lfirst(option);
@@ -692,11 +680,13 @@ HandleAlterRole(AlterRoleStmt *stmt)
/* We only care about updates to the password */ /* We only care about updates to the password */
if (!dpass) if (!dpass)
return; return;
bool found = false;
entry = hash_search(CurrentDdlTable->role_table, RoleEntry *entry = hash_search(
CurrentDdlTable->role_table,
role_name, role_name,
HASH_ENTER, HASH_ENTER,
&found); &found);
if (!found) if (!found)
memset(entry->old_name, 0, sizeof(entry->old_name)); memset(entry->old_name, 0, sizeof(entry->old_name));
if (dpass->arg) if (dpass->arg)
@@ -709,19 +699,17 @@ HandleAlterRole(AlterRoleStmt *stmt)
static void static void
HandleRoleRename(RenameStmt *stmt) HandleRoleRename(RenameStmt *stmt)
{ {
bool found = false;
RoleEntry *entry;
RoleEntry *entry_for_new_name;
Assert(stmt->renameType == OBJECT_ROLE);
InitRoleTableIfNeeded(); InitRoleTableIfNeeded();
Assert(stmt->renameType == OBJECT_ROLE);
entry = hash_search(CurrentDdlTable->role_table, bool found = false;
RoleEntry *entry = hash_search(
CurrentDdlTable->role_table,
stmt->subname, stmt->subname,
HASH_FIND, HASH_FIND,
&found); &found);
entry_for_new_name = hash_search(CurrentDdlTable->role_table, RoleEntry *entry_for_new_name = hash_search(
CurrentDdlTable->role_table,
stmt->newname, stmt->newname,
HASH_ENTER, HASH_ENTER,
NULL); NULL);
@@ -750,9 +738,8 @@ HandleRoleRename(RenameStmt *stmt)
static void static void
HandleDropRole(DropRoleStmt *stmt) HandleDropRole(DropRoleStmt *stmt)
{ {
ListCell *item;
InitRoleTableIfNeeded(); InitRoleTableIfNeeded();
ListCell *item;
foreach(item, stmt->roles) foreach(item, stmt->roles)
{ {

View File

@@ -170,14 +170,12 @@ lfc_disable(char const *op)
if (lfc_desc > 0) if (lfc_desc > 0)
{ {
int rc;
/* /*
* If the reason of error is ENOSPC, then truncation of file may * If the reason of error is ENOSPC, then truncation of file may
* help to reclaim some space * help to reclaim some space
*/ */
pgstat_report_wait_start(WAIT_EVENT_NEON_LFC_TRUNCATE); pgstat_report_wait_start(WAIT_EVENT_NEON_LFC_TRUNCATE);
rc = ftruncate(lfc_desc, 0); int rc = ftruncate(lfc_desc, 0);
pgstat_report_wait_end(); pgstat_report_wait_end();
if (rc < 0) if (rc < 0)
@@ -618,7 +616,7 @@ lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
*/ */
if (entry->bitmap[chunk_offs >> 5] == 0) if (entry->bitmap[chunk_offs >> 5] == 0)
{ {
bool has_remaining_pages = false; bool has_remaining_pages;
for (int i = 0; i < CHUNK_BITMAP_SIZE; i++) for (int i = 0; i < CHUNK_BITMAP_SIZE; i++)
{ {
@@ -668,6 +666,7 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
BufferTag tag; BufferTag tag;
FileCacheEntry *entry; FileCacheEntry *entry;
ssize_t rc; ssize_t rc;
bool result = true;
uint32 hash; uint32 hash;
uint64 generation; uint64 generation;
uint32 entry_offset; uint32 entry_offset;
@@ -926,10 +925,10 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
/* We can reuse a hole that was left behind when the LFC was shrunk previously */ /* We can reuse a hole that was left behind when the LFC was shrunk previously */
FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->holes)); FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->holes));
uint32 offset = hole->offset; uint32 offset = hole->offset;
bool hole_found; bool found;
hash_search_with_hash_value(lfc_hash, &hole->key, hole->hash, HASH_REMOVE, &hole_found); hash_search_with_hash_value(lfc_hash, &hole->key, hole->hash, HASH_REMOVE, &found);
CriticalAssert(hole_found); CriticalAssert(found);
lfc_ctl->used += 1; lfc_ctl->used += 1;
entry->offset = offset; /* reuse the hole */ entry->offset = offset; /* reuse the hole */
@@ -1005,7 +1004,7 @@ neon_get_lfc_stats(PG_FUNCTION_ARGS)
Datum result; Datum result;
HeapTuple tuple; HeapTuple tuple;
char const *key; char const *key;
uint64 value = 0; uint64 value;
Datum values[NUM_NEON_GET_STATS_COLS]; Datum values[NUM_NEON_GET_STATS_COLS];
bool nulls[NUM_NEON_GET_STATS_COLS]; bool nulls[NUM_NEON_GET_STATS_COLS];

View File

@@ -116,6 +116,8 @@ addSHLL(HyperLogLogState *cState, uint32 hash)
{ {
uint8 count; uint8 count;
uint32 index; uint32 index;
size_t i;
size_t j;
TimestampTz now = GetCurrentTimestamp(); TimestampTz now = GetCurrentTimestamp();
/* Use the first "k" (registerWidth) bits as a zero based index */ /* Use the first "k" (registerWidth) bits as a zero based index */

View File

@@ -89,6 +89,7 @@ typedef struct
#if PG_VERSION_NUM >= 150000 #if PG_VERSION_NUM >= 150000
static shmem_request_hook_type prev_shmem_request_hook = NULL; static shmem_request_hook_type prev_shmem_request_hook = NULL;
static void walproposer_shmem_request(void);
#endif #endif
static shmem_startup_hook_type prev_shmem_startup_hook; static shmem_startup_hook_type prev_shmem_startup_hook;
static PagestoreShmemState *pagestore_shared; static PagestoreShmemState *pagestore_shared;
@@ -440,8 +441,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
return false; return false;
} }
shard->state = PS_Connecting_Startup; shard->state = PS_Connecting_Startup;
/* fallthrough */
} }
/* FALLTHROUGH */
case PS_Connecting_Startup: case PS_Connecting_Startup:
{ {
char *pagestream_query; char *pagestream_query;
@@ -452,6 +453,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
do do
{ {
WaitEvent event;
switch (poll_result) switch (poll_result)
{ {
default: /* unknown/unused states are handled as a failed connection */ default: /* unknown/unused states are handled as a failed connection */
@@ -582,8 +585,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
} }
shard->state = PS_Connecting_PageStream; shard->state = PS_Connecting_PageStream;
/* fallthrough */
} }
/* FALLTHROUGH */
case PS_Connecting_PageStream: case PS_Connecting_PageStream:
{ {
neon_shard_log(shard_no, DEBUG5, "Connection state: Connecting_PageStream"); neon_shard_log(shard_no, DEBUG5, "Connection state: Connecting_PageStream");
@@ -628,8 +631,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
} }
shard->state = PS_Connected; shard->state = PS_Connected;
/* fallthrough */
} }
/* FALLTHROUGH */
case PS_Connected: case PS_Connected:
/* /*
* We successfully connected. Future connections to this PageServer * We successfully connected. Future connections to this PageServer

View File

@@ -94,6 +94,7 @@ neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
metric_t *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t)); metric_t *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t));
uint64 bucket_accum; uint64 bucket_accum;
int i = 0; int i = 0;
Datum getpage_wait_str;
metrics[i].name = "getpage_wait_seconds_count"; metrics[i].name = "getpage_wait_seconds_count";
metrics[i].is_bucket = false; metrics[i].is_bucket = false;
@@ -223,6 +224,7 @@ neon_get_perf_counters(PG_FUNCTION_ARGS)
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
Datum values[3]; Datum values[3];
bool nulls[3]; bool nulls[3];
Datum getpage_wait_str;
neon_per_backend_counters totals = {0}; neon_per_backend_counters totals = {0};
metric_t *metrics; metric_t *metrics;

View File

@@ -7,7 +7,6 @@
#define NEON_PGVERSIONCOMPAT_H #define NEON_PGVERSIONCOMPAT_H
#include "fmgr.h" #include "fmgr.h"
#include "storage/buf_internals.h"
#if PG_MAJORVERSION_NUM < 17 #if PG_MAJORVERSION_NUM < 17
#define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId) #define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId)
@@ -21,24 +20,11 @@
NInfoGetRelNumber(a) == NInfoGetRelNumber(b) \ NInfoGetRelNumber(a) == NInfoGetRelNumber(b) \
) )
/* These macros were turned into static inline functions in v16 */ /* buftag population & RelFileNode/RelFileLocator rework */
#if PG_MAJORVERSION_NUM < 16 #if PG_MAJORVERSION_NUM < 16
static inline bool
BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
{
return BUFFERTAGS_EQUAL(*tag1, *tag2);
}
static inline void #define InitBufferTag(tag, rfn, fn, bn) INIT_BUFFERTAG(*tag, *rfn, fn, bn)
InitBufferTag(BufferTag *tag, const RelFileNode *rnode,
ForkNumber forkNum, BlockNumber blockNum)
{
INIT_BUFFERTAG(*tag, *rnode, forkNum, blockNum);
}
#endif
/* RelFileNode -> RelFileLocator rework */
#if PG_MAJORVERSION_NUM < 16
#define USE_RELFILENODE #define USE_RELFILENODE
#define RELFILEINFO_HDR "storage/relfilenode.h" #define RELFILEINFO_HDR "storage/relfilenode.h"
@@ -87,6 +73,8 @@ InitBufferTag(BufferTag *tag, const RelFileNode *rnode,
#define USE_RELFILELOCATOR #define USE_RELFILELOCATOR
#define BUFFERTAGS_EQUAL(a, b) BufferTagsEqual(&(a), &(b))
#define RELFILEINFO_HDR "storage/relfilelocator.h" #define RELFILEINFO_HDR "storage/relfilelocator.h"
#define NRelFileInfo RelFileLocator #define NRelFileInfo RelFileLocator

View File

@@ -213,6 +213,32 @@ extern const f_smgr *smgr_neon(ProcNumber backend, NRelFileInfo rinfo);
extern void smgr_init_neon(void); extern void smgr_init_neon(void);
extern void readahead_buffer_resize(int newsize, void *extra); extern void readahead_buffer_resize(int newsize, void *extra);
/* Neon storage manager functionality */
extern void neon_init(void);
extern void neon_open(SMgrRelation reln);
extern void neon_close(SMgrRelation reln, ForkNumber forknum);
extern void neon_create(SMgrRelation reln, ForkNumber forknum, bool isRedo);
extern bool neon_exists(SMgrRelation reln, ForkNumber forknum);
extern void neon_unlink(NRelFileInfoBackend rnode, ForkNumber forknum, bool isRedo);
#if PG_MAJORVERSION_NUM < 16
extern void neon_extend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
#else
extern void neon_extend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, const void *buffer, bool skipFsync);
extern void neon_zeroextend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, int nbuffers, bool skipFsync);
#endif
#if PG_MAJORVERSION_NUM >=17
extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, int nblocks);
#else
extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
#endif
/* /*
* LSN values associated with each request to the pageserver * LSN values associated with each request to the pageserver
*/ */
@@ -252,7 +278,13 @@ extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum,
extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno, extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno,
neon_request_lsns request_lsns, void *buffer); neon_request_lsns request_lsns, void *buffer);
#endif #endif
extern void neon_writeback(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, BlockNumber nblocks);
extern BlockNumber neon_nblocks(SMgrRelation reln, ForkNumber forknum);
extern int64 neon_dbsize(Oid dbNode); extern int64 neon_dbsize(Oid dbNode);
extern void neon_truncate(SMgrRelation reln, ForkNumber forknum,
BlockNumber nblocks);
extern void neon_immedsync(SMgrRelation reln, ForkNumber forknum);
/* utils for neon relsize cache */ /* utils for neon relsize cache */
extern void relsize_hash_init(void); extern void relsize_hash_init(void);

View File

@@ -118,8 +118,6 @@ static UnloggedBuildPhase unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
static bool neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id); static bool neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id);
static bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id) = NULL; static bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id) = NULL;
static BlockNumber neon_nblocks(SMgrRelation reln, ForkNumber forknum);
/* /*
* Prefetch implementation: * Prefetch implementation:
* *
@@ -217,7 +215,7 @@ typedef struct PrfHashEntry
sizeof(BufferTag) \ sizeof(BufferTag) \
) )
#define SH_EQUAL(tb, a, b) (BufferTagsEqual(&(a)->buftag, &(b)->buftag)) #define SH_EQUAL(tb, a, b) (BUFFERTAGS_EQUAL((a)->buftag, (b)->buftag))
#define SH_SCOPE static inline #define SH_SCOPE static inline
#define SH_DEFINE #define SH_DEFINE
#define SH_DECLARE #define SH_DECLARE
@@ -738,7 +736,7 @@ static void
prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns) prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns)
{ {
bool found; bool found;
uint64 mySlotNo PG_USED_FOR_ASSERTS_ONLY = slot->my_ring_index; uint64 mySlotNo = slot->my_ring_index;
NeonGetPageRequest request = { NeonGetPageRequest request = {
.req.tag = T_NeonGetPageRequest, .req.tag = T_NeonGetPageRequest,
@@ -805,19 +803,15 @@ prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns,
bool is_prefetch) bool is_prefetch)
{ {
uint64 min_ring_index; uint64 min_ring_index;
PrefetchRequest hashkey; PrefetchRequest req;
#if USE_ASSERT_CHECKING #if USE_ASSERT_CHECKING
bool any_hits = false; bool any_hits = false;
#endif #endif
/* We will never read further ahead than our buffer can store. */ /* We will never read further ahead than our buffer can store. */
nblocks = Max(1, Min(nblocks, readahead_buffer_size)); nblocks = Max(1, Min(nblocks, readahead_buffer_size));
/* /* use an intermediate PrefetchRequest struct to ensure correct alignment */
* Use an intermediate PrefetchRequest struct as the hash key to ensure req.buftag = tag;
* correct alignment and that the padding bytes are cleared.
*/
memset(&hashkey.buftag, 0, sizeof(BufferTag));
hashkey.buftag = tag;
Retry: Retry:
min_ring_index = UINT64_MAX; min_ring_index = UINT64_MAX;
@@ -843,8 +837,8 @@ Retry:
slot = NULL; slot = NULL;
entry = NULL; entry = NULL;
hashkey.buftag.blockNum = tag.blockNum + i; req.buftag.blockNum = tag.blockNum + i;
entry = prfh_lookup(MyPState->prf_hash, &hashkey); entry = prfh_lookup(MyPState->prf_hash, (PrefetchRequest *) &req);
if (entry != NULL) if (entry != NULL)
{ {
@@ -855,7 +849,7 @@ Retry:
Assert(slot->status != PRFS_UNUSED); Assert(slot->status != PRFS_UNUSED);
Assert(MyPState->ring_last <= ring_index && Assert(MyPState->ring_last <= ring_index &&
ring_index < MyPState->ring_unused); ring_index < MyPState->ring_unused);
Assert(BufferTagsEqual(&slot->buftag, &hashkey.buftag)); Assert(BUFFERTAGS_EQUAL(slot->buftag, req.buftag));
/* /*
* If the caller specified a request LSN to use, only accept * If the caller specified a request LSN to use, only accept
@@ -892,19 +886,12 @@ Retry:
{ {
min_ring_index = Min(min_ring_index, ring_index); min_ring_index = Min(min_ring_index, ring_index);
/* The buffered request is good enough, return that index */ /* The buffered request is good enough, return that index */
if (is_prefetch)
pgBufferUsage.prefetch.duplicates++; pgBufferUsage.prefetch.duplicates++;
else
pgBufferUsage.prefetch.hits++;
continue; continue;
} }
} }
} }
else if (!is_prefetch)
{
pgBufferUsage.prefetch.misses += 1;
MyNeonCounters->getpage_prefetch_misses_total++;
}
/* /*
* We can only leave the block above by finding that there's * We can only leave the block above by finding that there's
* no entry that can satisfy this request, either because there * no entry that can satisfy this request, either because there
@@ -987,7 +974,7 @@ Retry:
* We must update the slot data before insertion, because the hash * We must update the slot data before insertion, because the hash
* function reads the buffer tag from the slot. * function reads the buffer tag from the slot.
*/ */
slot->buftag = hashkey.buftag; slot->buftag = req.buftag;
slot->shard_no = get_shard_number(&tag); slot->shard_no = get_shard_number(&tag);
slot->my_ring_index = ring_index; slot->my_ring_index = ring_index;
@@ -1465,6 +1452,7 @@ log_newpages_copy(NRelFileInfo * rinfo, ForkNumber forkNum, BlockNumber blkno,
BlockNumber blknos[XLR_MAX_BLOCK_ID]; BlockNumber blknos[XLR_MAX_BLOCK_ID];
Page pageptrs[XLR_MAX_BLOCK_ID]; Page pageptrs[XLR_MAX_BLOCK_ID];
int nregistered = 0; int nregistered = 0;
XLogRecPtr result = 0;
for (int i = 0; i < nblocks; i++) for (int i = 0; i < nblocks; i++)
{ {
@@ -1777,7 +1765,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
/* /*
* neon_init() -- Initialize private state * neon_init() -- Initialize private state
*/ */
static void void
neon_init(void) neon_init(void)
{ {
Size prfs_size; Size prfs_size;
@@ -2167,7 +2155,7 @@ neon_prefetch_response_usable(neon_request_lsns *request_lsns,
/* /*
* neon_exists() -- Does the physical file exist? * neon_exists() -- Does the physical file exist?
*/ */
static bool bool
neon_exists(SMgrRelation reln, ForkNumber forkNum) neon_exists(SMgrRelation reln, ForkNumber forkNum)
{ {
bool exists; bool exists;
@@ -2273,7 +2261,7 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
* *
* If isRedo is true, it's okay for the relation to exist already. * If isRedo is true, it's okay for the relation to exist already.
*/ */
static void void
neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo) neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
{ {
switch (reln->smgr_relpersistence) switch (reln->smgr_relpersistence)
@@ -2349,7 +2337,7 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
* Note: any failure should be reported as WARNING not ERROR, because * Note: any failure should be reported as WARNING not ERROR, because
* we are usually not in a transaction anymore when this is called. * we are usually not in a transaction anymore when this is called.
*/ */
static void void
neon_unlink(NRelFileInfoBackend rinfo, ForkNumber forkNum, bool isRedo) neon_unlink(NRelFileInfoBackend rinfo, ForkNumber forkNum, bool isRedo)
{ {
/* /*
@@ -2373,7 +2361,7 @@ neon_unlink(NRelFileInfoBackend rinfo, ForkNumber forkNum, bool isRedo)
* EOF). Note that we assume writing a block beyond current EOF * EOF). Note that we assume writing a block beyond current EOF
* causes intervening file space to become filled with zeroes. * causes intervening file space to become filled with zeroes.
*/ */
static void void
#if PG_MAJORVERSION_NUM < 16 #if PG_MAJORVERSION_NUM < 16
neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
char *buffer, bool skipFsync) char *buffer, bool skipFsync)
@@ -2465,7 +2453,7 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
} }
#if PG_MAJORVERSION_NUM >= 16 #if PG_MAJORVERSION_NUM >= 16
static void void
neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum, neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
int nblocks, bool skipFsync) int nblocks, bool skipFsync)
{ {
@@ -2561,7 +2549,7 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
/* /*
* neon_open() -- Initialize newly-opened relation. * neon_open() -- Initialize newly-opened relation.
*/ */
static void void
neon_open(SMgrRelation reln) neon_open(SMgrRelation reln)
{ {
/* /*
@@ -2579,7 +2567,7 @@ neon_open(SMgrRelation reln)
/* /*
* neon_close() -- Close the specified relation, if it isn't closed already. * neon_close() -- Close the specified relation, if it isn't closed already.
*/ */
static void void
neon_close(SMgrRelation reln, ForkNumber forknum) neon_close(SMgrRelation reln, ForkNumber forknum)
{ {
/* /*
@@ -2594,12 +2582,13 @@ neon_close(SMgrRelation reln, ForkNumber forknum)
/* /*
* neon_prefetch() -- Initiate asynchronous read of the specified block of a relation * neon_prefetch() -- Initiate asynchronous read of the specified block of a relation
*/ */
static bool bool
neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
int nblocks) int nblocks)
{ {
uint64 ring_index PG_USED_FOR_ASSERTS_ONLY; uint64 ring_index PG_USED_FOR_ASSERTS_ONLY;
BufferTag tag; BufferTag tag;
bool io_initiated = false;
switch (reln->smgr_relpersistence) switch (reln->smgr_relpersistence)
{ {
@@ -2623,6 +2612,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
while (nblocks > 0) while (nblocks > 0)
{ {
int iterblocks = Min(nblocks, PG_IOV_MAX); int iterblocks = Min(nblocks, PG_IOV_MAX);
int seqlen = 0;
bits8 lfc_present[PG_IOV_MAX / 8]; bits8 lfc_present[PG_IOV_MAX / 8];
memset(lfc_present, 0, sizeof(lfc_present)); memset(lfc_present, 0, sizeof(lfc_present));
@@ -2634,6 +2624,8 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
continue; continue;
} }
io_initiated = true;
tag.blockNum = blocknum; tag.blockNum = blocknum;
for (int i = 0; i < PG_IOV_MAX / 8; i++) for (int i = 0; i < PG_IOV_MAX / 8; i++)
@@ -2656,7 +2648,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
/* /*
* neon_prefetch() -- Initiate asynchronous read of the specified block of a relation * neon_prefetch() -- Initiate asynchronous read of the specified block of a relation
*/ */
static bool bool
neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
{ {
uint64 ring_index PG_USED_FOR_ASSERTS_ONLY; uint64 ring_index PG_USED_FOR_ASSERTS_ONLY;
@@ -2700,7 +2692,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
* This accepts a range of blocks because flushing several pages at once is * This accepts a range of blocks because flushing several pages at once is
* considerably more efficient than doing so individually. * considerably more efficient than doing so individually.
*/ */
static void void
neon_writeback(SMgrRelation reln, ForkNumber forknum, neon_writeback(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, BlockNumber nblocks) BlockNumber blocknum, BlockNumber nblocks)
{ {
@@ -2750,19 +2742,14 @@ neon_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_block
uint64 ring_index; uint64 ring_index;
PrfHashEntry *entry; PrfHashEntry *entry;
PrefetchRequest *slot; PrefetchRequest *slot;
PrefetchRequest hashkey; BufferTag buftag = {0};
Assert(PointerIsValid(request_lsns)); Assert(PointerIsValid(request_lsns));
Assert(nblocks >= 1); Assert(nblocks >= 1);
/* CopyNRelFileInfoToBufTag(buftag, rinfo);
* Use an intermediate PrefetchRequest struct as the hash key to ensure buftag.forkNum = forkNum;
* correct alignment and that the padding bytes are cleared. buftag.blockNum = base_blockno;
*/
memset(&hashkey.buftag, 0, sizeof(BufferTag));
CopyNRelFileInfoToBufTag(hashkey.buftag, rinfo);
hashkey.buftag.forkNum = forkNum;
hashkey.buftag.blockNum = base_blockno;
/* /*
* The redo process does not lock pages that it needs to replay but are * The redo process does not lock pages that it needs to replay but are
@@ -2780,7 +2767,7 @@ neon_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_block
* weren't for the behaviour of the LwLsn cache that uses the highest * weren't for the behaviour of the LwLsn cache that uses the highest
* value of the LwLsn cache when the entry is not found. * value of the LwLsn cache when the entry is not found.
*/ */
prefetch_register_bufferv(hashkey.buftag, request_lsns, nblocks, mask, false); prefetch_register_bufferv(buftag, request_lsns, nblocks, mask, false);
for (int i = 0; i < nblocks; i++) for (int i = 0; i < nblocks; i++)
{ {
@@ -2801,8 +2788,8 @@ neon_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_block
* Try to find prefetched page in the list of received pages. * Try to find prefetched page in the list of received pages.
*/ */
Retry: Retry:
hashkey.buftag.blockNum = blockno; buftag.blockNum = blockno;
entry = prfh_lookup(MyPState->prf_hash, &hashkey); entry = prfh_lookup(MyPState->prf_hash, (PrefetchRequest *) &buftag);
if (entry != NULL) if (entry != NULL)
{ {
@@ -2810,6 +2797,7 @@ Retry:
if (neon_prefetch_response_usable(reqlsns, slot)) if (neon_prefetch_response_usable(reqlsns, slot))
{ {
ring_index = slot->my_ring_index; ring_index = slot->my_ring_index;
pgBufferUsage.prefetch.hits += 1;
} }
else else
{ {
@@ -2839,7 +2827,10 @@ Retry:
{ {
if (entry == NULL) if (entry == NULL)
{ {
ring_index = prefetch_register_bufferv(hashkey.buftag, reqlsns, 1, NULL, false); pgBufferUsage.prefetch.misses += 1;
MyNeonCounters->getpage_prefetch_misses_total++;
ring_index = prefetch_register_bufferv(buftag, reqlsns, 1, NULL, false);
Assert(ring_index != UINT64_MAX); Assert(ring_index != UINT64_MAX);
slot = GetPrfSlot(ring_index); slot = GetPrfSlot(ring_index);
} }
@@ -2864,8 +2855,8 @@ Retry:
} while (!prefetch_wait_for(ring_index)); } while (!prefetch_wait_for(ring_index));
Assert(slot->status == PRFS_RECEIVED); Assert(slot->status == PRFS_RECEIVED);
Assert(memcmp(&hashkey.buftag, &slot->buftag, sizeof(BufferTag)) == 0); Assert(memcmp(&buftag, &slot->buftag, sizeof(BufferTag)) == 0);
Assert(hashkey.buftag.blockNum == base_blockno + i); Assert(buftag.blockNum == base_blockno + i);
resp = slot->response; resp = slot->response;
@@ -2921,10 +2912,10 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
* neon_read() -- Read the specified block from a relation. * neon_read() -- Read the specified block from a relation.
*/ */
#if PG_MAJORVERSION_NUM < 16 #if PG_MAJORVERSION_NUM < 16
static void void
neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, char *buffer) neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, char *buffer)
#else #else
static void void
neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer) neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer)
#endif #endif
{ {
@@ -3033,7 +3024,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
#endif /* PG_MAJORVERSION_NUM <= 16 */ #endif /* PG_MAJORVERSION_NUM <= 16 */
#if PG_MAJORVERSION_NUM >= 17 #if PG_MAJORVERSION_NUM >= 17
static void void
neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
void **buffers, BlockNumber nblocks) void **buffers, BlockNumber nblocks)
{ {
@@ -3068,9 +3059,6 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
lfc_result = lfc_readv_select(InfoFromSMgrRel(reln), forknum, blocknum, buffers, lfc_result = lfc_readv_select(InfoFromSMgrRel(reln), forknum, blocknum, buffers,
nblocks, read); nblocks, read);
if (lfc_result > 0)
MyNeonCounters->file_cache_hits_total += lfc_result;
/* Read all blocks from LFC, so we're done */ /* Read all blocks from LFC, so we're done */
if (lfc_result == nblocks) if (lfc_result == nblocks)
return; return;
@@ -3197,7 +3185,6 @@ hexdump_page(char *page)
} }
#endif #endif
#if PG_MAJORVERSION_NUM < 17
/* /*
* neon_write() -- Write the supplied block at the appropriate location. * neon_write() -- Write the supplied block at the appropriate location.
* *
@@ -3205,7 +3192,7 @@ hexdump_page(char *page)
* relation (ie, those before the current EOF). To extend a relation, * relation (ie, those before the current EOF). To extend a relation,
* use mdextend(). * use mdextend().
*/ */
static void void
#if PG_MAJORVERSION_NUM < 16 #if PG_MAJORVERSION_NUM < 16
neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync) neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
#else #else
@@ -3271,12 +3258,11 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const vo
#endif #endif
#endif #endif
} }
#endif
#if PG_MAJORVERSION_NUM >= 17 #if PG_MAJORVERSION_NUM >= 17
static void void
neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
const void **buffers, BlockNumber nblocks, bool skipFsync) const void **buffers, BlockNumber nblocks, bool skipFsync)
{ {
@@ -3326,7 +3312,7 @@ neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
/* /*
* neon_nblocks() -- Get the number of blocks stored in a relation. * neon_nblocks() -- Get the number of blocks stored in a relation.
*/ */
static BlockNumber BlockNumber
neon_nblocks(SMgrRelation reln, ForkNumber forknum) neon_nblocks(SMgrRelation reln, ForkNumber forknum)
{ {
NeonResponse *resp; NeonResponse *resp;
@@ -3463,7 +3449,7 @@ neon_dbsize(Oid dbNode)
/* /*
* neon_truncate() -- Truncate relation to specified number of blocks. * neon_truncate() -- Truncate relation to specified number of blocks.
*/ */
static void void
neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks) neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
{ {
XLogRecPtr lsn; XLogRecPtr lsn;
@@ -3532,7 +3518,7 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
* crash before the next checkpoint syncs the newly-inactive segment, that * crash before the next checkpoint syncs the newly-inactive segment, that
* segment may survive recovery, reintroducing unwanted data into the table. * segment may survive recovery, reintroducing unwanted data into the table.
*/ */
static void void
neon_immedsync(SMgrRelation reln, ForkNumber forknum) neon_immedsync(SMgrRelation reln, ForkNumber forknum)
{ {
switch (reln->smgr_relpersistence) switch (reln->smgr_relpersistence)
@@ -3562,8 +3548,8 @@ neon_immedsync(SMgrRelation reln, ForkNumber forknum)
} }
#if PG_MAJORVERSION_NUM >= 17 #if PG_MAJORVERSION_NUM >= 17
static void void
neon_registersync(SMgrRelation reln, ForkNumber forknum) neon_regisersync(SMgrRelation reln, ForkNumber forknum)
{ {
switch (reln->smgr_relpersistence) switch (reln->smgr_relpersistence)
{ {
@@ -3747,8 +3733,6 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
SlruKind kind; SlruKind kind;
int n_blocks; int n_blocks;
shardno_t shard_no = 0; /* All SLRUs are at shard 0 */ shardno_t shard_no = 0; /* All SLRUs are at shard 0 */
NeonResponse *resp;
NeonGetSlruSegmentRequest request;
/* /*
* Compute a request LSN to use, similar to neon_get_request_lsns() but the * Compute a request LSN to use, similar to neon_get_request_lsns() but the
@@ -3787,7 +3771,8 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
else else
return -1; return -1;
request = (NeonGetSlruSegmentRequest) { NeonResponse *resp;
NeonGetSlruSegmentRequest request = {
.req.tag = T_NeonGetSlruSegmentRequest, .req.tag = T_NeonGetSlruSegmentRequest,
.req.lsn = request_lsn, .req.lsn = request_lsn,
.req.not_modified_since = not_modified_since, .req.not_modified_since = not_modified_since,
@@ -3894,7 +3879,7 @@ static const struct f_smgr neon_smgr =
.smgr_truncate = neon_truncate, .smgr_truncate = neon_truncate,
.smgr_immedsync = neon_immedsync, .smgr_immedsync = neon_immedsync,
#if PG_MAJORVERSION_NUM >= 17 #if PG_MAJORVERSION_NUM >= 17
.smgr_registersync = neon_registersync, .smgr_registersync = neon_regisersync,
#endif #endif
.smgr_start_unlogged_build = neon_start_unlogged_build, .smgr_start_unlogged_build = neon_start_unlogged_build,
.smgr_finish_unlogged_build_phase_1 = neon_finish_unlogged_build_phase_1, .smgr_finish_unlogged_build_phase_1 = neon_finish_unlogged_build_phase_1,

View File

@@ -252,6 +252,8 @@ WalProposerPoll(WalProposer *wp)
/* timeout expired: poll state */ /* timeout expired: poll state */
if (rc == 0 || TimeToReconnect(wp, now) <= 0) if (rc == 0 || TimeToReconnect(wp, now) <= 0)
{ {
TimestampTz now;
/* /*
* If no WAL was generated during timeout (and we have already * If no WAL was generated during timeout (and we have already
* collected the quorum), then send empty keepalive message * collected the quorum), then send empty keepalive message
@@ -267,7 +269,8 @@ WalProposerPoll(WalProposer *wp)
now = wp->api.get_current_timestamp(wp); now = wp->api.get_current_timestamp(wp);
for (int i = 0; i < wp->n_safekeepers; i++) for (int i = 0; i < wp->n_safekeepers; i++)
{ {
sk = &wp->safekeeper[i]; Safekeeper *sk = &wp->safekeeper[i];
if (TimestampDifferenceExceeds(sk->latestMsgReceivedAt, now, if (TimestampDifferenceExceeds(sk->latestMsgReceivedAt, now,
wp->config->safekeeper_connection_timeout)) wp->config->safekeeper_connection_timeout))
{ {
@@ -1077,7 +1080,7 @@ SendProposerElected(Safekeeper *sk)
ProposerElected msg; ProposerElected msg;
TermHistory *th; TermHistory *th;
term_t lastCommonTerm; term_t lastCommonTerm;
int idx; int i;
/* Now that we are ready to send it's a good moment to create WAL reader */ /* Now that we are ready to send it's a good moment to create WAL reader */
wp->api.wal_reader_allocate(sk); wp->api.wal_reader_allocate(sk);
@@ -1096,15 +1099,15 @@ SendProposerElected(Safekeeper *sk)
/* We must start somewhere. */ /* We must start somewhere. */
Assert(wp->propTermHistory.n_entries >= 1); Assert(wp->propTermHistory.n_entries >= 1);
for (idx = 0; idx < Min(wp->propTermHistory.n_entries, th->n_entries); idx++) for (i = 0; i < Min(wp->propTermHistory.n_entries, th->n_entries); i++)
{ {
if (wp->propTermHistory.entries[idx].term != th->entries[idx].term) if (wp->propTermHistory.entries[i].term != th->entries[i].term)
break; break;
/* term must begin everywhere at the same point */ /* term must begin everywhere at the same point */
Assert(wp->propTermHistory.entries[idx].lsn == th->entries[idx].lsn); Assert(wp->propTermHistory.entries[i].lsn == th->entries[i].lsn);
} }
idx--; /* step back to the last common term */ i--; /* step back to the last common term */
if (idx < 0) if (i < 0)
{ {
/* safekeeper is empty or no common point, start from the beginning */ /* safekeeper is empty or no common point, start from the beginning */
sk->startStreamingAt = wp->propTermHistory.entries[0].lsn; sk->startStreamingAt = wp->propTermHistory.entries[0].lsn;
@@ -1125,14 +1128,14 @@ SendProposerElected(Safekeeper *sk)
* proposer, LSN it is currently writing, but then we just pick * proposer, LSN it is currently writing, but then we just pick
* safekeeper pos as it obviously can't be higher. * safekeeper pos as it obviously can't be higher.
*/ */
if (wp->propTermHistory.entries[idx].term == wp->propTerm) if (wp->propTermHistory.entries[i].term == wp->propTerm)
{ {
sk->startStreamingAt = sk->voteResponse.flushLsn; sk->startStreamingAt = sk->voteResponse.flushLsn;
} }
else else
{ {
XLogRecPtr propEndLsn = wp->propTermHistory.entries[idx + 1].lsn; XLogRecPtr propEndLsn = wp->propTermHistory.entries[i + 1].lsn;
XLogRecPtr skEndLsn = (idx + 1 < th->n_entries ? th->entries[idx + 1].lsn : sk->voteResponse.flushLsn); XLogRecPtr skEndLsn = (i + 1 < th->n_entries ? th->entries[i + 1].lsn : sk->voteResponse.flushLsn);
sk->startStreamingAt = Min(propEndLsn, skEndLsn); sk->startStreamingAt = Min(propEndLsn, skEndLsn);
} }
@@ -1146,7 +1149,7 @@ SendProposerElected(Safekeeper *sk)
msg.termHistory = &wp->propTermHistory; msg.termHistory = &wp->propTermHistory;
msg.timelineStartLsn = wp->timelineStartLsn; msg.timelineStartLsn = wp->timelineStartLsn;
lastCommonTerm = idx >= 0 ? wp->propTermHistory.entries[idx].term : 0; lastCommonTerm = i >= 0 ? wp->propTermHistory.entries[i].term : 0;
wp_log(LOG, wp_log(LOG,
"sending elected msg to node " UINT64_FORMAT " term=" UINT64_FORMAT ", startStreamingAt=%X/%X (lastCommonTerm=" UINT64_FORMAT "), termHistory.n_entries=%u to %s:%s, timelineStartLsn=%X/%X", "sending elected msg to node " UINT64_FORMAT " term=" UINT64_FORMAT ", startStreamingAt=%X/%X (lastCommonTerm=" UINT64_FORMAT "), termHistory.n_entries=%u to %s:%s, timelineStartLsn=%X/%X",
sk->greetResponse.nodeId, msg.term, LSN_FORMAT_ARGS(msg.startStreamingAt), lastCommonTerm, msg.termHistory->n_entries, sk->host, sk->port, LSN_FORMAT_ARGS(msg.timelineStartLsn)); sk->greetResponse.nodeId, msg.term, LSN_FORMAT_ARGS(msg.startStreamingAt), lastCommonTerm, msg.termHistory->n_entries, sk->host, sk->port, LSN_FORMAT_ARGS(msg.timelineStartLsn));
@@ -1638,7 +1641,7 @@ UpdateDonorShmem(WalProposer *wp)
* Process AppendResponse message from safekeeper. * Process AppendResponse message from safekeeper.
*/ */
static void static void
HandleSafekeeperResponse(WalProposer *wp, Safekeeper *fromsk) HandleSafekeeperResponse(WalProposer *wp, Safekeeper *sk)
{ {
XLogRecPtr candidateTruncateLsn; XLogRecPtr candidateTruncateLsn;
XLogRecPtr newCommitLsn; XLogRecPtr newCommitLsn;
@@ -1657,7 +1660,7 @@ HandleSafekeeperResponse(WalProposer *wp, Safekeeper *fromsk)
* and WAL is committed by the quorum. BroadcastAppendRequest() should be * and WAL is committed by the quorum. BroadcastAppendRequest() should be
* called to notify safekeepers about the new commitLsn. * called to notify safekeepers about the new commitLsn.
*/ */
wp->api.process_safekeeper_feedback(wp, fromsk); wp->api.process_safekeeper_feedback(wp, sk);
/* /*
* Try to advance truncateLsn -- the last record flushed to all * Try to advance truncateLsn -- the last record flushed to all

View File

@@ -725,7 +725,7 @@ extern void WalProposerBroadcast(WalProposer *wp, XLogRecPtr startpos, XLogRecPt
extern void WalProposerPoll(WalProposer *wp); extern void WalProposerPoll(WalProposer *wp);
extern void WalProposerFree(WalProposer *wp); extern void WalProposerFree(WalProposer *wp);
extern WalproposerShmemState *GetWalpropShmemState(void); extern WalproposerShmemState *GetWalpropShmemState();
/* /*
* WaitEventSet API doesn't allow to remove socket, so walproposer_pg uses it to * WaitEventSet API doesn't allow to remove socket, so walproposer_pg uses it to
@@ -745,7 +745,7 @@ extern TimeLineID walprop_pg_get_timeline_id(void);
* catch logging. * catch logging.
*/ */
#ifdef WALPROPOSER_LIB #ifdef WALPROPOSER_LIB
extern void WalProposerLibLog(WalProposer *wp, int elevel, char *fmt,...) pg_attribute_printf(3, 4); extern void WalProposerLibLog(WalProposer *wp, int elevel, char *fmt,...);
#define wp_log(elevel, fmt, ...) WalProposerLibLog(wp, elevel, fmt, ## __VA_ARGS__) #define wp_log(elevel, fmt, ...) WalProposerLibLog(wp, elevel, fmt, ## __VA_ARGS__)
#else #else
#define wp_log(elevel, fmt, ...) elog(elevel, WP_LOG_PREFIX fmt, ## __VA_ARGS__) #define wp_log(elevel, fmt, ...) elog(elevel, WP_LOG_PREFIX fmt, ## __VA_ARGS__)

View File

@@ -286,9 +286,6 @@ safekeepers_cmp(char *old, char *new)
static void static void
assign_neon_safekeepers(const char *newval, void *extra) assign_neon_safekeepers(const char *newval, void *extra)
{ {
char *newval_copy;
char *oldval;
if (!am_walproposer) if (!am_walproposer)
return; return;
@@ -298,8 +295,8 @@ assign_neon_safekeepers(const char *newval, void *extra)
} }
/* Copy values because we will modify them in split_safekeepers_list() */ /* Copy values because we will modify them in split_safekeepers_list() */
newval_copy = pstrdup(newval); char *newval_copy = pstrdup(newval);
oldval = pstrdup(wal_acceptors_list); char *oldval = pstrdup(wal_acceptors_list);
/* /*
* TODO: restarting through FATAL is stupid and introduces 1s delay before * TODO: restarting through FATAL is stupid and introduces 1s delay before
@@ -541,7 +538,7 @@ nwp_shmem_startup_hook(void)
} }
WalproposerShmemState * WalproposerShmemState *
GetWalpropShmemState(void) GetWalpropShmemState()
{ {
Assert(walprop_shared != NULL); Assert(walprop_shared != NULL);
return walprop_shared; return walprop_shared;

View File

@@ -191,14 +191,13 @@ NeonOnDemandXLogReaderRoutines(XLogReaderRoutine *xlr)
if (!wal_reader) if (!wal_reader)
{ {
XLogRecPtr basebackupLsn = GetRedoStartLsn(); XLogRecPtr epochStartLsn = pg_atomic_read_u64(&GetWalpropShmemState()->propEpochStartLsn);
/* should never happen */ if (epochStartLsn == 0)
if (basebackupLsn == 0)
{ {
elog(ERROR, "unable to start walsender when basebackupLsn is 0"); elog(ERROR, "Unable to start walsender when propEpochStartLsn is 0!");
} }
wal_reader = NeonWALReaderAllocate(wal_segment_size, basebackupLsn, "[walsender] "); wal_reader = NeonWALReaderAllocate(wal_segment_size, epochStartLsn, "[walsender] ");
} }
xlr->page_read = NeonWALPageRead; xlr->page_read = NeonWALPageRead;
xlr->segment_open = NeonWALReadSegmentOpen; xlr->segment_open = NeonWALReadSegmentOpen;

View File

@@ -44,6 +44,27 @@ infobits_desc(StringInfo buf, uint8 infobits, const char *keyname)
appendStringInfoString(buf, "]"); appendStringInfoString(buf, "]");
} }
static void
truncate_flags_desc(StringInfo buf, uint8 flags)
{
appendStringInfoString(buf, "flags: [");
if (flags & XLH_TRUNCATE_CASCADE)
appendStringInfoString(buf, "CASCADE, ");
if (flags & XLH_TRUNCATE_RESTART_SEQS)
appendStringInfoString(buf, "RESTART_SEQS, ");
if (buf->data[buf->len - 1] == ' ')
{
/* Truncate-away final unneeded ", " */
Assert(buf->data[buf->len - 2] == ',');
buf->len -= 2;
buf->data[buf->len] = '\0';
}
appendStringInfoString(buf, "]");
}
void void
neon_rm_desc(StringInfo buf, XLogReaderState *record) neon_rm_desc(StringInfo buf, XLogReaderState *record)
{ {

View File

@@ -136,7 +136,7 @@ static bool redo_block_filter(XLogReaderState *record, uint8 block_id);
static void GetPage(StringInfo input_message); static void GetPage(StringInfo input_message);
static void Ping(StringInfo input_message); static void Ping(StringInfo input_message);
static ssize_t buffered_read(void *buf, size_t count); static ssize_t buffered_read(void *buf, size_t count);
static void CreateFakeSharedMemoryAndSemaphores(void); static void CreateFakeSharedMemoryAndSemaphores();
static BufferTag target_redo_tag; static BufferTag target_redo_tag;
@@ -170,9 +170,21 @@ close_range_syscall(unsigned int start_fd, unsigned int count, unsigned int flag
return syscall(__NR_close_range, start_fd, count, flags); return syscall(__NR_close_range, start_fd, count, flags);
} }
static void
static PgSeccompRule allowed_syscalls[] = enter_seccomp_mode(void)
{ {
/*
* The pageserver process relies on us to close all the file descriptors
* it potentially leaked to us, _before_ we start processing potentially dangerous
* wal records. See the comment in the Rust code that launches this process.
*/
if (close_range_syscall(3, ~0U, 0) != 0)
ereport(FATAL,
(errcode(ERRCODE_SYSTEM_ERROR),
errmsg("seccomp: could not close files >= fd 3")));
PgSeccompRule syscalls[] =
{
/* Hard requirements */ /* Hard requirements */
PG_SCMP_ALLOW(exit_group), PG_SCMP_ALLOW(exit_group),
PG_SCMP_ALLOW(pselect6), PG_SCMP_ALLOW(pselect6),
@@ -195,34 +207,20 @@ static PgSeccompRule allowed_syscalls[] =
*/ */
PG_SCMP_ALLOW(getpid), PG_SCMP_ALLOW(getpid),
/* Enable those for a proper shutdown. */ /* Enable those for a proper shutdown.
#if 0
PG_SCMP_ALLOW(munmap), PG_SCMP_ALLOW(munmap),
PG_SCMP_ALLOW(shmctl), PG_SCMP_ALLOW(shmctl),
PG_SCMP_ALLOW(shmdt), PG_SCMP_ALLOW(shmdt),
PG_SCMP_ALLOW(unlink), /* shm_unlink */ PG_SCMP_ALLOW(unlink), // shm_unlink
#endif
};
static void
enter_seccomp_mode(void)
{
/*
* The pageserver process relies on us to close all the file descriptors
* it potentially leaked to us, _before_ we start processing potentially dangerous
* wal records. See the comment in the Rust code that launches this process.
*/ */
if (close_range_syscall(3, ~0U, 0) != 0) };
ereport(FATAL,
(errcode(ERRCODE_SYSTEM_ERROR),
errmsg("seccomp: could not close files >= fd 3")));
#ifdef MALLOC_NO_MMAP #ifdef MALLOC_NO_MMAP
/* Ask glibc not to use mmap() */ /* Ask glibc not to use mmap() */
mallopt(M_MMAP_MAX, 0); mallopt(M_MMAP_MAX, 0);
#endif #endif
seccomp_load_rules(allowed_syscalls, lengthof(allowed_syscalls)); seccomp_load_rules(syscalls, lengthof(syscalls));
} }
#endif /* HAVE_LIBSECCOMP */ #endif /* HAVE_LIBSECCOMP */
@@ -451,7 +449,7 @@ WalRedoMain(int argc, char *argv[])
* half-initialized postgres. * half-initialized postgres.
*/ */
static void static void
CreateFakeSharedMemoryAndSemaphores(void) CreateFakeSharedMemoryAndSemaphores()
{ {
PGShmemHeader *shim = NULL; PGShmemHeader *shim = NULL;
PGShmemHeader *hdr; PGShmemHeader *hdr;
@@ -994,7 +992,7 @@ redo_block_filter(XLogReaderState *record, uint8 block_id)
* If this block isn't one we are currently restoring, then return 'true' * If this block isn't one we are currently restoring, then return 'true'
* so that this gets ignored * so that this gets ignored
*/ */
return !BufferTagsEqual(&target_tag, &target_redo_tag); return !BUFFERTAGS_EQUAL(target_tag, target_redo_tag);
} }
/* /*

View File

@@ -1,12 +1,11 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations
import argparse import argparse
import enum import enum
import os import os
import subprocess import subprocess
import sys import sys
from typing import List
@enum.unique @enum.unique
@@ -56,12 +55,12 @@ def mypy() -> str:
return "poetry run mypy" return "poetry run mypy"
def get_commit_files() -> list[str]: def get_commit_files() -> List[str]:
files = subprocess.check_output("git diff --cached --name-only --diff-filter=ACM".split()) files = subprocess.check_output("git diff --cached --name-only --diff-filter=ACM".split())
return files.decode().splitlines() return files.decode().splitlines()
def check(name: str, suffix: str, cmd: str, changed_files: list[str], no_color: bool = False): def check(name: str, suffix: str, cmd: str, changed_files: List[str], no_color: bool = False):
print(f"Checking: {name} ", end="") print(f"Checking: {name} ", end="")
applicable_files = list(filter(lambda fname: fname.strip().endswith(suffix), changed_files)) applicable_files = list(filter(lambda fname: fname.strip().endswith(suffix), changed_files))
if not applicable_files: if not applicable_files:

View File

@@ -39,7 +39,7 @@ http.workspace = true
humantime.workspace = true humantime.workspace = true
humantime-serde.workspace = true humantime-serde.workspace = true
hyper0.workspace = true hyper0.workspace = true
hyper = { workspace = true, features = ["server", "http1", "http2"] } hyper1 = { package = "hyper", version = "1.2", features = ["server"] }
hyper-util = { version = "0.1", features = ["server", "http1", "http2", "tokio"] } hyper-util = { version = "0.1", features = ["server", "http1", "http2", "tokio"] }
http-body-util = { version = "0.1" } http-body-util = { version = "0.1" }
indexmap.workspace = true indexmap.workspace = true

View File

@@ -18,7 +18,7 @@ pub(crate) use flow::*;
use tokio::time::error::Elapsed; use tokio::time::error::Elapsed;
use crate::{ use crate::{
control_plane, console,
error::{ReportableError, UserFacingError}, error::{ReportableError, UserFacingError},
}; };
use std::{io, net::IpAddr}; use std::{io, net::IpAddr};
@@ -34,7 +34,7 @@ pub(crate) enum AuthErrorImpl {
Web(#[from] backend::WebAuthError), Web(#[from] backend::WebAuthError),
#[error(transparent)] #[error(transparent)]
GetAuthInfo(#[from] control_plane::errors::GetAuthInfoError), GetAuthInfo(#[from] console::errors::GetAuthInfoError),
/// SASL protocol errors (includes [SCRAM](crate::scram)). /// SASL protocol errors (includes [SCRAM](crate::scram)).
#[error(transparent)] #[error(transparent)]

View File

@@ -1,27 +1,27 @@
mod classic; mod classic;
mod console_redirect;
mod hacks; mod hacks;
pub mod jwt; pub mod jwt;
pub mod local; pub mod local;
mod web;
use std::net::IpAddr; use std::net::IpAddr;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
pub(crate) use console_redirect::WebAuthError;
use ipnet::{Ipv4Net, Ipv6Net}; use ipnet::{Ipv4Net, Ipv6Net};
use local::LocalBackend; use local::LocalBackend;
use tokio::io::{AsyncRead, AsyncWrite}; use tokio::io::{AsyncRead, AsyncWrite};
use tokio_postgres::config::AuthKeys; use tokio_postgres::config::AuthKeys;
use tracing::{info, warn}; use tracing::{info, warn};
pub(crate) use web::WebAuthError;
use crate::auth::credentials::check_peer_addr_is_in_list; use crate::auth::credentials::check_peer_addr_is_in_list;
use crate::auth::{validate_password_and_exchange, AuthError}; use crate::auth::{validate_password_and_exchange, AuthError};
use crate::cache::Cached; use crate::cache::Cached;
use crate::console::errors::GetAuthInfoError;
use crate::console::provider::{CachedRoleSecret, ConsoleBackend};
use crate::console::{AuthSecret, NodeInfo};
use crate::context::RequestMonitoring; use crate::context::RequestMonitoring;
use crate::control_plane::errors::GetAuthInfoError;
use crate::control_plane::provider::{CachedRoleSecret, ControlPlaneBackend};
use crate::control_plane::{AuthSecret, NodeInfo};
use crate::intern::EndpointIdInt; use crate::intern::EndpointIdInt;
use crate::metrics::Metrics; use crate::metrics::Metrics;
use crate::proxy::connect_compute::ComputeConnectBackend; use crate::proxy::connect_compute::ComputeConnectBackend;
@@ -31,7 +31,7 @@ use crate::stream::Stream;
use crate::{ use crate::{
auth::{self, ComputeUserInfoMaybeEndpoint}, auth::{self, ComputeUserInfoMaybeEndpoint},
config::AuthenticationConfig, config::AuthenticationConfig,
control_plane::{ console::{
self, self,
provider::{CachedAllowedIps, CachedNodeInfo}, provider::{CachedAllowedIps, CachedNodeInfo},
Api, Api,
@@ -67,19 +67,19 @@ impl<T> std::ops::Deref for MaybeOwned<'_, T> {
/// backends which require them for the authentication process. /// backends which require them for the authentication process.
pub enum Backend<'a, T, D> { pub enum Backend<'a, T, D> {
/// Cloud API (V2). /// Cloud API (V2).
ControlPlane(MaybeOwned<'a, ControlPlaneBackend>, T), Console(MaybeOwned<'a, ConsoleBackend>, T),
/// Authentication via a web browser. /// Authentication via a web browser.
ConsoleRedirect(MaybeOwned<'a, url::ApiUrl>, D), Web(MaybeOwned<'a, url::ApiUrl>, D),
/// Local proxy uses configured auth credentials and does not wake compute /// Local proxy uses configured auth credentials and does not wake compute
Local(MaybeOwned<'a, LocalBackend>), Local(MaybeOwned<'a, LocalBackend>),
} }
#[cfg(test)] #[cfg(test)]
pub(crate) trait TestBackend: Send + Sync + 'static { pub(crate) trait TestBackend: Send + Sync + 'static {
fn wake_compute(&self) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError>; fn wake_compute(&self) -> Result<CachedNodeInfo, console::errors::WakeComputeError>;
fn get_allowed_ips_and_secret( fn get_allowed_ips_and_secret(
&self, &self,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), control_plane::errors::GetAuthInfoError>; ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), console::errors::GetAuthInfoError>;
fn dyn_clone(&self) -> Box<dyn TestBackend>; fn dyn_clone(&self) -> Box<dyn TestBackend>;
} }
@@ -93,23 +93,18 @@ impl Clone for Box<dyn TestBackend> {
impl std::fmt::Display for Backend<'_, (), ()> { impl std::fmt::Display for Backend<'_, (), ()> {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self { match self {
Self::ControlPlane(api, ()) => match &**api { Self::Console(api, ()) => match &**api {
ControlPlaneBackend::Management(endpoint) => fmt ConsoleBackend::Console(endpoint) => {
.debug_tuple("ControlPlane::Management") fmt.debug_tuple("Console").field(&endpoint.url()).finish()
.field(&endpoint.url()) }
.finish(),
#[cfg(any(test, feature = "testing"))] #[cfg(any(test, feature = "testing"))]
ControlPlaneBackend::PostgresMock(endpoint) => fmt ConsoleBackend::Postgres(endpoint) => {
.debug_tuple("ControlPlane::PostgresMock") fmt.debug_tuple("Postgres").field(&endpoint.url()).finish()
.field(&endpoint.url()) }
.finish(),
#[cfg(test)] #[cfg(test)]
ControlPlaneBackend::Test(_) => fmt.debug_tuple("ControlPlane::Test").finish(), ConsoleBackend::Test(_) => fmt.debug_tuple("Test").finish(),
}, },
Self::ConsoleRedirect(url, ()) => fmt Self::Web(url, ()) => fmt.debug_tuple("Web").field(&url.as_str()).finish(),
.debug_tuple("ConsoleRedirect")
.field(&url.as_str())
.finish(),
Self::Local(_) => fmt.debug_tuple("Local").finish(), Self::Local(_) => fmt.debug_tuple("Local").finish(),
} }
} }
@@ -120,8 +115,8 @@ impl<T, D> Backend<'_, T, D> {
/// This helps us pass structured config to async tasks. /// This helps us pass structured config to async tasks.
pub(crate) fn as_ref(&self) -> Backend<'_, &T, &D> { pub(crate) fn as_ref(&self) -> Backend<'_, &T, &D> {
match self { match self {
Self::ControlPlane(c, x) => Backend::ControlPlane(MaybeOwned::Borrowed(c), x), Self::Console(c, x) => Backend::Console(MaybeOwned::Borrowed(c), x),
Self::ConsoleRedirect(c, x) => Backend::ConsoleRedirect(MaybeOwned::Borrowed(c), x), Self::Web(c, x) => Backend::Web(MaybeOwned::Borrowed(c), x),
Self::Local(l) => Backend::Local(MaybeOwned::Borrowed(l)), Self::Local(l) => Backend::Local(MaybeOwned::Borrowed(l)),
} }
} }
@@ -133,8 +128,8 @@ impl<'a, T, D> Backend<'a, T, D> {
/// a function to a contained value. /// a function to a contained value.
pub(crate) fn map<R>(self, f: impl FnOnce(T) -> R) -> Backend<'a, R, D> { pub(crate) fn map<R>(self, f: impl FnOnce(T) -> R) -> Backend<'a, R, D> {
match self { match self {
Self::ControlPlane(c, x) => Backend::ControlPlane(c, f(x)), Self::Console(c, x) => Backend::Console(c, f(x)),
Self::ConsoleRedirect(c, x) => Backend::ConsoleRedirect(c, x), Self::Web(c, x) => Backend::Web(c, x),
Self::Local(l) => Backend::Local(l), Self::Local(l) => Backend::Local(l),
} }
} }
@@ -144,8 +139,8 @@ impl<'a, T, D, E> Backend<'a, Result<T, E>, D> {
/// This is most useful for error handling. /// This is most useful for error handling.
pub(crate) fn transpose(self) -> Result<Backend<'a, T, D>, E> { pub(crate) fn transpose(self) -> Result<Backend<'a, T, D>, E> {
match self { match self {
Self::ControlPlane(c, x) => x.map(|x| Backend::ControlPlane(c, x)), Self::Console(c, x) => x.map(|x| Backend::Console(c, x)),
Self::ConsoleRedirect(c, x) => Ok(Backend::ConsoleRedirect(c, x)), Self::Web(c, x) => Ok(Backend::Web(c, x)),
Self::Local(l) => Ok(Backend::Local(l)), Self::Local(l) => Ok(Backend::Local(l)),
} }
} }
@@ -295,7 +290,7 @@ impl AuthenticationConfig {
/// All authentication flows will emit an AuthenticationOk message if successful. /// All authentication flows will emit an AuthenticationOk message if successful.
async fn auth_quirks( async fn auth_quirks(
ctx: &RequestMonitoring, ctx: &RequestMonitoring,
api: &impl control_plane::Api, api: &impl console::Api,
user_info: ComputeUserInfoMaybeEndpoint, user_info: ComputeUserInfoMaybeEndpoint,
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>, client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
allow_cleartext: bool, allow_cleartext: bool,
@@ -417,8 +412,8 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint, &()> {
/// Get username from the credentials. /// Get username from the credentials.
pub(crate) fn get_user(&self) -> &str { pub(crate) fn get_user(&self) -> &str {
match self { match self {
Self::ControlPlane(_, user_info) => &user_info.user, Self::Console(_, user_info) => &user_info.user,
Self::ConsoleRedirect(_, ()) => "web", Self::Web(_, ()) => "web",
Self::Local(_) => "local", Self::Local(_) => "local",
} }
} }
@@ -434,7 +429,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint, &()> {
endpoint_rate_limiter: Arc<EndpointRateLimiter>, endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> auth::Result<Backend<'a, ComputeCredentials, NodeInfo>> { ) -> auth::Result<Backend<'a, ComputeCredentials, NodeInfo>> {
let res = match self { let res = match self {
Self::ControlPlane(api, user_info) => { Self::Console(api, user_info) => {
info!( info!(
user = &*user_info.user, user = &*user_info.user,
project = user_info.endpoint(), project = user_info.endpoint(),
@@ -451,15 +446,15 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint, &()> {
endpoint_rate_limiter, endpoint_rate_limiter,
) )
.await?; .await?;
Backend::ControlPlane(api, credentials) Backend::Console(api, credentials)
} }
// NOTE: this auth backend doesn't use client credentials. // NOTE: this auth backend doesn't use client credentials.
Self::ConsoleRedirect(url, ()) => { Self::Web(url, ()) => {
info!("performing web authentication"); info!("performing web authentication");
let info = console_redirect::authenticate(ctx, config, &url, client).await?; let info = web::authenticate(ctx, config, &url, client).await?;
Backend::ConsoleRedirect(url, info) Backend::Web(url, info)
} }
Self::Local(_) => { Self::Local(_) => {
return Err(auth::AuthError::bad_auth_method("invalid for local proxy")) return Err(auth::AuthError::bad_auth_method("invalid for local proxy"))
@@ -477,8 +472,8 @@ impl Backend<'_, ComputeUserInfo, &()> {
ctx: &RequestMonitoring, ctx: &RequestMonitoring,
) -> Result<CachedRoleSecret, GetAuthInfoError> { ) -> Result<CachedRoleSecret, GetAuthInfoError> {
match self { match self {
Self::ControlPlane(api, user_info) => api.get_role_secret(ctx, user_info).await, Self::Console(api, user_info) => api.get_role_secret(ctx, user_info).await,
Self::ConsoleRedirect(_, ()) => Ok(Cached::new_uncached(None)), Self::Web(_, ()) => Ok(Cached::new_uncached(None)),
Self::Local(_) => Ok(Cached::new_uncached(None)), Self::Local(_) => Ok(Cached::new_uncached(None)),
} }
} }
@@ -488,10 +483,8 @@ impl Backend<'_, ComputeUserInfo, &()> {
ctx: &RequestMonitoring, ctx: &RequestMonitoring,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> { ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> {
match self { match self {
Self::ControlPlane(api, user_info) => { Self::Console(api, user_info) => api.get_allowed_ips_and_secret(ctx, user_info).await,
api.get_allowed_ips_and_secret(ctx, user_info).await Self::Web(_, ()) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
}
Self::ConsoleRedirect(_, ()) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
Self::Local(_) => Ok((Cached::new_uncached(Arc::new(vec![])), None)), Self::Local(_) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
} }
} }
@@ -502,18 +495,18 @@ impl ComputeConnectBackend for Backend<'_, ComputeCredentials, NodeInfo> {
async fn wake_compute( async fn wake_compute(
&self, &self,
ctx: &RequestMonitoring, ctx: &RequestMonitoring,
) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> { ) -> Result<CachedNodeInfo, console::errors::WakeComputeError> {
match self { match self {
Self::ControlPlane(api, creds) => api.wake_compute(ctx, &creds.info).await, Self::Console(api, creds) => api.wake_compute(ctx, &creds.info).await,
Self::ConsoleRedirect(_, info) => Ok(Cached::new_uncached(info.clone())), Self::Web(_, info) => Ok(Cached::new_uncached(info.clone())),
Self::Local(local) => Ok(Cached::new_uncached(local.node_info.clone())), Self::Local(local) => Ok(Cached::new_uncached(local.node_info.clone())),
} }
} }
fn get_keys(&self) -> &ComputeCredentialKeys { fn get_keys(&self) -> &ComputeCredentialKeys {
match self { match self {
Self::ControlPlane(_, creds) => &creds.keys, Self::Console(_, creds) => &creds.keys,
Self::ConsoleRedirect(_, _) => &ComputeCredentialKeys::None, Self::Web(_, _) => &ComputeCredentialKeys::None,
Self::Local(_) => &ComputeCredentialKeys::None, Self::Local(_) => &ComputeCredentialKeys::None,
} }
} }
@@ -524,10 +517,10 @@ impl ComputeConnectBackend for Backend<'_, ComputeCredentials, &()> {
async fn wake_compute( async fn wake_compute(
&self, &self,
ctx: &RequestMonitoring, ctx: &RequestMonitoring,
) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> { ) -> Result<CachedNodeInfo, console::errors::WakeComputeError> {
match self { match self {
Self::ControlPlane(api, creds) => api.wake_compute(ctx, &creds.info).await, Self::Console(api, creds) => api.wake_compute(ctx, &creds.info).await,
Self::ConsoleRedirect(_, ()) => { Self::Web(_, ()) => {
unreachable!("web auth flow doesn't support waking the compute") unreachable!("web auth flow doesn't support waking the compute")
} }
Self::Local(local) => Ok(Cached::new_uncached(local.node_info.clone())), Self::Local(local) => Ok(Cached::new_uncached(local.node_info.clone())),
@@ -536,8 +529,8 @@ impl ComputeConnectBackend for Backend<'_, ComputeCredentials, &()> {
fn get_keys(&self) -> &ComputeCredentialKeys { fn get_keys(&self) -> &ComputeCredentialKeys {
match self { match self {
Self::ControlPlane(_, creds) => &creds.keys, Self::Console(_, creds) => &creds.keys,
Self::ConsoleRedirect(_, ()) => &ComputeCredentialKeys::None, Self::Web(_, ()) => &ComputeCredentialKeys::None,
Self::Local(_) => &ComputeCredentialKeys::None, Self::Local(_) => &ComputeCredentialKeys::None,
} }
} }
@@ -560,12 +553,12 @@ mod tests {
use crate::{ use crate::{
auth::{backend::MaskedIp, ComputeUserInfoMaybeEndpoint, IpPattern}, auth::{backend::MaskedIp, ComputeUserInfoMaybeEndpoint, IpPattern},
config::AuthenticationConfig, config::AuthenticationConfig,
context::RequestMonitoring, console::{
control_plane::{
self, self,
provider::{self, CachedAllowedIps, CachedRoleSecret}, provider::{self, CachedAllowedIps, CachedRoleSecret},
CachedNodeInfo, CachedNodeInfo,
}, },
context::RequestMonitoring,
proxy::NeonOptions, proxy::NeonOptions,
rate_limiter::{EndpointRateLimiter, RateBucketInfo}, rate_limiter::{EndpointRateLimiter, RateBucketInfo},
scram::{threadpool::ThreadPool, ServerSecret}, scram::{threadpool::ThreadPool, ServerSecret},
@@ -579,12 +572,12 @@ mod tests {
secret: AuthSecret, secret: AuthSecret,
} }
impl control_plane::Api for Auth { impl console::Api for Auth {
async fn get_role_secret( async fn get_role_secret(
&self, &self,
_ctx: &RequestMonitoring, _ctx: &RequestMonitoring,
_user_info: &super::ComputeUserInfo, _user_info: &super::ComputeUserInfo,
) -> Result<CachedRoleSecret, control_plane::errors::GetAuthInfoError> { ) -> Result<CachedRoleSecret, console::errors::GetAuthInfoError> {
Ok(CachedRoleSecret::new_uncached(Some(self.secret.clone()))) Ok(CachedRoleSecret::new_uncached(Some(self.secret.clone())))
} }
@@ -592,10 +585,8 @@ mod tests {
&self, &self,
_ctx: &RequestMonitoring, _ctx: &RequestMonitoring,
_user_info: &super::ComputeUserInfo, _user_info: &super::ComputeUserInfo,
) -> Result< ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), console::errors::GetAuthInfoError>
(CachedAllowedIps, Option<CachedRoleSecret>), {
control_plane::errors::GetAuthInfoError,
> {
Ok(( Ok((
CachedAllowedIps::new_uncached(Arc::new(self.ips.clone())), CachedAllowedIps::new_uncached(Arc::new(self.ips.clone())),
Some(CachedRoleSecret::new_uncached(Some(self.secret.clone()))), Some(CachedRoleSecret::new_uncached(Some(self.secret.clone()))),
@@ -614,7 +605,7 @@ mod tests {
&self, &self,
_ctx: &RequestMonitoring, _ctx: &RequestMonitoring,
_user_info: &super::ComputeUserInfo, _user_info: &super::ComputeUserInfo,
) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> { ) -> Result<CachedNodeInfo, console::errors::WakeComputeError> {
unimplemented!() unimplemented!()
} }
} }

View File

@@ -3,8 +3,8 @@ use crate::{
auth::{self, backend::ComputeCredentialKeys, AuthFlow}, auth::{self, backend::ComputeCredentialKeys, AuthFlow},
compute, compute,
config::AuthenticationConfig, config::AuthenticationConfig,
console::AuthSecret,
context::RequestMonitoring, context::RequestMonitoring,
control_plane::AuthSecret,
sasl, sasl,
stream::{PqStream, Stream}, stream::{PqStream, Stream},
}; };

View File

@@ -2,8 +2,8 @@ use super::{ComputeCredentials, ComputeUserInfo, ComputeUserInfoNoEndpoint};
use crate::{ use crate::{
auth::{self, AuthFlow}, auth::{self, AuthFlow},
config::AuthenticationConfig, config::AuthenticationConfig,
console::AuthSecret,
context::RequestMonitoring, context::RequestMonitoring,
control_plane::AuthSecret,
intern::EndpointIdInt, intern::EndpointIdInt,
sasl, sasl,
stream::{self, Stream}, stream::{self, Stream},

View File

@@ -571,7 +571,7 @@ mod tests {
use bytes::Bytes; use bytes::Bytes;
use http::Response; use http::Response;
use http_body_util::Full; use http_body_util::Full;
use hyper::service::service_fn; use hyper1::service::service_fn;
use hyper_util::rt::TokioIo; use hyper_util::rt::TokioIo;
use rand::rngs::OsRng; use rand::rngs::OsRng;
use rsa::pkcs8::DecodePrivateKey; use rsa::pkcs8::DecodePrivateKey;
@@ -736,7 +736,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL
}); });
let listener = TcpListener::bind("0.0.0.0:0").await.unwrap(); let listener = TcpListener::bind("0.0.0.0:0").await.unwrap();
let server = hyper::server::conn::http1::Builder::new(); let server = hyper1::server::conn::http1::Builder::new();
let addr = listener.local_addr().unwrap(); let addr = listener.local_addr().unwrap();
tokio::spawn(async move { tokio::spawn(async move {
loop { loop {

View File

@@ -5,11 +5,11 @@ use arc_swap::ArcSwapOption;
use crate::{ use crate::{
compute::ConnCfg, compute::ConnCfg,
context::RequestMonitoring, console::{
control_plane::{
messages::{ColdStartInfo, EndpointJwksResponse, MetricsAuxInfo}, messages::{ColdStartInfo, EndpointJwksResponse, MetricsAuxInfo},
NodeInfo, NodeInfo,
}, },
context::RequestMonitoring,
intern::{BranchIdTag, EndpointIdTag, InternId, ProjectIdTag}, intern::{BranchIdTag, EndpointIdTag, InternId, ProjectIdTag},
EndpointId, EndpointId,
}; };

View File

@@ -1,8 +1,8 @@
use crate::{ use crate::{
auth, compute, auth, compute,
config::AuthenticationConfig, config::AuthenticationConfig,
console::{self, provider::NodeInfo},
context::RequestMonitoring, context::RequestMonitoring,
control_plane::{self, provider::NodeInfo},
error::{ReportableError, UserFacingError}, error::{ReportableError, UserFacingError},
stream::PqStream, stream::PqStream,
waiters, waiters,
@@ -70,7 +70,7 @@ pub(super) async fn authenticate(
let (psql_session_id, waiter) = loop { let (psql_session_id, waiter) = loop {
let psql_session_id = new_psql_session_id(); let psql_session_id = new_psql_session_id();
match control_plane::mgmt::get_waiter(&psql_session_id) { match console::mgmt::get_waiter(&psql_session_id) {
Ok(waiter) => break (psql_session_id, waiter), Ok(waiter) => break (psql_session_id, waiter),
Err(_e) => continue, Err(_e) => continue,
} }

View File

@@ -3,8 +3,8 @@
use super::{backend::ComputeCredentialKeys, AuthErrorImpl, PasswordHackPayload}; use super::{backend::ComputeCredentialKeys, AuthErrorImpl, PasswordHackPayload};
use crate::{ use crate::{
config::TlsServerEndPoint, config::TlsServerEndPoint,
console::AuthSecret,
context::RequestMonitoring, context::RequestMonitoring,
control_plane::AuthSecret,
intern::EndpointIdInt, intern::EndpointIdInt,
sasl, sasl,
scram::{self, threadpool::ThreadPool}, scram::{self, threadpool::ThreadPool},

View File

@@ -12,7 +12,7 @@ use proxy::{
}, },
cancellation::CancellationHandlerMain, cancellation::CancellationHandlerMain,
config::{self, AuthenticationConfig, HttpConfig, ProxyConfig, RetryConfig}, config::{self, AuthenticationConfig, HttpConfig, ProxyConfig, RetryConfig},
control_plane::{ console::{
locks::ApiLocks, locks::ApiLocks,
messages::{EndpointJwksResponse, JwksSettings}, messages::{EndpointJwksResponse, JwksSettings},
}, },
@@ -77,10 +77,10 @@ struct LocalProxyCliArgs {
#[clap(long, default_value = "127.0.0.1:5432")] #[clap(long, default_value = "127.0.0.1:5432")]
compute: SocketAddr, compute: SocketAddr,
/// Path of the local proxy config file /// Path of the local proxy config file
#[clap(long, default_value = "./local_proxy.json")] #[clap(long, default_value = "./localproxy.json")]
config_path: Utf8PathBuf, config_path: Utf8PathBuf,
/// Path of the local proxy PID file /// Path of the local proxy PID file
#[clap(long, default_value = "./local_proxy.pid")] #[clap(long, default_value = "./localproxy.pid")]
pid_path: Utf8PathBuf, pid_path: Utf8PathBuf,
} }
@@ -305,7 +305,7 @@ async fn refresh_config_inner(path: &Utf8Path) -> anyhow::Result<()> {
let mut jwks_set = vec![]; let mut jwks_set = vec![];
for jwks in data.jwks.into_iter().flatten() { for jwks in data.jwks {
let mut jwks_url = url::Url::from_str(&jwks.jwks_url).context("parsing JWKS url")?; let mut jwks_url = url::Url::from_str(&jwks.jwks_url).context("parsing JWKS url")?;
ensure!( ensure!(

View File

@@ -19,8 +19,8 @@ use proxy::config::CacheOptions;
use proxy::config::HttpConfig; use proxy::config::HttpConfig;
use proxy::config::ProjectInfoCacheOptions; use proxy::config::ProjectInfoCacheOptions;
use proxy::config::ProxyProtocolV2; use proxy::config::ProxyProtocolV2;
use proxy::console;
use proxy::context::parquet::ParquetUploadArgs; use proxy::context::parquet::ParquetUploadArgs;
use proxy::control_plane;
use proxy::http; use proxy::http;
use proxy::http::health_server::AppMetrics; use proxy::http::health_server::AppMetrics;
use proxy::metrics::Metrics; use proxy::metrics::Metrics;
@@ -495,7 +495,7 @@ async fn main() -> anyhow::Result<()> {
proxy: proxy::metrics::Metrics::get(), proxy: proxy::metrics::Metrics::get(),
}, },
)); ));
maintenance_tasks.spawn(control_plane::mgmt::task_main(mgmt_listener)); maintenance_tasks.spawn(console::mgmt::task_main(mgmt_listener));
if let Some(metrics_config) = &config.metric_collection { if let Some(metrics_config) = &config.metric_collection {
// TODO: Add gc regardles of the metric collection being enabled. // TODO: Add gc regardles of the metric collection being enabled.
@@ -506,8 +506,8 @@ async fn main() -> anyhow::Result<()> {
)); ));
} }
if let auth::Backend::ControlPlane(api, _) = &config.auth_backend { if let auth::Backend::Console(api, _) = &config.auth_backend {
if let proxy::control_plane::provider::ControlPlaneBackend::Management(api) = &**api { if let proxy::console::provider::ConsoleBackend::Console(api) = &**api {
match (redis_notifications_client, regional_redis_client.clone()) { match (redis_notifications_client, regional_redis_client.clone()) {
(None, None) => {} (None, None) => {}
(client1, client2) => { (client1, client2) => {
@@ -623,7 +623,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
"Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}" "Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}"
); );
info!("Using EndpointCacheConfig with options={endpoint_cache_config:?}"); info!("Using EndpointCacheConfig with options={endpoint_cache_config:?}");
let caches = Box::leak(Box::new(control_plane::caches::ApiCaches::new( let caches = Box::leak(Box::new(console::caches::ApiCaches::new(
wake_compute_cache_config, wake_compute_cache_config,
project_info_cache_config, project_info_cache_config,
endpoint_cache_config, endpoint_cache_config,
@@ -636,7 +636,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
timeout, timeout,
} = args.wake_compute_lock.parse()?; } = args.wake_compute_lock.parse()?;
info!(?limiter, shards, ?epoch, "Using NodeLocks (wake_compute)"); info!(?limiter, shards, ?epoch, "Using NodeLocks (wake_compute)");
let locks = Box::leak(Box::new(control_plane::locks::ApiLocks::new( let locks = Box::leak(Box::new(console::locks::ApiLocks::new(
"wake_compute_lock", "wake_compute_lock",
limiter, limiter,
shards, shards,
@@ -653,27 +653,27 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
RateBucketInfo::validate(&mut wake_compute_rps_limit)?; RateBucketInfo::validate(&mut wake_compute_rps_limit)?;
let wake_compute_endpoint_rate_limiter = let wake_compute_endpoint_rate_limiter =
Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit)); Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit));
let api = control_plane::provider::neon::Api::new( let api = console::provider::neon::Api::new(
endpoint, endpoint,
caches, caches,
locks, locks,
wake_compute_endpoint_rate_limiter, wake_compute_endpoint_rate_limiter,
); );
let api = control_plane::provider::ControlPlaneBackend::Management(api); let api = console::provider::ConsoleBackend::Console(api);
auth::Backend::ControlPlane(MaybeOwned::Owned(api), ()) auth::Backend::Console(MaybeOwned::Owned(api), ())
} }
AuthBackendType::Web => { AuthBackendType::Web => {
let url = args.uri.parse()?; let url = args.uri.parse()?;
auth::Backend::ConsoleRedirect(MaybeOwned::Owned(url), ()) auth::Backend::Web(MaybeOwned::Owned(url), ())
} }
#[cfg(feature = "testing")] #[cfg(feature = "testing")]
AuthBackendType::Postgres => { AuthBackendType::Postgres => {
let url = args.auth_endpoint.parse()?; let url = args.auth_endpoint.parse()?;
let api = control_plane::provider::mock::Api::new(url, !args.is_private_access_proxy); let api = console::provider::mock::Api::new(url, !args.is_private_access_proxy);
let api = control_plane::provider::ControlPlaneBackend::PostgresMock(api); let api = console::provider::ConsoleBackend::Postgres(api);
auth::Backend::ControlPlane(MaybeOwned::Owned(api), ()) auth::Backend::Console(MaybeOwned::Owned(api), ())
} }
}; };
@@ -689,7 +689,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
?epoch, ?epoch,
"Using NodeLocks (connect_compute)" "Using NodeLocks (connect_compute)"
); );
let connect_compute_locks = control_plane::locks::ApiLocks::new( let connect_compute_locks = console::locks::ApiLocks::new(
"connect_compute_lock", "connect_compute_lock",
limiter, limiter,
shards, shards,

View File

@@ -16,7 +16,7 @@ use tracing::{debug, info};
use crate::{ use crate::{
auth::IpPattern, auth::IpPattern,
config::ProjectInfoCacheOptions, config::ProjectInfoCacheOptions,
control_plane::AuthSecret, console::AuthSecret,
intern::{EndpointIdInt, ProjectIdInt, RoleNameInt}, intern::{EndpointIdInt, ProjectIdInt, RoleNameInt},
EndpointId, RoleName, EndpointId, RoleName,
}; };

View File

@@ -1,8 +1,8 @@
use crate::{ use crate::{
auth::parse_endpoint_param, auth::parse_endpoint_param,
cancellation::CancelClosure, cancellation::CancelClosure,
console::{errors::WakeComputeError, messages::MetricsAuxInfo, provider::ApiLockError},
context::RequestMonitoring, context::RequestMonitoring,
control_plane::{errors::WakeComputeError, messages::MetricsAuxInfo, provider::ApiLockError},
error::{ReportableError, UserFacingError}, error::{ReportableError, UserFacingError},
metrics::{Metrics, NumDbConnectionsGuard}, metrics::{Metrics, NumDbConnectionsGuard},
proxy::neon_option, proxy::neon_option,
@@ -20,7 +20,7 @@ use tokio_postgres::tls::MakeTlsConnect;
use tokio_postgres_rustls::MakeRustlsConnect; use tokio_postgres_rustls::MakeRustlsConnect;
use tracing::{error, info, warn}; use tracing::{error, info, warn};
pub const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node"; const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node";
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub(crate) enum ConnectionError { pub(crate) enum ConnectionError {

View File

@@ -3,7 +3,7 @@ use crate::{
self, self,
backend::{jwt::JwkCache, AuthRateLimiter}, backend::{jwt::JwkCache, AuthRateLimiter},
}, },
control_plane::locks::ApiLocks, console::locks::ApiLocks,
rate_limiter::{RateBucketInfo, RateLimitAlgorithm, RateLimiterConfig}, rate_limiter::{RateBucketInfo, RateLimitAlgorithm, RateLimiterConfig},
scram::threadpool::ThreadPool, scram::threadpool::ThreadPool,
serverless::{cancel_set::CancelSet, GlobalConnPoolOptions}, serverless::{cancel_set::CancelSet, GlobalConnPoolOptions},
@@ -372,7 +372,7 @@ pub struct EndpointCacheConfig {
} }
impl EndpointCacheConfig { impl EndpointCacheConfig {
/// Default options for [`crate::control_plane::provider::NodeInfoCache`]. /// Default options for [`crate::console::provider::NodeInfoCache`].
/// Notice that by default the limiter is empty, which means that cache is disabled. /// Notice that by default the limiter is empty, which means that cache is disabled.
pub const CACHE_DEFAULT_OPTIONS: &'static str = pub const CACHE_DEFAULT_OPTIONS: &'static str =
"initial_batch_size=1000,default_batch_size=10,xread_timeout=5m,stream_name=controlPlane,disable_cache=true,limiter_info=1000@1s,retry_interval=1s"; "initial_batch_size=1000,default_batch_size=10,xread_timeout=5m,stream_name=controlPlane,disable_cache=true,limiter_info=1000@1s,retry_interval=1s";
@@ -447,7 +447,7 @@ pub struct CacheOptions {
} }
impl CacheOptions { impl CacheOptions {
/// Default options for [`crate::control_plane::provider::NodeInfoCache`]. /// Default options for [`crate::console::provider::NodeInfoCache`].
pub const CACHE_DEFAULT_OPTIONS: &'static str = "size=4000,ttl=4m"; pub const CACHE_DEFAULT_OPTIONS: &'static str = "size=4000,ttl=4m";
/// Parse cache options passed via cmdline. /// Parse cache options passed via cmdline.
@@ -503,7 +503,7 @@ pub struct ProjectInfoCacheOptions {
} }
impl ProjectInfoCacheOptions { impl ProjectInfoCacheOptions {
/// Default options for [`crate::control_plane::provider::NodeInfoCache`]. /// Default options for [`crate::console::provider::NodeInfoCache`].
pub const CACHE_DEFAULT_OPTIONS: &'static str = pub const CACHE_DEFAULT_OPTIONS: &'static str =
"size=10000,ttl=4m,max_roles=10,gc_interval=60m"; "size=10000,ttl=4m,max_roles=10,gc_interval=60m";
@@ -622,9 +622,9 @@ pub struct ConcurrencyLockOptions {
} }
impl ConcurrencyLockOptions { impl ConcurrencyLockOptions {
/// Default options for [`crate::control_plane::provider::ApiLocks`]. /// Default options for [`crate::console::provider::ApiLocks`].
pub const DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK: &'static str = "permits=0"; pub const DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK: &'static str = "permits=0";
/// Default options for [`crate::control_plane::provider::ApiLocks`]. /// Default options for [`crate::console::provider::ApiLocks`].
pub const DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK: &'static str = pub const DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK: &'static str =
"shards=64,permits=100,epoch=10m,timeout=10ms"; "shards=64,permits=100,epoch=10m,timeout=10ms";

View File

@@ -10,14 +10,14 @@ use crate::proxy::retry::CouldRetry;
/// Generic error response with human-readable description. /// Generic error response with human-readable description.
/// Note that we can't always present it to user as is. /// Note that we can't always present it to user as is.
#[derive(Debug, Deserialize, Clone)] #[derive(Debug, Deserialize, Clone)]
pub(crate) struct ControlPlaneError { pub(crate) struct ConsoleError {
pub(crate) error: Box<str>, pub(crate) error: Box<str>,
#[serde(skip)] #[serde(skip)]
pub(crate) http_status_code: http::StatusCode, pub(crate) http_status_code: http::StatusCode,
pub(crate) status: Option<Status>, pub(crate) status: Option<Status>,
} }
impl ControlPlaneError { impl ConsoleError {
pub(crate) fn get_reason(&self) -> Reason { pub(crate) fn get_reason(&self) -> Reason {
self.status self.status
.as_ref() .as_ref()
@@ -51,7 +51,7 @@ impl ControlPlaneError {
} }
} }
impl Display for ControlPlaneError { impl Display for ConsoleError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let msg: &str = self let msg: &str = self
.status .status
@@ -62,7 +62,7 @@ impl Display for ControlPlaneError {
} }
} }
impl CouldRetry for ControlPlaneError { impl CouldRetry for ConsoleError {
fn could_retry(&self) -> bool { fn could_retry(&self) -> bool {
// If the error message does not have a status, // If the error message does not have a status,
// the error is unknown and probably should not retry automatically // the error is unknown and probably should not retry automatically

View File

@@ -1,5 +1,5 @@
use crate::{ use crate::{
control_plane::messages::{DatabaseInfo, KickSession}, console::messages::{DatabaseInfo, KickSession},
waiters::{self, Waiter, Waiters}, waiters::{self, Waiter, Waiters},
}; };
use anyhow::Context; use anyhow::Context;

View File

@@ -2,7 +2,7 @@
pub mod mock; pub mod mock;
pub mod neon; pub mod neon;
use super::messages::{ControlPlaneError, MetricsAuxInfo}; use super::messages::{ConsoleError, MetricsAuxInfo};
use crate::{ use crate::{
auth::{ auth::{
backend::{ backend::{
@@ -28,7 +28,7 @@ use tracing::info;
pub(crate) mod errors { pub(crate) mod errors {
use crate::{ use crate::{
control_plane::messages::{self, ControlPlaneError, Reason}, console::messages::{self, ConsoleError, Reason},
error::{io_error, ErrorKind, ReportableError, UserFacingError}, error::{io_error, ErrorKind, ReportableError, UserFacingError},
proxy::retry::CouldRetry, proxy::retry::CouldRetry,
}; };
@@ -44,7 +44,7 @@ pub(crate) mod errors {
pub(crate) enum ApiError { pub(crate) enum ApiError {
/// Error returned by the console itself. /// Error returned by the console itself.
#[error("{REQUEST_FAILED} with {0}")] #[error("{REQUEST_FAILED} with {0}")]
ControlPlane(ControlPlaneError), Console(ConsoleError),
/// Various IO errors like broken pipe or malformed payload. /// Various IO errors like broken pipe or malformed payload.
#[error("{REQUEST_FAILED}: {0}")] #[error("{REQUEST_FAILED}: {0}")]
@@ -55,7 +55,7 @@ pub(crate) mod errors {
/// Returns HTTP status code if it's the reason for failure. /// Returns HTTP status code if it's the reason for failure.
pub(crate) fn get_reason(&self) -> messages::Reason { pub(crate) fn get_reason(&self) -> messages::Reason {
match self { match self {
ApiError::ControlPlane(e) => e.get_reason(), ApiError::Console(e) => e.get_reason(),
ApiError::Transport(_) => messages::Reason::Unknown, ApiError::Transport(_) => messages::Reason::Unknown,
} }
} }
@@ -65,7 +65,7 @@ pub(crate) mod errors {
fn to_string_client(&self) -> String { fn to_string_client(&self) -> String {
match self { match self {
// To minimize risks, only select errors are forwarded to users. // To minimize risks, only select errors are forwarded to users.
ApiError::ControlPlane(c) => c.get_user_facing_message(), ApiError::Console(c) => c.get_user_facing_message(),
ApiError::Transport(_) => REQUEST_FAILED.to_owned(), ApiError::Transport(_) => REQUEST_FAILED.to_owned(),
} }
} }
@@ -74,7 +74,7 @@ pub(crate) mod errors {
impl ReportableError for ApiError { impl ReportableError for ApiError {
fn get_error_kind(&self) -> crate::error::ErrorKind { fn get_error_kind(&self) -> crate::error::ErrorKind {
match self { match self {
ApiError::ControlPlane(e) => match e.get_reason() { ApiError::Console(e) => match e.get_reason() {
Reason::RoleProtected => ErrorKind::User, Reason::RoleProtected => ErrorKind::User,
Reason::ResourceNotFound => ErrorKind::User, Reason::ResourceNotFound => ErrorKind::User,
Reason::ProjectNotFound => ErrorKind::User, Reason::ProjectNotFound => ErrorKind::User,
@@ -91,12 +91,12 @@ pub(crate) mod errors {
Reason::LockAlreadyTaken => ErrorKind::ControlPlane, Reason::LockAlreadyTaken => ErrorKind::ControlPlane,
Reason::RunningOperations => ErrorKind::ControlPlane, Reason::RunningOperations => ErrorKind::ControlPlane,
Reason::Unknown => match &e { Reason::Unknown => match &e {
ControlPlaneError { ConsoleError {
http_status_code: http_status_code:
http::StatusCode::NOT_FOUND | http::StatusCode::NOT_ACCEPTABLE, http::StatusCode::NOT_FOUND | http::StatusCode::NOT_ACCEPTABLE,
.. ..
} => crate::error::ErrorKind::User, } => crate::error::ErrorKind::User,
ControlPlaneError { ConsoleError {
http_status_code: http::StatusCode::UNPROCESSABLE_ENTITY, http_status_code: http::StatusCode::UNPROCESSABLE_ENTITY,
error, error,
.. ..
@@ -105,7 +105,7 @@ pub(crate) mod errors {
{ {
crate::error::ErrorKind::User crate::error::ErrorKind::User
} }
ControlPlaneError { ConsoleError {
http_status_code: http::StatusCode::LOCKED, http_status_code: http::StatusCode::LOCKED,
error, error,
.. ..
@@ -114,11 +114,11 @@ pub(crate) mod errors {
{ {
crate::error::ErrorKind::User crate::error::ErrorKind::User
} }
ControlPlaneError { ConsoleError {
http_status_code: http::StatusCode::TOO_MANY_REQUESTS, http_status_code: http::StatusCode::TOO_MANY_REQUESTS,
.. ..
} => crate::error::ErrorKind::ServiceRateLimit, } => crate::error::ErrorKind::ServiceRateLimit,
ControlPlaneError { .. } => crate::error::ErrorKind::ControlPlane, ConsoleError { .. } => crate::error::ErrorKind::ControlPlane,
}, },
}, },
ApiError::Transport(_) => crate::error::ErrorKind::ControlPlane, ApiError::Transport(_) => crate::error::ErrorKind::ControlPlane,
@@ -131,7 +131,7 @@ pub(crate) mod errors {
match self { match self {
// retry some transport errors // retry some transport errors
Self::Transport(io) => io.could_retry(), Self::Transport(io) => io.could_retry(),
Self::ControlPlane(e) => e.could_retry(), Self::Console(e) => e.could_retry(),
} }
} }
} }
@@ -314,8 +314,7 @@ impl NodeInfo {
} }
} }
pub(crate) type NodeInfoCache = pub(crate) type NodeInfoCache = TimedLru<EndpointCacheKey, Result<NodeInfo, Box<ConsoleError>>>;
TimedLru<EndpointCacheKey, Result<NodeInfo, Box<ControlPlaneError>>>;
pub(crate) type CachedNodeInfo = Cached<&'static NodeInfoCache, NodeInfo>; pub(crate) type CachedNodeInfo = Cached<&'static NodeInfoCache, NodeInfo>;
pub(crate) type CachedRoleSecret = Cached<&'static ProjectInfoCacheImpl, Option<AuthSecret>>; pub(crate) type CachedRoleSecret = Cached<&'static ProjectInfoCacheImpl, Option<AuthSecret>>;
pub(crate) type CachedAllowedIps = Cached<&'static ProjectInfoCacheImpl, Arc<Vec<IpPattern>>>; pub(crate) type CachedAllowedIps = Cached<&'static ProjectInfoCacheImpl, Arc<Vec<IpPattern>>>;
@@ -354,28 +353,28 @@ pub(crate) trait Api {
#[non_exhaustive] #[non_exhaustive]
#[derive(Clone)] #[derive(Clone)]
pub enum ControlPlaneBackend { pub enum ConsoleBackend {
/// Current Management API (V2). /// Current Cloud API (V2).
Management(neon::Api), Console(neon::Api),
/// Local mock control plane. /// Local mock of Cloud API (V2).
#[cfg(any(test, feature = "testing"))] #[cfg(any(test, feature = "testing"))]
PostgresMock(mock::Api), Postgres(mock::Api),
/// Internal testing /// Internal testing
#[cfg(test)] #[cfg(test)]
#[allow(private_interfaces)] #[allow(private_interfaces)]
Test(Box<dyn crate::auth::backend::TestBackend>), Test(Box<dyn crate::auth::backend::TestBackend>),
} }
impl Api for ControlPlaneBackend { impl Api for ConsoleBackend {
async fn get_role_secret( async fn get_role_secret(
&self, &self,
ctx: &RequestMonitoring, ctx: &RequestMonitoring,
user_info: &ComputeUserInfo, user_info: &ComputeUserInfo,
) -> Result<CachedRoleSecret, errors::GetAuthInfoError> { ) -> Result<CachedRoleSecret, errors::GetAuthInfoError> {
match self { match self {
Self::Management(api) => api.get_role_secret(ctx, user_info).await, Self::Console(api) => api.get_role_secret(ctx, user_info).await,
#[cfg(any(test, feature = "testing"))] #[cfg(any(test, feature = "testing"))]
Self::PostgresMock(api) => api.get_role_secret(ctx, user_info).await, Self::Postgres(api) => api.get_role_secret(ctx, user_info).await,
#[cfg(test)] #[cfg(test)]
Self::Test(_) => { Self::Test(_) => {
unreachable!("this function should never be called in the test backend") unreachable!("this function should never be called in the test backend")
@@ -389,9 +388,9 @@ impl Api for ControlPlaneBackend {
user_info: &ComputeUserInfo, user_info: &ComputeUserInfo,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError> { ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError> {
match self { match self {
Self::Management(api) => api.get_allowed_ips_and_secret(ctx, user_info).await, Self::Console(api) => api.get_allowed_ips_and_secret(ctx, user_info).await,
#[cfg(any(test, feature = "testing"))] #[cfg(any(test, feature = "testing"))]
Self::PostgresMock(api) => api.get_allowed_ips_and_secret(ctx, user_info).await, Self::Postgres(api) => api.get_allowed_ips_and_secret(ctx, user_info).await,
#[cfg(test)] #[cfg(test)]
Self::Test(api) => api.get_allowed_ips_and_secret(), Self::Test(api) => api.get_allowed_ips_and_secret(),
} }
@@ -403,9 +402,9 @@ impl Api for ControlPlaneBackend {
endpoint: EndpointId, endpoint: EndpointId,
) -> anyhow::Result<Vec<AuthRule>> { ) -> anyhow::Result<Vec<AuthRule>> {
match self { match self {
Self::Management(api) => api.get_endpoint_jwks(ctx, endpoint).await, Self::Console(api) => api.get_endpoint_jwks(ctx, endpoint).await,
#[cfg(any(test, feature = "testing"))] #[cfg(any(test, feature = "testing"))]
Self::PostgresMock(api) => api.get_endpoint_jwks(ctx, endpoint).await, Self::Postgres(api) => api.get_endpoint_jwks(ctx, endpoint).await,
#[cfg(test)] #[cfg(test)]
Self::Test(_api) => Ok(vec![]), Self::Test(_api) => Ok(vec![]),
} }
@@ -417,16 +416,16 @@ impl Api for ControlPlaneBackend {
user_info: &ComputeUserInfo, user_info: &ComputeUserInfo,
) -> Result<CachedNodeInfo, errors::WakeComputeError> { ) -> Result<CachedNodeInfo, errors::WakeComputeError> {
match self { match self {
Self::Management(api) => api.wake_compute(ctx, user_info).await, Self::Console(api) => api.wake_compute(ctx, user_info).await,
#[cfg(any(test, feature = "testing"))] #[cfg(any(test, feature = "testing"))]
Self::PostgresMock(api) => api.wake_compute(ctx, user_info).await, Self::Postgres(api) => api.wake_compute(ctx, user_info).await,
#[cfg(test)] #[cfg(test)]
Self::Test(api) => api.wake_compute(), Self::Test(api) => api.wake_compute(),
} }
} }
} }
/// Various caches for [`control_plane`](super). /// Various caches for [`console`](super).
pub struct ApiCaches { pub struct ApiCaches {
/// Cache for the `wake_compute` API method. /// Cache for the `wake_compute` API method.
pub(crate) node_info: NodeInfoCache, pub(crate) node_info: NodeInfoCache,
@@ -455,7 +454,7 @@ impl ApiCaches {
} }
} }
/// Various caches for [`control_plane`](super). /// Various caches for [`console`](super).
pub struct ApiLocks<K> { pub struct ApiLocks<K> {
name: &'static str, name: &'static str,
node_locks: DashMap<K, Arc<DynamicLimiter>>, node_locks: DashMap<K, Arc<DynamicLimiter>>,
@@ -578,7 +577,7 @@ impl WakeComputePermit {
} }
} }
impl FetchAuthRules for ControlPlaneBackend { impl FetchAuthRules for ConsoleBackend {
async fn fetch_auth_rules( async fn fetch_auth_rules(
&self, &self,
ctx: &RequestMonitoring, ctx: &RequestMonitoring,

View File

@@ -10,7 +10,7 @@ use crate::{
use crate::{auth::backend::ComputeUserInfo, compute, error::io_error, scram, url::ApiUrl}; use crate::{auth::backend::ComputeUserInfo, compute, error::io_error, scram, url::ApiUrl};
use crate::{auth::IpPattern, cache::Cached}; use crate::{auth::IpPattern, cache::Cached};
use crate::{ use crate::{
control_plane::{ console::{
messages::MetricsAuxInfo, messages::MetricsAuxInfo,
provider::{CachedAllowedIps, CachedRoleSecret}, provider::{CachedAllowedIps, CachedRoleSecret},
}, },
@@ -166,7 +166,7 @@ impl Api {
endpoint_id: (&EndpointId::from("endpoint")).into(), endpoint_id: (&EndpointId::from("endpoint")).into(),
project_id: (&ProjectId::from("project")).into(), project_id: (&ProjectId::from("project")).into(),
branch_id: (&BranchId::from("branch")).into(), branch_id: (&BranchId::from("branch")).into(),
cold_start_info: crate::control_plane::messages::ColdStartInfo::Warm, cold_start_info: crate::console::messages::ColdStartInfo::Warm,
}, },
allow_self_signed_compute: false, allow_self_signed_compute: false,
}; };

View File

@@ -1,7 +1,7 @@
//! Production console backend. //! Production console backend.
use super::{ use super::{
super::messages::{ControlPlaneError, GetRoleSecret, WakeCompute}, super::messages::{ConsoleError, GetRoleSecret, WakeCompute},
errors::{ApiError, GetAuthInfoError, WakeComputeError}, errors::{ApiError, GetAuthInfoError, WakeComputeError},
ApiCaches, ApiLocks, AuthInfo, AuthSecret, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, ApiCaches, ApiLocks, AuthInfo, AuthSecret, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret,
NodeInfo, NodeInfo,
@@ -9,7 +9,7 @@ use super::{
use crate::{ use crate::{
auth::backend::{jwt::AuthRule, ComputeUserInfo}, auth::backend::{jwt::AuthRule, ComputeUserInfo},
compute, compute,
control_plane::messages::{ColdStartInfo, EndpointJwksResponse, Reason}, console::messages::{ColdStartInfo, EndpointJwksResponse, Reason},
http, http,
metrics::{CacheOutcome, Metrics}, metrics::{CacheOutcome, Metrics},
rate_limiter::WakeComputeRateLimiter, rate_limiter::WakeComputeRateLimiter,
@@ -348,7 +348,7 @@ impl super::Api for Api {
let (cached, info) = cached.take_value(); let (cached, info) = cached.take_value();
let info = info.map_err(|c| { let info = info.map_err(|c| {
info!(key = &*key, "found cached wake_compute error"); info!(key = &*key, "found cached wake_compute error");
WakeComputeError::ApiError(ApiError::ControlPlane(*c)) WakeComputeError::ApiError(ApiError::Console(*c))
})?; })?;
debug!(key = &*key, "found cached compute node info"); debug!(key = &*key, "found cached compute node info");
@@ -395,9 +395,9 @@ impl super::Api for Api {
Ok(cached.map(|()| node)) Ok(cached.map(|()| node))
} }
Err(err) => match err { Err(err) => match err {
WakeComputeError::ApiError(ApiError::ControlPlane(err)) => { WakeComputeError::ApiError(ApiError::Console(err)) => {
let Some(status) = &err.status else { let Some(status) = &err.status else {
return Err(WakeComputeError::ApiError(ApiError::ControlPlane(err))); return Err(WakeComputeError::ApiError(ApiError::Console(err)));
}; };
let reason = status let reason = status
@@ -407,7 +407,7 @@ impl super::Api for Api {
// if we can retry this error, do not cache it. // if we can retry this error, do not cache it.
if reason.can_retry() { if reason.can_retry() {
return Err(WakeComputeError::ApiError(ApiError::ControlPlane(err))); return Err(WakeComputeError::ApiError(ApiError::Console(err)));
} }
// at this point, we should only have quota errors. // at this point, we should only have quota errors.
@@ -422,7 +422,7 @@ impl super::Api for Api {
Duration::from_secs(30), Duration::from_secs(30),
); );
Err(WakeComputeError::ApiError(ApiError::ControlPlane(err))) Err(WakeComputeError::ApiError(ApiError::Console(err)))
} }
err => return Err(err), err => return Err(err),
}, },
@@ -448,7 +448,7 @@ async fn parse_body<T: for<'a> serde::Deserialize<'a>>(
// as the fact that the request itself has failed. // as the fact that the request itself has failed.
let mut body = serde_json::from_slice(&s).unwrap_or_else(|e| { let mut body = serde_json::from_slice(&s).unwrap_or_else(|e| {
warn!("failed to parse error body: {e}"); warn!("failed to parse error body: {e}");
ControlPlaneError { ConsoleError {
error: "reason unclear (malformed error message)".into(), error: "reason unclear (malformed error message)".into(),
http_status_code: status, http_status_code: status,
status: None, status: None,
@@ -457,7 +457,7 @@ async fn parse_body<T: for<'a> serde::Deserialize<'a>>(
body.http_status_code = status; body.http_status_code = status;
error!("console responded with an error ({status}): {body:?}"); error!("console responded with an error ({status}): {body:?}");
Err(ApiError::ControlPlane(body)) Err(ApiError::Console(body))
} }
fn parse_host_port(input: &str) -> Option<(&str, u16)> { fn parse_host_port(input: &str) -> Option<(&str, u16)> {

View File

@@ -11,7 +11,7 @@ use try_lock::TryLock;
use uuid::Uuid; use uuid::Uuid;
use crate::{ use crate::{
control_plane::messages::{ColdStartInfo, MetricsAuxInfo}, console::messages::{ColdStartInfo, MetricsAuxInfo},
error::ErrorKind, error::ErrorKind,
intern::{BranchIdInt, ProjectIdInt}, intern::{BranchIdInt, ProjectIdInt},
metrics::{ConnectOutcome, InvalidEndpointsGroup, LatencyTimer, Metrics, Protocol, Waiting}, metrics::{ConnectOutcome, InvalidEndpointsGroup, LatencyTimer, Metrics, Protocol, Waiting},

View File

@@ -9,7 +9,7 @@ use std::time::Duration;
use anyhow::bail; use anyhow::bail;
use bytes::Bytes; use bytes::Bytes;
use http_body_util::BodyExt; use http_body_util::BodyExt;
use hyper::body::Body; use hyper1::body::Body;
use serde::de::DeserializeOwned; use serde::de::DeserializeOwned;
pub(crate) use reqwest::{Request, Response}; pub(crate) use reqwest::{Request, Response};

View File

@@ -1,5 +1,5 @@
use anyhow::{anyhow, bail}; use anyhow::{anyhow, bail};
use hyper0::{header::CONTENT_TYPE, Body, Request, Response, StatusCode}; use hyper::{header::CONTENT_TYPE, Body, Request, Response, StatusCode};
use measured::{text::BufferedTextEncoder, MetricGroup}; use measured::{text::BufferedTextEncoder, MetricGroup};
use metrics::NeonMetrics; use metrics::NeonMetrics;
use std::{ use std::{
@@ -21,7 +21,7 @@ async fn status_handler(_: Request<Body>) -> Result<Response<Body>, ApiError> {
json_response(StatusCode::OK, "") json_response(StatusCode::OK, "")
} }
fn make_router(metrics: AppMetrics) -> RouterBuilder<hyper0::Body, ApiError> { fn make_router(metrics: AppMetrics) -> RouterBuilder<hyper::Body, ApiError> {
let state = Arc::new(Mutex::new(PrometheusHandler { let state = Arc::new(Mutex::new(PrometheusHandler {
encoder: BufferedTextEncoder::new(), encoder: BufferedTextEncoder::new(),
metrics, metrics,
@@ -45,7 +45,7 @@ pub async fn task_main(
let service = || RouterService::new(make_router(metrics).build()?); let service = || RouterService::new(make_router(metrics).build()?);
hyper0::Server::from_tcp(http_listener)? hyper::Server::from_tcp(http_listener)?
.serve(service().map_err(|e| anyhow!(e))?) .serve(service().map_err(|e| anyhow!(e))?)
.await?; .await?;

View File

@@ -90,13 +90,15 @@ use tokio::task::JoinError;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use tracing::warn; use tracing::warn;
extern crate hyper0 as hyper;
pub mod auth; pub mod auth;
pub mod cache; pub mod cache;
pub mod cancellation; pub mod cancellation;
pub mod compute; pub mod compute;
pub mod config; pub mod config;
pub mod console;
pub mod context; pub mod context;
pub mod control_plane;
pub mod error; pub mod error;
pub mod http; pub mod http;
pub mod intern; pub mod intern;

View File

@@ -11,7 +11,7 @@ use metrics::{CounterPairAssoc, CounterPairVec, HyperLogLog, HyperLogLogVec};
use tokio::time::{self, Instant}; use tokio::time::{self, Instant};
use crate::control_plane::messages::ColdStartInfo; use crate::console::messages::ColdStartInfo;
#[derive(MetricGroup)] #[derive(MetricGroup)]
#[metric(new(thread_pool: Arc<ThreadPoolMetrics>))] #[metric(new(thread_pool: Arc<ThreadPoolMetrics>))]

View File

@@ -1,10 +1,9 @@
use crate::{ use crate::{
auth::backend::ComputeCredentialKeys, auth::backend::ComputeCredentialKeys,
compute::COULD_NOT_CONNECT,
compute::{self, PostgresConnection}, compute::{self, PostgresConnection},
config::RetryConfig, config::RetryConfig,
console::{self, errors::WakeComputeError, locks::ApiLocks, CachedNodeInfo, NodeInfo},
context::RequestMonitoring, context::RequestMonitoring,
control_plane::{self, errors::WakeComputeError, locks::ApiLocks, CachedNodeInfo, NodeInfo},
error::ReportableError, error::ReportableError,
metrics::{ConnectOutcome, ConnectionFailureKind, Metrics, RetriesMetricGroup, RetryType}, metrics::{ConnectOutcome, ConnectionFailureKind, Metrics, RetriesMetricGroup, RetryType},
proxy::{ proxy::{
@@ -16,7 +15,7 @@ use crate::{
use async_trait::async_trait; use async_trait::async_trait;
use pq_proto::StartupMessageParams; use pq_proto::StartupMessageParams;
use tokio::time; use tokio::time;
use tracing::{debug, info, warn}; use tracing::{error, info, warn};
use super::retry::ShouldRetryWakeCompute; use super::retry::ShouldRetryWakeCompute;
@@ -26,7 +25,7 @@ const CONNECT_TIMEOUT: time::Duration = time::Duration::from_secs(2);
/// (e.g. the compute node's address might've changed at the wrong time). /// (e.g. the compute node's address might've changed at the wrong time).
/// Invalidate the cache entry (if any) to prevent subsequent errors. /// Invalidate the cache entry (if any) to prevent subsequent errors.
#[tracing::instrument(name = "invalidate_cache", skip_all)] #[tracing::instrument(name = "invalidate_cache", skip_all)]
pub(crate) fn invalidate_cache(node_info: control_plane::CachedNodeInfo) -> NodeInfo { pub(crate) fn invalidate_cache(node_info: console::CachedNodeInfo) -> NodeInfo {
let is_cached = node_info.cached(); let is_cached = node_info.cached();
if is_cached { if is_cached {
warn!("invalidating stalled compute node info cache entry"); warn!("invalidating stalled compute node info cache entry");
@@ -49,7 +48,7 @@ pub(crate) trait ConnectMechanism {
async fn connect_once( async fn connect_once(
&self, &self,
ctx: &RequestMonitoring, ctx: &RequestMonitoring,
node_info: &control_plane::CachedNodeInfo, node_info: &console::CachedNodeInfo,
timeout: time::Duration, timeout: time::Duration,
) -> Result<Self::Connection, Self::ConnectError>; ) -> Result<Self::Connection, Self::ConnectError>;
@@ -61,7 +60,7 @@ pub(crate) trait ComputeConnectBackend {
async fn wake_compute( async fn wake_compute(
&self, &self,
ctx: &RequestMonitoring, ctx: &RequestMonitoring,
) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError>; ) -> Result<CachedNodeInfo, console::errors::WakeComputeError>;
fn get_keys(&self) -> &ComputeCredentialKeys; fn get_keys(&self) -> &ComputeCredentialKeys;
} }
@@ -84,7 +83,7 @@ impl ConnectMechanism for TcpMechanism<'_> {
async fn connect_once( async fn connect_once(
&self, &self,
ctx: &RequestMonitoring, ctx: &RequestMonitoring,
node_info: &control_plane::CachedNodeInfo, node_info: &console::CachedNodeInfo,
timeout: time::Duration, timeout: time::Duration,
) -> Result<PostgresConnection, Self::Error> { ) -> Result<PostgresConnection, Self::Error> {
let host = node_info.config.get_host()?; let host = node_info.config.get_host()?;
@@ -117,6 +116,7 @@ where
node_info.set_keys(user_info.get_keys()); node_info.set_keys(user_info.get_keys());
node_info.allow_self_signed_compute = allow_self_signed_compute; node_info.allow_self_signed_compute = allow_self_signed_compute;
// let mut node_info = credentials.get_node_info(ctx, user_info).await?;
mechanism.update_connect_config(&mut node_info.config); mechanism.update_connect_config(&mut node_info.config);
let retry_type = RetryType::ConnectToCompute; let retry_type = RetryType::ConnectToCompute;
@@ -139,10 +139,10 @@ where
Err(e) => e, Err(e) => e,
}; };
debug!(error = ?err, COULD_NOT_CONNECT); error!(error = ?err, "could not connect to compute node");
let node_info = if !node_info.cached() || !err.should_retry_wake_compute() { let node_info = if !node_info.cached() || !err.should_retry_wake_compute() {
// If we just recieved this from cplane and didn't get it from cache, we shouldn't retry. // If we just recieved this from cplane and dodn't get it from cache, we shouldn't retry.
// Do not need to retrieve a new node_info, just return the old one. // Do not need to retrieve a new node_info, just return the old one.
if should_retry(&err, num_retries, connect_to_compute_retry_config) { if should_retry(&err, num_retries, connect_to_compute_retry_config) {
Metrics::get().proxy.retries_metric.observe( Metrics::get().proxy.retries_metric.observe(
@@ -191,7 +191,7 @@ where
} }
Err(e) => { Err(e) => {
if !should_retry(&e, num_retries, connect_to_compute_retry_config) { if !should_retry(&e, num_retries, connect_to_compute_retry_config) {
// Don't log an error here, caller will print the error error!(error = ?e, num_retries, retriable = false, "couldn't connect to compute node");
Metrics::get().proxy.retries_metric.observe( Metrics::get().proxy.retries_metric.observe(
RetriesMetricGroup { RetriesMetricGroup {
outcome: ConnectOutcome::Failed, outcome: ConnectOutcome::Failed,
@@ -202,7 +202,7 @@ where
return Err(e.into()); return Err(e.into());
} }
warn!(error = ?e, num_retries, retriable = true, COULD_NOT_CONNECT); warn!(error = ?e, num_retries, retriable = true, "couldn't connect to compute node");
} }
}; };

View File

@@ -1,7 +1,7 @@
use crate::{ use crate::{
cancellation, cancellation,
compute::PostgresConnection, compute::PostgresConnection,
control_plane::messages::MetricsAuxInfo, console::messages::MetricsAuxInfo,
metrics::{Direction, Metrics, NumClientConnectionsGuard, NumConnectionRequestsGuard}, metrics::{Direction, Metrics, NumClientConnectionsGuard, NumConnectionRequestsGuard},
stream::Stream, stream::Stream,
usage_metrics::{Ids, MetricCounterRecorder, USAGE_METRICS}, usage_metrics::{Ids, MetricCounterRecorder, USAGE_METRICS},

View File

@@ -11,11 +11,9 @@ use crate::auth::backend::{
ComputeCredentialKeys, ComputeCredentials, ComputeUserInfo, MaybeOwned, TestBackend, ComputeCredentialKeys, ComputeCredentials, ComputeUserInfo, MaybeOwned, TestBackend,
}; };
use crate::config::{CertResolver, RetryConfig}; use crate::config::{CertResolver, RetryConfig};
use crate::control_plane::messages::{ControlPlaneError, Details, MetricsAuxInfo, Status}; use crate::console::messages::{ConsoleError, Details, MetricsAuxInfo, Status};
use crate::control_plane::provider::{ use crate::console::provider::{CachedAllowedIps, CachedRoleSecret, ConsoleBackend, NodeInfoCache};
CachedAllowedIps, CachedRoleSecret, ControlPlaneBackend, NodeInfoCache, use crate::console::{self, CachedNodeInfo, NodeInfo};
};
use crate::control_plane::{self, CachedNodeInfo, NodeInfo};
use crate::error::ErrorKind; use crate::error::ErrorKind;
use crate::{sasl, scram, BranchId, EndpointId, ProjectId}; use crate::{sasl, scram, BranchId, EndpointId, ProjectId};
use anyhow::{bail, Context}; use anyhow::{bail, Context};
@@ -461,7 +459,7 @@ impl ConnectMechanism for TestConnectMechanism {
async fn connect_once( async fn connect_once(
&self, &self,
_ctx: &RequestMonitoring, _ctx: &RequestMonitoring,
_node_info: &control_plane::CachedNodeInfo, _node_info: &console::CachedNodeInfo,
_timeout: std::time::Duration, _timeout: std::time::Duration,
) -> Result<Self::Connection, Self::ConnectError> { ) -> Result<Self::Connection, Self::ConnectError> {
let mut counter = self.counter.lock().unwrap(); let mut counter = self.counter.lock().unwrap();
@@ -485,23 +483,23 @@ impl ConnectMechanism for TestConnectMechanism {
} }
impl TestBackend for TestConnectMechanism { impl TestBackend for TestConnectMechanism {
fn wake_compute(&self) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> { fn wake_compute(&self) -> Result<CachedNodeInfo, console::errors::WakeComputeError> {
let mut counter = self.counter.lock().unwrap(); let mut counter = self.counter.lock().unwrap();
let action = self.sequence[*counter]; let action = self.sequence[*counter];
*counter += 1; *counter += 1;
match action { match action {
ConnectAction::Wake => Ok(helper_create_cached_node_info(self.cache)), ConnectAction::Wake => Ok(helper_create_cached_node_info(self.cache)),
ConnectAction::WakeFail => { ConnectAction::WakeFail => {
let err = control_plane::errors::ApiError::ControlPlane(ControlPlaneError { let err = console::errors::ApiError::Console(ConsoleError {
http_status_code: StatusCode::BAD_REQUEST, http_status_code: StatusCode::BAD_REQUEST,
error: "TEST".into(), error: "TEST".into(),
status: None, status: None,
}); });
assert!(!err.could_retry()); assert!(!err.could_retry());
Err(control_plane::errors::WakeComputeError::ApiError(err)) Err(console::errors::WakeComputeError::ApiError(err))
} }
ConnectAction::WakeRetry => { ConnectAction::WakeRetry => {
let err = control_plane::errors::ApiError::ControlPlane(ControlPlaneError { let err = console::errors::ApiError::Console(ConsoleError {
http_status_code: StatusCode::BAD_REQUEST, http_status_code: StatusCode::BAD_REQUEST,
error: "TEST".into(), error: "TEST".into(),
status: Some(Status { status: Some(Status {
@@ -509,15 +507,13 @@ impl TestBackend for TestConnectMechanism {
message: "error".into(), message: "error".into(),
details: Details { details: Details {
error_info: None, error_info: None,
retry_info: Some(control_plane::messages::RetryInfo { retry_info: Some(console::messages::RetryInfo { retry_delay_ms: 1 }),
retry_delay_ms: 1,
}),
user_facing_message: None, user_facing_message: None,
}, },
}), }),
}); });
assert!(err.could_retry()); assert!(err.could_retry());
Err(control_plane::errors::WakeComputeError::ApiError(err)) Err(console::errors::WakeComputeError::ApiError(err))
} }
x => panic!("expecting action {x:?}, wake_compute is called instead"), x => panic!("expecting action {x:?}, wake_compute is called instead"),
} }
@@ -525,7 +521,7 @@ impl TestBackend for TestConnectMechanism {
fn get_allowed_ips_and_secret( fn get_allowed_ips_and_secret(
&self, &self,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), control_plane::errors::GetAuthInfoError> ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), console::errors::GetAuthInfoError>
{ {
unimplemented!("not used in tests") unimplemented!("not used in tests")
} }
@@ -542,7 +538,7 @@ fn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeIn
endpoint_id: (&EndpointId::from("endpoint")).into(), endpoint_id: (&EndpointId::from("endpoint")).into(),
project_id: (&ProjectId::from("project")).into(), project_id: (&ProjectId::from("project")).into(),
branch_id: (&BranchId::from("branch")).into(), branch_id: (&BranchId::from("branch")).into(),
cold_start_info: crate::control_plane::messages::ColdStartInfo::Warm, cold_start_info: crate::console::messages::ColdStartInfo::Warm,
}, },
allow_self_signed_compute: false, allow_self_signed_compute: false,
}; };
@@ -553,8 +549,8 @@ fn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeIn
fn helper_create_connect_info( fn helper_create_connect_info(
mechanism: &TestConnectMechanism, mechanism: &TestConnectMechanism,
) -> auth::Backend<'static, ComputeCredentials, &()> { ) -> auth::Backend<'static, ComputeCredentials, &()> {
let user_info = auth::Backend::ControlPlane( let user_info = auth::Backend::Console(
MaybeOwned::Owned(ControlPlaneBackend::Test(Box::new(mechanism.clone()))), MaybeOwned::Owned(ConsoleBackend::Test(Box::new(mechanism.clone()))),
ComputeCredentials { ComputeCredentials {
info: ComputeUserInfo { info: ComputeUserInfo {
endpoint: "endpoint".into(), endpoint: "endpoint".into(),

View File

@@ -1,13 +1,13 @@
use crate::config::RetryConfig; use crate::config::RetryConfig;
use crate::console::messages::{ConsoleError, Reason};
use crate::console::{errors::WakeComputeError, provider::CachedNodeInfo};
use crate::context::RequestMonitoring; use crate::context::RequestMonitoring;
use crate::control_plane::messages::{ControlPlaneError, Reason};
use crate::control_plane::{errors::WakeComputeError, provider::CachedNodeInfo};
use crate::metrics::{ use crate::metrics::{
ConnectOutcome, ConnectionFailuresBreakdownGroup, Metrics, RetriesMetricGroup, RetryType, ConnectOutcome, ConnectionFailuresBreakdownGroup, Metrics, RetriesMetricGroup, RetryType,
WakeupFailureKind, WakeupFailureKind,
}; };
use crate::proxy::retry::{retry_after, should_retry}; use crate::proxy::retry::{retry_after, should_retry};
use hyper::StatusCode; use hyper1::StatusCode;
use tracing::{error, info, warn}; use tracing::{error, info, warn};
use super::connect_compute::ComputeConnectBackend; use super::connect_compute::ComputeConnectBackend;
@@ -59,11 +59,11 @@ pub(crate) async fn wake_compute<B: ComputeConnectBackend>(
} }
fn report_error(e: &WakeComputeError, retry: bool) { fn report_error(e: &WakeComputeError, retry: bool) {
use crate::control_plane::errors::ApiError; use crate::console::errors::ApiError;
let kind = match e { let kind = match e {
WakeComputeError::BadComputeAddress(_) => WakeupFailureKind::BadComputeAddress, WakeComputeError::BadComputeAddress(_) => WakeupFailureKind::BadComputeAddress,
WakeComputeError::ApiError(ApiError::Transport(_)) => WakeupFailureKind::ApiTransportError, WakeComputeError::ApiError(ApiError::Transport(_)) => WakeupFailureKind::ApiTransportError,
WakeComputeError::ApiError(ApiError::ControlPlane(e)) => match e.get_reason() { WakeComputeError::ApiError(ApiError::Console(e)) => match e.get_reason() {
Reason::RoleProtected => WakeupFailureKind::ApiConsoleBadRequest, Reason::RoleProtected => WakeupFailureKind::ApiConsoleBadRequest,
Reason::ResourceNotFound => WakeupFailureKind::ApiConsoleBadRequest, Reason::ResourceNotFound => WakeupFailureKind::ApiConsoleBadRequest,
Reason::ProjectNotFound => WakeupFailureKind::ApiConsoleBadRequest, Reason::ProjectNotFound => WakeupFailureKind::ApiConsoleBadRequest,
@@ -80,7 +80,7 @@ fn report_error(e: &WakeComputeError, retry: bool) {
Reason::LockAlreadyTaken => WakeupFailureKind::ApiConsoleLocked, Reason::LockAlreadyTaken => WakeupFailureKind::ApiConsoleLocked,
Reason::RunningOperations => WakeupFailureKind::ApiConsoleLocked, Reason::RunningOperations => WakeupFailureKind::ApiConsoleLocked,
Reason::Unknown => match e { Reason::Unknown => match e {
ControlPlaneError { ConsoleError {
http_status_code: StatusCode::LOCKED, http_status_code: StatusCode::LOCKED,
ref error, ref error,
.. ..
@@ -89,27 +89,27 @@ fn report_error(e: &WakeComputeError, retry: bool) {
{ {
WakeupFailureKind::QuotaExceeded WakeupFailureKind::QuotaExceeded
} }
ControlPlaneError { ConsoleError {
http_status_code: StatusCode::UNPROCESSABLE_ENTITY, http_status_code: StatusCode::UNPROCESSABLE_ENTITY,
ref error, ref error,
.. ..
} if error.contains("compute time quota of non-primary branches is exceeded") => { } if error.contains("compute time quota of non-primary branches is exceeded") => {
WakeupFailureKind::QuotaExceeded WakeupFailureKind::QuotaExceeded
} }
ControlPlaneError { ConsoleError {
http_status_code: StatusCode::LOCKED, http_status_code: StatusCode::LOCKED,
.. ..
} => WakeupFailureKind::ApiConsoleLocked, } => WakeupFailureKind::ApiConsoleLocked,
ControlPlaneError { ConsoleError {
http_status_code: StatusCode::BAD_REQUEST, http_status_code: StatusCode::BAD_REQUEST,
.. ..
} => WakeupFailureKind::ApiConsoleBadRequest, } => WakeupFailureKind::ApiConsoleBadRequest,
ControlPlaneError { ConsoleError {
http_status_code, .. http_status_code, ..
} if http_status_code.is_server_error() => { } if http_status_code.is_server_error() => {
WakeupFailureKind::ApiConsoleOtherServerError WakeupFailureKind::ApiConsoleOtherServerError
} }
ControlPlaneError { .. } => WakeupFailureKind::ApiConsoleOtherError, ConsoleError { .. } => WakeupFailureKind::ApiConsoleOtherError,
}, },
}, },
WakeComputeError::TooManyConnections => WakeupFailureKind::ApiConsoleLocked, WakeComputeError::TooManyConnections => WakeupFailureKind::ApiConsoleLocked,

View File

@@ -22,7 +22,7 @@ use futures::TryFutureExt;
use http::{Method, Response, StatusCode}; use http::{Method, Response, StatusCode};
use http_body_util::combinators::BoxBody; use http_body_util::combinators::BoxBody;
use http_body_util::{BodyExt, Empty}; use http_body_util::{BodyExt, Empty};
use hyper::body::Incoming; use hyper1::body::Incoming;
use hyper_util::rt::TokioExecutor; use hyper_util::rt::TokioExecutor;
use hyper_util::server::conn::auto::Builder; use hyper_util::server::conn::auto::Builder;
use rand::rngs::StdRng; use rand::rngs::StdRng;
@@ -302,7 +302,7 @@ async fn connection_handler(
let server = Builder::new(TokioExecutor::new()); let server = Builder::new(TokioExecutor::new());
let conn = server.serve_connection_with_upgrades( let conn = server.serve_connection_with_upgrades(
hyper_util::rt::TokioIo::new(conn), hyper_util::rt::TokioIo::new(conn),
hyper::service::service_fn(move |req: hyper::Request<Incoming>| { hyper1::service::service_fn(move |req: hyper1::Request<Incoming>| {
// First HTTP request shares the same session ID // First HTTP request shares the same session ID
let session_id = session_id.take().unwrap_or_else(uuid::Uuid::new_v4); let session_id = session_id.take().unwrap_or_else(uuid::Uuid::new_v4);
@@ -355,7 +355,7 @@ async fn connection_handler(
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
async fn request_handler( async fn request_handler(
mut request: hyper::Request<Incoming>, mut request: hyper1::Request<Incoming>,
config: &'static ProxyConfig, config: &'static ProxyConfig,
backend: Arc<PoolingBackend>, backend: Arc<PoolingBackend>,
ws_connections: TaskTracker, ws_connections: TaskTracker,
@@ -365,7 +365,7 @@ async fn request_handler(
// used to cancel in-flight HTTP requests. not used to cancel websockets // used to cancel in-flight HTTP requests. not used to cancel websockets
http_cancellation_token: CancellationToken, http_cancellation_token: CancellationToken,
endpoint_rate_limiter: Arc<EndpointRateLimiter>, endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ApiError> { ) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, ApiError> {
let host = request let host = request
.headers() .headers()
.get("host") .get("host")

View File

@@ -12,13 +12,13 @@ use crate::{
}, },
compute, compute,
config::{AuthenticationConfig, ProxyConfig}, config::{AuthenticationConfig, ProxyConfig},
context::RequestMonitoring, console::{
control_plane::{
errors::{GetAuthInfoError, WakeComputeError}, errors::{GetAuthInfoError, WakeComputeError},
locks::ApiLocks, locks::ApiLocks,
provider::ApiLockError, provider::ApiLockError,
CachedNodeInfo, CachedNodeInfo,
}, },
context::RequestMonitoring,
error::{ErrorKind, ReportableError, UserFacingError}, error::{ErrorKind, ReportableError, UserFacingError},
intern::EndpointIdInt, intern::EndpointIdInt,
proxy::{ proxy::{
@@ -114,7 +114,7 @@ impl PoolingBackend {
jwt: String, jwt: String,
) -> Result<(), AuthError> { ) -> Result<(), AuthError> {
match &self.config.auth_backend { match &self.config.auth_backend {
crate::auth::Backend::ControlPlane(console, ()) => { crate::auth::Backend::Console(console, ()) => {
config config
.jwks_cache .jwks_cache
.check_jwt( .check_jwt(
@@ -129,7 +129,7 @@ impl PoolingBackend {
Ok(()) Ok(())
} }
crate::auth::Backend::ConsoleRedirect(_, ()) => Err(AuthError::auth_failed( crate::auth::Backend::Web(_, ()) => Err(AuthError::auth_failed(
"JWT login over web auth proxy is not supported", "JWT login over web auth proxy is not supported",
)), )),
crate::auth::Backend::Local(_) => { crate::auth::Backend::Local(_) => {
@@ -257,7 +257,7 @@ pub(crate) enum LocalProxyConnError {
#[error("error with connection to local-proxy")] #[error("error with connection to local-proxy")]
Io(#[source] std::io::Error), Io(#[source] std::io::Error),
#[error("could not establish h2 connection")] #[error("could not establish h2 connection")]
H2(#[from] hyper::Error), H2(#[from] hyper1::Error),
} }
impl ReportableError for HttpConnError { impl ReportableError for HttpConnError {
@@ -481,7 +481,7 @@ async fn connect_http2(
}; };
}; };
let (client, connection) = hyper::client::conn::http2::Builder::new(TokioExecutor::new()) let (client, connection) = hyper1::client::conn::http2::Builder::new(TokioExecutor::new())
.timer(TokioTimer::new()) .timer(TokioTimer::new())
.keep_alive_interval(Duration::from_secs(20)) .keep_alive_interval(Duration::from_secs(20))
.keep_alive_while_idle(true) .keep_alive_while_idle(true)

View File

@@ -17,7 +17,7 @@ use tokio_postgres::tls::NoTlsStream;
use tokio_postgres::{AsyncMessage, ReadyForQueryStatus, Socket}; use tokio_postgres::{AsyncMessage, ReadyForQueryStatus, Socket};
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::console::messages::{ColdStartInfo, MetricsAuxInfo};
use crate::metrics::{HttpEndpointPoolsGuard, Metrics}; use crate::metrics::{HttpEndpointPoolsGuard, Metrics};
use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS}; use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS};
use crate::{ use crate::{
@@ -760,7 +760,7 @@ mod tests {
endpoint_id: (&EndpointId::from("endpoint")).into(), endpoint_id: (&EndpointId::from("endpoint")).into(),
project_id: (&ProjectId::from("project")).into(), project_id: (&ProjectId::from("project")).into(),
branch_id: (&BranchId::from("branch")).into(), branch_id: (&BranchId::from("branch")).into(),
cold_start_info: crate::control_plane::messages::ColdStartInfo::Warm, cold_start_info: crate::console::messages::ColdStartInfo::Warm,
}, },
conn_id: uuid::Uuid::new_v4(), conn_id: uuid::Uuid::new_v4(),
} }

View File

@@ -1,5 +1,5 @@
use dashmap::DashMap; use dashmap::DashMap;
use hyper::client::conn::http2; use hyper1::client::conn::http2;
use hyper_util::rt::{TokioExecutor, TokioIo}; use hyper_util::rt::{TokioExecutor, TokioIo};
use parking_lot::RwLock; use parking_lot::RwLock;
use rand::Rng; use rand::Rng;
@@ -8,7 +8,7 @@ use std::sync::atomic::{self, AtomicUsize};
use std::{sync::Arc, sync::Weak}; use std::{sync::Arc, sync::Weak};
use tokio::net::TcpStream; use tokio::net::TcpStream;
use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::console::messages::{ColdStartInfo, MetricsAuxInfo};
use crate::metrics::{HttpEndpointPoolsGuard, Metrics}; use crate::metrics::{HttpEndpointPoolsGuard, Metrics};
use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS}; use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS};
use crate::{context::RequestMonitoring, EndpointCacheKey}; use crate::{context::RequestMonitoring, EndpointCacheKey};
@@ -18,9 +18,9 @@ use tracing::{info, info_span, Instrument};
use super::conn_pool::ConnInfo; use super::conn_pool::ConnInfo;
pub(crate) type Send = http2::SendRequest<hyper::body::Incoming>; pub(crate) type Send = http2::SendRequest<hyper1::body::Incoming>;
pub(crate) type Connect = pub(crate) type Connect =
http2::Connection<TokioIo<TcpStream>, hyper::body::Incoming, TokioExecutor>; http2::Connection<TokioIo<TcpStream>, hyper1::body::Incoming, TokioExecutor>;
#[derive(Clone)] #[derive(Clone)]
struct ConnPoolEntry { struct ConnPoolEntry {

View File

@@ -11,7 +11,7 @@ use serde::Serialize;
use utils::http::error::ApiError; use utils::http::error::ApiError;
/// Like [`ApiError::into_response`] /// Like [`ApiError::into_response`]
pub(crate) fn api_error_into_response(this: ApiError) -> Response<BoxBody<Bytes, hyper::Error>> { pub(crate) fn api_error_into_response(this: ApiError) -> Response<BoxBody<Bytes, hyper1::Error>> {
match this { match this {
ApiError::BadRequest(err) => HttpErrorBody::response_from_msg_and_status( ApiError::BadRequest(err) => HttpErrorBody::response_from_msg_and_status(
format!("{err:#?}"), // use debug printing so that we give the cause format!("{err:#?}"), // use debug printing so that we give the cause
@@ -67,12 +67,12 @@ impl HttpErrorBody {
fn response_from_msg_and_status( fn response_from_msg_and_status(
msg: String, msg: String,
status: StatusCode, status: StatusCode,
) -> Response<BoxBody<Bytes, hyper::Error>> { ) -> Response<BoxBody<Bytes, hyper1::Error>> {
HttpErrorBody { msg }.to_response(status) HttpErrorBody { msg }.to_response(status)
} }
/// Same as [`utils::http::error::HttpErrorBody::to_response`] /// Same as [`utils::http::error::HttpErrorBody::to_response`]
fn to_response(&self, status: StatusCode) -> Response<BoxBody<Bytes, hyper::Error>> { fn to_response(&self, status: StatusCode) -> Response<BoxBody<Bytes, hyper1::Error>> {
Response::builder() Response::builder()
.status(status) .status(status)
.header(http::header::CONTENT_TYPE, "application/json") .header(http::header::CONTENT_TYPE, "application/json")
@@ -90,7 +90,7 @@ impl HttpErrorBody {
pub(crate) fn json_response<T: Serialize>( pub(crate) fn json_response<T: Serialize>(
status: StatusCode, status: StatusCode,
data: T, data: T,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ApiError> { ) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, ApiError> {
let json = serde_json::to_string(&data) let json = serde_json::to_string(&data)
.context("Failed to serialize JSON response") .context("Failed to serialize JSON response")
.map_err(ApiError::InternalServerError)?; .map_err(ApiError::InternalServerError)?;

View File

@@ -12,14 +12,14 @@ use http::Method;
use http_body_util::combinators::BoxBody; use http_body_util::combinators::BoxBody;
use http_body_util::BodyExt; use http_body_util::BodyExt;
use http_body_util::Full; use http_body_util::Full;
use hyper::body::Body; use hyper1::body::Body;
use hyper::body::Incoming; use hyper1::body::Incoming;
use hyper::header; use hyper1::header;
use hyper::http::HeaderName; use hyper1::http::HeaderName;
use hyper::http::HeaderValue; use hyper1::http::HeaderValue;
use hyper::Response; use hyper1::Response;
use hyper::StatusCode; use hyper1::StatusCode;
use hyper::{HeaderMap, Request}; use hyper1::{HeaderMap, Request};
use pq_proto::StartupMessageParamsBuilder; use pq_proto::StartupMessageParamsBuilder;
use serde::Serialize; use serde::Serialize;
use serde_json::Value; use serde_json::Value;
@@ -272,7 +272,7 @@ pub(crate) async fn handle(
request: Request<Incoming>, request: Request<Incoming>,
backend: Arc<PoolingBackend>, backend: Arc<PoolingBackend>,
cancel: CancellationToken, cancel: CancellationToken,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ApiError> { ) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, ApiError> {
let result = handle_inner(cancel, config, &ctx, request, backend).await; let result = handle_inner(cancel, config, &ctx, request, backend).await;
let mut response = match result { let mut response = match result {
@@ -435,7 +435,7 @@ impl UserFacingError for SqlOverHttpError {
#[derive(Debug, thiserror::Error)] #[derive(Debug, thiserror::Error)]
pub(crate) enum ReadPayloadError { pub(crate) enum ReadPayloadError {
#[error("could not read the HTTP request body: {0}")] #[error("could not read the HTTP request body: {0}")]
Read(#[from] hyper::Error), Read(#[from] hyper1::Error),
#[error("could not parse the HTTP request body: {0}")] #[error("could not parse the HTTP request body: {0}")]
Parse(#[from] serde_json::Error), Parse(#[from] serde_json::Error),
} }
@@ -476,7 +476,7 @@ struct HttpHeaders {
} }
impl HttpHeaders { impl HttpHeaders {
fn try_parse(headers: &hyper::http::HeaderMap) -> Result<Self, SqlOverHttpError> { fn try_parse(headers: &hyper1::http::HeaderMap) -> Result<Self, SqlOverHttpError> {
// Determine the output options. Default behaviour is 'false'. Anything that is not // Determine the output options. Default behaviour is 'false'. Anything that is not
// strictly 'true' assumed to be false. // strictly 'true' assumed to be false.
let raw_output = headers.get(&RAW_TEXT_OUTPUT) == Some(&HEADER_VALUE_TRUE); let raw_output = headers.get(&RAW_TEXT_OUTPUT) == Some(&HEADER_VALUE_TRUE);
@@ -529,7 +529,7 @@ async fn handle_inner(
ctx: &RequestMonitoring, ctx: &RequestMonitoring,
request: Request<Incoming>, request: Request<Incoming>,
backend: Arc<PoolingBackend>, backend: Arc<PoolingBackend>,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, SqlOverHttpError> { ) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, SqlOverHttpError> {
let _requeset_gauge = Metrics::get() let _requeset_gauge = Metrics::get()
.proxy .proxy
.connection_requests .connection_requests
@@ -577,7 +577,7 @@ async fn handle_db_inner(
conn_info: ConnInfo, conn_info: ConnInfo,
auth: AuthData, auth: AuthData,
backend: Arc<PoolingBackend>, backend: Arc<PoolingBackend>,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, SqlOverHttpError> { ) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, SqlOverHttpError> {
// //
// Determine the destination and connection params // Determine the destination and connection params
// //
@@ -744,7 +744,7 @@ async fn handle_auth_broker_inner(
conn_info: ConnInfo, conn_info: ConnInfo,
jwt: String, jwt: String,
backend: Arc<PoolingBackend>, backend: Arc<PoolingBackend>,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, SqlOverHttpError> { ) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, SqlOverHttpError> {
backend backend
.authenticate_with_jwt( .authenticate_with_jwt(
ctx, ctx,

View File

@@ -12,7 +12,7 @@ use anyhow::Context as _;
use bytes::{Buf, BufMut, Bytes, BytesMut}; use bytes::{Buf, BufMut, Bytes, BytesMut};
use framed_websockets::{Frame, OpCode, WebSocketServer}; use framed_websockets::{Frame, OpCode, WebSocketServer};
use futures::{Sink, Stream}; use futures::{Sink, Stream};
use hyper::upgrade::OnUpgrade; use hyper1::upgrade::OnUpgrade;
use hyper_util::rt::TokioIo; use hyper_util::rt::TokioIo;
use pin_project_lite::pin_project; use pin_project_lite::pin_project;

View File

@@ -485,51 +485,49 @@ async fn upload_events_chunk(
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use std::{
net::TcpListener,
sync::{Arc, Mutex},
};
use crate::{http, BranchId, EndpointId};
use anyhow::Error; use anyhow::Error;
use chrono::Utc; use chrono::Utc;
use consumption_metrics::{Event, EventChunk}; use consumption_metrics::{Event, EventChunk};
use http_body_util::BodyExt; use hyper::{
use hyper::{body::Incoming, server::conn::http1, service::service_fn, Request, Response}; service::{make_service_fn, service_fn},
use hyper_util::rt::TokioIo; Body, Response,
use std::sync::{Arc, Mutex}; };
use tokio::net::TcpListener;
use url::Url; use url::Url;
use super::*;
use crate::{http, BranchId, EndpointId};
#[tokio::test] #[tokio::test]
async fn metrics() { async fn metrics() {
type Report = EventChunk<'static, Event<Ids, String>>; let listener = TcpListener::bind("0.0.0.0:0").unwrap();
let reports: Arc<Mutex<Vec<Report>>> = Arc::default();
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); let reports = Arc::new(Mutex::new(vec![]));
let addr = listener.local_addr().unwrap(); let reports2 = reports.clone();
tokio::spawn({
let server = hyper::server::Server::from_tcp(listener)
.unwrap()
.serve(make_service_fn(move |_| {
let reports = reports.clone(); let reports = reports.clone();
async move { async move {
loop { Ok::<_, Error>(service_fn(move |req| {
if let Ok((stream, _addr)) = listener.accept().await {
let reports = reports.clone();
http1::Builder::new()
.serve_connection(
TokioIo::new(stream),
service_fn(move |req: Request<Incoming>| {
let reports = reports.clone(); let reports = reports.clone();
async move { async move {
let bytes = req.into_body().collect().await?.to_bytes(); let bytes = hyper::body::to_bytes(req.into_body()).await?;
let events = serde_json::from_slice(&bytes)?; let events: EventChunk<'static, Event<Ids, String>> =
serde_json::from_slice(&bytes)?;
reports.lock().unwrap().push(events); reports.lock().unwrap().push(events);
Ok::<_, Error>(Response::new(String::new())) Ok::<_, Error>(Response::new(Body::from(vec![])))
} }
}), }))
)
.await
.unwrap();
} }
} }));
} let addr = server.local_addr();
}); tokio::spawn(server);
let metrics = Metrics::default(); let metrics = Metrics::default();
let client = http::new_client(); let client = http::new_client();
@@ -538,7 +536,7 @@ mod tests {
// no counters have been registered // no counters have been registered
collect_metrics_iteration(&metrics.endpoints, &client, &endpoint, "foo", now, now).await; collect_metrics_iteration(&metrics.endpoints, &client, &endpoint, "foo", now, now).await;
let r = std::mem::take(&mut *reports.lock().unwrap()); let r = std::mem::take(&mut *reports2.lock().unwrap());
assert!(r.is_empty()); assert!(r.is_empty());
// register a new counter // register a new counter
@@ -550,7 +548,7 @@ mod tests {
// the counter should be observed despite 0 egress // the counter should be observed despite 0 egress
collect_metrics_iteration(&metrics.endpoints, &client, &endpoint, "foo", now, now).await; collect_metrics_iteration(&metrics.endpoints, &client, &endpoint, "foo", now, now).await;
let r = std::mem::take(&mut *reports.lock().unwrap()); let r = std::mem::take(&mut *reports2.lock().unwrap());
assert_eq!(r.len(), 1); assert_eq!(r.len(), 1);
assert_eq!(r[0].events.len(), 1); assert_eq!(r[0].events.len(), 1);
assert_eq!(r[0].events[0].value, 0); assert_eq!(r[0].events[0].value, 0);
@@ -560,7 +558,7 @@ mod tests {
// egress should be observered // egress should be observered
collect_metrics_iteration(&metrics.endpoints, &client, &endpoint, "foo", now, now).await; collect_metrics_iteration(&metrics.endpoints, &client, &endpoint, "foo", now, now).await;
let r = std::mem::take(&mut *reports.lock().unwrap()); let r = std::mem::take(&mut *reports2.lock().unwrap());
assert_eq!(r.len(), 1); assert_eq!(r.len(), 1);
assert_eq!(r[0].events.len(), 1); assert_eq!(r[0].events.len(), 1);
assert_eq!(r[0].events[0].value, 1); assert_eq!(r[0].events[0].value, 1);
@@ -570,7 +568,7 @@ mod tests {
// we do not observe the counter // we do not observe the counter
collect_metrics_iteration(&metrics.endpoints, &client, &endpoint, "foo", now, now).await; collect_metrics_iteration(&metrics.endpoints, &client, &endpoint, "foo", now, now).await;
let r = std::mem::take(&mut *reports.lock().unwrap()); let r = std::mem::take(&mut *reports2.lock().unwrap());
assert!(r.is_empty()); assert!(r.is_empty());
// counter is unregistered // counter is unregistered

View File

@@ -97,8 +97,5 @@ select = [
"I", # isort "I", # isort
"W", # pycodestyle "W", # pycodestyle
"B", # bugbear "B", # bugbear
"UP", # pyupgrade "UP032", # f-string
] ]
[tool.ruff.lint.pyupgrade]
keep-runtime-typing = true # Remove this stanza when we require Python 3.10

View File

@@ -23,7 +23,6 @@ crc32c.workspace = true
fail.workspace = true fail.workspace = true
hex.workspace = true hex.workspace = true
humantime.workspace = true humantime.workspace = true
http.workspace = true
hyper0.workspace = true hyper0.workspace = true
futures.workspace = true futures.workspace = true
once_cell.workspace = true once_cell.workspace = true

View File

@@ -12,8 +12,8 @@ use metrics::{
core::{AtomicU64, Collector, Desc, GenericCounter, GenericGaugeVec, Opts}, core::{AtomicU64, Collector, Desc, GenericCounter, GenericGaugeVec, Opts},
proto::MetricFamily, proto::MetricFamily,
register_histogram_vec, register_int_counter, register_int_counter_pair, register_histogram_vec, register_int_counter, register_int_counter_pair,
register_int_counter_pair_vec, register_int_counter_vec, register_int_gauge, Gauge, register_int_counter_pair_vec, register_int_counter_vec, Gauge, HistogramVec, IntCounter,
HistogramVec, IntCounter, IntCounterPair, IntCounterPairVec, IntCounterVec, IntGaugeVec, IntCounterPair, IntCounterPairVec, IntCounterVec, IntGaugeVec,
}; };
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
@@ -231,14 +231,6 @@ pub(crate) static EVICTION_EVENTS_COMPLETED: Lazy<IntCounterVec> = Lazy::new(||
.expect("Failed to register metric") .expect("Failed to register metric")
}); });
pub static NUM_EVICTED_TIMELINES: Lazy<IntGauge> = Lazy::new(|| {
register_int_gauge!(
"safekeeper_evicted_timelines",
"Number of currently evicted timelines"
)
.expect("Failed to register metric")
});
pub const LABEL_UNKNOWN: &str = "unknown"; pub const LABEL_UNKNOWN: &str = "unknown";
/// Labels for traffic metrics. /// Labels for traffic metrics.

View File

@@ -2,29 +2,21 @@ use utils::lsn::Lsn;
use crate::timeline_manager::StateSnapshot; use crate::timeline_manager::StateSnapshot;
/// Get oldest LSN we still need to keep. /// Get oldest LSN we still need to keep. We hold WAL till it is consumed
/// /// by all of 1) pageserver (remote_consistent_lsn) 2) peers 3) s3
/// We hold WAL till it is consumed by /// offloading.
/// 1) pageserver (remote_consistent_lsn) /// While it is safe to use inmem values for determining horizon,
/// 2) s3 offloading. /// we use persistent to make possible normal states less surprising.
/// 3) Additionally we must store WAL since last local commit_lsn because /// All segments covering LSNs before horizon_lsn can be removed.
/// that's where we start looking for last WAL record on start.
///
/// If some peer safekeeper misses data it will fetch it from the remote
/// storage. While it is safe to use inmem values for determining horizon, we
/// use persistent to make possible normal states less surprising. All segments
/// covering LSNs before horizon_lsn can be removed.
pub(crate) fn calc_horizon_lsn(state: &StateSnapshot, extra_horizon_lsn: Option<Lsn>) -> Lsn { pub(crate) fn calc_horizon_lsn(state: &StateSnapshot, extra_horizon_lsn: Option<Lsn>) -> Lsn {
use std::cmp::min; use std::cmp::min;
let mut horizon_lsn = state.cfile_remote_consistent_lsn; let mut horizon_lsn = min(
state.cfile_remote_consistent_lsn,
state.cfile_peer_horizon_lsn,
);
// we don't want to remove WAL that is not yet offloaded to s3 // we don't want to remove WAL that is not yet offloaded to s3
horizon_lsn = min(horizon_lsn, state.cfile_backup_lsn); horizon_lsn = min(horizon_lsn, state.cfile_backup_lsn);
// Min by local commit_lsn to be able to begin reading WAL from somewhere on
// sk start. Technically we don't allow local commit_lsn to be higher than
// flush_lsn, but let's be double safe by including it as well.
horizon_lsn = min(horizon_lsn, state.cfile_commit_lsn);
horizon_lsn = min(horizon_lsn, state.flush_lsn);
if let Some(extra_horizon_lsn) = extra_horizon_lsn { if let Some(extra_horizon_lsn) = extra_horizon_lsn {
horizon_lsn = min(horizon_lsn, extra_horizon_lsn); horizon_lsn = min(horizon_lsn, extra_horizon_lsn);
} }

View File

@@ -631,19 +631,13 @@ impl Timeline {
return Err(e); return Err(e);
} }
self.bootstrap( self.bootstrap(conf, broker_active_set, partial_backup_rate_limiter);
shared_state,
conf,
broker_active_set,
partial_backup_rate_limiter,
);
Ok(()) Ok(())
} }
/// Bootstrap new or existing timeline starting background tasks. /// Bootstrap new or existing timeline starting background tasks.
pub fn bootstrap( pub fn bootstrap(
self: &Arc<Timeline>, self: &Arc<Timeline>,
_shared_state: &mut WriteGuardSharedState<'_>,
conf: &SafeKeeperConf, conf: &SafeKeeperConf,
broker_active_set: Arc<TimelinesSet>, broker_active_set: Arc<TimelinesSet>,
partial_backup_rate_limiter: RateLimiter, partial_backup_rate_limiter: RateLimiter,

View File

@@ -15,9 +15,7 @@ use tracing::{debug, info, instrument, warn};
use utils::crashsafe::durable_rename; use utils::crashsafe::durable_rename;
use crate::{ use crate::{
metrics::{ metrics::{EvictionEvent, EVICTION_EVENTS_COMPLETED, EVICTION_EVENTS_STARTED},
EvictionEvent, EVICTION_EVENTS_COMPLETED, EVICTION_EVENTS_STARTED, NUM_EVICTED_TIMELINES,
},
rate_limit::rand_duration, rate_limit::rand_duration,
timeline_manager::{Manager, StateSnapshot}, timeline_manager::{Manager, StateSnapshot},
wal_backup, wal_backup,
@@ -95,7 +93,6 @@ impl Manager {
} }
info!("successfully evicted timeline"); info!("successfully evicted timeline");
NUM_EVICTED_TIMELINES.inc();
} }
/// Attempt to restore evicted timeline from remote storage; it must be /// Attempt to restore evicted timeline from remote storage; it must be
@@ -131,7 +128,6 @@ impl Manager {
tokio::time::Instant::now() + rand_duration(&self.conf.eviction_min_resident); tokio::time::Instant::now() + rand_duration(&self.conf.eviction_min_resident);
info!("successfully restored evicted timeline"); info!("successfully restored evicted timeline");
NUM_EVICTED_TIMELINES.dec();
} }
} }

View File

@@ -25,10 +25,7 @@ use utils::lsn::Lsn;
use crate::{ use crate::{
control_file::{FileStorage, Storage}, control_file::{FileStorage, Storage},
metrics::{ metrics::{MANAGER_ACTIVE_CHANGES, MANAGER_ITERATIONS_TOTAL, MISC_OPERATION_SECONDS},
MANAGER_ACTIVE_CHANGES, MANAGER_ITERATIONS_TOTAL, MISC_OPERATION_SECONDS,
NUM_EVICTED_TIMELINES,
},
rate_limit::{rand_duration, RateLimiter}, rate_limit::{rand_duration, RateLimiter},
recovery::recovery_main, recovery::recovery_main,
remove_wal::calc_horizon_lsn, remove_wal::calc_horizon_lsn,
@@ -50,7 +47,7 @@ pub(crate) struct StateSnapshot {
pub(crate) remote_consistent_lsn: Lsn, pub(crate) remote_consistent_lsn: Lsn,
// persistent control file values // persistent control file values
pub(crate) cfile_commit_lsn: Lsn, pub(crate) cfile_peer_horizon_lsn: Lsn,
pub(crate) cfile_remote_consistent_lsn: Lsn, pub(crate) cfile_remote_consistent_lsn: Lsn,
pub(crate) cfile_backup_lsn: Lsn, pub(crate) cfile_backup_lsn: Lsn,
@@ -73,7 +70,7 @@ impl StateSnapshot {
commit_lsn: state.inmem.commit_lsn, commit_lsn: state.inmem.commit_lsn,
backup_lsn: state.inmem.backup_lsn, backup_lsn: state.inmem.backup_lsn,
remote_consistent_lsn: state.inmem.remote_consistent_lsn, remote_consistent_lsn: state.inmem.remote_consistent_lsn,
cfile_commit_lsn: state.commit_lsn, cfile_peer_horizon_lsn: state.peer_horizon_lsn,
cfile_remote_consistent_lsn: state.remote_consistent_lsn, cfile_remote_consistent_lsn: state.remote_consistent_lsn,
cfile_backup_lsn: state.backup_lsn, cfile_backup_lsn: state.backup_lsn,
flush_lsn: read_guard.sk.flush_lsn(), flush_lsn: read_guard.sk.flush_lsn(),
@@ -254,11 +251,6 @@ pub async fn main_task(
mgr.recovery_task = Some(tokio::spawn(recovery_main(tli, mgr.conf.clone()))); mgr.recovery_task = Some(tokio::spawn(recovery_main(tli, mgr.conf.clone())));
} }
// If timeline is evicted, reflect that in the metric.
if mgr.is_offloaded {
NUM_EVICTED_TIMELINES.inc();
}
let last_state = 'outer: loop { let last_state = 'outer: loop {
MANAGER_ITERATIONS_TOTAL.inc(); MANAGER_ITERATIONS_TOTAL.inc();
@@ -375,11 +367,6 @@ pub async fn main_task(
mgr.update_wal_removal_end(res); mgr.update_wal_removal_end(res);
} }
// If timeline is deleted while evicted decrement the gauge.
if mgr.tli.is_cancelled() && mgr.is_offloaded {
NUM_EVICTED_TIMELINES.dec();
}
mgr.set_status(Status::Finished); mgr.set_status(Status::Finished);
} }

View File

@@ -165,14 +165,12 @@ impl GlobalTimelines {
match Timeline::load_timeline(&conf, ttid) { match Timeline::load_timeline(&conf, ttid) {
Ok(timeline) => { Ok(timeline) => {
let tli = Arc::new(timeline); let tli = Arc::new(timeline);
let mut shared_state = tli.write_shared_state().await;
TIMELINES_STATE TIMELINES_STATE
.lock() .lock()
.unwrap() .unwrap()
.timelines .timelines
.insert(ttid, tli.clone()); .insert(ttid, tli.clone());
tli.bootstrap( tli.bootstrap(
&mut shared_state,
&conf, &conf,
broker_active_set.clone(), broker_active_set.clone(),
partial_backup_rate_limiter.clone(), partial_backup_rate_limiter.clone(),
@@ -215,7 +213,6 @@ impl GlobalTimelines {
match Timeline::load_timeline(&conf, ttid) { match Timeline::load_timeline(&conf, ttid) {
Ok(timeline) => { Ok(timeline) => {
let tli = Arc::new(timeline); let tli = Arc::new(timeline);
let mut shared_state = tli.write_shared_state().await;
// TODO: prevent concurrent timeline creation/loading // TODO: prevent concurrent timeline creation/loading
{ {
@@ -230,13 +227,8 @@ impl GlobalTimelines {
state.timelines.insert(ttid, tli.clone()); state.timelines.insert(ttid, tli.clone());
} }
tli.bootstrap( tli.bootstrap(&conf, broker_active_set, partial_backup_rate_limiter);
&mut shared_state,
&conf,
broker_active_set,
partial_backup_rate_limiter,
);
drop(shared_state);
Ok(tli) Ok(tli)
} }
// If we can't load a timeline, it's bad. Caller will figure it out. // If we can't load a timeline, it's bad. Caller will figure it out.

View File

@@ -17,9 +17,7 @@ use std::time::Duration;
use postgres_ffi::v14::xlog_utils::XLogSegNoOffsetToRecPtr; use postgres_ffi::v14::xlog_utils::XLogSegNoOffsetToRecPtr;
use postgres_ffi::XLogFileName; use postgres_ffi::XLogFileName;
use postgres_ffi::{XLogSegNo, PG_TLI}; use postgres_ffi::{XLogSegNo, PG_TLI};
use remote_storage::{ use remote_storage::{GenericRemoteStorage, ListingMode, RemotePath, StorageMetadata};
DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath, StorageMetadata,
};
use tokio::fs::File; use tokio::fs::File;
use tokio::select; use tokio::select;
@@ -505,12 +503,8 @@ pub async fn read_object(
let cancel = CancellationToken::new(); let cancel = CancellationToken::new();
let opts = DownloadOpts {
byte_start: std::ops::Bound::Included(offset),
..Default::default()
};
let download = storage let download = storage
.download(file_path, &opts, &cancel) .download_storage_object(Some((offset, None)), file_path, &cancel)
.await .await
.with_context(|| { .with_context(|| {
format!("Failed to open WAL segment download stream for remote path {file_path:?}") format!("Failed to open WAL segment download stream for remote path {file_path:?}")

Some files were not shown because too many files have changed in this diff Show More