mirror of
https://github.com/neondatabase/neon.git
synced 2026-07-05 05:00:37 +00:00
Compare commits
67 Commits
release-72
...
heikki/per
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b4973ecdfa | ||
|
|
27403e6c3b | ||
|
|
2c10e8a966 | ||
|
|
0128948d8d | ||
|
|
7a92454fbe | ||
|
|
7fed72e951 | ||
|
|
19510f3315 | ||
|
|
13dd2f11c6 | ||
|
|
4dbb469539 | ||
|
|
3114a9f992 | ||
|
|
668845fcb7 | ||
|
|
0f63037345 | ||
|
|
dd826c4c81 | ||
|
|
8782018438 | ||
|
|
f4196983d2 | ||
|
|
293f22056f | ||
|
|
cfcb197d85 | ||
|
|
fa3dc91fb5 | ||
|
|
0841fb9b7b | ||
|
|
b5802abab9 | ||
|
|
5d12e5a72d | ||
|
|
3179b9bed2 | ||
|
|
e48a8415ff | ||
|
|
fc041a213b | ||
|
|
3890f5846c | ||
|
|
585672f5fc | ||
|
|
92ae912ebe | ||
|
|
953ad21104 | ||
|
|
0fcf20075a | ||
|
|
c4affc7859 | ||
|
|
368fc2c6c3 | ||
|
|
dfed7029e1 | ||
|
|
6c0e44be4c | ||
|
|
dc780c9e1e | ||
|
|
45dcc66bf4 | ||
|
|
d91c5c3523 | ||
|
|
b79e2062bd | ||
|
|
ffc24d6d8d | ||
|
|
8d93d02c2f | ||
|
|
023821a80c | ||
|
|
944c1adc4c | ||
|
|
ca85f364ba | ||
|
|
9ef0662a42 | ||
|
|
3baef0bca3 | ||
|
|
f312c6571f | ||
|
|
27a42d0f96 | ||
|
|
b04ab468ee | ||
|
|
dcb629532b | ||
|
|
71d004289c | ||
|
|
4d422b937c | ||
|
|
bbe4dfa991 | ||
|
|
dcb24ce170 | ||
|
|
a2a942f93c | ||
|
|
cb10be710d | ||
|
|
15d01b257a | ||
|
|
aaee713e53 | ||
|
|
2e9207fdf3 | ||
|
|
d8ebd33fe6 | ||
|
|
2dc238e5b3 | ||
|
|
243bca1c49 | ||
|
|
fa909c27fc | ||
|
|
1b60571636 | ||
|
|
c18716bb3f | ||
|
|
cd1d2d1996 | ||
|
|
bd09369198 | ||
|
|
5330122049 | ||
|
|
45658ccccb |
2
.github/workflows/benchmarking.yml
vendored
2
.github/workflows/benchmarking.yml
vendored
@@ -249,7 +249,7 @@ jobs:
|
||||
|
||||
# Post both success and failure to the Slack channel
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule }}
|
||||
if: ${{ github.event.schedule && !cancelled() }}
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C06T9AMNDQQ" # on-call-compute-staging-stream
|
||||
|
||||
15
.github/workflows/build_and_test.yml
vendored
15
.github/workflows/build_and_test.yml
vendored
@@ -669,7 +669,7 @@ jobs:
|
||||
neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
|
||||
- name: Build neon extensions test image
|
||||
if: matrix.version.pg == 'v16'
|
||||
if: matrix.version.pg >= 'v16'
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
@@ -684,8 +684,7 @@ jobs:
|
||||
pull: true
|
||||
file: compute/compute-node.Dockerfile
|
||||
target: neon-pg-ext-test
|
||||
cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon-test-extensions-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }}
|
||||
cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
tags: |
|
||||
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.tag.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
|
||||
@@ -708,7 +707,7 @@ jobs:
|
||||
push: true
|
||||
pull: true
|
||||
file: compute/compute-node.Dockerfile
|
||||
cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-tools-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }}
|
||||
tags: |
|
||||
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
@@ -744,7 +743,7 @@ jobs:
|
||||
neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64
|
||||
|
||||
- name: Create multi-arch neon-test-extensions image
|
||||
if: matrix.version.pg == 'v16'
|
||||
if: matrix.version.pg >= 'v16'
|
||||
run: |
|
||||
docker buildx imagetools create -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \
|
||||
-t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }} \
|
||||
@@ -833,6 +832,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
arch: [ x64, arm64 ]
|
||||
pg_version: [v16, v17]
|
||||
|
||||
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}
|
||||
|
||||
@@ -871,7 +871,10 @@ jobs:
|
||||
|
||||
- name: Verify docker-compose example and test extensions
|
||||
timeout-minutes: 20
|
||||
run: env TAG=${{needs.tag.outputs.build-tag}} ./docker-compose/docker_compose_test.sh
|
||||
env:
|
||||
TAG: ${{needs.tag.outputs.build-tag}}
|
||||
TEST_VERSION_ONLY: ${{ matrix.pg_version }}
|
||||
run: ./docker-compose/docker_compose_test.sh
|
||||
|
||||
- name: Print logs and clean up
|
||||
if: always()
|
||||
|
||||
249
Cargo.lock
generated
249
Cargo.lock
generated
@@ -185,7 +185,7 @@ checksum = "965c2d33e53cb6b267e148a4cb0760bc01f4904c1cd4bb4002a085bb016d1490"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
"synstructure",
|
||||
]
|
||||
|
||||
@@ -197,7 +197,7 @@ checksum = "7b18050c2cd6fe86c3a76584ef5e0baf286d038cda203eb6223df2cc413565f7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -256,7 +256,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -267,7 +267,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -301,7 +301,7 @@ dependencies = [
|
||||
"aws-smithy-types",
|
||||
"aws-types",
|
||||
"bytes",
|
||||
"fastrand 2.0.0",
|
||||
"fastrand 2.2.0",
|
||||
"hex",
|
||||
"http 0.2.9",
|
||||
"hyper 0.14.30",
|
||||
@@ -341,7 +341,7 @@ dependencies = [
|
||||
"aws-smithy-types",
|
||||
"aws-types",
|
||||
"bytes",
|
||||
"fastrand 2.0.0",
|
||||
"fastrand 2.2.0",
|
||||
"http 0.2.9",
|
||||
"http-body 0.4.5",
|
||||
"once_cell",
|
||||
@@ -417,7 +417,7 @@ dependencies = [
|
||||
"aws-smithy-xml",
|
||||
"aws-types",
|
||||
"bytes",
|
||||
"fastrand 2.0.0",
|
||||
"fastrand 2.2.0",
|
||||
"hex",
|
||||
"hmac",
|
||||
"http 0.2.9",
|
||||
@@ -621,7 +621,7 @@ dependencies = [
|
||||
"aws-smithy-runtime-api",
|
||||
"aws-smithy-types",
|
||||
"bytes",
|
||||
"fastrand 2.0.0",
|
||||
"fastrand 2.2.0",
|
||||
"h2 0.3.26",
|
||||
"http 0.2.9",
|
||||
"http-body 0.4.5",
|
||||
@@ -969,7 +969,7 @@ dependencies = [
|
||||
"regex",
|
||||
"rustc-hash",
|
||||
"shlex",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1198,7 +1198,7 @@ dependencies = [
|
||||
"heck 0.4.1",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1615,7 +1615,7 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"strsim",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1626,7 +1626,7 @@ checksum = "29a358ff9f12ec09c3e61fef9b5a9902623a695a46a917b07f269bff1445611a"
|
||||
dependencies = [
|
||||
"darling_core",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1749,7 +1749,7 @@ dependencies = [
|
||||
"dsl_auto_type",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1769,7 +1769,7 @@ version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "209c735641a413bc68c4923a9d6ad4bcb3ca306b794edaa7eb0b3228a99ffb25"
|
||||
dependencies = [
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1792,7 +1792,7 @@ checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1815,7 +1815,7 @@ dependencies = [
|
||||
"heck 0.5.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1947,7 +1947,7 @@ dependencies = [
|
||||
"darling",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1980,7 +1980,7 @@ checksum = "3bf679796c0322556351f287a51b49e48f7c4986e727b5dd78c972d30e2e16cc"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2054,9 +2054,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "2.0.0"
|
||||
version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764"
|
||||
checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4"
|
||||
|
||||
[[package]]
|
||||
name = "ff"
|
||||
@@ -2234,7 +2234,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2337,7 +2337,7 @@ checksum = "53010ccb100b96a67bc32c0175f0ed1426b31b655d562898e57325f81c023ac0"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2912,6 +2912,23 @@ version = "1.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
|
||||
|
||||
[[package]]
|
||||
name = "jemalloc_pprof"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a883828bd6a4b957cd9f618886ff19e5f3ebd34e06ba0e855849e049fef32fb"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"libc",
|
||||
"mappings",
|
||||
"once_cell",
|
||||
"pprof_util",
|
||||
"tempfile",
|
||||
"tikv-jemalloc-ctl",
|
||||
"tokio",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jobserver"
|
||||
version = "0.1.32"
|
||||
@@ -3022,9 +3039,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.150"
|
||||
version = "0.2.167"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
|
||||
checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc"
|
||||
|
||||
[[package]]
|
||||
name = "libloading"
|
||||
@@ -3044,9 +3061,9 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.4.13"
|
||||
version = "0.4.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c"
|
||||
checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
@@ -3079,6 +3096,19 @@ dependencies = [
|
||||
"hashbrown 0.14.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mappings"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ce9229c438fbf1c333926e2053c4c091feabbd40a1b590ec62710fea2384af9e"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"libc",
|
||||
"once_cell",
|
||||
"pprof_util",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "matchers"
|
||||
version = "0.1.0"
|
||||
@@ -3142,7 +3172,7 @@ dependencies = [
|
||||
"heck 0.5.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3346,6 +3376,7 @@ version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af"
|
||||
dependencies = [
|
||||
"num-bigint",
|
||||
"num-complex",
|
||||
"num-integer",
|
||||
"num-iter",
|
||||
@@ -3434,6 +3465,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"num-bigint",
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
]
|
||||
@@ -3497,9 +3529,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.18.0"
|
||||
version = "1.20.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
|
||||
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
|
||||
|
||||
[[package]]
|
||||
name = "oorandom"
|
||||
@@ -3515,9 +3547,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry"
|
||||
version = "0.24.0"
|
||||
version = "0.26.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4c365a63eec4f55b7efeceb724f1336f26a9cf3427b70e59e2cd2a5b947fba96"
|
||||
checksum = "570074cc999d1a58184080966e5bd3bf3a9a4af650c3b05047c2621e7405cd17"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
@@ -3529,9 +3561,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-http"
|
||||
version = "0.13.0"
|
||||
version = "0.26.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ad31e9de44ee3538fb9d64fe3376c1362f406162434609e79aea2a41a0af78ab"
|
||||
checksum = "6351496aeaa49d7c267fb480678d85d1cd30c5edb20b497c48c56f62a8c14b99"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bytes",
|
||||
@@ -3542,9 +3574,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-otlp"
|
||||
version = "0.17.0"
|
||||
version = "0.26.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6b925a602ffb916fb7421276b86756027b37ee708f9dce2dbdcc51739f07e727"
|
||||
checksum = "29e1f9c8b032d4f635c730c0efcf731d5e2530ea13fa8bef7939ddc8420696bd"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"futures-core",
|
||||
@@ -3560,9 +3592,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-proto"
|
||||
version = "0.7.0"
|
||||
version = "0.26.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "30ee9f20bff9c984511a02f082dc8ede839e4a9bf15cc2487c8d6fea5ad850d9"
|
||||
checksum = "c9d3968ce3aefdcca5c27e3c4ea4391b37547726a70893aab52d3de95d5f8b34"
|
||||
dependencies = [
|
||||
"opentelemetry",
|
||||
"opentelemetry_sdk",
|
||||
@@ -3572,15 +3604,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-semantic-conventions"
|
||||
version = "0.16.0"
|
||||
version = "0.26.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1cefe0543875379e47eb5f1e68ff83f45cc41366a92dfd0d073d513bf68e9a05"
|
||||
checksum = "db945c1eaea8ac6a9677185357480d215bb6999faa9f691d0c4d4d641eab7a09"
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry_sdk"
|
||||
version = "0.24.1"
|
||||
version = "0.26.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "692eac490ec80f24a17828d49b40b60f5aeaccdfe6a503f939713afd22bc28df"
|
||||
checksum = "d2c627d9f4c9cdc1f21a29ee4bfbd6028fcb8bcf2a857b43f3abdf72c9c862f3"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"futures-channel",
|
||||
@@ -3954,7 +3986,7 @@ dependencies = [
|
||||
"parquet",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4056,7 +4088,7 @@ checksum = "39407670928234ebc5e6e580247dd567ad73a3578460c5990f9503df207e8f07"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4177,7 +4209,6 @@ dependencies = [
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
"hmac",
|
||||
"md-5",
|
||||
"memchr",
|
||||
"rand 0.8.5",
|
||||
"sha2",
|
||||
@@ -4298,6 +4329,19 @@ dependencies = [
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pprof_util"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "65c568b3f8c1c37886ae07459b1946249e725c315306b03be5632f84c239f781"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"flate2",
|
||||
"num",
|
||||
"paste",
|
||||
"prost",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.17"
|
||||
@@ -4334,7 +4378,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4348,9 +4392,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.78"
|
||||
version = "1.0.92"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae"
|
||||
checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
@@ -4424,7 +4468,7 @@ dependencies = [
|
||||
"prost",
|
||||
"prost-types",
|
||||
"regex",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
@@ -4438,7 +4482,7 @@ dependencies = [
|
||||
"itertools 0.12.1",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4567,6 +4611,7 @@ dependencies = [
|
||||
"tikv-jemalloc-ctl",
|
||||
"tikv-jemallocator",
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"tokio-postgres2",
|
||||
"tokio-rustls 0.26.0",
|
||||
"tokio-tungstenite",
|
||||
@@ -4992,9 +5037,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "reqwest-middleware"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0209efb52486ad88136190094ee214759ef7507068b27992256ed6610eb71a01"
|
||||
checksum = "d1ccd3b55e711f91a9885a2fa6fbbb2e39db1776420b062efc058c6410f7e5e3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
@@ -5007,13 +5052,12 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "reqwest-retry"
|
||||
version = "0.5.0"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "40f342894422862af74c50e1e9601cf0931accc9c6981e5eb413c46603b616b5"
|
||||
checksum = "29c73e4195a6bfbcb174b790d9b3407ab90646976c55de58a6515da25d851178"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
"chrono",
|
||||
"futures",
|
||||
"getrandom 0.2.11",
|
||||
"http 1.1.0",
|
||||
@@ -5022,6 +5066,7 @@ dependencies = [
|
||||
"reqwest 0.12.4",
|
||||
"reqwest-middleware",
|
||||
"retry-policies",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"wasm-timer",
|
||||
@@ -5029,9 +5074,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "reqwest-tracing"
|
||||
version = "0.5.3"
|
||||
version = "0.5.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bfdd9bfa64c72233d8dd99ab7883efcdefe9e16d46488ecb9228b71a2e2ceb45"
|
||||
checksum = "ff82cf5730a1311fb9413b0bc2b8e743e0157cd73f010ab4ec374a923873b6a2"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
@@ -5047,12 +5092,10 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "retry-policies"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "493b4243e32d6eedd29f9a398896e35c6943a123b55eec97dcaee98310d25810"
|
||||
checksum = "5875471e6cab2871bc150ecb8c727db5113c9338cc3354dc5ee3425b6aa40a1c"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
"rand 0.8.5",
|
||||
]
|
||||
|
||||
@@ -5176,7 +5219,7 @@ dependencies = [
|
||||
"regex",
|
||||
"relative-path",
|
||||
"rustc_version",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
@@ -5222,14 +5265,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.28"
|
||||
version = "0.38.41"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316"
|
||||
checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys 0.4.13",
|
||||
"linux-raw-sys 0.4.14",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
@@ -5684,7 +5727,7 @@ checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -5766,7 +5809,7 @@ dependencies = [
|
||||
"darling",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6139,7 +6182,7 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6190,9 +6233,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.52"
|
||||
version = "2.0.90"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07"
|
||||
checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -6222,7 +6265,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6253,13 +6296,13 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tempfile"
|
||||
version = "3.9.0"
|
||||
version = "3.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa"
|
||||
checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"fastrand 2.0.0",
|
||||
"redox_syscall 0.4.1",
|
||||
"fastrand 2.2.0",
|
||||
"once_cell",
|
||||
"rustix",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
@@ -6300,27 +6343,27 @@ checksum = "78ea17a2dc368aeca6f554343ced1b1e31f76d63683fa8016e5844bd7a5144a1"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.57"
|
||||
version = "1.0.69"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b"
|
||||
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.57"
|
||||
version = "1.0.69"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81"
|
||||
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6494,7 +6537,7 @@ checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6719,7 +6762,7 @@ dependencies = [
|
||||
"prost-build",
|
||||
"prost-types",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6756,9 +6799,9 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52"
|
||||
|
||||
[[package]]
|
||||
name = "tracing"
|
||||
version = "0.1.40"
|
||||
version = "0.1.41"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef"
|
||||
checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0"
|
||||
dependencies = [
|
||||
"log",
|
||||
"pin-project-lite",
|
||||
@@ -6779,20 +6822,20 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tracing-attributes"
|
||||
version = "0.1.27"
|
||||
version = "0.1.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
|
||||
checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-core"
|
||||
version = "0.1.32"
|
||||
version = "0.1.33"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
|
||||
checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"valuable",
|
||||
@@ -6821,9 +6864,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tracing-opentelemetry"
|
||||
version = "0.25.0"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a9784ed4da7d921bc8df6963f8c80a0e4ce34ba6ba76668acadd3edbd985ff3b"
|
||||
checksum = "dc58af5d3f6c5811462cabb3289aec0093f7338e367e5a33d28c0433b3c7360b"
|
||||
dependencies = [
|
||||
"js-sys",
|
||||
"once_cell",
|
||||
@@ -6839,9 +6882,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tracing-serde"
|
||||
version = "0.1.3"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bc6b213177105856957181934e4920de57730fc69bf42c37ee5bb664d406d9e1"
|
||||
checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"tracing-core",
|
||||
@@ -6849,9 +6892,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tracing-subscriber"
|
||||
version = "0.3.18"
|
||||
version = "0.3.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b"
|
||||
checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008"
|
||||
dependencies = [
|
||||
"matchers",
|
||||
"once_cell",
|
||||
@@ -7060,6 +7103,7 @@ dependencies = [
|
||||
"hex-literal",
|
||||
"humantime",
|
||||
"hyper 0.14.30",
|
||||
"jemalloc_pprof",
|
||||
"jsonwebtoken",
|
||||
"metrics",
|
||||
"nix 0.27.1",
|
||||
@@ -7258,7 +7302,7 @@ dependencies = [
|
||||
"once_cell",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
@@ -7292,7 +7336,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
"wasm-bindgen-backend",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
@@ -7646,8 +7690,12 @@ dependencies = [
|
||||
"memchr",
|
||||
"nix 0.26.4",
|
||||
"nom",
|
||||
"num",
|
||||
"num-bigint",
|
||||
"num-complex",
|
||||
"num-integer",
|
||||
"num-iter",
|
||||
"num-rational",
|
||||
"num-traits",
|
||||
"once_cell",
|
||||
"parquet",
|
||||
@@ -7669,8 +7717,9 @@ dependencies = [
|
||||
"smallvec",
|
||||
"spki 0.7.3",
|
||||
"subtle",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
"sync_wrapper 0.1.2",
|
||||
"tikv-jemalloc-ctl",
|
||||
"tikv-jemalloc-sys",
|
||||
"time",
|
||||
"time-macros",
|
||||
@@ -7769,7 +7818,7 @@ checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -7790,7 +7839,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
19
Cargo.toml
19
Cargo.toml
@@ -115,6 +115,7 @@ indoc = "2"
|
||||
ipnet = "2.10.0"
|
||||
itertools = "0.10"
|
||||
itoa = "1.0.11"
|
||||
jemalloc_pprof = "0.6"
|
||||
jsonwebtoken = "9"
|
||||
lasso = "0.7"
|
||||
libc = "0.2"
|
||||
@@ -127,10 +128,10 @@ notify = "6.0.0"
|
||||
num_cpus = "1.15"
|
||||
num-traits = "0.2.15"
|
||||
once_cell = "1.13"
|
||||
opentelemetry = "0.24"
|
||||
opentelemetry_sdk = "0.24"
|
||||
opentelemetry-otlp = { version = "0.17", default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
|
||||
opentelemetry-semantic-conventions = "0.16"
|
||||
opentelemetry = "0.26"
|
||||
opentelemetry_sdk = "0.26"
|
||||
opentelemetry-otlp = { version = "0.26", default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
|
||||
opentelemetry-semantic-conventions = "0.26"
|
||||
parking_lot = "0.12"
|
||||
parquet = { version = "53", default-features = false, features = ["zstd"] }
|
||||
parquet_derive = "53"
|
||||
@@ -144,9 +145,9 @@ rand = "0.8"
|
||||
redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] }
|
||||
regex = "1.10.2"
|
||||
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] }
|
||||
reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_24"] }
|
||||
reqwest-middleware = "0.3.0"
|
||||
reqwest-retry = "0.5"
|
||||
reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_26"] }
|
||||
reqwest-middleware = "0.4"
|
||||
reqwest-retry = "0.7"
|
||||
routerify = "3"
|
||||
rpds = "0.13"
|
||||
rustc-hash = "1.1.0"
|
||||
@@ -175,7 +176,7 @@ sync_wrapper = "0.1.2"
|
||||
tar = "0.4"
|
||||
test-context = "0.3"
|
||||
thiserror = "1.0"
|
||||
tikv-jemallocator = { version = "0.6", features = ["stats"] }
|
||||
tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
|
||||
tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
|
||||
tokio = { version = "1.17", features = ["macros"] }
|
||||
tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
|
||||
@@ -191,7 +192,7 @@ tonic = {version = "0.12.3", features = ["tls", "tls-roots"]}
|
||||
tower-service = "0.3.2"
|
||||
tracing = "0.1"
|
||||
tracing-error = "0.2"
|
||||
tracing-opentelemetry = "0.25"
|
||||
tracing-opentelemetry = "0.27"
|
||||
tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
|
||||
try-lock = "0.2.5"
|
||||
twox-hash = { version = "1.6.3", default-features = false }
|
||||
|
||||
@@ -358,10 +358,10 @@ COPY compute/patches/pgvector.patch /pgvector.patch
|
||||
# because we build the images on different machines than where we run them.
|
||||
# Pass OPTFLAGS="" to remove it.
|
||||
#
|
||||
# vector 0.7.4 supports v17
|
||||
# last release v0.7.4 - Aug 5, 2024
|
||||
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.4.tar.gz -O pgvector.tar.gz && \
|
||||
echo "0341edf89b1924ae0d552f617e14fb7f8867c0194ed775bcc44fa40288642583 pgvector.tar.gz" | sha256sum --check && \
|
||||
# vector >0.7.4 supports v17
|
||||
# last release v0.8.0 - Oct 30, 2024
|
||||
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.8.0.tar.gz -O pgvector.tar.gz && \
|
||||
echo "867a2c328d4928a5a9d6f052cd3bc78c7d60228a9b914ad32aa3db88e9de27b0 pgvector.tar.gz" | sha256sum --check && \
|
||||
mkdir pgvector-src && cd pgvector-src && tar xzf ../pgvector.tar.gz --strip-components=1 -C . && \
|
||||
patch -p1 < /pgvector.patch && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
@@ -1367,15 +1367,12 @@ RUN make PG_VERSION="${PG_VERSION}" -C compute
|
||||
|
||||
FROM neon-pg-ext-build AS neon-pg-ext-test
|
||||
ARG PG_VERSION
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
mkdir /ext-src
|
||||
RUN mkdir /ext-src
|
||||
|
||||
#COPY --from=postgis-build /postgis.tar.gz /ext-src/
|
||||
#COPY --from=postgis-build /sfcgal/* /usr
|
||||
COPY --from=plv8-build /plv8.tar.gz /ext-src/
|
||||
COPY --from=h3-pg-build /h3-pg.tar.gz /ext-src/
|
||||
#COPY --from=h3-pg-build /h3-pg.tar.gz /ext-src/
|
||||
COPY --from=unit-pg-build /postgresql-unit.tar.gz /ext-src/
|
||||
COPY --from=vector-pg-build /pgvector.tar.gz /ext-src/
|
||||
COPY --from=vector-pg-build /pgvector.patch /ext-src/
|
||||
@@ -1395,7 +1392,7 @@ COPY --from=hll-pg-build /hll.tar.gz /ext-src
|
||||
COPY --from=plpgsql-check-pg-build /plpgsql_check.tar.gz /ext-src
|
||||
#COPY --from=timescaledb-pg-build /timescaledb.tar.gz /ext-src
|
||||
COPY --from=pg-hint-plan-pg-build /pg_hint_plan.tar.gz /ext-src
|
||||
COPY compute/patches/pg_hint_plan.patch /ext-src
|
||||
COPY compute/patches/pg_hint_plan_${PG_VERSION}.patch /ext-src
|
||||
COPY --from=pg-cron-pg-build /pg_cron.tar.gz /ext-src
|
||||
COPY compute/patches/pg_cron.patch /ext-src
|
||||
#COPY --from=pg-pgx-ulid-build /home/nonroot/pgx_ulid.tar.gz /ext-src
|
||||
@@ -1405,38 +1402,23 @@ COPY --from=pg-roaringbitmap-pg-build /pg_roaringbitmap.tar.gz /ext-src
|
||||
COPY --from=pg-semver-pg-build /pg_semver.tar.gz /ext-src
|
||||
#COPY --from=pg-embedding-pg-build /home/nonroot/pg_embedding-src/ /ext-src
|
||||
#COPY --from=wal2json-pg-build /wal2json_2_5.tar.gz /ext-src
|
||||
COPY --from=pg-anon-pg-build /pg_anon.tar.gz /ext-src
|
||||
#pg_anon is not supported yet for pg v17 so, don't fail if nothing found
|
||||
COPY --from=pg-anon-pg-build /pg_anon.tar.g? /ext-src
|
||||
COPY compute/patches/pg_anon.patch /ext-src
|
||||
COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src
|
||||
COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
cd /ext-src/ && for f in *.tar.gz; \
|
||||
RUN cd /ext-src/ && for f in *.tar.gz; \
|
||||
do echo $f; dname=$(echo $f | sed 's/\.tar.*//')-src; \
|
||||
rm -rf $dname; mkdir $dname; tar xzf $f --strip-components=1 -C $dname \
|
||||
|| exit 1; rm -f $f; done
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
cd /ext-src/rum-src && patch -p1 <../rum.patch
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan.patch
|
||||
RUN cd /ext-src/rum-src && patch -p1 <../rum.patch
|
||||
RUN cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
|
||||
RUN cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan_${PG_VERSION}.patch
|
||||
COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
patch -p1 </ext-src/pg_anon.patch
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
patch -p1 </ext-src/pg_cron.patch
|
||||
echo "postgresql_anonymizer does not yet support PG17" && exit 0;; \
|
||||
esac && patch -p1 </ext-src/pg_anon.patch
|
||||
RUN patch -p1 </ext-src/pg_cron.patch
|
||||
ENV PATH=/usr/local/pgsql/bin:$PATH
|
||||
ENV PGHOST=compute
|
||||
ENV PGPORT=55433
|
||||
|
||||
174
compute/patches/pg_hint_plan_v17.patch
Normal file
174
compute/patches/pg_hint_plan_v17.patch
Normal file
@@ -0,0 +1,174 @@
|
||||
diff --git a/expected/ut-A.out b/expected/ut-A.out
|
||||
index e7d68a1..65a056c 100644
|
||||
--- a/expected/ut-A.out
|
||||
+++ b/expected/ut-A.out
|
||||
@@ -9,13 +9,16 @@ SET search_path TO public;
|
||||
----
|
||||
-- No.A-1-1-3
|
||||
CREATE EXTENSION pg_hint_plan;
|
||||
+LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/pg_hint_plan
|
||||
-- No.A-1-2-3
|
||||
DROP EXTENSION pg_hint_plan;
|
||||
-- No.A-1-1-4
|
||||
CREATE SCHEMA other_schema;
|
||||
CREATE EXTENSION pg_hint_plan SCHEMA other_schema;
|
||||
+LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/pg_hint_plan
|
||||
ERROR: extension "pg_hint_plan" must be installed in schema "hint_plan"
|
||||
CREATE EXTENSION pg_hint_plan;
|
||||
+LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/pg_hint_plan
|
||||
DROP SCHEMA other_schema;
|
||||
----
|
||||
---- No. A-5-1 comment pattern
|
||||
diff --git a/expected/ut-J.out b/expected/ut-J.out
|
||||
index 2fa3c70..314e929 100644
|
||||
--- a/expected/ut-J.out
|
||||
+++ b/expected/ut-J.out
|
||||
@@ -789,38 +789,6 @@ NestLoop(st1 st2)
|
||||
MergeJoin(t1 t2)
|
||||
not used hint:
|
||||
duplication hint:
|
||||
-error hint:
|
||||
-
|
||||
-LOG: pg_hint_plan:
|
||||
-used hint:
|
||||
-not used hint:
|
||||
-NestLoop(st1 st2)
|
||||
-MergeJoin(t1 t2)
|
||||
-duplication hint:
|
||||
-error hint:
|
||||
-
|
||||
-LOG: pg_hint_plan:
|
||||
-used hint:
|
||||
-not used hint:
|
||||
-NestLoop(st1 st2)
|
||||
-MergeJoin(t1 t2)
|
||||
-duplication hint:
|
||||
-error hint:
|
||||
-
|
||||
-LOG: pg_hint_plan:
|
||||
-used hint:
|
||||
-not used hint:
|
||||
-NestLoop(st1 st2)
|
||||
-MergeJoin(t1 t2)
|
||||
-duplication hint:
|
||||
-error hint:
|
||||
-
|
||||
-LOG: pg_hint_plan:
|
||||
-used hint:
|
||||
-not used hint:
|
||||
-NestLoop(st1 st2)
|
||||
-MergeJoin(t1 t2)
|
||||
-duplication hint:
|
||||
error hint:
|
||||
|
||||
explain_filter
|
||||
diff --git a/expected/ut-S.out b/expected/ut-S.out
|
||||
index 0bfcfb8..e75f581 100644
|
||||
--- a/expected/ut-S.out
|
||||
+++ b/expected/ut-S.out
|
||||
@@ -4415,34 +4415,6 @@ used hint:
|
||||
IndexScan(ti1 ti1_pred)
|
||||
not used hint:
|
||||
duplication hint:
|
||||
-error hint:
|
||||
-
|
||||
-LOG: pg_hint_plan:
|
||||
-used hint:
|
||||
-not used hint:
|
||||
-IndexScan(ti1 ti1_pred)
|
||||
-duplication hint:
|
||||
-error hint:
|
||||
-
|
||||
-LOG: pg_hint_plan:
|
||||
-used hint:
|
||||
-not used hint:
|
||||
-IndexScan(ti1 ti1_pred)
|
||||
-duplication hint:
|
||||
-error hint:
|
||||
-
|
||||
-LOG: pg_hint_plan:
|
||||
-used hint:
|
||||
-not used hint:
|
||||
-IndexScan(ti1 ti1_pred)
|
||||
-duplication hint:
|
||||
-error hint:
|
||||
-
|
||||
-LOG: pg_hint_plan:
|
||||
-used hint:
|
||||
-not used hint:
|
||||
-IndexScan(ti1 ti1_pred)
|
||||
-duplication hint:
|
||||
error hint:
|
||||
|
||||
explain_filter
|
||||
diff --git a/expected/ut-W.out b/expected/ut-W.out
|
||||
index a09bd34..0ad227c 100644
|
||||
--- a/expected/ut-W.out
|
||||
+++ b/expected/ut-W.out
|
||||
@@ -1341,54 +1341,6 @@ IndexScan(ft1)
|
||||
IndexScan(t)
|
||||
Parallel(s1 3 hard)
|
||||
duplication hint:
|
||||
-error hint:
|
||||
-
|
||||
-LOG: pg_hint_plan:
|
||||
-used hint:
|
||||
-not used hint:
|
||||
-IndexScan(*VALUES*)
|
||||
-SeqScan(cte1)
|
||||
-IndexScan(ft1)
|
||||
-IndexScan(t)
|
||||
-Parallel(p1 5 hard)
|
||||
-Parallel(s1 3 hard)
|
||||
-duplication hint:
|
||||
-error hint:
|
||||
-
|
||||
-LOG: pg_hint_plan:
|
||||
-used hint:
|
||||
-not used hint:
|
||||
-IndexScan(*VALUES*)
|
||||
-SeqScan(cte1)
|
||||
-IndexScan(ft1)
|
||||
-IndexScan(t)
|
||||
-Parallel(p1 5 hard)
|
||||
-Parallel(s1 3 hard)
|
||||
-duplication hint:
|
||||
-error hint:
|
||||
-
|
||||
-LOG: pg_hint_plan:
|
||||
-used hint:
|
||||
-not used hint:
|
||||
-IndexScan(*VALUES*)
|
||||
-SeqScan(cte1)
|
||||
-IndexScan(ft1)
|
||||
-IndexScan(t)
|
||||
-Parallel(p1 5 hard)
|
||||
-Parallel(s1 3 hard)
|
||||
-duplication hint:
|
||||
-error hint:
|
||||
-
|
||||
-LOG: pg_hint_plan:
|
||||
-used hint:
|
||||
-not used hint:
|
||||
-IndexScan(*VALUES*)
|
||||
-SeqScan(cte1)
|
||||
-IndexScan(ft1)
|
||||
-IndexScan(t)
|
||||
-Parallel(p1 5 hard)
|
||||
-Parallel(s1 3 hard)
|
||||
-duplication hint:
|
||||
error hint:
|
||||
|
||||
explain_filter
|
||||
diff --git a/expected/ut-fdw.out b/expected/ut-fdw.out
|
||||
index 017fa4b..98d989b 100644
|
||||
--- a/expected/ut-fdw.out
|
||||
+++ b/expected/ut-fdw.out
|
||||
@@ -7,6 +7,7 @@ SET pg_hint_plan.debug_print TO on;
|
||||
SET client_min_messages TO LOG;
|
||||
SET pg_hint_plan.enable_hint TO on;
|
||||
CREATE EXTENSION file_fdw;
|
||||
+LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/file_fdw
|
||||
CREATE SERVER file_server FOREIGN DATA WRAPPER file_fdw;
|
||||
CREATE USER MAPPING FOR PUBLIC SERVER file_server;
|
||||
CREATE FOREIGN TABLE ft1 (id int, val int) SERVER file_server OPTIONS (format 'csv', filename :'filename');
|
||||
@@ -335,6 +335,7 @@ fn wait_spec(
|
||||
pgdata: pgdata.to_string(),
|
||||
pgbin: pgbin.to_string(),
|
||||
pgversion: get_pg_version_string(pgbin),
|
||||
http_port,
|
||||
live_config_allowed,
|
||||
state: Mutex::new(new_state),
|
||||
state_changed: Condvar::new(),
|
||||
@@ -389,7 +390,6 @@ fn wait_spec(
|
||||
|
||||
Ok(WaitSpecResult {
|
||||
compute,
|
||||
http_port,
|
||||
resize_swap_on_bind,
|
||||
set_disk_quota_for_fs: set_disk_quota_for_fs.cloned(),
|
||||
})
|
||||
@@ -397,8 +397,6 @@ fn wait_spec(
|
||||
|
||||
struct WaitSpecResult {
|
||||
compute: Arc<ComputeNode>,
|
||||
// passed through from ProcessCliResult
|
||||
http_port: u16,
|
||||
resize_swap_on_bind: bool,
|
||||
set_disk_quota_for_fs: Option<String>,
|
||||
}
|
||||
@@ -408,7 +406,6 @@ fn start_postgres(
|
||||
#[allow(unused_variables)] matches: &clap::ArgMatches,
|
||||
WaitSpecResult {
|
||||
compute,
|
||||
http_port,
|
||||
resize_swap_on_bind,
|
||||
set_disk_quota_for_fs,
|
||||
}: WaitSpecResult,
|
||||
@@ -481,12 +478,10 @@ fn start_postgres(
|
||||
}
|
||||
}
|
||||
|
||||
let extension_server_port: u16 = http_port;
|
||||
|
||||
// Start Postgres
|
||||
let mut pg = None;
|
||||
if !prestartup_failed {
|
||||
pg = match compute.start_compute(extension_server_port) {
|
||||
pg = match compute.start_compute() {
|
||||
Ok(pg) => Some(pg),
|
||||
Err(err) => {
|
||||
error!("could not start the compute node: {:#}", err);
|
||||
|
||||
@@ -79,6 +79,8 @@ pub struct ComputeNode {
|
||||
/// - we push spec and it does configuration
|
||||
/// - but then it is restarted without any spec again
|
||||
pub live_config_allowed: bool,
|
||||
/// The port that the compute's HTTP server listens on
|
||||
pub http_port: u16,
|
||||
/// Volatile part of the `ComputeNode`, which should be used under `Mutex`.
|
||||
/// To allow HTTP API server to serving status requests, while configuration
|
||||
/// is in progress, lock should be held only for short periods of time to do
|
||||
@@ -611,11 +613,7 @@ impl ComputeNode {
|
||||
/// Do all the preparations like PGDATA directory creation, configuration,
|
||||
/// safekeepers sync, basebackup, etc.
|
||||
#[instrument(skip_all)]
|
||||
pub fn prepare_pgdata(
|
||||
&self,
|
||||
compute_state: &ComputeState,
|
||||
extension_server_port: u16,
|
||||
) -> Result<()> {
|
||||
pub fn prepare_pgdata(&self, compute_state: &ComputeState) -> Result<()> {
|
||||
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
|
||||
let spec = &pspec.spec;
|
||||
let pgdata_path = Path::new(&self.pgdata);
|
||||
@@ -625,7 +623,7 @@ impl ComputeNode {
|
||||
config::write_postgres_conf(
|
||||
&pgdata_path.join("postgresql.conf"),
|
||||
&pspec.spec,
|
||||
Some(extension_server_port),
|
||||
self.http_port,
|
||||
)?;
|
||||
|
||||
// Syncing safekeepers is only safe with primary nodes: if a primary
|
||||
@@ -1243,7 +1241,7 @@ impl ComputeNode {
|
||||
// Write new config
|
||||
let pgdata_path = Path::new(&self.pgdata);
|
||||
let postgresql_conf_path = pgdata_path.join("postgresql.conf");
|
||||
config::write_postgres_conf(&postgresql_conf_path, &spec, None)?;
|
||||
config::write_postgres_conf(&postgresql_conf_path, &spec, self.http_port)?;
|
||||
|
||||
// TODO(ololobus): We need a concurrency during reconfiguration as well,
|
||||
// but DB is already running and used by user. We can easily get out of
|
||||
@@ -1284,10 +1282,7 @@ impl ComputeNode {
|
||||
}
|
||||
|
||||
#[instrument(skip_all)]
|
||||
pub fn start_compute(
|
||||
&self,
|
||||
extension_server_port: u16,
|
||||
) -> Result<(std::process::Child, std::thread::JoinHandle<()>)> {
|
||||
pub fn start_compute(&self) -> Result<(std::process::Child, std::thread::JoinHandle<()>)> {
|
||||
let compute_state = self.state.lock().unwrap().clone();
|
||||
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
|
||||
info!(
|
||||
@@ -1362,7 +1357,7 @@ impl ComputeNode {
|
||||
info!("{:?}", remote_ext_metrics);
|
||||
}
|
||||
|
||||
self.prepare_pgdata(&compute_state, extension_server_port)?;
|
||||
self.prepare_pgdata(&compute_state)?;
|
||||
|
||||
let start_time = Utc::now();
|
||||
let pg_process = self.start_postgres(pspec.storage_auth_token.clone())?;
|
||||
|
||||
@@ -37,7 +37,7 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
|
||||
pub fn write_postgres_conf(
|
||||
path: &Path,
|
||||
spec: &ComputeSpec,
|
||||
extension_server_port: Option<u16>,
|
||||
extension_server_port: u16,
|
||||
) -> Result<()> {
|
||||
// File::create() destroys the file content if it exists.
|
||||
let mut file = File::create(path)?;
|
||||
@@ -127,9 +127,7 @@ pub fn write_postgres_conf(
|
||||
writeln!(file, "# Managed by compute_ctl: end")?;
|
||||
}
|
||||
|
||||
if let Some(port) = extension_server_port {
|
||||
writeln!(file, "neon.extension_server_port={}", port)?;
|
||||
}
|
||||
writeln!(file, "neon.extension_server_port={}", extension_server_port)?;
|
||||
|
||||
// This is essential to keep this line at the end of the file,
|
||||
// because it is intended to override any settings above.
|
||||
|
||||
@@ -310,6 +310,10 @@ impl Endpoint {
|
||||
conf.append("wal_log_hints", "off");
|
||||
conf.append("max_replication_slots", "10");
|
||||
conf.append("hot_standby", "on");
|
||||
// Set to 1MB to both exercise getPage requests/LFC, and still have enough room for
|
||||
// Postgres to operate. Everything smaller might be not enough for Postgres under load,
|
||||
// and can cause errors like 'no unpinned buffers available', see
|
||||
// <https://github.com/neondatabase/neon/issues/9956>
|
||||
conf.append("shared_buffers", "1MB");
|
||||
conf.append("fsync", "off");
|
||||
conf.append("max_connections", "100");
|
||||
|
||||
@@ -560,14 +560,26 @@ async fn main() -> anyhow::Result<()> {
|
||||
.await?;
|
||||
}
|
||||
Command::TenantDescribe { tenant_id } => {
|
||||
let describe_response = storcon_client
|
||||
let TenantDescribeResponse {
|
||||
tenant_id,
|
||||
shards,
|
||||
stripe_size,
|
||||
policy,
|
||||
config,
|
||||
} = storcon_client
|
||||
.dispatch::<(), TenantDescribeResponse>(
|
||||
Method::GET,
|
||||
format!("control/v1/tenant/{tenant_id}"),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
let shards = describe_response.shards;
|
||||
println!("Tenant {tenant_id}");
|
||||
let mut table = comfy_table::Table::new();
|
||||
table.add_row(["Policy", &format!("{:?}", policy)]);
|
||||
table.add_row(["Stripe size", &format!("{:?}", stripe_size)]);
|
||||
table.add_row(["Config", &serde_json::to_string_pretty(&config).unwrap()]);
|
||||
println!("{table}");
|
||||
println!("Shards:");
|
||||
let mut table = comfy_table::Table::new();
|
||||
table.set_header(["Shard", "Attached", "Secondary", "Last error", "status"]);
|
||||
for shard in shards {
|
||||
|
||||
@@ -4,14 +4,16 @@ ARG TAG=latest
|
||||
|
||||
FROM $REPOSITORY/${COMPUTE_IMAGE}:$TAG
|
||||
|
||||
ARG COMPUTE_IMAGE
|
||||
|
||||
USER root
|
||||
RUN apt-get update && \
|
||||
apt-get install -y curl \
|
||||
jq \
|
||||
python3-pip \
|
||||
netcat
|
||||
netcat-openbsd
|
||||
#Faker is required for the pg_anon test
|
||||
RUN pip3 install Faker
|
||||
RUN case $COMPUTE_IMAGE in compute-node-v17) OPT="--break-system-packages";; *) OPT= ;; esac && pip3 install $OPT Faker
|
||||
#This is required for the pg_hintplan test
|
||||
RUN mkdir -p /ext-src/pg_hint_plan-src && chown postgres /ext-src/pg_hint_plan-src
|
||||
|
||||
|
||||
@@ -30,10 +30,17 @@ cleanup() {
|
||||
docker compose --profile test-extensions -f $COMPOSE_FILE down
|
||||
}
|
||||
|
||||
for pg_version in 14 15 16; do
|
||||
for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
|
||||
pg_version=${pg_version/v/}
|
||||
echo "clean up containers if exists"
|
||||
cleanup
|
||||
PG_TEST_VERSION=$(($pg_version < 16 ? 16 : $pg_version))
|
||||
PG_TEST_VERSION=$((pg_version < 16 ? 16 : pg_version))
|
||||
# The support of pg_anon not yet added to PG17, so we have to remove the corresponding option
|
||||
if [ $pg_version -eq 17 ]; then
|
||||
SPEC_PATH="compute_wrapper/var/db/postgres/specs"
|
||||
mv $SPEC_PATH/spec.json $SPEC_PATH/spec.bak
|
||||
jq 'del(.cluster.settings[] | select (.name == "session_preload_libraries"))' $SPEC_PATH/spec.bak > $SPEC_PATH/spec.json
|
||||
fi
|
||||
PG_VERSION=$pg_version PG_TEST_VERSION=$PG_TEST_VERSION docker compose --profile test-extensions -f $COMPOSE_FILE up --build -d
|
||||
|
||||
echo "wait until the compute is ready. timeout after 60s. "
|
||||
@@ -54,8 +61,7 @@ for pg_version in 14 15 16; do
|
||||
fi
|
||||
done
|
||||
|
||||
if [ $pg_version -ge 16 ]
|
||||
then
|
||||
if [ $pg_version -ge 16 ]; then
|
||||
echo Enabling trust connection
|
||||
docker exec $COMPUTE_CONTAINER_NAME bash -c "sed -i '\$d' /var/db/postgres/compute/pg_hba.conf && echo -e 'host\t all\t all\t all\t trust' >> /var/db/postgres/compute/pg_hba.conf && psql $PSQL_OPTION -c 'select pg_reload_conf()' "
|
||||
echo Adding postgres role
|
||||
@@ -68,10 +74,13 @@ for pg_version in 14 15 16; do
|
||||
# The test assumes that it is running on the same host with the postgres engine.
|
||||
# In our case it's not true, that's why we are copying files to the compute node
|
||||
TMPDIR=$(mktemp -d)
|
||||
docker cp $TEST_CONTAINER_NAME:/ext-src/pg_anon-src/data $TMPDIR/data
|
||||
echo -e '1\t too \t many \t tabs' > $TMPDIR/data/bad.csv
|
||||
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/tmp/tmp_anon_alternate_data
|
||||
# Add support for pg_anon for pg_v16
|
||||
if [ $pg_version -ne 17 ]; then
|
||||
docker cp $TEST_CONTAINER_NAME:/ext-src/pg_anon-src/data $TMPDIR/data
|
||||
echo -e '1\t too \t many \t tabs' > $TMPDIR/data/bad.csv
|
||||
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/tmp/tmp_anon_alternate_data
|
||||
rm -rf $TMPDIR
|
||||
fi
|
||||
TMPDIR=$(mktemp -d)
|
||||
# The following block does the same for the pg_hintplan test
|
||||
docker cp $TEST_CONTAINER_NAME:/ext-src/pg_hint_plan-src/data $TMPDIR/data
|
||||
@@ -97,4 +106,8 @@ for pg_version in 14 15 16; do
|
||||
fi
|
||||
fi
|
||||
cleanup
|
||||
# The support of pg_anon not yet added to PG17, so we have to remove the corresponding option
|
||||
if [ $pg_version -eq 17 ]; then
|
||||
mv $SPEC_PATH/spec.bak $SPEC_PATH/spec.json
|
||||
fi
|
||||
done
|
||||
|
||||
@@ -442,7 +442,14 @@ impl Default for ConfigToml {
|
||||
tenant_config: TenantConfigToml::default(),
|
||||
no_sync: None,
|
||||
wal_receiver_protocol: DEFAULT_WAL_RECEIVER_PROTOCOL,
|
||||
page_service_pipelining: PageServicePipeliningConfig::Serial,
|
||||
page_service_pipelining: if !cfg!(test) {
|
||||
PageServicePipeliningConfig::Serial
|
||||
} else {
|
||||
PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
|
||||
max_batch_size: NonZeroUsize::new(32).unwrap(),
|
||||
execution: PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures,
|
||||
})
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,7 +48,7 @@ pub struct TenantCreateResponse {
|
||||
pub shards: Vec<TenantCreateResponseShard>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct NodeRegisterRequest {
|
||||
pub node_id: NodeId,
|
||||
|
||||
@@ -75,7 +75,7 @@ pub struct TenantPolicyRequest {
|
||||
pub scheduling: Option<ShardSchedulingPolicy>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
|
||||
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Debug)]
|
||||
pub struct AvailabilityZone(pub String);
|
||||
|
||||
impl Display for AvailabilityZone {
|
||||
|
||||
@@ -770,6 +770,11 @@ impl Key {
|
||||
&& self.field6 == 1
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn is_aux_file_key(&self) -> bool {
|
||||
self.field1 == AUX_KEY_PREFIX
|
||||
}
|
||||
|
||||
/// Guaranteed to return `Ok()` if [`Self::is_rel_block_key`] returns `true` for `key`.
|
||||
#[inline(always)]
|
||||
pub fn to_rel_block(self) -> anyhow::Result<(RelTag, BlockNumber)> {
|
||||
|
||||
@@ -501,7 +501,9 @@ pub struct EvictionPolicyLayerAccessThreshold {
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
|
||||
pub struct ThrottleConfig {
|
||||
pub task_kinds: Vec<String>, // TaskKind
|
||||
/// See [`ThrottleConfigTaskKinds`] for why we do the serde `rename`.
|
||||
#[serde(rename = "task_kinds")]
|
||||
pub enabled: ThrottleConfigTaskKinds,
|
||||
pub initial: u32,
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub refill_interval: Duration,
|
||||
@@ -509,10 +511,38 @@ pub struct ThrottleConfig {
|
||||
pub max: u32,
|
||||
}
|
||||
|
||||
/// Before <https://github.com/neondatabase/neon/pull/9962>
|
||||
/// the throttle was a per `Timeline::get`/`Timeline::get_vectored` call.
|
||||
/// The `task_kinds` field controlled which Pageserver "Task Kind"s
|
||||
/// were subject to the throttle.
|
||||
///
|
||||
/// After that PR, the throttle is applied at pagestream request level
|
||||
/// and the `task_kinds` field does not apply since the only task kind
|
||||
/// that us subject to the throttle is that of the page service.
|
||||
///
|
||||
/// However, we don't want to make a breaking config change right now
|
||||
/// because it means we have to migrate all the tenant configs.
|
||||
/// This will be done in a future PR.
|
||||
///
|
||||
/// In the meantime, we use emptiness / non-emptsiness of the `task_kinds`
|
||||
/// field to determine if the throttle is enabled or not.
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
#[serde(transparent)]
|
||||
pub struct ThrottleConfigTaskKinds(Vec<String>);
|
||||
|
||||
impl ThrottleConfigTaskKinds {
|
||||
pub fn disabled() -> Self {
|
||||
Self(vec![])
|
||||
}
|
||||
pub fn is_enabled(&self) -> bool {
|
||||
!self.0.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
impl ThrottleConfig {
|
||||
pub fn disabled() -> Self {
|
||||
Self {
|
||||
task_kinds: vec![], // effectively disables the throttle
|
||||
enabled: ThrottleConfigTaskKinds::disabled(),
|
||||
// other values don't matter with emtpy `task_kinds`.
|
||||
initial: 0,
|
||||
refill_interval: Duration::from_millis(1),
|
||||
@@ -526,6 +556,30 @@ impl ThrottleConfig {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod throttle_config_tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_disabled_is_disabled() {
|
||||
let config = ThrottleConfig::disabled();
|
||||
assert!(!config.enabled.is_enabled());
|
||||
}
|
||||
#[test]
|
||||
fn test_enabled_backwards_compat() {
|
||||
let input = serde_json::json!({
|
||||
"task_kinds": ["PageRequestHandler"],
|
||||
"initial": 40000,
|
||||
"refill_interval": "50ms",
|
||||
"refill_amount": 1000,
|
||||
"max": 40000,
|
||||
"fair": true
|
||||
});
|
||||
let config: ThrottleConfig = serde_json::from_value(input).unwrap();
|
||||
assert!(config.enabled.is_enabled());
|
||||
}
|
||||
}
|
||||
|
||||
/// A flattened analog of a `pagesever::tenant::LocationMode`, which
|
||||
/// lists out all possible states (and the virtual "Detached" state)
|
||||
/// in a flat form rather than using rust-style enums.
|
||||
|
||||
@@ -170,19 +170,37 @@ impl ShardIdentity {
|
||||
}
|
||||
}
|
||||
|
||||
/// Return true if the key should be stored on all shards, not just one.
|
||||
fn is_key_global(&self, key: &Key) -> bool {
|
||||
if key.is_slru_block_key() || key.is_slru_segment_size_key() || key.is_aux_file_key() {
|
||||
// Special keys that are only stored on shard 0
|
||||
false
|
||||
} else if key.is_rel_block_key() {
|
||||
// Ordinary relation blocks are distributed across shards
|
||||
false
|
||||
} else if key.is_rel_size_key() {
|
||||
// All shards maintain rel size keys (although only shard 0 is responsible for
|
||||
// keeping it strictly accurate, other shards just reflect the highest block they've ingested)
|
||||
true
|
||||
} else {
|
||||
// For everything else, we assume it must be kept everywhere, because ingest code
|
||||
// might assume this -- this covers functionality where the ingest code has
|
||||
// not (yet) been made fully shard aware.
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
/// Return true if the key should be discarded if found in this shard's
|
||||
/// data store, e.g. during compaction after a split.
|
||||
///
|
||||
/// Shards _may_ drop keys which return false here, but are not obliged to.
|
||||
pub fn is_key_disposable(&self, key: &Key) -> bool {
|
||||
if key_is_shard0(key) {
|
||||
// Q: Why can't we dispose of shard0 content if we're not shard 0?
|
||||
// A1: because the WAL ingestion logic currently ingests some shard 0
|
||||
// content on all shards, even though it's only read on shard 0. If we
|
||||
// dropped it, then subsequent WAL ingest to these keys would encounter
|
||||
// an error.
|
||||
// A2: because key_is_shard0 also covers relation size keys, which are written
|
||||
// on all shards even though they're only maintained accurately on shard 0.
|
||||
if self.count < ShardCount(2) {
|
||||
// Fast path: unsharded tenant doesn't dispose of anything
|
||||
return false;
|
||||
}
|
||||
|
||||
if self.is_key_global(key) {
|
||||
false
|
||||
} else {
|
||||
!self.is_key_local(key)
|
||||
|
||||
@@ -278,7 +278,7 @@ pub fn generate_pg_control(
|
||||
checkpoint_bytes: &[u8],
|
||||
lsn: Lsn,
|
||||
pg_version: u32,
|
||||
) -> anyhow::Result<(Bytes, u64)> {
|
||||
) -> anyhow::Result<(Bytes, u64, bool)> {
|
||||
dispatch_pgversion!(
|
||||
pg_version,
|
||||
pgv::xlog_utils::generate_pg_control(pg_control_bytes, checkpoint_bytes, lsn),
|
||||
|
||||
@@ -124,23 +124,64 @@ pub fn normalize_lsn(lsn: Lsn, seg_sz: usize) -> Lsn {
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a pg_control file, for a basebackup for starting up Postgres at the given LSN
|
||||
///
|
||||
/// 'pg_control_bytes' and 'checkpoint_bytes' are the contents of those keys persisted in
|
||||
/// the pageserver. They use the same format as the PostgreSQL control file and the
|
||||
/// checkpoint record, but see walingest.rs for how exactly they are kept up to date.
|
||||
/// 'lsn' is the LSN at which we're starting up.
|
||||
///
|
||||
/// Returns:
|
||||
/// - pg_control file contents
|
||||
/// - system_identifier, extracted from the persisted information
|
||||
/// - true, if we're starting up from a "clean shutdown", i.e. if there was a shutdown
|
||||
/// checkpoint at the given LSN
|
||||
pub fn generate_pg_control(
|
||||
pg_control_bytes: &[u8],
|
||||
checkpoint_bytes: &[u8],
|
||||
lsn: Lsn,
|
||||
) -> anyhow::Result<(Bytes, u64)> {
|
||||
) -> anyhow::Result<(Bytes, u64, bool)> {
|
||||
let mut pg_control = ControlFileData::decode(pg_control_bytes)?;
|
||||
let mut checkpoint = CheckPoint::decode(checkpoint_bytes)?;
|
||||
let was_shutdown;
|
||||
|
||||
// Generate new pg_control needed for bootstrap
|
||||
checkpoint.redo = normalize_lsn(lsn, WAL_SEGMENT_SIZE).0;
|
||||
//
|
||||
// NB: In the checkpoint struct that we persist in the pageserver, we have a different
|
||||
// convention for the 'redo' field than in PostgreSQL: On a shutdown checkpoint,
|
||||
// 'redo' points the *end* of the checkpoint WAL record. On PostgreSQL, it points to
|
||||
// the beginning. Furthermore, on an online checkpoint, 'redo' is set to 0.
|
||||
//
|
||||
// We didn't always have this convention however, and old persisted records will have
|
||||
// old REDO values that point to some old LSN.
|
||||
//
|
||||
// The upshot is that if 'redo' is equal to the "current" LSN, there was a shutdown
|
||||
// checkpoint record at that point in WAL, with no new WAL records after it. That case
|
||||
// can be treated as starting from a clean shutdown. All other cases are treated as
|
||||
// non-clean shutdown. In Neon, we don't do WAL replay at startup in either case, so
|
||||
// that distinction doesn't matter very much. As of this writing, it only affects
|
||||
// whether the persisted pg_stats information can be used or not.
|
||||
//
|
||||
// In the Checkpoint struct in the returned pg_control file, the redo pointer is
|
||||
// always set to the LSN we're starting at, to hint that no WAL replay is required.
|
||||
// (There's some neon-specific code in Postgres startup to make that work, though.
|
||||
// Just setting the redo pointer is not sufficient.)
|
||||
if Lsn(checkpoint.redo) == lsn {
|
||||
was_shutdown = true;
|
||||
} else {
|
||||
checkpoint.redo = normalize_lsn(lsn, WAL_SEGMENT_SIZE).0;
|
||||
was_shutdown = false;
|
||||
}
|
||||
|
||||
//save new values in pg_control
|
||||
// We use DBState_DB_SHUTDOWNED even if it was not a clean shutdown. The
|
||||
// neon-specific code at postgres startup ignores the state stored in the control
|
||||
// file, similar to archive recovery in standalone PostgreSQL. Similarly, the
|
||||
// checkPoint pointer is ignored, so just set it to 0.
|
||||
pg_control.checkPoint = 0;
|
||||
pg_control.checkPointCopy = checkpoint;
|
||||
pg_control.state = DBState_DB_SHUTDOWNED;
|
||||
|
||||
Ok((pg_control.encode(), pg_control.system_identifier))
|
||||
Ok((pg_control.encode(), pg_control.system_identifier, was_shutdown))
|
||||
}
|
||||
|
||||
pub fn get_current_timestamp() -> TimestampTz {
|
||||
|
||||
@@ -565,6 +565,8 @@ pub enum BeMessage<'a> {
|
||||
/// Batch of interpreted, shard filtered WAL records,
|
||||
/// ready for the pageserver to ingest
|
||||
InterpretedWalRecords(InterpretedWalRecordsBody<'a>),
|
||||
|
||||
Raw(u8, &'a [u8]),
|
||||
}
|
||||
|
||||
/// Common shorthands.
|
||||
@@ -754,6 +756,10 @@ impl BeMessage<'_> {
|
||||
/// one more buffer.
|
||||
pub fn write(buf: &mut BytesMut, message: &BeMessage) -> Result<(), ProtocolError> {
|
||||
match message {
|
||||
BeMessage::Raw(code, data) => {
|
||||
buf.put_u8(*code);
|
||||
write_body(buf, |b| b.put_slice(data))
|
||||
}
|
||||
BeMessage::AuthenticationOk => {
|
||||
buf.put_u8(b'R');
|
||||
write_body(buf, |buf| {
|
||||
|
||||
@@ -10,7 +10,6 @@ byteorder.workspace = true
|
||||
bytes.workspace = true
|
||||
fallible-iterator.workspace = true
|
||||
hmac.workspace = true
|
||||
md-5 = "0.10"
|
||||
memchr = "2.0"
|
||||
rand.workspace = true
|
||||
sha2.workspace = true
|
||||
|
||||
@@ -1,37 +1,2 @@
|
||||
//! Authentication protocol support.
|
||||
use md5::{Digest, Md5};
|
||||
|
||||
pub mod sasl;
|
||||
|
||||
/// Hashes authentication information in a way suitable for use in response
|
||||
/// to an `AuthenticationMd5Password` message.
|
||||
///
|
||||
/// The resulting string should be sent back to the database in a
|
||||
/// `PasswordMessage` message.
|
||||
#[inline]
|
||||
pub fn md5_hash(username: &[u8], password: &[u8], salt: [u8; 4]) -> String {
|
||||
let mut md5 = Md5::new();
|
||||
md5.update(password);
|
||||
md5.update(username);
|
||||
let output = md5.finalize_reset();
|
||||
md5.update(format!("{:x}", output));
|
||||
md5.update(salt);
|
||||
format!("md5{:x}", md5.finalize())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn md5() {
|
||||
let username = b"md5_user";
|
||||
let password = b"password";
|
||||
let salt = [0x2a, 0x3d, 0x8f, 0xe0];
|
||||
|
||||
assert_eq!(
|
||||
md5_hash(username, password, salt),
|
||||
"md562af4dd09bbb41884907a838a3233294"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,7 +79,7 @@ pub enum Message {
|
||||
AuthenticationCleartextPassword,
|
||||
AuthenticationGss,
|
||||
AuthenticationKerberosV5,
|
||||
AuthenticationMd5Password(AuthenticationMd5PasswordBody),
|
||||
AuthenticationMd5Password,
|
||||
AuthenticationOk,
|
||||
AuthenticationScmCredential,
|
||||
AuthenticationSspi,
|
||||
@@ -191,11 +191,7 @@ impl Message {
|
||||
0 => Message::AuthenticationOk,
|
||||
2 => Message::AuthenticationKerberosV5,
|
||||
3 => Message::AuthenticationCleartextPassword,
|
||||
5 => {
|
||||
let mut salt = [0; 4];
|
||||
buf.read_exact(&mut salt)?;
|
||||
Message::AuthenticationMd5Password(AuthenticationMd5PasswordBody { salt })
|
||||
}
|
||||
5 => Message::AuthenticationMd5Password,
|
||||
6 => Message::AuthenticationScmCredential,
|
||||
7 => Message::AuthenticationGss,
|
||||
8 => Message::AuthenticationGssContinue,
|
||||
@@ -541,6 +537,10 @@ impl NoticeResponseBody {
|
||||
pub fn fields(&self) -> ErrorFields<'_> {
|
||||
ErrorFields { buf: &self.storage }
|
||||
}
|
||||
|
||||
pub fn as_bytes(&self) -> &[u8] {
|
||||
&self.storage
|
||||
}
|
||||
}
|
||||
|
||||
pub struct NotificationResponseBody {
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
|
||||
use crate::authentication::sasl;
|
||||
use hmac::{Hmac, Mac};
|
||||
use md5::Md5;
|
||||
use rand::RngCore;
|
||||
use sha2::digest::FixedOutput;
|
||||
use sha2::{Digest, Sha256};
|
||||
@@ -88,20 +87,3 @@ pub(crate) async fn scram_sha_256_salt(
|
||||
base64::encode(server_key)
|
||||
)
|
||||
}
|
||||
|
||||
/// **Not recommended, as MD5 is not considered to be secure.**
|
||||
///
|
||||
/// Hash password using MD5 with the username as the salt.
|
||||
///
|
||||
/// The client may assume the returned string doesn't contain any
|
||||
/// special characters that would require escaping.
|
||||
pub fn md5(password: &[u8], username: &str) -> String {
|
||||
// salt password with username
|
||||
let mut salted_password = Vec::from(password);
|
||||
salted_password.extend_from_slice(username.as_bytes());
|
||||
|
||||
let mut hash = Md5::new();
|
||||
hash.update(&salted_password);
|
||||
let digest = hash.finalize();
|
||||
format!("md5{:x}", digest)
|
||||
}
|
||||
|
||||
@@ -9,11 +9,3 @@ async fn test_encrypt_scram_sha_256() {
|
||||
"SCRAM-SHA-256$4096:AQIDBAUGBwgJCgsMDQ4PEA==$8rrDg00OqaiWXJ7p+sCgHEIaBSHY89ZJl3mfIsf32oY=:05L1f+yZbiN8O0AnO40Og85NNRhvzTS57naKRWCcsIA="
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_encrypt_md5() {
|
||||
assert_eq!(
|
||||
password::md5(b"secret", "foo"),
|
||||
"md54ab2c5d00339c4b2a4e921d2dc4edec7"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -10,10 +10,10 @@ use tokio::net::TcpStream;
|
||||
/// connection.
|
||||
#[derive(Clone)]
|
||||
pub struct CancelToken {
|
||||
pub(crate) socket_config: Option<SocketConfig>,
|
||||
pub(crate) ssl_mode: SslMode,
|
||||
pub(crate) process_id: i32,
|
||||
pub(crate) secret_key: i32,
|
||||
pub socket_config: Option<SocketConfig>,
|
||||
pub ssl_mode: SslMode,
|
||||
pub process_id: i32,
|
||||
pub secret_key: i32,
|
||||
}
|
||||
|
||||
impl CancelToken {
|
||||
|
||||
@@ -138,7 +138,7 @@ impl InnerClient {
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct SocketConfig {
|
||||
pub struct SocketConfig {
|
||||
pub host: Host,
|
||||
pub port: u16,
|
||||
pub connect_timeout: Option<Duration>,
|
||||
@@ -152,7 +152,7 @@ pub(crate) struct SocketConfig {
|
||||
pub struct Client {
|
||||
inner: Arc<InnerClient>,
|
||||
|
||||
socket_config: Option<SocketConfig>,
|
||||
socket_config: SocketConfig,
|
||||
ssl_mode: SslMode,
|
||||
process_id: i32,
|
||||
secret_key: i32,
|
||||
@@ -161,6 +161,7 @@ pub struct Client {
|
||||
impl Client {
|
||||
pub(crate) fn new(
|
||||
sender: mpsc::UnboundedSender<Request>,
|
||||
socket_config: SocketConfig,
|
||||
ssl_mode: SslMode,
|
||||
process_id: i32,
|
||||
secret_key: i32,
|
||||
@@ -172,7 +173,7 @@ impl Client {
|
||||
buffer: Default::default(),
|
||||
}),
|
||||
|
||||
socket_config: None,
|
||||
socket_config,
|
||||
ssl_mode,
|
||||
process_id,
|
||||
secret_key,
|
||||
@@ -188,10 +189,6 @@ impl Client {
|
||||
&self.inner
|
||||
}
|
||||
|
||||
pub(crate) fn set_socket_config(&mut self, socket_config: SocketConfig) {
|
||||
self.socket_config = Some(socket_config);
|
||||
}
|
||||
|
||||
/// Creates a new prepared statement.
|
||||
///
|
||||
/// Prepared statements can be executed repeatedly, and may contain query parameters (indicated by `$1`, `$2`, etc),
|
||||
@@ -412,7 +409,7 @@ impl Client {
|
||||
/// connection associated with this client.
|
||||
pub fn cancel_token(&self) -> CancelToken {
|
||||
CancelToken {
|
||||
socket_config: self.socket_config.clone(),
|
||||
socket_config: Some(self.socket_config.clone()),
|
||||
ssl_mode: self.ssl_mode,
|
||||
process_id: self.process_id,
|
||||
secret_key: self.secret_key,
|
||||
|
||||
@@ -2,14 +2,13 @@
|
||||
|
||||
use crate::connect::connect;
|
||||
use crate::connect_raw::connect_raw;
|
||||
use crate::connect_raw::RawConnection;
|
||||
use crate::tls::MakeTlsConnect;
|
||||
use crate::tls::TlsConnect;
|
||||
use crate::{Client, Connection, Error};
|
||||
use std::borrow::Cow;
|
||||
use std::fmt;
|
||||
use std::str;
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
use std::{error, fmt, iter, mem};
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
|
||||
pub use postgres_protocol2::authentication::sasl::ScramKeys;
|
||||
@@ -147,6 +146,9 @@ pub enum AuthKeys {
|
||||
/// ```
|
||||
#[derive(Clone, PartialEq, Eq)]
|
||||
pub struct Config {
|
||||
pub(crate) host: Host,
|
||||
pub(crate) port: u16,
|
||||
|
||||
pub(crate) user: Option<String>,
|
||||
pub(crate) password: Option<Vec<u8>>,
|
||||
pub(crate) auth_keys: Option<Box<AuthKeys>>,
|
||||
@@ -154,8 +156,6 @@ pub struct Config {
|
||||
pub(crate) options: Option<String>,
|
||||
pub(crate) application_name: Option<String>,
|
||||
pub(crate) ssl_mode: SslMode,
|
||||
pub(crate) host: Vec<Host>,
|
||||
pub(crate) port: Vec<u16>,
|
||||
pub(crate) connect_timeout: Option<Duration>,
|
||||
pub(crate) target_session_attrs: TargetSessionAttrs,
|
||||
pub(crate) channel_binding: ChannelBinding,
|
||||
@@ -163,16 +163,12 @@ pub struct Config {
|
||||
pub(crate) max_backend_message_size: Option<usize>,
|
||||
}
|
||||
|
||||
impl Default for Config {
|
||||
fn default() -> Config {
|
||||
Config::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl Config {
|
||||
/// Creates a new configuration.
|
||||
pub fn new() -> Config {
|
||||
pub fn new(host: String, port: u16) -> Config {
|
||||
Config {
|
||||
host: Host::Tcp(host),
|
||||
port,
|
||||
user: None,
|
||||
password: None,
|
||||
auth_keys: None,
|
||||
@@ -180,8 +176,6 @@ impl Config {
|
||||
options: None,
|
||||
application_name: None,
|
||||
ssl_mode: SslMode::Prefer,
|
||||
host: vec![],
|
||||
port: vec![],
|
||||
connect_timeout: None,
|
||||
target_session_attrs: TargetSessionAttrs::Any,
|
||||
channel_binding: ChannelBinding::Prefer,
|
||||
@@ -284,32 +278,14 @@ impl Config {
|
||||
self.ssl_mode
|
||||
}
|
||||
|
||||
/// Adds a host to the configuration.
|
||||
///
|
||||
/// Multiple hosts can be specified by calling this method multiple times, and each will be tried in order.
|
||||
pub fn host(&mut self, host: &str) -> &mut Config {
|
||||
self.host.push(Host::Tcp(host.to_string()));
|
||||
self
|
||||
}
|
||||
|
||||
/// Gets the hosts that have been added to the configuration with `host`.
|
||||
pub fn get_hosts(&self) -> &[Host] {
|
||||
pub fn get_host(&self) -> &Host {
|
||||
&self.host
|
||||
}
|
||||
|
||||
/// Adds a port to the configuration.
|
||||
///
|
||||
/// Multiple ports can be specified by calling this method multiple times. There must either be no ports, in which
|
||||
/// case the default of 5432 is used, a single port, in which it is used for all hosts, or the same number of ports
|
||||
/// as hosts.
|
||||
pub fn port(&mut self, port: u16) -> &mut Config {
|
||||
self.port.push(port);
|
||||
self
|
||||
}
|
||||
|
||||
/// Gets the ports that have been added to the configuration with `port`.
|
||||
pub fn get_ports(&self) -> &[u16] {
|
||||
&self.port
|
||||
pub fn get_port(&self) -> u16 {
|
||||
self.port
|
||||
}
|
||||
|
||||
/// Sets the timeout applied to socket-level connection attempts.
|
||||
@@ -379,99 +355,6 @@ impl Config {
|
||||
self.max_backend_message_size
|
||||
}
|
||||
|
||||
fn param(&mut self, key: &str, value: &str) -> Result<(), Error> {
|
||||
match key {
|
||||
"user" => {
|
||||
self.user(value);
|
||||
}
|
||||
"password" => {
|
||||
self.password(value);
|
||||
}
|
||||
"dbname" => {
|
||||
self.dbname(value);
|
||||
}
|
||||
"options" => {
|
||||
self.options(value);
|
||||
}
|
||||
"application_name" => {
|
||||
self.application_name(value);
|
||||
}
|
||||
"sslmode" => {
|
||||
let mode = match value {
|
||||
"disable" => SslMode::Disable,
|
||||
"prefer" => SslMode::Prefer,
|
||||
"require" => SslMode::Require,
|
||||
_ => return Err(Error::config_parse(Box::new(InvalidValue("sslmode")))),
|
||||
};
|
||||
self.ssl_mode(mode);
|
||||
}
|
||||
"host" => {
|
||||
for host in value.split(',') {
|
||||
self.host(host);
|
||||
}
|
||||
}
|
||||
"port" => {
|
||||
for port in value.split(',') {
|
||||
let port = if port.is_empty() {
|
||||
5432
|
||||
} else {
|
||||
port.parse()
|
||||
.map_err(|_| Error::config_parse(Box::new(InvalidValue("port"))))?
|
||||
};
|
||||
self.port(port);
|
||||
}
|
||||
}
|
||||
"connect_timeout" => {
|
||||
let timeout = value
|
||||
.parse::<i64>()
|
||||
.map_err(|_| Error::config_parse(Box::new(InvalidValue("connect_timeout"))))?;
|
||||
if timeout > 0 {
|
||||
self.connect_timeout(Duration::from_secs(timeout as u64));
|
||||
}
|
||||
}
|
||||
"target_session_attrs" => {
|
||||
let target_session_attrs = match value {
|
||||
"any" => TargetSessionAttrs::Any,
|
||||
"read-write" => TargetSessionAttrs::ReadWrite,
|
||||
_ => {
|
||||
return Err(Error::config_parse(Box::new(InvalidValue(
|
||||
"target_session_attrs",
|
||||
))));
|
||||
}
|
||||
};
|
||||
self.target_session_attrs(target_session_attrs);
|
||||
}
|
||||
"channel_binding" => {
|
||||
let channel_binding = match value {
|
||||
"disable" => ChannelBinding::Disable,
|
||||
"prefer" => ChannelBinding::Prefer,
|
||||
"require" => ChannelBinding::Require,
|
||||
_ => {
|
||||
return Err(Error::config_parse(Box::new(InvalidValue(
|
||||
"channel_binding",
|
||||
))))
|
||||
}
|
||||
};
|
||||
self.channel_binding(channel_binding);
|
||||
}
|
||||
"max_backend_message_size" => {
|
||||
let limit = value.parse::<usize>().map_err(|_| {
|
||||
Error::config_parse(Box::new(InvalidValue("max_backend_message_size")))
|
||||
})?;
|
||||
if limit > 0 {
|
||||
self.max_backend_message_size(limit);
|
||||
}
|
||||
}
|
||||
key => {
|
||||
return Err(Error::config_parse(Box::new(UnknownOption(
|
||||
key.to_string(),
|
||||
))));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Opens a connection to a PostgreSQL database.
|
||||
///
|
||||
/// Requires the `runtime` Cargo feature (enabled by default).
|
||||
@@ -485,14 +368,11 @@ impl Config {
|
||||
connect(tls, self).await
|
||||
}
|
||||
|
||||
/// Connects to a PostgreSQL database over an arbitrary stream.
|
||||
///
|
||||
/// All of the settings other than `user`, `password`, `dbname`, `options`, and `application_name` name are ignored.
|
||||
pub async fn connect_raw<S, T>(
|
||||
&self,
|
||||
stream: S,
|
||||
tls: T,
|
||||
) -> Result<(Client, Connection<S, T::Stream>), Error>
|
||||
) -> Result<RawConnection<S, T::Stream>, Error>
|
||||
where
|
||||
S: AsyncRead + AsyncWrite + Unpin,
|
||||
T: TlsConnect<S>,
|
||||
@@ -501,17 +381,6 @@ impl Config {
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for Config {
|
||||
type Err = Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Config, Error> {
|
||||
match UrlParser::parse(s)? {
|
||||
Some(config) => Ok(config),
|
||||
None => Parser::parse(s),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Omit password from debug output
|
||||
impl fmt::Debug for Config {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
@@ -538,360 +407,3 @@ impl fmt::Debug for Config {
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct UnknownOption(String);
|
||||
|
||||
impl fmt::Display for UnknownOption {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(fmt, "unknown option `{}`", self.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl error::Error for UnknownOption {}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct InvalidValue(&'static str);
|
||||
|
||||
impl fmt::Display for InvalidValue {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(fmt, "invalid value for option `{}`", self.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl error::Error for InvalidValue {}
|
||||
|
||||
struct Parser<'a> {
|
||||
s: &'a str,
|
||||
it: iter::Peekable<str::CharIndices<'a>>,
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
fn parse(s: &'a str) -> Result<Config, Error> {
|
||||
let mut parser = Parser {
|
||||
s,
|
||||
it: s.char_indices().peekable(),
|
||||
};
|
||||
|
||||
let mut config = Config::new();
|
||||
|
||||
while let Some((key, value)) = parser.parameter()? {
|
||||
config.param(key, &value)?;
|
||||
}
|
||||
|
||||
Ok(config)
|
||||
}
|
||||
|
||||
fn skip_ws(&mut self) {
|
||||
self.take_while(char::is_whitespace);
|
||||
}
|
||||
|
||||
fn take_while<F>(&mut self, f: F) -> &'a str
|
||||
where
|
||||
F: Fn(char) -> bool,
|
||||
{
|
||||
let start = match self.it.peek() {
|
||||
Some(&(i, _)) => i,
|
||||
None => return "",
|
||||
};
|
||||
|
||||
loop {
|
||||
match self.it.peek() {
|
||||
Some(&(_, c)) if f(c) => {
|
||||
self.it.next();
|
||||
}
|
||||
Some(&(i, _)) => return &self.s[start..i],
|
||||
None => return &self.s[start..],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn eat(&mut self, target: char) -> Result<(), Error> {
|
||||
match self.it.next() {
|
||||
Some((_, c)) if c == target => Ok(()),
|
||||
Some((i, c)) => {
|
||||
let m = format!(
|
||||
"unexpected character at byte {}: expected `{}` but got `{}`",
|
||||
i, target, c
|
||||
);
|
||||
Err(Error::config_parse(m.into()))
|
||||
}
|
||||
None => Err(Error::config_parse("unexpected EOF".into())),
|
||||
}
|
||||
}
|
||||
|
||||
fn eat_if(&mut self, target: char) -> bool {
|
||||
match self.it.peek() {
|
||||
Some(&(_, c)) if c == target => {
|
||||
self.it.next();
|
||||
true
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn keyword(&mut self) -> Option<&'a str> {
|
||||
let s = self.take_while(|c| match c {
|
||||
c if c.is_whitespace() => false,
|
||||
'=' => false,
|
||||
_ => true,
|
||||
});
|
||||
|
||||
if s.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(s)
|
||||
}
|
||||
}
|
||||
|
||||
fn value(&mut self) -> Result<String, Error> {
|
||||
let value = if self.eat_if('\'') {
|
||||
let value = self.quoted_value()?;
|
||||
self.eat('\'')?;
|
||||
value
|
||||
} else {
|
||||
self.simple_value()?
|
||||
};
|
||||
|
||||
Ok(value)
|
||||
}
|
||||
|
||||
fn simple_value(&mut self) -> Result<String, Error> {
|
||||
let mut value = String::new();
|
||||
|
||||
while let Some(&(_, c)) = self.it.peek() {
|
||||
if c.is_whitespace() {
|
||||
break;
|
||||
}
|
||||
|
||||
self.it.next();
|
||||
if c == '\\' {
|
||||
if let Some((_, c2)) = self.it.next() {
|
||||
value.push(c2);
|
||||
}
|
||||
} else {
|
||||
value.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
if value.is_empty() {
|
||||
return Err(Error::config_parse("unexpected EOF".into()));
|
||||
}
|
||||
|
||||
Ok(value)
|
||||
}
|
||||
|
||||
fn quoted_value(&mut self) -> Result<String, Error> {
|
||||
let mut value = String::new();
|
||||
|
||||
while let Some(&(_, c)) = self.it.peek() {
|
||||
if c == '\'' {
|
||||
return Ok(value);
|
||||
}
|
||||
|
||||
self.it.next();
|
||||
if c == '\\' {
|
||||
if let Some((_, c2)) = self.it.next() {
|
||||
value.push(c2);
|
||||
}
|
||||
} else {
|
||||
value.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
Err(Error::config_parse(
|
||||
"unterminated quoted connection parameter value".into(),
|
||||
))
|
||||
}
|
||||
|
||||
fn parameter(&mut self) -> Result<Option<(&'a str, String)>, Error> {
|
||||
self.skip_ws();
|
||||
let keyword = match self.keyword() {
|
||||
Some(keyword) => keyword,
|
||||
None => return Ok(None),
|
||||
};
|
||||
self.skip_ws();
|
||||
self.eat('=')?;
|
||||
self.skip_ws();
|
||||
let value = self.value()?;
|
||||
|
||||
Ok(Some((keyword, value)))
|
||||
}
|
||||
}
|
||||
|
||||
// This is a pretty sloppy "URL" parser, but it matches the behavior of libpq, where things really aren't very strict
|
||||
struct UrlParser<'a> {
|
||||
s: &'a str,
|
||||
config: Config,
|
||||
}
|
||||
|
||||
impl<'a> UrlParser<'a> {
|
||||
fn parse(s: &'a str) -> Result<Option<Config>, Error> {
|
||||
let s = match Self::remove_url_prefix(s) {
|
||||
Some(s) => s,
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
let mut parser = UrlParser {
|
||||
s,
|
||||
config: Config::new(),
|
||||
};
|
||||
|
||||
parser.parse_credentials()?;
|
||||
parser.parse_host()?;
|
||||
parser.parse_path()?;
|
||||
parser.parse_params()?;
|
||||
|
||||
Ok(Some(parser.config))
|
||||
}
|
||||
|
||||
fn remove_url_prefix(s: &str) -> Option<&str> {
|
||||
for prefix in &["postgres://", "postgresql://"] {
|
||||
if let Some(stripped) = s.strip_prefix(prefix) {
|
||||
return Some(stripped);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn take_until(&mut self, end: &[char]) -> Option<&'a str> {
|
||||
match self.s.find(end) {
|
||||
Some(pos) => {
|
||||
let (head, tail) = self.s.split_at(pos);
|
||||
self.s = tail;
|
||||
Some(head)
|
||||
}
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn take_all(&mut self) -> &'a str {
|
||||
mem::take(&mut self.s)
|
||||
}
|
||||
|
||||
fn eat_byte(&mut self) {
|
||||
self.s = &self.s[1..];
|
||||
}
|
||||
|
||||
fn parse_credentials(&mut self) -> Result<(), Error> {
|
||||
let creds = match self.take_until(&['@']) {
|
||||
Some(creds) => creds,
|
||||
None => return Ok(()),
|
||||
};
|
||||
self.eat_byte();
|
||||
|
||||
let mut it = creds.splitn(2, ':');
|
||||
let user = self.decode(it.next().unwrap())?;
|
||||
self.config.user(&user);
|
||||
|
||||
if let Some(password) = it.next() {
|
||||
let password = Cow::from(percent_encoding::percent_decode(password.as_bytes()));
|
||||
self.config.password(password);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_host(&mut self) -> Result<(), Error> {
|
||||
let host = match self.take_until(&['/', '?']) {
|
||||
Some(host) => host,
|
||||
None => self.take_all(),
|
||||
};
|
||||
|
||||
if host.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
for chunk in host.split(',') {
|
||||
let (host, port) = if chunk.starts_with('[') {
|
||||
let idx = match chunk.find(']') {
|
||||
Some(idx) => idx,
|
||||
None => return Err(Error::config_parse(InvalidValue("host").into())),
|
||||
};
|
||||
|
||||
let host = &chunk[1..idx];
|
||||
let remaining = &chunk[idx + 1..];
|
||||
let port = if let Some(port) = remaining.strip_prefix(':') {
|
||||
Some(port)
|
||||
} else if remaining.is_empty() {
|
||||
None
|
||||
} else {
|
||||
return Err(Error::config_parse(InvalidValue("host").into()));
|
||||
};
|
||||
|
||||
(host, port)
|
||||
} else {
|
||||
let mut it = chunk.splitn(2, ':');
|
||||
(it.next().unwrap(), it.next())
|
||||
};
|
||||
|
||||
self.host_param(host)?;
|
||||
let port = self.decode(port.unwrap_or("5432"))?;
|
||||
self.config.param("port", &port)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_path(&mut self) -> Result<(), Error> {
|
||||
if !self.s.starts_with('/') {
|
||||
return Ok(());
|
||||
}
|
||||
self.eat_byte();
|
||||
|
||||
let dbname = match self.take_until(&['?']) {
|
||||
Some(dbname) => dbname,
|
||||
None => self.take_all(),
|
||||
};
|
||||
|
||||
if !dbname.is_empty() {
|
||||
self.config.dbname(&self.decode(dbname)?);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_params(&mut self) -> Result<(), Error> {
|
||||
if !self.s.starts_with('?') {
|
||||
return Ok(());
|
||||
}
|
||||
self.eat_byte();
|
||||
|
||||
while !self.s.is_empty() {
|
||||
let key = match self.take_until(&['=']) {
|
||||
Some(key) => self.decode(key)?,
|
||||
None => return Err(Error::config_parse("unterminated parameter".into())),
|
||||
};
|
||||
self.eat_byte();
|
||||
|
||||
let value = match self.take_until(&['&']) {
|
||||
Some(value) => {
|
||||
self.eat_byte();
|
||||
value
|
||||
}
|
||||
None => self.take_all(),
|
||||
};
|
||||
|
||||
if key == "host" {
|
||||
self.host_param(value)?;
|
||||
} else {
|
||||
let value = self.decode(value)?;
|
||||
self.config.param(&key, &value)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn host_param(&mut self, s: &str) -> Result<(), Error> {
|
||||
let s = self.decode(s)?;
|
||||
self.config.param("host", &s)
|
||||
}
|
||||
|
||||
fn decode(&self, s: &'a str) -> Result<Cow<'a, str>, Error> {
|
||||
percent_encoding::percent_decode(s.as_bytes())
|
||||
.decode_utf8()
|
||||
.map_err(|e| Error::config_parse(e.into()))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
use crate::client::SocketConfig;
|
||||
use crate::codec::BackendMessage;
|
||||
use crate::config::{Host, TargetSessionAttrs};
|
||||
use crate::connect_raw::connect_raw;
|
||||
use crate::connect_socket::connect_socket;
|
||||
use crate::tls::{MakeTlsConnect, TlsConnect};
|
||||
use crate::{Client, Config, Connection, Error, SimpleQueryMessage};
|
||||
use crate::{Client, Config, Connection, Error, RawConnection, SimpleQueryMessage};
|
||||
use futures_util::{future, pin_mut, Future, FutureExt, Stream};
|
||||
use postgres_protocol2::message::backend::Message;
|
||||
use std::io;
|
||||
use std::task::Poll;
|
||||
use tokio::net::TcpStream;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
pub async fn connect<T>(
|
||||
mut tls: T,
|
||||
@@ -16,38 +19,18 @@ pub async fn connect<T>(
|
||||
where
|
||||
T: MakeTlsConnect<TcpStream>,
|
||||
{
|
||||
if config.host.is_empty() {
|
||||
return Err(Error::config("host missing".into()));
|
||||
let hostname = match &config.host {
|
||||
Host::Tcp(host) => host.as_str(),
|
||||
};
|
||||
|
||||
let tls = tls
|
||||
.make_tls_connect(hostname)
|
||||
.map_err(|e| Error::tls(e.into()))?;
|
||||
|
||||
match connect_once(&config.host, config.port, tls, config).await {
|
||||
Ok((client, connection)) => Ok((client, connection)),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
|
||||
if config.port.len() > 1 && config.port.len() != config.host.len() {
|
||||
return Err(Error::config("invalid number of ports".into()));
|
||||
}
|
||||
|
||||
let mut error = None;
|
||||
for (i, host) in config.host.iter().enumerate() {
|
||||
let port = config
|
||||
.port
|
||||
.get(i)
|
||||
.or_else(|| config.port.first())
|
||||
.copied()
|
||||
.unwrap_or(5432);
|
||||
|
||||
let hostname = match host {
|
||||
Host::Tcp(host) => host.as_str(),
|
||||
};
|
||||
|
||||
let tls = tls
|
||||
.make_tls_connect(hostname)
|
||||
.map_err(|e| Error::tls(e.into()))?;
|
||||
|
||||
match connect_once(host, port, tls, config).await {
|
||||
Ok((client, connection)) => return Ok((client, connection)),
|
||||
Err(e) => error = Some(e),
|
||||
}
|
||||
}
|
||||
|
||||
Err(error.unwrap())
|
||||
}
|
||||
|
||||
async fn connect_once<T>(
|
||||
@@ -60,7 +43,36 @@ where
|
||||
T: TlsConnect<TcpStream>,
|
||||
{
|
||||
let socket = connect_socket(host, port, config.connect_timeout).await?;
|
||||
let (mut client, mut connection) = connect_raw(socket, tls, config).await?;
|
||||
let RawConnection {
|
||||
stream,
|
||||
parameters,
|
||||
delayed_notice,
|
||||
process_id,
|
||||
secret_key,
|
||||
} = connect_raw(socket, tls, config).await?;
|
||||
|
||||
let socket_config = SocketConfig {
|
||||
host: host.clone(),
|
||||
port,
|
||||
connect_timeout: config.connect_timeout,
|
||||
};
|
||||
|
||||
let (sender, receiver) = mpsc::unbounded_channel();
|
||||
let client = Client::new(
|
||||
sender,
|
||||
socket_config,
|
||||
config.ssl_mode,
|
||||
process_id,
|
||||
secret_key,
|
||||
);
|
||||
|
||||
// delayed notices are always sent as "Async" messages.
|
||||
let delayed = delayed_notice
|
||||
.into_iter()
|
||||
.map(|m| BackendMessage::Async(Message::NoticeResponse(m)))
|
||||
.collect();
|
||||
|
||||
let mut connection = Connection::new(stream, delayed, parameters, receiver);
|
||||
|
||||
if let TargetSessionAttrs::ReadWrite = config.target_session_attrs {
|
||||
let rows = client.simple_query_raw("SHOW transaction_read_only");
|
||||
@@ -102,11 +114,5 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
client.set_socket_config(SocketConfig {
|
||||
host: host.clone(),
|
||||
port,
|
||||
connect_timeout: config.connect_timeout,
|
||||
});
|
||||
|
||||
Ok((client, connection))
|
||||
}
|
||||
|
||||
@@ -3,27 +3,25 @@ use crate::config::{self, AuthKeys, Config, ReplicationMode};
|
||||
use crate::connect_tls::connect_tls;
|
||||
use crate::maybe_tls_stream::MaybeTlsStream;
|
||||
use crate::tls::{TlsConnect, TlsStream};
|
||||
use crate::{Client, Connection, Error};
|
||||
use crate::Error;
|
||||
use bytes::BytesMut;
|
||||
use fallible_iterator::FallibleIterator;
|
||||
use futures_util::{ready, Sink, SinkExt, Stream, TryStreamExt};
|
||||
use postgres_protocol2::authentication;
|
||||
use postgres_protocol2::authentication::sasl;
|
||||
use postgres_protocol2::authentication::sasl::ScramSha256;
|
||||
use postgres_protocol2::message::backend::{AuthenticationSaslBody, Message};
|
||||
use postgres_protocol2::message::backend::{AuthenticationSaslBody, Message, NoticeResponseBody};
|
||||
use postgres_protocol2::message::frontend;
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::collections::HashMap;
|
||||
use std::io;
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tokio::sync::mpsc;
|
||||
use tokio_util::codec::Framed;
|
||||
|
||||
pub struct StartupStream<S, T> {
|
||||
inner: Framed<MaybeTlsStream<S, T>, PostgresCodec>,
|
||||
buf: BackendMessages,
|
||||
delayed: VecDeque<BackendMessage>,
|
||||
delayed_notice: Vec<NoticeResponseBody>,
|
||||
}
|
||||
|
||||
impl<S, T> Sink<FrontendMessage> for StartupStream<S, T>
|
||||
@@ -78,11 +76,19 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
pub struct RawConnection<S, T> {
|
||||
pub stream: Framed<MaybeTlsStream<S, T>, PostgresCodec>,
|
||||
pub parameters: HashMap<String, String>,
|
||||
pub delayed_notice: Vec<NoticeResponseBody>,
|
||||
pub process_id: i32,
|
||||
pub secret_key: i32,
|
||||
}
|
||||
|
||||
pub async fn connect_raw<S, T>(
|
||||
stream: S,
|
||||
tls: T,
|
||||
config: &Config,
|
||||
) -> Result<(Client, Connection<S, T::Stream>), Error>
|
||||
) -> Result<RawConnection<S, T::Stream>, Error>
|
||||
where
|
||||
S: AsyncRead + AsyncWrite + Unpin,
|
||||
T: TlsConnect<S>,
|
||||
@@ -97,18 +103,20 @@ where
|
||||
},
|
||||
),
|
||||
buf: BackendMessages::empty(),
|
||||
delayed: VecDeque::new(),
|
||||
delayed_notice: Vec::new(),
|
||||
};
|
||||
|
||||
startup(&mut stream, config).await?;
|
||||
authenticate(&mut stream, config).await?;
|
||||
let (process_id, secret_key, parameters) = read_info(&mut stream).await?;
|
||||
|
||||
let (sender, receiver) = mpsc::unbounded_channel();
|
||||
let client = Client::new(sender, config.ssl_mode, process_id, secret_key);
|
||||
let connection = Connection::new(stream.inner, stream.delayed, parameters, receiver);
|
||||
|
||||
Ok((client, connection))
|
||||
Ok(RawConnection {
|
||||
stream: stream.inner,
|
||||
parameters,
|
||||
delayed_notice: stream.delayed_notice,
|
||||
process_id,
|
||||
secret_key,
|
||||
})
|
||||
}
|
||||
|
||||
async fn startup<S, T>(stream: &mut StartupStream<S, T>, config: &Config) -> Result<(), Error>
|
||||
@@ -165,25 +173,11 @@ where
|
||||
|
||||
authenticate_password(stream, pass).await?;
|
||||
}
|
||||
Some(Message::AuthenticationMd5Password(body)) => {
|
||||
can_skip_channel_binding(config)?;
|
||||
|
||||
let user = config
|
||||
.user
|
||||
.as_ref()
|
||||
.ok_or_else(|| Error::config("user missing".into()))?;
|
||||
let pass = config
|
||||
.password
|
||||
.as_ref()
|
||||
.ok_or_else(|| Error::config("password missing".into()))?;
|
||||
|
||||
let output = authentication::md5_hash(user.as_bytes(), pass, body.salt());
|
||||
authenticate_password(stream, output.as_bytes()).await?;
|
||||
}
|
||||
Some(Message::AuthenticationSasl(body)) => {
|
||||
authenticate_sasl(stream, body, config).await?;
|
||||
}
|
||||
Some(Message::AuthenticationKerberosV5)
|
||||
Some(Message::AuthenticationMd5Password)
|
||||
| Some(Message::AuthenticationKerberosV5)
|
||||
| Some(Message::AuthenticationScmCredential)
|
||||
| Some(Message::AuthenticationGss)
|
||||
| Some(Message::AuthenticationSspi) => {
|
||||
@@ -347,9 +341,7 @@ where
|
||||
body.value().map_err(Error::parse)?.to_string(),
|
||||
);
|
||||
}
|
||||
Some(msg @ Message::NoticeResponse(_)) => {
|
||||
stream.delayed.push_back(BackendMessage::Async(msg))
|
||||
}
|
||||
Some(Message::NoticeResponse(body)) => stream.delayed_notice.push(body),
|
||||
Some(Message::ReadyForQuery(_)) => return Ok((process_id, secret_key, parameters)),
|
||||
Some(Message::ErrorResponse(body)) => return Err(Error::db(body)),
|
||||
Some(_) => return Err(Error::unexpected_message()),
|
||||
|
||||
@@ -349,7 +349,6 @@ enum Kind {
|
||||
Parse,
|
||||
Encode,
|
||||
Authentication,
|
||||
ConfigParse,
|
||||
Config,
|
||||
Connect,
|
||||
Timeout,
|
||||
@@ -386,7 +385,6 @@ impl fmt::Display for Error {
|
||||
Kind::Parse => fmt.write_str("error parsing response from server")?,
|
||||
Kind::Encode => fmt.write_str("error encoding message to server")?,
|
||||
Kind::Authentication => fmt.write_str("authentication error")?,
|
||||
Kind::ConfigParse => fmt.write_str("invalid connection string")?,
|
||||
Kind::Config => fmt.write_str("invalid configuration")?,
|
||||
Kind::Connect => fmt.write_str("error connecting to server")?,
|
||||
Kind::Timeout => fmt.write_str("timeout waiting for server")?,
|
||||
@@ -482,10 +480,6 @@ impl Error {
|
||||
Error::new(Kind::Authentication, Some(e))
|
||||
}
|
||||
|
||||
pub(crate) fn config_parse(e: Box<dyn error::Error + Sync + Send>) -> Error {
|
||||
Error::new(Kind::ConfigParse, Some(e))
|
||||
}
|
||||
|
||||
pub(crate) fn config(e: Box<dyn error::Error + Sync + Send>) -> Error {
|
||||
Error::new(Kind::Config, Some(e))
|
||||
}
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
//! An asynchronous, pipelined, PostgreSQL client.
|
||||
#![warn(rust_2018_idioms, clippy::all, missing_docs)]
|
||||
#![warn(rust_2018_idioms, clippy::all)]
|
||||
|
||||
pub use crate::cancel_token::CancelToken;
|
||||
pub use crate::client::Client;
|
||||
pub use crate::client::{Client, SocketConfig};
|
||||
pub use crate::config::Config;
|
||||
pub use crate::connect_raw::RawConnection;
|
||||
pub use crate::connection::Connection;
|
||||
use crate::error::DbError;
|
||||
pub use crate::error::Error;
|
||||
@@ -12,14 +13,12 @@ pub use crate::query::RowStream;
|
||||
pub use crate::row::{Row, SimpleQueryRow};
|
||||
pub use crate::simple_query::SimpleQueryStream;
|
||||
pub use crate::statement::{Column, Statement};
|
||||
use crate::tls::MakeTlsConnect;
|
||||
pub use crate::tls::NoTls;
|
||||
pub use crate::to_statement::ToStatement;
|
||||
pub use crate::transaction::Transaction;
|
||||
pub use crate::transaction_builder::{IsolationLevel, TransactionBuilder};
|
||||
use crate::types::ToSql;
|
||||
use postgres_protocol2::message::backend::ReadyForQueryBody;
|
||||
use tokio::net::TcpStream;
|
||||
|
||||
/// After executing a query, the connection will be in one of these states
|
||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
@@ -71,24 +70,6 @@ mod transaction;
|
||||
mod transaction_builder;
|
||||
pub mod types;
|
||||
|
||||
/// A convenience function which parses a connection string and connects to the database.
|
||||
///
|
||||
/// See the documentation for [`Config`] for details on the connection string format.
|
||||
///
|
||||
/// Requires the `runtime` Cargo feature (enabled by default).
|
||||
///
|
||||
/// [`Config`]: config/struct.Config.html
|
||||
pub async fn connect<T>(
|
||||
config: &str,
|
||||
tls: T,
|
||||
) -> Result<(Client, Connection<TcpStream, T::Stream>), Error>
|
||||
where
|
||||
T: MakeTlsConnect<TcpStream>,
|
||||
{
|
||||
let config = config.parse::<Config>()?;
|
||||
config.connect(tls).await
|
||||
}
|
||||
|
||||
/// An asynchronous notification.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Notification {
|
||||
|
||||
@@ -26,6 +26,7 @@ humantime.workspace = true
|
||||
hyper0 = { workspace = true, features = ["full"] }
|
||||
fail.workspace = true
|
||||
futures = { workspace = true}
|
||||
jemalloc_pprof.workspace = true
|
||||
jsonwebtoken.workspace = true
|
||||
nix.workspace = true
|
||||
once_cell.workspace = true
|
||||
|
||||
@@ -10,6 +10,7 @@ use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder};
|
||||
use once_cell::sync::Lazy;
|
||||
use routerify::ext::RequestExt;
|
||||
use routerify::{Middleware, RequestInfo, Router, RouterBuilder};
|
||||
use tokio_util::io::ReaderStream;
|
||||
use tracing::{debug, info, info_span, warn, Instrument};
|
||||
|
||||
use std::future::Future;
|
||||
@@ -407,6 +408,69 @@ pub async fn profile_cpu_handler(req: Request<Body>) -> Result<Response<Body>, A
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates heap profiles.
|
||||
///
|
||||
/// This only works with jemalloc on Linux.
|
||||
pub async fn profile_heap_handler(req: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
enum Format {
|
||||
Jemalloc,
|
||||
Pprof,
|
||||
}
|
||||
|
||||
// Parameters.
|
||||
let format = match get_query_param(&req, "format")?.as_deref() {
|
||||
None => Format::Pprof,
|
||||
Some("jemalloc") => Format::Jemalloc,
|
||||
Some("pprof") => Format::Pprof,
|
||||
Some(format) => return Err(ApiError::BadRequest(anyhow!("invalid format {format}"))),
|
||||
};
|
||||
|
||||
// Obtain profiler handle.
|
||||
let mut prof_ctl = jemalloc_pprof::PROF_CTL
|
||||
.as_ref()
|
||||
.ok_or(ApiError::InternalServerError(anyhow!(
|
||||
"heap profiling not enabled"
|
||||
)))?
|
||||
.lock()
|
||||
.await;
|
||||
if !prof_ctl.activated() {
|
||||
return Err(ApiError::InternalServerError(anyhow!(
|
||||
"heap profiling not enabled"
|
||||
)));
|
||||
}
|
||||
|
||||
// Take and return the profile.
|
||||
match format {
|
||||
Format::Jemalloc => {
|
||||
// NB: file is an open handle to a tempfile that's already deleted.
|
||||
let file = tokio::task::spawn_blocking(move || prof_ctl.dump())
|
||||
.await
|
||||
.map_err(|join_err| ApiError::InternalServerError(join_err.into()))?
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
let stream = ReaderStream::new(tokio::fs::File::from_std(file));
|
||||
Response::builder()
|
||||
.status(200)
|
||||
.header(CONTENT_TYPE, "application/octet-stream")
|
||||
.header(CONTENT_DISPOSITION, "attachment; filename=\"heap.dump\"")
|
||||
.body(Body::wrap_stream(stream))
|
||||
.map_err(|err| ApiError::InternalServerError(err.into()))
|
||||
}
|
||||
|
||||
Format::Pprof => {
|
||||
let data = tokio::task::spawn_blocking(move || prof_ctl.dump_pprof())
|
||||
.await
|
||||
.map_err(|join_err| ApiError::InternalServerError(join_err.into()))?
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
Response::builder()
|
||||
.status(200)
|
||||
.header(CONTENT_TYPE, "application/octet-stream")
|
||||
.header(CONTENT_DISPOSITION, "attachment; filename=\"heap.pb\"")
|
||||
.body(Body::from(data))
|
||||
.map_err(|err| ApiError::InternalServerError(err.into()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_request_id_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
|
||||
) -> Middleware<B, ApiError> {
|
||||
Middleware::pre(move |req| async move {
|
||||
|
||||
@@ -112,30 +112,38 @@ impl MetadataRecord {
|
||||
};
|
||||
|
||||
// Next, filter the metadata record by shard.
|
||||
|
||||
// Route VM page updates to the shards that own them. VM pages are stored in the VM fork
|
||||
// of the main relation. These are sharded and managed just like regular relation pages.
|
||||
// See: https://github.com/neondatabase/neon/issues/9855
|
||||
if let Some(
|
||||
MetadataRecord::Heapam(HeapamRecord::ClearVmBits(ref mut clear_vm_bits))
|
||||
| MetadataRecord::Neonrmgr(NeonrmgrRecord::ClearVmBits(ref mut clear_vm_bits)),
|
||||
) = metadata_record
|
||||
{
|
||||
let is_local_vm_page = |heap_blk| {
|
||||
let vm_blk = pg_constants::HEAPBLK_TO_MAPBLOCK(heap_blk);
|
||||
shard.is_key_local(&rel_block_to_key(clear_vm_bits.vm_rel, vm_blk))
|
||||
};
|
||||
// Send the old and new VM page updates to their respective shards.
|
||||
clear_vm_bits.old_heap_blkno = clear_vm_bits
|
||||
.old_heap_blkno
|
||||
.filter(|&blkno| is_local_vm_page(blkno));
|
||||
clear_vm_bits.new_heap_blkno = clear_vm_bits
|
||||
.new_heap_blkno
|
||||
.filter(|&blkno| is_local_vm_page(blkno));
|
||||
// If neither VM page belongs to this shard, discard the record.
|
||||
if clear_vm_bits.old_heap_blkno.is_none() && clear_vm_bits.new_heap_blkno.is_none() {
|
||||
metadata_record = None
|
||||
match metadata_record {
|
||||
Some(
|
||||
MetadataRecord::Heapam(HeapamRecord::ClearVmBits(ref mut clear_vm_bits))
|
||||
| MetadataRecord::Neonrmgr(NeonrmgrRecord::ClearVmBits(ref mut clear_vm_bits)),
|
||||
) => {
|
||||
// Route VM page updates to the shards that own them. VM pages are stored in the VM fork
|
||||
// of the main relation. These are sharded and managed just like regular relation pages.
|
||||
// See: https://github.com/neondatabase/neon/issues/9855
|
||||
let is_local_vm_page = |heap_blk| {
|
||||
let vm_blk = pg_constants::HEAPBLK_TO_MAPBLOCK(heap_blk);
|
||||
shard.is_key_local(&rel_block_to_key(clear_vm_bits.vm_rel, vm_blk))
|
||||
};
|
||||
// Send the old and new VM page updates to their respective shards.
|
||||
clear_vm_bits.old_heap_blkno = clear_vm_bits
|
||||
.old_heap_blkno
|
||||
.filter(|&blkno| is_local_vm_page(blkno));
|
||||
clear_vm_bits.new_heap_blkno = clear_vm_bits
|
||||
.new_heap_blkno
|
||||
.filter(|&blkno| is_local_vm_page(blkno));
|
||||
// If neither VM page belongs to this shard, discard the record.
|
||||
if clear_vm_bits.old_heap_blkno.is_none() && clear_vm_bits.new_heap_blkno.is_none()
|
||||
{
|
||||
metadata_record = None
|
||||
}
|
||||
}
|
||||
Some(MetadataRecord::LogicalMessage(LogicalMessageRecord::Put(_))) => {
|
||||
// Filter LogicalMessage records (AUX files) to only be stored on shard zero
|
||||
if !shard.is_shard_zero() {
|
||||
metadata_record = None;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
Ok(metadata_record)
|
||||
|
||||
@@ -345,6 +345,7 @@ impl AuxFileV2 {
|
||||
AuxFileV2::Recognized("pg_logical/replorigin_checkpoint", hash)
|
||||
}
|
||||
(2, 1) => AuxFileV2::Recognized("pg_replslot/", hash),
|
||||
(3, 1) => AuxFileV2::Recognized("pg_stat/pgstat.stat", hash),
|
||||
(1, 0xff) => AuxFileV2::OtherWithPrefix("pg_logical/", hash),
|
||||
(0xff, 0xff) => AuxFileV2::Other(hash),
|
||||
_ => return None,
|
||||
|
||||
@@ -39,6 +39,7 @@ fn aux_hash_to_metadata_key(dir_level1: u8, dir_level2: u8, data: &[u8]) -> Key
|
||||
|
||||
const AUX_DIR_PG_LOGICAL: u8 = 0x01;
|
||||
const AUX_DIR_PG_REPLSLOT: u8 = 0x02;
|
||||
const AUX_DIR_PG_STAT: u8 = 0x03;
|
||||
const AUX_DIR_PG_UNKNOWN: u8 = 0xFF;
|
||||
|
||||
/// Encode the aux file into a fixed-size key.
|
||||
@@ -53,6 +54,7 @@ const AUX_DIR_PG_UNKNOWN: u8 = 0xFF;
|
||||
/// * pg_logical/replorigin_checkpoint -> 0x0103
|
||||
/// * pg_logical/others -> 0x01FF
|
||||
/// * pg_replslot/ -> 0x0201
|
||||
/// * pg_stat/pgstat.stat -> 0x0301
|
||||
/// * others -> 0xFFFF
|
||||
///
|
||||
/// If you add new AUX files to this function, please also add a test case to `test_encoding_portable`.
|
||||
@@ -75,6 +77,8 @@ pub fn encode_aux_file_key(path: &str) -> Key {
|
||||
aux_hash_to_metadata_key(AUX_DIR_PG_LOGICAL, 0xFF, fname.as_bytes())
|
||||
} else if let Some(fname) = path.strip_prefix("pg_replslot/") {
|
||||
aux_hash_to_metadata_key(AUX_DIR_PG_REPLSLOT, 0x01, fname.as_bytes())
|
||||
} else if let Some(fname) = path.strip_prefix("pg_stat/pgstat.stat") {
|
||||
aux_hash_to_metadata_key(AUX_DIR_PG_STAT, 0x01, fname.as_bytes())
|
||||
} else {
|
||||
if cfg!(debug_assertions) {
|
||||
warn!(
|
||||
|
||||
@@ -255,6 +255,31 @@ where
|
||||
async fn send_tarball(mut self) -> Result<(), BasebackupError> {
|
||||
// TODO include checksum
|
||||
|
||||
// Construct the pg_control file from the persisted checkpoint and pg_control
|
||||
// information. But we only add this to the tarball at the end, so that if the
|
||||
// writing is interrupted half-way through, the resulting incomplete tarball will
|
||||
// be missing the pg_control file, which prevents PostgreSQL from starting up on
|
||||
// it. With proper error handling, you should never try to start up from an
|
||||
// incomplete basebackup in the first place, of course, but this is a nice little
|
||||
// extra safety measure.
|
||||
let checkpoint_bytes = self
|
||||
.timeline
|
||||
.get_checkpoint(self.lsn, self.ctx)
|
||||
.await
|
||||
.context("failed to get checkpoint bytes")?;
|
||||
let pg_control_bytes = self
|
||||
.timeline
|
||||
.get_control_file(self.lsn, self.ctx)
|
||||
.await
|
||||
.context("failed get control bytes")?;
|
||||
let (pg_control_bytes, system_identifier, was_shutdown) =
|
||||
postgres_ffi::generate_pg_control(
|
||||
&pg_control_bytes,
|
||||
&checkpoint_bytes,
|
||||
self.lsn,
|
||||
self.timeline.pg_version,
|
||||
)?;
|
||||
|
||||
let lazy_slru_download = self.timeline.get_lazy_slru_download() && !self.full_backup;
|
||||
|
||||
let pgversion = self.timeline.pg_version;
|
||||
@@ -392,6 +417,10 @@ where
|
||||
// In future we will not generate AUX record for "pg_logical/replorigin_checkpoint" at all,
|
||||
// but now we should handle (skip) it for backward compatibility.
|
||||
continue;
|
||||
} else if path == "pg_stat/pgstat.stat" && !was_shutdown {
|
||||
// Drop statistic in case of abnormal termination, i.e. if we're not starting from the exact LSN
|
||||
// of a shutdown checkpoint.
|
||||
continue;
|
||||
}
|
||||
let header = new_tar_header(&path, content.len() as u64)?;
|
||||
self.ar
|
||||
@@ -453,8 +482,9 @@ where
|
||||
)))
|
||||
});
|
||||
|
||||
// Generate pg_control and bootstrap WAL segment.
|
||||
self.add_pgcontrol_file().await?;
|
||||
// Last, add the pg_control file and bootstrap WAL segment.
|
||||
self.add_pgcontrol_file(pg_control_bytes, system_identifier)
|
||||
.await?;
|
||||
self.ar.finish().await.map_err(BasebackupError::Client)?;
|
||||
debug!("all tarred up!");
|
||||
Ok(())
|
||||
@@ -657,7 +687,11 @@ where
|
||||
// Add generated pg_control file and bootstrap WAL segment.
|
||||
// Also send zenith.signal file with extra bootstrap data.
|
||||
//
|
||||
async fn add_pgcontrol_file(&mut self) -> Result<(), BasebackupError> {
|
||||
async fn add_pgcontrol_file(
|
||||
&mut self,
|
||||
pg_control_bytes: Bytes,
|
||||
system_identifier: u64,
|
||||
) -> Result<(), BasebackupError> {
|
||||
// add zenith.signal file
|
||||
let mut zenith_signal = String::new();
|
||||
if self.prev_record_lsn == Lsn(0) {
|
||||
@@ -680,24 +714,6 @@ where
|
||||
.await
|
||||
.map_err(BasebackupError::Client)?;
|
||||
|
||||
let checkpoint_bytes = self
|
||||
.timeline
|
||||
.get_checkpoint(self.lsn, self.ctx)
|
||||
.await
|
||||
.context("failed to get checkpoint bytes")?;
|
||||
let pg_control_bytes = self
|
||||
.timeline
|
||||
.get_control_file(self.lsn, self.ctx)
|
||||
.await
|
||||
.context("failed get control bytes")?;
|
||||
|
||||
let (pg_control_bytes, system_identifier) = postgres_ffi::generate_pg_control(
|
||||
&pg_control_bytes,
|
||||
&checkpoint_bytes,
|
||||
self.lsn,
|
||||
self.timeline.pg_version,
|
||||
)?;
|
||||
|
||||
//send pg_control
|
||||
let header = new_tar_header("global/pg_control", pg_control_bytes.len() as u64)?;
|
||||
self.ar
|
||||
|
||||
@@ -53,6 +53,11 @@ project_build_tag!(BUILD_TAG);
|
||||
#[global_allocator]
|
||||
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
||||
|
||||
/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
|
||||
#[allow(non_upper_case_globals)]
|
||||
#[export_name = "malloc_conf"]
|
||||
pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
|
||||
|
||||
const PID_FILE_NAME: &str = "pageserver.pid";
|
||||
|
||||
const FEATURES: &[&str] = &[
|
||||
@@ -127,6 +132,7 @@ fn main() -> anyhow::Result<()> {
|
||||
info!(?conf.virtual_file_io_engine, "starting with virtual_file IO engine");
|
||||
info!(?conf.virtual_file_io_mode, "starting with virtual_file IO mode");
|
||||
info!(?conf.wal_receiver_protocol, "starting with WAL receiver protocol");
|
||||
info!(?conf.page_service_pipelining, "starting with page service pipelining config");
|
||||
|
||||
// The tenants directory contains all the pageserver local disk state.
|
||||
// Create if not exists and make sure all the contents are durable before proceeding.
|
||||
@@ -302,7 +308,7 @@ fn start_pageserver(
|
||||
pageserver::metrics::tokio_epoll_uring::Collector::new(),
|
||||
))
|
||||
.unwrap();
|
||||
pageserver::preinitialize_metrics();
|
||||
pageserver::preinitialize_metrics(conf);
|
||||
|
||||
// If any failpoints were set from FAILPOINTS environment variable,
|
||||
// print them to the log for debugging purposes
|
||||
@@ -630,45 +636,59 @@ fn start_pageserver(
|
||||
tokio::net::TcpListener::from_std(pageserver_listener).context("create tokio listener")?
|
||||
});
|
||||
|
||||
let mut shutdown_pageserver = Some(shutdown_pageserver.drop_guard());
|
||||
|
||||
// All started up! Now just sit and wait for shutdown signal.
|
||||
BACKGROUND_RUNTIME.block_on(async move {
|
||||
let signal_token = CancellationToken::new();
|
||||
let signal_cancel = signal_token.child_token();
|
||||
|
||||
{
|
||||
BACKGROUND_RUNTIME.block_on(async move {
|
||||
// Spawn signal handlers. Runs in a loop since we want to be responsive to multiple signals
|
||||
// even after triggering shutdown (e.g. a SIGQUIT after a slow SIGTERM shutdown). See:
|
||||
// https://github.com/neondatabase/neon/issues/9740.
|
||||
tokio::spawn(async move {
|
||||
let mut sigint = tokio::signal::unix::signal(SignalKind::interrupt()).unwrap();
|
||||
let mut sigterm = tokio::signal::unix::signal(SignalKind::terminate()).unwrap();
|
||||
let mut sigquit = tokio::signal::unix::signal(SignalKind::quit()).unwrap();
|
||||
let signal = tokio::select! {
|
||||
_ = sigquit.recv() => {
|
||||
info!("Got signal SIGQUIT. Terminating in immediate shutdown mode",);
|
||||
std::process::exit(111);
|
||||
|
||||
loop {
|
||||
let signal = tokio::select! {
|
||||
_ = sigquit.recv() => {
|
||||
info!("Got signal SIGQUIT. Terminating in immediate shutdown mode.");
|
||||
std::process::exit(111);
|
||||
}
|
||||
_ = sigint.recv() => "SIGINT",
|
||||
_ = sigterm.recv() => "SIGTERM",
|
||||
};
|
||||
|
||||
if !signal_token.is_cancelled() {
|
||||
info!("Got signal {signal}. Terminating gracefully in fast shutdown mode.");
|
||||
signal_token.cancel();
|
||||
} else {
|
||||
info!("Got signal {signal}. Already shutting down.");
|
||||
}
|
||||
_ = sigint.recv() => { "SIGINT" },
|
||||
_ = sigterm.recv() => { "SIGTERM" },
|
||||
};
|
||||
}
|
||||
});
|
||||
|
||||
info!("Got signal {signal}. Terminating gracefully in fast shutdown mode",);
|
||||
// Wait for cancellation signal and shut down the pageserver.
|
||||
//
|
||||
// This cancels the `shutdown_pageserver` cancellation tree. Right now that tree doesn't
|
||||
// reach very far, and `task_mgr` is used instead. The plan is to change that over time.
|
||||
signal_cancel.cancelled().await;
|
||||
|
||||
// This cancels the `shutdown_pageserver` cancellation tree.
|
||||
// Right now that tree doesn't reach very far, and `task_mgr` is used instead.
|
||||
// The plan is to change that over time.
|
||||
shutdown_pageserver.take();
|
||||
pageserver::shutdown_pageserver(
|
||||
http_endpoint_listener,
|
||||
page_service,
|
||||
consumption_metrics_tasks,
|
||||
disk_usage_eviction_task,
|
||||
&tenant_manager,
|
||||
background_purges,
|
||||
deletion_queue.clone(),
|
||||
secondary_controller_tasks,
|
||||
0,
|
||||
)
|
||||
.await;
|
||||
unreachable!()
|
||||
})
|
||||
}
|
||||
shutdown_pageserver.cancel();
|
||||
pageserver::shutdown_pageserver(
|
||||
http_endpoint_listener,
|
||||
page_service,
|
||||
consumption_metrics_tasks,
|
||||
disk_usage_eviction_task,
|
||||
&tenant_manager,
|
||||
background_purges,
|
||||
deletion_queue.clone(),
|
||||
secondary_controller_tasks,
|
||||
0,
|
||||
)
|
||||
.await;
|
||||
unreachable!();
|
||||
})
|
||||
}
|
||||
|
||||
async fn create_remote_storage_client(
|
||||
|
||||
@@ -91,8 +91,6 @@
|
||||
|
||||
use crate::task_mgr::TaskKind;
|
||||
|
||||
pub(crate) mod optional_counter;
|
||||
|
||||
// The main structure of this module, see module-level comment.
|
||||
#[derive(Debug)]
|
||||
pub struct RequestContext {
|
||||
@@ -100,7 +98,6 @@ pub struct RequestContext {
|
||||
download_behavior: DownloadBehavior,
|
||||
access_stats_behavior: AccessStatsBehavior,
|
||||
page_content_kind: PageContentKind,
|
||||
pub micros_spent_throttled: optional_counter::MicroSecondsCounterU32,
|
||||
}
|
||||
|
||||
/// The kind of access to the page cache.
|
||||
@@ -158,7 +155,6 @@ impl RequestContextBuilder {
|
||||
download_behavior: DownloadBehavior::Download,
|
||||
access_stats_behavior: AccessStatsBehavior::Update,
|
||||
page_content_kind: PageContentKind::Unknown,
|
||||
micros_spent_throttled: Default::default(),
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -172,7 +168,6 @@ impl RequestContextBuilder {
|
||||
download_behavior: original.download_behavior,
|
||||
access_stats_behavior: original.access_stats_behavior,
|
||||
page_content_kind: original.page_content_kind,
|
||||
micros_spent_throttled: Default::default(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,101 +0,0 @@
|
||||
use std::{
|
||||
sync::atomic::{AtomicU32, Ordering},
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct CounterU32 {
|
||||
inner: AtomicU32,
|
||||
}
|
||||
impl Default for CounterU32 {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
inner: AtomicU32::new(u32::MAX),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl CounterU32 {
|
||||
pub fn open(&self) -> Result<(), &'static str> {
|
||||
match self
|
||||
.inner
|
||||
.compare_exchange(u32::MAX, 0, Ordering::Relaxed, Ordering::Relaxed)
|
||||
{
|
||||
Ok(_) => Ok(()),
|
||||
Err(_) => Err("open() called on clsoed state"),
|
||||
}
|
||||
}
|
||||
pub fn close(&self) -> Result<u32, &'static str> {
|
||||
match self.inner.swap(u32::MAX, Ordering::Relaxed) {
|
||||
u32::MAX => Err("close() called on closed state"),
|
||||
x => Ok(x),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add(&self, count: u32) -> Result<(), &'static str> {
|
||||
if count == 0 {
|
||||
return Ok(());
|
||||
}
|
||||
let mut had_err = None;
|
||||
self.inner
|
||||
.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |cur| match cur {
|
||||
u32::MAX => {
|
||||
had_err = Some("add() called on closed state");
|
||||
None
|
||||
}
|
||||
x => {
|
||||
let (new, overflowed) = x.overflowing_add(count);
|
||||
if new == u32::MAX || overflowed {
|
||||
had_err = Some("add() overflowed the counter");
|
||||
None
|
||||
} else {
|
||||
Some(new)
|
||||
}
|
||||
}
|
||||
})
|
||||
.map_err(|_| had_err.expect("we set it whenever the function returns None"))
|
||||
.map(|_| ())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Debug)]
|
||||
pub struct MicroSecondsCounterU32 {
|
||||
inner: CounterU32,
|
||||
}
|
||||
|
||||
impl MicroSecondsCounterU32 {
|
||||
pub fn open(&self) -> Result<(), &'static str> {
|
||||
self.inner.open()
|
||||
}
|
||||
pub fn add(&self, duration: Duration) -> Result<(), &'static str> {
|
||||
match duration.as_micros().try_into() {
|
||||
Ok(x) => self.inner.add(x),
|
||||
Err(_) => Err("add(): duration conversion error"),
|
||||
}
|
||||
}
|
||||
pub fn close_and_checked_sub_from(&self, from: Duration) -> Result<Duration, &'static str> {
|
||||
let val = self.inner.close()?;
|
||||
let val = Duration::from_micros(val as u64);
|
||||
let subbed = match from.checked_sub(val) {
|
||||
Some(v) => v,
|
||||
None => return Err("Duration::checked_sub"),
|
||||
};
|
||||
Ok(subbed)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
let counter = MicroSecondsCounterU32::default();
|
||||
counter.open().unwrap();
|
||||
counter.add(Duration::from_micros(23)).unwrap();
|
||||
let res = counter
|
||||
.close_and_checked_sub_from(Duration::from_micros(42))
|
||||
.unwrap();
|
||||
assert_eq!(res, Duration::from_micros(42 - 23));
|
||||
}
|
||||
}
|
||||
@@ -115,6 +115,10 @@ impl ControllerUpcallClient {
|
||||
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
pub(crate) fn base_url(&self) -> &Url {
|
||||
&self.base_url
|
||||
}
|
||||
}
|
||||
|
||||
impl ControlPlaneGenerationsApi for ControllerUpcallClient {
|
||||
@@ -191,13 +195,15 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient {
|
||||
|
||||
let request = ReAttachRequest {
|
||||
node_id: self.node_id,
|
||||
register,
|
||||
register: register.clone(),
|
||||
};
|
||||
|
||||
let response: ReAttachResponse = self.retry_http_forever(&re_attach_path, request).await?;
|
||||
tracing::info!(
|
||||
"Received re-attach response with {} tenants",
|
||||
response.tenants.len()
|
||||
"Received re-attach response with {} tenants (node {}, register: {:?})",
|
||||
response.tenants.len(),
|
||||
self.node_id,
|
||||
register,
|
||||
);
|
||||
|
||||
failpoint_support::sleep_millis_async!("control-plane-client-re-attach");
|
||||
|
||||
@@ -56,9 +56,9 @@ use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
use utils::auth::JwtAuth;
|
||||
use utils::failpoint_support::failpoints_handler;
|
||||
use utils::http::endpoint::profile_cpu_handler;
|
||||
use utils::http::endpoint::prometheus_metrics_handler;
|
||||
use utils::http::endpoint::request_span;
|
||||
use utils::http::endpoint::{
|
||||
profile_cpu_handler, profile_heap_handler, prometheus_metrics_handler, request_span,
|
||||
};
|
||||
use utils::http::request::must_parse_query_param;
|
||||
use utils::http::request::{get_request_param, must_get_query_param, parse_query_param};
|
||||
|
||||
@@ -155,6 +155,7 @@ impl State {
|
||||
"/swagger.yml",
|
||||
"/metrics",
|
||||
"/profile/cpu",
|
||||
"/profile/heap",
|
||||
];
|
||||
Ok(Self {
|
||||
conf,
|
||||
@@ -3203,6 +3204,7 @@ pub fn make_router(
|
||||
.data(state)
|
||||
.get("/metrics", |r| request_span(r, prometheus_metrics_handler))
|
||||
.get("/profile/cpu", |r| request_span(r, profile_cpu_handler))
|
||||
.get("/profile/heap", |r| request_span(r, profile_heap_handler))
|
||||
.get("/v1/status", |r| api_handler(r, status_handler))
|
||||
.put("/v1/failpoints", |r| {
|
||||
testing_api_handler("manage failpoints", r, failpoints_handler)
|
||||
|
||||
@@ -575,18 +575,24 @@ async fn import_file(
|
||||
} else if file_path.starts_with("pg_xact") {
|
||||
let slru = SlruKind::Clog;
|
||||
|
||||
import_slru(modification, slru, file_path, reader, len, ctx).await?;
|
||||
debug!("imported clog slru");
|
||||
if modification.tline.tenant_shard_id.is_shard_zero() {
|
||||
import_slru(modification, slru, file_path, reader, len, ctx).await?;
|
||||
debug!("imported clog slru");
|
||||
}
|
||||
} else if file_path.starts_with("pg_multixact/offsets") {
|
||||
let slru = SlruKind::MultiXactOffsets;
|
||||
|
||||
import_slru(modification, slru, file_path, reader, len, ctx).await?;
|
||||
debug!("imported multixact offsets slru");
|
||||
if modification.tline.tenant_shard_id.is_shard_zero() {
|
||||
import_slru(modification, slru, file_path, reader, len, ctx).await?;
|
||||
debug!("imported multixact offsets slru");
|
||||
}
|
||||
} else if file_path.starts_with("pg_multixact/members") {
|
||||
let slru = SlruKind::MultiXactMembers;
|
||||
|
||||
import_slru(modification, slru, file_path, reader, len, ctx).await?;
|
||||
debug!("imported multixact members slru");
|
||||
if modification.tline.tenant_shard_id.is_shard_zero() {
|
||||
import_slru(modification, slru, file_path, reader, len, ctx).await?;
|
||||
debug!("imported multixact members slru");
|
||||
}
|
||||
} else if file_path.starts_with("pg_twophase") {
|
||||
let bytes = read_all_bytes(reader).await?;
|
||||
|
||||
|
||||
@@ -7,6 +7,10 @@ use metrics::{
|
||||
IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
|
||||
};
|
||||
use once_cell::sync::Lazy;
|
||||
use pageserver_api::config::{
|
||||
PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
|
||||
PageServiceProtocolPipelinedExecutionStrategy,
|
||||
};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use postgres_backend::{is_expected_io_error, QueryError};
|
||||
use pq_proto::framed::ConnectionError;
|
||||
@@ -213,31 +217,16 @@ impl<'a> ScanLatencyOngoingRecording<'a> {
|
||||
ScanLatencyOngoingRecording { parent, start }
|
||||
}
|
||||
|
||||
pub(crate) fn observe(self, throttled: Option<Duration>) {
|
||||
pub(crate) fn observe(self) {
|
||||
let elapsed = self.start.elapsed();
|
||||
let ex_throttled = if let Some(throttled) = throttled {
|
||||
elapsed.checked_sub(throttled)
|
||||
} else {
|
||||
Some(elapsed)
|
||||
};
|
||||
if let Some(ex_throttled) = ex_throttled {
|
||||
self.parent.observe(ex_throttled.as_secs_f64());
|
||||
} else {
|
||||
use utils::rate_limit::RateLimit;
|
||||
static LOGGED: Lazy<Mutex<RateLimit>> =
|
||||
Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10))));
|
||||
let mut rate_limit = LOGGED.lock().unwrap();
|
||||
rate_limit.call(|| {
|
||||
warn!("error deducting time spent throttled; this message is logged at a global rate limit");
|
||||
});
|
||||
}
|
||||
self.parent.observe(elapsed.as_secs_f64());
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(|| {
|
||||
let inner = register_histogram_vec!(
|
||||
"pageserver_get_vectored_seconds",
|
||||
"Time spent in get_vectored, excluding time spent in timeline_get_throttle.",
|
||||
"Time spent in get_vectored.",
|
||||
&["task_kind"],
|
||||
CRITICAL_OP_BUCKETS.into(),
|
||||
)
|
||||
@@ -260,7 +249,7 @@ pub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(||
|
||||
pub(crate) static SCAN_LATENCY: Lazy<ScanLatency> = Lazy::new(|| {
|
||||
let inner = register_histogram_vec!(
|
||||
"pageserver_scan_seconds",
|
||||
"Time spent in scan, excluding time spent in timeline_get_throttle.",
|
||||
"Time spent in scan.",
|
||||
&["task_kind"],
|
||||
CRITICAL_OP_BUCKETS.into(),
|
||||
)
|
||||
@@ -1216,28 +1205,33 @@ pub(crate) mod virtual_file_io_engine {
|
||||
});
|
||||
}
|
||||
|
||||
struct GlobalAndPerTimelineHistogramTimer<'a, 'c> {
|
||||
global_latency_histo: &'a Histogram,
|
||||
pub(crate) struct SmgrOpTimer {
|
||||
global_latency_histo: Histogram,
|
||||
|
||||
// Optional because not all op types are tracked per-timeline
|
||||
per_timeline_latency_histo: Option<&'a Histogram>,
|
||||
per_timeline_latency_histo: Option<Histogram>,
|
||||
|
||||
ctx: &'c RequestContext,
|
||||
start: std::time::Instant,
|
||||
start: Instant,
|
||||
throttled: Duration,
|
||||
op: SmgrQueryType,
|
||||
count: usize,
|
||||
}
|
||||
|
||||
impl Drop for GlobalAndPerTimelineHistogramTimer<'_, '_> {
|
||||
impl SmgrOpTimer {
|
||||
pub(crate) fn deduct_throttle(&mut self, throttle: &Option<Duration>) {
|
||||
let Some(throttle) = throttle else {
|
||||
return;
|
||||
};
|
||||
self.throttled += *throttle;
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for SmgrOpTimer {
|
||||
fn drop(&mut self) {
|
||||
let elapsed = self.start.elapsed();
|
||||
let ex_throttled = self
|
||||
.ctx
|
||||
.micros_spent_throttled
|
||||
.close_and_checked_sub_from(elapsed);
|
||||
let ex_throttled = match ex_throttled {
|
||||
Ok(res) => res,
|
||||
Err(error) => {
|
||||
|
||||
let elapsed = match elapsed.checked_sub(self.throttled) {
|
||||
Some(elapsed) => elapsed,
|
||||
None => {
|
||||
use utils::rate_limit::RateLimit;
|
||||
static LOGGED: Lazy<Mutex<enum_map::EnumMap<SmgrQueryType, RateLimit>>> =
|
||||
Lazy::new(|| {
|
||||
@@ -1248,18 +1242,17 @@ impl Drop for GlobalAndPerTimelineHistogramTimer<'_, '_> {
|
||||
let mut guard = LOGGED.lock().unwrap();
|
||||
let rate_limit = &mut guard[self.op];
|
||||
rate_limit.call(|| {
|
||||
warn!(op=?self.op, error, "error deducting time spent throttled; this message is logged at a global rate limit");
|
||||
warn!(op=?self.op, ?elapsed, ?self.throttled, "implementation error: time spent throttled exceeds total request wall clock time");
|
||||
});
|
||||
elapsed
|
||||
elapsed // un-throttled time, more info than just saturating to 0
|
||||
}
|
||||
};
|
||||
|
||||
for _ in 0..self.count {
|
||||
self.global_latency_histo
|
||||
.observe(ex_throttled.as_secs_f64());
|
||||
if let Some(per_timeline_getpage_histo) = self.per_timeline_latency_histo {
|
||||
per_timeline_getpage_histo.observe(ex_throttled.as_secs_f64());
|
||||
}
|
||||
let elapsed = elapsed.as_secs_f64();
|
||||
|
||||
self.global_latency_histo.observe(elapsed);
|
||||
if let Some(per_timeline_getpage_histo) = &self.per_timeline_latency_histo {
|
||||
per_timeline_getpage_histo.observe(elapsed);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1289,6 +1282,8 @@ pub(crate) struct SmgrQueryTimePerTimeline {
|
||||
global_latency: [Histogram; SmgrQueryType::COUNT],
|
||||
per_timeline_getpage_started: IntCounter,
|
||||
per_timeline_getpage_latency: Histogram,
|
||||
global_batch_size: Histogram,
|
||||
per_timeline_batch_size: Histogram,
|
||||
}
|
||||
|
||||
static SMGR_QUERY_STARTED_GLOBAL: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
@@ -1381,6 +1376,76 @@ static SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL: Lazy<Vec<f64>> = Lazy::new(|| {
|
||||
(1..=u32::try_from(Timeline::MAX_GET_VECTORED_KEYS).unwrap())
|
||||
.map(|v| v.into())
|
||||
.collect()
|
||||
});
|
||||
|
||||
static PAGE_SERVICE_BATCH_SIZE_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
|
||||
register_histogram!(
|
||||
"pageserver_page_service_batch_size_global",
|
||||
"Batch size of pageserver page service requests",
|
||||
PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL.clone(),
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE: Lazy<Vec<f64>> = Lazy::new(|| {
|
||||
let mut buckets = Vec::new();
|
||||
for i in 0.. {
|
||||
let bucket = 1 << i;
|
||||
if bucket > u32::try_from(Timeline::MAX_GET_VECTORED_KEYS).unwrap() {
|
||||
break;
|
||||
}
|
||||
buckets.push(bucket.into());
|
||||
}
|
||||
buckets
|
||||
});
|
||||
|
||||
static PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
|
||||
register_histogram_vec!(
|
||||
"pageserver_page_service_batch_size",
|
||||
"Batch size of pageserver page service requests",
|
||||
&["tenant_id", "shard_id", "timeline_id"],
|
||||
PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE.clone()
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) static PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
|
||||
register_int_gauge_vec!(
|
||||
"pageserver_page_service_config_max_batch_size",
|
||||
"Configured maximum batch size for the server-side batching functionality of page_service. \
|
||||
Labels expose more of the configuration parameters.",
|
||||
&["mode", "execution"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
fn set_page_service_config_max_batch_size(conf: &PageServicePipeliningConfig) {
|
||||
PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE.reset();
|
||||
let (label_values, value) = match conf {
|
||||
PageServicePipeliningConfig::Serial => (["serial", "-"], 1),
|
||||
PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
|
||||
max_batch_size,
|
||||
execution,
|
||||
}) => {
|
||||
let mode = "pipelined";
|
||||
let execution = match execution {
|
||||
PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures => {
|
||||
"concurrent-futures"
|
||||
}
|
||||
PageServiceProtocolPipelinedExecutionStrategy::Tasks => "tasks",
|
||||
};
|
||||
([mode, execution], max_batch_size.get())
|
||||
}
|
||||
};
|
||||
PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE
|
||||
.with_label_values(&label_values)
|
||||
.set(value.try_into().unwrap());
|
||||
}
|
||||
|
||||
impl SmgrQueryTimePerTimeline {
|
||||
pub(crate) fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
|
||||
let tenant_id = tenant_shard_id.tenant_id.to_string();
|
||||
@@ -1416,78 +1481,53 @@ impl SmgrQueryTimePerTimeline {
|
||||
])
|
||||
.unwrap();
|
||||
|
||||
let global_batch_size = PAGE_SERVICE_BATCH_SIZE_GLOBAL.clone();
|
||||
let per_timeline_batch_size = PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE
|
||||
.get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
|
||||
.unwrap();
|
||||
|
||||
Self {
|
||||
global_started,
|
||||
global_latency,
|
||||
per_timeline_getpage_latency,
|
||||
per_timeline_getpage_started,
|
||||
global_batch_size,
|
||||
per_timeline_batch_size,
|
||||
}
|
||||
}
|
||||
pub(crate) fn start_timer<'c: 'a, 'a>(
|
||||
&'a self,
|
||||
op: SmgrQueryType,
|
||||
ctx: &'c RequestContext,
|
||||
) -> Option<impl Drop + 'a> {
|
||||
self.start_timer_many(op, 1, ctx)
|
||||
}
|
||||
pub(crate) fn start_timer_many<'c: 'a, 'a>(
|
||||
&'a self,
|
||||
op: SmgrQueryType,
|
||||
count: usize,
|
||||
ctx: &'c RequestContext,
|
||||
) -> Option<impl Drop + 'a> {
|
||||
let start = Instant::now();
|
||||
|
||||
pub(crate) fn start_smgr_op(&self, op: SmgrQueryType, started_at: Instant) -> SmgrOpTimer {
|
||||
self.global_started[op as usize].inc();
|
||||
|
||||
// We subtract time spent throttled from the observed latency.
|
||||
match ctx.micros_spent_throttled.open() {
|
||||
Ok(()) => (),
|
||||
Err(error) => {
|
||||
use utils::rate_limit::RateLimit;
|
||||
static LOGGED: Lazy<Mutex<enum_map::EnumMap<SmgrQueryType, RateLimit>>> =
|
||||
Lazy::new(|| {
|
||||
Mutex::new(enum_map::EnumMap::from_array(std::array::from_fn(|_| {
|
||||
RateLimit::new(Duration::from_secs(10))
|
||||
})))
|
||||
});
|
||||
let mut guard = LOGGED.lock().unwrap();
|
||||
let rate_limit = &mut guard[op];
|
||||
rate_limit.call(|| {
|
||||
warn!(?op, error, "error opening micros_spent_throttled; this message is logged at a global rate limit");
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let per_timeline_latency_histo = if matches!(op, SmgrQueryType::GetPageAtLsn) {
|
||||
self.per_timeline_getpage_started.inc();
|
||||
Some(&self.per_timeline_getpage_latency)
|
||||
Some(self.per_timeline_getpage_latency.clone())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Some(GlobalAndPerTimelineHistogramTimer {
|
||||
global_latency_histo: &self.global_latency[op as usize],
|
||||
SmgrOpTimer {
|
||||
global_latency_histo: self.global_latency[op as usize].clone(),
|
||||
per_timeline_latency_histo,
|
||||
ctx,
|
||||
start,
|
||||
start: started_at,
|
||||
op,
|
||||
count,
|
||||
})
|
||||
throttled: Duration::ZERO,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn observe_getpage_batch_start(&self, batch_size: usize) {
|
||||
self.global_batch_size.observe(batch_size as f64);
|
||||
self.per_timeline_batch_size.observe(batch_size as f64);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod smgr_query_time_tests {
|
||||
use std::time::Instant;
|
||||
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use strum::IntoEnumIterator;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
|
||||
use crate::{
|
||||
context::{DownloadBehavior, RequestContext},
|
||||
task_mgr::TaskKind,
|
||||
};
|
||||
|
||||
// Regression test, we used hard-coded string constants before using an enum.
|
||||
#[test]
|
||||
fn op_label_name() {
|
||||
@@ -1531,8 +1571,7 @@ mod smgr_query_time_tests {
|
||||
let (pre_global, pre_per_tenant_timeline) = get_counts();
|
||||
assert_eq!(pre_per_tenant_timeline, 0);
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Download);
|
||||
let timer = metrics.start_timer(*op, &ctx);
|
||||
let timer = metrics.start_smgr_op(*op, Instant::now());
|
||||
drop(timer);
|
||||
|
||||
let (post_global, post_per_tenant_timeline) = get_counts();
|
||||
@@ -1579,58 +1618,24 @@ pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|
|
||||
}
|
||||
});
|
||||
|
||||
pub(crate) struct BasebackupQueryTimeOngoingRecording<'a, 'c> {
|
||||
pub(crate) struct BasebackupQueryTimeOngoingRecording<'a> {
|
||||
parent: &'a BasebackupQueryTime,
|
||||
ctx: &'c RequestContext,
|
||||
start: std::time::Instant,
|
||||
}
|
||||
|
||||
impl BasebackupQueryTime {
|
||||
pub(crate) fn start_recording<'c: 'a, 'a>(
|
||||
&'a self,
|
||||
ctx: &'c RequestContext,
|
||||
) -> BasebackupQueryTimeOngoingRecording<'a, 'a> {
|
||||
pub(crate) fn start_recording(&self) -> BasebackupQueryTimeOngoingRecording<'_> {
|
||||
let start = Instant::now();
|
||||
match ctx.micros_spent_throttled.open() {
|
||||
Ok(()) => (),
|
||||
Err(error) => {
|
||||
use utils::rate_limit::RateLimit;
|
||||
static LOGGED: Lazy<Mutex<RateLimit>> =
|
||||
Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10))));
|
||||
let mut rate_limit = LOGGED.lock().unwrap();
|
||||
rate_limit.call(|| {
|
||||
warn!(error, "error opening micros_spent_throttled; this message is logged at a global rate limit");
|
||||
});
|
||||
}
|
||||
}
|
||||
BasebackupQueryTimeOngoingRecording {
|
||||
parent: self,
|
||||
ctx,
|
||||
start,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BasebackupQueryTimeOngoingRecording<'_, '_> {
|
||||
impl BasebackupQueryTimeOngoingRecording<'_> {
|
||||
pub(crate) fn observe<T>(self, res: &Result<T, QueryError>) {
|
||||
let elapsed = self.start.elapsed();
|
||||
let ex_throttled = self
|
||||
.ctx
|
||||
.micros_spent_throttled
|
||||
.close_and_checked_sub_from(elapsed);
|
||||
let ex_throttled = match ex_throttled {
|
||||
Ok(ex_throttled) => ex_throttled,
|
||||
Err(error) => {
|
||||
use utils::rate_limit::RateLimit;
|
||||
static LOGGED: Lazy<Mutex<RateLimit>> =
|
||||
Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10))));
|
||||
let mut rate_limit = LOGGED.lock().unwrap();
|
||||
rate_limit.call(|| {
|
||||
warn!(error, "error deducting time spent throttled; this message is logged at a global rate limit");
|
||||
});
|
||||
elapsed
|
||||
}
|
||||
};
|
||||
let elapsed = self.start.elapsed().as_secs_f64();
|
||||
// If you want to change categorize of a specific error, also change it in `log_query_error`.
|
||||
let metric = match res {
|
||||
Ok(_) => &self.parent.ok,
|
||||
@@ -1641,7 +1646,7 @@ impl BasebackupQueryTimeOngoingRecording<'_, '_> {
|
||||
}
|
||||
Err(_) => &self.parent.error,
|
||||
};
|
||||
metric.observe(ex_throttled.as_secs_f64());
|
||||
metric.observe(elapsed);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2722,6 +2727,11 @@ impl TimelineMetrics {
|
||||
shard_id,
|
||||
timeline_id,
|
||||
]);
|
||||
let _ = PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE.remove_label_values(&[
|
||||
tenant_id,
|
||||
shard_id,
|
||||
timeline_id,
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2747,10 +2757,12 @@ use std::sync::{Arc, Mutex};
|
||||
use std::task::{Context, Poll};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
use crate::context::{PageContentKind, RequestContext};
|
||||
use crate::task_mgr::TaskKind;
|
||||
use crate::tenant::mgr::TenantSlot;
|
||||
use crate::tenant::tasks::BackgroundLoopKind;
|
||||
use crate::tenant::Timeline;
|
||||
|
||||
/// Maintain a per timeline gauge in addition to the global gauge.
|
||||
pub(crate) struct PerTimelineRemotePhysicalSizeGauge {
|
||||
@@ -3307,7 +3319,7 @@ pub(crate) mod tenant_throttling {
|
||||
use once_cell::sync::Lazy;
|
||||
use utils::shard::TenantShardId;
|
||||
|
||||
use crate::tenant::{self, throttle::Metric};
|
||||
use crate::tenant::{self};
|
||||
|
||||
struct GlobalAndPerTenantIntCounter {
|
||||
global: IntCounter,
|
||||
@@ -3326,7 +3338,7 @@ pub(crate) mod tenant_throttling {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct TimelineGet {
|
||||
pub(crate) struct Metrics<const KIND: usize> {
|
||||
count_accounted_start: GlobalAndPerTenantIntCounter,
|
||||
count_accounted_finish: GlobalAndPerTenantIntCounter,
|
||||
wait_time: GlobalAndPerTenantIntCounter,
|
||||
@@ -3399,40 +3411,41 @@ pub(crate) mod tenant_throttling {
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
const KIND: &str = "timeline_get";
|
||||
const KINDS: &[&str] = &["pagestream"];
|
||||
pub type Pagestream = Metrics<0>;
|
||||
|
||||
impl TimelineGet {
|
||||
impl<const KIND: usize> Metrics<KIND> {
|
||||
pub(crate) fn new(tenant_shard_id: &TenantShardId) -> Self {
|
||||
let per_tenant_label_values = &[
|
||||
KIND,
|
||||
KINDS[KIND],
|
||||
&tenant_shard_id.tenant_id.to_string(),
|
||||
&tenant_shard_id.shard_slug().to_string(),
|
||||
];
|
||||
TimelineGet {
|
||||
Metrics {
|
||||
count_accounted_start: {
|
||||
GlobalAndPerTenantIntCounter {
|
||||
global: COUNT_ACCOUNTED_START.with_label_values(&[KIND]),
|
||||
global: COUNT_ACCOUNTED_START.with_label_values(&[KINDS[KIND]]),
|
||||
per_tenant: COUNT_ACCOUNTED_START_PER_TENANT
|
||||
.with_label_values(per_tenant_label_values),
|
||||
}
|
||||
},
|
||||
count_accounted_finish: {
|
||||
GlobalAndPerTenantIntCounter {
|
||||
global: COUNT_ACCOUNTED_FINISH.with_label_values(&[KIND]),
|
||||
global: COUNT_ACCOUNTED_FINISH.with_label_values(&[KINDS[KIND]]),
|
||||
per_tenant: COUNT_ACCOUNTED_FINISH_PER_TENANT
|
||||
.with_label_values(per_tenant_label_values),
|
||||
}
|
||||
},
|
||||
wait_time: {
|
||||
GlobalAndPerTenantIntCounter {
|
||||
global: WAIT_USECS.with_label_values(&[KIND]),
|
||||
global: WAIT_USECS.with_label_values(&[KINDS[KIND]]),
|
||||
per_tenant: WAIT_USECS_PER_TENANT
|
||||
.with_label_values(per_tenant_label_values),
|
||||
}
|
||||
},
|
||||
count_throttled: {
|
||||
GlobalAndPerTenantIntCounter {
|
||||
global: WAIT_COUNT.with_label_values(&[KIND]),
|
||||
global: WAIT_COUNT.with_label_values(&[KINDS[KIND]]),
|
||||
per_tenant: WAIT_COUNT_PER_TENANT
|
||||
.with_label_values(per_tenant_label_values),
|
||||
}
|
||||
@@ -3455,15 +3468,17 @@ pub(crate) mod tenant_throttling {
|
||||
&WAIT_USECS_PER_TENANT,
|
||||
&WAIT_COUNT_PER_TENANT,
|
||||
] {
|
||||
let _ = m.remove_label_values(&[
|
||||
KIND,
|
||||
&tenant_shard_id.tenant_id.to_string(),
|
||||
&tenant_shard_id.shard_slug().to_string(),
|
||||
]);
|
||||
for kind in KINDS {
|
||||
let _ = m.remove_label_values(&[
|
||||
kind,
|
||||
&tenant_shard_id.tenant_id.to_string(),
|
||||
&tenant_shard_id.shard_slug().to_string(),
|
||||
]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Metric for TimelineGet {
|
||||
impl<const KIND: usize> tenant::throttle::Metric for Metrics<KIND> {
|
||||
#[inline(always)]
|
||||
fn accounting_start(&self) {
|
||||
self.count_accounted_start.inc();
|
||||
@@ -3562,7 +3577,9 @@ pub(crate) fn set_tokio_runtime_setup(setup: &str, num_threads: NonZeroUsize) {
|
||||
.set(u64::try_from(num_threads.get()).unwrap());
|
||||
}
|
||||
|
||||
pub fn preinitialize_metrics() {
|
||||
pub fn preinitialize_metrics(conf: &'static PageServerConf) {
|
||||
set_page_service_config_max_batch_size(&conf.page_service_pipelining);
|
||||
|
||||
// Python tests need these and on some we do alerting.
|
||||
//
|
||||
// FIXME(4813): make it so that we have no top level metrics as this fn will easily fall out of
|
||||
@@ -3630,6 +3647,7 @@ pub fn preinitialize_metrics() {
|
||||
&WAL_REDO_RECORDS_HISTOGRAM,
|
||||
&WAL_REDO_BYTES_HISTOGRAM,
|
||||
&WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM,
|
||||
&PAGE_SERVICE_BATCH_SIZE_GLOBAL,
|
||||
]
|
||||
.into_iter()
|
||||
.for_each(|h| {
|
||||
|
||||
@@ -51,7 +51,7 @@ use crate::auth::check_permission;
|
||||
use crate::basebackup::BasebackupError;
|
||||
use crate::config::PageServerConf;
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::metrics::{self};
|
||||
use crate::metrics::{self, SmgrOpTimer};
|
||||
use crate::metrics::{ComputeCommandKind, COMPUTE_COMMANDS_COUNTERS, LIVE_CONNECTIONS};
|
||||
use crate::pgdatadir_mapping::Version;
|
||||
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
|
||||
@@ -540,11 +540,13 @@ impl From<WaitLsnError> for QueryError {
|
||||
enum BatchedFeMessage {
|
||||
Exists {
|
||||
span: Span,
|
||||
timer: SmgrOpTimer,
|
||||
shard: timeline::handle::Handle<TenantManagerTypes>,
|
||||
req: models::PagestreamExistsRequest,
|
||||
},
|
||||
Nblocks {
|
||||
span: Span,
|
||||
timer: SmgrOpTimer,
|
||||
shard: timeline::handle::Handle<TenantManagerTypes>,
|
||||
req: models::PagestreamNblocksRequest,
|
||||
},
|
||||
@@ -552,15 +554,17 @@ enum BatchedFeMessage {
|
||||
span: Span,
|
||||
shard: timeline::handle::Handle<TenantManagerTypes>,
|
||||
effective_request_lsn: Lsn,
|
||||
pages: smallvec::SmallVec<[(RelTag, BlockNumber); 1]>,
|
||||
pages: smallvec::SmallVec<[(RelTag, BlockNumber, SmgrOpTimer); 1]>,
|
||||
},
|
||||
DbSize {
|
||||
span: Span,
|
||||
timer: SmgrOpTimer,
|
||||
shard: timeline::handle::Handle<TenantManagerTypes>,
|
||||
req: models::PagestreamDbSizeRequest,
|
||||
},
|
||||
GetSlruSegment {
|
||||
span: Span,
|
||||
timer: SmgrOpTimer,
|
||||
shard: timeline::handle::Handle<TenantManagerTypes>,
|
||||
req: models::PagestreamGetSlruSegmentRequest,
|
||||
},
|
||||
@@ -570,6 +574,41 @@ enum BatchedFeMessage {
|
||||
},
|
||||
}
|
||||
|
||||
impl BatchedFeMessage {
|
||||
async fn throttle(&mut self, cancel: &CancellationToken) -> Result<(), QueryError> {
|
||||
let (shard, tokens, timers) = match self {
|
||||
BatchedFeMessage::Exists { shard, timer, .. }
|
||||
| BatchedFeMessage::Nblocks { shard, timer, .. }
|
||||
| BatchedFeMessage::DbSize { shard, timer, .. }
|
||||
| BatchedFeMessage::GetSlruSegment { shard, timer, .. } => {
|
||||
(
|
||||
shard,
|
||||
// 1 token is probably under-estimating because these
|
||||
// request handlers typically do several Timeline::get calls.
|
||||
1,
|
||||
itertools::Either::Left(std::iter::once(timer)),
|
||||
)
|
||||
}
|
||||
BatchedFeMessage::GetPage { shard, pages, .. } => (
|
||||
shard,
|
||||
pages.len(),
|
||||
itertools::Either::Right(pages.iter_mut().map(|(_, _, timer)| timer)),
|
||||
),
|
||||
BatchedFeMessage::RespondError { .. } => return Ok(()),
|
||||
};
|
||||
let throttled = tokio::select! {
|
||||
throttled = shard.pagestream_throttle.throttle(tokens) => { throttled }
|
||||
_ = cancel.cancelled() => {
|
||||
return Err(QueryError::Shutdown);
|
||||
}
|
||||
};
|
||||
for timer in timers {
|
||||
timer.deduct_throttle(&throttled);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl PageServerHandler {
|
||||
pub fn new(
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
@@ -632,6 +671,8 @@ impl PageServerHandler {
|
||||
msg = pgb.read_message() => { msg }
|
||||
};
|
||||
|
||||
let received_at = Instant::now();
|
||||
|
||||
let copy_data_bytes = match msg? {
|
||||
Some(FeMessage::CopyData(bytes)) => bytes,
|
||||
Some(FeMessage::Terminate) => {
|
||||
@@ -660,7 +701,15 @@ impl PageServerHandler {
|
||||
.get(tenant_id, timeline_id, ShardSelector::Zero)
|
||||
.instrument(span.clone()) // sets `shard_id` field
|
||||
.await?;
|
||||
BatchedFeMessage::Exists { span, shard, req }
|
||||
let timer = shard
|
||||
.query_metrics
|
||||
.start_smgr_op(metrics::SmgrQueryType::GetRelExists, received_at);
|
||||
BatchedFeMessage::Exists {
|
||||
span,
|
||||
timer,
|
||||
shard,
|
||||
req,
|
||||
}
|
||||
}
|
||||
PagestreamFeMessage::Nblocks(req) => {
|
||||
let span = tracing::info_span!(parent: parent_span, "handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.request_lsn);
|
||||
@@ -668,7 +717,15 @@ impl PageServerHandler {
|
||||
.get(tenant_id, timeline_id, ShardSelector::Zero)
|
||||
.instrument(span.clone()) // sets `shard_id` field
|
||||
.await?;
|
||||
BatchedFeMessage::Nblocks { span, shard, req }
|
||||
let timer = shard
|
||||
.query_metrics
|
||||
.start_smgr_op(metrics::SmgrQueryType::GetRelSize, received_at);
|
||||
BatchedFeMessage::Nblocks {
|
||||
span,
|
||||
timer,
|
||||
shard,
|
||||
req,
|
||||
}
|
||||
}
|
||||
PagestreamFeMessage::DbSize(req) => {
|
||||
let span = tracing::info_span!(parent: parent_span, "handle_db_size_request", dbnode = %req.dbnode, req_lsn = %req.request_lsn);
|
||||
@@ -676,7 +733,15 @@ impl PageServerHandler {
|
||||
.get(tenant_id, timeline_id, ShardSelector::Zero)
|
||||
.instrument(span.clone()) // sets `shard_id` field
|
||||
.await?;
|
||||
BatchedFeMessage::DbSize { span, shard, req }
|
||||
let timer = shard
|
||||
.query_metrics
|
||||
.start_smgr_op(metrics::SmgrQueryType::GetDbSize, received_at);
|
||||
BatchedFeMessage::DbSize {
|
||||
span,
|
||||
timer,
|
||||
shard,
|
||||
req,
|
||||
}
|
||||
}
|
||||
PagestreamFeMessage::GetSlruSegment(req) => {
|
||||
let span = tracing::info_span!(parent: parent_span, "handle_get_slru_segment_request", kind = %req.kind, segno = %req.segno, req_lsn = %req.request_lsn);
|
||||
@@ -684,7 +749,15 @@ impl PageServerHandler {
|
||||
.get(tenant_id, timeline_id, ShardSelector::Zero)
|
||||
.instrument(span.clone()) // sets `shard_id` field
|
||||
.await?;
|
||||
BatchedFeMessage::GetSlruSegment { span, shard, req }
|
||||
let timer = shard
|
||||
.query_metrics
|
||||
.start_smgr_op(metrics::SmgrQueryType::GetSlruSegment, received_at);
|
||||
BatchedFeMessage::GetSlruSegment {
|
||||
span,
|
||||
timer,
|
||||
shard,
|
||||
req,
|
||||
}
|
||||
}
|
||||
PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
request_lsn,
|
||||
@@ -728,6 +801,14 @@ impl PageServerHandler {
|
||||
return respond_error!(e.into());
|
||||
}
|
||||
};
|
||||
|
||||
// It's important to start the timer before waiting for the LSN
|
||||
// so that the _started counters are incremented before we do
|
||||
// any serious waiting, e.g., for LSNs.
|
||||
let timer = shard
|
||||
.query_metrics
|
||||
.start_smgr_op(metrics::SmgrQueryType::GetPageAtLsn, received_at);
|
||||
|
||||
let effective_request_lsn = match Self::wait_or_get_last_lsn(
|
||||
&shard,
|
||||
request_lsn,
|
||||
@@ -747,7 +828,7 @@ impl PageServerHandler {
|
||||
span,
|
||||
shard,
|
||||
effective_request_lsn,
|
||||
pages: smallvec::smallvec![(rel, blkno)],
|
||||
pages: smallvec::smallvec![(rel, blkno, timer)],
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -832,88 +913,112 @@ impl PageServerHandler {
|
||||
IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,
|
||||
{
|
||||
// invoke handler function
|
||||
let (handler_results, span): (Vec<Result<PagestreamBeMessage, PageStreamError>>, _) =
|
||||
match batch {
|
||||
BatchedFeMessage::Exists { span, shard, req } => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::exists");
|
||||
(
|
||||
vec![
|
||||
self.handle_get_rel_exists_request(&shard, &req, ctx)
|
||||
.instrument(span.clone())
|
||||
.await,
|
||||
],
|
||||
span,
|
||||
)
|
||||
}
|
||||
BatchedFeMessage::Nblocks { span, shard, req } => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::nblocks");
|
||||
(
|
||||
vec![
|
||||
self.handle_get_nblocks_request(&shard, &req, ctx)
|
||||
.instrument(span.clone())
|
||||
.await,
|
||||
],
|
||||
span,
|
||||
)
|
||||
}
|
||||
BatchedFeMessage::GetPage {
|
||||
let (handler_results, span): (
|
||||
Vec<Result<(PagestreamBeMessage, SmgrOpTimer), PageStreamError>>,
|
||||
_,
|
||||
) = match batch {
|
||||
BatchedFeMessage::Exists {
|
||||
span,
|
||||
timer,
|
||||
shard,
|
||||
req,
|
||||
} => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::exists");
|
||||
(
|
||||
vec![self
|
||||
.handle_get_rel_exists_request(&shard, &req, ctx)
|
||||
.instrument(span.clone())
|
||||
.await
|
||||
.map(|msg| (msg, timer))],
|
||||
span,
|
||||
shard,
|
||||
effective_request_lsn,
|
||||
pages,
|
||||
} => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::getpage");
|
||||
(
|
||||
{
|
||||
let npages = pages.len();
|
||||
trace!(npages, "handling getpage request");
|
||||
let res = self
|
||||
.handle_get_page_at_lsn_request_batched(
|
||||
&shard,
|
||||
effective_request_lsn,
|
||||
pages,
|
||||
ctx,
|
||||
)
|
||||
.instrument(span.clone())
|
||||
.await;
|
||||
assert_eq!(res.len(), npages);
|
||||
res
|
||||
},
|
||||
span,
|
||||
)
|
||||
}
|
||||
BatchedFeMessage::DbSize { span, shard, req } => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::dbsize");
|
||||
(
|
||||
vec![
|
||||
self.handle_db_size_request(&shard, &req, ctx)
|
||||
.instrument(span.clone())
|
||||
.await,
|
||||
],
|
||||
span,
|
||||
)
|
||||
}
|
||||
BatchedFeMessage::GetSlruSegment { span, shard, req } => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::slrusegment");
|
||||
(
|
||||
vec![
|
||||
self.handle_get_slru_segment_request(&shard, &req, ctx)
|
||||
.instrument(span.clone())
|
||||
.await,
|
||||
],
|
||||
span,
|
||||
)
|
||||
}
|
||||
BatchedFeMessage::RespondError { span, error } => {
|
||||
// We've already decided to respond with an error, so we don't need to
|
||||
// call the handler.
|
||||
(vec![Err(error)], span)
|
||||
}
|
||||
};
|
||||
)
|
||||
}
|
||||
BatchedFeMessage::Nblocks {
|
||||
span,
|
||||
timer,
|
||||
shard,
|
||||
req,
|
||||
} => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::nblocks");
|
||||
(
|
||||
vec![self
|
||||
.handle_get_nblocks_request(&shard, &req, ctx)
|
||||
.instrument(span.clone())
|
||||
.await
|
||||
.map(|msg| (msg, timer))],
|
||||
span,
|
||||
)
|
||||
}
|
||||
BatchedFeMessage::GetPage {
|
||||
span,
|
||||
shard,
|
||||
effective_request_lsn,
|
||||
pages,
|
||||
} => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::getpage");
|
||||
(
|
||||
{
|
||||
let npages = pages.len();
|
||||
trace!(npages, "handling getpage request");
|
||||
let res = self
|
||||
.handle_get_page_at_lsn_request_batched(
|
||||
&shard,
|
||||
effective_request_lsn,
|
||||
pages,
|
||||
ctx,
|
||||
)
|
||||
.instrument(span.clone())
|
||||
.await;
|
||||
assert_eq!(res.len(), npages);
|
||||
res
|
||||
},
|
||||
span,
|
||||
)
|
||||
}
|
||||
BatchedFeMessage::DbSize {
|
||||
span,
|
||||
timer,
|
||||
shard,
|
||||
req,
|
||||
} => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::dbsize");
|
||||
(
|
||||
vec![self
|
||||
.handle_db_size_request(&shard, &req, ctx)
|
||||
.instrument(span.clone())
|
||||
.await
|
||||
.map(|msg| (msg, timer))],
|
||||
span,
|
||||
)
|
||||
}
|
||||
BatchedFeMessage::GetSlruSegment {
|
||||
span,
|
||||
timer,
|
||||
shard,
|
||||
req,
|
||||
} => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::slrusegment");
|
||||
(
|
||||
vec![self
|
||||
.handle_get_slru_segment_request(&shard, &req, ctx)
|
||||
.instrument(span.clone())
|
||||
.await
|
||||
.map(|msg| (msg, timer))],
|
||||
span,
|
||||
)
|
||||
}
|
||||
BatchedFeMessage::RespondError { span, error } => {
|
||||
// We've already decided to respond with an error, so we don't need to
|
||||
// call the handler.
|
||||
(vec![Err(error)], span)
|
||||
}
|
||||
};
|
||||
|
||||
// Map handler result to protocol behavior.
|
||||
// Some handler errors cause exit from pagestream protocol.
|
||||
// Other handler errors are sent back as an error message and we stay in pagestream protocol.
|
||||
let mut timers: smallvec::SmallVec<[_; 1]> =
|
||||
smallvec::SmallVec::with_capacity(handler_results.len());
|
||||
for handler_result in handler_results {
|
||||
let response_msg = match handler_result {
|
||||
Err(e) => match &e {
|
||||
@@ -944,7 +1049,12 @@ impl PageServerHandler {
|
||||
})
|
||||
}
|
||||
},
|
||||
Ok(response_msg) => response_msg,
|
||||
Ok((response_msg, timer)) => {
|
||||
// Extending the lifetime of the timers so observations on drop
|
||||
// include the flush time.
|
||||
timers.push(timer);
|
||||
response_msg
|
||||
}
|
||||
};
|
||||
|
||||
// marshal & transmit response message
|
||||
@@ -961,6 +1071,7 @@ impl PageServerHandler {
|
||||
res?;
|
||||
}
|
||||
}
|
||||
drop(timers);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1081,13 +1192,18 @@ impl PageServerHandler {
|
||||
Ok(msg) => msg,
|
||||
Err(e) => break e,
|
||||
};
|
||||
let msg = match msg {
|
||||
let mut msg = match msg {
|
||||
Some(msg) => msg,
|
||||
None => {
|
||||
debug!("pagestream subprotocol end observed");
|
||||
return ((pgb_reader, timeline_handles), Ok(()));
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(cancelled) = msg.throttle(&self.cancel).await {
|
||||
break cancelled;
|
||||
}
|
||||
|
||||
let err = self
|
||||
.pagesteam_handle_batched_message(pgb_writer, msg, &cancel, ctx)
|
||||
.await;
|
||||
@@ -1245,12 +1361,13 @@ impl PageServerHandler {
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
let batch = match batch {
|
||||
let mut batch = match batch {
|
||||
Ok(batch) => batch,
|
||||
Err(e) => {
|
||||
return Err(e);
|
||||
}
|
||||
};
|
||||
batch.throttle(&self.cancel).await?;
|
||||
self.pagesteam_handle_batched_message(pgb_writer, batch, &cancel, &ctx)
|
||||
.await?;
|
||||
}
|
||||
@@ -1423,10 +1540,6 @@ impl PageServerHandler {
|
||||
req: &PagestreamExistsRequest,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<PagestreamBeMessage, PageStreamError> {
|
||||
let _timer = timeline
|
||||
.query_metrics
|
||||
.start_timer(metrics::SmgrQueryType::GetRelExists, ctx);
|
||||
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
let lsn = Self::wait_or_get_last_lsn(
|
||||
timeline,
|
||||
@@ -1453,10 +1566,6 @@ impl PageServerHandler {
|
||||
req: &PagestreamNblocksRequest,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<PagestreamBeMessage, PageStreamError> {
|
||||
let _timer = timeline
|
||||
.query_metrics
|
||||
.start_timer(metrics::SmgrQueryType::GetRelSize, ctx);
|
||||
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
let lsn = Self::wait_or_get_last_lsn(
|
||||
timeline,
|
||||
@@ -1483,10 +1592,6 @@ impl PageServerHandler {
|
||||
req: &PagestreamDbSizeRequest,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<PagestreamBeMessage, PageStreamError> {
|
||||
let _timer = timeline
|
||||
.query_metrics
|
||||
.start_timer(metrics::SmgrQueryType::GetDbSize, ctx);
|
||||
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
let lsn = Self::wait_or_get_last_lsn(
|
||||
timeline,
|
||||
@@ -1512,26 +1617,41 @@ impl PageServerHandler {
|
||||
&mut self,
|
||||
timeline: &Timeline,
|
||||
effective_lsn: Lsn,
|
||||
pages: smallvec::SmallVec<[(RelTag, BlockNumber); 1]>,
|
||||
requests: smallvec::SmallVec<[(RelTag, BlockNumber, SmgrOpTimer); 1]>,
|
||||
ctx: &RequestContext,
|
||||
) -> Vec<Result<PagestreamBeMessage, PageStreamError>> {
|
||||
) -> Vec<Result<(PagestreamBeMessage, SmgrOpTimer), PageStreamError>> {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
let _timer = timeline.query_metrics.start_timer_many(
|
||||
metrics::SmgrQueryType::GetPageAtLsn,
|
||||
pages.len(),
|
||||
ctx,
|
||||
);
|
||||
|
||||
let pages = timeline
|
||||
.get_rel_page_at_lsn_batched(pages, effective_lsn, ctx)
|
||||
timeline
|
||||
.query_metrics
|
||||
.observe_getpage_batch_start(requests.len());
|
||||
|
||||
let results = timeline
|
||||
.get_rel_page_at_lsn_batched(
|
||||
requests.iter().map(|(reltag, blkno, _)| (reltag, blkno)),
|
||||
effective_lsn,
|
||||
ctx,
|
||||
)
|
||||
.await;
|
||||
assert_eq!(results.len(), requests.len());
|
||||
|
||||
Vec::from_iter(pages.into_iter().map(|page| {
|
||||
page.map(|page| {
|
||||
PagestreamBeMessage::GetPage(models::PagestreamGetPageResponse { page })
|
||||
})
|
||||
.map_err(PageStreamError::from)
|
||||
}))
|
||||
// TODO: avoid creating the new Vec here
|
||||
Vec::from_iter(
|
||||
requests
|
||||
.into_iter()
|
||||
.zip(results.into_iter())
|
||||
.map(|((_, _, timer), res)| {
|
||||
res.map(|page| {
|
||||
(
|
||||
PagestreamBeMessage::GetPage(models::PagestreamGetPageResponse {
|
||||
page,
|
||||
}),
|
||||
timer,
|
||||
)
|
||||
})
|
||||
.map_err(PageStreamError::from)
|
||||
}),
|
||||
)
|
||||
}
|
||||
|
||||
#[instrument(skip_all, fields(shard_id))]
|
||||
@@ -1541,10 +1661,6 @@ impl PageServerHandler {
|
||||
req: &PagestreamGetSlruSegmentRequest,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<PagestreamBeMessage, PageStreamError> {
|
||||
let _timer = timeline
|
||||
.query_metrics
|
||||
.start_timer(metrics::SmgrQueryType::GetSlruSegment, ctx);
|
||||
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
let lsn = Self::wait_or_get_last_lsn(
|
||||
timeline,
|
||||
@@ -2045,7 +2161,7 @@ where
|
||||
COMPUTE_COMMANDS_COUNTERS
|
||||
.for_command(ComputeCommandKind::Basebackup)
|
||||
.inc();
|
||||
let metric_recording = metrics::BASEBACKUP_QUERY_TIME.start_recording(&ctx);
|
||||
let metric_recording = metrics::BASEBACKUP_QUERY_TIME.start_recording();
|
||||
let res = async {
|
||||
self.handle_basebackup_request(
|
||||
pgb,
|
||||
|
||||
@@ -203,9 +203,13 @@ impl Timeline {
|
||||
) -> Result<Bytes, PageReconstructError> {
|
||||
match version {
|
||||
Version::Lsn(effective_lsn) => {
|
||||
let pages = smallvec::smallvec![(tag, blknum)];
|
||||
let pages: smallvec::SmallVec<[_; 1]> = smallvec::smallvec![(tag, blknum)];
|
||||
let res = self
|
||||
.get_rel_page_at_lsn_batched(pages, effective_lsn, ctx)
|
||||
.get_rel_page_at_lsn_batched(
|
||||
pages.iter().map(|(tag, blknum)| (tag, blknum)),
|
||||
effective_lsn,
|
||||
ctx,
|
||||
)
|
||||
.await;
|
||||
assert_eq!(res.len(), 1);
|
||||
res.into_iter().next().unwrap()
|
||||
@@ -240,7 +244,7 @@ impl Timeline {
|
||||
/// The ordering of the returned vec corresponds to the ordering of `pages`.
|
||||
pub(crate) async fn get_rel_page_at_lsn_batched(
|
||||
&self,
|
||||
pages: smallvec::SmallVec<[(RelTag, BlockNumber); 1]>,
|
||||
pages: impl ExactSizeIterator<Item = (&RelTag, &BlockNumber)>,
|
||||
effective_lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
) -> Vec<Result<Bytes, PageReconstructError>> {
|
||||
@@ -254,7 +258,7 @@ impl Timeline {
|
||||
let result_slots = result.spare_capacity_mut();
|
||||
|
||||
let mut keys_slots: BTreeMap<Key, smallvec::SmallVec<[usize; 1]>> = BTreeMap::default();
|
||||
for (response_slot_idx, (tag, blknum)) in pages.into_iter().enumerate() {
|
||||
for (response_slot_idx, (tag, blknum)) in pages.enumerate() {
|
||||
if tag.relnode == 0 {
|
||||
result_slots[response_slot_idx].write(Err(PageReconstructError::Other(
|
||||
RelationError::InvalidRelnode.into(),
|
||||
@@ -265,7 +269,7 @@ impl Timeline {
|
||||
}
|
||||
|
||||
let nblocks = match self
|
||||
.get_rel_size(tag, Version::Lsn(effective_lsn), ctx)
|
||||
.get_rel_size(*tag, Version::Lsn(effective_lsn), ctx)
|
||||
.await
|
||||
{
|
||||
Ok(nblocks) => nblocks,
|
||||
@@ -276,7 +280,7 @@ impl Timeline {
|
||||
}
|
||||
};
|
||||
|
||||
if blknum >= nblocks {
|
||||
if *blknum >= nblocks {
|
||||
debug!(
|
||||
"read beyond EOF at {} blk {} at {}, size is {}: returning all-zeros page",
|
||||
tag, blknum, effective_lsn, nblocks
|
||||
@@ -286,7 +290,7 @@ impl Timeline {
|
||||
continue;
|
||||
}
|
||||
|
||||
let key = rel_block_to_key(tag, blknum);
|
||||
let key = rel_block_to_key(*tag, *blknum);
|
||||
|
||||
let key_slots = keys_slots.entry(key).or_default();
|
||||
key_slots.push(response_slot_idx);
|
||||
@@ -526,6 +530,7 @@ impl Timeline {
|
||||
lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Bytes, PageReconstructError> {
|
||||
assert!(self.tenant_shard_id.is_shard_zero());
|
||||
let n_blocks = self
|
||||
.get_slru_segment_size(kind, segno, Version::Lsn(lsn), ctx)
|
||||
.await?;
|
||||
@@ -548,6 +553,7 @@ impl Timeline {
|
||||
lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Bytes, PageReconstructError> {
|
||||
assert!(self.tenant_shard_id.is_shard_zero());
|
||||
let key = slru_block_to_key(kind, segno, blknum);
|
||||
self.get(key, lsn, ctx).await
|
||||
}
|
||||
@@ -560,6 +566,7 @@ impl Timeline {
|
||||
version: Version<'_>,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<BlockNumber, PageReconstructError> {
|
||||
assert!(self.tenant_shard_id.is_shard_zero());
|
||||
let key = slru_segment_size_to_key(kind, segno);
|
||||
let mut buf = version.get(self, key, ctx).await?;
|
||||
Ok(buf.get_u32_le())
|
||||
@@ -573,6 +580,7 @@ impl Timeline {
|
||||
version: Version<'_>,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<bool, PageReconstructError> {
|
||||
assert!(self.tenant_shard_id.is_shard_zero());
|
||||
// fetch directory listing
|
||||
let key = slru_dir_to_key(kind);
|
||||
let buf = version.get(self, key, ctx).await?;
|
||||
@@ -1043,26 +1051,28 @@ impl Timeline {
|
||||
}
|
||||
|
||||
// Iterate SLRUs next
|
||||
for kind in [
|
||||
SlruKind::Clog,
|
||||
SlruKind::MultiXactMembers,
|
||||
SlruKind::MultiXactOffsets,
|
||||
] {
|
||||
let slrudir_key = slru_dir_to_key(kind);
|
||||
result.add_key(slrudir_key);
|
||||
let buf = self.get(slrudir_key, lsn, ctx).await?;
|
||||
let dir = SlruSegmentDirectory::des(&buf)?;
|
||||
let mut segments: Vec<u32> = dir.segments.iter().cloned().collect();
|
||||
segments.sort_unstable();
|
||||
for segno in segments {
|
||||
let segsize_key = slru_segment_size_to_key(kind, segno);
|
||||
let mut buf = self.get(segsize_key, lsn, ctx).await?;
|
||||
let segsize = buf.get_u32_le();
|
||||
if self.tenant_shard_id.is_shard_zero() {
|
||||
for kind in [
|
||||
SlruKind::Clog,
|
||||
SlruKind::MultiXactMembers,
|
||||
SlruKind::MultiXactOffsets,
|
||||
] {
|
||||
let slrudir_key = slru_dir_to_key(kind);
|
||||
result.add_key(slrudir_key);
|
||||
let buf = self.get(slrudir_key, lsn, ctx).await?;
|
||||
let dir = SlruSegmentDirectory::des(&buf)?;
|
||||
let mut segments: Vec<u32> = dir.segments.iter().cloned().collect();
|
||||
segments.sort_unstable();
|
||||
for segno in segments {
|
||||
let segsize_key = slru_segment_size_to_key(kind, segno);
|
||||
let mut buf = self.get(segsize_key, lsn, ctx).await?;
|
||||
let segsize = buf.get_u32_le();
|
||||
|
||||
result.add_range(
|
||||
slru_block_to_key(kind, segno, 0)..slru_block_to_key(kind, segno, segsize),
|
||||
);
|
||||
result.add_key(segsize_key);
|
||||
result.add_range(
|
||||
slru_block_to_key(kind, segno, 0)..slru_block_to_key(kind, segno, segsize),
|
||||
);
|
||||
result.add_key(segsize_key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1464,6 +1474,10 @@ impl<'a> DatadirModification<'a> {
|
||||
blknum: BlockNumber,
|
||||
rec: NeonWalRecord,
|
||||
) -> anyhow::Result<()> {
|
||||
if !self.tline.tenant_shard_id.is_shard_zero() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
self.put(
|
||||
slru_block_to_key(kind, segno, blknum),
|
||||
Value::WalRecord(rec),
|
||||
@@ -1497,6 +1511,8 @@ impl<'a> DatadirModification<'a> {
|
||||
blknum: BlockNumber,
|
||||
img: Bytes,
|
||||
) -> anyhow::Result<()> {
|
||||
assert!(self.tline.tenant_shard_id.is_shard_zero());
|
||||
|
||||
let key = slru_block_to_key(kind, segno, blknum);
|
||||
if !key.is_valid_key_on_write_path() {
|
||||
anyhow::bail!(
|
||||
@@ -1538,6 +1554,7 @@ impl<'a> DatadirModification<'a> {
|
||||
segno: u32,
|
||||
blknum: BlockNumber,
|
||||
) -> anyhow::Result<()> {
|
||||
assert!(self.tline.tenant_shard_id.is_shard_zero());
|
||||
let key = slru_block_to_key(kind, segno, blknum);
|
||||
if !key.is_valid_key_on_write_path() {
|
||||
anyhow::bail!(
|
||||
@@ -1849,6 +1866,8 @@ impl<'a> DatadirModification<'a> {
|
||||
nblocks: BlockNumber,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
assert!(self.tline.tenant_shard_id.is_shard_zero());
|
||||
|
||||
// Add it to the directory entry
|
||||
let dir_key = slru_dir_to_key(kind);
|
||||
let buf = self.get(dir_key, ctx).await?;
|
||||
@@ -1881,6 +1900,8 @@ impl<'a> DatadirModification<'a> {
|
||||
segno: u32,
|
||||
nblocks: BlockNumber,
|
||||
) -> anyhow::Result<()> {
|
||||
assert!(self.tline.tenant_shard_id.is_shard_zero());
|
||||
|
||||
// Put size
|
||||
let size_key = slru_segment_size_to_key(kind, segno);
|
||||
let buf = nblocks.to_le_bytes();
|
||||
|
||||
@@ -357,8 +357,8 @@ pub struct Tenant {
|
||||
|
||||
/// Throttle applied at the top of [`Timeline::get`].
|
||||
/// All [`Tenant::timelines`] of a given [`Tenant`] instance share the same [`throttle::Throttle`] instance.
|
||||
pub(crate) timeline_get_throttle:
|
||||
Arc<throttle::Throttle<crate::metrics::tenant_throttling::TimelineGet>>,
|
||||
pub(crate) pagestream_throttle:
|
||||
Arc<throttle::Throttle<crate::metrics::tenant_throttling::Pagestream>>,
|
||||
|
||||
/// An ongoing timeline detach concurrency limiter.
|
||||
///
|
||||
@@ -1678,7 +1678,7 @@ impl Tenant {
|
||||
remote_metadata,
|
||||
TimelineResources {
|
||||
remote_client,
|
||||
timeline_get_throttle: self.timeline_get_throttle.clone(),
|
||||
pagestream_throttle: self.pagestream_throttle.clone(),
|
||||
l0_flush_global_state: self.l0_flush_global_state.clone(),
|
||||
},
|
||||
LoadTimelineCause::Attach,
|
||||
@@ -3835,7 +3835,7 @@ impl Tenant {
|
||||
}
|
||||
}
|
||||
|
||||
fn get_timeline_get_throttle_config(
|
||||
fn get_pagestream_throttle_config(
|
||||
psconf: &'static PageServerConf,
|
||||
overrides: &TenantConfOpt,
|
||||
) -> throttle::Config {
|
||||
@@ -3846,8 +3846,8 @@ impl Tenant {
|
||||
}
|
||||
|
||||
pub(crate) fn tenant_conf_updated(&self, new_conf: &TenantConfOpt) {
|
||||
let conf = Self::get_timeline_get_throttle_config(self.conf, new_conf);
|
||||
self.timeline_get_throttle.reconfigure(conf)
|
||||
let conf = Self::get_pagestream_throttle_config(self.conf, new_conf);
|
||||
self.pagestream_throttle.reconfigure(conf)
|
||||
}
|
||||
|
||||
/// Helper function to create a new Timeline struct.
|
||||
@@ -4009,9 +4009,9 @@ impl Tenant {
|
||||
attach_wal_lag_cooldown: Arc::new(std::sync::OnceLock::new()),
|
||||
cancel: CancellationToken::default(),
|
||||
gate: Gate::default(),
|
||||
timeline_get_throttle: Arc::new(throttle::Throttle::new(
|
||||
Tenant::get_timeline_get_throttle_config(conf, &attached_conf.tenant_conf),
|
||||
crate::metrics::tenant_throttling::TimelineGet::new(&tenant_shard_id),
|
||||
pagestream_throttle: Arc::new(throttle::Throttle::new(
|
||||
Tenant::get_pagestream_throttle_config(conf, &attached_conf.tenant_conf),
|
||||
crate::metrics::tenant_throttling::Metrics::new(&tenant_shard_id),
|
||||
)),
|
||||
tenant_conf: Arc::new(ArcSwap::from_pointee(attached_conf)),
|
||||
ongoing_timeline_detach: std::sync::Mutex::default(),
|
||||
@@ -4909,7 +4909,7 @@ impl Tenant {
|
||||
fn build_timeline_resources(&self, timeline_id: TimelineId) -> TimelineResources {
|
||||
TimelineResources {
|
||||
remote_client: self.build_timeline_remote_client(timeline_id),
|
||||
timeline_get_throttle: self.timeline_get_throttle.clone(),
|
||||
pagestream_throttle: self.pagestream_throttle.clone(),
|
||||
l0_flush_global_state: self.l0_flush_global_state.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -347,7 +347,7 @@ async fn init_load_generations(
|
||||
);
|
||||
emergency_generations(tenant_confs)
|
||||
} else if let Some(client) = ControllerUpcallClient::new(conf, cancel) {
|
||||
info!("Calling control plane API to re-attach tenants");
|
||||
info!("Calling {} API to re-attach tenants", client.base_url());
|
||||
// If we are configured to use the control plane API, then it is the source of truth for what tenants to load.
|
||||
match client.re_attach(conf).await {
|
||||
Ok(tenants) => tenants
|
||||
|
||||
@@ -2564,9 +2564,9 @@ pub fn parse_remote_index_path(path: RemotePath) -> Option<Generation> {
|
||||
}
|
||||
|
||||
/// Given the key of a tenant manifest, parse out the generation number
|
||||
pub(crate) fn parse_remote_tenant_manifest_path(path: RemotePath) -> Option<Generation> {
|
||||
pub fn parse_remote_tenant_manifest_path(path: RemotePath) -> Option<Generation> {
|
||||
static RE: OnceLock<Regex> = OnceLock::new();
|
||||
let re = RE.get_or_init(|| Regex::new(r".+tenant-manifest-([0-9a-f]{8}).json").unwrap());
|
||||
let re = RE.get_or_init(|| Regex::new(r".*tenant-manifest-([0-9a-f]{8}).json").unwrap());
|
||||
re.captures(path.get_path().as_str())
|
||||
.and_then(|c| c.get(1))
|
||||
.and_then(|m| Generation::parse_suffix(m.as_str()))
|
||||
|
||||
@@ -43,7 +43,7 @@ impl TenantManifest {
|
||||
offloaded_timelines: vec![],
|
||||
}
|
||||
}
|
||||
pub(crate) fn from_json_bytes(bytes: &[u8]) -> Result<Self, serde_json::Error> {
|
||||
pub fn from_json_bytes(bytes: &[u8]) -> Result<Self, serde_json::Error> {
|
||||
serde_json::from_slice::<Self>(bytes)
|
||||
}
|
||||
|
||||
|
||||
@@ -52,8 +52,8 @@ use camino::{Utf8Path, Utf8PathBuf};
|
||||
use futures::StreamExt;
|
||||
use itertools::Itertools;
|
||||
use pageserver_api::config::MaxVectoredReadBytes;
|
||||
use pageserver_api::key::DBDIR_KEY;
|
||||
use pageserver_api::key::{Key, KEY_SIZE};
|
||||
use pageserver_api::key::{AUX_FILES_KEY, DBDIR_KEY};
|
||||
use pageserver_api::keyspace::KeySpace;
|
||||
use pageserver_api::models::ImageCompressionAlgorithm;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
@@ -969,7 +969,11 @@ impl DeltaLayerInner {
|
||||
.as_slice()
|
||||
.iter()
|
||||
.filter_map(|(_, blob_meta)| {
|
||||
if blob_meta.key.is_rel_dir_key() || blob_meta.key == DBDIR_KEY {
|
||||
if blob_meta.key.is_rel_dir_key()
|
||||
|| blob_meta.key == DBDIR_KEY
|
||||
|| blob_meta.key == AUX_FILES_KEY
|
||||
|| blob_meta.key.is_aux_file_key()
|
||||
{
|
||||
// The size of values for these keys is unbounded and can
|
||||
// grow very large in pathological cases.
|
||||
None
|
||||
|
||||
@@ -49,8 +49,8 @@ use camino::{Utf8Path, Utf8PathBuf};
|
||||
use hex;
|
||||
use itertools::Itertools;
|
||||
use pageserver_api::config::MaxVectoredReadBytes;
|
||||
use pageserver_api::key::DBDIR_KEY;
|
||||
use pageserver_api::key::{Key, KEY_SIZE};
|
||||
use pageserver_api::key::{AUX_FILES_KEY, DBDIR_KEY};
|
||||
use pageserver_api::keyspace::KeySpace;
|
||||
use pageserver_api::shard::{ShardIdentity, TenantShardId};
|
||||
use pageserver_api::value::Value;
|
||||
@@ -591,7 +591,11 @@ impl ImageLayerInner {
|
||||
.as_slice()
|
||||
.iter()
|
||||
.filter_map(|(_, blob_meta)| {
|
||||
if blob_meta.key.is_rel_dir_key() || blob_meta.key == DBDIR_KEY {
|
||||
if blob_meta.key.is_rel_dir_key()
|
||||
|| blob_meta.key == DBDIR_KEY
|
||||
|| blob_meta.key == AUX_FILES_KEY
|
||||
|| blob_meta.key.is_aux_file_key()
|
||||
{
|
||||
// The size of values for these keys is unbounded and can
|
||||
// grow very large in pathological cases.
|
||||
None
|
||||
|
||||
@@ -471,14 +471,14 @@ async fn ingest_housekeeping_loop(tenant: Arc<Tenant>, cancel: CancellationToken
|
||||
|
||||
// TODO: rename the background loop kind to something more generic, like, tenant housekeeping.
|
||||
// Or just spawn another background loop for this throttle, it's not like it's super costly.
|
||||
info_span!(parent: None, "timeline_get_throttle", tenant_id=%tenant.tenant_shard_id, shard_id=%tenant.tenant_shard_id.shard_slug()).in_scope(|| {
|
||||
info_span!(parent: None, "pagestream_throttle", tenant_id=%tenant.tenant_shard_id, shard_id=%tenant.tenant_shard_id.shard_slug()).in_scope(|| {
|
||||
let now = Instant::now();
|
||||
let prev = std::mem::replace(&mut last_throttle_flag_reset_at, now);
|
||||
let Stats { count_accounted_start, count_accounted_finish, count_throttled, sum_throttled_usecs} = tenant.timeline_get_throttle.reset_stats();
|
||||
let Stats { count_accounted_start, count_accounted_finish, count_throttled, sum_throttled_usecs} = tenant.pagestream_throttle.reset_stats();
|
||||
if count_throttled == 0 {
|
||||
return;
|
||||
}
|
||||
let allowed_rps = tenant.timeline_get_throttle.steady_rps();
|
||||
let allowed_rps = tenant.pagestream_throttle.steady_rps();
|
||||
let delta = now - prev;
|
||||
info!(
|
||||
n_seconds=%format_args!("{:.3}", delta.as_secs_f64()),
|
||||
|
||||
@@ -1,19 +1,14 @@
|
||||
use std::{
|
||||
str::FromStr,
|
||||
sync::{
|
||||
atomic::{AtomicU64, Ordering},
|
||||
Arc, Mutex,
|
||||
Arc,
|
||||
},
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
use arc_swap::ArcSwap;
|
||||
use enumset::EnumSet;
|
||||
use tracing::{error, warn};
|
||||
use utils::leaky_bucket::{LeakyBucketConfig, RateLimiter};
|
||||
|
||||
use crate::{context::RequestContext, task_mgr::TaskKind};
|
||||
|
||||
/// Throttle for `async` functions.
|
||||
///
|
||||
/// Runtime reconfigurable.
|
||||
@@ -35,7 +30,7 @@ pub struct Throttle<M: Metric> {
|
||||
}
|
||||
|
||||
pub struct Inner {
|
||||
task_kinds: EnumSet<TaskKind>,
|
||||
enabled: bool,
|
||||
rate_limiter: Arc<RateLimiter>,
|
||||
}
|
||||
|
||||
@@ -79,26 +74,12 @@ where
|
||||
}
|
||||
fn new_inner(config: Config) -> Inner {
|
||||
let Config {
|
||||
task_kinds,
|
||||
enabled,
|
||||
initial,
|
||||
refill_interval,
|
||||
refill_amount,
|
||||
max,
|
||||
} = config;
|
||||
let task_kinds: EnumSet<TaskKind> = task_kinds
|
||||
.iter()
|
||||
.filter_map(|s| match TaskKind::from_str(s) {
|
||||
Ok(v) => Some(v),
|
||||
Err(e) => {
|
||||
// TODO: avoid this failure mode
|
||||
error!(
|
||||
"cannot parse task kind, ignoring for rate limiting {}",
|
||||
utils::error::report_compact_sources(&e)
|
||||
);
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// steady rate, we expect `refill_amount` requests per `refill_interval`.
|
||||
// dividing gives us the rps.
|
||||
@@ -112,7 +93,7 @@ where
|
||||
let rate_limiter = RateLimiter::with_initial_tokens(config, f64::from(initial_tokens));
|
||||
|
||||
Inner {
|
||||
task_kinds,
|
||||
enabled: enabled.is_enabled(),
|
||||
rate_limiter: Arc::new(rate_limiter),
|
||||
}
|
||||
}
|
||||
@@ -141,11 +122,13 @@ where
|
||||
self.inner.load().rate_limiter.steady_rps()
|
||||
}
|
||||
|
||||
pub async fn throttle(&self, ctx: &RequestContext, key_count: usize) -> Option<Duration> {
|
||||
pub async fn throttle(&self, key_count: usize) -> Option<Duration> {
|
||||
let inner = self.inner.load_full(); // clones the `Inner` Arc
|
||||
if !inner.task_kinds.contains(ctx.task_kind()) {
|
||||
|
||||
if !inner.enabled {
|
||||
return None;
|
||||
};
|
||||
}
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
self.metric.accounting_start();
|
||||
@@ -162,19 +145,6 @@ where
|
||||
.fetch_add(wait_time.as_micros() as u64, Ordering::Relaxed);
|
||||
let observation = Observation { wait_time };
|
||||
self.metric.observe_throttling(&observation);
|
||||
match ctx.micros_spent_throttled.add(wait_time) {
|
||||
Ok(res) => res,
|
||||
Err(error) => {
|
||||
use once_cell::sync::Lazy;
|
||||
use utils::rate_limit::RateLimit;
|
||||
static WARN_RATE_LIMIT: Lazy<Mutex<RateLimit>> =
|
||||
Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10))));
|
||||
let mut guard = WARN_RATE_LIMIT.lock().unwrap();
|
||||
guard.call(move || {
|
||||
warn!(error, "error adding time spent throttled; this message is logged at a global rate limit");
|
||||
});
|
||||
}
|
||||
}
|
||||
Some(wait_time)
|
||||
} else {
|
||||
None
|
||||
|
||||
@@ -208,8 +208,8 @@ fn drop_wlock<T>(rlock: tokio::sync::RwLockWriteGuard<'_, T>) {
|
||||
/// The outward-facing resources required to build a Timeline
|
||||
pub struct TimelineResources {
|
||||
pub remote_client: RemoteTimelineClient,
|
||||
pub timeline_get_throttle:
|
||||
Arc<crate::tenant::throttle::Throttle<crate::metrics::tenant_throttling::TimelineGet>>,
|
||||
pub pagestream_throttle:
|
||||
Arc<crate::tenant::throttle::Throttle<crate::metrics::tenant_throttling::Pagestream>>,
|
||||
pub l0_flush_global_state: l0_flush::L0FlushGlobalState,
|
||||
}
|
||||
|
||||
@@ -411,9 +411,9 @@ pub struct Timeline {
|
||||
/// Timeline deletion will acquire both compaction and gc locks in whatever order.
|
||||
gc_lock: tokio::sync::Mutex<()>,
|
||||
|
||||
/// Cloned from [`super::Tenant::timeline_get_throttle`] on construction.
|
||||
timeline_get_throttle:
|
||||
Arc<crate::tenant::throttle::Throttle<crate::metrics::tenant_throttling::TimelineGet>>,
|
||||
/// Cloned from [`super::Tenant::pagestream_throttle`] on construction.
|
||||
pub(crate) pagestream_throttle:
|
||||
Arc<crate::tenant::throttle::Throttle<crate::metrics::tenant_throttling::Pagestream>>,
|
||||
|
||||
/// Size estimator for aux file v2
|
||||
pub(crate) aux_file_size_estimator: AuxFileSizeEstimator,
|
||||
@@ -949,7 +949,7 @@ impl Timeline {
|
||||
/// If a remote layer file is needed, it is downloaded as part of this
|
||||
/// call.
|
||||
///
|
||||
/// This method enforces [`Self::timeline_get_throttle`] internally.
|
||||
/// This method enforces [`Self::pagestream_throttle`] internally.
|
||||
///
|
||||
/// NOTE: It is considered an error to 'get' a key that doesn't exist. The
|
||||
/// abstraction above this needs to store suitable metadata to track what
|
||||
@@ -977,8 +977,6 @@ impl Timeline {
|
||||
// page_service.
|
||||
debug_assert!(!self.shard_identity.is_key_disposable(&key));
|
||||
|
||||
self.timeline_get_throttle.throttle(ctx, 1).await;
|
||||
|
||||
let keyspace = KeySpace {
|
||||
ranges: vec![key..key.next()],
|
||||
};
|
||||
@@ -1058,13 +1056,6 @@ impl Timeline {
|
||||
.for_task_kind(ctx.task_kind())
|
||||
.map(|metric| (metric, Instant::now()));
|
||||
|
||||
// start counting after throttle so that throttle time
|
||||
// is always less than observation time
|
||||
let throttled = self
|
||||
.timeline_get_throttle
|
||||
.throttle(ctx, key_count as usize)
|
||||
.await;
|
||||
|
||||
let res = self
|
||||
.get_vectored_impl(
|
||||
keyspace.clone(),
|
||||
@@ -1076,23 +1067,7 @@ impl Timeline {
|
||||
|
||||
if let Some((metric, start)) = start {
|
||||
let elapsed = start.elapsed();
|
||||
let ex_throttled = if let Some(throttled) = throttled {
|
||||
elapsed.checked_sub(throttled)
|
||||
} else {
|
||||
Some(elapsed)
|
||||
};
|
||||
|
||||
if let Some(ex_throttled) = ex_throttled {
|
||||
metric.observe(ex_throttled.as_secs_f64());
|
||||
} else {
|
||||
use utils::rate_limit::RateLimit;
|
||||
static LOGGED: Lazy<Mutex<RateLimit>> =
|
||||
Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10))));
|
||||
let mut rate_limit = LOGGED.lock().unwrap();
|
||||
rate_limit.call(|| {
|
||||
warn!("error deducting time spent throttled; this message is logged at a global rate limit");
|
||||
});
|
||||
}
|
||||
metric.observe(elapsed.as_secs_f64());
|
||||
}
|
||||
|
||||
res
|
||||
@@ -1137,14 +1112,6 @@ impl Timeline {
|
||||
.for_task_kind(ctx.task_kind())
|
||||
.map(ScanLatencyOngoingRecording::start_recording);
|
||||
|
||||
// start counting after throttle so that throttle time
|
||||
// is always less than observation time
|
||||
let throttled = self
|
||||
.timeline_get_throttle
|
||||
// assume scan = 1 quota for now until we find a better way to process this
|
||||
.throttle(ctx, 1)
|
||||
.await;
|
||||
|
||||
let vectored_res = self
|
||||
.get_vectored_impl(
|
||||
keyspace.clone(),
|
||||
@@ -1155,7 +1122,7 @@ impl Timeline {
|
||||
.await;
|
||||
|
||||
if let Some(recording) = start {
|
||||
recording.observe(throttled);
|
||||
recording.observe();
|
||||
}
|
||||
|
||||
vectored_res
|
||||
@@ -2371,7 +2338,7 @@ impl Timeline {
|
||||
|
||||
standby_horizon: AtomicLsn::new(0),
|
||||
|
||||
timeline_get_throttle: resources.timeline_get_throttle,
|
||||
pagestream_throttle: resources.pagestream_throttle,
|
||||
|
||||
aux_file_size_estimator: AuxFileSizeEstimator::new(aux_file_metrics),
|
||||
|
||||
@@ -4272,10 +4239,12 @@ impl Timeline {
|
||||
// Normal path: we have written some data into the new image layer for this
|
||||
// partition, so flush it to disk.
|
||||
let (desc, path) = image_layer_writer.finish(ctx).await?;
|
||||
let file_size = desc.file_size;
|
||||
let image_layer = Layer::finish_creating(self.conf, self, desc, &path)?;
|
||||
info!(
|
||||
"created image layer for metadata {}",
|
||||
image_layer.local_path()
|
||||
"created image layer for metadata {} size {}",
|
||||
image_layer.local_path(),
|
||||
file_size,
|
||||
);
|
||||
Ok(ImageLayerCreationOutcome {
|
||||
image: Some(image_layer),
|
||||
|
||||
@@ -298,7 +298,7 @@ impl DeleteTimelineFlow {
|
||||
None, // Ancestor is not needed for deletion.
|
||||
TimelineResources {
|
||||
remote_client,
|
||||
timeline_get_throttle: tenant.timeline_get_throttle.clone(),
|
||||
pagestream_throttle: tenant.pagestream_throttle.clone(),
|
||||
l0_flush_global_state: tenant.l0_flush_global_state.clone(),
|
||||
},
|
||||
// Important. We dont pass ancestor above because it can be missing.
|
||||
|
||||
@@ -129,22 +129,23 @@ impl Flow {
|
||||
}
|
||||
|
||||
// Import SLRUs
|
||||
|
||||
// pg_xact (01:00 keyspace)
|
||||
self.import_slru(SlruKind::Clog, &self.storage.pgdata().join("pg_xact"))
|
||||
if self.timeline.tenant_shard_id.is_shard_zero() {
|
||||
// pg_xact (01:00 keyspace)
|
||||
self.import_slru(SlruKind::Clog, &self.storage.pgdata().join("pg_xact"))
|
||||
.await?;
|
||||
// pg_multixact/members (01:01 keyspace)
|
||||
self.import_slru(
|
||||
SlruKind::MultiXactMembers,
|
||||
&self.storage.pgdata().join("pg_multixact/members"),
|
||||
)
|
||||
.await?;
|
||||
// pg_multixact/members (01:01 keyspace)
|
||||
self.import_slru(
|
||||
SlruKind::MultiXactMembers,
|
||||
&self.storage.pgdata().join("pg_multixact/members"),
|
||||
)
|
||||
.await?;
|
||||
// pg_multixact/offsets (01:02 keyspace)
|
||||
self.import_slru(
|
||||
SlruKind::MultiXactOffsets,
|
||||
&self.storage.pgdata().join("pg_multixact/offsets"),
|
||||
)
|
||||
.await?;
|
||||
// pg_multixact/offsets (01:02 keyspace)
|
||||
self.import_slru(
|
||||
SlruKind::MultiXactOffsets,
|
||||
&self.storage.pgdata().join("pg_multixact/offsets"),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
// Import pg_twophase.
|
||||
// TODO: as empty
|
||||
@@ -302,6 +303,8 @@ impl Flow {
|
||||
}
|
||||
|
||||
async fn import_slru(&mut self, kind: SlruKind, path: &RemotePath) -> anyhow::Result<()> {
|
||||
assert!(self.timeline.tenant_shard_id.is_shard_zero());
|
||||
|
||||
let segments = self.storage.listfilesindir(path).await?;
|
||||
let segments: Vec<(String, u32, usize)> = segments
|
||||
.into_iter()
|
||||
@@ -337,7 +340,6 @@ impl Flow {
|
||||
debug!(%p, segno=%segno, %size, %start_key, %end_key, "scheduling SLRU segment");
|
||||
self.tasks
|
||||
.push(AnyImportTask::SlruBlocks(ImportSlruBlocksTask::new(
|
||||
*self.timeline.get_shard_identity(),
|
||||
start_key..end_key,
|
||||
&p,
|
||||
self.storage.clone(),
|
||||
@@ -631,21 +633,14 @@ impl ImportTask for ImportRelBlocksTask {
|
||||
}
|
||||
|
||||
struct ImportSlruBlocksTask {
|
||||
shard_identity: ShardIdentity,
|
||||
key_range: Range<Key>,
|
||||
path: RemotePath,
|
||||
storage: RemoteStorageWrapper,
|
||||
}
|
||||
|
||||
impl ImportSlruBlocksTask {
|
||||
fn new(
|
||||
shard_identity: ShardIdentity,
|
||||
key_range: Range<Key>,
|
||||
path: &RemotePath,
|
||||
storage: RemoteStorageWrapper,
|
||||
) -> Self {
|
||||
fn new(key_range: Range<Key>, path: &RemotePath, storage: RemoteStorageWrapper) -> Self {
|
||||
ImportSlruBlocksTask {
|
||||
shard_identity,
|
||||
key_range,
|
||||
path: path.clone(),
|
||||
storage,
|
||||
@@ -673,17 +668,13 @@ impl ImportTask for ImportSlruBlocksTask {
|
||||
let mut file_offset = 0;
|
||||
while blknum < end_blk {
|
||||
let key = slru_block_to_key(kind, segno, blknum);
|
||||
assert!(
|
||||
!self.shard_identity.is_key_disposable(&key),
|
||||
"SLRU keys need to go into every shard"
|
||||
);
|
||||
let buf = &buf[file_offset..(file_offset + 8192)];
|
||||
file_offset += 8192;
|
||||
layer_writer
|
||||
.put_image(key, Bytes::copy_from_slice(buf), ctx)
|
||||
.await?;
|
||||
blknum += 1;
|
||||
nimages += 1;
|
||||
blknum += 1;
|
||||
}
|
||||
Ok(nimages)
|
||||
}
|
||||
|
||||
@@ -1187,6 +1187,50 @@ impl WalIngest {
|
||||
} else {
|
||||
cp.oldestActiveXid = xlog_checkpoint.oldestActiveXid;
|
||||
}
|
||||
// NB: We abuse the Checkpoint.redo field:
|
||||
//
|
||||
// - In PostgreSQL, the Checkpoint struct doesn't store the information
|
||||
// of whether this is an online checkpoint or a shutdown checkpoint. It's
|
||||
// stored in the XLOG info field of the WAL record, shutdown checkpoints
|
||||
// use record type XLOG_CHECKPOINT_SHUTDOWN and online checkpoints use
|
||||
// XLOG_CHECKPOINT_ONLINE. We don't store the original WAL record headers
|
||||
// in the pageserver, however.
|
||||
//
|
||||
// - In PostgreSQL, the Checkpoint.redo field stores the *start* of the
|
||||
// checkpoint record, if it's a shutdown checkpoint. But when we are
|
||||
// starting from a shutdown checkpoint, the basebackup LSN is the *end*
|
||||
// of the shutdown checkpoint WAL record. That makes it difficult to
|
||||
// correctly detect whether we're starting from a shutdown record or
|
||||
// not.
|
||||
//
|
||||
// To address both of those issues, we store 0 in the redo field if it's
|
||||
// an online checkpoint record, and the record's *end* LSN if it's a
|
||||
// shutdown checkpoint. We don't need the original redo pointer in neon,
|
||||
// because we don't perform WAL replay at startup anyway, so we can get
|
||||
// away with abusing the redo field like this.
|
||||
//
|
||||
// XXX: Ideally, we would persist the extra information in a more
|
||||
// explicit format, rather than repurpose the fields of the Postgres
|
||||
// struct like this. However, we already have persisted data like this,
|
||||
// so we need to maintain backwards compatibility.
|
||||
//
|
||||
// NB: We didn't originally have this convention, so there are still old
|
||||
// persisted records that didn't do this. Before, we didn't update the
|
||||
// persisted redo field at all. That means that old records have a bogus
|
||||
// redo pointer that points to some old value, from the checkpoint record
|
||||
// that was originally imported from the data directory. If it was a
|
||||
// project created in Neon, that means it points to the first checkpoint
|
||||
// after initdb. That's OK for our purposes: all such old checkpoints are
|
||||
// treated as old online checkpoints when the basebackup is created.
|
||||
cp.redo = if info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN {
|
||||
// Store the *end* LSN of the checkpoint record. Or to be precise,
|
||||
// the start LSN of the *next* record, i.e. if the record ends
|
||||
// exactly at page boundary, the redo LSN points to just after the
|
||||
// page header on the next page.
|
||||
lsn.into()
|
||||
} else {
|
||||
Lsn::INVALID.into()
|
||||
};
|
||||
|
||||
// Write a new checkpoint key-value pair on every checkpoint record, even
|
||||
// if nothing really changed. Not strictly required, but it seems nice to
|
||||
@@ -1392,6 +1436,10 @@ impl WalIngest {
|
||||
img: Bytes,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<()> {
|
||||
if !self.shard.is_shard_zero() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
self.handle_slru_extend(modification, kind, segno, blknum, ctx)
|
||||
.await?;
|
||||
modification.put_slru_page_image(kind, segno, blknum, img)?;
|
||||
|
||||
@@ -6,7 +6,7 @@ license.workspace = true
|
||||
|
||||
[features]
|
||||
default = []
|
||||
testing = []
|
||||
testing = ["dep:tokio-postgres"]
|
||||
|
||||
[dependencies]
|
||||
ahash.workspace = true
|
||||
@@ -55,6 +55,7 @@ parquet.workspace = true
|
||||
parquet_derive.workspace = true
|
||||
pin-project-lite.workspace = true
|
||||
postgres_backend.workspace = true
|
||||
postgres-client = { package = "tokio-postgres2", path = "../libs/proxy/tokio-postgres2" }
|
||||
postgres-protocol = { package = "postgres-protocol2", path = "../libs/proxy/postgres-protocol2" }
|
||||
pq_proto.workspace = true
|
||||
prometheus.workspace = true
|
||||
@@ -81,7 +82,7 @@ subtle.workspace = true
|
||||
thiserror.workspace = true
|
||||
tikv-jemallocator.workspace = true
|
||||
tikv-jemalloc-ctl = { workspace = true, features = ["use_std"] }
|
||||
tokio-postgres = { package = "tokio-postgres2", path = "../libs/proxy/tokio-postgres2" }
|
||||
tokio-postgres = { workspace = true, optional = true }
|
||||
tokio-rustls.workspace = true
|
||||
tokio-util.workspace = true
|
||||
tokio = { workspace = true, features = ["signal"] }
|
||||
@@ -119,3 +120,4 @@ rcgen.workspace = true
|
||||
rstest.workspace = true
|
||||
walkdir.workspace = true
|
||||
rand_distr = "0.4"
|
||||
tokio-postgres.workspace = true
|
||||
|
||||
@@ -66,7 +66,7 @@ pub(super) async fn authenticate(
|
||||
|
||||
Ok(ComputeCredentials {
|
||||
info: creds,
|
||||
keys: ComputeCredentialKeys::AuthKeys(tokio_postgres::config::AuthKeys::ScramSha256(
|
||||
keys: ComputeCredentialKeys::AuthKeys(postgres_client::config::AuthKeys::ScramSha256(
|
||||
scram_keys,
|
||||
)),
|
||||
})
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
use async_trait::async_trait;
|
||||
use postgres_client::config::SslMode;
|
||||
use pq_proto::BeMessage as Be;
|
||||
use thiserror::Error;
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tokio_postgres::config::SslMode;
|
||||
use tracing::{info, info_span};
|
||||
|
||||
use super::ComputeCredentialKeys;
|
||||
@@ -49,13 +49,19 @@ impl ReportableError for ConsoleRedirectError {
|
||||
}
|
||||
}
|
||||
|
||||
fn hello_message(redirect_uri: &reqwest::Url, session_id: &str) -> String {
|
||||
fn hello_message(
|
||||
redirect_uri: &reqwest::Url,
|
||||
session_id: &str,
|
||||
duration: std::time::Duration,
|
||||
) -> String {
|
||||
let formatted_duration = humantime::format_duration(duration).to_string();
|
||||
format!(
|
||||
concat![
|
||||
"Welcome to Neon!\n",
|
||||
"Authenticate by visiting:\n",
|
||||
"Authenticate by visiting (will expire in {duration}):\n",
|
||||
" {redirect_uri}{session_id}\n\n",
|
||||
],
|
||||
duration = formatted_duration,
|
||||
redirect_uri = redirect_uri,
|
||||
session_id = session_id,
|
||||
)
|
||||
@@ -118,7 +124,11 @@ async fn authenticate(
|
||||
};
|
||||
|
||||
let span = info_span!("console_redirect", psql_session_id = &psql_session_id);
|
||||
let greeting = hello_message(link_uri, &psql_session_id);
|
||||
let greeting = hello_message(
|
||||
link_uri,
|
||||
&psql_session_id,
|
||||
auth_config.console_redirect_confirmation_timeout,
|
||||
);
|
||||
|
||||
// Give user a URL to spawn a new database.
|
||||
info!(parent: &span, "sending the auth URL to the user");
|
||||
@@ -151,12 +161,8 @@ async fn authenticate(
|
||||
|
||||
// This config should be self-contained, because we won't
|
||||
// take username or dbname from client's startup message.
|
||||
let mut config = compute::ConnCfg::new();
|
||||
config
|
||||
.host(&db_info.host)
|
||||
.port(db_info.port)
|
||||
.dbname(&db_info.dbname)
|
||||
.user(&db_info.user);
|
||||
let mut config = compute::ConnCfg::new(db_info.host.to_string(), db_info.port);
|
||||
config.dbname(&db_info.dbname).user(&db_info.user);
|
||||
|
||||
ctx.set_dbname(db_info.dbname.into());
|
||||
ctx.set_user(db_info.user.into());
|
||||
|
||||
@@ -350,6 +350,13 @@ impl JwkCacheEntryLock {
|
||||
let header = base64::decode_config(header, base64::URL_SAFE_NO_PAD)?;
|
||||
let header = serde_json::from_slice::<JwtHeader<'_>>(&header)?;
|
||||
|
||||
let payloadb = base64::decode_config(payload, base64::URL_SAFE_NO_PAD)?;
|
||||
let payload = serde_json::from_slice::<JwtPayload<'_>>(&payloadb)?;
|
||||
|
||||
if let Some(iss) = &payload.issuer {
|
||||
ctx.set_jwt_issuer(iss.as_ref().to_owned());
|
||||
}
|
||||
|
||||
let sig = base64::decode_config(signature, base64::URL_SAFE_NO_PAD)?;
|
||||
|
||||
let kid = header.key_id.ok_or(JwtError::MissingKeyId)?;
|
||||
@@ -388,9 +395,6 @@ impl JwkCacheEntryLock {
|
||||
key => return Err(JwtError::UnsupportedKeyType(key.into())),
|
||||
};
|
||||
|
||||
let payloadb = base64::decode_config(payload, base64::URL_SAFE_NO_PAD)?;
|
||||
let payload = serde_json::from_slice::<JwtPayload<'_>>(&payloadb)?;
|
||||
|
||||
tracing::debug!(?payload, "JWT signature valid with claims");
|
||||
|
||||
if let Some(aud) = expected_audience {
|
||||
|
||||
@@ -29,12 +29,7 @@ impl LocalBackend {
|
||||
api: http::Endpoint::new(compute_ctl, http::new_client()),
|
||||
},
|
||||
node_info: NodeInfo {
|
||||
config: {
|
||||
let mut cfg = ConnCfg::new();
|
||||
cfg.host(&postgres_addr.ip().to_string());
|
||||
cfg.port(postgres_addr.port());
|
||||
cfg
|
||||
},
|
||||
config: ConnCfg::new(postgres_addr.ip().to_string(), postgres_addr.port()),
|
||||
// TODO(conrad): make this better reflect compute info rather than endpoint info.
|
||||
aux: MetricsAuxInfo {
|
||||
endpoint_id: EndpointIdTag::get_interner().get_or_intern("local"),
|
||||
|
||||
@@ -11,8 +11,8 @@ pub use console_redirect::ConsoleRedirectBackend;
|
||||
pub(crate) use console_redirect::ConsoleRedirectError;
|
||||
use ipnet::{Ipv4Net, Ipv6Net};
|
||||
use local::LocalBackend;
|
||||
use postgres_client::config::AuthKeys;
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tokio_postgres::config::AuthKeys;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::auth::credentials::check_peer_addr_is_in_list;
|
||||
|
||||
@@ -227,7 +227,7 @@ pub(crate) async fn validate_password_and_exchange(
|
||||
};
|
||||
|
||||
Ok(sasl::Outcome::Success(ComputeCredentialKeys::AuthKeys(
|
||||
tokio_postgres::config::AuthKeys::ScramSha256(keys),
|
||||
postgres_client::config::AuthKeys::ScramSha256(keys),
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,14 +3,6 @@ use std::pin::pin;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::bail;
|
||||
use aws_config::environment::EnvironmentVariableCredentialsProvider;
|
||||
use aws_config::imds::credentials::ImdsCredentialsProvider;
|
||||
use aws_config::meta::credentials::CredentialsProviderChain;
|
||||
use aws_config::meta::region::RegionProviderChain;
|
||||
use aws_config::profile::ProfileFileCredentialsProvider;
|
||||
use aws_config::provider_config::ProviderConfig;
|
||||
use aws_config::web_identity_token::WebIdentityTokenCredentialsProvider;
|
||||
use aws_config::Region;
|
||||
use futures::future::Either;
|
||||
use proxy::auth::backend::jwt::JwkCache;
|
||||
use proxy::auth::backend::{AuthRateLimiter, ConsoleRedirectBackend, MaybeOwned};
|
||||
@@ -314,39 +306,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
};
|
||||
info!("Using region: {}", args.aws_region);
|
||||
|
||||
let region_provider =
|
||||
RegionProviderChain::default_provider().or_else(Region::new(args.aws_region.clone()));
|
||||
let provider_conf =
|
||||
ProviderConfig::without_region().with_region(region_provider.region().await);
|
||||
let aws_credentials_provider = {
|
||||
// uses "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"
|
||||
CredentialsProviderChain::first_try("env", EnvironmentVariableCredentialsProvider::new())
|
||||
// uses "AWS_PROFILE" / `aws sso login --profile <profile>`
|
||||
.or_else(
|
||||
"profile-sso",
|
||||
ProfileFileCredentialsProvider::builder()
|
||||
.configure(&provider_conf)
|
||||
.build(),
|
||||
)
|
||||
// uses "AWS_WEB_IDENTITY_TOKEN_FILE", "AWS_ROLE_ARN", "AWS_ROLE_SESSION_NAME"
|
||||
// needed to access remote extensions bucket
|
||||
.or_else(
|
||||
"token",
|
||||
WebIdentityTokenCredentialsProvider::builder()
|
||||
.configure(&provider_conf)
|
||||
.build(),
|
||||
)
|
||||
// uses imds v2
|
||||
.or_else("imds", ImdsCredentialsProvider::builder().build())
|
||||
};
|
||||
let elasticache_credentials_provider = Arc::new(elasticache::CredentialsProvider::new(
|
||||
elasticache::AWSIRSAConfig::new(
|
||||
args.aws_region.clone(),
|
||||
args.redis_cluster_name,
|
||||
args.redis_user_id,
|
||||
),
|
||||
aws_credentials_provider,
|
||||
));
|
||||
// TODO: untangle the config args
|
||||
let regional_redis_client = match (args.redis_auth_type.as_str(), &args.redis_notifications) {
|
||||
("plain", redis_url) => match redis_url {
|
||||
None => {
|
||||
@@ -361,7 +321,12 @@ async fn main() -> anyhow::Result<()> {
|
||||
ConnectionWithCredentialsProvider::new_with_credentials_provider(
|
||||
host.to_string(),
|
||||
port,
|
||||
elasticache_credentials_provider.clone(),
|
||||
elasticache::CredentialsProvider::new(
|
||||
args.aws_region,
|
||||
args.redis_cluster_name,
|
||||
args.redis_user_id,
|
||||
)
|
||||
.await,
|
||||
),
|
||||
),
|
||||
(None, None) => {
|
||||
|
||||
@@ -3,11 +3,11 @@ use std::sync::Arc;
|
||||
|
||||
use dashmap::DashMap;
|
||||
use ipnet::{IpNet, Ipv4Net, Ipv6Net};
|
||||
use postgres_client::{CancelToken, NoTls};
|
||||
use pq_proto::CancelKeyData;
|
||||
use thiserror::Error;
|
||||
use tokio::net::TcpStream;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio_postgres::{CancelToken, NoTls};
|
||||
use tracing::{debug, info};
|
||||
use uuid::Uuid;
|
||||
|
||||
@@ -44,7 +44,7 @@ pub(crate) enum CancelError {
|
||||
IO(#[from] std::io::Error),
|
||||
|
||||
#[error("{0}")]
|
||||
Postgres(#[from] tokio_postgres::Error),
|
||||
Postgres(#[from] postgres_client::Error),
|
||||
|
||||
#[error("rate limit exceeded")]
|
||||
RateLimit,
|
||||
@@ -70,7 +70,7 @@ impl ReportableError for CancelError {
|
||||
impl<P: CancellationPublisher> CancellationHandler<P> {
|
||||
/// Run async action within an ephemeral session identified by [`CancelKeyData`].
|
||||
pub(crate) fn get_session(self: Arc<Self>) -> Session<P> {
|
||||
// HACK: We'd rather get the real backend_pid but tokio_postgres doesn't
|
||||
// HACK: We'd rather get the real backend_pid but postgres_client doesn't
|
||||
// expose it and we don't want to do another roundtrip to query
|
||||
// for it. The client will be able to notice that this is not the
|
||||
// actual backend_pid, but backend_pid is not used for anything
|
||||
|
||||
@@ -6,13 +6,15 @@ use std::time::Duration;
|
||||
use futures::{FutureExt, TryFutureExt};
|
||||
use itertools::Itertools;
|
||||
use once_cell::sync::OnceCell;
|
||||
use postgres_client::tls::MakeTlsConnect;
|
||||
use postgres_client::{CancelToken, RawConnection};
|
||||
use postgres_protocol::message::backend::NoticeResponseBody;
|
||||
use pq_proto::StartupMessageParams;
|
||||
use rustls::client::danger::ServerCertVerifier;
|
||||
use rustls::crypto::ring;
|
||||
use rustls::pki_types::InvalidDnsNameError;
|
||||
use thiserror::Error;
|
||||
use tokio::net::TcpStream;
|
||||
use tokio_postgres::tls::MakeTlsConnect;
|
||||
use tracing::{debug, error, info, warn};
|
||||
|
||||
use crate::auth::parse_endpoint_param;
|
||||
@@ -32,9 +34,9 @@ pub const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node";
|
||||
#[derive(Debug, Error)]
|
||||
pub(crate) enum ConnectionError {
|
||||
/// This error doesn't seem to reveal any secrets; for instance,
|
||||
/// `tokio_postgres::error::Kind` doesn't contain ip addresses and such.
|
||||
/// `postgres_client::error::Kind` doesn't contain ip addresses and such.
|
||||
#[error("{COULD_NOT_CONNECT}: {0}")]
|
||||
Postgres(#[from] tokio_postgres::Error),
|
||||
Postgres(#[from] postgres_client::Error),
|
||||
|
||||
#[error("{COULD_NOT_CONNECT}: {0}")]
|
||||
CouldNotConnect(#[from] io::Error),
|
||||
@@ -97,18 +99,18 @@ impl ReportableError for ConnectionError {
|
||||
}
|
||||
|
||||
/// A pair of `ClientKey` & `ServerKey` for `SCRAM-SHA-256`.
|
||||
pub(crate) type ScramKeys = tokio_postgres::config::ScramKeys<32>;
|
||||
pub(crate) type ScramKeys = postgres_client::config::ScramKeys<32>;
|
||||
|
||||
/// A config for establishing a connection to compute node.
|
||||
/// Eventually, `tokio_postgres` will be replaced with something better.
|
||||
/// Eventually, `postgres_client` will be replaced with something better.
|
||||
/// Newtype allows us to implement methods on top of it.
|
||||
#[derive(Clone, Default)]
|
||||
pub(crate) struct ConnCfg(Box<tokio_postgres::Config>);
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct ConnCfg(Box<postgres_client::Config>);
|
||||
|
||||
/// Creation and initialization routines.
|
||||
impl ConnCfg {
|
||||
pub(crate) fn new() -> Self {
|
||||
Self::default()
|
||||
pub(crate) fn new(host: String, port: u16) -> Self {
|
||||
Self(Box::new(postgres_client::Config::new(host, port)))
|
||||
}
|
||||
|
||||
/// Reuse password or auth keys from the other config.
|
||||
@@ -122,13 +124,9 @@ impl ConnCfg {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_host(&self) -> Result<Host, WakeComputeError> {
|
||||
match self.0.get_hosts() {
|
||||
[tokio_postgres::config::Host::Tcp(s)] => Ok(s.into()),
|
||||
// we should not have multiple address or unix addresses.
|
||||
_ => Err(WakeComputeError::BadComputeAddress(
|
||||
"invalid compute address".into(),
|
||||
)),
|
||||
pub(crate) fn get_host(&self) -> Host {
|
||||
match self.0.get_host() {
|
||||
postgres_client::config::Host::Tcp(s) => s.into(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -158,7 +156,7 @@ impl ConnCfg {
|
||||
|
||||
// TODO: This is especially ugly...
|
||||
if let Some(replication) = params.get("replication") {
|
||||
use tokio_postgres::config::ReplicationMode;
|
||||
use postgres_client::config::ReplicationMode;
|
||||
match replication {
|
||||
"true" | "on" | "yes" | "1" => {
|
||||
self.replication_mode(ReplicationMode::Physical);
|
||||
@@ -180,7 +178,7 @@ impl ConnCfg {
|
||||
}
|
||||
|
||||
impl std::ops::Deref for ConnCfg {
|
||||
type Target = tokio_postgres::Config;
|
||||
type Target = postgres_client::Config;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
@@ -197,7 +195,7 @@ impl std::ops::DerefMut for ConnCfg {
|
||||
impl ConnCfg {
|
||||
/// Establish a raw TCP connection to the compute node.
|
||||
async fn connect_raw(&self, timeout: Duration) -> io::Result<(SocketAddr, TcpStream, &str)> {
|
||||
use tokio_postgres::config::Host;
|
||||
use postgres_client::config::Host;
|
||||
|
||||
// wrap TcpStream::connect with timeout
|
||||
let connect_with_timeout = |host, port| {
|
||||
@@ -222,46 +220,23 @@ impl ConnCfg {
|
||||
})
|
||||
};
|
||||
|
||||
// We can't reuse connection establishing logic from `tokio_postgres` here,
|
||||
// We can't reuse connection establishing logic from `postgres_client` here,
|
||||
// because it has no means for extracting the underlying socket which we
|
||||
// require for our business.
|
||||
let mut connection_error = None;
|
||||
let ports = self.0.get_ports();
|
||||
let hosts = self.0.get_hosts();
|
||||
// the ports array is supposed to have 0 entries, 1 entry, or as many entries as in the hosts array
|
||||
if ports.len() > 1 && ports.len() != hosts.len() {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!(
|
||||
"bad compute config, \
|
||||
ports and hosts entries' count does not match: {:?}",
|
||||
self.0
|
||||
),
|
||||
));
|
||||
}
|
||||
let port = self.0.get_port();
|
||||
let host = self.0.get_host();
|
||||
|
||||
for (i, host) in hosts.iter().enumerate() {
|
||||
let port = ports.get(i).or_else(|| ports.first()).unwrap_or(&5432);
|
||||
let host = match host {
|
||||
Host::Tcp(host) => host.as_str(),
|
||||
};
|
||||
let host = match host {
|
||||
Host::Tcp(host) => host.as_str(),
|
||||
};
|
||||
|
||||
match connect_once(host, *port).await {
|
||||
Ok((sockaddr, stream)) => return Ok((sockaddr, stream, host)),
|
||||
Err(err) => {
|
||||
// We can't throw an error here, as there might be more hosts to try.
|
||||
warn!("couldn't connect to compute node at {host}:{port}: {err}");
|
||||
connection_error = Some(err);
|
||||
}
|
||||
match connect_once(host, port).await {
|
||||
Ok((sockaddr, stream)) => Ok((sockaddr, stream, host)),
|
||||
Err(err) => {
|
||||
warn!("couldn't connect to compute node at {host}:{port}: {err}");
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
|
||||
Err(connection_error.unwrap_or_else(|| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("bad compute config: {:?}", self.0),
|
||||
)
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -270,13 +245,15 @@ type RustlsStream = <MakeRustlsConnect as MakeTlsConnect<tokio::net::TcpStream>>
|
||||
pub(crate) struct PostgresConnection {
|
||||
/// Socket connected to a compute node.
|
||||
pub(crate) stream:
|
||||
tokio_postgres::maybe_tls_stream::MaybeTlsStream<tokio::net::TcpStream, RustlsStream>,
|
||||
postgres_client::maybe_tls_stream::MaybeTlsStream<tokio::net::TcpStream, RustlsStream>,
|
||||
/// PostgreSQL connection parameters.
|
||||
pub(crate) params: std::collections::HashMap<String, String>,
|
||||
/// Query cancellation token.
|
||||
pub(crate) cancel_closure: CancelClosure,
|
||||
/// Labels for proxy's metrics.
|
||||
pub(crate) aux: MetricsAuxInfo,
|
||||
/// Notices received from compute after authenticating
|
||||
pub(crate) delayed_notice: Vec<NoticeResponseBody>,
|
||||
|
||||
_guage: NumDbConnectionsGuard<'static>,
|
||||
}
|
||||
@@ -322,10 +299,19 @@ impl ConnCfg {
|
||||
|
||||
// connect_raw() will not use TLS if sslmode is "disable"
|
||||
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
|
||||
let (client, connection) = self.0.connect_raw(stream, tls).await?;
|
||||
let connection = self.0.connect_raw(stream, tls).await?;
|
||||
drop(pause);
|
||||
tracing::Span::current().record("pid", tracing::field::display(client.get_process_id()));
|
||||
let stream = connection.stream.into_inner();
|
||||
|
||||
let RawConnection {
|
||||
stream,
|
||||
parameters,
|
||||
delayed_notice,
|
||||
process_id,
|
||||
secret_key,
|
||||
} = connection;
|
||||
|
||||
tracing::Span::current().record("pid", tracing::field::display(process_id));
|
||||
let stream = stream.into_inner();
|
||||
|
||||
// TODO: lots of useful info but maybe we can move it elsewhere (eg traces?)
|
||||
info!(
|
||||
@@ -334,18 +320,23 @@ impl ConnCfg {
|
||||
self.0.get_ssl_mode()
|
||||
);
|
||||
|
||||
// This is very ugly but as of now there's no better way to
|
||||
// extract the connection parameters from tokio-postgres' connection.
|
||||
// TODO: solve this problem in a more elegant manner (e.g. the new library).
|
||||
let params = connection.parameters;
|
||||
|
||||
// NB: CancelToken is supposed to hold socket_addr, but we use connect_raw.
|
||||
// Yet another reason to rework the connection establishing code.
|
||||
let cancel_closure = CancelClosure::new(socket_addr, client.cancel_token(), vec![]);
|
||||
let cancel_closure = CancelClosure::new(
|
||||
socket_addr,
|
||||
CancelToken {
|
||||
socket_config: None,
|
||||
ssl_mode: self.0.get_ssl_mode(),
|
||||
process_id,
|
||||
secret_key,
|
||||
},
|
||||
vec![],
|
||||
);
|
||||
|
||||
let connection = PostgresConnection {
|
||||
stream,
|
||||
params,
|
||||
params: parameters,
|
||||
delayed_notice,
|
||||
cancel_closure,
|
||||
aux,
|
||||
_guage: Metrics::get().proxy.db_connections.guard(ctx.protocol()),
|
||||
|
||||
@@ -57,6 +57,7 @@ struct RequestContextInner {
|
||||
application: Option<SmolStr>,
|
||||
error_kind: Option<ErrorKind>,
|
||||
pub(crate) auth_method: Option<AuthMethod>,
|
||||
jwt_issuer: Option<String>,
|
||||
success: bool,
|
||||
pub(crate) cold_start_info: ColdStartInfo,
|
||||
pg_options: Option<StartupMessageParams>,
|
||||
@@ -79,6 +80,7 @@ pub(crate) enum AuthMethod {
|
||||
ScramSha256,
|
||||
ScramSha256Plus,
|
||||
Cleartext,
|
||||
Jwt,
|
||||
}
|
||||
|
||||
impl Clone for RequestContext {
|
||||
@@ -100,6 +102,7 @@ impl Clone for RequestContext {
|
||||
application: inner.application.clone(),
|
||||
error_kind: inner.error_kind,
|
||||
auth_method: inner.auth_method.clone(),
|
||||
jwt_issuer: inner.jwt_issuer.clone(),
|
||||
success: inner.success,
|
||||
rejected: inner.rejected,
|
||||
cold_start_info: inner.cold_start_info,
|
||||
@@ -148,6 +151,7 @@ impl RequestContext {
|
||||
application: None,
|
||||
error_kind: None,
|
||||
auth_method: None,
|
||||
jwt_issuer: None,
|
||||
success: false,
|
||||
rejected: None,
|
||||
cold_start_info: ColdStartInfo::Unknown,
|
||||
@@ -246,6 +250,11 @@ impl RequestContext {
|
||||
this.auth_method = Some(auth_method);
|
||||
}
|
||||
|
||||
pub(crate) fn set_jwt_issuer(&self, jwt_issuer: String) {
|
||||
let mut this = self.0.try_lock().expect("should not deadlock");
|
||||
this.jwt_issuer = Some(jwt_issuer);
|
||||
}
|
||||
|
||||
pub fn has_private_peer_addr(&self) -> bool {
|
||||
self.0
|
||||
.try_lock()
|
||||
|
||||
@@ -87,6 +87,8 @@ pub(crate) struct RequestData {
|
||||
branch: Option<String>,
|
||||
pg_options: Option<String>,
|
||||
auth_method: Option<&'static str>,
|
||||
jwt_issuer: Option<String>,
|
||||
|
||||
error: Option<&'static str>,
|
||||
/// Success is counted if we form a HTTP response with sql rows inside
|
||||
/// Or if we make it to proxy_pass
|
||||
@@ -138,7 +140,9 @@ impl From<&RequestContextInner> for RequestData {
|
||||
super::AuthMethod::ScramSha256 => "scram_sha_256",
|
||||
super::AuthMethod::ScramSha256Plus => "scram_sha_256_plus",
|
||||
super::AuthMethod::Cleartext => "cleartext",
|
||||
super::AuthMethod::Jwt => "jwt",
|
||||
}),
|
||||
jwt_issuer: value.jwt_issuer.clone(),
|
||||
protocol: value.protocol.as_str(),
|
||||
region: value.region,
|
||||
error: value.error_kind.as_ref().map(|e| e.to_metric_label()),
|
||||
@@ -519,6 +523,7 @@ mod tests {
|
||||
branch: Some(hex::encode(rng.gen::<[u8; 16]>())),
|
||||
pg_options: None,
|
||||
auth_method: None,
|
||||
jwt_issuer: None,
|
||||
protocol: ["tcp", "ws", "http"][rng.gen_range(0..3)],
|
||||
region: "us-east-1",
|
||||
error: None,
|
||||
@@ -599,15 +604,15 @@ mod tests {
|
||||
assert_eq!(
|
||||
file_stats,
|
||||
[
|
||||
(1312632, 3, 6000),
|
||||
(1312621, 3, 6000),
|
||||
(1312680, 3, 6000),
|
||||
(1312637, 3, 6000),
|
||||
(1312773, 3, 6000),
|
||||
(1312610, 3, 6000),
|
||||
(1312404, 3, 6000),
|
||||
(1312639, 3, 6000),
|
||||
(437848, 1, 2000)
|
||||
(1313105, 3, 6000),
|
||||
(1313094, 3, 6000),
|
||||
(1313153, 3, 6000),
|
||||
(1313110, 3, 6000),
|
||||
(1313246, 3, 6000),
|
||||
(1313083, 3, 6000),
|
||||
(1312877, 3, 6000),
|
||||
(1313112, 3, 6000),
|
||||
(438020, 1, 2000)
|
||||
]
|
||||
);
|
||||
|
||||
@@ -639,11 +644,11 @@ mod tests {
|
||||
assert_eq!(
|
||||
file_stats,
|
||||
[
|
||||
(1203465, 5, 10000),
|
||||
(1203189, 5, 10000),
|
||||
(1203490, 5, 10000),
|
||||
(1203475, 5, 10000),
|
||||
(1203729, 5, 10000)
|
||||
(1204324, 5, 10000),
|
||||
(1204048, 5, 10000),
|
||||
(1204349, 5, 10000),
|
||||
(1204334, 5, 10000),
|
||||
(1204588, 5, 10000)
|
||||
]
|
||||
);
|
||||
|
||||
@@ -668,15 +673,15 @@ mod tests {
|
||||
assert_eq!(
|
||||
file_stats,
|
||||
[
|
||||
(1312632, 3, 6000),
|
||||
(1312621, 3, 6000),
|
||||
(1312680, 3, 6000),
|
||||
(1312637, 3, 6000),
|
||||
(1312773, 3, 6000),
|
||||
(1312610, 3, 6000),
|
||||
(1312404, 3, 6000),
|
||||
(1312639, 3, 6000),
|
||||
(437848, 1, 2000)
|
||||
(1313105, 3, 6000),
|
||||
(1313094, 3, 6000),
|
||||
(1313153, 3, 6000),
|
||||
(1313110, 3, 6000),
|
||||
(1313246, 3, 6000),
|
||||
(1313083, 3, 6000),
|
||||
(1312877, 3, 6000),
|
||||
(1313112, 3, 6000),
|
||||
(438020, 1, 2000)
|
||||
]
|
||||
);
|
||||
|
||||
@@ -713,7 +718,7 @@ mod tests {
|
||||
// files are smaller than the size threshold, but they took too long to fill so were flushed early
|
||||
assert_eq!(
|
||||
file_stats,
|
||||
[(657696, 2, 3001), (657410, 2, 3000), (657206, 2, 2999)]
|
||||
[(658014, 2, 3001), (657728, 2, 3000), (657524, 2, 2999)]
|
||||
);
|
||||
|
||||
tmpdir.close().unwrap();
|
||||
|
||||
@@ -5,7 +5,6 @@ use std::sync::Arc;
|
||||
|
||||
use futures::TryFutureExt;
|
||||
use thiserror::Error;
|
||||
use tokio_postgres::config::SslMode;
|
||||
use tokio_postgres::Client;
|
||||
use tracing::{error, info, info_span, warn, Instrument};
|
||||
|
||||
@@ -161,11 +160,11 @@ impl MockControlPlane {
|
||||
}
|
||||
|
||||
async fn do_wake_compute(&self) -> Result<NodeInfo, WakeComputeError> {
|
||||
let mut config = compute::ConnCfg::new();
|
||||
config
|
||||
.host(self.endpoint.host_str().unwrap_or("localhost"))
|
||||
.port(self.endpoint.port().unwrap_or(5432))
|
||||
.ssl_mode(SslMode::Disable);
|
||||
let mut config = compute::ConnCfg::new(
|
||||
self.endpoint.host_str().unwrap_or("localhost").to_owned(),
|
||||
self.endpoint.port().unwrap_or(5432),
|
||||
);
|
||||
config.ssl_mode(postgres_client::config::SslMode::Disable);
|
||||
|
||||
let node = NodeInfo {
|
||||
config,
|
||||
|
||||
@@ -6,8 +6,8 @@ use std::time::Duration;
|
||||
use ::http::header::AUTHORIZATION;
|
||||
use ::http::HeaderName;
|
||||
use futures::TryFutureExt;
|
||||
use postgres_client::config::SslMode;
|
||||
use tokio::time::Instant;
|
||||
use tokio_postgres::config::SslMode;
|
||||
use tracing::{debug, info, info_span, warn, Instrument};
|
||||
|
||||
use super::super::messages::{ControlPlaneErrorMessage, GetRoleSecret, WakeCompute};
|
||||
@@ -241,8 +241,8 @@ impl NeonControlPlaneClient {
|
||||
// Don't set anything but host and port! This config will be cached.
|
||||
// We'll set username and such later using the startup message.
|
||||
// TODO: add more type safety (in progress).
|
||||
let mut config = compute::ConnCfg::new();
|
||||
config.host(host).port(port).ssl_mode(SslMode::Disable); // TLS is not configured on compute nodes.
|
||||
let mut config = compute::ConnCfg::new(host.to_owned(), port);
|
||||
config.ssl_mode(SslMode::Disable); // TLS is not configured on compute nodes.
|
||||
|
||||
let node = NodeInfo {
|
||||
config,
|
||||
|
||||
@@ -84,7 +84,7 @@ pub(crate) trait ReportableError: fmt::Display + Send + 'static {
|
||||
fn get_error_kind(&self) -> ErrorKind;
|
||||
}
|
||||
|
||||
impl ReportableError for tokio_postgres::error::Error {
|
||||
impl ReportableError for postgres_client::error::Error {
|
||||
fn get_error_kind(&self) -> ErrorKind {
|
||||
if self.as_db_error().is_some() {
|
||||
ErrorKind::Postgres
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
use std::convert::TryFrom;
|
||||
use std::sync::Arc;
|
||||
|
||||
use postgres_client::tls::MakeTlsConnect;
|
||||
use rustls::pki_types::ServerName;
|
||||
use rustls::ClientConfig;
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tokio_postgres::tls::MakeTlsConnect;
|
||||
|
||||
mod private {
|
||||
use std::future::Future;
|
||||
@@ -12,9 +12,9 @@ mod private {
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use postgres_client::tls::{ChannelBinding, TlsConnect};
|
||||
use rustls::pki_types::ServerName;
|
||||
use tokio::io::{AsyncRead, AsyncWrite, ReadBuf};
|
||||
use tokio_postgres::tls::{ChannelBinding, TlsConnect};
|
||||
use tokio_rustls::client::TlsStream;
|
||||
use tokio_rustls::TlsConnector;
|
||||
|
||||
@@ -59,7 +59,7 @@ mod private {
|
||||
|
||||
pub struct RustlsStream<S>(TlsStream<S>);
|
||||
|
||||
impl<S> tokio_postgres::tls::TlsStream for RustlsStream<S>
|
||||
impl<S> postgres_client::tls::TlsStream for RustlsStream<S>
|
||||
where
|
||||
S: AsyncRead + AsyncWrite + Unpin,
|
||||
{
|
||||
|
||||
@@ -86,7 +86,7 @@ impl ConnectMechanism for TcpMechanism<'_> {
|
||||
node_info: &control_plane::CachedNodeInfo,
|
||||
timeout: time::Duration,
|
||||
) -> Result<PostgresConnection, Self::Error> {
|
||||
let host = node_info.config.get_host()?;
|
||||
let host = node_info.config.get_host();
|
||||
let permit = self.locks.get_permit(&host).await?;
|
||||
permit.release_result(node_info.connect(ctx, timeout).await)
|
||||
}
|
||||
|
||||
@@ -384,11 +384,13 @@ pub(crate) async fn prepare_client_connection<P>(
|
||||
// The new token (cancel_key_data) will be sent to the client.
|
||||
let cancel_key_data = session.enable_query_cancellation(node.cancel_closure.clone());
|
||||
|
||||
// Forward all deferred notices to the client.
|
||||
for notice in &node.delayed_notice {
|
||||
stream.write_message_noflush(&Be::Raw(b'N', notice.as_bytes()))?;
|
||||
}
|
||||
|
||||
// Forward all postgres connection params to the client.
|
||||
// Right now the implementation is very hacky and inefficent (ideally,
|
||||
// we don't need an intermediate hashmap), but at least it should be correct.
|
||||
for (name, value) in &node.params {
|
||||
// TODO: Theoretically, this could result in a big pile of params...
|
||||
stream.write_message_noflush(&Be::ParameterStatus {
|
||||
name: name.as_bytes(),
|
||||
value: value.as_bytes(),
|
||||
|
||||
@@ -31,9 +31,9 @@ impl CouldRetry for io::Error {
|
||||
}
|
||||
}
|
||||
|
||||
impl CouldRetry for tokio_postgres::error::DbError {
|
||||
impl CouldRetry for postgres_client::error::DbError {
|
||||
fn could_retry(&self) -> bool {
|
||||
use tokio_postgres::error::SqlState;
|
||||
use postgres_client::error::SqlState;
|
||||
matches!(
|
||||
self.code(),
|
||||
&SqlState::CONNECTION_FAILURE
|
||||
@@ -43,9 +43,9 @@ impl CouldRetry for tokio_postgres::error::DbError {
|
||||
)
|
||||
}
|
||||
}
|
||||
impl ShouldRetryWakeCompute for tokio_postgres::error::DbError {
|
||||
impl ShouldRetryWakeCompute for postgres_client::error::DbError {
|
||||
fn should_retry_wake_compute(&self) -> bool {
|
||||
use tokio_postgres::error::SqlState;
|
||||
use postgres_client::error::SqlState;
|
||||
// Here are errors that happens after the user successfully authenticated to the database.
|
||||
// TODO: there are pgbouncer errors that should be retried, but they are not listed here.
|
||||
!matches!(
|
||||
@@ -61,21 +61,21 @@ impl ShouldRetryWakeCompute for tokio_postgres::error::DbError {
|
||||
}
|
||||
}
|
||||
|
||||
impl CouldRetry for tokio_postgres::Error {
|
||||
impl CouldRetry for postgres_client::Error {
|
||||
fn could_retry(&self) -> bool {
|
||||
if let Some(io_err) = self.source().and_then(|x| x.downcast_ref()) {
|
||||
io::Error::could_retry(io_err)
|
||||
} else if let Some(db_err) = self.source().and_then(|x| x.downcast_ref()) {
|
||||
tokio_postgres::error::DbError::could_retry(db_err)
|
||||
postgres_client::error::DbError::could_retry(db_err)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
impl ShouldRetryWakeCompute for tokio_postgres::Error {
|
||||
impl ShouldRetryWakeCompute for postgres_client::Error {
|
||||
fn should_retry_wake_compute(&self) -> bool {
|
||||
if let Some(db_err) = self.source().and_then(|x| x.downcast_ref()) {
|
||||
tokio_postgres::error::DbError::should_retry_wake_compute(db_err)
|
||||
postgres_client::error::DbError::should_retry_wake_compute(db_err)
|
||||
} else {
|
||||
// likely an IO error. Possible the compute has shutdown and the
|
||||
// cache is stale.
|
||||
|
||||
@@ -8,9 +8,9 @@ use std::fmt::Debug;
|
||||
|
||||
use bytes::{Bytes, BytesMut};
|
||||
use futures::{SinkExt, StreamExt};
|
||||
use postgres_client::tls::TlsConnect;
|
||||
use postgres_protocol::message::frontend;
|
||||
use tokio::io::{AsyncReadExt, DuplexStream};
|
||||
use tokio_postgres::tls::TlsConnect;
|
||||
use tokio_util::codec::{Decoder, Encoder};
|
||||
|
||||
use super::*;
|
||||
@@ -158,8 +158,8 @@ async fn scram_auth_disable_channel_binding() -> anyhow::Result<()> {
|
||||
Scram::new("password").await?,
|
||||
));
|
||||
|
||||
let _client_err = tokio_postgres::Config::new()
|
||||
.channel_binding(tokio_postgres::config::ChannelBinding::Disable)
|
||||
let _client_err = postgres_client::Config::new("test".to_owned(), 5432)
|
||||
.channel_binding(postgres_client::config::ChannelBinding::Disable)
|
||||
.user("user")
|
||||
.dbname("db")
|
||||
.password("password")
|
||||
@@ -175,7 +175,7 @@ async fn scram_auth_disable_channel_binding() -> anyhow::Result<()> {
|
||||
async fn scram_auth_prefer_channel_binding() -> anyhow::Result<()> {
|
||||
connect_failure(
|
||||
Intercept::None,
|
||||
tokio_postgres::config::ChannelBinding::Prefer,
|
||||
postgres_client::config::ChannelBinding::Prefer,
|
||||
)
|
||||
.await
|
||||
}
|
||||
@@ -185,7 +185,7 @@ async fn scram_auth_prefer_channel_binding() -> anyhow::Result<()> {
|
||||
async fn scram_auth_prefer_channel_binding_intercept() -> anyhow::Result<()> {
|
||||
connect_failure(
|
||||
Intercept::Methods,
|
||||
tokio_postgres::config::ChannelBinding::Prefer,
|
||||
postgres_client::config::ChannelBinding::Prefer,
|
||||
)
|
||||
.await
|
||||
}
|
||||
@@ -195,7 +195,7 @@ async fn scram_auth_prefer_channel_binding_intercept() -> anyhow::Result<()> {
|
||||
async fn scram_auth_prefer_channel_binding_intercept_response() -> anyhow::Result<()> {
|
||||
connect_failure(
|
||||
Intercept::SASLResponse,
|
||||
tokio_postgres::config::ChannelBinding::Prefer,
|
||||
postgres_client::config::ChannelBinding::Prefer,
|
||||
)
|
||||
.await
|
||||
}
|
||||
@@ -205,7 +205,7 @@ async fn scram_auth_prefer_channel_binding_intercept_response() -> anyhow::Resul
|
||||
async fn scram_auth_require_channel_binding() -> anyhow::Result<()> {
|
||||
connect_failure(
|
||||
Intercept::None,
|
||||
tokio_postgres::config::ChannelBinding::Require,
|
||||
postgres_client::config::ChannelBinding::Require,
|
||||
)
|
||||
.await
|
||||
}
|
||||
@@ -215,7 +215,7 @@ async fn scram_auth_require_channel_binding() -> anyhow::Result<()> {
|
||||
async fn scram_auth_require_channel_binding_intercept() -> anyhow::Result<()> {
|
||||
connect_failure(
|
||||
Intercept::Methods,
|
||||
tokio_postgres::config::ChannelBinding::Require,
|
||||
postgres_client::config::ChannelBinding::Require,
|
||||
)
|
||||
.await
|
||||
}
|
||||
@@ -225,14 +225,14 @@ async fn scram_auth_require_channel_binding_intercept() -> anyhow::Result<()> {
|
||||
async fn scram_auth_require_channel_binding_intercept_response() -> anyhow::Result<()> {
|
||||
connect_failure(
|
||||
Intercept::SASLResponse,
|
||||
tokio_postgres::config::ChannelBinding::Require,
|
||||
postgres_client::config::ChannelBinding::Require,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn connect_failure(
|
||||
intercept: Intercept,
|
||||
channel_binding: tokio_postgres::config::ChannelBinding,
|
||||
channel_binding: postgres_client::config::ChannelBinding,
|
||||
) -> anyhow::Result<()> {
|
||||
let (server, client, client_config, server_config) = proxy_mitm(intercept).await;
|
||||
let proxy = tokio::spawn(dummy_proxy(
|
||||
@@ -241,7 +241,7 @@ async fn connect_failure(
|
||||
Scram::new("password").await?,
|
||||
));
|
||||
|
||||
let _client_err = tokio_postgres::Config::new()
|
||||
let _client_err = postgres_client::Config::new("test".to_owned(), 5432)
|
||||
.channel_binding(channel_binding)
|
||||
.user("user")
|
||||
.dbname("db")
|
||||
|
||||
@@ -7,13 +7,13 @@ use std::time::Duration;
|
||||
use anyhow::{bail, Context};
|
||||
use async_trait::async_trait;
|
||||
use http::StatusCode;
|
||||
use postgres_client::config::SslMode;
|
||||
use postgres_client::tls::{MakeTlsConnect, NoTls};
|
||||
use retry::{retry_after, ShouldRetryWakeCompute};
|
||||
use rstest::rstest;
|
||||
use rustls::crypto::ring;
|
||||
use rustls::pki_types;
|
||||
use tokio::io::DuplexStream;
|
||||
use tokio_postgres::config::SslMode;
|
||||
use tokio_postgres::tls::{MakeTlsConnect, NoTls};
|
||||
|
||||
use super::connect_compute::ConnectMechanism;
|
||||
use super::retry::CouldRetry;
|
||||
@@ -204,7 +204,7 @@ async fn handshake_tls_is_enforced_by_proxy() -> anyhow::Result<()> {
|
||||
let (_, server_config) = generate_tls_config("generic-project-name.localhost", "localhost")?;
|
||||
let proxy = tokio::spawn(dummy_proxy(client, Some(server_config), NoAuth));
|
||||
|
||||
let client_err = tokio_postgres::Config::new()
|
||||
let client_err = postgres_client::Config::new("test".to_owned(), 5432)
|
||||
.user("john_doe")
|
||||
.dbname("earth")
|
||||
.ssl_mode(SslMode::Disable)
|
||||
@@ -233,7 +233,7 @@ async fn handshake_tls() -> anyhow::Result<()> {
|
||||
generate_tls_config("generic-project-name.localhost", "localhost")?;
|
||||
let proxy = tokio::spawn(dummy_proxy(client, Some(server_config), NoAuth));
|
||||
|
||||
let (_client, _conn) = tokio_postgres::Config::new()
|
||||
let _conn = postgres_client::Config::new("test".to_owned(), 5432)
|
||||
.user("john_doe")
|
||||
.dbname("earth")
|
||||
.ssl_mode(SslMode::Require)
|
||||
@@ -249,7 +249,7 @@ async fn handshake_raw() -> anyhow::Result<()> {
|
||||
|
||||
let proxy = tokio::spawn(dummy_proxy(client, None, NoAuth));
|
||||
|
||||
let (_client, _conn) = tokio_postgres::Config::new()
|
||||
let _conn = postgres_client::Config::new("test".to_owned(), 5432)
|
||||
.user("john_doe")
|
||||
.dbname("earth")
|
||||
.options("project=generic-project-name")
|
||||
@@ -296,8 +296,8 @@ async fn scram_auth_good(#[case] password: &str) -> anyhow::Result<()> {
|
||||
Scram::new(password).await?,
|
||||
));
|
||||
|
||||
let (_client, _conn) = tokio_postgres::Config::new()
|
||||
.channel_binding(tokio_postgres::config::ChannelBinding::Require)
|
||||
let _conn = postgres_client::Config::new("test".to_owned(), 5432)
|
||||
.channel_binding(postgres_client::config::ChannelBinding::Require)
|
||||
.user("user")
|
||||
.dbname("db")
|
||||
.password(password)
|
||||
@@ -320,8 +320,8 @@ async fn scram_auth_disable_channel_binding() -> anyhow::Result<()> {
|
||||
Scram::new("password").await?,
|
||||
));
|
||||
|
||||
let (_client, _conn) = tokio_postgres::Config::new()
|
||||
.channel_binding(tokio_postgres::config::ChannelBinding::Disable)
|
||||
let _conn = postgres_client::Config::new("test".to_owned(), 5432)
|
||||
.channel_binding(postgres_client::config::ChannelBinding::Disable)
|
||||
.user("user")
|
||||
.dbname("db")
|
||||
.password("password")
|
||||
@@ -348,7 +348,7 @@ async fn scram_auth_mock() -> anyhow::Result<()> {
|
||||
.map(char::from)
|
||||
.collect();
|
||||
|
||||
let _client_err = tokio_postgres::Config::new()
|
||||
let _client_err = postgres_client::Config::new("test".to_owned(), 5432)
|
||||
.user("user")
|
||||
.dbname("db")
|
||||
.password(&password) // no password will match the mocked secret
|
||||
@@ -546,7 +546,7 @@ impl TestControlPlaneClient for TestConnectMechanism {
|
||||
|
||||
fn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeInfo {
|
||||
let node = NodeInfo {
|
||||
config: compute::ConnCfg::new(),
|
||||
config: compute::ConnCfg::new("test".to_owned(), 5432),
|
||||
aux: MetricsAuxInfo {
|
||||
endpoint_id: (&EndpointId::from("endpoint")).into(),
|
||||
project_id: (&ProjectId::from("project")).into(),
|
||||
|
||||
@@ -1,6 +1,14 @@
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, SystemTime};
|
||||
|
||||
use aws_config::environment::EnvironmentVariableCredentialsProvider;
|
||||
use aws_config::imds::credentials::ImdsCredentialsProvider;
|
||||
use aws_config::meta::credentials::CredentialsProviderChain;
|
||||
use aws_config::meta::region::RegionProviderChain;
|
||||
use aws_config::profile::ProfileFileCredentialsProvider;
|
||||
use aws_config::provider_config::ProviderConfig;
|
||||
use aws_config::web_identity_token::WebIdentityTokenCredentialsProvider;
|
||||
use aws_config::Region;
|
||||
use aws_sdk_iam::config::ProvideCredentials;
|
||||
use aws_sigv4::http_request::{
|
||||
self, SignableBody, SignableRequest, SignatureLocation, SigningSettings,
|
||||
@@ -45,12 +53,45 @@ pub struct CredentialsProvider {
|
||||
}
|
||||
|
||||
impl CredentialsProvider {
|
||||
pub fn new(config: AWSIRSAConfig, credentials_provider: CredentialsProviderChain) -> Self {
|
||||
CredentialsProvider {
|
||||
config,
|
||||
credentials_provider,
|
||||
}
|
||||
pub async fn new(
|
||||
aws_region: String,
|
||||
redis_cluster_name: Option<String>,
|
||||
redis_user_id: Option<String>,
|
||||
) -> Arc<CredentialsProvider> {
|
||||
let region_provider =
|
||||
RegionProviderChain::default_provider().or_else(Region::new(aws_region.clone()));
|
||||
let provider_conf =
|
||||
ProviderConfig::without_region().with_region(region_provider.region().await);
|
||||
let aws_credentials_provider = {
|
||||
// uses "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"
|
||||
CredentialsProviderChain::first_try(
|
||||
"env",
|
||||
EnvironmentVariableCredentialsProvider::new(),
|
||||
)
|
||||
// uses "AWS_PROFILE" / `aws sso login --profile <profile>`
|
||||
.or_else(
|
||||
"profile-sso",
|
||||
ProfileFileCredentialsProvider::builder()
|
||||
.configure(&provider_conf)
|
||||
.build(),
|
||||
)
|
||||
// uses "AWS_WEB_IDENTITY_TOKEN_FILE", "AWS_ROLE_ARN", "AWS_ROLE_SESSION_NAME"
|
||||
// needed to access remote extensions bucket
|
||||
.or_else(
|
||||
"token",
|
||||
WebIdentityTokenCredentialsProvider::builder()
|
||||
.configure(&provider_conf)
|
||||
.build(),
|
||||
)
|
||||
// uses imds v2
|
||||
.or_else("imds", ImdsCredentialsProvider::builder().build())
|
||||
};
|
||||
Arc::new(CredentialsProvider {
|
||||
config: AWSIRSAConfig::new(aws_region, redis_cluster_name, redis_user_id),
|
||||
credentials_provider: aws_credentials_provider,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) async fn provide_credentials(&self) -> anyhow::Result<(String, String)> {
|
||||
let aws_credentials = self
|
||||
.credentials_provider
|
||||
|
||||
@@ -37,9 +37,9 @@ use crate::types::{EndpointId, Host, LOCAL_PROXY_SUFFIX};
|
||||
|
||||
pub(crate) struct PoolingBackend {
|
||||
pub(crate) http_conn_pool: Arc<GlobalConnPool<Send, HttpConnPool<Send>>>,
|
||||
pub(crate) local_pool: Arc<LocalConnPool<tokio_postgres::Client>>,
|
||||
pub(crate) local_pool: Arc<LocalConnPool<postgres_client::Client>>,
|
||||
pub(crate) pool:
|
||||
Arc<GlobalConnPool<tokio_postgres::Client, EndpointConnPool<tokio_postgres::Client>>>,
|
||||
Arc<GlobalConnPool<postgres_client::Client, EndpointConnPool<postgres_client::Client>>>,
|
||||
|
||||
pub(crate) config: &'static ProxyConfig,
|
||||
pub(crate) auth_backend: &'static crate::auth::Backend<'static, ()>,
|
||||
@@ -53,6 +53,8 @@ impl PoolingBackend {
|
||||
user_info: &ComputeUserInfo,
|
||||
password: &[u8],
|
||||
) -> Result<ComputeCredentials, AuthError> {
|
||||
ctx.set_auth_method(crate::context::AuthMethod::Cleartext);
|
||||
|
||||
let user_info = user_info.clone();
|
||||
let backend = self.auth_backend.as_ref().map(|()| user_info.clone());
|
||||
let (allowed_ips, maybe_secret) = backend.get_allowed_ips_and_secret(ctx).await?;
|
||||
@@ -115,6 +117,8 @@ impl PoolingBackend {
|
||||
user_info: &ComputeUserInfo,
|
||||
jwt: String,
|
||||
) -> Result<ComputeCredentials, AuthError> {
|
||||
ctx.set_auth_method(crate::context::AuthMethod::Jwt);
|
||||
|
||||
match &self.auth_backend {
|
||||
crate::auth::Backend::ControlPlane(console, ()) => {
|
||||
self.config
|
||||
@@ -166,7 +170,7 @@ impl PoolingBackend {
|
||||
conn_info: ConnInfo,
|
||||
keys: ComputeCredentials,
|
||||
force_new: bool,
|
||||
) -> Result<Client<tokio_postgres::Client>, HttpConnError> {
|
||||
) -> Result<Client<postgres_client::Client>, HttpConnError> {
|
||||
let maybe_client = if force_new {
|
||||
debug!("pool: pool is disabled");
|
||||
None
|
||||
@@ -252,7 +256,7 @@ impl PoolingBackend {
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
conn_info: ConnInfo,
|
||||
) -> Result<Client<tokio_postgres::Client>, HttpConnError> {
|
||||
) -> Result<Client<postgres_client::Client>, HttpConnError> {
|
||||
if let Some(client) = self.local_pool.get(ctx, &conn_info)? {
|
||||
return Ok(client);
|
||||
}
|
||||
@@ -311,7 +315,7 @@ impl PoolingBackend {
|
||||
));
|
||||
|
||||
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
|
||||
let (client, connection) = config.connect(tokio_postgres::NoTls).await?;
|
||||
let (client, connection) = config.connect(postgres_client::NoTls).await?;
|
||||
drop(pause);
|
||||
|
||||
let pid = client.get_process_id();
|
||||
@@ -356,7 +360,7 @@ pub(crate) enum HttpConnError {
|
||||
#[error("pooled connection closed at inconsistent state")]
|
||||
ConnectionClosedAbruptly(#[from] tokio::sync::watch::error::SendError<uuid::Uuid>),
|
||||
#[error("could not connection to postgres in compute")]
|
||||
PostgresConnectionError(#[from] tokio_postgres::Error),
|
||||
PostgresConnectionError(#[from] postgres_client::Error),
|
||||
#[error("could not connection to local-proxy in compute")]
|
||||
LocalProxyConnectionError(#[from] LocalProxyConnError),
|
||||
#[error("could not parse JWT payload")]
|
||||
@@ -475,7 +479,7 @@ impl ShouldRetryWakeCompute for LocalProxyConnError {
|
||||
}
|
||||
|
||||
struct TokioMechanism {
|
||||
pool: Arc<GlobalConnPool<tokio_postgres::Client, EndpointConnPool<tokio_postgres::Client>>>,
|
||||
pool: Arc<GlobalConnPool<postgres_client::Client, EndpointConnPool<postgres_client::Client>>>,
|
||||
conn_info: ConnInfo,
|
||||
conn_id: uuid::Uuid,
|
||||
|
||||
@@ -485,7 +489,7 @@ struct TokioMechanism {
|
||||
|
||||
#[async_trait]
|
||||
impl ConnectMechanism for TokioMechanism {
|
||||
type Connection = Client<tokio_postgres::Client>;
|
||||
type Connection = Client<postgres_client::Client>;
|
||||
type ConnectError = HttpConnError;
|
||||
type Error = HttpConnError;
|
||||
|
||||
@@ -495,7 +499,7 @@ impl ConnectMechanism for TokioMechanism {
|
||||
node_info: &CachedNodeInfo,
|
||||
timeout: Duration,
|
||||
) -> Result<Self::Connection, Self::ConnectError> {
|
||||
let host = node_info.config.get_host()?;
|
||||
let host = node_info.config.get_host();
|
||||
let permit = self.locks.get_permit(&host).await?;
|
||||
|
||||
let mut config = (*node_info.config).clone();
|
||||
@@ -505,7 +509,7 @@ impl ConnectMechanism for TokioMechanism {
|
||||
.connect_timeout(timeout);
|
||||
|
||||
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
|
||||
let res = config.connect(tokio_postgres::NoTls).await;
|
||||
let res = config.connect(postgres_client::NoTls).await;
|
||||
drop(pause);
|
||||
let (client, connection) = permit.release_result(res)?;
|
||||
|
||||
@@ -545,16 +549,12 @@ impl ConnectMechanism for HyperMechanism {
|
||||
node_info: &CachedNodeInfo,
|
||||
timeout: Duration,
|
||||
) -> Result<Self::Connection, Self::ConnectError> {
|
||||
let host = node_info.config.get_host()?;
|
||||
let host = node_info.config.get_host();
|
||||
let permit = self.locks.get_permit(&host).await?;
|
||||
|
||||
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
|
||||
|
||||
let port = *node_info.config.get_ports().first().ok_or_else(|| {
|
||||
HttpConnError::WakeCompute(WakeComputeError::BadComputeAddress(
|
||||
"local-proxy port missing on compute address".into(),
|
||||
))
|
||||
})?;
|
||||
let port = node_info.config.get_port();
|
||||
let res = connect_http2(&host, port, timeout).await;
|
||||
drop(pause);
|
||||
let (client, connection) = permit.release_result(res)?;
|
||||
|
||||
@@ -5,11 +5,11 @@ use std::task::{ready, Poll};
|
||||
|
||||
use futures::future::poll_fn;
|
||||
use futures::Future;
|
||||
use postgres_client::tls::NoTlsStream;
|
||||
use postgres_client::AsyncMessage;
|
||||
use smallvec::SmallVec;
|
||||
use tokio::net::TcpStream;
|
||||
use tokio::time::Instant;
|
||||
use tokio_postgres::tls::NoTlsStream;
|
||||
use tokio_postgres::AsyncMessage;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{error, info, info_span, warn, Instrument};
|
||||
#[cfg(test)]
|
||||
@@ -58,7 +58,7 @@ pub(crate) fn poll_client<C: ClientInnerExt>(
|
||||
ctx: &RequestContext,
|
||||
conn_info: ConnInfo,
|
||||
client: C,
|
||||
mut connection: tokio_postgres::Connection<TcpStream, NoTlsStream>,
|
||||
mut connection: postgres_client::Connection<TcpStream, NoTlsStream>,
|
||||
conn_id: uuid::Uuid,
|
||||
aux: MetricsAuxInfo,
|
||||
) -> Client<C> {
|
||||
|
||||
@@ -7,8 +7,8 @@ use std::time::Duration;
|
||||
|
||||
use dashmap::DashMap;
|
||||
use parking_lot::RwLock;
|
||||
use postgres_client::ReadyForQueryStatus;
|
||||
use rand::Rng;
|
||||
use tokio_postgres::ReadyForQueryStatus;
|
||||
use tracing::{debug, info, Span};
|
||||
|
||||
use super::backend::HttpConnError;
|
||||
@@ -683,7 +683,7 @@ pub(crate) trait ClientInnerExt: Sync + Send + 'static {
|
||||
fn get_process_id(&self) -> i32;
|
||||
}
|
||||
|
||||
impl ClientInnerExt for tokio_postgres::Client {
|
||||
impl ClientInnerExt for postgres_client::Client {
|
||||
fn is_closed(&self) -> bool {
|
||||
self.is_closed()
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use postgres_client::types::{Kind, Type};
|
||||
use postgres_client::Row;
|
||||
use serde_json::{Map, Value};
|
||||
use tokio_postgres::types::{Kind, Type};
|
||||
use tokio_postgres::Row;
|
||||
|
||||
//
|
||||
// Convert json non-string types to strings, so that they can be passed to Postgres
|
||||
@@ -61,7 +61,7 @@ fn json_array_to_pg_array(value: &Value) -> Option<String> {
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub(crate) enum JsonConversionError {
|
||||
#[error("internal error compute returned invalid data: {0}")]
|
||||
AsTextError(tokio_postgres::Error),
|
||||
AsTextError(postgres_client::Error),
|
||||
#[error("parse int error: {0}")]
|
||||
ParseIntError(#[from] std::num::ParseIntError),
|
||||
#[error("parse float error: {0}")]
|
||||
|
||||
@@ -22,13 +22,13 @@ use indexmap::IndexMap;
|
||||
use jose_jwk::jose_b64::base64ct::{Base64UrlUnpadded, Encoding};
|
||||
use p256::ecdsa::{Signature, SigningKey};
|
||||
use parking_lot::RwLock;
|
||||
use postgres_client::tls::NoTlsStream;
|
||||
use postgres_client::types::ToSql;
|
||||
use postgres_client::AsyncMessage;
|
||||
use serde_json::value::RawValue;
|
||||
use signature::Signer;
|
||||
use tokio::net::TcpStream;
|
||||
use tokio::time::Instant;
|
||||
use tokio_postgres::tls::NoTlsStream;
|
||||
use tokio_postgres::types::ToSql;
|
||||
use tokio_postgres::AsyncMessage;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{debug, error, info, info_span, warn, Instrument};
|
||||
|
||||
@@ -164,7 +164,7 @@ pub(crate) fn poll_client<C: ClientInnerExt>(
|
||||
ctx: &RequestContext,
|
||||
conn_info: ConnInfo,
|
||||
client: C,
|
||||
mut connection: tokio_postgres::Connection<TcpStream, NoTlsStream>,
|
||||
mut connection: postgres_client::Connection<TcpStream, NoTlsStream>,
|
||||
key: SigningKey,
|
||||
conn_id: uuid::Uuid,
|
||||
aux: MetricsAuxInfo,
|
||||
@@ -280,7 +280,7 @@ pub(crate) fn poll_client<C: ClientInnerExt>(
|
||||
)
|
||||
}
|
||||
|
||||
impl ClientInnerCommon<tokio_postgres::Client> {
|
||||
impl ClientInnerCommon<postgres_client::Client> {
|
||||
pub(crate) async fn set_jwt_session(&mut self, payload: &[u8]) -> Result<(), HttpConnError> {
|
||||
if let ClientDataEnum::Local(local_data) = &mut self.data {
|
||||
local_data.jti += 1;
|
||||
|
||||
@@ -11,12 +11,12 @@ use http_body_util::{BodyExt, Full};
|
||||
use hyper::body::Incoming;
|
||||
use hyper::http::{HeaderName, HeaderValue};
|
||||
use hyper::{header, HeaderMap, Request, Response, StatusCode};
|
||||
use postgres_client::error::{DbError, ErrorPosition, SqlState};
|
||||
use postgres_client::{GenericClient, IsolationLevel, NoTls, ReadyForQueryStatus, Transaction};
|
||||
use pq_proto::StartupMessageParamsBuilder;
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
use tokio::time::{self, Instant};
|
||||
use tokio_postgres::error::{DbError, ErrorPosition, SqlState};
|
||||
use tokio_postgres::{GenericClient, IsolationLevel, NoTls, ReadyForQueryStatus, Transaction};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{debug, error, info};
|
||||
use typed_json::json;
|
||||
@@ -139,9 +139,6 @@ fn get_conn_info(
|
||||
headers: &HeaderMap,
|
||||
tls: Option<&TlsConfig>,
|
||||
) -> Result<ConnInfoWithAuth, ConnInfoError> {
|
||||
// HTTP only uses cleartext (for now and likely always)
|
||||
ctx.set_auth_method(crate::context::AuthMethod::Cleartext);
|
||||
|
||||
let connection_string = headers
|
||||
.get(&CONN_STRING)
|
||||
.ok_or(ConnInfoError::InvalidHeader(&CONN_STRING))?
|
||||
@@ -364,7 +361,7 @@ pub(crate) enum SqlOverHttpError {
|
||||
#[error("invalid isolation level")]
|
||||
InvalidIsolationLevel,
|
||||
#[error("{0}")]
|
||||
Postgres(#[from] tokio_postgres::Error),
|
||||
Postgres(#[from] postgres_client::Error),
|
||||
#[error("{0}")]
|
||||
JsonConversion(#[from] JsonConversionError),
|
||||
#[error("{0}")]
|
||||
@@ -989,7 +986,7 @@ async fn query_to_json<T: GenericClient>(
|
||||
// Manually drain the stream into a vector to leave row_stream hanging
|
||||
// around to get a command tag. Also check that the response is not too
|
||||
// big.
|
||||
let mut rows: Vec<tokio_postgres::Row> = Vec::new();
|
||||
let mut rows: Vec<postgres_client::Row> = Vec::new();
|
||||
while let Some(row) = row_stream.next().await {
|
||||
let row = row?;
|
||||
*current_size += row.body_len();
|
||||
@@ -1066,13 +1063,13 @@ async fn query_to_json<T: GenericClient>(
|
||||
}
|
||||
|
||||
enum Client {
|
||||
Remote(conn_pool_lib::Client<tokio_postgres::Client>),
|
||||
Local(conn_pool_lib::Client<tokio_postgres::Client>),
|
||||
Remote(conn_pool_lib::Client<postgres_client::Client>),
|
||||
Local(conn_pool_lib::Client<postgres_client::Client>),
|
||||
}
|
||||
|
||||
enum Discard<'a> {
|
||||
Remote(conn_pool_lib::Discard<'a, tokio_postgres::Client>),
|
||||
Local(conn_pool_lib::Discard<'a, tokio_postgres::Client>),
|
||||
Remote(conn_pool_lib::Discard<'a, postgres_client::Client>),
|
||||
Local(conn_pool_lib::Discard<'a, postgres_client::Client>),
|
||||
}
|
||||
|
||||
impl Client {
|
||||
@@ -1083,7 +1080,7 @@ impl Client {
|
||||
}
|
||||
}
|
||||
|
||||
fn inner(&mut self) -> (&mut tokio_postgres::Client, Discard<'_>) {
|
||||
fn inner(&mut self) -> (&mut postgres_client::Client, Discard<'_>) {
|
||||
match self {
|
||||
Client::Remote(client) => {
|
||||
let (c, d) = client.inner();
|
||||
|
||||
@@ -24,9 +24,15 @@ const KB: usize = 1024;
|
||||
const MB: usize = 1024 * KB;
|
||||
const GB: usize = 1024 * MB;
|
||||
|
||||
/// Use jemalloc, and configure it to sample allocations for profiles every 1 MB.
|
||||
/// This mirrors the configuration in bin/safekeeper.rs.
|
||||
#[global_allocator]
|
||||
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
||||
|
||||
#[allow(non_upper_case_globals)]
|
||||
#[export_name = "malloc_conf"]
|
||||
pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
|
||||
|
||||
// Register benchmarks with Criterion.
|
||||
criterion_group!(
|
||||
name = benches;
|
||||
|
||||
3
safekeeper/spec/.gitignore
vendored
Normal file
3
safekeeper/spec/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
*TTrace*
|
||||
*.toolbox/
|
||||
states/
|
||||
31
safekeeper/spec/MCProposerAcceptorStatic.tla
Normal file
31
safekeeper/spec/MCProposerAcceptorStatic.tla
Normal file
@@ -0,0 +1,31 @@
|
||||
---- MODULE MCProposerAcceptorStatic ----
|
||||
EXTENDS TLC, ProposerAcceptorStatic
|
||||
|
||||
\* Augments the spec with model checking constraints.
|
||||
|
||||
\* For model checking.
|
||||
CONSTANTS
|
||||
max_entries, \* model constraint: max log entries acceptor/proposer can hold
|
||||
max_term \* model constraint: max allowed term
|
||||
|
||||
ASSUME max_entries \in Nat /\ max_term \in Nat
|
||||
|
||||
\* Model space constraint.
|
||||
StateConstraint == \A p \in proposers:
|
||||
/\ prop_state[p].term <= max_term
|
||||
/\ Len(prop_state[p].wal) <= max_entries
|
||||
\* Sets of proposers and acceptors are symmetric because we don't take any
|
||||
\* actions depending on some concrete proposer/acceptor (like IF p = p1 THEN
|
||||
\* ...)
|
||||
ProposerAcceptorSymmetry == Permutations(proposers) \union Permutations(acceptors)
|
||||
|
||||
\* enforce order of the vars in the error trace with ALIAS
|
||||
\* Note that ALIAS is supported only since version 1.8.0 which is pre-release
|
||||
\* as of writing this.
|
||||
Alias == [
|
||||
prop_state |-> prop_state,
|
||||
acc_state |-> acc_state,
|
||||
committed |-> committed
|
||||
]
|
||||
|
||||
====
|
||||
@@ -1,34 +0,0 @@
|
||||
\* MV CONSTANT declarations
|
||||
CONSTANT NULL = NULL
|
||||
CONSTANTS
|
||||
p1 = p1
|
||||
p2 = p2
|
||||
p3 = p3
|
||||
a1 = a1
|
||||
a2 = a2
|
||||
a3 = a3
|
||||
\* MV CONSTANT definitions
|
||||
CONSTANT
|
||||
proposers = {p1, p2}
|
||||
acceptors = {a1, a2, a3}
|
||||
\* SYMMETRY definition
|
||||
SYMMETRY perms
|
||||
\* CONSTANT definitions
|
||||
CONSTANT
|
||||
max_term = 3
|
||||
CONSTANT
|
||||
max_entries = 3
|
||||
\* INIT definition
|
||||
INIT
|
||||
Init
|
||||
\* NEXT definition
|
||||
NEXT
|
||||
Next
|
||||
\* INVARIANT definition
|
||||
INVARIANT
|
||||
TypeOk
|
||||
ElectionSafety
|
||||
LogIsMonotonic
|
||||
LogSafety
|
||||
CommittedNotOverwritten
|
||||
CHECK_DEADLOCK FALSE
|
||||
@@ -1,363 +0,0 @@
|
||||
---- MODULE ProposerAcceptorConsensus ----
|
||||
|
||||
\* Differences from current implementation:
|
||||
\* - unified not-globally-unique epoch & term (node_id)
|
||||
\* Simplifications:
|
||||
\* - instant message delivery
|
||||
\* - feedback is not modeled separately, commit_lsn is updated directly
|
||||
|
||||
EXTENDS Integers, Sequences, FiniteSets, TLC
|
||||
|
||||
VARIABLES
|
||||
prop_state, \* prop_state[p] is state of proposer p
|
||||
acc_state, \* acc_state[a] is state of acceptor a
|
||||
commit_lsns \* map of acceptor -> commit_lsn
|
||||
|
||||
CONSTANT
|
||||
acceptors,
|
||||
proposers,
|
||||
max_entries, \* model constraint: max log entries acceptor/proposer can hold
|
||||
max_term \* model constraint: max allowed term
|
||||
|
||||
CONSTANT NULL
|
||||
|
||||
ASSUME max_entries \in Nat /\ max_term \in Nat
|
||||
|
||||
\* For specifying symmetry set in manual cfg file, see
|
||||
\* https://github.com/tlaplus/tlaplus/issues/404
|
||||
perms == Permutations(proposers) \union Permutations(acceptors)
|
||||
|
||||
\********************************************************************************
|
||||
\* Helpers
|
||||
\********************************************************************************
|
||||
|
||||
Maximum(S) ==
|
||||
(*************************************************************************)
|
||||
(* If S is a set of numbers, then this define Maximum(S) to be the *)
|
||||
(* maximum of those numbers, or -1 if S is empty. *)
|
||||
(*************************************************************************)
|
||||
IF S = {} THEN -1
|
||||
ELSE CHOOSE n \in S : \A m \in S : n \geq m
|
||||
|
||||
\* minimum of numbers in the set, error if set is empty
|
||||
Minimum(S) ==
|
||||
CHOOSE min \in S : \A n \in S : min <= n
|
||||
|
||||
\* Min of two numbers
|
||||
Min(a, b) == IF a < b THEN a ELSE b
|
||||
|
||||
\* Set of values of function f. XXX is there a such builtin?
|
||||
FValues(f) == {f[a] : a \in DOMAIN f}
|
||||
|
||||
\* Sort of 0 for functions
|
||||
EmptyF == [x \in {} |-> 42]
|
||||
IsEmptyF(f) == DOMAIN f = {}
|
||||
|
||||
\* Next entry proposer p will push to acceptor a or NULL.
|
||||
NextEntry(p, a) ==
|
||||
IF Len(prop_state[p].wal) >= prop_state[p].next_send_lsn[a] THEN
|
||||
CHOOSE r \in FValues(prop_state[p].wal) : r.lsn = prop_state[p].next_send_lsn[a]
|
||||
ELSE
|
||||
NULL
|
||||
|
||||
|
||||
\*****************
|
||||
|
||||
NumAccs == Cardinality(acceptors)
|
||||
|
||||
\* does acc_set form the quorum?
|
||||
Quorum(acc_set) == Cardinality(acc_set) >= (NumAccs \div 2 + 1)
|
||||
\* all quorums of acceptors
|
||||
Quorums == {subset \in SUBSET acceptors: Quorum(subset)}
|
||||
|
||||
\* flush_lsn of acceptor a.
|
||||
FlushLsn(a) == Len(acc_state[a].wal)
|
||||
|
||||
|
||||
\********************************************************************************
|
||||
\* Type assertion
|
||||
\********************************************************************************
|
||||
\* Defining sets of all possible tuples and using them in TypeOk in usual
|
||||
\* all-tuples constructor is not practical because such definitions force
|
||||
\* TLC to enumerate them, while they are are horribly enormous
|
||||
\* (TLC screams "Attempted to construct a set with too many elements").
|
||||
\* So instead check types manually.
|
||||
TypeOk ==
|
||||
/\ \A p \in proposers:
|
||||
/\ DOMAIN prop_state[p] = {"state", "term", "votes", "donor_epoch", "vcl", "wal", "next_send_lsn"}
|
||||
\* in campaign proposer sends RequestVote and waits for acks;
|
||||
\* in leader he is elected
|
||||
/\ prop_state[p].state \in {"campaign", "leader"}
|
||||
\* 0..max_term should be actually Nat in the unbounded model, but TLC won't
|
||||
\* swallow it
|
||||
/\ prop_state[p].term \in 0..max_term
|
||||
\* votes received
|
||||
/\ \A voter \in DOMAIN prop_state[p].votes:
|
||||
/\ voter \in acceptors
|
||||
/\ prop_state[p].votes[voter] \in [epoch: 0..max_term, flush_lsn: 0..max_entries]
|
||||
/\ prop_state[p].donor_epoch \in 0..max_term
|
||||
\* wal is sequence of just <lsn, epoch of author> records
|
||||
/\ \A i \in DOMAIN prop_state[p].wal:
|
||||
prop_state[p].wal[i] \in [lsn: 1..max_entries, epoch: 1..max_term]
|
||||
\* Following implementation, we skew the original Aurora meaning of this;
|
||||
\* here it is lsn of highest definitely committed record as set by proposer
|
||||
\* when it is elected; it doesn't change since then
|
||||
/\ prop_state[p].vcl \in 0..max_entries
|
||||
\* map of acceptor -> next lsn to send
|
||||
/\ \A a \in DOMAIN prop_state[p].next_send_lsn:
|
||||
/\ a \in acceptors
|
||||
/\ prop_state[p].next_send_lsn[a] \in 1..(max_entries + 1)
|
||||
/\ \A a \in acceptors:
|
||||
/\ DOMAIN acc_state[a] = {"term", "epoch", "wal"}
|
||||
/\ acc_state[a].term \in 0..max_term
|
||||
/\ acc_state[a].epoch \in 0..max_term
|
||||
/\ \A i \in DOMAIN acc_state[a].wal:
|
||||
acc_state[a].wal[i] \in [lsn: 1..max_entries, epoch: 1..max_term]
|
||||
/\ \A a \in DOMAIN commit_lsns:
|
||||
/\ a \in acceptors
|
||||
/\ commit_lsns[a] \in 0..max_entries
|
||||
|
||||
\********************************************************************************
|
||||
\* Initial
|
||||
\********************************************************************************
|
||||
|
||||
Init ==
|
||||
/\ prop_state = [p \in proposers |-> [
|
||||
state |-> "campaign",
|
||||
term |-> 1,
|
||||
votes |-> EmptyF,
|
||||
donor_epoch |-> 0,
|
||||
vcl |-> 0,
|
||||
wal |-> << >>,
|
||||
next_send_lsn |-> EmptyF
|
||||
]]
|
||||
/\ acc_state = [a \in acceptors |-> [
|
||||
\* there will be no leader in this term, 1 is the first real
|
||||
term |-> 0,
|
||||
epoch |-> 0,
|
||||
wal |-> << >>
|
||||
]]
|
||||
/\ commit_lsns = [a \in acceptors |-> 0]
|
||||
|
||||
|
||||
\********************************************************************************
|
||||
\* Actions
|
||||
\********************************************************************************
|
||||
|
||||
\* Proposer loses all state.
|
||||
\* For simplicity (and to reduct state space), we assume it immediately gets
|
||||
\* current state from quorum q of acceptors determining the term he will request
|
||||
\* to vote for.
|
||||
RestartProposer(p, q) ==
|
||||
/\ Quorum(q)
|
||||
/\ LET
|
||||
new_term == Maximum({acc_state[a].term : a \in q}) + 1
|
||||
IN
|
||||
/\ new_term <= max_term
|
||||
/\ prop_state' = [prop_state EXCEPT ![p].state = "campaign",
|
||||
![p].term = new_term,
|
||||
![p].votes = EmptyF,
|
||||
![p].donor_epoch = 0,
|
||||
![p].vcl = 0,
|
||||
![p].wal = << >>,
|
||||
![p].next_send_lsn = EmptyF]
|
||||
/\ UNCHANGED <<acc_state, commit_lsns>>
|
||||
|
||||
\* Acceptor a immediately votes for proposer p.
|
||||
Vote(p, a) ==
|
||||
/\ prop_state[p].state = "campaign"
|
||||
/\ acc_state[a].term < prop_state[p].term \* main voting condition
|
||||
/\ acc_state' = [acc_state EXCEPT ![a].term = prop_state[p].term]
|
||||
/\ LET
|
||||
vote == [epoch |-> acc_state[a].epoch, flush_lsn |-> FlushLsn(a)]
|
||||
IN
|
||||
prop_state' = [prop_state EXCEPT ![p].votes = prop_state[p].votes @@ (a :> vote)]
|
||||
/\ UNCHANGED <<commit_lsns>>
|
||||
|
||||
|
||||
\* Proposer p gets elected.
|
||||
BecomeLeader(p) ==
|
||||
/\ prop_state[p].state = "campaign"
|
||||
/\ Quorum(DOMAIN prop_state[p].votes)
|
||||
/\ LET
|
||||
max_epoch == Maximum({v.epoch : v \in FValues(prop_state[p].votes)})
|
||||
max_epoch_votes == {v \in FValues(prop_state[p].votes) : v.epoch = max_epoch}
|
||||
donor == CHOOSE dv \in DOMAIN prop_state[p].votes :
|
||||
/\ prop_state[p].votes[dv].epoch = max_epoch
|
||||
/\ \A v \in max_epoch_votes:
|
||||
prop_state[p].votes[dv].flush_lsn >= v.flush_lsn
|
||||
max_vote == prop_state[p].votes[donor]
|
||||
\* Establish lsn to stream from for voters.
|
||||
\* At some point it seemed like we can regard log as correct and only
|
||||
\* append to it if has in the max_epoch, however TLC showed that's not
|
||||
\* the case; we must always stream since first not matching record.
|
||||
next_send_lsn == [voter \in DOMAIN prop_state[p].votes |-> 1]
|
||||
IN
|
||||
\* we fetch log from the most advanced node (this is separate
|
||||
\* roundtrip), make sure node is still on one term with us
|
||||
/\ acc_state[donor].term = prop_state[p].term
|
||||
/\ prop_state' = [prop_state EXCEPT ![p].state = "leader",
|
||||
\* fetch the log from donor
|
||||
![p].wal = acc_state[donor].wal,
|
||||
![p].donor_epoch = max_epoch,
|
||||
![p].vcl = max_vote.flush_lsn,
|
||||
![p].next_send_lsn = next_send_lsn]
|
||||
/\ UNCHANGED <<acc_state, commit_lsns>>
|
||||
|
||||
|
||||
\* acceptor a learns about elected proposer p's term.
|
||||
UpdateTerm(p, a) ==
|
||||
/\ prop_state[p].state = "leader"
|
||||
/\ acc_state[a].term < prop_state[p].term
|
||||
/\ acc_state' = [acc_state EXCEPT ![a].term = prop_state[p].term]
|
||||
/\ UNCHANGED <<prop_state, commit_lsns>>
|
||||
|
||||
|
||||
\* Acceptor a which didn't participate in voting connects to elected proposer p
|
||||
\* and p sets the streaming point
|
||||
HandshakeWithLeader(p, a) ==
|
||||
/\ prop_state[p].state = "leader"
|
||||
/\ acc_state[a].term = prop_state[p].term
|
||||
/\ a \notin DOMAIN prop_state[p].next_send_lsn
|
||||
/\ LET
|
||||
next_send_lsn == prop_state[p].next_send_lsn @@ (a :> 1)
|
||||
IN
|
||||
prop_state' = [prop_state EXCEPT ![p].next_send_lsn = next_send_lsn]
|
||||
/\ UNCHANGED <<acc_state, commit_lsns>>
|
||||
|
||||
|
||||
\* Append new log entry to elected proposer
|
||||
NewEntry(p) ==
|
||||
/\ prop_state[p].state = "leader"
|
||||
/\ Len(prop_state[p].wal) < max_entries \* model constraint
|
||||
/\ LET
|
||||
new_lsn == IF Len(prop_state[p].wal) = 0 THEN
|
||||
prop_state[p].vcl + 1
|
||||
ELSE
|
||||
\* lsn of last record + 1
|
||||
prop_state[p].wal[Len(prop_state[p].wal)].lsn + 1
|
||||
new_entry == [lsn |-> new_lsn, epoch |-> prop_state[p].term]
|
||||
IN
|
||||
/\ prop_state' = [prop_state EXCEPT ![p].wal = Append(prop_state[p].wal, new_entry)]
|
||||
/\ UNCHANGED <<acc_state, commit_lsns>>
|
||||
|
||||
|
||||
\* Write entry new_e to log wal, rolling back all higher entries if e is different.
|
||||
\* If bump_epoch is TRUE, it means we get record with lsn=vcl and going to update
|
||||
\* the epoch. Truncate log in this case as well, as we might have correct <= vcl
|
||||
\* part and some outdated entries behind it which we want to purge before
|
||||
\* declaring us as recovered. Another way to accomplish this (in previous commit)
|
||||
\* is wait for first-entry-from-new-epoch before bumping it.
|
||||
WriteEntry(wal, new_e, bump_epoch) ==
|
||||
(new_e.lsn :> new_e) @@
|
||||
\* If wal has entry with such lsn and it is different, truncate all higher log.
|
||||
IF \/ (new_e.lsn \in DOMAIN wal /\ wal[new_e.lsn] /= new_e)
|
||||
\/ bump_epoch THEN
|
||||
SelectSeq(wal, LAMBDA e: e.lsn < new_e.lsn)
|
||||
ELSE
|
||||
wal
|
||||
|
||||
|
||||
\* Try to transfer entry from elected proposer p to acceptor a
|
||||
TransferEntry(p, a) ==
|
||||
/\ prop_state[p].state = "leader"
|
||||
/\ prop_state[p].term = acc_state[a].term
|
||||
/\ a \in DOMAIN prop_state[p].next_send_lsn
|
||||
/\ LET
|
||||
next_e == NextEntry(p, a)
|
||||
IN
|
||||
/\ next_e /= NULL
|
||||
/\ LET
|
||||
\* Consider bumping epoch if getting this entry recovers the acceptor,
|
||||
\* that is, we reach first record behind VCL.
|
||||
new_epoch ==
|
||||
IF /\ acc_state[a].epoch < prop_state[p].term
|
||||
/\ next_e.lsn >= prop_state[p].vcl
|
||||
THEN
|
||||
prop_state[p].term
|
||||
ELSE
|
||||
acc_state[a].epoch
|
||||
\* Also check whether this entry allows to advance commit_lsn and
|
||||
\* if so, bump it where possible. Modeling this as separate action
|
||||
\* significantly bloats the space (5m vs 15m on max_entries=3 max_term=3,
|
||||
\* so act immediately.
|
||||
entry_owners == {o \in acceptors:
|
||||
/\ o /= a
|
||||
\* only recovered acceptors advance commit_lsn
|
||||
/\ acc_state[o].epoch = prop_state[p].term
|
||||
/\ next_e \in FValues(acc_state[o].wal)} \cup {a}
|
||||
IN
|
||||
/\ acc_state' = [acc_state EXCEPT ![a].wal = WriteEntry(acc_state[a].wal, next_e, new_epoch /= acc_state[a].epoch),
|
||||
![a].epoch = new_epoch]
|
||||
/\ prop_state' = [prop_state EXCEPT ![p].next_send_lsn[a] =
|
||||
prop_state[p].next_send_lsn[a] + 1]
|
||||
/\ commit_lsns' = IF /\ new_epoch = prop_state[p].term
|
||||
/\ Quorum(entry_owners)
|
||||
THEN
|
||||
[acc \in acceptors |->
|
||||
IF /\ acc \in entry_owners
|
||||
/\ next_e.lsn > commit_lsns[acc]
|
||||
THEN
|
||||
next_e.lsn
|
||||
ELSE
|
||||
commit_lsns[acc]]
|
||||
ELSE
|
||||
commit_lsns
|
||||
|
||||
|
||||
\*******************************************************************************
|
||||
\* Final spec
|
||||
\*******************************************************************************
|
||||
|
||||
Next ==
|
||||
\/ \E q \in Quorums: \E p \in proposers: RestartProposer(p, q)
|
||||
\/ \E p \in proposers: \E a \in acceptors: Vote(p, a)
|
||||
\/ \E p \in proposers: BecomeLeader(p)
|
||||
\/ \E p \in proposers: \E a \in acceptors: UpdateTerm(p, a)
|
||||
\/ \E p \in proposers: \E a \in acceptors: HandshakeWithLeader(p, a)
|
||||
\/ \E p \in proposers: NewEntry(p)
|
||||
\/ \E p \in proposers: \E a \in acceptors: TransferEntry(p, a)
|
||||
|
||||
Spec == Init /\ [][Next]_<<prop_state, acc_state, commit_lsns>>
|
||||
|
||||
|
||||
\********************************************************************************
|
||||
\* Invariants
|
||||
\********************************************************************************
|
||||
|
||||
\* we don't track history, but this property is fairly convincing anyway
|
||||
ElectionSafety ==
|
||||
\A p1, p2 \in proposers:
|
||||
(/\ prop_state[p1].state = "leader"
|
||||
/\ prop_state[p2].state = "leader"
|
||||
/\ prop_state[p1].term = prop_state[p2].term) => (p1 = p2)
|
||||
|
||||
LogIsMonotonic ==
|
||||
\A a \in acceptors:
|
||||
\A i \in DOMAIN acc_state[a].wal: \A j \in DOMAIN acc_state[a].wal:
|
||||
(i > j) => (/\ acc_state[a].wal[i].lsn > acc_state[a].wal[j].lsn
|
||||
/\ acc_state[a].wal[i].epoch >= acc_state[a].wal[j].epoch)
|
||||
|
||||
\* Main invariant: log under commit_lsn must match everywhere.
|
||||
LogSafety ==
|
||||
\A a1 \in acceptors: \A a2 \in acceptors:
|
||||
LET
|
||||
common_len == Min(commit_lsns[a1], commit_lsns[a2])
|
||||
IN
|
||||
SubSeq(acc_state[a1].wal, 1, common_len) = SubSeq(acc_state[a2].wal, 1, common_len)
|
||||
|
||||
\* Next record we are going to push to acceptor must never overwrite committed
|
||||
\* different record.
|
||||
CommittedNotOverwritten ==
|
||||
\A p \in proposers: \A a \in acceptors:
|
||||
(/\ prop_state[p].state = "leader"
|
||||
/\ prop_state[p].term = acc_state[a].term
|
||||
/\ a \in DOMAIN prop_state[p].next_send_lsn) =>
|
||||
LET
|
||||
next_e == NextEntry(p, a)
|
||||
IN
|
||||
(next_e /= NULL) =>
|
||||
((commit_lsns[a] >= next_e.lsn) => (acc_state[a].wal[next_e.lsn] = next_e))
|
||||
|
||||
|
||||
====
|
||||
449
safekeeper/spec/ProposerAcceptorStatic.tla
Normal file
449
safekeeper/spec/ProposerAcceptorStatic.tla
Normal file
@@ -0,0 +1,449 @@
|
||||
---- MODULE ProposerAcceptorStatic ----
|
||||
|
||||
(*
|
||||
The protocol is very similar to Raft. The key differences are:
|
||||
- Leaders (proposers) are separated from storage nodes (acceptors), which has
|
||||
been already an established way to think about Paxos.
|
||||
- We don't want to stamp each log record with term, so instead carry around
|
||||
term histories which are sequences of <term, LSN where term begins> pairs.
|
||||
As a bonus (and subtlety) this allows the proposer to commit entries from
|
||||
previous terms without writing new records -- if acceptor's log is caught
|
||||
up, update of term history on it updates last_log_term as well.
|
||||
*)
|
||||
|
||||
\* Model simplifications:
|
||||
\* - Instant message delivery. Notably, ProposerElected message (TruncateWal action) is not
|
||||
\* delayed, so we don't attempt to truncate WAL when the same wp already appended something
|
||||
\* on the acceptor since common point had been calculated (this should be rejected).
|
||||
\* - old WAL is immediately copied to proposer on its election, without on-demand fetch later.
|
||||
|
||||
\* Some ideas how to break it to play around to get a feeling:
|
||||
\* - replace Quorums with BadQuorums.
|
||||
\* - remove 'don't commit entries from previous terms separately' rule in
|
||||
\* CommitEntries and observe figure 8 from the raft paper.
|
||||
\* With p2a3t4l4 32 steps error was found in 1h on 80 cores.
|
||||
|
||||
EXTENDS Integers, Sequences, FiniteSets, TLC
|
||||
|
||||
VARIABLES
|
||||
prop_state, \* prop_state[p] is state of proposer p
|
||||
acc_state, \* acc_state[a] is state of acceptor a
|
||||
committed, \* bag (set) of ever committed <<term, lsn>> entries
|
||||
elected_history \* counter for elected terms, see TypeOk for details
|
||||
|
||||
CONSTANT
|
||||
acceptors,
|
||||
proposers
|
||||
|
||||
CONSTANT NULL
|
||||
|
||||
\********************************************************************************
|
||||
\* Helpers
|
||||
\********************************************************************************
|
||||
|
||||
Maximum(S) ==
|
||||
(*************************************************************************)
|
||||
(* If S is a set of numbers, then this define Maximum(S) to be the *)
|
||||
(* maximum of those numbers, or -1 if S is empty. *)
|
||||
(*************************************************************************)
|
||||
IF S = {} THEN -1 ELSE CHOOSE n \in S : \A m \in S : n \geq m
|
||||
|
||||
\* minimum of numbers in the set, error if set is empty
|
||||
Minimum(S) == CHOOSE min \in S : \A n \in S : min <= n
|
||||
|
||||
\* Min of two numbers
|
||||
Min(a, b) == IF a < b THEN a ELSE b
|
||||
|
||||
\* Sort of 0 for functions
|
||||
EmptyF == [x \in {} |-> 42]
|
||||
IsEmptyF(f) == DOMAIN f = {}
|
||||
|
||||
\* Set of values (image) of the function f. Apparently no such builtin.
|
||||
Range(f) == {f[x] : x \in DOMAIN f}
|
||||
|
||||
\* If key k is in function f, map it using l, otherwise insert v. Returns the
|
||||
\* updated function.
|
||||
Upsert(f, k, v, l(_)) ==
|
||||
LET new_val == IF k \in DOMAIN f THEN l(f[k]) ELSE v IN
|
||||
(k :> new_val) @@ f
|
||||
|
||||
\*****************
|
||||
|
||||
NumAccs == Cardinality(acceptors)
|
||||
|
||||
\* does acc_set form the quorum?
|
||||
Quorum(acc_set) == Cardinality(acc_set) >= (NumAccs \div 2 + 1)
|
||||
\* all quorums of acceptors
|
||||
Quorums == {subset \in SUBSET acceptors: Quorum(subset)}
|
||||
|
||||
\* For substituting Quorums and seeing what happens.
|
||||
BadQuorum(acc_set) == Cardinality(acc_set) >= (NumAccs \div 2)
|
||||
BadQuorums == {subset \in SUBSET acceptors: BadQuorum(subset)}
|
||||
|
||||
\* flushLsn (end of WAL, i.e. index of next entry) of acceptor a.
|
||||
FlushLsn(a) == Len(acc_state[a].wal) + 1
|
||||
|
||||
\* Typedefs. Note that TLA+ Nat includes zero.
|
||||
Terms == Nat
|
||||
Lsns == Nat
|
||||
|
||||
\********************************************************************************
|
||||
\* Type assertion
|
||||
\********************************************************************************
|
||||
\* Defining sets of all possible tuples and using them in TypeOk in usual
|
||||
\* all-tuples constructor is not practical because such definitions force
|
||||
\* TLC to enumerate them, while they are are horribly enormous
|
||||
\* (TLC screams "Attempted to construct a set with too many elements").
|
||||
\* So instead check types manually.
|
||||
|
||||
|
||||
\* Term history is a sequence of <term, LSN where term begins> pairs.
|
||||
IsTermHistory(th) ==
|
||||
\A th_entry \in Range(th): th_entry.term \in Terms /\ th_entry.lsn \in Lsns
|
||||
|
||||
IsWal(w) ==
|
||||
\A i \in DOMAIN w:
|
||||
/\ i \in Lsns
|
||||
/\ w[i] \in Terms
|
||||
|
||||
TypeOk ==
|
||||
/\ \A p \in proposers:
|
||||
\* '_' in field names hinders pretty printing
|
||||
\* https://github.com/tlaplus/tlaplus/issues/1051
|
||||
\* so use camel case.
|
||||
/\ DOMAIN prop_state[p] = {"state", "term", "votes", "termHistory", "wal", "nextSendLsn"}
|
||||
\* In campaign proposer sends RequestVote and waits for acks;
|
||||
\* in leader he is elected.
|
||||
/\ prop_state[p].state \in {"campaign", "leader"}
|
||||
\* term for which it will campaign, or won term in leader state
|
||||
/\ prop_state[p].term \in Terms
|
||||
\* votes received
|
||||
/\ \A voter \in DOMAIN prop_state[p].votes: voter \in acceptors
|
||||
/\ \A vote \in Range(prop_state[p].votes):
|
||||
/\ IsTermHistory(vote.termHistory)
|
||||
/\ vote.flushLsn \in Lsns
|
||||
\* Proposer's term history. Empty while proposer is in "campaign".
|
||||
/\ IsTermHistory(prop_state[p].termHistory)
|
||||
\* In the model we identify WAL entries only by <term, LSN> pairs
|
||||
\* without additional unique id, which is enough for its purposes.
|
||||
\* It means that with term history fully modeled wal becomes
|
||||
\* redundant as it can be computed from term history + WAL length.
|
||||
\* However, we still keep it here and at acceptors as explicit sequence
|
||||
\* where index is LSN and value is the term to avoid artificial mapping to
|
||||
\* figure out real entries. It shouldn't bloat model much because this
|
||||
\* doesn't increase number of distinct states.
|
||||
/\ IsWal(prop_state[p].wal)
|
||||
\* Map of acceptor -> next lsn to send. It is set when truncate_wal is
|
||||
\* done so sending entries is allowed only after that. In the impl TCP
|
||||
\* ensures this ordering.
|
||||
/\ \A a \in DOMAIN prop_state[p].nextSendLsn:
|
||||
/\ a \in acceptors
|
||||
/\ prop_state[p].nextSendLsn[a] \in Lsns
|
||||
/\ \A a \in acceptors:
|
||||
/\ DOMAIN acc_state[a] = {"term", "termHistory", "wal"}
|
||||
/\ acc_state[a].term \in Terms
|
||||
/\ IsTermHistory(acc_state[a].termHistory)
|
||||
/\ IsWal(acc_state[a].wal)
|
||||
/\ \A c \in committed:
|
||||
/\ c.term \in Terms
|
||||
/\ c.lsn \in Lsns
|
||||
\* elected_history is a retrospective map of term -> number of times it was
|
||||
\* elected, for use in ElectionSafetyFull invariant. For static spec it is
|
||||
\* fairly convincing that it holds, but with membership change it is less
|
||||
\* trivial. And as we identify log entries only with <term, lsn>, importance
|
||||
\* of it is quite high as violation of log safety might go undetected if
|
||||
\* election safety is violated. Note though that this is not always the
|
||||
\* case, i.e. you can imagine (and TLC should find) schedule where log
|
||||
\* safety violation is still detected because two leaders with the same term
|
||||
\* commit histories which are different in previous terms, so it is not that
|
||||
\* crucial. Plus if spec allows ElectionSafetyFull violation, likely
|
||||
\* ElectionSafety will also be violated in some schedules. But neither it
|
||||
\* should bloat the model too much.
|
||||
/\ \A term \in DOMAIN elected_history:
|
||||
/\ term \in Terms
|
||||
/\ elected_history[term] \in Nat
|
||||
|
||||
\********************************************************************************
|
||||
\* Initial
|
||||
\********************************************************************************
|
||||
|
||||
Init ==
|
||||
/\ prop_state = [p \in proposers |-> [
|
||||
state |-> "campaign",
|
||||
term |-> 1,
|
||||
votes |-> EmptyF,
|
||||
termHistory |-> << >>,
|
||||
wal |-> << >>,
|
||||
nextSendLsn |-> EmptyF
|
||||
]]
|
||||
/\ acc_state = [a \in acceptors |-> [
|
||||
\* There will be no leader in zero term, 1 is the first
|
||||
\* real.
|
||||
term |-> 0,
|
||||
\* Again, leader in term 0 doesn't exist, but we initialize
|
||||
\* term histories with it to always have common point in
|
||||
\* them. Lsn is 1 because TLA+ sequences are indexed from 1
|
||||
\* (we don't want to truncate WAL out of range).
|
||||
termHistory |-> << [term |-> 0, lsn |-> 1] >>,
|
||||
wal |-> << >>
|
||||
]]
|
||||
/\ committed = {}
|
||||
/\ elected_history = EmptyF
|
||||
|
||||
|
||||
\********************************************************************************
|
||||
\* Actions
|
||||
\********************************************************************************
|
||||
|
||||
\* Proposer loses all state.
|
||||
\* For simplicity (and to reduct state space), we assume it immediately gets
|
||||
\* current state from quorum q of acceptors determining the term he will request
|
||||
\* to vote for.
|
||||
RestartProposer(p, q) ==
|
||||
/\ Quorum(q)
|
||||
/\ LET new_term == Maximum({acc_state[a].term : a \in q}) + 1 IN
|
||||
/\ prop_state' = [prop_state EXCEPT ![p].state = "campaign",
|
||||
![p].term = new_term,
|
||||
![p].votes = EmptyF,
|
||||
![p].termHistory = << >>,
|
||||
![p].wal = << >>,
|
||||
![p].nextSendLsn = EmptyF]
|
||||
/\ UNCHANGED <<acc_state, committed, elected_history>>
|
||||
|
||||
\* Term history of acceptor a's WAL: the one saved truncated to contain only <=
|
||||
\* local FlushLsn entries.
|
||||
AcceptorTermHistory(a) ==
|
||||
SelectSeq(acc_state[a].termHistory, LAMBDA th_entry: th_entry.lsn <= FlushLsn(a))
|
||||
|
||||
\* Acceptor a immediately votes for proposer p.
|
||||
Vote(p, a) ==
|
||||
/\ prop_state[p].state = "campaign"
|
||||
/\ acc_state[a].term < prop_state[p].term \* main voting condition
|
||||
/\ acc_state' = [acc_state EXCEPT ![a].term = prop_state[p].term]
|
||||
/\ LET
|
||||
vote == [termHistory |-> AcceptorTermHistory(a), flushLsn |-> FlushLsn(a)]
|
||||
IN
|
||||
prop_state' = [prop_state EXCEPT ![p].votes = (a :> vote) @@ prop_state[p].votes]
|
||||
/\ UNCHANGED <<committed, elected_history>>
|
||||
|
||||
|
||||
\* Get lastLogTerm from term history th.
|
||||
LastLogTerm(th) == th[Len(th)].term
|
||||
|
||||
\* Proposer p gets elected.
|
||||
BecomeLeader(p) ==
|
||||
/\ prop_state[p].state = "campaign"
|
||||
/\ Quorum(DOMAIN prop_state[p].votes)
|
||||
/\ LET
|
||||
\* Find acceptor with the highest <last_log_term, lsn> vote.
|
||||
max_vote_acc ==
|
||||
CHOOSE a \in DOMAIN prop_state[p].votes:
|
||||
LET v == prop_state[p].votes[a]
|
||||
IN \A v2 \in Range(prop_state[p].votes):
|
||||
/\ LastLogTerm(v.termHistory) >= LastLogTerm(v2.termHistory)
|
||||
/\ (LastLogTerm(v.termHistory) = LastLogTerm(v2.termHistory) => v.flushLsn >= v2.flushLsn)
|
||||
max_vote == prop_state[p].votes[max_vote_acc]
|
||||
prop_th == Append(max_vote.termHistory, [term |-> prop_state[p].term, lsn |-> max_vote.flushLsn])
|
||||
IN
|
||||
\* We copy all log preceding proposer's term from the max vote node so
|
||||
\* make sure it is still on one term with us. This is a model
|
||||
\* simplification which can be removed, in impl we fetch WAL on demand
|
||||
\* from safekeeper which has it later. Note though that in case of on
|
||||
\* demand fetch we must check on donor not only term match, but that
|
||||
\* truncate_wal had already been done (if it is not max_vote_acc).
|
||||
/\ acc_state[max_vote_acc].term = prop_state[p].term
|
||||
/\ prop_state' = [prop_state EXCEPT ![p].state = "leader",
|
||||
![p].termHistory = prop_th,
|
||||
![p].wal = acc_state[max_vote_acc].wal
|
||||
]
|
||||
/\ elected_history' = Upsert(elected_history, prop_state[p].term, 1, LAMBDA c: c + 1)
|
||||
/\ UNCHANGED <<acc_state, committed>>
|
||||
|
||||
|
||||
\* Acceptor a learns about elected proposer p's term. In impl it matches to
|
||||
\* VoteRequest/VoteResponse exchange when leader is already elected and is not
|
||||
\* interested in the vote result.
|
||||
UpdateTerm(p, a) ==
|
||||
/\ prop_state[p].state = "leader"
|
||||
/\ acc_state[a].term < prop_state[p].term
|
||||
/\ acc_state' = [acc_state EXCEPT ![a].term = prop_state[p].term]
|
||||
/\ UNCHANGED <<prop_state, committed, elected_history>>
|
||||
|
||||
\* Find highest common point (LSN of the first divergent record) in the logs of
|
||||
\* proposer p and acceptor a. Returns <term, lsn> of the highest common point.
|
||||
FindHighestCommonPoint(prop_th, acc_th, acc_flush_lsn) ==
|
||||
LET
|
||||
\* First find index of the highest common term.
|
||||
\* It must exist because we initialize th with <0, 1>.
|
||||
last_common_idx == Maximum({i \in 1..Min(Len(prop_th), Len(acc_th)): prop_th[i].term = acc_th[i].term})
|
||||
last_common_term == prop_th[last_common_idx].term
|
||||
\* Now find where it ends at both prop and acc and take min. End of term
|
||||
\* is the start of the next unless it is the last one; there it is
|
||||
\* flush_lsn in case of acceptor. In case of proposer it is the current
|
||||
\* writing position, but it can't be less than flush_lsn, so we
|
||||
\* take flush_lsn.
|
||||
acc_common_term_end == IF last_common_idx = Len(acc_th) THEN acc_flush_lsn ELSE acc_th[last_common_idx + 1].lsn
|
||||
prop_common_term_end == IF last_common_idx = Len(prop_th) THEN acc_flush_lsn ELSE prop_th[last_common_idx + 1].lsn
|
||||
IN
|
||||
[term |-> last_common_term, lsn |-> Min(acc_common_term_end, prop_common_term_end)]
|
||||
|
||||
\* Elected proposer p immediately truncates WAL (and term history) of acceptor a
|
||||
\* before starting streaming. Establishes nextSendLsn for a.
|
||||
\*
|
||||
\* In impl this happens at each reconnection, here we also allow to do it multiple times.
|
||||
TruncateWal(p, a) ==
|
||||
/\ prop_state[p].state = "leader"
|
||||
/\ acc_state[a].term = prop_state[p].term
|
||||
/\ LET
|
||||
hcp == FindHighestCommonPoint(prop_state[p].termHistory, AcceptorTermHistory(a), FlushLsn(a))
|
||||
next_send_lsn == (a :> hcp.lsn) @@ prop_state[p].nextSendLsn
|
||||
IN
|
||||
\* Acceptor persists full history immediately; reads adjust it to the
|
||||
\* really existing wal with AcceptorTermHistory.
|
||||
/\ acc_state' = [acc_state EXCEPT ![a].termHistory = prop_state[p].termHistory,
|
||||
\* note: SubSeq is inclusive, hence -1.
|
||||
![a].wal = SubSeq(acc_state[a].wal, 1, hcp.lsn - 1)
|
||||
]
|
||||
/\ prop_state' = [prop_state EXCEPT ![p].nextSendLsn = next_send_lsn]
|
||||
/\ UNCHANGED <<committed, elected_history>>
|
||||
|
||||
\* Append new log entry to elected proposer
|
||||
NewEntry(p) ==
|
||||
/\ prop_state[p].state = "leader"
|
||||
/\ LET
|
||||
\* entry consists only of term, index serves as LSN.
|
||||
new_entry == prop_state[p].term
|
||||
IN
|
||||
/\ prop_state' = [prop_state EXCEPT ![p].wal = Append(prop_state[p].wal, new_entry)]
|
||||
/\ UNCHANGED <<acc_state, committed, elected_history>>
|
||||
|
||||
\* Immediately append next entry from elected proposer to acceptor a.
|
||||
AppendEntry(p, a) ==
|
||||
/\ prop_state[p].state = "leader"
|
||||
/\ acc_state[a].term = prop_state[p].term
|
||||
/\ a \in DOMAIN prop_state[p].nextSendLsn \* did TruncateWal
|
||||
/\ prop_state[p].nextSendLsn[a] <= Len(prop_state[p].wal) \* have smth to send
|
||||
/\ LET
|
||||
send_lsn == prop_state[p].nextSendLsn[a]
|
||||
entry == prop_state[p].wal[send_lsn]
|
||||
\* Since message delivery is instant we don't check that send_lsn follows
|
||||
\* the last acc record, it must always be true.
|
||||
IN
|
||||
/\ prop_state' = [prop_state EXCEPT ![p].nextSendLsn[a] = send_lsn + 1]
|
||||
/\ acc_state' = [acc_state EXCEPT ![a].wal = Append(acc_state[a].wal, entry)]
|
||||
/\ UNCHANGED <<committed, elected_history>>
|
||||
|
||||
\* LSN where elected proposer p starts writing its records.
|
||||
PropStartLsn(p) ==
|
||||
IF prop_state[p].state = "leader" THEN prop_state[p].termHistory[Len(prop_state[p].termHistory)].lsn ELSE NULL
|
||||
|
||||
\* Proposer p commits all entries it can using quorum q. Note that unlike
|
||||
\* will62794/logless-reconfig this allows to commit entries from previous terms
|
||||
\* (when conditions for that are met).
|
||||
CommitEntries(p, q) ==
|
||||
/\ prop_state[p].state = "leader"
|
||||
/\ \A a \in q:
|
||||
/\ acc_state[a].term = prop_state[p].term
|
||||
\* nextSendLsn existence means TruncateWal has happened, it ensures
|
||||
\* acceptor's WAL (and FlushLsn) are from proper proposer's history.
|
||||
\* Alternatively we could compare LastLogTerm here, but that's closer to
|
||||
\* what we do in the impl (we check flushLsn in AppendResponse, but
|
||||
\* AppendRequest is processed only if HandleElected handling was good).
|
||||
/\ a \in DOMAIN prop_state[p].nextSendLsn
|
||||
\* Now find the LSN present on all the quorum.
|
||||
/\ LET quorum_lsn == Minimum({FlushLsn(a): a \in q}) IN
|
||||
\* This is the basic Raft rule of not committing entries from previous
|
||||
\* terms except along with current term entry (commit them only when
|
||||
\* quorum recovers, i.e. last_log_term on it reaches leader's term).
|
||||
/\ quorum_lsn >= PropStartLsn(p)
|
||||
/\ committed' = committed \cup {[term |-> prop_state[p].wal[lsn], lsn |-> lsn]: lsn \in 1..(quorum_lsn - 1)}
|
||||
/\ UNCHANGED <<prop_state, acc_state, elected_history>>
|
||||
|
||||
\*******************************************************************************
|
||||
\* Final spec
|
||||
\*******************************************************************************
|
||||
|
||||
Next ==
|
||||
\/ \E q \in Quorums: \E p \in proposers: RestartProposer(p, q)
|
||||
\/ \E p \in proposers: \E a \in acceptors: Vote(p, a)
|
||||
\/ \E p \in proposers: BecomeLeader(p)
|
||||
\/ \E p \in proposers: \E a \in acceptors: UpdateTerm(p, a)
|
||||
\/ \E p \in proposers: \E a \in acceptors: TruncateWal(p, a)
|
||||
\/ \E p \in proposers: NewEntry(p)
|
||||
\/ \E p \in proposers: \E a \in acceptors: AppendEntry(p, a)
|
||||
\/ \E q \in Quorums: \E p \in proposers: CommitEntries(p, q)
|
||||
|
||||
Spec == Init /\ [][Next]_<<prop_state, acc_state, committed, elected_history>>
|
||||
|
||||
|
||||
\********************************************************************************
|
||||
\* Invariants
|
||||
\********************************************************************************
|
||||
|
||||
\* Lighter version of ElectionSafetyFull which doesn't require elected_history.
|
||||
ElectionSafety ==
|
||||
\A p1, p2 \in proposers:
|
||||
(/\ prop_state[p1].state = "leader"
|
||||
/\ prop_state[p2].state = "leader"
|
||||
/\ prop_state[p1].term = prop_state[p2].term) => (p1 = p2)
|
||||
|
||||
\* Single term must never be elected more than once.
|
||||
ElectionSafetyFull == \A term \in DOMAIN elected_history: elected_history[term] <= 1
|
||||
|
||||
\* Log is expected to be monotonic by <term, lsn> comparison. This is not true
|
||||
\* in variants of multi Paxos, but in Raft (and here) it is.
|
||||
LogIsMonotonic ==
|
||||
\A a \in acceptors:
|
||||
\A i, j \in DOMAIN acc_state[a].wal:
|
||||
(i > j) => (acc_state[a].wal[i] >= acc_state[a].wal[j])
|
||||
|
||||
\* Main invariant: If two entries are committed at the same LSN, they must be
|
||||
\* the same entry.
|
||||
LogSafety ==
|
||||
\A c1, c2 \in committed: (c1.lsn = c2.lsn) => (c1 = c2)
|
||||
|
||||
|
||||
\********************************************************************************
|
||||
\* Invariants which don't need to hold, but useful for playing/debugging.
|
||||
\********************************************************************************
|
||||
|
||||
\* Limits term of elected proposers
|
||||
MaxTerm == \A p \in proposers: (prop_state[p].state = "leader" => prop_state[p].term < 2)
|
||||
|
||||
MaxAccWalLen == \A a \in acceptors: Len(acc_state[a].wal) < 2
|
||||
|
||||
\* Limits max number of committed entries. That way we can check that we'are
|
||||
\* actually committing something.
|
||||
MaxCommitLsn == Cardinality(committed) < 2
|
||||
|
||||
\* How many records with different terms can be removed in single WAL
|
||||
\* truncation.
|
||||
MaxTruncatedTerms ==
|
||||
\A p \in proposers: \A a \in acceptors:
|
||||
(/\ prop_state[p].state = "leader"
|
||||
/\ prop_state[p].term = acc_state[a].term) =>
|
||||
LET
|
||||
hcp == FindHighestCommonPoint(prop_state[p].termHistory, AcceptorTermHistory(a), FlushLsn(a))
|
||||
truncated_lsns == {lsn \in DOMAIN acc_state[a].wal: lsn >= hcp.lsn}
|
||||
truncated_records_terms == {acc_state[a].wal[lsn]: lsn \in truncated_lsns}
|
||||
IN
|
||||
Cardinality(truncated_records_terms) < 2
|
||||
|
||||
\* Check that TruncateWal never deletes committed record.
|
||||
\* It might seem that this should an invariant, but it is not.
|
||||
\* With 5 nodes, it is legit to truncate record which had been
|
||||
\* globally committed: e.g. nodes abc can commit record of term 1 in
|
||||
\* term 3, and after that leader of term 2 can delete such record
|
||||
\* on d. On 10 cores TLC can find such a trace in ~7 hours.
|
||||
CommittedNotTruncated ==
|
||||
\A p \in proposers: \A a \in acceptors:
|
||||
(/\ prop_state[p].state = "leader"
|
||||
/\ prop_state[p].term = acc_state[a].term) =>
|
||||
LET
|
||||
hcp == FindHighestCommonPoint(prop_state[p].termHistory, AcceptorTermHistory(a), FlushLsn(a))
|
||||
truncated_lsns == {lsn \in DOMAIN acc_state[a].wal: lsn >= hcp.lsn}
|
||||
truncated_records == {[term |-> acc_state[a].wal[lsn], lsn |-> lsn]: lsn \in truncated_lsns}
|
||||
IN
|
||||
\A r \in truncated_records: r \notin committed
|
||||
|
||||
====
|
||||
49
safekeeper/spec/modelcheck.sh
Executable file
49
safekeeper/spec/modelcheck.sh
Executable file
@@ -0,0 +1,49 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Usage: ./modelcheck.sh <config_file> <spec_file>, e.g.
|
||||
# ./modelcheck.sh models/MCProposerAcceptorStatic_p2_a3_t3_l3.cfg MCProposerAcceptorStatic.tla
|
||||
CONFIG=$1
|
||||
SPEC=$2
|
||||
|
||||
MEM=7G
|
||||
TOOLSPATH="/opt/TLA+Toolbox/tla2tools.jar"
|
||||
|
||||
mkdir -p "tlc-results"
|
||||
CONFIG_FILE=$(basename -- "$CONFIG")
|
||||
outfilename="$SPEC-${CONFIG_FILE}-$(date --utc +%Y-%m-%d--%H-%M-%S)".log
|
||||
outfile="tlc-results/$outfilename"
|
||||
touch $outfile
|
||||
|
||||
# Save some info about the run.
|
||||
GIT_REV=`git rev-parse --short HEAD`
|
||||
INFO=`uname -a`
|
||||
|
||||
# First for Linux, second for Mac.
|
||||
CPUNAMELinux=$(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1')
|
||||
CPUCORESLinux=`nproc`
|
||||
CPUNAMEMac=`sysctl -n machdep.cpu.brand_string`
|
||||
CPUCORESMac=`sysctl -n machdep.cpu.thread_count`
|
||||
|
||||
echo "git revision: $GIT_REV" >> $outfile
|
||||
echo "Platform: $INFO" >> $outfile
|
||||
echo "CPU Info Linux: $CPUNAMELinux" >> $outfile
|
||||
echo "CPU Cores Linux: $CPUCORESLinux" >> $outfile
|
||||
echo "CPU Info Mac: $CPUNAMEMac" >> $outfile
|
||||
echo "CPU Cores Mac: $CPUCORESMac" >> $outfile
|
||||
echo "Spec: $SPEC" >> $outfile
|
||||
echo "Config: $CONFIG" >> $outfile
|
||||
echo "----" >> $outfile
|
||||
cat $CONFIG >> $outfile
|
||||
echo "" >> $outfile
|
||||
echo "----" >> $outfile
|
||||
echo "" >> $outfile
|
||||
|
||||
# see
|
||||
# https://lamport.azurewebsites.net/tla/current-tools.pdf
|
||||
# for TLC options.
|
||||
# OffHeapDiskFPSet is the optimal fingerprint set implementation
|
||||
# https://docs.tlapl.us/codebase:architecture#fingerprint_sets_fpsets
|
||||
#
|
||||
# Add -simulate to run in infinite simulation mode.
|
||||
java -Xmx$MEM -XX:MaxDirectMemorySize=$MEM -XX:+UseParallelGC -Dtlc2.tool.fp.FPSet.impl=tlc2.tool.fp.OffHeapDiskFPSet \
|
||||
-cp "${TOOLSPATH}" tlc2.TLC $SPEC -config $CONFIG -workers auto -gzip | tee -a $outfile
|
||||
@@ -0,0 +1,19 @@
|
||||
\* A very small model just to play.
|
||||
CONSTANTS
|
||||
NULL = NULL
|
||||
proposers = {p1, p2}
|
||||
acceptors = {a1, a2, a3}
|
||||
max_term = 2
|
||||
max_entries = 2
|
||||
SPECIFICATION Spec
|
||||
CONSTRAINT StateConstraint
|
||||
INVARIANT
|
||||
TypeOk
|
||||
ElectionSafetyFull
|
||||
LogIsMonotonic
|
||||
LogSafety
|
||||
CommittedNotTruncated
|
||||
SYMMETRY ProposerAcceptorSymmetry
|
||||
CHECK_DEADLOCK FALSE
|
||||
ALIAS Alias
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
\* A model next to the smallest one.
|
||||
CONSTANTS
|
||||
NULL = NULL
|
||||
proposers = {p1, p2}
|
||||
acceptors = {a1, a2, a3}
|
||||
max_term = 3
|
||||
max_entries = 2
|
||||
SPECIFICATION Spec
|
||||
CONSTRAINT StateConstraint
|
||||
INVARIANT
|
||||
TypeOk
|
||||
ElectionSafetyFull
|
||||
LogIsMonotonic
|
||||
LogSafety
|
||||
CommittedNotTruncated
|
||||
SYMMETRY ProposerAcceptorSymmetry
|
||||
CHECK_DEADLOCK FALSE
|
||||
ALIAS Alias
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
CONSTANTS
|
||||
NULL = NULL
|
||||
proposers = {p1, p2}
|
||||
acceptors = {a1, a2, a3}
|
||||
max_term = 3
|
||||
max_entries = 3
|
||||
SPECIFICATION Spec
|
||||
CONSTRAINT StateConstraint
|
||||
INVARIANT
|
||||
TypeOk
|
||||
ElectionSafety
|
||||
LogIsMonotonic
|
||||
LogSafety
|
||||
CommittedNotTruncated
|
||||
SYMMETRY ProposerAcceptorSymmetry
|
||||
CHECK_DEADLOCK FALSE
|
||||
ALIAS Alias
|
||||
@@ -0,0 +1,17 @@
|
||||
CONSTANTS
|
||||
NULL = NULL
|
||||
proposers = {p1, p2}
|
||||
acceptors = {a1, a2, a3}
|
||||
max_term = 4
|
||||
max_entries = 4
|
||||
SPECIFICATION Spec
|
||||
CONSTRAINT StateConstraint
|
||||
INVARIANT
|
||||
TypeOk
|
||||
ElectionSafety
|
||||
LogIsMonotonic
|
||||
LogSafety
|
||||
CommittedNotTruncated
|
||||
SYMMETRY ProposerAcceptorSymmetry
|
||||
CHECK_DEADLOCK FALSE
|
||||
ALIAS Alias
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user