Mirror of https://github.com/neondatabase/neon.git, synced 2026-02-15 16:40:37 +00:00.

Compare commits: skyzh/sha2...jcsp/issue (83 commits)
SHA1s of the 83 commits in this range:

82bf77c1ae 122c89f98b 24d137a524 94b4d3cc78 95967f07b3 af24b63cd3 df8d888d90 03c6039707
c6d5ff944d 4b97683338 affc18f912 3ef6e21211 1075386d77 c3dd646ab3 bc78b0e9cc f342b87f30
438bacc32e bc5c2d00cb 1a2a3cb446 e0871a49f4 c2e1b9d94b eb732d38aa e28122326c 4eedb3b6f1
e67fcf9563 82960b2175 30d15ad403 b6ee91835b df0f1e359b cd0e344938 22afaea6e1 ba20752b76
3a6fa76828 9ffb852359 972470b174 1412e9b3e8 be0c73f8e7 7f51764001 4d8a10af1c 55ba885f6b
6ff74295b5 bbe730d7ca 5a0da93c53 d9dcbffac3 f50ff14560 b58a615197 1a1d527875 216fc5ba7b
4270e86eb2 6351313ae9 95098c3216 d7c68dc981 6206f76419 d7f34bc339 86905c1322 0b02043ba4
873b222080 13d9589c35 be1a88e574 b9fd8dcf13 5ea117cddf 2682e0254f 41fb838799 107f535294
39c712f2ca ab10523cc1 d5399b729b b06eec41fa ca154d9cd8 1173ee6a7e 21e1a496a3 0457980728
8728d5a5fd a4a4d78993 870786bd82 b6d547cf92 e3a2631df9 02d42861e4 586e77bb24 b827e7b330
26b1483204 d709bcba81 b158a5eda0
@@ -1,2 +1,2 @@
[profile.default]
slow-timeout = { period = "20s", terminate-after = 3 }
slow-timeout = { period = "60s", terminate-after = 3 }
5 .github/actionlint.yml (vendored)

@@ -1,12 +1,11 @@
self-hosted-runner:
labels:
- arm64
- dev
- gen3
- large
# Remove `macos-14` from the list after https://github.com/rhysd/actionlint/pull/392 is merged.
- macos-14
- large-arm64
- small
- small-arm64
- us-east-2
config-variables:
- REMOTE_STORAGE_AZURE_CONTAINER

@@ -39,7 +39,7 @@ jobs:
matrix:
arch: [ x64, arm64 ]

runs-on: ${{ fromJson(format('["self-hosted", "dev", "{0}"]', matrix.arch)) }}
runs-on: ${{ fromJson(format('["self-hosted", "gen3", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}

env:
IMAGE_TAG: ${{ inputs.image-tag }}
21 .github/workflows/build_and_test.yml (vendored)

@@ -341,6 +341,9 @@ jobs:
env:
NEXTEST_RETRIES: 3
run: |
#nextest does not yet support running doctests
cargo test --doc $CARGO_FLAGS $CARGO_FEATURES

for io_engine in std-fs tokio-epoll-uring ; do
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES
done

@@ -543,9 +546,27 @@ jobs:
# XXX: no coverage data handling here, since benchmarks are run on release builds,
# while coverage is currently collected for the debug ones

report-benchmarks-failures:
needs: [ benchmarks, create-test-report ]
if: github.ref_name == 'main' && needs.benchmarks.result == 'failure'
runs-on: ubuntu-latest

steps:
- uses: slackapi/slack-github-action@v1
with:
channel-id: C060CNA47S9 # on-call-staging-storage-stream
slack-message: |
Benchmarks failed on main: ${{ github.event.head_commit.url }}

Allure report: ${{ needs.create-test-report.outputs.report-url }}
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

create-test-report:
needs: [ check-permissions, regress-tests, coverage-report, benchmarks, build-build-tools-image ]
if: ${{ !cancelled() && contains(fromJSON('["skipped", "success"]'), needs.check-permissions.result) }}
outputs:
report-url: ${{ steps.create-allure-report.outputs.report-url }}

runs-on: [ self-hosted, gen3, small ]
container:
33 .github/workflows/neon_extra_builds.yml (vendored)

@@ -136,7 +136,7 @@ jobs:
check-linux-arm-build:
needs: [ check-permissions, build-build-tools-image ]
timeout-minutes: 90
runs-on: [ self-hosted, dev, arm64 ]
runs-on: [ self-hosted, small-arm64 ]

env:
# Use release build only, to have less debug info around

@@ -232,20 +232,20 @@ jobs:

- name: Run cargo build
run: |
mold -run cargo build --locked $CARGO_FLAGS $CARGO_FEATURES --bins --tests
mold -run cargo build --locked $CARGO_FLAGS $CARGO_FEATURES --bins --tests -j$(nproc)

- name: Run cargo test
env:
NEXTEST_RETRIES: 3
run: |
cargo nextest run $CARGO_FEATURES
cargo nextest run $CARGO_FEATURES -j$(nproc)

# Run separate tests for real S3
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
export REMOTE_STORAGE_S3_REGION=eu-central-1
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
cargo nextest run --package remote_storage --test test_real_s3
cargo nextest run --package remote_storage --test test_real_s3 -j$(nproc)

# Run separate tests for real Azure Blob Storage
# XXX: replace region with `eu-central-1`-like region

@@ -255,12 +255,12 @@
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
cargo nextest run --package remote_storage --test test_real_azure
cargo nextest run --package remote_storage --test test_real_azure -j$(nproc)

check-codestyle-rust-arm:
needs: [ check-permissions, build-build-tools-image ]
timeout-minutes: 90
runs-on: [ self-hosted, dev, arm64 ]
runs-on: [ self-hosted, small-arm64 ]

container:
image: ${{ needs.build-build-tools-image.outputs.image }}

@@ -269,6 +269,11 @@
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init

strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]

steps:
- name: Fix git ownership
run: |

@@ -305,31 +310,35 @@
exit 1
fi
echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV

- name: Run cargo clippy (debug)
if: matrix.build_type == 'debug'
run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS
- name: Run cargo clippy (release)
if: matrix.build_type == 'release'
run: cargo hack --feature-powerset clippy --release $CLIPPY_COMMON_ARGS

- name: Check documentation generation
run: cargo doc --workspace --no-deps --document-private-items
if: matrix.build_type == 'release'
run: cargo doc --workspace --no-deps --document-private-items -j$(nproc)
env:
RUSTDOCFLAGS: "-Dwarnings -Arustdoc::private_intra_doc_links"

# Use `${{ !cancelled() }}` to run quick tests after the longer clippy run
- name: Check formatting
if: ${{ !cancelled() }}
if: ${{ !cancelled() && matrix.build_type == 'release' }}
run: cargo fmt --all -- --check

# https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
- name: Check rust dependencies
if: ${{ !cancelled() }}
if: ${{ !cancelled() && matrix.build_type == 'release' }}
run: |
cargo hakari generate --diff # workspace-hack Cargo.toml is up-to-date
cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack

# https://github.com/EmbarkStudios/cargo-deny
- name: Check rust licenses/bans/advisories/sources
if: ${{ !cancelled() }}
if: ${{ !cancelled() && matrix.build_type == 'release' }}
run: cargo deny check

gather-rust-build-stats:

@@ -338,7 +347,7 @@
contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') ||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
github.ref_name == 'main'
runs-on: [ self-hosted, gen3, large ]
runs-on: [ self-hosted, large ]
container:
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:

@@ -369,7 +378,7 @@
run: make walproposer-lib -j$(nproc)

- name: Produce the build stats
run: cargo build --all --release --timings
run: cargo build --all --release --timings -j$(nproc)

- name: Upload the build stats
id: upload-stats
91 Cargo.lock (generated)

The lockfile hunks are version bumps, mostly across the AWS SDK family:

| Package | Old version | New version |
|---|---|---|
| ahash | 0.8.9 | 0.8.11 |
| aws-config | 1.1.4 | 1.3.0 |
| aws-credential-types | 1.1.8 | 1.2.0 |
| aws-runtime | 1.1.8 | 1.2.1 |
| aws-sdk-s3 | 1.14.0 | 1.26.0 |
| aws-sdk-sso | 1.12.0 | 1.22.0 |
| aws-sdk-ssooidc | 1.12.0 | 1.22.0 |
| aws-sdk-sts | 1.12.0 | 1.22.0 |
| aws-sigv4 | 1.2.0 | 1.2.1 |
| aws-smithy-async | 1.1.8 | 1.2.1 |
| aws-smithy-checksums | 0.60.4 | 0.60.7 |
| aws-smithy-http | 0.60.7 | 0.60.8 |
| aws-smithy-runtime | 1.1.8 | 1.5.0 |
| aws-smithy-runtime-api | 1.2.0 | 1.6.0 |
| aws-smithy-types | 1.1.8 | 1.1.9 |
| aws-smithy-xml | 0.60.7 | 0.60.8 |
| aws-types | 1.1.8 | 1.2.0 |

Related lockfile changes in the same hunks:

- The `aws-config` dependency list gains one entry; the shown context includes `zeroize`.
- The `aws-sdk-s3` dependency list grows; the shown context includes `ahash`, `hex`, `hmac`, `lru` and `sha2`, and a new `lru 0.12.3` package entry (depending on `hashbrown 0.14.0`) is added.
- The `aws-smithy-runtime` dependency list now also shows `http-body 1.0.0`; the `aws-smithy-types` list now also shows `http 1.1.0`, `http-body 1.0.0` and `http-body-util`.
- One hunk (`@@ -3692,7 +3712,6`) drops a single entry from a workspace crate's dependency list; the surrounding context shows `serde_json`, `serde_path_to_error`, `serde_with`, `sha2`, `signal-hook`, `smallvec`, `storage_broker`.
- The `svg_fmt 0.4.2` entry's source changes from `git+https://github.com/neondatabase/fork--nical--rust_debug?branch=neon#c1820b28664b5df68de7f043fccf2ed5d67b6ae8` to `git+https://github.com/nical/rust_debug?rev=28a7d96eecff2f28e75b1ea09f2d499a60d0e3b4#28a7d96eecff2f28e75b1ea09f2d499a60d0e3b4`.
18 Cargo.toml

@@ -52,14 +52,14 @@ azure_storage_blobs = "0.19"
flate2 = "1.0.26"
async-stream = "0.3"
async-trait = "0.1"
aws-config = { version = "1.1.4", default-features = false, features=["rustls"] }
aws-sdk-s3 = "1.14"
aws-config = { version = "1.3", default-features = false, features=["rustls"] }
aws-sdk-s3 = "1.26"
aws-sdk-iam = "1.15.0"
aws-smithy-async = { version = "1.1.4", default-features = false, features=["rt-tokio"] }
aws-smithy-types = "1.1.4"
aws-credential-types = "1.1.4"
aws-sigv4 = { version = "1.2.0", features = ["sign-http"] }
aws-types = "1.1.7"
aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] }
aws-smithy-types = "1.1.9"
aws-credential-types = "1.2.0"
aws-sigv4 = { version = "1.2.1", features = ["sign-http"] }
aws-types = "1.2.0"
axum = { version = "0.6.20", features = ["ws"] }
base64 = "0.13.0"
bincode = "1.3"

@@ -158,8 +158,8 @@ socket2 = "0.5"
strum = "0.24"
strum_macros = "0.24"
"subtle" = "2.5.0"
# https://github.com/nical/rust_debug/pull/4
svg_fmt = { git = "https://github.com/neondatabase/fork--nical--rust_debug", branch = "neon" }
# Our PR https://github.com/nical/rust_debug/pull/4 has been merged but no new version released yet
svg_fmt = { git = "https://github.com/nical/rust_debug", rev = "28a7d96eecff2f28e75b1ea09f2d499a60d0e3b4" }
sync_wrapper = "0.1.2"
tar = "0.4"
task-local-extensions = "0.1.4"
@@ -87,7 +87,7 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "aws
&& rm awscliv2.zip

# Mold: A Modern Linker
ENV MOLD_VERSION v2.4.0
ENV MOLD_VERSION v2.31.0
RUN set -e \
&& git clone https://github.com/rui314/mold.git \
&& mkdir mold/build \
@@ -51,6 +51,7 @@ use tracing::{error, info, warn};
use url::Url;

use compute_api::responses::ComputeStatus;
use compute_api::spec::ComputeSpec;

use compute_tools::compute::{
forward_termination_signal, ComputeNode, ComputeState, ParsedSpec, PG_PID,

@@ -69,6 +70,34 @@ use compute_tools::swap::resize_swap;
const BUILD_TAG_DEFAULT: &str = "latest";

fn main() -> Result<()> {
let (build_tag, clap_args) = init()?;

let (pg_handle, start_pg_result) = {
// Enter startup tracing context
let _startup_context_guard = startup_context_from_env();

let cli_args = process_cli(&clap_args)?;

let cli_spec = try_spec_from_cli(&clap_args, &cli_args)?;

let wait_spec_result = wait_spec(build_tag, cli_args, cli_spec)?;

start_postgres(&clap_args, wait_spec_result)?

// Startup is finished, exit the startup tracing span
};

// PostgreSQL is now running, if startup was successful. Wait until it exits.
let wait_pg_result = wait_postgres(pg_handle)?;

let delay_exit = cleanup_after_postgres_exit(start_pg_result)?;

maybe_delay_exit(delay_exit);

deinit_and_exit(wait_pg_result);
}

fn init() -> Result<(String, clap::ArgMatches)> {
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;

let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;

@@ -83,9 +112,15 @@ fn main() -> Result<()> {
.to_string();
info!("build_tag: {build_tag}");

let matches = cli().get_matches();
let pgbin_default = String::from("postgres");
let pgbin = matches.get_one::<String>("pgbin").unwrap_or(&pgbin_default);
Ok((build_tag, cli().get_matches()))
}

fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
let pgbin_default = "postgres";
let pgbin = matches
.get_one::<String>("pgbin")
.map(|s| s.as_str())
.unwrap_or(pgbin_default);

let ext_remote_storage = matches
.get_one::<String>("remote-ext-config")

@@ -113,6 +148,30 @@ fn main() -> Result<()> {
let spec_path = matches.get_one::<String>("spec-path");
let resize_swap_on_bind = matches.get_flag("resize-swap-on-bind");

Ok(ProcessCliResult {
connstr,
pgdata,
pgbin,
ext_remote_storage,
http_port,
spec_json,
spec_path,
resize_swap_on_bind,
})
}

struct ProcessCliResult<'clap> {
connstr: &'clap str,
pgdata: &'clap str,
pgbin: &'clap str,
ext_remote_storage: Option<&'clap str>,
http_port: u16,
spec_json: Option<&'clap String>,
spec_path: Option<&'clap String>,
resize_swap_on_bind: bool,
}

fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
// Extract OpenTelemetry context for the startup actions from the
// TRACEPARENT and TRACESTATE env variables, and attach it to the current
// tracing context.

@@ -149,7 +208,7 @@ fn main() -> Result<()> {
if let Ok(val) = std::env::var("TRACESTATE") {
startup_tracing_carrier.insert("tracestate".to_string(), val);
}
let startup_context_guard = if !startup_tracing_carrier.is_empty() {
if !startup_tracing_carrier.is_empty() {
use opentelemetry::propagation::TextMapPropagator;
use opentelemetry::sdk::propagation::TraceContextPropagator;
let guard = TraceContextPropagator::new()

@@ -159,8 +218,17 @@ fn main() -> Result<()> {
Some(guard)
} else {
None
};
}
}

fn try_spec_from_cli(
matches: &clap::ArgMatches,
ProcessCliResult {
spec_json,
spec_path,
..
}: &ProcessCliResult,
) -> Result<CliSpecParams> {
let compute_id = matches.get_one::<String>("compute-id");
let control_plane_uri = matches.get_one::<String>("control-plane-uri");

@@ -201,6 +269,34 @@ fn main() -> Result<()> {
}
};

Ok(CliSpecParams {
spec,
live_config_allowed,
})
}

struct CliSpecParams {
/// If a spec was provided via CLI or file, the [`ComputeSpec`]
spec: Option<ComputeSpec>,
live_config_allowed: bool,
}

fn wait_spec(
build_tag: String,
ProcessCliResult {
connstr,
pgdata,
pgbin,
ext_remote_storage,
resize_swap_on_bind,
http_port,
..
}: ProcessCliResult,
CliSpecParams {
spec,
live_config_allowed,
}: CliSpecParams,
) -> Result<WaitSpecResult> {
let mut new_state = ComputeState::new();
let spec_set;

@@ -239,8 +335,6 @@ fn main() -> Result<()> {
let _http_handle =
launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread");

let extension_server_port: u16 = http_port;

if !spec_set {
// No spec provided, hang waiting for it.
info!("no compute spec provided, waiting");

@@ -269,6 +363,29 @@ fn main() -> Result<()> {
state.start_time = now;
}

Ok(WaitSpecResult {
compute,
http_port,
resize_swap_on_bind,
})
}

struct WaitSpecResult {
compute: Arc<ComputeNode>,
// passed through from ProcessCliResult
http_port: u16,
resize_swap_on_bind: bool,
}

fn start_postgres(
// need to allow unused because `matches` is only used if target_os = "linux"
#[allow(unused_variables)] matches: &clap::ArgMatches,
WaitSpecResult {
compute,
http_port,
resize_swap_on_bind,
}: WaitSpecResult,
) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
// We got all we need, update the state.
let mut state = compute.state.lock().unwrap();
state.status = ComputeStatus::Init;

@@ -318,10 +435,10 @@ fn main() -> Result<()> {
}
}

let extension_server_port: u16 = http_port;

// Start Postgres
let mut pg = None;
let mut exit_code = None;

if !prestartup_failed {
pg = match compute.start_compute(extension_server_port) {
Ok(pg) => Some(pg),

@@ -376,7 +493,7 @@ fn main() -> Result<()> {
// This token is used internally by the monitor to clean up all threads
let token = CancellationToken::new();

let vm_monitor = &rt.as_ref().map(|rt| {
let vm_monitor = rt.as_ref().map(|rt| {
rt.spawn(vm_monitor::start(
Box::leak(Box::new(vm_monitor::Args {
cgroup: cgroup.cloned(),

@@ -389,12 +506,41 @@ fn main() -> Result<()> {
}
}

Ok((
pg,
StartPostgresResult {
delay_exit,
compute,
#[cfg(target_os = "linux")]
rt,
#[cfg(target_os = "linux")]
token,
#[cfg(target_os = "linux")]
vm_monitor,
},
))
}

type PostgresHandle = (std::process::Child, std::thread::JoinHandle<()>);

struct StartPostgresResult {
delay_exit: bool,
// passed through from WaitSpecResult
compute: Arc<ComputeNode>,

#[cfg(target_os = "linux")]
rt: Option<tokio::runtime::Runtime>,
#[cfg(target_os = "linux")]
token: tokio_util::sync::CancellationToken,
#[cfg(target_os = "linux")]
vm_monitor: Option<tokio::task::JoinHandle<Result<()>>>,
}

fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
// Wait for the child Postgres process forever. In this state Ctrl+C will
// propagate to Postgres and it will be shut down as well.
let mut exit_code = None;
if let Some((mut pg, logs_handle)) = pg {
// Startup is finished, exit the startup tracing span
drop(startup_context_guard);

let ecode = pg
.wait()
.expect("failed to start waiting on Postgres process");

@@ -409,6 +555,25 @@ fn main() -> Result<()> {
exit_code = ecode.code()
}

Ok(WaitPostgresResult { exit_code })
}

struct WaitPostgresResult {
exit_code: Option<i32>,
}

fn cleanup_after_postgres_exit(
StartPostgresResult {
mut delay_exit,
compute,
#[cfg(target_os = "linux")]
vm_monitor,
#[cfg(target_os = "linux")]
token,
#[cfg(target_os = "linux")]
rt,
}: StartPostgresResult,
) -> Result<bool> {
// Terminate the vm_monitor so it releases the file watcher on
// /sys/fs/cgroup/neon-postgres.
// Note: the vm-monitor only runs on linux because it requires cgroups.

@@ -450,13 +615,19 @@ fn main() -> Result<()> {
error!("error while checking for core dumps: {err:?}");
}

Ok(delay_exit)
}

fn maybe_delay_exit(delay_exit: bool) {
// If launch failed, keep serving HTTP requests for a while, so the cloud
// control plane can get the actual error.
if delay_exit {
info!("giving control plane 30s to collect the error before shutdown");
thread::sleep(Duration::from_secs(30));
}
}

fn deinit_and_exit(WaitPostgresResult { exit_code }: WaitPostgresResult) -> ! {
// Shutdown trace pipeline gracefully, so that it has a chance to send any
// pending traces before we exit. Shutting down OTEL tracing provider may
// hang for quite some time, see, for example:
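The diff above breaks the old monolithic `main()` into a chain of startup phases (`init` → `process_cli` → `try_spec_from_cli` → `wait_spec` → `start_postgres` → `wait_postgres` → cleanup), each returning a small result struct that the next phase destructures in its signature. The following is a minimal, self-contained sketch of that hand-off pattern, not the real compute_ctl code: the structs reuse names from the diff but carry only a couple of illustrative fields, and the only assumed dependency is the `anyhow` crate the file already uses.

```rust
use anyhow::Result;

// Trimmed-down stand-ins for the result structs in the diff above.
struct ProcessCliResult {
    connstr: String,
    http_port: u16,
}

struct WaitSpecResult {
    // passed through from ProcessCliResult
    http_port: u16,
}

// Phase 1: gather CLI-ish inputs and bundle everything later phases need.
fn process_cli() -> Result<ProcessCliResult> {
    Ok(ProcessCliResult {
        connstr: "postgresql://cloud_admin@localhost/postgres".to_string(),
        http_port: 3080,
    })
}

// Phase 2: consume the previous phase's result by destructuring it in the
// signature, so no field can be silently forgotten or reused out of order.
fn wait_spec(ProcessCliResult { connstr, http_port }: ProcessCliResult) -> Result<WaitSpecResult> {
    println!("would wait for a spec for {connstr}, serving status on port {http_port}");
    Ok(WaitSpecResult { http_port })
}

fn main() -> Result<()> {
    // main() reads as a linear pipeline; each binding is the only state that
    // crosses a phase boundary.
    let cli = process_cli()?;
    let WaitSpecResult { http_port } = wait_spec(cli)?;
    println!("extension server would reuse port {http_port}");
    Ok(())
}
```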
@@ -9,8 +9,11 @@ use anyhow::{anyhow, bail, Context, Result};
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command, ValueEnum};
use compute_api::spec::ComputeMode;
use control_plane::endpoint::ComputeControlPlane;
use control_plane::local_env::{InitForceMode, LocalEnv};
use control_plane::pageserver::{PageServerNode, PAGESERVER_REMOTE_STORAGE_DIR};
use control_plane::local_env::{
InitForceMode, LocalEnv, NeonBroker, NeonLocalInitConf, NeonLocalInitPageserverConf,
SafekeeperConf,
};
use control_plane::pageserver::PageServerNode;
use control_plane::safekeeper::SafekeeperNode;
use control_plane::storage_controller::StorageController;
use control_plane::{broker, local_env};

@@ -52,44 +55,6 @@ const DEFAULT_PG_VERSION: &str = "15";

const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";

fn default_conf(num_pageservers: u16) -> String {
let mut template = format!(
r#"
# Default built-in configuration, defined in main.rs
control_plane_api = '{DEFAULT_PAGESERVER_CONTROL_PLANE_API}'

[broker]
listen_addr = '{DEFAULT_BROKER_ADDR}'

[[safekeepers]]
id = {DEFAULT_SAFEKEEPER_ID}
pg_port = {DEFAULT_SAFEKEEPER_PG_PORT}
http_port = {DEFAULT_SAFEKEEPER_HTTP_PORT}

"#,
);

for i in 0..num_pageservers {
let pageserver_id = NodeId(DEFAULT_PAGESERVER_ID.0 + i as u64);
let pg_port = DEFAULT_PAGESERVER_PG_PORT + i;
let http_port = DEFAULT_PAGESERVER_HTTP_PORT + i;

template += &format!(
r#"
[[pageservers]]
id = {pageserver_id}
listen_pg_addr = '127.0.0.1:{pg_port}'
listen_http_addr = '127.0.0.1:{http_port}'
pg_auth_type = '{trust_auth}'
http_auth_type = '{trust_auth}'
"#,
trust_auth = AuthType::Trust,
)
}

template
}

///
/// Timelines tree element used as a value in the HashMap.
///

@@ -152,7 +117,7 @@ fn main() -> Result<()> {
};

match subcommand_result {
Ok(Some(updated_env)) => updated_env.persist_config(&updated_env.base_data_dir)?,
Ok(Some(updated_env)) => updated_env.persist_config()?,
Ok(None) => (),
Err(e) => {
eprintln!("command failed: {e:?}");

@@ -341,55 +306,65 @@ fn parse_timeline_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TimelineId
}

fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
let num_pageservers = init_match
.get_one::<u16>("num-pageservers")
.expect("num-pageservers arg has a default");
// Create config file
let toml_file: String = if let Some(config_path) = init_match.get_one::<PathBuf>("config") {
let num_pageservers = init_match.get_one::<u16>("num-pageservers");

let force = init_match.get_one("force").expect("we set a default value");

// Create the in-memory `LocalEnv` that we'd normally load from disk in `load_config`.
let init_conf: NeonLocalInitConf = if let Some(config_path) =
init_match.get_one::<PathBuf>("config")
{
// User (likely the Python test suite) provided a description of the environment.
if num_pageservers.is_some() {
bail!("Cannot specify both --num-pageservers and --config, use key `pageservers` in the --config file instead");
}
// load and parse the file
std::fs::read_to_string(config_path).with_context(|| {
let contents = std::fs::read_to_string(config_path).with_context(|| {
format!(
"Could not read configuration file '{}'",
config_path.display()
)
})?
})?;
toml_edit::de::from_str(&contents)?
} else {
// Built-in default config
default_conf(*num_pageservers)
// User (likely interactive) did not provide a description of the environment, give them the default
NeonLocalInitConf {
control_plane_api: Some(Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap())),
broker: NeonBroker {
listen_addr: DEFAULT_BROKER_ADDR.parse().unwrap(),
},
safekeepers: vec![SafekeeperConf {
id: DEFAULT_SAFEKEEPER_ID,
pg_port: DEFAULT_SAFEKEEPER_PG_PORT,
http_port: DEFAULT_SAFEKEEPER_HTTP_PORT,
..Default::default()
}],
pageservers: (0..num_pageservers.copied().unwrap_or(1))
.map(|i| {
let pageserver_id = NodeId(DEFAULT_PAGESERVER_ID.0 + i as u64);
let pg_port = DEFAULT_PAGESERVER_PG_PORT + i;
let http_port = DEFAULT_PAGESERVER_HTTP_PORT + i;
NeonLocalInitPageserverConf {
id: pageserver_id,
listen_pg_addr: format!("127.0.0.1:{pg_port}"),
listen_http_addr: format!("127.0.0.1:{http_port}"),
pg_auth_type: AuthType::Trust,
http_auth_type: AuthType::Trust,
other: Default::default(),
}
})
.collect(),
pg_distrib_dir: None,
neon_distrib_dir: None,
default_tenant_id: TenantId::from_array(std::array::from_fn(|_| 0)),
storage_controller: None,
control_plane_compute_hook_api: None,
}
};

let pageserver_config: toml_edit::Document =
if let Some(path) = init_match.get_one::<PathBuf>("pageserver-config") {
std::fs::read_to_string(path)?.parse()?
} else {
toml_edit::Document::new()
};

let pg_version = init_match
.get_one::<u32>("pg-version")
.copied()
.context("Failed to parse postgres version from the argument string")?;

let mut env =
LocalEnv::parse_config(&toml_file).context("Failed to create neon configuration")?;
let force = init_match.get_one("force").expect("we set a default value");
env.init(pg_version, force)
.context("Failed to initialize neon repository")?;

// Create remote storage location for default LocalFs remote storage
std::fs::create_dir_all(env.base_data_dir.join(PAGESERVER_REMOTE_STORAGE_DIR))?;

// Initialize pageserver, create initial tenant and timeline.
for ps_conf in &env.pageservers {
PageServerNode::from_env(&env, ps_conf)
.initialize(&pageserver_config)
.unwrap_or_else(|e| {
eprintln!("pageserver init failed: {e:?}");
exit(1);
});
}

Ok(env)
LocalEnv::init(init_conf, force)
.context("materialize initial neon_local environment on disk")?;
Ok(LocalEnv::load_config().expect("freshly written config should be loadable"))
}

/// The default pageserver is the one where CLI tenant/timeline operations are sent by default.

@@ -1418,9 +1393,7 @@ fn cli() -> Command {
let num_pageservers_arg = Arg::new("num-pageservers")
.value_parser(value_parser!(u16))
.long("num-pageservers")
.help("How many pageservers to create (default 1)")
.required(false)
.default_value("1");
.help("How many pageservers to create (default 1)");

let update_catalog = Arg::new("update-catalog")
.value_parser(value_parser!(bool))

@@ -1454,14 +1427,6 @@ fn cli() -> Command {
.value_parser(value_parser!(PathBuf))
.value_name("config")
)
.arg(
Arg::new("pageserver-config")
.long("pageserver-config")
.required(false)
.value_parser(value_parser!(PathBuf))
.value_name("pageserver-config")
.help("Merge the provided pageserver config into the one generated by neon_local."),
)
.arg(pg_version_arg.clone())
.arg(force_arg)
)
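In the new `handle_init` above, a user-supplied `--config` file is deserialized directly into the typed `NeonLocalInitConf` (which is `deny_unknown_fields`), and the built-in default is constructed as a plain Rust value instead of a formatted TOML string that gets re-parsed. A minimal sketch of that typed-config approach follows; the struct and fields are an illustrative subset rather than the real `NeonLocalInitConf`, and it assumes the `serde`, `toml`, and `anyhow` crates (the diff itself uses `toml_edit::de::from_str`).

```rust
use anyhow::Result;
use serde::Deserialize;

#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct InitConf {
    default_tenant_id: String,
    #[serde(default)]
    pageservers: Vec<PageserverInitConf>,
}

#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct PageserverInitConf {
    id: u64,
    listen_pg_addr: String,
}

fn main() -> Result<()> {
    // A stand-in for the file passed via `neon_local init --config=<path>`.
    let file = r#"
        default_tenant_id = "de200bd42b49cc1814412c7e592dd6e9"

        [[pageservers]]
        id = 1
        listen_pg_addr = "127.0.0.1:64000"
    "#;
    let conf: InitConf = toml::from_str(file)?;
    println!("{conf:?}");

    // A stray or misspelled key is rejected up front thanks to
    // `deny_unknown_fields`, instead of silently producing an empty config.
    assert!(toml::from_str::<InitConf>("pageserver = []").is_err());
    Ok(())
}
```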
@@ -3,7 +3,7 @@
//! Now it also provides init method which acts like a stub for proper installation
//! script which will use local paths.

use anyhow::{bail, ensure, Context};
use anyhow::{bail, Context};

use clap::ValueEnum;
use postgres_backend::AuthType;

@@ -23,6 +23,8 @@ use utils::{
id::{NodeId, TenantId, TenantTimelineId, TimelineId},
};

use crate::pageserver::PageServerNode;
use crate::pageserver::PAGESERVER_REMOTE_STORAGE_DIR;
use crate::safekeeper::SafekeeperNode;

pub const DEFAULT_PG_VERSION: u32 = 15;

@@ -34,7 +36,7 @@ pub const DEFAULT_PG_VERSION: u32 = 15;
// to 'neon_local init --config=<path>' option. See control_plane/simple.conf for
// an example.
//
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[derive(PartialEq, Eq, Clone, Debug)]
pub struct LocalEnv {
// Base directory for all the nodes (the pageserver, safekeepers and
// compute endpoints).

@@ -42,59 +44,99 @@ pub struct LocalEnv {
// This is not stored in the config file. Rather, this is the path where the
// config file itself is. It is read from the NEON_REPO_DIR env variable or
// '.neon' if not given.
#[serde(skip)]
pub base_data_dir: PathBuf,

// Path to postgres distribution. It's expected that "bin", "include",
// "lib", "share" from postgres distribution are there. If at some point
// in time we will be able to run against vanilla postgres we may split that
// to four separate paths and match OS-specific installation layout.
#[serde(default)]
pub pg_distrib_dir: PathBuf,

// Path to pageserver binary.
#[serde(default)]
pub neon_distrib_dir: PathBuf,

// Default tenant ID to use with the 'neon_local' command line utility, when
// --tenant_id is not explicitly specified.
#[serde(default)]
pub default_tenant_id: Option<TenantId>,

// used to issue tokens during e.g pg start
#[serde(default)]
pub private_key_path: PathBuf,

pub broker: NeonBroker,

// Configuration for the storage controller (1 per neon_local environment)
#[serde(default)]
pub storage_controller: NeonStorageControllerConf,

/// This Vec must always contain at least one pageserver
/// Populdated by [`Self::load_config`] from the individual `pageserver.toml`s.
/// NB: not used anymore except for informing users that they need to change their `.neon/config`.
pub pageservers: Vec<PageServerConf>,

#[serde(default)]
pub safekeepers: Vec<SafekeeperConf>,

// Control plane upcall API for pageserver: if None, we will not run storage_controller If set, this will
// be propagated into each pageserver's configuration.
#[serde(default)]
pub control_plane_api: Option<Url>,

// Control plane upcall API for storage controller. If set, this will be propagated into the
// storage controller's configuration.
#[serde(default)]
pub control_plane_compute_hook_api: Option<Url>,

/// Keep human-readable aliases in memory (and persist them to config), to hide ZId hex strings from the user.
#[serde(default)]
// A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,
// but deserialization into a generic toml object as `toml::Value::try_from` fails with an error.
// https://toml.io/en/v1.0.0 does not contain a concept of "a table inside another table".
pub branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
}

/// On-disk state stored in `.neon/config`.
#[derive(PartialEq, Eq, Clone, Debug, Default, Serialize, Deserialize)]
#[serde(default, deny_unknown_fields)]
pub struct OnDiskConfig {
pub pg_distrib_dir: PathBuf,
pub neon_distrib_dir: PathBuf,
pub default_tenant_id: Option<TenantId>,
pub private_key_path: PathBuf,
pub broker: NeonBroker,
pub storage_controller: NeonStorageControllerConf,
#[serde(
skip_serializing,
deserialize_with = "fail_if_pageservers_field_specified"
)]
pub pageservers: Vec<PageServerConf>,
pub safekeepers: Vec<SafekeeperConf>,
pub control_plane_api: Option<Url>,
pub control_plane_compute_hook_api: Option<Url>,
branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
}

fn fail_if_pageservers_field_specified<'de, D>(_: D) -> Result<Vec<PageServerConf>, D::Error>
where
D: serde::Deserializer<'de>,
{
Err(serde::de::Error::custom(
"The 'pageservers' field is no longer used; pageserver.toml is now authoritative; \
Please remove the `pageservers` from your .neon/config.",
))
}

/// The description of the neon_local env to be initialized by `neon_local init --config`.
#[derive(Clone, Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct NeonLocalInitConf {
// TODO: do we need this? Seems unused
pub pg_distrib_dir: Option<PathBuf>,
// TODO: do we need this? Seems unused
pub neon_distrib_dir: Option<PathBuf>,
pub default_tenant_id: TenantId,
pub broker: NeonBroker,
pub storage_controller: Option<NeonStorageControllerConf>,
pub pageservers: Vec<NeonLocalInitPageserverConf>,
pub safekeepers: Vec<SafekeeperConf>,
pub control_plane_api: Option<Option<Url>>,
pub control_plane_compute_hook_api: Option<Option<Url>>,
}

/// Broker config for cluster internal communication.
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default)]

@@ -110,6 +152,9 @@ pub struct NeonStorageControllerConf {
/// Heartbeat timeout before marking a node offline
#[serde(with = "humantime_serde")]
pub max_unavailable: Duration,

/// Threshold for auto-splitting a tenant into shards
pub split_threshold: Option<u64>,
}

impl NeonStorageControllerConf {

@@ -122,6 +167,7 @@ impl Default for NeonStorageControllerConf {
fn default() -> Self {
Self {
max_unavailable: Self::DEFAULT_MAX_UNAVAILABLE_INTERVAL,
split_threshold: None,
}
}
}

@@ -141,24 +187,18 @@ impl NeonBroker {
}
}

// neon_local needs to know this subset of pageserver configuration.
// For legacy reasons, this information is duplicated from `pageserver.toml` into `.neon/config`.
// It can get stale if `pageserver.toml` is changed.
// TODO(christian): don't store this at all in `.neon/config`, always load it from `pageserver.toml`
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default, deny_unknown_fields)]
pub struct PageServerConf {
// node id
pub id: NodeId,

// Pageserver connection settings
pub listen_pg_addr: String,
pub listen_http_addr: String,

// auth type used for the PG and HTTP ports
pub pg_auth_type: AuthType,
pub http_auth_type: AuthType,

pub(crate) virtual_file_io_engine: Option<String>,
pub(crate) get_vectored_impl: Option<String>,
pub(crate) get_impl: Option<String>,
pub(crate) validate_vectored_get: Option<bool>,
}

impl Default for PageServerConf {

@@ -169,10 +209,40 @@ impl Default for PageServerConf {
listen_http_addr: String::new(),
pg_auth_type: AuthType::Trust,
http_auth_type: AuthType::Trust,
virtual_file_io_engine: None,
get_vectored_impl: None,
get_impl: None,
validate_vectored_get: None,
}
}
}

/// The toml that can be passed to `neon_local init --config`.
/// This is a subset of the `pageserver.toml` configuration.
// TODO(christian): use pageserver_api::config::ConfigToml (PR #7656)
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
pub struct NeonLocalInitPageserverConf {
pub id: NodeId,
pub listen_pg_addr: String,
pub listen_http_addr: String,
pub pg_auth_type: AuthType,
pub http_auth_type: AuthType,
#[serde(flatten)]
pub other: HashMap<String, toml::Value>,
}

impl From<&NeonLocalInitPageserverConf> for PageServerConf {
fn from(conf: &NeonLocalInitPageserverConf) -> Self {
let NeonLocalInitPageserverConf {
id,
listen_pg_addr,
listen_http_addr,
pg_auth_type,
http_auth_type,
other: _,
} = conf;
Self {
id: *id,
listen_pg_addr: listen_pg_addr.clone(),
listen_http_addr: listen_http_addr.clone(),
pg_auth_type: *pg_auth_type,
http_auth_type: *http_auth_type,
}
}
}

@@ -360,44 +430,7 @@ impl LocalEnv {
.collect()
}

/// Create a LocalEnv from a config file.
///
/// Unlike 'load_config', this function fills in any defaults that are missing
/// from the config file.
pub fn parse_config(toml: &str) -> anyhow::Result<Self> {
let mut env: LocalEnv = toml::from_str(toml)?;

// Find postgres binaries.
// Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "pg_install".
// Note that later in the code we assume, that distrib dirs follow the same pattern
// for all postgres versions.
if env.pg_distrib_dir == Path::new("") {
if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
env.pg_distrib_dir = postgres_bin.into();
} else {
let cwd = env::current_dir()?;
env.pg_distrib_dir = cwd.join("pg_install")
}
}

// Find neon binaries.
if env.neon_distrib_dir == Path::new("") {
env::current_exe()?
.parent()
.unwrap()
.clone_into(&mut env.neon_distrib_dir);
}

if env.pageservers.is_empty() {
anyhow::bail!("Configuration must contain at least one pageserver");
}

env.base_data_dir = base_path();

Ok(env)
}

/// Locate and load config
/// Construct `Self` from on-disk state.
pub fn load_config() -> anyhow::Result<Self> {
let repopath = base_path();

@@ -411,38 +444,129 @@ impl LocalEnv {
// TODO: check that it looks like a neon repository

// load and parse file
let config = fs::read_to_string(repopath.join("config"))?;
let mut env: LocalEnv = toml::from_str(config.as_str())?;
let config_file_contents = fs::read_to_string(repopath.join("config"))?;
let on_disk_config: OnDiskConfig = toml::from_str(config_file_contents.as_str())?;
let mut env = {
let OnDiskConfig {
pg_distrib_dir,
neon_distrib_dir,
default_tenant_id,
private_key_path,
broker,
storage_controller,
pageservers,
safekeepers,
control_plane_api,
control_plane_compute_hook_api,
branch_name_mappings,
} = on_disk_config;
LocalEnv {
base_data_dir: repopath.clone(),
pg_distrib_dir,
neon_distrib_dir,
default_tenant_id,
private_key_path,
broker,
storage_controller,
pageservers,
safekeepers,
control_plane_api,
control_plane_compute_hook_api,
branch_name_mappings,
}
};

env.base_data_dir = repopath;
// The source of truth for pageserver configuration is the pageserver.toml.
assert!(
env.pageservers.is_empty(),
"we ensure this during deserialization"
);
env.pageservers = {
let iter = std::fs::read_dir(&repopath).context("open dir")?;
let mut pageservers = Vec::new();
for res in iter {
let dentry = res?;
const PREFIX: &str = "pageserver_";
let dentry_name = dentry
.file_name()
.into_string()
.ok()
.with_context(|| format!("non-utf8 dentry: {:?}", dentry.path()))
.unwrap();
if !dentry_name.starts_with(PREFIX) {
continue;
}
if !dentry.file_type().context("determine file type")?.is_dir() {
anyhow::bail!("expected a directory, got {:?}", dentry.path());
}
let id = dentry_name[PREFIX.len()..]
.parse::<NodeId>()
.with_context(|| format!("parse id from {:?}", dentry.path()))?;
// TODO(christian): use pageserver_api::config::ConfigToml (PR #7656)
#[derive(serde::Serialize, serde::Deserialize)]
// (allow unknown fields, unlike PageServerConf)
struct PageserverConfigTomlSubset {
id: NodeId,
listen_pg_addr: String,
listen_http_addr: String,
pg_auth_type: AuthType,
http_auth_type: AuthType,
}
let config_toml_path = dentry.path().join("pageserver.toml");
let config_toml: PageserverConfigTomlSubset = toml_edit::de::from_str(
&std::fs::read_to_string(&config_toml_path)
.with_context(|| format!("read {:?}", config_toml_path))?,
)
.context("parse pageserver.toml")?;
let PageserverConfigTomlSubset {
id: config_toml_id,
listen_pg_addr,
listen_http_addr,
pg_auth_type,
http_auth_type,
} = config_toml;
let conf = PageServerConf {
id: {
anyhow::ensure!(
config_toml_id == id,
"id mismatch: config_toml.id={config_toml_id} id={id}",
);
id
},
listen_pg_addr,
listen_http_addr,
pg_auth_type,
http_auth_type,
};
pageservers.push(conf);
}
pageservers
};

Ok(env)
}

pub fn persist_config(&self, base_path: &Path) -> anyhow::Result<()> {
// Currently, the user first passes a config file with 'neon_local init --config=<path>'
// We read that in, in `create_config`, and fill any missing defaults. Then it's saved
// to .neon/config. TODO: We lose any formatting and comments along the way, which is
// a bit sad.
let mut conf_content = r#"# This file describes a local deployment of the page server
# and safekeeeper node. It is read by the 'neon_local' command-line
# utility.
"#
.to_string();

// Convert the LocalEnv to a toml file.
//
// This could be as simple as this:
//
// conf_content += &toml::to_string_pretty(env)?;
//
// But it results in a "values must be emitted before tables". I'm not sure
// why, AFAICS the table, i.e. 'safekeepers: Vec<SafekeeperConf>' is last.
// Maybe rust reorders the fields to squeeze avoid padding or something?
// In any case, converting to toml::Value first, and serializing that, works.
// See https://github.com/alexcrichton/toml-rs/issues/142
conf_content += &toml::to_string_pretty(&toml::Value::try_from(self)?)?;
pub fn persist_config(&self) -> anyhow::Result<()> {
Self::persist_config_impl(
&self.base_data_dir,
&OnDiskConfig {
pg_distrib_dir: self.pg_distrib_dir.clone(),
neon_distrib_dir: self.neon_distrib_dir.clone(),
default_tenant_id: self.default_tenant_id,
private_key_path: self.private_key_path.clone(),
broker: self.broker.clone(),
storage_controller: self.storage_controller.clone(),
pageservers: vec![], // it's skip_serializing anyway
safekeepers: self.safekeepers.clone(),
control_plane_api: self.control_plane_api.clone(),
control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(),
branch_name_mappings: self.branch_name_mappings.clone(),
},
)
}

pub fn persist_config_impl(base_path: &Path, config: &OnDiskConfig) -> anyhow::Result<()> {
let conf_content = &toml::to_string_pretty(config)?;
let target_config_path = base_path.join("config");
fs::write(&target_config_path, conf_content).with_context(|| {
format!(

@@ -467,17 +591,13 @@ impl LocalEnv {
}
}

//
// Initialize a new Neon repository
//
pub fn init(&mut self, pg_version: u32, force: &InitForceMode) -> anyhow::Result<()> {
// check if config already exists
let base_path = &self.base_data_dir;
ensure!(
base_path != Path::new(""),
"repository base path is missing"
);
/// Materialize the [`NeonLocalInitConf`] to disk. Called during [`neon_local init`].
pub fn init(conf: NeonLocalInitConf, force: &InitForceMode) -> anyhow::Result<()> {
let base_path = base_path();
assert_ne!(base_path, Path::new(""));
let base_path = &base_path;

// create base_path dir
if base_path.exists() {
match force {
InitForceMode::MustNotExist => {

@@ -509,70 +629,96 @@ impl LocalEnv {
}
}
}

if !self.pg_bin_dir(pg_version)?.join("postgres").exists() {
bail!(
"Can't find postgres binary at {}",
self.pg_bin_dir(pg_version)?.display()
);
}
for binary in ["pageserver", "safekeeper"] {
if !self.neon_distrib_dir.join(binary).exists() {
bail!(
"Can't find binary '{binary}' in neon distrib dir '{}'",
self.neon_distrib_dir.display()
);
}
}

if !base_path.exists() {
fs::create_dir(base_path)?;
}

let NeonLocalInitConf {
pg_distrib_dir,
neon_distrib_dir,
default_tenant_id,
broker,
storage_controller,
pageservers,
safekeepers,
control_plane_api,
control_plane_compute_hook_api,
} = conf;

// Find postgres binaries.
// Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "pg_install".
// Note that later in the code we assume, that distrib dirs follow the same pattern
// for all postgres versions.
let pg_distrib_dir = pg_distrib_dir.unwrap_or_else(|| {
if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
postgres_bin.into()
} else {
let cwd = env::current_dir().unwrap();
cwd.join("pg_install")
}
});

// Find neon binaries.
let neon_distrib_dir = neon_distrib_dir
.unwrap_or_else(|| env::current_exe().unwrap().parent().unwrap().to_owned());

// Generate keypair for JWT.
//
// The keypair is only needed if authentication is enabled in any of the
// components. For convenience, we generate the keypair even if authentication
// is not enabled, so that you can easily enable it after the initialization
// step. However, if the key generation fails, we treat it as non-fatal if
// authentication was not enabled.
if self.private_key_path == PathBuf::new() {
match generate_auth_keys(
base_path.join("auth_private_key.pem").as_path(),
base_path.join("auth_public_key.pem").as_path(),
) {
Ok(()) => {
self.private_key_path = PathBuf::from("auth_private_key.pem");
}
Err(e) => {
if !self.auth_keys_needed() {
eprintln!("Could not generate keypair for JWT authentication: {e}");
eprintln!("Continuing anyway because authentication was not enabled");
self.private_key_path = PathBuf::from("auth_private_key.pem");
} else {
return Err(e);
}
}
}
// step.
generate_auth_keys(
base_path.join("auth_private_key.pem").as_path(),
base_path.join("auth_public_key.pem").as_path(),
)
.context("generate auth keys")?;
let private_key_path = PathBuf::from("auth_private_key.pem");

// create the runtime type because the remaining initialization code below needs
// a LocalEnv instance op operation
// TODO: refactor to avoid this, LocalEnv should only be constructed from on-disk state
let env = LocalEnv {
base_data_dir: base_path.clone(),
pg_distrib_dir,
neon_distrib_dir,
default_tenant_id: Some(default_tenant_id),
private_key_path,
broker,
storage_controller: storage_controller.unwrap_or_default(),
pageservers: pageservers.iter().map(Into::into).collect(),
|
||||
safekeepers,
|
||||
control_plane_api: control_plane_api.unwrap_or_default(),
|
||||
control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(),
|
||||
branch_name_mappings: Default::default(),
|
||||
};
|
||||
|
||||
// create endpoints dir
|
||||
fs::create_dir_all(env.endpoints_path())?;
|
||||
|
||||
// create safekeeper dirs
|
||||
for safekeeper in &env.safekeepers {
|
||||
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(&env, safekeeper.id))?;
|
||||
}
|
||||
|
||||
fs::create_dir_all(self.endpoints_path())?;
|
||||
|
||||
for safekeeper in &self.safekeepers {
|
||||
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(self, safekeeper.id))?;
|
||||
// initialize pageserver state
|
||||
for (i, ps) in pageservers.into_iter().enumerate() {
|
||||
let runtime_ps = &env.pageservers[i];
|
||||
assert_eq!(&PageServerConf::from(&ps), runtime_ps);
|
||||
fs::create_dir(env.pageserver_data_dir(ps.id))?;
|
||||
PageServerNode::from_env(&env, runtime_ps)
|
||||
.initialize(ps)
|
||||
.context("pageserver init failed")?;
|
||||
}
|
||||
|
||||
self.persist_config(base_path)
|
||||
}
|
||||
// setup remote remote location for default LocalFs remote storage
|
||||
std::fs::create_dir_all(env.base_data_dir.join(PAGESERVER_REMOTE_STORAGE_DIR))?;
|
||||
|
||||
fn auth_keys_needed(&self) -> bool {
|
||||
self.pageservers.iter().any(|ps| {
|
||||
ps.pg_auth_type == AuthType::NeonJWT || ps.http_auth_type == AuthType::NeonJWT
|
||||
}) || self.safekeepers.iter().any(|sk| sk.auth_enabled)
|
||||
env.persist_config()
|
||||
}
|
||||
}
|
||||
|
||||
fn base_path() -> PathBuf {
|
||||
pub fn base_path() -> PathBuf {
|
||||
match std::env::var_os("NEON_REPO_DIR") {
|
||||
Some(val) => PathBuf::from(val),
|
||||
None => PathBuf::from(".neon"),
|
||||
@@ -615,31 +761,3 @@ fn generate_auth_keys(private_key_path: &Path, public_key_path: &Path) -> anyhow
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn simple_conf_parsing() {
|
||||
let simple_conf_toml = include_str!("../simple.conf");
|
||||
let simple_conf_parse_result = LocalEnv::parse_config(simple_conf_toml);
|
||||
assert!(
|
||||
simple_conf_parse_result.is_ok(),
|
||||
"failed to parse simple config {simple_conf_toml}, reason: {simple_conf_parse_result:?}"
|
||||
);
|
||||
|
||||
let string_to_replace = "listen_addr = '127.0.0.1:50051'";
|
||||
let spoiled_url_str = "listen_addr = '!@$XOXO%^&'";
|
||||
let spoiled_url_toml = simple_conf_toml.replace(string_to_replace, spoiled_url_str);
|
||||
assert!(
|
||||
spoiled_url_toml.contains(spoiled_url_str),
|
||||
"Failed to replace string {string_to_replace} in the toml file {simple_conf_toml}"
|
||||
);
|
||||
let spoiled_url_parse_result = LocalEnv::parse_config(&spoiled_url_toml);
|
||||
assert!(
|
||||
spoiled_url_parse_result.is_err(),
|
||||
"expected toml with invalid Url {spoiled_url_toml} to fail the parsing, but got {spoiled_url_parse_result:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
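The persist_config comment in the diff above works around toml-rs's "values must be emitted before tables" error by serializing through an intermediate toml::Value. A minimal standalone sketch of that pattern (the Config/Safekeeper structs and field names here are made up for illustration):

```rust
use serde::Serialize;

#[derive(Serialize)]
struct Safekeeper {
    id: u64,
    listen_addr: String,
}

#[derive(Serialize)]
struct Config {
    // A table-valued field mixed with plain values can trip up direct
    // serialization, depending on field order; going through toml::Value
    // lets the serializer emit plain values before tables.
    safekeepers: Vec<Safekeeper>,
    default_tenant_id: Option<String>,
}

fn to_toml_string(config: &Config) -> anyhow::Result<String> {
    // Convert to a generic toml::Value first, then pretty-print that.
    let value = toml::Value::try_from(config)?;
    Ok(toml::to_string_pretty(&value)?)
}
```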
@@ -10,7 +10,7 @@ use std::io;
use std::io::Write;
use std::num::NonZeroU64;
use std::path::PathBuf;
use std::process::Command;
use std::str::FromStr;
use std::time::Duration;

use anyhow::{bail, Context};
@@ -30,7 +30,7 @@ use utils::{
lsn::Lsn,
};

use crate::local_env::PageServerConf;
use crate::local_env::{NeonLocalInitPageserverConf, PageServerConf};
use crate::{background_process, local_env::LocalEnv};

/// Directory within .neon which will be used by default for LocalFs remote storage.
@@ -74,71 +74,23 @@ impl PageServerNode {
}
}

/// Merge overrides provided by the user on the command line with our default overides derived from neon_local configuration.
///
/// These all end up on the command line of the `pageserver` binary.
fn neon_local_overrides(&self, cli_overrides: &toml_edit::Document) -> Vec<String> {
fn pageserver_init_make_toml(
&self,
conf: NeonLocalInitPageserverConf,
) -> anyhow::Result<toml_edit::Document> {
assert_eq!(&PageServerConf::from(&conf), &self.conf, "during neon_local init, we derive the runtime state of ps conf (self.conf) from the --config flag fully");

// TODO(christian): instead of what we do here, create a pageserver_api::config::ConfigToml (PR #7656)

// FIXME: the paths should be shell-escaped to handle paths with spaces, quotas etc.
let pg_distrib_dir_param = format!(
"pg_distrib_dir='{}'",
self.env.pg_distrib_dir_raw().display()
);

let PageServerConf {
id,
listen_pg_addr,
listen_http_addr,
pg_auth_type,
http_auth_type,
virtual_file_io_engine,
get_vectored_impl,
get_impl,
validate_vectored_get,
} = &self.conf;

let id = format!("id={}", id);

let http_auth_type_param = format!("http_auth_type='{}'", http_auth_type);
let listen_http_addr_param = format!("listen_http_addr='{}'", listen_http_addr);

let pg_auth_type_param = format!("pg_auth_type='{}'", pg_auth_type);
let listen_pg_addr_param = format!("listen_pg_addr='{}'", listen_pg_addr);
let virtual_file_io_engine = if let Some(virtual_file_io_engine) = virtual_file_io_engine {
format!("virtual_file_io_engine='{virtual_file_io_engine}'")
} else {
String::new()
};
let get_vectored_impl = if let Some(get_vectored_impl) = get_vectored_impl {
format!("get_vectored_impl='{get_vectored_impl}'")
} else {
String::new()
};
let get_impl = if let Some(get_impl) = get_impl {
format!("get_impl='{get_impl}'")
} else {
String::new()
};
let validate_vectored_get = if let Some(validate_vectored_get) = validate_vectored_get {
format!("validate_vectored_get={validate_vectored_get}")
} else {
String::new()
};

let broker_endpoint_param = format!("broker_endpoint='{}'", self.env.broker.client_url());

let mut overrides = vec![
id,
pg_distrib_dir_param,
http_auth_type_param,
pg_auth_type_param,
listen_http_addr_param,
listen_pg_addr_param,
broker_endpoint_param,
virtual_file_io_engine,
get_vectored_impl,
get_impl,
validate_vectored_get,
];
let mut overrides = vec![pg_distrib_dir_param, broker_endpoint_param];

if let Some(control_plane_api) = &self.env.control_plane_api {
overrides.push(format!(
@@ -148,7 +100,7 @@ impl PageServerNode {

// Storage controller uses the same auth as pageserver: if JWT is enabled
// for us, we will also need it to talk to them.
if matches!(http_auth_type, AuthType::NeonJWT) {
if matches!(conf.http_auth_type, AuthType::NeonJWT) {
let jwt_token = self
.env
.generate_auth_token(&Claims::new(None, Scope::GenerationsApi))
@@ -157,28 +109,40 @@ impl PageServerNode {
}
}

if !cli_overrides.contains_key("remote_storage") {
if !conf.other.contains_key("remote_storage") {
overrides.push(format!(
"remote_storage={{local_path='../{PAGESERVER_REMOTE_STORAGE_DIR}'}}"
));
}

if *http_auth_type != AuthType::Trust || *pg_auth_type != AuthType::Trust {
if conf.http_auth_type != AuthType::Trust || conf.pg_auth_type != AuthType::Trust {
// Keys are generated in the toplevel repo dir, pageservers' workdirs
// are one level below that, so refer to keys with ../
overrides.push("auth_validation_public_key_path='../auth_public_key.pem'".to_owned());
}

// Apply the user-provided overrides
overrides.push(cli_overrides.to_string());
overrides.push(
toml_edit::ser::to_string_pretty(&conf)
.expect("we deserialized this from toml earlier"),
);

overrides
// Turn `overrides` into a toml document.
// TODO: above code is legacy code, it should be refactored to use toml_edit directly.
let mut config_toml = toml_edit::Document::new();
for fragment_str in overrides {
let fragment = toml_edit::Document::from_str(&fragment_str)
.expect("all fragments in `overrides` are valid toml documents, this function controls that");
for (key, item) in fragment.iter() {
config_toml.insert(key, item.clone());
}
}
Ok(config_toml)
}

/// Initializes a pageserver node by creating its config with the overrides provided.
pub fn initialize(&self, config_overrides: &toml_edit::Document) -> anyhow::Result<()> {
// First, run `pageserver --init` and wait for it to write a config into FS and exit.
self.pageserver_init(config_overrides)
pub fn initialize(&self, conf: NeonLocalInitPageserverConf) -> anyhow::Result<()> {
self.pageserver_init(conf)
.with_context(|| format!("Failed to run init for pageserver node {}", self.conf.id))
}

@@ -198,7 +162,7 @@ impl PageServerNode {
self.start_node().await
}

fn pageserver_init(&self, config_overrides: &toml_edit::Document) -> anyhow::Result<()> {
fn pageserver_init(&self, conf: NeonLocalInitPageserverConf) -> anyhow::Result<()> {
let datadir = self.repo_path();
let node_id = self.conf.id;
println!(
@@ -209,36 +173,20 @@ impl PageServerNode {
);
io::stdout().flush()?;

if !datadir.exists() {
std::fs::create_dir(&datadir)?;
}

let datadir_path_str = datadir.to_str().with_context(|| {
format!("Cannot start pageserver node {node_id} in path that has no string representation: {datadir:?}")
})?;

// `pageserver --init` merges the `--config-override`s into a built-in default config,
// then writes out the merged product to `pageserver.toml`.
// TODO: just write the full `pageserver.toml` and get rid of `--config-override`.
let mut args = vec!["--init", "--workdir", datadir_path_str];
let overrides = self.neon_local_overrides(config_overrides);
for piece in &overrides {
args.push("--config-override");
args.push(piece);
}
let init_output = Command::new(self.env.pageserver_bin())
.args(args)
.envs(self.pageserver_env_variables()?)
.output()
.with_context(|| format!("Failed to run pageserver init for node {node_id}"))?;

anyhow::ensure!(
init_output.status.success(),
"Pageserver init for node {} did not finish successfully, stdout: {}, stderr: {}",
node_id,
String::from_utf8_lossy(&init_output.stdout),
String::from_utf8_lossy(&init_output.stderr),
);
let config = self
.pageserver_init_make_toml(conf)
.context("make pageserver toml")?;
let config_file_path = datadir.join("pageserver.toml");
let mut config_file = std::fs::OpenOptions::new()
.create_new(true)
.write(true)
.open(&config_file_path)
.with_context(|| format!("open pageserver toml for write: {config_file_path:?}"))?;
config_file
.write_all(config.to_string().as_bytes())
.context("write pageserver toml")?;
drop(config_file);
// TODO: invoke a TBD config-check command to validate that pageserver will start with the written config

// Write metadata file, used by pageserver on startup to register itself with
// the storage controller
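pageserver_init_make_toml above builds the final pageserver.toml by parsing each override fragment and copying its top-level keys into one document. A small self-contained sketch of that merge step, using toml_edit the same way the diff does (the fragment contents below are invented):

```rust
use std::str::FromStr;

// Merge several small TOML fragments into one document; later fragments
// win on key collisions, mirroring the loop in pageserver_init_make_toml.
fn merge_toml_fragments(fragments: &[&str]) -> anyhow::Result<toml_edit::Document> {
    let mut merged = toml_edit::Document::new();
    for fragment_str in fragments {
        // Each fragment must itself be a valid TOML document.
        let fragment = toml_edit::Document::from_str(fragment_str)?;
        for (key, item) in fragment.iter() {
            merged.insert(key, item.clone());
        }
    }
    Ok(merged)
}

fn main() -> anyhow::Result<()> {
    let merged = merge_toml_fragments(&[
        "id=1",
        "listen_pg_addr='127.0.0.1:64000'",
        "remote_storage={local_path='../local_fs_remote_storage/pageserver'}",
    ])?;
    println!("{merged}");
    Ok(())
}
```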
@@ -305,6 +305,10 @@ impl StorageController {
));
}

if let Some(split_threshold) = self.config.split_threshold.as_ref() {
args.push(format!("--split-threshold={split_threshold}"))
}

background_process::start_process(
COMMAND,
&self.env.base_data_dir,

@@ -745,6 +745,16 @@ impl HistoricLayerInfo {
};
*field = value;
}
pub fn layer_file_size(&self) -> u64 {
match self {
HistoricLayerInfo::Delta {
layer_file_size, ..
} => *layer_file_size,
HistoricLayerInfo::Image {
layer_file_size, ..
} => *layer_file_size,
}
}
}

#[derive(Debug, Serialize, Deserialize)]
@@ -776,9 +786,6 @@ pub struct TimelineGcRequest {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WalRedoManagerProcessStatus {
pub pid: u32,
/// The strum-generated `into::<&'static str>()` for `pageserver::walredo::ProcessKind`.
/// `ProcessKind` are a transitory thing, so, they have no enum representation in `pageserver_api`.
pub kind: Cow<'static, str>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -817,6 +824,55 @@ pub struct TenantScanRemoteStorageResponse {
pub shards: Vec<TenantScanRemoteStorageShard>,
}

#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "snake_case")]
pub enum TenantSorting {
ResidentSize,
MaxLogicalSize,
}

impl Default for TenantSorting {
fn default() -> Self {
Self::ResidentSize
}
}

#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct TopTenantShardsRequest {
// How would you like to sort the tenants?
pub order_by: TenantSorting,

// How many results?
pub limit: usize,

// Omit tenants with more than this many shards (e.g. if this is the max number of shards
// that the caller would ever split to)
pub where_shards_lt: Option<ShardCount>,

// Omit tenants where the ordering metric is less than this (this is an optimization to
// let us quickly exclude numerous tiny shards)
pub where_gt: Option<u64>,
}

#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
pub struct TopTenantShardItem {
pub id: TenantShardId,

/// Total size of layers on local disk for all timelines in this tenant
pub resident_size: u64,

/// Total size of layers in remote storage for all timelines in this tenant
pub physical_size: u64,

/// The largest logical size of a timeline within this tenant
pub max_logical_size: u64,
}

#[derive(Serialize, Deserialize, Debug, Default)]
pub struct TopTenantShardsResponse {
pub shards: Vec<TopTenantShardItem>,
}

pub mod virtual_file {
#[derive(
Copy,
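The new top-tenants types above are plain serde structs with a snake_case enum, so the wire format follows directly from the field names. A standalone sketch that mirrors those definitions purely to show the JSON shape (the real types live in pageserver_api; the Option<u8> below stands in for Option<ShardCount>):

```rust
use serde::Serialize;

// Illustration-only mirrors of the request types shown in the diff above.
#[derive(Serialize)]
#[serde(rename_all = "snake_case")]
#[allow(dead_code)]
enum TenantSorting {
    ResidentSize,
    MaxLogicalSize,
}

#[derive(Serialize)]
struct TopTenantShardsRequest {
    order_by: TenantSorting,
    limit: usize,
    where_shards_lt: Option<u8>, // stand-in for Option<ShardCount>
    where_gt: Option<u64>,
}

fn main() -> anyhow::Result<()> {
    let body = TopTenantShardsRequest {
        order_by: TenantSorting::MaxLogicalSize,
        limit: 10,
        where_shards_lt: Some(8),
        where_gt: Some(64 * 1024 * 1024),
    };
    // {"order_by":"max_logical_size","limit":10,"where_shards_lt":8,"where_gt":67108864}
    println!("{}", serde_json::to_string(&body)?);
    Ok(())
}
```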
@@ -125,7 +125,7 @@ impl ShardCount {

/// `v` may be zero, or the number of shards in the tenant. `v` is what
/// [`Self::literal`] would return.
pub fn new(val: u8) -> Self {
pub const fn new(val: u8) -> Self {
Self(val)
}
}
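Making ShardCount::new a const fn (above) does not change its behavior, only where it may be called: the constructor can now be evaluated in const contexts. A tiny sketch with a stand-in newtype:

```rust
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ShardCount(u8); // stand-in for the real pageserver_api type

impl ShardCount {
    const fn new(val: u8) -> Self {
        Self(val)
    }
}

// With `const fn`, the constructor is usable in constants and statics.
const UNSHARDED: ShardCount = ShardCount::new(0);

fn main() {
    assert_eq!(UNSHARDED, ShardCount::new(0));
}
```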
@@ -29,6 +29,7 @@ use http_types::{StatusCode, Url};
use tokio_util::sync::CancellationToken;
use tracing::debug;

use crate::RemoteStorageActivity;
use crate::{
error::Cancelled, s3_bucket::RequestKind, AzureConfig, ConcurrencyLimiter, Download,
DownloadError, Listing, ListingMode, RemotePath, RemoteStorage, StorageMetadata,
@@ -525,6 +526,10 @@ impl RemoteStorage for AzureBlobStorage {
// https://learn.microsoft.com/en-us/azure/storage/blobs/point-in-time-restore-overview
Err(TimeTravelError::Unimplemented)
}

fn activity(&self) -> RemoteStorageActivity {
self.concurrency_limiter.activity()
}
}

pin_project_lite::pin_project! {

@@ -263,6 +263,17 @@ pub trait RemoteStorage: Send + Sync + 'static {
done_if_after: SystemTime,
cancel: &CancellationToken,
) -> Result<(), TimeTravelError>;

/// Query how busy we currently are: may be used by callers which wish to politely
/// back off if there are already a lot of operations underway.
fn activity(&self) -> RemoteStorageActivity;
}

pub struct RemoteStorageActivity {
pub read_available: usize,
pub read_total: usize,
pub write_available: usize,
pub write_total: usize,
}

/// DownloadStream is sensitive to the timeout and cancellation used with the original
@@ -444,6 +455,15 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
}
}
}

pub fn activity(&self) -> RemoteStorageActivity {
match self {
Self::LocalFs(s) => s.activity(),
Self::AwsS3(s) => s.activity(),
Self::AzureBlob(s) => s.activity(),
Self::Unreliable(s) => s.activity(),
}
}
}

impl GenericRemoteStorage {
@@ -774,6 +794,9 @@ struct ConcurrencyLimiter {
// The helps to ensure we don't exceed the thresholds.
write: Arc<Semaphore>,
read: Arc<Semaphore>,

write_total: usize,
read_total: usize,
}

impl ConcurrencyLimiter {
@@ -802,10 +825,21 @@ impl ConcurrencyLimiter {
Arc::clone(self.for_kind(kind)).acquire_owned().await
}

fn activity(&self) -> RemoteStorageActivity {
RemoteStorageActivity {
read_available: self.read.available_permits(),
read_total: self.read_total,
write_available: self.write.available_permits(),
write_total: self.write_total,
}
}

fn new(limit: usize) -> ConcurrencyLimiter {
Self {
read: Arc::new(Semaphore::new(limit)),
write: Arc::new(Semaphore::new(limit)),
read_total: limit,
write_total: limit,
}
}
}
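The new activity() methods above report how many permits of a tokio Semaphore are currently free versus the configured total, so callers can back off when the limiter is busy. A stripped-down sketch of that pattern outside the remote_storage types (the Limiter/Activity names are mine):

```rust
use std::sync::Arc;
use tokio::sync::Semaphore;

struct Limiter {
    permits: Arc<Semaphore>,
    total: usize,
}

struct Activity {
    available: usize,
    total: usize,
}

impl Limiter {
    fn new(limit: usize) -> Self {
        Self {
            permits: Arc::new(Semaphore::new(limit)),
            total: limit,
        }
    }

    // Snapshot of current utilization; few available permits means the
    // caller should consider backing off.
    fn activity(&self) -> Activity {
        Activity {
            available: self.permits.available_permits(),
            total: self.total,
        }
    }
}

#[tokio::main]
async fn main() {
    let limiter = Limiter::new(4);
    let _held = limiter.permits.clone().acquire_owned().await.unwrap();
    let activity = limiter.activity();
    assert_eq!(activity.total, 4);
    assert_eq!(activity.available, 3);
}
```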
@@ -23,8 +23,8 @@ use tokio_util::{io::ReaderStream, sync::CancellationToken};
use utils::crashsafe::path_with_suffix_extension;

use crate::{
Download, DownloadError, Listing, ListingMode, RemotePath, TimeTravelError, TimeoutOrCancel,
REMOTE_STORAGE_PREFIX_SEPARATOR,
Download, DownloadError, Listing, ListingMode, RemotePath, RemoteStorageActivity,
TimeTravelError, TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR,
};

use super::{RemoteStorage, StorageMetadata};
@@ -605,6 +605,16 @@ impl RemoteStorage for LocalFs {
) -> Result<(), TimeTravelError> {
Err(TimeTravelError::Unimplemented)
}

fn activity(&self) -> RemoteStorageActivity {
// LocalFS has no concurrency limiting: give callers the impression that plenty of units are available
RemoteStorageActivity {
read_available: 16,
read_total: 16,
write_available: 16,
write_total: 16,
}
}
}

fn storage_metadata_path(original_path: &Utf8Path) -> Utf8PathBuf {

@@ -27,7 +27,7 @@ use aws_config::{
};
use aws_credential_types::provider::SharedCredentialsProvider;
use aws_sdk_s3::{
config::{AsyncSleep, Builder, IdentityCache, Region, SharedAsyncSleep},
config::{AsyncSleep, IdentityCache, Region, SharedAsyncSleep},
error::SdkError,
operation::get_object::GetObjectError,
types::{Delete, DeleteMarkerEntry, ObjectIdentifier, ObjectVersion, StorageClass},
@@ -47,8 +47,8 @@ use utils::backoff;
use super::StorageMetadata;
use crate::{
error::Cancelled, support::PermitCarrying, ConcurrencyLimiter, Download, DownloadError,
Listing, ListingMode, RemotePath, RemoteStorage, S3Config, TimeTravelError, TimeoutOrCancel,
MAX_KEYS_PER_DELETE, REMOTE_STORAGE_PREFIX_SEPARATOR,
Listing, ListingMode, RemotePath, RemoteStorage, RemoteStorageActivity, S3Config,
TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE, REMOTE_STORAGE_PREFIX_SEPARATOR,
};

pub(super) mod metrics;
@@ -75,13 +75,13 @@ struct GetObjectRequest {
}
impl S3Bucket {
/// Creates the S3 storage, errors if incorrect AWS S3 configuration provided.
pub fn new(aws_config: &S3Config, timeout: Duration) -> anyhow::Result<Self> {
pub fn new(remote_storage_config: &S3Config, timeout: Duration) -> anyhow::Result<Self> {
tracing::debug!(
"Creating s3 remote storage for S3 bucket {}",
aws_config.bucket_name
remote_storage_config.bucket_name
);

let region = Some(Region::new(aws_config.bucket_region.clone()));
let region = Some(Region::new(remote_storage_config.bucket_region.clone()));

let provider_conf = ProviderConfig::without_region().with_region(region.clone());

@@ -113,6 +113,38 @@ impl S3Bucket {
// AWS SDK requires us to specify how the RetryConfig should sleep when it wants to back off
let sleep_impl: Arc<dyn AsyncSleep> = Arc::new(TokioSleep::new());

let sdk_config_loader: aws_config::ConfigLoader = aws_config::defaults(
#[allow(deprecated)] /* TODO: https://github.com/neondatabase/neon/issues/7665 */
BehaviorVersion::v2023_11_09(),
)
.region(region)
.identity_cache(IdentityCache::lazy().build())
.credentials_provider(SharedCredentialsProvider::new(credentials_provider))
.sleep_impl(SharedAsyncSleep::from(sleep_impl));

let sdk_config: aws_config::SdkConfig = std::thread::scope(|s| {
s.spawn(|| {
// TODO: make this function async.
tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.unwrap()
.block_on(sdk_config_loader.load())
})
.join()
.unwrap()
});

let mut s3_config_builder = aws_sdk_s3::config::Builder::from(&sdk_config);

// Technically, the `remote_storage_config.endpoint` field only applies to S3 interactions.
// (In case we ever re-use the `sdk_config` for more than just the S3 client in the future)
if let Some(custom_endpoint) = remote_storage_config.endpoint.clone() {
s3_config_builder = s3_config_builder
.endpoint_url(custom_endpoint)
.force_path_style(true);
}

// We do our own retries (see [`backoff::retry`]). However, for the AWS SDK to enable rate limiting in response to throttling
// responses (e.g. 429 on too many ListObjectsv2 requests), we must provide a retry config. We set it to use at most one
// attempt, and enable 'Adaptive' mode, which causes rate limiting to be enabled.
@@ -120,42 +152,36 @@ impl S3Bucket {
retry_config
.set_max_attempts(Some(1))
.set_mode(Some(RetryMode::Adaptive));
s3_config_builder = s3_config_builder.retry_config(retry_config.build());

let mut config_builder = Builder::default()
.behavior_version(BehaviorVersion::v2023_11_09())
.region(region)
.identity_cache(IdentityCache::lazy().build())
.credentials_provider(SharedCredentialsProvider::new(credentials_provider))
.retry_config(retry_config.build())
.sleep_impl(SharedAsyncSleep::from(sleep_impl));
let s3_config = s3_config_builder.build();
let client = aws_sdk_s3::Client::from_conf(s3_config);

if let Some(custom_endpoint) = aws_config.endpoint.clone() {
config_builder = config_builder
.endpoint_url(custom_endpoint)
.force_path_style(true);
}
let prefix_in_bucket = remote_storage_config
.prefix_in_bucket
.as_deref()
.map(|prefix| {
let mut prefix = prefix;
while prefix.starts_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {
prefix = &prefix[1..]
}

let client = Client::from_conf(config_builder.build());
let mut prefix = prefix.to_string();
while prefix.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {
prefix.pop();
}
prefix
});

let prefix_in_bucket = aws_config.prefix_in_bucket.as_deref().map(|prefix| {
let mut prefix = prefix;
while prefix.starts_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {
prefix = &prefix[1..]
}

let mut prefix = prefix.to_string();
while prefix.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {
prefix.pop();
}
prefix
});
Ok(Self {
client,
bucket_name: aws_config.bucket_name.clone(),
max_keys_per_list_response: aws_config.max_keys_per_list_response,
bucket_name: remote_storage_config.bucket_name.clone(),
max_keys_per_list_response: remote_storage_config.max_keys_per_list_response,
prefix_in_bucket,
concurrency_limiter: ConcurrencyLimiter::new(aws_config.concurrency_limit.get()),
upload_storage_class: aws_config.upload_storage_class.clone(),
concurrency_limiter: ConcurrencyLimiter::new(
remote_storage_config.concurrency_limit.get(),
),
upload_storage_class: remote_storage_config.upload_storage_class.clone(),
timeout,
})
}
@@ -949,6 +975,10 @@ impl RemoteStorage for S3Bucket {
}
Ok(())
}

fn activity(&self) -> RemoteStorageActivity {
self.concurrency_limiter.activity()
}
}

/// On drop (cancellation) count towards [`metrics::BucketMetrics::cancelled_waits`].
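The S3Bucket::new rework above keeps the existing prefix normalization: strip leading and trailing '/' so the prefix joins cleanly with object keys. The same transformation as a small free function, assuming only the separator constant from the source (the function name is mine):

```rust
const REMOTE_STORAGE_PREFIX_SEPARATOR: char = '/';

// Strip leading and trailing separators from an optional bucket prefix,
// mirroring the normalization loops in S3Bucket::new above.
fn normalize_prefix(prefix: Option<&str>) -> Option<String> {
    prefix.map(|p| p.trim_matches(REMOTE_STORAGE_PREFIX_SEPARATOR).to_string())
}

fn main() {
    assert_eq!(
        normalize_prefix(Some("/pageserver/v1/")),
        Some("pageserver/v1".to_string())
    );
    assert_eq!(normalize_prefix(None), None);
}
```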
@@ -12,7 +12,7 @@ use tokio_util::sync::CancellationToken;

use crate::{
Download, DownloadError, GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorage,
StorageMetadata, TimeTravelError,
RemoteStorageActivity, StorageMetadata, TimeTravelError,
};

pub struct UnreliableWrapper {
@@ -213,4 +213,8 @@ impl RemoteStorage for UnreliableWrapper {
.time_travel_recover(prefix, timestamp, done_if_after, cancel)
.await
}

fn activity(&self) -> RemoteStorageActivity {
self.inner.activity()
}
}

@@ -3,7 +3,7 @@
//! # Example
//!
//! ```
//! # tokio_test::block_on(async {
//! # tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap().block_on(async {
//! use utils::poison::Poison;
//! use std::time::Duration;
//!

@@ -88,7 +88,6 @@ reqwest.workspace = true
rpds.workspace = true
enum-map.workspace = true
enumset = { workspace = true, features = ["serde"]}
sha2.workspace = true
strum.workspace = true
strum_macros.workspace = true

@@ -1,7 +1,7 @@
use pageserver::keyspace::{KeyPartitioning, KeySpace};
use pageserver::repository::Key;
use pageserver::tenant::layer_map::LayerMap;
use pageserver::tenant::storage_layer::LayerFileName;
use pageserver::tenant::storage_layer::LayerName;
use pageserver::tenant::storage_layer::PersistentLayerDesc;
use pageserver_api::shard::TenantShardId;
use rand::prelude::{SeedableRng, SliceRandom, StdRng};
@@ -28,7 +28,7 @@ fn build_layer_map(filename_dump: PathBuf) -> LayerMap {
let mut updates = layer_map.batch_update();
for fname in filenames {
let fname = fname.unwrap();
let fname = LayerFileName::from_str(&fname).unwrap();
let fname = LayerName::from_str(&fname).unwrap();
let layer = PersistentLayerDesc::from(fname);

let lsn_range = layer.get_lsn_range();

@@ -30,47 +30,27 @@
//! 2024-04-15 on i3en.3xlarge
//!
//! ```text
//! async-short/1 time: [24.584 µs 24.737 µs 24.922 µs]
//! async-short/2 time: [33.479 µs 33.660 µs 33.888 µs]
//! async-short/4 time: [42.713 µs 43.046 µs 43.440 µs]
//! async-short/8 time: [71.814 µs 72.478 µs 73.240 µs]
//! async-short/16 time: [132.73 µs 134.45 µs 136.22 µs]
//! async-short/32 time: [258.31 µs 260.73 µs 263.27 µs]
//! async-short/64 time: [511.61 µs 514.44 µs 517.51 µs]
//! async-short/128 time: [992.64 µs 998.23 µs 1.0042 ms]
//! async-medium/1 time: [110.11 µs 110.50 µs 110.96 µs]
//! async-medium/2 time: [153.06 µs 153.85 µs 154.99 µs]
//! async-medium/4 time: [317.51 µs 319.92 µs 322.85 µs]
//! async-medium/8 time: [638.30 µs 644.68 µs 652.12 µs]
//! async-medium/16 time: [1.2651 ms 1.2773 ms 1.2914 ms]
//! async-medium/32 time: [2.5117 ms 2.5410 ms 2.5720 ms]
//! async-medium/64 time: [4.8088 ms 4.8555 ms 4.9047 ms]
//! async-medium/128 time: [8.8311 ms 8.9849 ms 9.1263 ms]
//! sync-short/1 time: [25.503 µs 25.626 µs 25.771 µs]
//! sync-short/2 time: [30.850 µs 31.013 µs 31.208 µs]
//! sync-short/4 time: [45.543 µs 45.856 µs 46.193 µs]
//! sync-short/8 time: [84.114 µs 84.639 µs 85.220 µs]
//! sync-short/16 time: [185.22 µs 186.15 µs 187.13 µs]
//! sync-short/32 time: [377.43 µs 378.87 µs 380.46 µs]
//! sync-short/64 time: [756.49 µs 759.04 µs 761.70 µs]
//! sync-short/128 time: [1.4825 ms 1.4874 ms 1.4923 ms]
//! sync-medium/1 time: [105.66 µs 106.01 µs 106.43 µs]
//! sync-medium/2 time: [153.10 µs 153.84 µs 154.72 µs]
//! sync-medium/4 time: [327.13 µs 329.44 µs 332.27 µs]
//! sync-medium/8 time: [654.26 µs 658.73 µs 663.63 µs]
//! sync-medium/16 time: [1.2682 ms 1.2748 ms 1.2816 ms]
//! sync-medium/32 time: [2.4456 ms 2.4595 ms 2.4731 ms]
//! sync-medium/64 time: [4.6523 ms 4.6890 ms 4.7256 ms]
//! sync-medium/128 time: [8.7215 ms 8.8323 ms 8.9344 ms]
//! short/1 time: [24.584 µs 24.737 µs 24.922 µs]
//! short/2 time: [33.479 µs 33.660 µs 33.888 µs]
//! short/4 time: [42.713 µs 43.046 µs 43.440 µs]
//! short/8 time: [71.814 µs 72.478 µs 73.240 µs]
//! short/16 time: [132.73 µs 134.45 µs 136.22 µs]
//! short/32 time: [258.31 µs 260.73 µs 263.27 µs]
//! short/64 time: [511.61 µs 514.44 µs 517.51 µs]
//! short/128 time: [992.64 µs 998.23 µs 1.0042 ms]
//! medium/1 time: [110.11 µs 110.50 µs 110.96 µs]
//! medium/2 time: [153.06 µs 153.85 µs 154.99 µs]
//! medium/4 time: [317.51 µs 319.92 µs 322.85 µs]
//! medium/8 time: [638.30 µs 644.68 µs 652.12 µs]
//! medium/16 time: [1.2651 ms 1.2773 ms 1.2914 ms]
//! medium/32 time: [2.5117 ms 2.5410 ms 2.5720 ms]
//! medium/64 time: [4.8088 ms 4.8555 ms 4.9047 ms]
//! medium/128 time: [8.8311 ms 8.9849 ms 9.1263 ms]
//! ```

use bytes::{Buf, Bytes};
use criterion::{BenchmarkId, Criterion};
use pageserver::{
config::PageServerConf,
walrecord::NeonWalRecord,
walredo::{PostgresRedoManager, ProcessKind},
};
use pageserver::{config::PageServerConf, walrecord::NeonWalRecord, walredo::PostgresRedoManager};
use pageserver_api::{key::Key, shard::TenantShardId};
use std::{
sync::Arc,
@@ -80,39 +60,32 @@ use tokio::{sync::Barrier, task::JoinSet};
use utils::{id::TenantId, lsn::Lsn};

fn bench(c: &mut Criterion) {
for process_kind in &[ProcessKind::Async, ProcessKind::Sync] {
{
let nclients = [1, 2, 4, 8, 16, 32, 64, 128];
for nclients in nclients {
let mut group = c.benchmark_group(format!("{process_kind}-short"));
group.bench_with_input(
BenchmarkId::from_parameter(nclients),
&nclients,
|b, nclients| {
let redo_work = Arc::new(Request::short_input());
b.iter_custom(|iters| {
bench_impl(*process_kind, Arc::clone(&redo_work), iters, *nclients)
});
},
);
}
{
let nclients = [1, 2, 4, 8, 16, 32, 64, 128];
for nclients in nclients {
let mut group = c.benchmark_group("short");
group.bench_with_input(
BenchmarkId::from_parameter(nclients),
&nclients,
|b, nclients| {
let redo_work = Arc::new(Request::short_input());
b.iter_custom(|iters| bench_impl(Arc::clone(&redo_work), iters, *nclients));
},
);
}

{
let nclients = [1, 2, 4, 8, 16, 32, 64, 128];
for nclients in nclients {
let mut group = c.benchmark_group(format!("{process_kind}-medium"));
group.bench_with_input(
BenchmarkId::from_parameter(nclients),
&nclients,
|b, nclients| {
let redo_work = Arc::new(Request::medium_input());
b.iter_custom(|iters| {
bench_impl(*process_kind, Arc::clone(&redo_work), iters, *nclients)
});
},
);
}
}
{
let nclients = [1, 2, 4, 8, 16, 32, 64, 128];
for nclients in nclients {
let mut group = c.benchmark_group("medium");
group.bench_with_input(
BenchmarkId::from_parameter(nclients),
&nclients,
|b, nclients| {
let redo_work = Arc::new(Request::medium_input());
b.iter_custom(|iters| bench_impl(Arc::clone(&redo_work), iters, *nclients));
},
);
}
}
}
@@ -120,16 +93,10 @@ criterion::criterion_group!(benches, bench);
criterion::criterion_main!(benches);

// Returns the sum of each client's wall-clock time spent executing their share of the n_redos.
fn bench_impl(
process_kind: ProcessKind,
redo_work: Arc<Request>,
n_redos: u64,
nclients: u64,
) -> Duration {
fn bench_impl(redo_work: Arc<Request>, n_redos: u64, nclients: u64) -> Duration {
let repo_dir = camino_tempfile::tempdir_in(env!("CARGO_TARGET_TMPDIR")).unwrap();

let mut conf = PageServerConf::dummy_conf(repo_dir.path().to_path_buf());
conf.walredo_process_kind = process_kind;
let conf = PageServerConf::dummy_conf(repo_dir.path().to_path_buf());
let conf = Box::leak(Box::new(conf));
let tenant_shard_id = TenantShardId::unsharded(TenantId::generate());

@@ -158,27 +125,13 @@ fn bench_impl(
});
}

let elapsed = rt.block_on(async move {
rt.block_on(async move {
let mut total_wallclock_time = Duration::ZERO;
while let Some(res) = tasks.join_next().await {
total_wallclock_time += res.unwrap();
}
total_wallclock_time
});

// consistency check to ensure process kind setting worked
if nredos_per_client > 0 {
assert_eq!(
manager
.status()
.process
.map(|p| p.kind)
.expect("the benchmark work causes a walredo process to be spawned"),
std::borrow::Cow::Borrowed(process_kind.into())
);
}

elapsed
})
}

async fn client(
@@ -486,6 +486,18 @@ impl Client {
.map_err(Error::ReceiveBody)
}

pub async fn top_tenant_shards(
&self,
request: TopTenantShardsRequest,
) -> Result<TopTenantShardsResponse> {
let uri = format!("{}/v1/top_tenants", self.mgmt_api_endpoint);
self.request(Method::POST, uri, request)
.await?
.json()
.await
.map_err(Error::ReceiveBody)
}

pub async fn layer_map_info(
&self,
tenant_shard_id: TenantShardId,
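The new client method above issues a POST to {mgmt endpoint}/v1/top_tenants with a TopTenantShardsRequest body. A hedged sketch of the equivalent raw HTTP call with reqwest; only the path and field names come from the diff, the endpoint, values, and the use of reqwest here are illustrative assumptions (the in-tree code goes through mgmt_api::Client instead):

```rust
use serde_json::json;

// Illustration only: POST the top-tenants query directly over HTTP.
async fn top_tenants_raw(mgmt_api_endpoint: &str) -> anyhow::Result<serde_json::Value> {
    let uri = format!("{mgmt_api_endpoint}/v1/top_tenants");
    let body = json!({
        "order_by": "resident_size",
        "limit": 5,
        "where_shards_lt": 8,
        "where_gt": 0,
    });
    let response = reqwest::Client::new()
        .post(uri)
        .json(&body)
        .send()
        .await?
        .error_for_status()?
        .json::<serde_json::Value>()
        .await?;
    Ok(response)
}
```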
@@ -24,7 +24,9 @@ use tracing::{debug, info};
use std::collections::{HashSet, VecDeque};
use std::ops::Range;

use crate::helpers::{accum_key_values, keyspace_total_size, merge_delta_keys, overlaps_with};
use crate::helpers::{
accum_key_values, keyspace_total_size, merge_delta_keys_buffered, overlaps_with,
};
use crate::interface::*;
use utils::lsn::Lsn;

@@ -104,7 +106,13 @@ pub async fn compact_tiered<E: CompactionJobExecutor>(
ctx,
)
.await?;
if target_file_size == u64::MAX {
if current_level_target_height == u64::MAX {
// our target height includes all possible lsns
info!(
level = current_level_no,
depth = depth,
"compaction loop reached max current_level_target_height"
);
break;
}
current_level_no += 1;
@@ -522,8 +530,6 @@ where
// If we have accumulated only a narrow band of keyspace, create an
// image layer. Otherwise write a delta layer.

// FIXME: deal with the case of lots of values for same key

// FIXME: we are ignoring images here. Did we already divide the work
// so that we won't encounter them here?

@@ -535,43 +541,101 @@ where
}
}
// Open stream
let key_value_stream = std::pin::pin!(merge_delta_keys::<E>(deltas.as_slice(), ctx));
let key_value_stream =
std::pin::pin!(merge_delta_keys_buffered::<E>(deltas.as_slice(), ctx)
.await?
.map(Result::<_, anyhow::Error>::Ok));
let mut new_jobs = Vec::new();

// Slide a window through the keyspace
let mut key_accum = std::pin::pin!(accum_key_values(key_value_stream));
let mut key_accum =
std::pin::pin!(accum_key_values(key_value_stream, self.target_file_size));
let mut all_in_window: bool = false;
let mut window = Window::new();

// Helper function to create a job for a new delta layer with given key-lsn
// rectangle.
let create_delta_job = |key_range, lsn_range: &Range<Lsn>, new_jobs: &mut Vec<_>| {
// The inputs for the job are all the input layers of the original job that
// overlap with the rectangle.
let batch_layers: Vec<LayerId> = job
.input_layers
.iter()
.filter(|layer_id| {
overlaps_with(self.layers[layer_id.0].layer.key_range(), &key_range)
})
.cloned()
.collect();
assert!(!batch_layers.is_empty());
new_jobs.push(CompactionJob {
key_range,
lsn_range: lsn_range.clone(),
strategy: CompactionStrategy::CreateDelta,
input_layers: batch_layers,
completed: false,
});
};

loop {
if all_in_window && window.elems.is_empty() {
if all_in_window && window.is_empty() {
// All done!
break;
}

// If we now have enough keyspace for next delta layer in the window, create a
// new delta layer
if let Some(key_range) = window.choose_next_delta(self.target_file_size, !all_in_window)
{
let batch_layers: Vec<LayerId> = job
.input_layers
.iter()
.filter(|layer_id| {
overlaps_with(self.layers[layer_id.0].layer.key_range(), &key_range)
})
.cloned()
.collect();
assert!(!batch_layers.is_empty());
new_jobs.push(CompactionJob {
key_range,
lsn_range: job.lsn_range.clone(),
strategy: CompactionStrategy::CreateDelta,
input_layers: batch_layers,
completed: false,
});
} else {
assert!(!all_in_window);
if let Some(next_key) = key_accum.next().await.transpose()? {
window.feed(next_key.key, next_key.size);
} else {
create_delta_job(key_range, &job.lsn_range, &mut new_jobs);
continue;
}
assert!(!all_in_window);

// Process next key in the key space
match key_accum.next().await.transpose()? {
None => {
all_in_window = true;
}
Some(next_key) if next_key.partition_lsns.is_empty() => {
// Normal case: extend the window by the key
window.feed(next_key.key, next_key.size);
}
Some(next_key) => {
// A key with too large size impact for a single delta layer. This
// case occurs if you make a huge number of updates for a single key.
//
// Drain the window with has_more = false to make a clean cut before
// the key, and then make dedicated delta layers for the single key.
//
// We cannot cluster the key with the others, because we don't want
// layer files to overlap with each other in the lsn,key space (no
// overlaps for the rectangles).
let key = next_key.key;
debug!("key {key} with size impact larger than the layer size");
while !window.is_empty() {
let has_more = false;
let key_range = window.choose_next_delta(self.target_file_size, has_more)
.expect("with has_more==false, choose_next_delta always returns something for a non-empty Window");
create_delta_job(key_range, &job.lsn_range, &mut new_jobs);
}

// Not really required: but here for future resilience:
// We make a "gap" here, so any structure the window holds should
// probably be reset.
window = Window::new();

let mut prior_lsn = job.lsn_range.start;
let mut lsn_ranges = Vec::new();
for (lsn, _size) in next_key.partition_lsns.iter() {
lsn_ranges.push(prior_lsn..*lsn);
prior_lsn = *lsn;
}
lsn_ranges.push(prior_lsn..job.lsn_range.end);
for lsn_range in lsn_ranges {
let key_range = key..key.next();
create_delta_job(key_range, &lsn_range, &mut new_jobs);
}
}
}
}

@@ -792,6 +856,10 @@ where
self.elems.front().unwrap().accum_size - self.splitoff_size
}

fn is_empty(&self) -> bool {
self.elems.is_empty()
}

fn commit_upto(&mut self, mut upto: usize) {
while upto > 1 {
let popped = self.elems.pop_front().unwrap();
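The per-key LSN partitioning added above turns the partition points recorded for a hot key into a list of contiguous LSN ranges covering the whole job range, one dedicated delta layer per range. The range-building step in isolation, with plain u64s standing in for Lsn (function name is mine):

```rust
use std::ops::Range;

// Given partition points inside a job's LSN range, produce contiguous
// sub-ranges that cover the whole range, mirroring the loop in the diff above.
fn split_lsn_range(job_range: Range<u64>, partition_points: &[u64]) -> Vec<Range<u64>> {
    let mut prior = job_range.start;
    let mut ranges = Vec::with_capacity(partition_points.len() + 1);
    for &point in partition_points {
        ranges.push(prior..point);
        prior = point;
    }
    ranges.push(prior..job_range.end);
    ranges
}

fn main() {
    let ranges = split_lsn_range(100..200, &[120, 150]);
    assert_eq!(ranges, vec![100..120, 120..150, 150..200]);
}
```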
@@ -9,10 +9,12 @@ use pageserver_api::shard::ShardIdentity;
use pin_project_lite::pin_project;
use std::collections::BinaryHeap;
use std::collections::VecDeque;
use std::fmt::Display;
use std::future::Future;
use std::ops::{DerefMut, Range};
use std::pin::Pin;
use std::task::{ready, Poll};
use utils::lsn::Lsn;

pub fn keyspace_total_size<K>(
keyspace: &CompactionKeySpace<K>,
@@ -108,17 +110,40 @@ pub fn merge_delta_keys<'a, E: CompactionJobExecutor>(
}
}

pub async fn merge_delta_keys_buffered<'a, E: CompactionJobExecutor + 'a>(
layers: &'a [E::DeltaLayer],
ctx: &'a E::RequestContext,
) -> anyhow::Result<impl Stream<Item = <E::DeltaLayer as CompactionDeltaLayer<E>>::DeltaEntry<'a>>>
{
let mut keys = Vec::new();
for l in layers {
// Boxing and casting to LoadFuture is required to obtain the right Sync bound.
// If we do l.load_keys(ctx).await? directly, there is a compilation error.
let load_future: LoadFuture<'a, _> = Box::pin(l.load_keys(ctx));
keys.extend(load_future.await?.into_iter());
}
keys.sort_by_key(|k| (k.key(), k.lsn()));
let stream = futures::stream::iter(keys.into_iter());
Ok(stream)
}

enum LazyLoadLayer<'a, E: CompactionJobExecutor> {
Loaded(VecDeque<<E::DeltaLayer as CompactionDeltaLayer<E>>::DeltaEntry<'a>>),
Unloaded(&'a E::DeltaLayer),
}
impl<'a, E: CompactionJobExecutor> LazyLoadLayer<'a, E> {
fn key(&self) -> E::Key {
fn min_key(&self) -> E::Key {
match self {
Self::Loaded(entries) => entries.front().unwrap().key(),
Self::Unloaded(dl) => dl.key_range().start,
}
}
fn min_lsn(&self) -> Lsn {
match self {
Self::Loaded(entries) => entries.front().unwrap().lsn(),
Self::Unloaded(dl) => dl.lsn_range().start,
}
}
}
impl<'a, E: CompactionJobExecutor> PartialOrd for LazyLoadLayer<'a, E> {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
@@ -128,12 +153,12 @@ impl<'a, E: CompactionJobExecutor> PartialOrd for LazyLoadLayer<'a, E> {
impl<'a, E: CompactionJobExecutor> Ord for LazyLoadLayer<'a, E> {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
// reverse order so that we get a min-heap
other.key().cmp(&self.key())
(other.min_key(), other.min_lsn()).cmp(&(self.min_key(), self.min_lsn()))
}
}
impl<'a, E: CompactionJobExecutor> PartialEq for LazyLoadLayer<'a, E> {
fn eq(&self, other: &Self) -> bool {
self.key().eq(&other.key())
self.cmp(other) == std::cmp::Ordering::Equal
}
}
impl<'a, E: CompactionJobExecutor> Eq for LazyLoadLayer<'a, E> {}
@@ -210,11 +235,16 @@ pub struct KeySize<K> {
pub key: K,
pub num_values: u64,
pub size: u64,
/// The lsns to partition at (if empty then no per-lsn partitioning)
pub partition_lsns: Vec<(Lsn, u64)>,
}

pub fn accum_key_values<'a, I, K, D, E>(input: I) -> impl Stream<Item = Result<KeySize<K>, E>>
pub fn accum_key_values<'a, I, K, D, E>(
input: I,
target_size: u64,
) -> impl Stream<Item = Result<KeySize<K>, E>>
where
K: Eq,
K: Eq + PartialOrd + Display + Copy,
I: Stream<Item = Result<D, E>>,
D: CompactionDeltaEntry<'a, K>,
{
@@ -224,25 +254,39 @@ where

if let Some(first) = input.next().await {
let first = first?;
let mut part_size = first.size();
let mut accum: KeySize<K> = KeySize {
key: first.key(),
num_values: 1,
size: first.size(),
size: part_size,
partition_lsns: Vec::new(),
};
let mut last_key = accum.key;
while let Some(this) = input.next().await {
let this = this?;
if this.key() == accum.key {
accum.size += this.size();
let add_size = this.size();
if part_size + add_size > target_size {
accum.partition_lsns.push((this.lsn(), part_size));
part_size = 0;
}
part_size += add_size;
accum.size += add_size;
accum.num_values += 1;
} else {
assert!(last_key <= accum.key, "last_key={last_key} <= accum.key={}", accum.key);
last_key = accum.key;
yield accum;
part_size = this.size();
accum = KeySize {
key: this.key(),
num_values: 1,
size: this.size(),
size: part_size,
partition_lsns: Vec::new(),
};
}
}
assert!(last_key <= accum.key, "last_key={last_key} <= accum.key={}", accum.key);
yield accum;
}
}

@@ -184,6 +184,12 @@ impl<L> Level<L> {
}
let mut events: Vec<Event<K>> = Vec::new();
for (idx, l) in self.layers.iter().enumerate() {
let key_range = l.key_range();
if key_range.end == key_range.start.next() && l.is_delta() {
// Ignore single-key delta layers as they can be stacked on top of each other
// as that is the only way to cut further.
continue;
}
events.push(Event {
key: l.key_range().start,
layer_idx: idx,
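The Ord change above keeps BinaryHeap (a max-heap) behaving as a min-heap by comparing (key, lsn) tuples in reverse, so the merge always pops the smallest entry first. The same effect can be demonstrated with std::cmp::Reverse on plain tuples:

```rust
use std::cmp::Reverse;
use std::collections::BinaryHeap;

fn main() {
    // BinaryHeap is a max-heap; wrapping the ordering key in Reverse makes
    // pop() return the smallest (key, lsn) pair first, which is what the
    // layer merge relies on.
    let mut heap = BinaryHeap::new();
    for (key, lsn) in [(3u64, 10u64), (1, 7), (1, 5), (2, 1)] {
        heap.push(Reverse((key, lsn)));
    }
    let mut drained = Vec::new();
    while let Some(Reverse(entry)) = heap.pop() {
        drained.push(entry);
    }
    assert_eq!(drained, vec![(1, 5), (1, 7), (2, 1), (3, 10)]);
}
```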
@@ -1,23 +1,35 @@
use once_cell::sync::OnceCell;
use pageserver_compaction::interface::CompactionLayer;
use pageserver_compaction::simulator::MockTimeline;
use utils::logging;

static LOG_HANDLE: OnceCell<()> = OnceCell::new();

pub(crate) fn setup_logging() {
LOG_HANDLE.get_or_init(|| {
logging::init(
logging::LogFormat::Test,
logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
logging::Output::Stdout,
)
.expect("Failed to init test logging")
});
}

/// Test the extreme case that there are so many updates for a single key that
/// even if we produce an extremely narrow delta layer, spanning just that one
/// key, we still too many records to fit in the target file size. We need to
/// split in the LSN dimension too in that case.
///
/// TODO: The code to avoid this problem has not been implemented yet! So the
/// assertion currently fails, but we need to make it not fail.
#[ignore]
#[tokio::test]
async fn test_many_updates_for_single_key() {
setup_logging();
let mut executor = MockTimeline::new();
executor.target_file_size = 10_000_000; // 10 MB
executor.target_file_size = 1_000_000; // 1 MB

// Ingest 100 MB of updates to a single key.
// Ingest 10 MB of updates to a single key.
for _ in 1..1000 {
executor.ingest_uniform(100, 10, &(0..100_000)).unwrap();
executor.ingest_uniform(10_000, 10, &(0..1)).unwrap();
executor.ingest_uniform(1000, 10, &(0..1)).unwrap();
executor.compact().await.unwrap();
}

@@ -27,9 +39,32 @@ async fn test_many_updates_for_single_key() {
}
for l in executor.live_layers.iter() {
assert!(l.file_size() < executor.target_file_size * 2);
// sanity check that none of the delta layers are stupidly small either
// Sanity check that none of the delta layers are empty either.
if l.is_delta() {
assert!(l.file_size() > executor.target_file_size / 2);
assert!(l.file_size() > 0);
}
}
}

#[tokio::test]
async fn test_simple_updates() {
setup_logging();
let mut executor = MockTimeline::new();
executor.target_file_size = 500_000; // 500 KB

// Ingest some traffic.
for _ in 1..400 {
executor.ingest_uniform(100, 500, &(0..100_000)).unwrap();
}

for l in executor.live_layers.iter() {
println!("layer {}: {}", l.short_id(), l.file_size());
}

println!("Running compaction...");
executor.compact().await.unwrap();

for l in executor.live_layers.iter() {
println!("layer {}: {}", l.short_id(), l.file_size());
}
}

@@ -28,6 +28,8 @@
//! # From an `index_part.json` in S3
//! (jq -r '.layer_metadata | keys[]' | cargo run -p pagectl draw-timeline ) < index_part.json-00000016 > out.svg
//!
//! # enrich with lines for gc_cutoff and a child branch point
//! cat <(jq -r '.historic_layers[] | .layer_file_name' < layers.json) <(echo -e 'gc_cutoff:0000001CE3FE32C9\nbranch:0000001DE3FE32C9') | cargo run --bin pagectl draw-timeline >| out.svg
//! ```
//!
//! ## Viewing
@@ -48,9 +50,8 @@
//! ```
//!

use anyhow::Result;
use anyhow::{Context, Result};
use pageserver::repository::Key;
use pageserver::METADATA_FILE_NAME;
use std::cmp::Ordering;
use std::io::{self, BufRead};
use std::path::PathBuf;
@@ -81,6 +82,11 @@ fn parse_filename(name: &str) -> (Range<Key>, Range<Lsn>) {
let split: Vec<&str> = name.split("__").collect();
let keys: Vec<&str> = split[0].split('-').collect();
let mut lsns: Vec<&str> = split[1].split('-').collect();

if lsns.last().expect("should").len() == 8 {
lsns.pop();
}

if lsns.len() == 1 {
lsns.push(lsns[0]);
}
@@ -90,6 +96,33 @@ fn parse_filename(name: &str) -> (Range<Key>, Range<Lsn>) {
(keys, lsns)
}

#[derive(Clone, Copy)]
enum LineKind {
GcCutoff,
Branch,
}

impl From<LineKind> for Fill {
fn from(value: LineKind) -> Self {
match value {
LineKind::GcCutoff => Fill::Color(rgb(255, 0, 0)),
LineKind::Branch => Fill::Color(rgb(0, 255, 0)),
}
}
}

impl FromStr for LineKind {
type Err = anyhow::Error;

fn from_str(s: &str) -> std::prelude::v1::Result<Self, Self::Err> {
Ok(match s {
"gc_cutoff" => LineKind::GcCutoff,
"branch" => LineKind::Branch,
_ => anyhow::bail!("unsupported linekind: {s}"),
})
}
}

pub fn main() -> Result<()> {
// Parse layer filenames from stdin
struct Layer {
@@ -99,15 +132,32 @@ pub fn main() -> Result<()> {
}
let mut files: Vec<Layer> = vec![];
let stdin = io::stdin();
for line in stdin.lock().lines() {

let mut lines: Vec<(Lsn, LineKind)> = vec![];

for (lineno, line) in stdin.lock().lines().enumerate() {
let lineno = lineno + 1;

let line = line.unwrap();
if let Some((kind, lsn)) = line.split_once(':') {
let (kind, lsn) = LineKind::from_str(kind)
.context("parse kind")
.and_then(|kind| {
if lsn.contains('/') {
Lsn::from_str(lsn)
} else {
Lsn::from_hex(lsn)
}
.map(|lsn| (kind, lsn))
.context("parse lsn")
})
.with_context(|| format!("parse {line:?} on {lineno}"))?;
lines.push((lsn, kind));
continue;
}
let line = PathBuf::from_str(&line).unwrap();
let filename = line.file_name().unwrap();
let filename = filename.to_str().unwrap();
if filename == METADATA_FILE_NAME {
// Don't try and parse "metadata" like a key-lsn range
continue;
}
let (key_range, lsn_range) = parse_filename(filename);
files.push(Layer {
filename: filename.to_owned(),
@@ -117,8 +167,9 @@ pub fn main() -> Result<()> {
}

// Collect all coordinates
let mut keys: Vec<Key> = vec![];
let mut lsns: Vec<Lsn> = vec![];
let mut keys: Vec<Key> = Vec::with_capacity(files.len());
let mut lsns: Vec<Lsn> = Vec::with_capacity(files.len() + lines.len());

for Layer {
key_range: keyr,
lsn_range: lsnr,
@@ -131,6 +182,8 @@ pub fn main() -> Result<()> {
lsns.push(lsnr.end);
}

lsns.extend(lines.iter().map(|(lsn, _)| *lsn));

// Analyze
let key_map = build_coordinate_compression_map(keys);
let lsn_map = build_coordinate_compression_map(lsns);
@@ -144,10 +197,13 @@ pub fn main() -> Result<()> {
println!(
"{}",
BeginSvg {
w: key_map.len() as f32,
w: (key_map.len() + 10) as f32,
h: stretch * lsn_map.len() as f32
}
);

let xmargin = 0.05; // Height-dependent margin to disambiguate overlapping deltas

for Layer {
filename,
key_range: keyr,
@@ -169,7 +225,6 @@ pub fn main() -> Result<()> {
let mut lsn_diff = (lsn_end - lsn_start) as f32;
let mut fill = Fill::None;
let mut ymargin = 0.05 * lsn_diff; // Height-dependent margin to disambiguate overlapping deltas
let xmargin = 0.05; // Height-dependent margin to disambiguate overlapping deltas
let mut lsn_offset = 0.0;

// Fill in and thicken rectangle if it's an
@@ -189,7 +244,7 @@ pub fn main() -> Result<()> {
println!(
" {}",
rectangle(
key_start as f32 + stretch * xmargin,
5.0 + key_start as f32 + stretch * xmargin,
stretch * (lsn_max as f32 - (lsn_end as f32 - ymargin - lsn_offset)),
key_diff as f32 - stretch * 2.0 * xmargin,
stretch * (lsn_diff - 2.0 * ymargin)
@@ -200,6 +255,26 @@ pub fn main() -> Result<()> {
.comment(filename)
);
}

for (lsn, kind) in lines {
let lsn_start = *lsn_map.get(&lsn).unwrap();
let lsn_end = lsn_start;
let stretch = 2.0;
let lsn_diff = 0.3;
let lsn_offset = -lsn_diff / 2.0;
let ymargin = 0.05;
println!(
"{}",
rectangle(
0.0f32 + stretch * xmargin,
stretch * (lsn_map.len() as f32 - (lsn_end as f32 - ymargin - lsn_offset)),
(key_map.len() + 10) as f32,
stretch * (lsn_diff - 2.0 * ymargin)
)
.fill(kind)
);
}

println!("{}", EndSvg);

eprintln!("num_images: {}", num_images);
@@ -3,7 +3,7 @@ use std::collections::HashMap;
|
||||
use anyhow::Context;
|
||||
use camino::Utf8PathBuf;
|
||||
use pageserver::tenant::remote_timeline_client::index::IndexLayerMetadata;
|
||||
use pageserver::tenant::storage_layer::LayerFileName;
|
||||
use pageserver::tenant::storage_layer::LayerName;
|
||||
use pageserver::tenant::{metadata::TimelineMetadata, IndexPart};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
@@ -19,7 +19,7 @@ pub(crate) async fn main(cmd: &IndexPartCmd) -> anyhow::Result<()> {
|
||||
let des: IndexPart = IndexPart::from_s3_bytes(&bytes).context("deserialize")?;
|
||||
#[derive(serde::Serialize)]
|
||||
struct Output<'a> {
|
||||
layer_metadata: &'a HashMap<LayerFileName, IndexLayerMetadata>,
|
||||
layer_metadata: &'a HashMap<LayerName, IndexLayerMetadata>,
|
||||
disk_consistent_lsn: Lsn,
|
||||
timeline_metadata: &'a TimelineMetadata,
|
||||
}
|
||||
|
||||
@@ -100,7 +100,7 @@ pub(crate) fn parse_filename(name: &str) -> Option<LayerFile> {
|
||||
|
||||
// Finds the max_holes largest holes, ignoring any that are smaller than MIN_HOLE_LENGTH"
|
||||
async fn get_holes(path: &Utf8Path, max_holes: usize, ctx: &RequestContext) -> Result<Vec<Hole>> {
|
||||
let file = VirtualFile::open(path).await?;
|
||||
let file = VirtualFile::open(path, ctx).await?;
|
||||
let file_id = page_cache::next_file_id();
|
||||
let block_reader = FileBlockReader::new(&file, file_id);
|
||||
let summary_blk = block_reader.read_blk(0, ctx).await?;
|
||||
|
||||
@@ -61,7 +61,7 @@ async fn read_delta_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result
|
||||
let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path");
|
||||
virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
|
||||
page_cache::init(100);
|
||||
let file = VirtualFile::open(path).await?;
|
||||
let file = VirtualFile::open(path, ctx).await?;
|
||||
let file_id = page_cache::next_file_id();
|
||||
let block_reader = FileBlockReader::new(&file, file_id);
|
||||
let summary_blk = block_reader.read_blk(0, ctx).await?;
|
||||
|
||||
@@ -2,9 +2,11 @@ use pageserver_api::{models::HistoricLayerInfo, shard::TenantShardId};
|
||||
|
||||
use pageserver_client::mgmt_api;
|
||||
use rand::seq::SliceRandom;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{debug, info};
|
||||
use utils::id::{TenantTimelineId, TimelineId};
|
||||
|
||||
use std::{f64, sync::Arc};
|
||||
use tokio::{
|
||||
sync::{mpsc, OwnedSemaphorePermit},
|
||||
task::JoinSet,
|
||||
@@ -12,10 +14,7 @@ use tokio::{
|
||||
|
||||
use std::{
|
||||
num::NonZeroUsize,
|
||||
sync::{
|
||||
atomic::{AtomicU64, Ordering},
|
||||
Arc,
|
||||
},
|
||||
sync::atomic::{AtomicU64, Ordering},
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
@@ -51,19 +50,31 @@ pub(crate) fn main(args: Args) -> anyhow::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize)]
|
||||
struct Output {
|
||||
downloads_count: u64,
|
||||
downloads_bytes: u64,
|
||||
evictions_count: u64,
|
||||
timeline_restarts: u64,
|
||||
#[serde(with = "humantime_serde")]
|
||||
runtime: Duration,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct LiveStats {
|
||||
evictions: AtomicU64,
|
||||
downloads: AtomicU64,
|
||||
evictions_count: AtomicU64,
|
||||
downloads_count: AtomicU64,
|
||||
downloads_bytes: AtomicU64,
|
||||
timeline_restarts: AtomicU64,
|
||||
}
|
||||
|
||||
impl LiveStats {
|
||||
fn eviction_done(&self) {
|
||||
self.evictions.fetch_add(1, Ordering::Relaxed);
|
||||
self.evictions_count.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
fn download_done(&self) {
|
||||
self.downloads.fetch_add(1, Ordering::Relaxed);
|
||||
fn download_done(&self, size: u64) {
|
||||
self.downloads_count.fetch_add(1, Ordering::Relaxed);
|
||||
self.downloads_bytes.fetch_add(size, Ordering::Relaxed);
|
||||
}
|
||||
fn timeline_restart_done(&self) {
|
||||
self.timeline_restarts.fetch_add(1, Ordering::Relaxed);
|
||||
@@ -92,28 +103,49 @@ async fn main_impl(args: Args) -> anyhow::Result<()> {
|
||||
)
|
||||
.await?;
|
||||
|
||||
let token = CancellationToken::new();
|
||||
let mut tasks = JoinSet::new();
|
||||
|
||||
let live_stats = Arc::new(LiveStats::default());
|
||||
let periodic_stats = Arc::new(LiveStats::default());
|
||||
let total_stats = Arc::new(LiveStats::default());
|
||||
|
||||
let start = Instant::now();
|
||||
tasks.spawn({
|
||||
let live_stats = Arc::clone(&live_stats);
|
||||
let periodic_stats = Arc::clone(&periodic_stats);
|
||||
let total_stats = Arc::clone(&total_stats);
|
||||
let cloned_token = token.clone();
|
||||
async move {
|
||||
let mut last_at = Instant::now();
|
||||
loop {
|
||||
if cloned_token.is_cancelled() {
|
||||
return;
|
||||
}
|
||||
tokio::time::sleep_until((last_at + Duration::from_secs(1)).into()).await;
|
||||
let now = Instant::now();
|
||||
let delta: Duration = now - last_at;
|
||||
last_at = now;
|
||||
|
||||
let LiveStats {
|
||||
evictions,
|
||||
downloads,
|
||||
evictions_count,
|
||||
downloads_count,
|
||||
downloads_bytes,
|
||||
timeline_restarts,
|
||||
} = &*live_stats;
|
||||
let evictions = evictions.swap(0, Ordering::Relaxed) as f64 / delta.as_secs_f64();
|
||||
let downloads = downloads.swap(0, Ordering::Relaxed) as f64 / delta.as_secs_f64();
|
||||
} = &*periodic_stats;
|
||||
let evictions_count = evictions_count.swap(0, Ordering::Relaxed);
|
||||
let downloads_count = downloads_count.swap(0, Ordering::Relaxed);
|
||||
let downloads_bytes = downloads_bytes.swap(0, Ordering::Relaxed);
|
||||
let timeline_restarts = timeline_restarts.swap(0, Ordering::Relaxed);
|
||||
info!("evictions={evictions:.2}/s downloads={downloads:.2}/s timeline_restarts={timeline_restarts}");
|
||||
|
||||
total_stats.evictions_count.fetch_add(evictions_count, Ordering::Relaxed);
|
||||
total_stats.downloads_count.fetch_add(downloads_count, Ordering::Relaxed);
|
||||
total_stats.downloads_bytes.fetch_add(downloads_bytes, Ordering::Relaxed);
|
||||
total_stats.timeline_restarts.fetch_add(timeline_restarts, Ordering::Relaxed);
|
||||
|
||||
let evictions_per_s = evictions_count as f64 / delta.as_secs_f64();
|
||||
let downloads_per_s = downloads_count as f64 / delta.as_secs_f64();
|
||||
let downloads_mibs_per_s = downloads_bytes as f64 / delta.as_secs_f64() / ((1 << 20) as f64);
|
||||
|
||||
info!("evictions={evictions_per_s:.2}/s downloads={downloads_per_s:.2}/s download_bytes={downloads_mibs_per_s:.2}MiB/s timeline_restarts={timeline_restarts}");
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -124,14 +156,42 @@ async fn main_impl(args: Args) -> anyhow::Result<()> {
|
||||
args,
|
||||
Arc::clone(&mgmt_api_client),
|
||||
tl,
|
||||
Arc::clone(&live_stats),
|
||||
Arc::clone(&periodic_stats),
|
||||
token.clone(),
|
||||
));
|
||||
}
|
||||
}
|
||||
if let Some(runtime) = args.runtime {
|
||||
tokio::spawn(async move {
|
||||
tokio::time::sleep(runtime.into()).await;
|
||||
token.cancel();
|
||||
});
|
||||
}
|
||||
|
||||
while let Some(res) = tasks.join_next().await {
|
||||
res.unwrap();
|
||||
}
|
||||
let end = Instant::now();
|
||||
let duration: Duration = end - start;
|
||||
|
||||
let output = {
|
||||
let LiveStats {
|
||||
evictions_count,
|
||||
downloads_count,
|
||||
downloads_bytes,
|
||||
timeline_restarts,
|
||||
} = &*total_stats;
|
||||
Output {
|
||||
downloads_count: downloads_count.load(Ordering::Relaxed),
|
||||
downloads_bytes: downloads_bytes.load(Ordering::Relaxed),
|
||||
evictions_count: evictions_count.load(Ordering::Relaxed),
|
||||
timeline_restarts: timeline_restarts.load(Ordering::Relaxed),
|
||||
runtime: duration,
|
||||
}
|
||||
};
|
||||
let output = serde_json::to_string_pretty(&output).unwrap();
|
||||
println!("{output}");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -140,6 +200,7 @@ async fn timeline_actor(
|
||||
mgmt_api_client: Arc<pageserver_client::mgmt_api::Client>,
|
||||
timeline: TenantTimelineId,
|
||||
live_stats: Arc<LiveStats>,
|
||||
token: CancellationToken,
|
||||
) {
|
||||
// TODO: support sharding
|
||||
let tenant_shard_id = TenantShardId::unsharded(timeline.tenant_id);
|
||||
@@ -149,7 +210,7 @@ async fn timeline_actor(
|
||||
layers: Vec<mpsc::Sender<OwnedSemaphorePermit>>,
|
||||
concurrency: Arc<tokio::sync::Semaphore>,
|
||||
}
|
||||
loop {
|
||||
while !token.is_cancelled() {
|
||||
debug!("restarting timeline");
|
||||
let layer_map_info = mgmt_api_client
|
||||
.layer_map_info(tenant_shard_id, timeline.timeline_id)
|
||||
@@ -185,7 +246,7 @@ async fn timeline_actor(
|
||||
|
||||
live_stats.timeline_restart_done();
|
||||
|
||||
loop {
|
||||
while !token.is_cancelled() {
|
||||
assert!(!timeline.joinset.is_empty());
|
||||
if let Some(res) = timeline.joinset.try_join_next() {
|
||||
debug!(?res, "a layer actor exited, should not happen");
|
||||
@@ -255,7 +316,7 @@ async fn layer_actor(
|
||||
.layer_ondemand_download(tenant_shard_id, timeline_id, layer.layer_file_name())
|
||||
.await
|
||||
.unwrap();
|
||||
live_stats.download_done();
|
||||
live_stats.download_done(layer.layer_file_size());
|
||||
did_it
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1,20 +1,39 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use ::metrics::IntGauge;
|
||||
use bytes::{Buf, BufMut, Bytes};
|
||||
use pageserver_api::key::{Key, AUX_KEY_PREFIX, METADATA_KEY_SIZE};
|
||||
use sha2::Digest;
|
||||
use tracing::warn;
|
||||
|
||||
fn hash256(data: &[u8]) -> [u8; 32] {
|
||||
sha2::Sha256::digest(data).into()
|
||||
// BEGIN Copyright (c) 2017 Servo Contributors
|
||||
|
||||
/// Const version of FNV hash.
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub const fn fnv_hash(bytes: &[u8]) -> u128 {
|
||||
const INITIAL_STATE: u128 = 0x6c62272e07bb014262b821756295c58d;
|
||||
const PRIME: u128 = 0x0000000001000000000000000000013B;
|
||||
|
||||
let mut hash = INITIAL_STATE;
|
||||
let mut i = 0;
|
||||
while i < bytes.len() {
|
||||
hash ^= bytes[i] as u128;
|
||||
hash = hash.wrapping_mul(PRIME);
|
||||
i += 1;
|
||||
}
|
||||
hash
|
||||
}
|
||||
|
||||
/// Create a metadata key from a hash, encoded as [AUX_KEY_PREFIX, 2B directory prefix, first 13B of 128b sha256].
|
||||
// END Copyright (c) 2017 Servo Contributors
|
||||
|
||||
/// Create a metadata key from a hash, encoded as [AUX_KEY_PREFIX, 2B directory prefix, least significant 13B of FNV hash].
|
||||
fn aux_hash_to_metadata_key(dir_level1: u8, dir_level2: u8, data: &[u8]) -> Key {
|
||||
let mut key = [0; METADATA_KEY_SIZE];
|
||||
let hash = hash256(data);
|
||||
let mut key: [u8; 16] = [0; METADATA_KEY_SIZE];
|
||||
let hash = fnv_hash(data).to_be_bytes();
|
||||
key[0] = AUX_KEY_PREFIX;
|
||||
key[1] = dir_level1;
|
||||
key[2] = dir_level2;
|
||||
key[3..16].copy_from_slice(&hash[0..13]);
|
||||
key[3..16].copy_from_slice(&hash[3..16]);
|
||||
Key::from_metadata_key_fixed_size(&key)
|
||||
}
|
||||
|
||||
@@ -145,6 +164,55 @@ pub fn encode_file_value(files: &[(&str, &[u8])]) -> anyhow::Result<Vec<u8>> {
|
||||
Ok(encoded)
|
||||
}
|
||||
|
||||
/// An estimation of the size of aux files.
|
||||
pub struct AuxFileSizeEstimator {
|
||||
aux_file_size_gauge: IntGauge,
|
||||
size: Arc<std::sync::Mutex<Option<isize>>>,
|
||||
}
|
||||
|
||||
impl AuxFileSizeEstimator {
|
||||
pub fn new(aux_file_size_gauge: IntGauge) -> Self {
|
||||
Self {
|
||||
aux_file_size_gauge,
|
||||
size: Arc::new(std::sync::Mutex::new(None)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn on_base_backup(&self, new_size: usize) {
|
||||
let mut guard = self.size.lock().unwrap();
|
||||
*guard = Some(new_size as isize);
|
||||
self.report(new_size as isize);
|
||||
}
|
||||
|
||||
pub fn on_add(&self, file_size: usize) {
|
||||
let mut guard = self.size.lock().unwrap();
|
||||
if let Some(size) = &mut *guard {
|
||||
*size += file_size as isize;
|
||||
self.report(*size);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn on_remove(&self, file_size: usize) {
|
||||
let mut guard = self.size.lock().unwrap();
|
||||
if let Some(size) = &mut *guard {
|
||||
*size -= file_size as isize;
|
||||
self.report(*size);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn on_update(&self, old_size: usize, new_size: usize) {
|
||||
let mut guard = self.size.lock().unwrap();
|
||||
if let Some(size) = &mut *guard {
|
||||
*size += new_size as isize - old_size as isize;
|
||||
self.report(*size);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn report(&self, size: isize) {
|
||||
self.aux_file_size_gauge.set(size as i64);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -153,17 +221,18 @@ mod tests {
|
||||
fn test_hash_portable() {
|
||||
// AUX file encoding requires the hash to be portable across all platforms. This test case checks
|
||||
// if the algorithm produces the same hash across different environments.
|
||||
|
||||
assert_eq!(
|
||||
"1b4f0e9851971998e732078544c96b36c3d01cedf7caa332359d6f1d83567014",
|
||||
hex::encode(hash256("test1".as_bytes()))
|
||||
265160408618497461376862998434862070044,
|
||||
super::fnv_hash("test1".as_bytes())
|
||||
);
|
||||
assert_eq!(
|
||||
"3dfc364fd121aaa081cec54ef9ef6f69a4756df50cf3c52f1abd2b451829c4c0",
|
||||
hex::encode(hash256("test/test2".as_bytes()))
|
||||
295486155126299629456360817749600553988,
|
||||
super::fnv_hash("test/test2".as_bytes())
|
||||
);
|
||||
assert_eq!(
|
||||
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
|
||||
hex::encode(hash256("".as_bytes()))
|
||||
144066263297769815596495629667062367629,
|
||||
super::fnv_hash("".as_bytes())
|
||||
);
|
||||
}
|
||||
|
||||
@@ -172,28 +241,28 @@ mod tests {
|
||||
// To correct retrieve AUX files, the generated keys for the same file must be the same for all versions
|
||||
// of the page server.
|
||||
assert_eq!(
|
||||
"62000001011B4F0E9851971998E732078544",
|
||||
encode_aux_file_key("pg_logical/mappings/test1").to_string()
|
||||
"62000001017F8B83D94F7081693471ABF91C",
|
||||
encode_aux_file_key("pg_logical/mappings/test1").to_string(),
|
||||
);
|
||||
assert_eq!(
|
||||
"620000010260303AE22B998861BCE3B28F33",
|
||||
encode_aux_file_key("pg_logical/snapshots/test2").to_string()
|
||||
"62000001027F8E83D94F7081693471ABFCCD",
|
||||
encode_aux_file_key("pg_logical/snapshots/test2").to_string(),
|
||||
);
|
||||
assert_eq!(
|
||||
"6200000103E3B0C44298FC1C149AFBF4C899",
|
||||
encode_aux_file_key("pg_logical/replorigin_checkpoint").to_string()
|
||||
"62000001032E07BB014262B821756295C58D",
|
||||
encode_aux_file_key("pg_logical/replorigin_checkpoint").to_string(),
|
||||
);
|
||||
assert_eq!(
|
||||
"62000001FF60945D49535300BE8E42108658",
|
||||
encode_aux_file_key("pg_logical/unsupported").to_string()
|
||||
"62000001FF4F38E1C74754E7D03C1A660178",
|
||||
encode_aux_file_key("pg_logical/unsupported").to_string(),
|
||||
);
|
||||
assert_eq!(
|
||||
"6200000201FD61A03AF4F77D870FC21E05E7",
|
||||
"62000002017F8D83D94F7081693471ABFB92",
|
||||
encode_aux_file_key("pg_replslot/test3").to_string()
|
||||
);
|
||||
assert_eq!(
|
||||
"620000FFFF7F75AD39B73CD2A1FE41680550",
|
||||
encode_aux_file_key("other_file_not_supported").to_string()
|
||||
"620000FFFF2B6ECC8AEF93F643DC44F15E03",
|
||||
encode_aux_file_key("other_file_not_supported").to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -601,7 +601,7 @@ where
|
||||
// add zenith.signal file
|
||||
let mut zenith_signal = String::new();
|
||||
if self.prev_record_lsn == Lsn(0) {
|
||||
if self.lsn == self.timeline.get_ancestor_lsn() {
|
||||
if self.timeline.is_ancestor_lsn(self.lsn) {
|
||||
write!(zenith_signal, "PREV LSN: none")
|
||||
.map_err(|e| BasebackupError::Server(e.into()))?;
|
||||
} else {
|
||||
|
||||
@@ -284,7 +284,6 @@ fn start_pageserver(
|
||||
))
|
||||
.unwrap();
|
||||
pageserver::preinitialize_metrics();
|
||||
pageserver::metrics::wal_redo::set_process_kind_metric(conf.walredo_process_kind);
|
||||
|
||||
// If any failpoints were set from FAILPOINTS environment variable,
|
||||
// print them to the log for debugging purposes
|
||||
@@ -516,16 +515,12 @@ fn start_pageserver(
|
||||
}
|
||||
});
|
||||
|
||||
let secondary_controller = if let Some(remote_storage) = &remote_storage {
|
||||
secondary::spawn_tasks(
|
||||
tenant_manager.clone(),
|
||||
remote_storage.clone(),
|
||||
background_jobs_barrier.clone(),
|
||||
shutdown_pageserver.clone(),
|
||||
)
|
||||
} else {
|
||||
secondary::null_controller()
|
||||
};
|
||||
let secondary_controller = secondary::spawn_tasks(
|
||||
tenant_manager.clone(),
|
||||
remote_storage.clone(),
|
||||
background_jobs_barrier.clone(),
|
||||
shutdown_pageserver.clone(),
|
||||
);
|
||||
|
||||
// shared state between the disk-usage backed eviction background task and the http endpoint
|
||||
// that allows triggering disk-usage based eviction manually. note that the http endpoint
|
||||
@@ -533,15 +528,13 @@ fn start_pageserver(
|
||||
// been configured.
|
||||
let disk_usage_eviction_state: Arc<disk_usage_eviction_task::State> = Arc::default();
|
||||
|
||||
if let Some(remote_storage) = &remote_storage {
|
||||
launch_disk_usage_global_eviction_task(
|
||||
conf,
|
||||
remote_storage.clone(),
|
||||
disk_usage_eviction_state.clone(),
|
||||
tenant_manager.clone(),
|
||||
background_jobs_barrier.clone(),
|
||||
)?;
|
||||
}
|
||||
launch_disk_usage_global_eviction_task(
|
||||
conf,
|
||||
remote_storage.clone(),
|
||||
disk_usage_eviction_state.clone(),
|
||||
tenant_manager.clone(),
|
||||
background_jobs_barrier.clone(),
|
||||
)?;
|
||||
|
||||
// Start up the service to handle HTTP mgmt API request. We created the
|
||||
// listener earlier already.
|
||||
@@ -654,17 +647,20 @@ fn start_pageserver(
|
||||
None,
|
||||
"libpq endpoint listener",
|
||||
true,
|
||||
async move {
|
||||
page_service::libpq_listener_main(
|
||||
conf,
|
||||
broker_client,
|
||||
pg_auth,
|
||||
pageserver_listener,
|
||||
conf.pg_auth_type,
|
||||
libpq_ctx,
|
||||
task_mgr::shutdown_token(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
let tenant_manager = tenant_manager.clone();
|
||||
async move {
|
||||
page_service::libpq_listener_main(
|
||||
tenant_manager,
|
||||
broker_client,
|
||||
pg_auth,
|
||||
pageserver_listener,
|
||||
conf.pg_auth_type,
|
||||
libpq_ctx,
|
||||
task_mgr::shutdown_token(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
},
|
||||
);
|
||||
}
|
||||
@@ -693,14 +689,7 @@ fn start_pageserver(
|
||||
// Right now that tree doesn't reach very far, and `task_mgr` is used instead.
|
||||
// The plan is to change that over time.
|
||||
shutdown_pageserver.take();
|
||||
let bg_remote_storage = remote_storage.clone();
|
||||
let bg_deletion_queue = deletion_queue.clone();
|
||||
pageserver::shutdown_pageserver(
|
||||
&tenant_manager,
|
||||
bg_remote_storage.map(|_| bg_deletion_queue),
|
||||
0,
|
||||
)
|
||||
.await;
|
||||
pageserver::shutdown_pageserver(&tenant_manager, deletion_queue.clone(), 0).await;
|
||||
unreachable!()
|
||||
})
|
||||
}
|
||||
@@ -708,12 +697,11 @@ fn start_pageserver(
|
||||
|
||||
fn create_remote_storage_client(
|
||||
conf: &'static PageServerConf,
|
||||
) -> anyhow::Result<Option<GenericRemoteStorage>> {
|
||||
) -> anyhow::Result<GenericRemoteStorage> {
|
||||
let config = if let Some(config) = &conf.remote_storage_config {
|
||||
config
|
||||
} else {
|
||||
tracing::warn!("no remote storage configured, this is a deprecated configuration");
|
||||
return Ok(None);
|
||||
anyhow::bail!("no remote storage configured, this is a deprecated configuration");
|
||||
};
|
||||
|
||||
// Create the client
|
||||
@@ -733,7 +721,7 @@ fn create_remote_storage_client(
|
||||
GenericRemoteStorage::unreliable_wrapper(remote_storage, conf.test_remote_failures);
|
||||
}
|
||||
|
||||
Ok(Some(remote_storage))
|
||||
Ok(remote_storage)
|
||||
}
|
||||
|
||||
fn cli() -> Command {
|
||||
|
||||
@@ -99,7 +99,7 @@ pub mod defaults {
|
||||
|
||||
pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;
|
||||
|
||||
pub const DEFAULT_WALREDO_PROCESS_KIND: &str = "sync";
|
||||
pub const DEFAULT_WALREDO_PROCESS_KIND: &str = "async";
|
||||
|
||||
///
|
||||
/// Default built-in configuration file.
|
||||
|
||||
@@ -38,7 +38,7 @@ use deleter::DeleterMessage;
|
||||
use list_writer::ListWriterQueueMessage;
|
||||
use validator::ValidatorQueueMessage;
|
||||
|
||||
use crate::{config::PageServerConf, tenant::storage_layer::LayerFileName};
|
||||
use crate::{config::PageServerConf, tenant::storage_layer::LayerName};
|
||||
|
||||
// TODO: configurable for how long to wait before executing deletions
|
||||
|
||||
@@ -479,7 +479,7 @@ impl DeletionQueueClient {
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
current_generation: Generation,
|
||||
layers: Vec<(LayerFileName, LayerFileMetadata)>,
|
||||
layers: Vec<(LayerName, LayerFileMetadata)>,
|
||||
) -> Result<(), DeletionQueueError> {
|
||||
if current_generation.is_none() {
|
||||
debug!("Enqueuing deletions in legacy mode, skipping queue");
|
||||
@@ -511,7 +511,7 @@ impl DeletionQueueClient {
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
current_generation: Generation,
|
||||
layers: Vec<(LayerFileName, LayerFileMetadata)>,
|
||||
layers: Vec<(LayerName, LayerFileMetadata)>,
|
||||
) -> Result<(), DeletionQueueError> {
|
||||
metrics::DELETION_QUEUE
|
||||
.keys_submitted
|
||||
@@ -632,7 +632,7 @@ impl DeletionQueue {
|
||||
///
|
||||
/// If remote_storage is None, then the returned workers will also be None.
|
||||
pub fn new<C>(
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
control_plane_client: Option<C>,
|
||||
conf: &'static PageServerConf,
|
||||
) -> (Self, Option<DeletionQueueWorkers<C>>)
|
||||
@@ -658,23 +658,6 @@ impl DeletionQueue {
|
||||
// longer to flush after Tenants have all been torn down.
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
let remote_storage = match remote_storage {
|
||||
None => {
|
||||
return (
|
||||
Self {
|
||||
client: DeletionQueueClient {
|
||||
tx,
|
||||
executor_tx,
|
||||
lsn_table: lsn_table.clone(),
|
||||
},
|
||||
cancel,
|
||||
},
|
||||
None,
|
||||
)
|
||||
}
|
||||
Some(r) => r,
|
||||
};
|
||||
|
||||
(
|
||||
Self {
|
||||
client: DeletionQueueClient {
|
||||
@@ -734,20 +717,20 @@ mod test {
|
||||
use crate::{
|
||||
control_plane_client::RetryForeverError,
|
||||
repository::Key,
|
||||
tenant::{harness::TenantHarness, storage_layer::DeltaFileName},
|
||||
tenant::{harness::TenantHarness, storage_layer::DeltaLayerName},
|
||||
};
|
||||
|
||||
use super::*;
|
||||
pub const TIMELINE_ID: TimelineId =
|
||||
TimelineId::from_array(hex!("11223344556677881122334455667788"));
|
||||
|
||||
pub const EXAMPLE_LAYER_NAME: LayerFileName = LayerFileName::Delta(DeltaFileName {
|
||||
pub const EXAMPLE_LAYER_NAME: LayerName = LayerName::Delta(DeltaLayerName {
|
||||
key_range: Key::from_i128(0x0)..Key::from_i128(0xFFFFFFFFFFFFFFFF),
|
||||
lsn_range: Lsn(0x00000000016B59D8)..Lsn(0x00000000016B5A51),
|
||||
});
|
||||
|
||||
// When you need a second layer in a test.
|
||||
pub const EXAMPLE_LAYER_NAME_ALT: LayerFileName = LayerFileName::Delta(DeltaFileName {
|
||||
pub const EXAMPLE_LAYER_NAME_ALT: LayerName = LayerName::Delta(DeltaLayerName {
|
||||
key_range: Key::from_i128(0x0)..Key::from_i128(0xFFFFFFFFFFFFFFFF),
|
||||
lsn_range: Lsn(0x00000000016B5A51)..Lsn(0x00000000016B5A61),
|
||||
});
|
||||
@@ -765,7 +748,7 @@ mod test {
|
||||
/// Simulate a pageserver restart by destroying and recreating the deletion queue
|
||||
async fn restart(&mut self) {
|
||||
let (deletion_queue, workers) = DeletionQueue::new(
|
||||
Some(self.storage.clone()),
|
||||
self.storage.clone(),
|
||||
Some(self.mock_control_plane.clone()),
|
||||
self.harness.conf,
|
||||
);
|
||||
@@ -797,7 +780,7 @@ mod test {
|
||||
/// Returns remote layer file name, suitable for use in assert_remote_files
|
||||
fn write_remote_layer(
|
||||
&self,
|
||||
file_name: LayerFileName,
|
||||
file_name: LayerName,
|
||||
gen: Generation,
|
||||
) -> anyhow::Result<String> {
|
||||
let tenant_shard_id = self.harness.tenant_shard_id;
|
||||
@@ -875,7 +858,7 @@ mod test {
|
||||
let mock_control_plane = MockControlPlane::new();
|
||||
|
||||
let (deletion_queue, worker) = DeletionQueue::new(
|
||||
Some(storage.clone()),
|
||||
storage.clone(),
|
||||
Some(mock_control_plane.clone()),
|
||||
harness.conf,
|
||||
);
|
||||
@@ -952,7 +935,7 @@ mod test {
|
||||
let client = ctx.deletion_queue.new_client();
|
||||
client.recover(HashMap::new())?;
|
||||
|
||||
let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap();
|
||||
let layer_file_name_1: LayerName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap();
|
||||
let tenant_shard_id = ctx.harness.tenant_shard_id;
|
||||
|
||||
let content: Vec<u8> = "victim1 contents".into();
|
||||
|
||||
@@ -34,7 +34,7 @@ use crate::deletion_queue::TEMP_SUFFIX;
|
||||
use crate::metrics;
|
||||
use crate::tenant::remote_timeline_client::remote_layer_path;
|
||||
use crate::tenant::remote_timeline_client::LayerFileMetadata;
|
||||
use crate::tenant::storage_layer::LayerFileName;
|
||||
use crate::tenant::storage_layer::LayerName;
|
||||
use crate::virtual_file::on_fatal_io_error;
|
||||
use crate::virtual_file::MaybeFatalIo;
|
||||
|
||||
@@ -59,7 +59,7 @@ pub(super) struct DeletionOp {
|
||||
// `layers` and `objects` are both just lists of objects. `layers` is used if you do not
|
||||
// have a config object handy to project it to a remote key, and need the consuming worker
|
||||
// to do it for you.
|
||||
pub(super) layers: Vec<(LayerFileName, LayerFileMetadata)>,
|
||||
pub(super) layers: Vec<(LayerName, LayerFileMetadata)>,
|
||||
pub(super) objects: Vec<RemotePath>,
|
||||
|
||||
/// The _current_ generation of the Tenant shard attachment in which we are enqueuing
|
||||
|
||||
@@ -64,7 +64,7 @@ use crate::{
|
||||
mgr::TenantManager,
|
||||
remote_timeline_client::LayerFileMetadata,
|
||||
secondary::SecondaryTenant,
|
||||
storage_layer::{AsLayerDesc, EvictionError, Layer, LayerFileName},
|
||||
storage_layer::{AsLayerDesc, EvictionError, Layer, LayerName},
|
||||
},
|
||||
};
|
||||
|
||||
@@ -604,7 +604,7 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
|
||||
pub(crate) struct EvictionSecondaryLayer {
|
||||
pub(crate) secondary_tenant: Arc<SecondaryTenant>,
|
||||
pub(crate) timeline_id: TimelineId,
|
||||
pub(crate) name: LayerFileName,
|
||||
pub(crate) name: LayerName,
|
||||
pub(crate) metadata: LayerFileMetadata,
|
||||
}
|
||||
|
||||
@@ -637,9 +637,9 @@ impl EvictionLayer {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_name(&self) -> LayerFileName {
|
||||
pub(crate) fn get_name(&self) -> LayerName {
|
||||
match self {
|
||||
Self::Attached(l) => l.layer_desc().filename(),
|
||||
Self::Attached(l) => l.layer_desc().layer_name(),
|
||||
Self::Secondary(sl) => sl.name.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -420,25 +420,6 @@ paths:
|
||||
description: Tenant scheduled to load successfully
|
||||
|
||||
/v1/tenant/{tenant_id}/synthetic_size:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
get:
|
||||
description: |
|
||||
Calculate tenant's synthetic size
|
||||
responses:
|
||||
"200":
|
||||
description: Tenant's synthetic size
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/SyntheticSizeResponse"
|
||||
|
||||
# This route has no handler. TODO: remove?
|
||||
/v1/tenant/{tenant_id}/size:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
in: path
|
||||
@@ -468,19 +449,9 @@ paths:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
required:
|
||||
- id
|
||||
- size
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
format: hex
|
||||
size:
|
||||
type: integer
|
||||
nullable: true
|
||||
description: |
|
||||
Size metric in bytes or null if inputs_only=true was given.
|
||||
$ref: "#/components/schemas/SyntheticSizeResponse"
|
||||
text/html:
|
||||
description: SVG representation of the tenant and it's timelines.
|
||||
"401":
|
||||
description: Unauthorized Error
|
||||
content:
|
||||
@@ -929,6 +900,9 @@ components:
|
||||
format: hex
|
||||
size:
|
||||
type: integer
|
||||
nullable: true
|
||||
description: |
|
||||
Size metric in bytes or null if inputs_only=true was given.
|
||||
segment_sizes:
|
||||
type: array
|
||||
items:
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
//!
|
||||
//! Management HTTP API
|
||||
//!
|
||||
use std::cmp::Reverse;
|
||||
use std::collections::BinaryHeap;
|
||||
use std::collections::HashMap;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
@@ -24,7 +26,11 @@ use pageserver_api::models::TenantScanRemoteStorageShard;
|
||||
use pageserver_api::models::TenantShardLocation;
|
||||
use pageserver_api::models::TenantShardSplitRequest;
|
||||
use pageserver_api::models::TenantShardSplitResponse;
|
||||
use pageserver_api::models::TenantSorting;
|
||||
use pageserver_api::models::TenantState;
|
||||
use pageserver_api::models::TopTenantShardItem;
|
||||
use pageserver_api::models::TopTenantShardsRequest;
|
||||
use pageserver_api::models::TopTenantShardsResponse;
|
||||
use pageserver_api::models::{
|
||||
DownloadRemoteLayersTaskSpawnRequest, LocationConfigMode, TenantAttachRequest,
|
||||
TenantLoadRequest, TenantLocationConfigRequest,
|
||||
@@ -63,7 +69,7 @@ use crate::tenant::remote_timeline_client::list_remote_timelines;
|
||||
use crate::tenant::secondary::SecondaryController;
|
||||
use crate::tenant::size::ModelInputs;
|
||||
use crate::tenant::storage_layer::LayerAccessStatsReset;
|
||||
use crate::tenant::storage_layer::LayerFileName;
|
||||
use crate::tenant::storage_layer::LayerName;
|
||||
use crate::tenant::timeline::CompactFlags;
|
||||
use crate::tenant::timeline::Timeline;
|
||||
use crate::tenant::SpawnMode;
|
||||
@@ -104,7 +110,7 @@ pub struct State {
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
allowlist_routes: Vec<Uri>,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
|
||||
deletion_queue_client: DeletionQueueClient,
|
||||
@@ -118,7 +124,7 @@ impl State {
|
||||
conf: &'static PageServerConf,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
|
||||
deletion_queue_client: DeletionQueueClient,
|
||||
@@ -813,12 +819,6 @@ async fn tenant_attach_handler(
|
||||
|
||||
let generation = get_request_generation(state, maybe_body.as_ref().and_then(|r| r.generation))?;
|
||||
|
||||
if state.remote_storage.is_none() {
|
||||
return Err(ApiError::BadRequest(anyhow!(
|
||||
"attach_tenant is not possible because pageserver was configured without remote storage"
|
||||
)));
|
||||
}
|
||||
|
||||
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
|
||||
let shard_params = ShardParameters::default();
|
||||
let location_conf = LocationConf::attached_single(tenant_conf, generation, &shard_params);
|
||||
@@ -1229,7 +1229,7 @@ async fn layer_download_handler(
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
let layer_file_name = get_request_param(&request, "layer_file_name")?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
let layer_name = LayerFileName::from_str(layer_file_name)
|
||||
let layer_name = LayerName::from_str(layer_file_name)
|
||||
.map_err(|s| ApiError::BadRequest(anyhow::anyhow!(s)))?;
|
||||
let state = get_state(&request);
|
||||
|
||||
@@ -1261,7 +1261,7 @@ async fn evict_timeline_layer_handler(
|
||||
let layer_file_name = get_request_param(&request, "layer_file_name")?;
|
||||
let state = get_state(&request);
|
||||
|
||||
let layer_name = LayerFileName::from_str(layer_file_name)
|
||||
let layer_name = LayerName::from_str(layer_file_name)
|
||||
.map_err(|s| ApiError::BadRequest(anyhow::anyhow!(s)))?;
|
||||
|
||||
let timeline =
|
||||
@@ -1643,12 +1643,6 @@ async fn tenant_time_travel_remote_storage_handler(
|
||||
)));
|
||||
}
|
||||
|
||||
let Some(storage) = state.remote_storage.as_ref() else {
|
||||
return Err(ApiError::InternalServerError(anyhow::anyhow!(
|
||||
"remote storage not configured, cannot run time travel"
|
||||
)));
|
||||
};
|
||||
|
||||
if timestamp > done_if_after {
|
||||
return Err(ApiError::BadRequest(anyhow!(
|
||||
"The done_if_after timestamp comes before the timestamp to recover to"
|
||||
@@ -1658,7 +1652,7 @@ async fn tenant_time_travel_remote_storage_handler(
|
||||
tracing::info!("Issuing time travel request internally. timestamp={timestamp_raw}, done_if_after={done_if_after_raw}");
|
||||
|
||||
remote_timeline_client::upload::time_travel_recover_tenant(
|
||||
storage,
|
||||
&state.remote_storage,
|
||||
&tenant_shard_id,
|
||||
timestamp,
|
||||
done_if_after,
|
||||
@@ -1715,12 +1709,7 @@ async fn timeline_gc_handler(
|
||||
let gc_req: TimelineGcRequest = json_request(&mut request).await?;
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
let wait_task_done = mgr::immediate_gc(tenant_shard_id, timeline_id, gc_req, cancel, &ctx)?;
|
||||
let gc_result = wait_task_done
|
||||
.await
|
||||
.context("wait for gc task")
|
||||
.map_err(ApiError::InternalServerError)?
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
let gc_result = mgr::immediate_gc(tenant_shard_id, timeline_id, gc_req, cancel, &ctx).await?;
|
||||
|
||||
json_response(StatusCode::OK, gc_result)
|
||||
}
|
||||
@@ -1908,11 +1897,6 @@ async fn deletion_queue_flush(
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let state = get_state(&r);
|
||||
|
||||
if state.remote_storage.is_none() {
|
||||
// Nothing to do if remote storage is disabled.
|
||||
return json_response(StatusCode::OK, ());
|
||||
}
|
||||
|
||||
let execute = parse_query_param(&r, "execute")?.unwrap_or(false);
|
||||
|
||||
let flush = async {
|
||||
@@ -2077,18 +2061,11 @@ async fn disk_usage_eviction_run(
|
||||
};
|
||||
|
||||
let state = get_state(&r);
|
||||
|
||||
let Some(storage) = state.remote_storage.as_ref() else {
|
||||
return Err(ApiError::InternalServerError(anyhow::anyhow!(
|
||||
"remote storage not configured, cannot run eviction iteration"
|
||||
)));
|
||||
};
|
||||
|
||||
let eviction_state = state.disk_usage_eviction_state.clone();
|
||||
|
||||
let res = crate::disk_usage_eviction_task::disk_usage_eviction_task_iteration_impl(
|
||||
&eviction_state,
|
||||
storage,
|
||||
&state.remote_storage,
|
||||
usage,
|
||||
&state.tenant_manager,
|
||||
config.eviction_order,
|
||||
@@ -2125,29 +2102,23 @@ async fn tenant_scan_remote_handler(
|
||||
let state = get_state(&request);
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
|
||||
let Some(remote_storage) = state.remote_storage.as_ref() else {
|
||||
return Err(ApiError::BadRequest(anyhow::anyhow!(
|
||||
"Remote storage not configured"
|
||||
)));
|
||||
};
|
||||
|
||||
let mut response = TenantScanRemoteStorageResponse::default();
|
||||
|
||||
let (shards, _other_keys) =
|
||||
list_remote_tenant_shards(remote_storage, tenant_id, cancel.clone())
|
||||
list_remote_tenant_shards(&state.remote_storage, tenant_id, cancel.clone())
|
||||
.await
|
||||
.map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;
|
||||
|
||||
for tenant_shard_id in shards {
|
||||
let (timeline_ids, _other_keys) =
|
||||
list_remote_timelines(remote_storage, tenant_shard_id, cancel.clone())
|
||||
list_remote_timelines(&state.remote_storage, tenant_shard_id, cancel.clone())
|
||||
.await
|
||||
.map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;
|
||||
|
||||
let mut generation = Generation::none();
|
||||
for timeline_id in timeline_ids {
|
||||
match download_index_part(
|
||||
remote_storage,
|
||||
&state.remote_storage,
|
||||
&tenant_shard_id,
|
||||
&timeline_id,
|
||||
Generation::MAX,
|
||||
@@ -2358,6 +2329,97 @@ async fn get_utilization(
|
||||
.map_err(ApiError::InternalServerError)
|
||||
}
|
||||
|
||||
/// Report on the largest tenants on this pageserver, for the storage controller to identify
|
||||
/// candidates for splitting
|
||||
async fn post_top_tenants(
|
||||
mut r: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
check_permission(&r, None)?;
|
||||
let request: TopTenantShardsRequest = json_request(&mut r).await?;
|
||||
let state = get_state(&r);
|
||||
|
||||
fn get_size_metric(sizes: &TopTenantShardItem, order_by: &TenantSorting) -> u64 {
|
||||
match order_by {
|
||||
TenantSorting::ResidentSize => sizes.resident_size,
|
||||
TenantSorting::MaxLogicalSize => sizes.max_logical_size,
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Eq, PartialEq)]
|
||||
struct HeapItem {
|
||||
metric: u64,
|
||||
sizes: TopTenantShardItem,
|
||||
}
|
||||
|
||||
impl PartialOrd for HeapItem {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
/// Heap items have reverse ordering on their metric: this enables using BinaryHeap, which
|
||||
/// supports popping the greatest item but not the smallest.
|
||||
impl Ord for HeapItem {
|
||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||
Reverse(self.metric).cmp(&Reverse(other.metric))
|
||||
}
|
||||
}
|
||||
|
||||
let mut top_n: BinaryHeap<HeapItem> = BinaryHeap::with_capacity(request.limit);
|
||||
|
||||
// FIXME: this is a lot of clones to take this tenant list
|
||||
for (tenant_shard_id, tenant_slot) in state.tenant_manager.list() {
|
||||
if let Some(shards_lt) = request.where_shards_lt {
|
||||
// Ignore tenants which already have >= this many shards
|
||||
if tenant_shard_id.shard_count >= shards_lt {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let sizes = match tenant_slot {
|
||||
TenantSlot::Attached(tenant) => tenant.get_sizes(),
|
||||
TenantSlot::Secondary(_) | TenantSlot::InProgress(_) => {
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let metric = get_size_metric(&sizes, &request.order_by);
|
||||
|
||||
if let Some(gt) = request.where_gt {
|
||||
// Ignore tenants whose metric is <= the lower size threshold, to do less sorting work
|
||||
if metric <= gt {
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
match top_n.peek() {
|
||||
None => {
|
||||
// Top N list is empty: candidate becomes first member
|
||||
top_n.push(HeapItem { metric, sizes });
|
||||
}
|
||||
Some(i) if i.metric > metric && top_n.len() < request.limit => {
|
||||
// Lowest item in list is greater than our candidate, but we aren't at limit yet: push to end
|
||||
top_n.push(HeapItem { metric, sizes });
|
||||
}
|
||||
Some(i) if i.metric > metric => {
|
||||
// List is at limit and lowest value is greater than our candidate, drop it.
|
||||
}
|
||||
Some(_) => top_n.push(HeapItem { metric, sizes }),
|
||||
}
|
||||
|
||||
while top_n.len() > request.limit {
|
||||
top_n.pop();
|
||||
}
|
||||
}
|
||||
|
||||
json_response(
|
||||
StatusCode::OK,
|
||||
TopTenantShardsResponse {
|
||||
shards: top_n.into_iter().map(|i| i.sizes).collect(),
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
/// Common functionality of all the HTTP API handlers.
|
||||
///
|
||||
/// - Adds a tracing span to each request (by `request_span`)
|
||||
@@ -2644,5 +2706,6 @@ pub fn make_router(
|
||||
)
|
||||
.put("/v1/io_engine", |r| api_handler(r, put_io_engine_handler))
|
||||
.get("/v1/utilization", |r| api_handler(r, get_utilization))
|
||||
.post("/v1/top_tenants", |r| api_handler(r, post_top_tenants))
|
||||
.any(handler_404))
|
||||
}
|
||||
|
||||
@@ -57,7 +57,7 @@ pub use crate::metrics::preinitialize_metrics;
|
||||
#[tracing::instrument(skip_all, fields(%exit_code))]
|
||||
pub async fn shutdown_pageserver(
|
||||
tenant_manager: &TenantManager,
|
||||
deletion_queue: Option<DeletionQueue>,
|
||||
mut deletion_queue: DeletionQueue,
|
||||
exit_code: i32,
|
||||
) {
|
||||
use std::time::Duration;
|
||||
@@ -89,9 +89,7 @@ pub async fn shutdown_pageserver(
|
||||
.await;
|
||||
|
||||
// Best effort to persist any outstanding deletions, to avoid leaking objects
|
||||
if let Some(mut deletion_queue) = deletion_queue {
|
||||
deletion_queue.shutdown(Duration::from_secs(5)).await;
|
||||
}
|
||||
deletion_queue.shutdown(Duration::from_secs(5)).await;
|
||||
|
||||
// Shut down the HTTP endpoint last, so that you can still check the server's
|
||||
// status while it's shutting down.
|
||||
@@ -114,10 +112,6 @@ pub async fn shutdown_pageserver(
|
||||
std::process::exit(exit_code);
|
||||
}
|
||||
|
||||
/// The name of the metadata file pageserver creates per timeline.
|
||||
/// Full path: `tenants/<tenant_id>/timelines/<timeline_id>/metadata`.
|
||||
pub const METADATA_FILE_NAME: &str = "metadata";
|
||||
|
||||
/// Per-tenant configuration file.
|
||||
/// Full path: `tenants/<tenant_id>/config`.
|
||||
pub(crate) const TENANT_CONFIG_NAME: &str = "config";
|
||||
|
||||
@@ -585,6 +585,15 @@ static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||
.expect("failed to define current logical size metric")
|
||||
});
|
||||
|
||||
static AUX_FILE_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
|
||||
register_int_gauge_vec!(
|
||||
"pageserver_aux_file_estimated_size",
|
||||
"The size of all aux files for a timeline in aux file v2 store.",
|
||||
&["tenant_id", "shard_id", "timeline_id"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) mod initial_logical_size {
|
||||
use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
|
||||
use once_cell::sync::Lazy;
|
||||
@@ -1990,29 +1999,6 @@ impl Default for WalRedoProcessCounters {
|
||||
pub(crate) static WAL_REDO_PROCESS_COUNTERS: Lazy<WalRedoProcessCounters> =
|
||||
Lazy::new(WalRedoProcessCounters::default);
|
||||
|
||||
#[cfg(not(test))]
|
||||
pub mod wal_redo {
|
||||
use super::*;
|
||||
|
||||
static PROCESS_KIND: Lazy<std::sync::Mutex<UIntGaugeVec>> = Lazy::new(|| {
|
||||
std::sync::Mutex::new(
|
||||
register_uint_gauge_vec!(
|
||||
"pageserver_wal_redo_process_kind",
|
||||
"The configured process kind for walredo",
|
||||
&["kind"],
|
||||
)
|
||||
.unwrap(),
|
||||
)
|
||||
});
|
||||
|
||||
pub fn set_process_kind_metric(kind: crate::walredo::ProcessKind) {
|
||||
// use guard to avoid races around the next two steps
|
||||
let guard = PROCESS_KIND.lock().unwrap();
|
||||
guard.reset();
|
||||
guard.with_label_values(&[&format!("{kind}")]).set(1);
|
||||
}
|
||||
}
|
||||
|
||||
/// Similar to `prometheus::HistogramTimer` but does not record on drop.
|
||||
pub(crate) struct StorageTimeMetricsTimer {
|
||||
metrics: StorageTimeMetrics,
|
||||
@@ -2112,9 +2098,10 @@ pub(crate) struct TimelineMetrics {
|
||||
pub garbage_collect_histo: StorageTimeMetrics,
|
||||
pub find_gc_cutoffs_histo: StorageTimeMetrics,
|
||||
pub last_record_gauge: IntGauge,
|
||||
resident_physical_size_gauge: UIntGauge,
|
||||
pub resident_physical_size_gauge: UIntGauge,
|
||||
/// copy of LayeredTimeline.current_logical_size
|
||||
pub current_logical_size_gauge: UIntGauge,
|
||||
pub aux_file_size_gauge: IntGauge,
|
||||
pub directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>>,
|
||||
pub evictions: IntCounter,
|
||||
pub evictions_with_low_residence_duration: std::sync::RwLock<EvictionsWithLowResidenceDuration>,
|
||||
@@ -2187,6 +2174,9 @@ impl TimelineMetrics {
|
||||
let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
|
||||
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
|
||||
.unwrap();
|
||||
let aux_file_size_gauge = AUX_FILE_SIZE
|
||||
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
|
||||
.unwrap();
|
||||
// TODO use impl Trait syntax here once we have ability to use it: https://github.com/rust-lang/rust/issues/63065
|
||||
let directory_entries_count_gauge_closure = {
|
||||
let tenant_shard_id = *tenant_shard_id;
|
||||
@@ -2224,6 +2214,7 @@ impl TimelineMetrics {
|
||||
last_record_gauge,
|
||||
resident_physical_size_gauge,
|
||||
current_logical_size_gauge,
|
||||
aux_file_size_gauge,
|
||||
directory_entries_count_gauge,
|
||||
evictions,
|
||||
evictions_with_low_residence_duration: std::sync::RwLock::new(
|
||||
@@ -2264,6 +2255,7 @@ impl TimelineMetrics {
|
||||
let _ = metric.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
}
|
||||
let _ = EVICTIONS.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
let _ = AUX_FILE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
|
||||
self.evictions_with_low_residence_duration
|
||||
.write()
|
||||
@@ -2320,6 +2312,7 @@ use pin_project_lite::pin_project;
|
||||
use std::collections::HashMap;
|
||||
use std::num::NonZeroUsize;
|
||||
use std::pin::Pin;
|
||||
use std::sync::atomic::AtomicU64;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::task::{Context, Poll};
|
||||
use std::time::{Duration, Instant};
|
||||
@@ -2329,35 +2322,35 @@ use crate::task_mgr::TaskKind;
|
||||
use crate::tenant::mgr::TenantSlot;
|
||||
|
||||
/// Maintain a per timeline gauge in addition to the global gauge.
|
||||
struct PerTimelineRemotePhysicalSizeGauge {
|
||||
last_set: u64,
|
||||
pub(crate) struct PerTimelineRemotePhysicalSizeGauge {
|
||||
last_set: AtomicU64,
|
||||
gauge: UIntGauge,
|
||||
}
|
||||
|
||||
impl PerTimelineRemotePhysicalSizeGauge {
|
||||
fn new(per_timeline_gauge: UIntGauge) -> Self {
|
||||
Self {
|
||||
last_set: per_timeline_gauge.get(),
|
||||
last_set: AtomicU64::new(0),
|
||||
gauge: per_timeline_gauge,
|
||||
}
|
||||
}
|
||||
fn set(&mut self, sz: u64) {
|
||||
pub(crate) fn set(&self, sz: u64) {
|
||||
self.gauge.set(sz);
|
||||
if sz < self.last_set {
|
||||
REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set - sz);
|
||||
let prev = self.last_set.swap(sz, std::sync::atomic::Ordering::Relaxed);
|
||||
if sz < prev {
|
||||
REMOTE_PHYSICAL_SIZE_GLOBAL.sub(prev - sz);
|
||||
} else {
|
||||
REMOTE_PHYSICAL_SIZE_GLOBAL.add(sz - self.last_set);
|
||||
REMOTE_PHYSICAL_SIZE_GLOBAL.add(sz - prev);
|
||||
};
|
||||
self.last_set = sz;
|
||||
}
|
||||
fn get(&self) -> u64 {
|
||||
pub(crate) fn get(&self) -> u64 {
|
||||
self.gauge.get()
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for PerTimelineRemotePhysicalSizeGauge {
|
||||
fn drop(&mut self) {
|
||||
REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set);
|
||||
REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set.load(std::sync::atomic::Ordering::Relaxed));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2365,7 +2358,7 @@ pub(crate) struct RemoteTimelineClientMetrics {
|
||||
tenant_id: String,
|
||||
shard_id: String,
|
||||
timeline_id: String,
|
||||
remote_physical_size_gauge: Mutex<Option<PerTimelineRemotePhysicalSizeGauge>>,
|
||||
pub(crate) remote_physical_size_gauge: PerTimelineRemotePhysicalSizeGauge,
|
||||
calls: Mutex<HashMap<(&'static str, &'static str), IntCounterPair>>,
|
||||
bytes_started_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
|
||||
bytes_finished_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
|
||||
@@ -2373,38 +2366,27 @@ pub(crate) struct RemoteTimelineClientMetrics {
|
||||
|
||||
impl RemoteTimelineClientMetrics {
|
||||
pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
|
||||
let tenant_id_str = tenant_shard_id.tenant_id.to_string();
|
||||
let shard_id_str = format!("{}", tenant_shard_id.shard_slug());
|
||||
let timeline_id_str = timeline_id.to_string();
|
||||
|
||||
let remote_physical_size_gauge = PerTimelineRemotePhysicalSizeGauge::new(
|
||||
REMOTE_PHYSICAL_SIZE
|
||||
.get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str])
|
||||
.unwrap(),
|
||||
);
|
||||
|
||||
RemoteTimelineClientMetrics {
|
||||
tenant_id: tenant_shard_id.tenant_id.to_string(),
|
||||
shard_id: format!("{}", tenant_shard_id.shard_slug()),
|
||||
timeline_id: timeline_id.to_string(),
|
||||
tenant_id: tenant_id_str,
|
||||
shard_id: shard_id_str,
|
||||
timeline_id: timeline_id_str,
|
||||
calls: Mutex::new(HashMap::default()),
|
||||
bytes_started_counter: Mutex::new(HashMap::default()),
|
||||
bytes_finished_counter: Mutex::new(HashMap::default()),
|
||||
remote_physical_size_gauge: Mutex::new(None),
|
||||
remote_physical_size_gauge,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn remote_physical_size_set(&self, sz: u64) {
|
||||
let mut guard = self.remote_physical_size_gauge.lock().unwrap();
|
||||
let gauge = guard.get_or_insert_with(|| {
|
||||
PerTimelineRemotePhysicalSizeGauge::new(
|
||||
REMOTE_PHYSICAL_SIZE
|
||||
.get_metric_with_label_values(&[
|
||||
&self.tenant_id,
|
||||
&self.shard_id,
|
||||
&self.timeline_id,
|
||||
])
|
||||
.unwrap(),
|
||||
)
|
||||
});
|
||||
gauge.set(sz);
|
||||
}
|
||||
|
||||
pub(crate) fn remote_physical_size_get(&self) -> u64 {
|
||||
let guard = self.remote_physical_size_gauge.lock().unwrap();
|
||||
guard.as_ref().map(|gauge| gauge.get()).unwrap_or(0)
|
||||
}
|
||||
|
||||
pub fn remote_operation_time(
|
||||
&self,
|
||||
file_kind: &RemoteOpFileKind,
|
||||
|
||||
@@ -32,6 +32,7 @@ use std::str;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tokio_util::io::StreamReader;
|
||||
@@ -49,7 +50,6 @@ use utils::{
|
||||
use crate::auth::check_permission;
|
||||
use crate::basebackup;
|
||||
use crate::basebackup::BasebackupError;
|
||||
use crate::config::PageServerConf;
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::import_datadir::import_wal_from_tar;
|
||||
use crate::metrics;
|
||||
@@ -59,13 +59,15 @@ use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
|
||||
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id;
|
||||
use crate::task_mgr;
|
||||
use crate::task_mgr::TaskKind;
|
||||
use crate::tenant::mgr;
|
||||
use crate::tenant::mgr::get_active_tenant_with_timeout;
|
||||
use crate::tenant::mgr::GetActiveTenantError;
|
||||
use crate::tenant::mgr::GetTenantError;
|
||||
use crate::tenant::mgr::ShardResolveResult;
|
||||
use crate::tenant::mgr::ShardSelector;
|
||||
use crate::tenant::mgr::TenantManager;
|
||||
use crate::tenant::timeline::WaitLsnError;
|
||||
use crate::tenant::GetTimelineError;
|
||||
use crate::tenant::PageReconstructError;
|
||||
use crate::tenant::Tenant;
|
||||
use crate::tenant::Timeline;
|
||||
use crate::trace::Tracer;
|
||||
use pageserver_api::key::rel_block_to_key;
|
||||
@@ -135,7 +137,7 @@ async fn read_tar_eof(mut reader: (impl AsyncRead + Unpin)) -> anyhow::Result<()
|
||||
/// Listens for connections, and launches a new handler task for each.
|
||||
///
|
||||
pub async fn libpq_listener_main(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
listener: TcpListener,
|
||||
@@ -180,7 +182,7 @@ pub async fn libpq_listener_main(
|
||||
"serving compute connection task",
|
||||
false,
|
||||
page_service_conn_main(
|
||||
conf,
|
||||
tenant_manager.clone(),
|
||||
broker_client.clone(),
|
||||
local_auth,
|
||||
socket,
|
||||
@@ -203,7 +205,7 @@ pub async fn libpq_listener_main(
|
||||
|
||||
#[instrument(skip_all, fields(peer_addr))]
|
||||
async fn page_service_conn_main(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
socket: tokio::net::TcpStream,
|
||||
@@ -260,7 +262,8 @@ async fn page_service_conn_main(
|
||||
// and create a child per-query context when it invokes process_query.
|
||||
// But it's in a shared crate, so, we store connection_ctx inside PageServerHandler
|
||||
// and create the per-query context in process_query ourselves.
|
||||
let mut conn_handler = PageServerHandler::new(conf, broker_client, auth, connection_ctx);
|
||||
let mut conn_handler =
|
||||
PageServerHandler::new(tenant_manager, broker_client, auth, connection_ctx);
|
||||
let pgbackend = PostgresBackend::new_from_io(socket, peer_addr, auth_type, None)?;
|
||||
|
||||
match pgbackend
|
||||
@@ -291,11 +294,12 @@ struct HandlerTimeline {
|
||||
}
|
||||
|
||||
struct PageServerHandler {
|
||||
_conf: &'static PageServerConf,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
claims: Option<Claims>,
|
||||
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
|
||||
/// The context created for the lifetime of the connection
|
||||
/// services by this PageServerHandler.
|
||||
/// For each query received over the connection,
|
||||
@@ -381,13 +385,13 @@ impl From<WaitLsnError> for QueryError {
|
||||
|
||||
impl PageServerHandler {
|
||||
pub fn new(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
connection_ctx: RequestContext,
|
||||
) -> Self {
|
||||
PageServerHandler {
|
||||
_conf: conf,
|
||||
tenant_manager,
|
||||
broker_client,
|
||||
auth,
|
||||
claims: None,
|
||||
@@ -552,13 +556,9 @@ impl PageServerHandler {
|
||||
{
|
||||
debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id();
|
||||
|
||||
let tenant = mgr::get_active_tenant_with_timeout(
|
||||
tenant_id,
|
||||
ShardSelector::First,
|
||||
ACTIVE_TENANT_TIMEOUT,
|
||||
&task_mgr::shutdown_token(),
|
||||
)
|
||||
.await?;
|
||||
let tenant = self
|
||||
.get_active_tenant_with_timeout(tenant_id, ShardSelector::First, ACTIVE_TENANT_TIMEOUT)
|
||||
.await?;
|
||||
|
||||
// Make request tracer if needed
|
||||
let mut tracer = if tenant.get_trace_read_requests() {
|
||||
@@ -726,13 +726,9 @@ impl PageServerHandler {
|
||||
|
||||
// Create empty timeline
|
||||
info!("creating new timeline");
|
||||
let tenant = get_active_tenant_with_timeout(
|
||||
tenant_id,
|
||||
ShardSelector::Zero,
|
||||
ACTIVE_TENANT_TIMEOUT,
|
||||
&task_mgr::shutdown_token(),
|
||||
)
|
||||
.await?;
|
||||
let tenant = self
|
||||
.get_active_tenant_with_timeout(tenant_id, ShardSelector::Zero, ACTIVE_TENANT_TIMEOUT)
|
||||
.await?;
|
||||
let timeline = tenant
|
||||
.create_empty_timeline(timeline_id, base_lsn, pg_version, &ctx)
|
||||
.await?;
|
||||
@@ -1370,18 +1366,69 @@ impl PageServerHandler {
|
||||
timeline_id: TimelineId,
|
||||
selector: ShardSelector,
|
||||
) -> Result<Arc<Timeline>, GetActiveTimelineError> {
|
||||
let tenant = get_active_tenant_with_timeout(
|
||||
tenant_id,
|
||||
selector,
|
||||
ACTIVE_TENANT_TIMEOUT,
|
||||
&task_mgr::shutdown_token(),
|
||||
)
|
||||
.await
|
||||
.map_err(GetActiveTimelineError::Tenant)?;
|
||||
let tenant = self
|
||||
.get_active_tenant_with_timeout(tenant_id, selector, ACTIVE_TENANT_TIMEOUT)
|
||||
.await
|
||||
.map_err(GetActiveTimelineError::Tenant)?;
|
||||
let timeline = tenant.get_timeline(timeline_id, true)?;
|
||||
set_tracing_field_shard_id(&timeline);
|
||||
Ok(timeline)
|
||||
}
|
||||
|
||||
/// Get a shard's [`Tenant`] in its active state, if present. If we don't find the shard and some
/// slots for this tenant are `InProgress` then we will wait.
/// If we find the [`Tenant`] and it's not yet in state [`TenantState::Active`], we will wait.
///
/// `timeout` is used as a total timeout for the whole wait operation.
|
||||
async fn get_active_tenant_with_timeout(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
shard_selector: ShardSelector,
|
||||
timeout: Duration,
|
||||
) -> Result<Arc<Tenant>, GetActiveTenantError> {
|
||||
let wait_start = Instant::now();
|
||||
let deadline = wait_start + timeout;
|
||||
|
||||
// Resolve TenantId to TenantShardId. This is usually a quick one-shot thing, the loop is
|
||||
// for handling the rare case that the slot we're accessing is InProgress.
|
||||
let tenant_shard = loop {
|
||||
let resolved = self
|
||||
.tenant_manager
|
||||
.resolve_attached_shard(&tenant_id, shard_selector);
|
||||
match resolved {
|
||||
ShardResolveResult::Found(tenant_shard) => break tenant_shard,
|
||||
ShardResolveResult::NotFound => {
|
||||
return Err(GetActiveTenantError::NotFound(GetTenantError::NotFound(
|
||||
tenant_id,
|
||||
)));
|
||||
}
|
||||
ShardResolveResult::InProgress(barrier) => {
|
||||
// We can't authoritatively answer right now: wait for InProgress state
|
||||
// to end, then try again
|
||||
tokio::select! {
|
||||
_ = self.await_connection_cancelled() => {
|
||||
return Err(GetActiveTenantError::Cancelled)
|
||||
},
|
||||
_ = barrier.wait() => {
|
||||
// The barrier completed: proceed around the loop to try looking up again
|
||||
},
|
||||
_ = tokio::time::sleep(deadline.duration_since(Instant::now())) => {
|
||||
return Err(GetActiveTenantError::WaitForActiveTimeout {
|
||||
latest_state: None,
|
||||
wait_time: timeout,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
tracing::debug!("Waiting for tenant to enter active state...");
|
||||
tenant_shard
|
||||
.wait_to_become_active(deadline.duration_since(Instant::now()))
|
||||
.await?;
|
||||
Ok(tenant_shard)
|
||||
}
|
||||
}
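For readability, here is a minimal, self-contained sketch of the resolve-then-wait pattern used by `get_active_tenant_with_timeout` above: resolve the shard, and if the slot is `InProgress`, wait for it to settle within the remaining deadline before retrying. The `Resolve` enum and the closures are illustrative stand-ins, not types from this diff.

use std::time::{Duration, Instant};

// Illustrative stand-in for ShardResolveResult; not a type from this diff.
enum Resolve<T> {
    Found(T),
    NotFound,
    InProgress,
}

fn resolve_with_deadline<T>(
    mut lookup: impl FnMut() -> Resolve<T>,
    mut wait_for_settle: impl FnMut(Duration) -> bool, // true = settled, false = timed out
    timeout: Duration,
) -> Result<T, &'static str> {
    let deadline = Instant::now() + timeout;
    loop {
        match lookup() {
            Resolve::Found(t) => return Ok(t),
            Resolve::NotFound => return Err("not found"),
            Resolve::InProgress => {
                // Spend at most the remaining budget waiting, then look up again.
                let remaining = deadline.saturating_duration_since(Instant::now());
                if remaining.is_zero() || !wait_for_settle(remaining) {
                    return Err("timed out waiting for in-progress slot");
                }
            }
        }
    }
}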
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@@ -1771,13 +1818,13 @@ where
|
||||
|
||||
self.check_permission(Some(tenant_id))?;
|
||||
|
||||
let tenant = get_active_tenant_with_timeout(
|
||||
tenant_id,
|
||||
ShardSelector::Zero,
|
||||
ACTIVE_TENANT_TIMEOUT,
|
||||
&task_mgr::shutdown_token(),
|
||||
)
|
||||
.await?;
|
||||
let tenant = self
|
||||
.get_active_tenant_with_timeout(
|
||||
tenant_id,
|
||||
ShardSelector::Zero,
|
||||
ACTIVE_TENANT_TIMEOUT,
|
||||
)
|
||||
.await?;
|
||||
pgb.write_message_noflush(&BeMessage::RowDescription(&[
|
||||
RowDescriptor::int8_col(b"checkpoint_distance"),
|
||||
RowDescriptor::int8_col(b"checkpoint_timeout"),
|
||||
|
||||
@@ -699,13 +699,17 @@ impl Timeline {
|
||||
.await
|
||||
.context("scan")?;
|
||||
let mut result = HashMap::new();
|
||||
let mut sz = 0;
|
||||
for (_, v) in kv {
|
||||
let v = v.context("get value")?;
|
||||
let v = aux_file::decode_file_value_bytes(&v).context("value decode")?;
|
||||
for (fname, content) in v {
|
||||
sz += fname.len();
|
||||
sz += content.len();
|
||||
result.insert(fname, content);
|
||||
}
|
||||
}
|
||||
self.aux_file_size_estimator.on_base_backup(sz);
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
@@ -1474,23 +1478,45 @@ impl<'a> DatadirModification<'a> {
|
||||
Err(PageReconstructError::MissingKey(_)) => None,
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
let files = if let Some(ref old_val) = old_val {
|
||||
let files: Vec<(&str, &[u8])> = if let Some(ref old_val) = old_val {
|
||||
aux_file::decode_file_value(old_val)?
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
let new_files = if content.is_empty() {
|
||||
files
|
||||
.into_iter()
|
||||
.filter(|(p, _)| &path != p)
|
||||
.collect::<Vec<_>>()
|
||||
} else {
|
||||
files
|
||||
.into_iter()
|
||||
.filter(|(p, _)| &path != p)
|
||||
.chain(std::iter::once((path, content)))
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
let mut other_files = Vec::with_capacity(files.len());
|
||||
let mut modifying_file = None;
|
||||
for file @ (p, content) in files {
|
||||
if path == p {
|
||||
assert!(
|
||||
modifying_file.is_none(),
|
||||
"duplicated entries found for {}",
|
||||
path
|
||||
);
|
||||
modifying_file = Some(content);
|
||||
} else {
|
||||
other_files.push(file);
|
||||
}
|
||||
}
|
||||
let mut new_files = other_files;
|
||||
match (modifying_file, content.is_empty()) {
|
||||
(Some(old_content), false) => {
|
||||
self.tline
|
||||
.aux_file_size_estimator
|
||||
.on_update(old_content.len(), content.len());
|
||||
new_files.push((path, content));
|
||||
}
|
||||
(Some(old_content), true) => {
|
||||
self.tline
|
||||
.aux_file_size_estimator
|
||||
.on_remove(old_content.len());
|
||||
// not adding the file key to the final `new_files` vec.
|
||||
}
|
||||
(None, false) => {
|
||||
self.tline.aux_file_size_estimator.on_add(content.len());
|
||||
new_files.push((path, content));
|
||||
}
|
||||
(None, true) => anyhow::bail!("removing non-existing aux file: {}", path),
|
||||
}
|
||||
let new_val = aux_file::encode_file_value(&new_files)?;
|
||||
self.put(key, Value::Image(new_val.into()));
|
||||
}
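The branches above keep `aux_file_size_estimator` in step with the aux-file map: an add grows the estimate, an update adjusts it by the delta, a remove shrinks it, and a full scan (`on_base_backup`) resets it. As a hedged illustration only, one plausible shape for such an estimator is sketched below; the real pageserver type may be implemented differently (for example, on top of a metrics gauge).

use std::sync::atomic::{AtomicI64, Ordering};

// Hedged sketch: a counter that supports the on_add / on_update / on_remove /
// on_base_backup calls seen in the diff. Not the actual pageserver implementation.
#[derive(Default)]
struct AuxFileSizeEstimator {
    bytes: AtomicI64,
}

impl AuxFileSizeEstimator {
    fn on_add(&self, content_len: usize) {
        self.bytes.fetch_add(content_len as i64, Ordering::Relaxed);
    }
    fn on_update(&self, old_len: usize, new_len: usize) {
        self.bytes
            .fetch_add(new_len as i64 - old_len as i64, Ordering::Relaxed);
    }
    fn on_remove(&self, old_len: usize) {
        self.bytes.fetch_sub(old_len as i64, Ordering::Relaxed);
    }
    // A full scan (e.g. during basebackup) gives an authoritative total.
    fn on_base_backup(&self, total: usize) {
        self.bytes.store(total as i64, Ordering::Relaxed);
    }
}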
|
||||
@@ -1671,7 +1697,7 @@ impl<'a> DatadirModification<'a> {
|
||||
}
|
||||
|
||||
if !self.pending_deletions.is_empty() {
|
||||
writer.delete_batch(&self.pending_deletions).await?;
|
||||
writer.delete_batch(&self.pending_deletions, ctx).await?;
|
||||
self.pending_deletions.clear();
|
||||
}
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ use futures::FutureExt;
|
||||
use futures::StreamExt;
|
||||
use pageserver_api::models;
|
||||
use pageserver_api::models::TimelineState;
|
||||
use pageserver_api::models::TopTenantShardItem;
|
||||
use pageserver_api::models::WalRedoManagerStatus;
|
||||
use pageserver_api::shard::ShardIdentity;
|
||||
use pageserver_api::shard::ShardStripeSize;
|
||||
@@ -190,7 +191,7 @@ pub const TENANT_DELETED_MARKER_FILE_NAME: &str = "deleted";
|
||||
#[derive(Clone)]
|
||||
pub struct TenantSharedResources {
|
||||
pub broker_client: storage_broker::BrokerClientChannel,
|
||||
pub remote_storage: Option<GenericRemoteStorage>,
|
||||
pub remote_storage: GenericRemoteStorage,
|
||||
pub deletion_queue_client: DeletionQueueClient,
|
||||
}
|
||||
|
||||
@@ -292,7 +293,7 @@ pub struct Tenant {
|
||||
walredo_mgr: Option<Arc<WalRedoManager>>,
|
||||
|
||||
// provides access to timeline data sitting in the remote storage
|
||||
pub(crate) remote_storage: Option<GenericRemoteStorage>,
|
||||
pub(crate) remote_storage: GenericRemoteStorage,
|
||||
|
||||
// Access to global deletion queue for when this tenant wants to schedule a deletion
|
||||
deletion_queue_client: DeletionQueueClient,
|
||||
@@ -551,21 +552,22 @@ impl Tenant {
|
||||
);
|
||||
|
||||
if let Some(index_part) = index_part.as_ref() {
|
||||
timeline
|
||||
.remote_client
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.init_upload_queue(index_part)?;
|
||||
} else if self.remote_storage.is_some() {
|
||||
timeline.remote_client.init_upload_queue(index_part)?;
|
||||
} else {
|
||||
// No data on the remote storage, but we have local metadata file. We can end up
|
||||
// here with timeline_create being interrupted before finishing index part upload.
|
||||
// By doing what we do here, the index part upload is retried.
|
||||
// If control plane retries timeline creation in the meantime, the mgmt API handler
|
||||
// for timeline creation will coalesce on the upload we queue here.
|
||||
|
||||
// FIXME: this branch should be dead code as we no longer write local metadata.
|
||||
let rtc = timeline.remote_client.as_ref().unwrap();
|
||||
rtc.init_upload_queue_for_empty_remote(&metadata)?;
|
||||
rtc.schedule_index_upload_for_full_metadata_update(&metadata)?;
|
||||
|
||||
timeline
|
||||
.remote_client
|
||||
.init_upload_queue_for_empty_remote(&metadata)?;
|
||||
timeline
|
||||
.remote_client
|
||||
.schedule_index_upload_for_full_metadata_update(&metadata)?;
|
||||
}
|
||||
|
||||
timeline
|
||||
@@ -777,14 +779,14 @@ impl Tenant {
|
||||
AttachType::Normal
|
||||
};
|
||||
|
||||
let preload = match (&mode, &remote_storage) {
|
||||
(SpawnMode::Create, _) => {
|
||||
let preload = match &mode {
|
||||
SpawnMode::Create => {
|
||||
None
|
||||
},
|
||||
(SpawnMode::Eager | SpawnMode::Lazy, Some(remote_storage)) => {
|
||||
SpawnMode::Eager | SpawnMode::Lazy => {
|
||||
let _preload_timer = TENANT.preload.start_timer();
|
||||
let res = tenant_clone
|
||||
.preload(remote_storage, task_mgr::shutdown_token())
|
||||
.preload(&remote_storage, task_mgr::shutdown_token())
|
||||
.await;
|
||||
match res {
|
||||
Ok(p) => Some(p),
|
||||
@@ -794,10 +796,7 @@ impl Tenant {
|
||||
}
|
||||
}
|
||||
}
|
||||
(_, None) => {
|
||||
let _preload_timer = TENANT.preload.start_timer();
|
||||
None
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// Remote preload is complete.
|
||||
@@ -1021,7 +1020,7 @@ impl Tenant {
|
||||
index_part,
|
||||
remote_metadata,
|
||||
TimelineResources {
|
||||
remote_client: Some(remote_client),
|
||||
remote_client,
|
||||
deletion_queue_client: self.deletion_queue_client.clone(),
|
||||
timeline_get_throttle: self.timeline_get_throttle.clone(),
|
||||
},
|
||||
@@ -1047,7 +1046,7 @@ impl Tenant {
|
||||
Arc::clone(self),
|
||||
timeline_id,
|
||||
&index_part.metadata,
|
||||
Some(remote_timeline_client),
|
||||
remote_timeline_client,
|
||||
self.deletion_queue_client.clone(),
|
||||
)
|
||||
.instrument(tracing::info_span!("timeline_delete", %timeline_id))
|
||||
@@ -1139,9 +1138,7 @@ impl Tenant {
|
||||
let mut size = 0;
|
||||
|
||||
for timeline in self.list_timelines() {
|
||||
if let Some(remote_client) = &timeline.remote_client {
|
||||
size += remote_client.get_remote_physical_size();
|
||||
}
|
||||
size += timeline.remote_client.get_remote_physical_size();
|
||||
}
|
||||
|
||||
size
|
||||
@@ -1191,6 +1188,7 @@ impl Tenant {
|
||||
pub fn create_broken_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_shard_id: TenantShardId,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
reason: String,
|
||||
) -> Arc<Tenant> {
|
||||
Arc::new(Tenant::new(
|
||||
@@ -1205,7 +1203,7 @@ impl Tenant {
|
||||
ShardIdentity::broken(tenant_shard_id.shard_number, tenant_shard_id.shard_count),
|
||||
None,
|
||||
tenant_shard_id,
|
||||
None,
|
||||
remote_storage,
|
||||
DeletionQueueClient::broken(),
|
||||
))
|
||||
}
|
||||
@@ -1398,13 +1396,7 @@ impl Tenant {
|
||||
tline.freeze_and_flush().await.context("freeze_and_flush")?;
|
||||
|
||||
// Make sure the freeze_and_flush reaches remote storage.
|
||||
tline
|
||||
.remote_client
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.wait_completion()
|
||||
.await
|
||||
.unwrap();
|
||||
tline.remote_client.wait_completion().await.unwrap();
|
||||
|
||||
let tl = uninit_tl.finish_creation()?;
|
||||
// The non-test code would call tl.activate() here.
|
||||
@@ -1470,20 +1462,19 @@ impl Tenant {
|
||||
return Err(CreateTimelineError::Conflict);
|
||||
}
|
||||
|
||||
if let Some(remote_client) = existing.remote_client.as_ref() {
|
||||
// Wait for uploads to complete, so that when we return Ok, the timeline
|
||||
// is known to be durable on remote storage. Just like we do at the end of
|
||||
// this function, after we have created the timeline ourselves.
|
||||
//
|
||||
// We only really care that the initial version of `index_part.json` has
|
||||
// been uploaded. That's enough to remember that the timeline
|
||||
// exists. However, there is no function to wait specifically for that so
|
||||
// we just wait for all in-progress uploads to finish.
|
||||
remote_client
|
||||
.wait_completion()
|
||||
.await
|
||||
.context("wait for timeline uploads to complete")?;
|
||||
}
|
||||
// Wait for uploads to complete, so that when we return Ok, the timeline
|
||||
// is known to be durable on remote storage. Just like we do at the end of
|
||||
// this function, after we have created the timeline ourselves.
|
||||
//
|
||||
// We only really care that the initial version of `index_part.json` has
|
||||
// been uploaded. That's enough to remember that the timeline
|
||||
// exists. However, there is no function to wait specifically for that so
|
||||
// we just wait for all in-progress uploads to finish.
|
||||
existing
|
||||
.remote_client
|
||||
.wait_completion()
|
||||
.await
|
||||
.context("wait for timeline uploads to complete")?;
|
||||
|
||||
return Ok(existing);
|
||||
}
|
||||
@@ -1559,14 +1550,14 @@ impl Tenant {
|
||||
// the timeline is visible in [`Self::timelines`], but it is _not_ durable yet. We must
|
||||
// not send a success to the caller until it is. The same applies to handling retries,
|
||||
// see the handling of [`TimelineExclusionError::AlreadyExists`] above.
|
||||
if let Some(remote_client) = loaded_timeline.remote_client.as_ref() {
|
||||
let kind = ancestor_timeline_id
|
||||
.map(|_| "branched")
|
||||
.unwrap_or("bootstrapped");
|
||||
remote_client.wait_completion().await.with_context(|| {
|
||||
format!("wait for {} timeline initial uploads to complete", kind)
|
||||
})?;
|
||||
}
|
||||
let kind = ancestor_timeline_id
|
||||
.map(|_| "branched")
|
||||
.unwrap_or("bootstrapped");
|
||||
loaded_timeline
|
||||
.remote_client
|
||||
.wait_completion()
|
||||
.await
|
||||
.with_context(|| format!("wait for {} timeline initial uploads to complete", kind))?;
|
||||
|
||||
loaded_timeline.activate(self.clone(), broker_client, None, ctx);
|
||||
|
||||
@@ -2161,32 +2152,26 @@ impl Tenant {
|
||||
) -> anyhow::Result<()> {
|
||||
let timelines = self.timelines.lock().unwrap().clone();
|
||||
for timeline in timelines.values() {
|
||||
let Some(tl_client) = &timeline.remote_client else {
|
||||
anyhow::bail!("Remote storage is mandatory");
|
||||
};
|
||||
|
||||
let Some(remote_storage) = &self.remote_storage else {
|
||||
anyhow::bail!("Remote storage is mandatory");
|
||||
};
|
||||
|
||||
// We do not block timeline creation/deletion during splits inside the pageserver: it is up to higher levels
|
||||
// to ensure that they do not start a split if currently in the process of doing these.
|
||||
|
||||
// Upload an index from the parent: this is partly to provide freshness for the
|
||||
// child tenants that will copy it, and partly for general ease-of-debugging: there will
|
||||
// always be a parent shard index in the same generation as we wrote the child shard index.
|
||||
tl_client.schedule_index_upload_for_file_changes()?;
|
||||
tl_client.wait_completion().await?;
|
||||
timeline
|
||||
.remote_client
|
||||
.schedule_index_upload_for_file_changes()?;
|
||||
timeline.remote_client.wait_completion().await?;
|
||||
|
||||
// Shut down the timeline's remote client: this means that the indices we write
|
||||
// for child shards will not be invalidated by the parent shard deleting layers.
|
||||
tl_client.shutdown().await;
|
||||
timeline.remote_client.shutdown().await;
|
||||
|
||||
// Download methods can still be used after shutdown, as they don't flow through the remote client's
|
||||
// queue. In principal the RemoteTimelineClient could provide this without downloading it, but this
|
||||
// operation is rare, so it's simpler to just download it (and robustly guarantees that the index
|
||||
// we use here really is the remotely persistent one).
|
||||
let result = tl_client
|
||||
let result = timeline.remote_client
|
||||
.download_index_file(&self.cancel)
|
||||
.instrument(info_span!("download_index_file", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%timeline.timeline_id))
|
||||
.await?;
|
||||
@@ -2199,7 +2184,7 @@ impl Tenant {
|
||||
|
||||
for child_shard in child_shards {
|
||||
upload_index_part(
|
||||
remote_storage,
|
||||
&self.remote_storage,
|
||||
child_shard,
|
||||
&timeline.timeline_id,
|
||||
self.generation,
|
||||
@@ -2212,6 +2197,31 @@ impl Tenant {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_sizes(&self) -> TopTenantShardItem {
|
||||
let mut result = TopTenantShardItem {
|
||||
id: self.tenant_shard_id,
|
||||
resident_size: 0,
|
||||
physical_size: 0,
|
||||
max_logical_size: 0,
|
||||
};
|
||||
|
||||
for timeline in self.timelines.lock().unwrap().values() {
|
||||
result.resident_size += timeline.metrics.resident_physical_size_gauge.get();
|
||||
|
||||
result.physical_size += timeline
|
||||
.remote_client
|
||||
.metrics
|
||||
.remote_physical_size_gauge
|
||||
.get();
|
||||
result.max_logical_size = std::cmp::max(
|
||||
result.max_logical_size,
|
||||
timeline.metrics.current_logical_size_gauge.get(),
|
||||
);
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
/// Given a Vec of timelines and their ancestors (timeline_id, ancestor_id),
|
||||
@@ -2475,7 +2485,7 @@ impl Tenant {
|
||||
shard_identity: ShardIdentity,
|
||||
walredo_mgr: Option<Arc<WalRedoManager>>,
|
||||
tenant_shard_id: TenantShardId,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
deletion_queue_client: DeletionQueueClient,
|
||||
) -> Tenant {
|
||||
let (state, mut rx) = watch::channel(state);
|
||||
@@ -2800,7 +2810,7 @@ impl Tenant {
|
||||
// See comments in [`Tenant::branch_timeline`] for more information about why branch
|
||||
// creation task can run concurrently with timeline's GC iteration.
|
||||
for timeline in gc_timelines {
|
||||
if task_mgr::is_shutdown_requested() || cancel.is_cancelled() {
|
||||
if cancel.is_cancelled() {
|
||||
// We were requested to shut down. Stop and return with the progress we
|
||||
// made.
|
||||
break;
|
||||
@@ -3119,11 +3129,10 @@ impl Tenant {
|
||||
// We still need to upload its metadata eagerly: if other nodes `attach` the tenant and miss this timeline, their GC
|
||||
// could get incorrect information and remove more layers than needed.
|
||||
// See also https://github.com/neondatabase/neon/issues/3865
|
||||
if let Some(remote_client) = new_timeline.remote_client.as_ref() {
|
||||
remote_client
|
||||
.schedule_index_upload_for_full_metadata_update(&metadata)
|
||||
.context("branch initial metadata upload")?;
|
||||
}
|
||||
new_timeline
|
||||
.remote_client
|
||||
.schedule_index_upload_for_full_metadata_update(&metadata)
|
||||
.context("branch initial metadata upload")?;
|
||||
|
||||
Ok(new_timeline)
|
||||
}
|
||||
@@ -3155,11 +3164,6 @@ impl Tenant {
|
||||
pgdata_path: &Utf8PathBuf,
|
||||
timeline_id: &TimelineId,
|
||||
) -> anyhow::Result<()> {
|
||||
let Some(storage) = &self.remote_storage else {
|
||||
// No remote storage? No upload.
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let temp_path = timelines_path.join(format!(
|
||||
"{INITDB_PATH}.upload-{timeline_id}.{TEMP_FILE_SUFFIX}"
|
||||
));
|
||||
@@ -3183,7 +3187,7 @@ impl Tenant {
|
||||
backoff::retry(
|
||||
|| async {
|
||||
self::remote_timeline_client::upload_initdb_dir(
|
||||
storage,
|
||||
&self.remote_storage,
|
||||
&self.tenant_shard_id.tenant_id,
|
||||
timeline_id,
|
||||
pgdata_zstd.try_clone().await?,
|
||||
@@ -3240,9 +3244,6 @@ impl Tenant {
|
||||
}
|
||||
}
|
||||
if let Some(existing_initdb_timeline_id) = load_existing_initdb {
|
||||
let Some(storage) = &self.remote_storage else {
|
||||
bail!("no storage configured but load_existing_initdb set to {existing_initdb_timeline_id}");
|
||||
};
|
||||
if existing_initdb_timeline_id != timeline_id {
|
||||
let source_path = &remote_initdb_archive_path(
|
||||
&self.tenant_shard_id.tenant_id,
|
||||
@@ -3252,7 +3253,7 @@ impl Tenant {
|
||||
&remote_initdb_archive_path(&self.tenant_shard_id.tenant_id, &timeline_id);
|
||||
|
||||
// if this fails, it will get retried by retried control plane requests
|
||||
storage
|
||||
self.remote_storage
|
||||
.copy_object(source_path, dest_path, &self.cancel)
|
||||
.await
|
||||
.context("copy initdb tar")?;
|
||||
@@ -3260,7 +3261,7 @@ impl Tenant {
|
||||
let (initdb_tar_zst_path, initdb_tar_zst) =
|
||||
self::remote_timeline_client::download_initdb_tar_zst(
|
||||
self.conf,
|
||||
storage,
|
||||
&self.remote_storage,
|
||||
&self.tenant_shard_id,
|
||||
&existing_initdb_timeline_id,
|
||||
&self.cancel,
|
||||
@@ -3355,20 +3356,14 @@ impl Tenant {
|
||||
|
||||
/// Call this before constructing a timeline, to build its required structures
|
||||
fn build_timeline_resources(&self, timeline_id: TimelineId) -> TimelineResources {
|
||||
let remote_client = if let Some(remote_storage) = self.remote_storage.as_ref() {
|
||||
let remote_client = RemoteTimelineClient::new(
|
||||
remote_storage.clone(),
|
||||
self.deletion_queue_client.clone(),
|
||||
self.conf,
|
||||
self.tenant_shard_id,
|
||||
timeline_id,
|
||||
self.generation,
|
||||
);
|
||||
Some(remote_client)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let remote_client = RemoteTimelineClient::new(
|
||||
self.remote_storage.clone(),
|
||||
self.deletion_queue_client.clone(),
|
||||
self.conf,
|
||||
self.tenant_shard_id,
|
||||
timeline_id,
|
||||
self.generation,
|
||||
);
|
||||
TimelineResources {
|
||||
remote_client,
|
||||
deletion_queue_client: self.deletion_queue_client.clone(),
|
||||
@@ -3392,9 +3387,9 @@ impl Tenant {
|
||||
let tenant_shard_id = self.tenant_shard_id;
|
||||
|
||||
let resources = self.build_timeline_resources(new_timeline_id);
|
||||
if let Some(remote_client) = &resources.remote_client {
|
||||
remote_client.init_upload_queue_for_empty_remote(new_metadata)?;
|
||||
}
|
||||
resources
|
||||
.remote_client
|
||||
.init_upload_queue_for_empty_remote(new_metadata)?;
|
||||
|
||||
let timeline_struct = self
|
||||
.create_timeline_struct(
|
||||
@@ -3562,9 +3557,7 @@ impl Tenant {
|
||||
tracing::info!(timeline_id=%timeline.timeline_id, "Flushing...");
|
||||
timeline.freeze_and_flush().await?;
|
||||
tracing::info!(timeline_id=%timeline.timeline_id, "Waiting for uploads...");
|
||||
if let Some(client) = &timeline.remote_client {
|
||||
client.wait_completion().await?;
|
||||
}
|
||||
timeline.remote_client.wait_completion().await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -3878,7 +3871,7 @@ pub(crate) mod harness {
|
||||
ShardIdentity::unsharded(),
|
||||
Some(walredo_mgr),
|
||||
self.tenant_shard_id,
|
||||
Some(self.remote_storage.clone()),
|
||||
self.remote_storage.clone(),
|
||||
self.deletion_queue.new_client(),
|
||||
));
|
||||
|
||||
|
||||
@@ -299,7 +299,7 @@ mod tests {
|
||||
// Write part (in block to drop the file)
|
||||
let mut offsets = Vec::new();
|
||||
{
|
||||
let file = VirtualFile::create(pathbuf.as_path()).await?;
|
||||
let file = VirtualFile::create(pathbuf.as_path(), &ctx).await?;
|
||||
let mut wtr = BlobWriter::<BUFFERED>::new(file, 0);
|
||||
for blob in blobs.iter() {
|
||||
let (_, res) = wtr.write_blob(blob.clone(), &ctx).await;
|
||||
@@ -314,7 +314,7 @@ mod tests {
|
||||
wtr.flush_buffer(&ctx).await?;
|
||||
}
|
||||
|
||||
let file = VirtualFile::open(pathbuf.as_path()).await?;
|
||||
let file = VirtualFile::open(pathbuf.as_path(), &ctx).await?;
|
||||
let rdr = BlockReaderRef::VirtualFile(&file);
|
||||
let rdr = BlockCursor::new(rdr);
|
||||
for (idx, (blob, offset)) in blobs.iter().zip(offsets.iter()).enumerate() {
|
||||
|
||||
@@ -102,7 +102,7 @@ impl<'a> BlockReaderRef<'a> {
|
||||
#[cfg(test)]
|
||||
TestDisk(r) => r.read_blk(blknum),
|
||||
#[cfg(test)]
|
||||
VirtualFile(r) => r.read_blk(blknum).await,
|
||||
VirtualFile(r) => r.read_blk(blknum, ctx).await,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -177,10 +177,11 @@ impl<'a> FileBlockReader<'a> {
|
||||
&self,
|
||||
buf: PageWriteGuard<'static>,
|
||||
blkno: u32,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<PageWriteGuard<'static>, std::io::Error> {
|
||||
assert!(buf.len() == PAGE_SZ);
|
||||
self.file
|
||||
.read_exact_at_page(buf, blkno as u64 * PAGE_SZ as u64)
|
||||
.read_exact_at_page(buf, blkno as u64 * PAGE_SZ as u64, ctx)
|
||||
.await
|
||||
}
|
||||
/// Read a block.
|
||||
@@ -206,7 +207,7 @@ impl<'a> FileBlockReader<'a> {
|
||||
ReadBufResult::Found(guard) => Ok(guard.into()),
|
||||
ReadBufResult::NotFound(write_guard) => {
|
||||
// Read the page from disk into the buffer
|
||||
let write_guard = self.fill_buffer(write_guard, blknum).await?;
|
||||
let write_guard = self.fill_buffer(write_guard, blknum, ctx).await?;
|
||||
Ok(write_guard.mark_valid().into())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -181,25 +181,23 @@ async fn ensure_timelines_dir_empty(timelines_path: &Utf8Path) -> Result<(), Del
|
||||
|
||||
async fn remove_tenant_remote_delete_mark(
|
||||
conf: &PageServerConf,
|
||||
remote_storage: Option<&GenericRemoteStorage>,
|
||||
remote_storage: &GenericRemoteStorage,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<(), DeleteTenantError> {
|
||||
if let Some(remote_storage) = remote_storage {
|
||||
let path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?;
|
||||
backoff::retry(
|
||||
|| async { remote_storage.delete(&path, cancel).await },
|
||||
TimeoutOrCancel::caused_by_cancel,
|
||||
FAILED_UPLOAD_WARN_THRESHOLD,
|
||||
FAILED_REMOTE_OP_RETRIES,
|
||||
"remove_tenant_remote_delete_mark",
|
||||
cancel,
|
||||
)
|
||||
.await
|
||||
.ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))
|
||||
.and_then(|x| x)
|
||||
.context("remove_tenant_remote_delete_mark")?;
|
||||
}
|
||||
let path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?;
|
||||
backoff::retry(
|
||||
|| async { remote_storage.delete(&path, cancel).await },
|
||||
TimeoutOrCancel::caused_by_cancel,
|
||||
FAILED_UPLOAD_WARN_THRESHOLD,
|
||||
FAILED_REMOTE_OP_RETRIES,
|
||||
"remove_tenant_remote_delete_mark",
|
||||
cancel,
|
||||
)
|
||||
.await
|
||||
.ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))
|
||||
.and_then(|x| x)
|
||||
.context("remove_tenant_remote_delete_mark")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -297,7 +295,7 @@ impl DeleteTenantFlow {
|
||||
#[instrument(skip_all)]
|
||||
pub(crate) async fn run(
|
||||
conf: &'static PageServerConf,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
tenants: &'static std::sync::RwLock<TenantsMap>,
|
||||
tenant: Arc<Tenant>,
|
||||
cancel: &CancellationToken,
|
||||
@@ -308,9 +306,7 @@ impl DeleteTenantFlow {
|
||||
|
||||
let mut guard = Self::prepare(&tenant).await?;
|
||||
|
||||
if let Err(e) =
|
||||
Self::run_inner(&mut guard, conf, remote_storage.as_ref(), &tenant, cancel).await
|
||||
{
|
||||
if let Err(e) = Self::run_inner(&mut guard, conf, &remote_storage, &tenant, cancel).await {
|
||||
tenant.set_broken(format!("{e:#}")).await;
|
||||
return Err(e);
|
||||
}
|
||||
@@ -327,7 +323,7 @@ impl DeleteTenantFlow {
|
||||
async fn run_inner(
|
||||
guard: &mut OwnedMutexGuard<Self>,
|
||||
conf: &'static PageServerConf,
|
||||
remote_storage: Option<&GenericRemoteStorage>,
|
||||
remote_storage: &GenericRemoteStorage,
|
||||
tenant: &Tenant,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<(), DeleteTenantError> {
|
||||
@@ -339,14 +335,9 @@ impl DeleteTenantFlow {
|
||||
))?
|
||||
});
|
||||
|
||||
// IDEA: implement detach as delete without remote storage. Then they would use the same lock (deletion_progress) so they won't contend.
|
||||
// Though sounds scary, different mark name?
|
||||
// Detach currently uses remove_dir_all so in case of a crash we can end up in a weird state.
|
||||
if let Some(remote_storage) = &remote_storage {
|
||||
create_remote_delete_mark(conf, remote_storage, &tenant.tenant_shard_id, cancel)
|
||||
.await
|
||||
.context("remote_mark")?
|
||||
}
|
||||
create_remote_delete_mark(conf, remote_storage, &tenant.tenant_shard_id, cancel)
|
||||
.await
|
||||
.context("remote_mark")?;
|
||||
|
||||
fail::fail_point!("tenant-delete-before-create-local-mark", |_| {
|
||||
Err(anyhow::anyhow!(
|
||||
@@ -483,7 +474,7 @@ impl DeleteTenantFlow {
|
||||
fn schedule_background(
|
||||
guard: OwnedMutexGuard<Self>,
|
||||
conf: &'static PageServerConf,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
tenants: &'static std::sync::RwLock<TenantsMap>,
|
||||
tenant: Arc<Tenant>,
|
||||
) {
|
||||
@@ -512,7 +503,7 @@ impl DeleteTenantFlow {
|
||||
async fn background(
|
||||
mut guard: OwnedMutexGuard<Self>,
|
||||
conf: &PageServerConf,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
tenants: &'static std::sync::RwLock<TenantsMap>,
|
||||
tenant: &Arc<Tenant>,
|
||||
) -> Result<(), DeleteTenantError> {
|
||||
@@ -551,7 +542,7 @@ impl DeleteTenantFlow {
|
||||
|
||||
remove_tenant_remote_delete_mark(
|
||||
conf,
|
||||
remote_storage.as_ref(),
|
||||
&remote_storage,
|
||||
&tenant.tenant_shard_id,
|
||||
&task_mgr::shutdown_token(),
|
||||
)
|
||||
|
||||
@@ -28,6 +28,7 @@ impl EphemeralFile {
|
||||
conf: &PageServerConf,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<EphemeralFile, io::Error> {
|
||||
static NEXT_FILENAME: AtomicU64 = AtomicU64::new(1);
|
||||
let filename_disambiguator =
|
||||
@@ -45,6 +46,7 @@ impl EphemeralFile {
|
||||
.read(true)
|
||||
.write(true)
|
||||
.create(true),
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
@@ -153,7 +155,7 @@ mod tests {
|
||||
async fn test_ephemeral_blobs() -> Result<(), io::Error> {
|
||||
let (conf, tenant_id, timeline_id, ctx) = harness("ephemeral_blobs")?;
|
||||
|
||||
let mut file = EphemeralFile::create(conf, tenant_id, timeline_id).await?;
|
||||
let mut file = EphemeralFile::create(conf, tenant_id, timeline_id, &ctx).await?;
|
||||
|
||||
let pos_foo = file.write_blob(b"foo", &ctx).await?;
|
||||
assert_eq!(
|
||||
|
||||
@@ -78,7 +78,7 @@ impl RW {
|
||||
page_cache::ReadBufResult::NotFound(write_guard) => {
|
||||
let write_guard = writer
|
||||
.file
|
||||
.read_exact_at_page(write_guard, blknum as u64 * PAGE_SZ as u64)
|
||||
.read_exact_at_page(write_guard, blknum as u64 * PAGE_SZ as u64, ctx)
|
||||
.await?;
|
||||
let read_guard = write_guard.mark_valid();
|
||||
return Ok(BlockLease::PageReadGuard(read_guard));
|
||||
|
||||
@@ -214,12 +214,12 @@ impl TimelineMetadata {
|
||||
self.body.ancestor_timeline = Some(*timeline);
|
||||
}
|
||||
|
||||
pub fn detach_from_ancestor(&mut self, timeline: &TimelineId, ancestor_lsn: &Lsn) {
|
||||
pub fn detach_from_ancestor(&mut self, branchpoint: &(TimelineId, Lsn)) {
|
||||
if let Some(ancestor) = self.body.ancestor_timeline {
|
||||
assert_eq!(ancestor, *timeline);
|
||||
assert_eq!(ancestor, branchpoint.0);
|
||||
}
|
||||
if self.body.ancestor_lsn != Lsn(0) {
|
||||
assert_eq!(self.body.ancestor_lsn, *ancestor_lsn);
|
||||
assert_eq!(self.body.ancestor_lsn, branchpoint.1);
|
||||
}
|
||||
self.body.ancestor_timeline = None;
|
||||
self.body.ancestor_lsn = Lsn(0);
|
||||
|
||||
@@ -16,10 +16,9 @@ use std::cmp::Ordering;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::ops::Deref;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
use std::time::Duration;
|
||||
use sysinfo::SystemExt;
|
||||
use tokio::fs;
|
||||
use utils::timeout::{timeout_cancellable, TimeoutCancellableError};
|
||||
|
||||
use anyhow::Context;
|
||||
use once_cell::sync::Lazy;
|
||||
@@ -47,7 +46,7 @@ use crate::tenant::span::debug_assert_current_span_has_tenant_id;
|
||||
use crate::tenant::storage_layer::inmemory_layer;
|
||||
use crate::tenant::timeline::ShutdownMode;
|
||||
use crate::tenant::{AttachedTenantConf, SpawnMode, Tenant, TenantState};
|
||||
use crate::{InitializationOrder, IGNORED_TENANT_FILE_NAME, METADATA_FILE_NAME, TEMP_FILE_SUFFIX};
|
||||
use crate::{InitializationOrder, IGNORED_TENANT_FILE_NAME, TEMP_FILE_SUFFIX};
|
||||
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
use utils::fs_ext::PathExt;
|
||||
@@ -119,6 +118,7 @@ pub(crate) enum TenantsMapRemoveResult {
|
||||
|
||||
/// When resolving a TenantId to a shard, we may be looking for the 0th
/// shard, or we might be looking for whichever shard holds a particular page.
#[derive(Copy, Clone)]
pub(crate) enum ShardSelector {
/// Only return the 0th shard, if it is present. If a non-0th shard is present,
/// ignore it.
|
||||
@@ -169,6 +169,14 @@ impl TenantStartupMode {
|
||||
}
|
||||
}
|
||||
|
||||
/// Result type for looking up a TenantId to a specific shard
pub(crate) enum ShardResolveResult {
NotFound,
Found(Arc<Tenant>),
// Wait for this barrier, then query again
InProgress(utils::completion::Barrier),
}
|
||||
|
||||
impl TenantsMap {
|
||||
/// Convenience function for typical usage, where we want to get a `Tenant` object, for
|
||||
/// working with attached tenants. If the TenantId is in the map but in Secondary state,
|
||||
@@ -182,51 +190,6 @@ impl TenantsMap {
|
||||
}
|
||||
}
|
||||
|
||||
/// A page service client sends a TenantId, and to look up the correct Tenant we must
|
||||
/// resolve this to a fully qualified TenantShardId.
|
||||
fn resolve_attached_shard(
|
||||
&self,
|
||||
tenant_id: &TenantId,
|
||||
selector: ShardSelector,
|
||||
) -> Option<TenantShardId> {
|
||||
let mut want_shard = None;
|
||||
match self {
|
||||
TenantsMap::Initializing => None,
|
||||
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {
|
||||
for slot in m.range(TenantShardId::tenant_range(*tenant_id)) {
|
||||
// Ignore all slots that don't contain an attached tenant
|
||||
let tenant = match &slot.1 {
|
||||
TenantSlot::Attached(t) => t,
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
match selector {
|
||||
ShardSelector::First => return Some(*slot.0),
|
||||
ShardSelector::Zero if slot.0.shard_number == ShardNumber(0) => {
|
||||
return Some(*slot.0)
|
||||
}
|
||||
ShardSelector::Page(key) => {
|
||||
// First slot we see for this tenant, calculate the expected shard number
|
||||
// for the key: we will use this for checking if this and subsequent
|
||||
// slots contain the key, rather than recalculating the hash each time.
|
||||
if want_shard.is_none() {
|
||||
want_shard = Some(tenant.shard_identity.get_shard_number(&key));
|
||||
}
|
||||
|
||||
if Some(tenant.shard_identity.number) == want_shard {
|
||||
return Some(*slot.0);
|
||||
}
|
||||
}
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
|
||||
// Fall through: we didn't find an acceptable shard
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Only for use from DeleteTenantFlow. This method directly removes a TenantSlot from the map.
|
||||
///
|
||||
/// The normal way to remove a tenant is using a SlotGuard, which will gracefully remove the guarded
|
||||
@@ -391,22 +354,17 @@ async fn init_load_generations(
|
||||
// deletion list entries may still be valid. We provide that by pushing a recovery operation into
|
||||
// the queue. Sequential processing of the queue ensures that recovery is done before any new tenant deletions
|
||||
// are processed, even though we don't block on recovery completing here.
|
||||
//
|
||||
// Must only do this if remote storage is enabled, otherwise deletion queue
|
||||
// is not running and channel push will fail.
|
||||
if resources.remote_storage.is_some() {
|
||||
let attached_tenants = generations
|
||||
.iter()
|
||||
.flat_map(|(id, start_mode)| {
|
||||
match start_mode {
|
||||
TenantStartupMode::Attached((_mode, generation)) => Some(generation),
|
||||
TenantStartupMode::Secondary => None,
|
||||
}
|
||||
.map(|gen| (*id, *gen))
|
||||
})
|
||||
.collect();
|
||||
resources.deletion_queue_client.recover(attached_tenants)?;
|
||||
}
|
||||
let attached_tenants = generations
|
||||
.iter()
|
||||
.flat_map(|(id, start_mode)| {
|
||||
match start_mode {
|
||||
TenantStartupMode::Attached((_mode, generation)) => Some(generation),
|
||||
TenantStartupMode::Secondary => None,
|
||||
}
|
||||
.map(|gen| (*id, *gen))
|
||||
})
|
||||
.collect();
|
||||
resources.deletion_queue_client.recover(attached_tenants)?;
|
||||
|
||||
Ok(Some(generations))
|
||||
}
|
||||
@@ -460,53 +418,6 @@ fn load_tenant_config(
|
||||
}
|
||||
};
|
||||
|
||||
// Clean up legacy `metadata` files.
|
||||
// Doing it here because every single tenant directory is visited here.
|
||||
// In any later code, there's different treatment of tenant dirs
|
||||
// ... depending on whether the tenant is in re-attach response or not
|
||||
// ... depending on whether the tenant is ignored or not
|
||||
assert_eq!(
|
||||
&conf.tenant_path(&tenant_shard_id),
|
||||
&tenant_dir_path,
|
||||
"later use of conf....path() methods would be dubious"
|
||||
);
|
||||
let timelines: Vec<TimelineId> = match conf.timelines_path(&tenant_shard_id).read_dir_utf8() {
|
||||
Ok(iter) => {
|
||||
let mut timelines = Vec::new();
|
||||
for res in iter {
|
||||
let p = res?;
|
||||
let Some(timeline_id) = p.file_name().parse::<TimelineId>().ok() else {
|
||||
// skip any entries that aren't TimelineId, such as
|
||||
// - *.___temp dirs
|
||||
// - unfinished initdb uploads (test_non_uploaded_root_timeline_is_deleted_after_restart)
|
||||
continue;
|
||||
};
|
||||
timelines.push(timeline_id);
|
||||
}
|
||||
timelines
|
||||
}
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => vec![],
|
||||
Err(e) => return Err(anyhow::anyhow!(e)),
|
||||
};
|
||||
for timeline_id in timelines {
|
||||
let timeline_path = &conf.timeline_path(&tenant_shard_id, &timeline_id);
|
||||
let metadata_path = timeline_path.join(METADATA_FILE_NAME);
|
||||
match std::fs::remove_file(&metadata_path) {
|
||||
Ok(()) => {
|
||||
crashsafe::fsync(timeline_path)
|
||||
.context("fsync timeline dir after removing legacy metadata file")?;
|
||||
info!("removed legacy metadata file at {metadata_path}");
|
||||
}
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
|
||||
// something removed the file earlier, or it was never there
|
||||
// We don't care, this software version doesn't write it again, so, we're good.
|
||||
}
|
||||
Err(e) => {
|
||||
anyhow::bail!("remove legacy metadata file: {e}: {metadata_path}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let tenant_ignore_mark_file = tenant_dir_path.join(IGNORED_TENANT_FILE_NAME);
|
||||
if tenant_ignore_mark_file.exists() {
|
||||
info!("Found an ignore mark file {tenant_ignore_mark_file:?}, skipping the tenant");
|
||||
@@ -611,6 +522,7 @@ pub async fn init_tenant_mgr(
|
||||
TenantSlot::Attached(Tenant::create_broken_tenant(
|
||||
conf,
|
||||
tenant_shard_id,
|
||||
resources.remote_storage.clone(),
|
||||
format!("{}", e),
|
||||
)),
|
||||
);
|
||||
@@ -803,6 +715,7 @@ fn tenant_spawn(
|
||||
"Cannot load tenant, ignore mark found at {tenant_ignore_mark:?}"
|
||||
);
|
||||
|
||||
let remote_storage = resources.remote_storage.clone();
|
||||
let tenant = match Tenant::spawn(
|
||||
conf,
|
||||
tenant_shard_id,
|
||||
@@ -817,7 +730,7 @@ fn tenant_spawn(
|
||||
Ok(tenant) => tenant,
|
||||
Err(e) => {
|
||||
error!("Failed to spawn tenant {tenant_shard_id}, reason: {e:#}");
|
||||
Tenant::create_broken_tenant(conf, tenant_shard_id, format!("{e:#}"))
|
||||
Tenant::create_broken_tenant(conf, tenant_shard_id, remote_storage, format!("{e:#}"))
|
||||
}
|
||||
};
|
||||
|
||||
@@ -2103,6 +2016,72 @@ impl TenantManager {
|
||||
|
||||
Ok(reparented)
|
||||
}
|
||||
|
||||
/// A page service client sends a TenantId, and to look up the correct Tenant we must
/// resolve this to a fully qualified TenantShardId.
///
/// During shard splits: we shall see parent shards in InProgress state and skip them, and
/// instead match on child shards which should appear in Attached state. Very early in a shard
/// split, or in other cases where a shard is InProgress, we will return our own InProgress result
/// to instruct the caller to wait for that to finish before querying again.
|
||||
pub(crate) fn resolve_attached_shard(
|
||||
&self,
|
||||
tenant_id: &TenantId,
|
||||
selector: ShardSelector,
|
||||
) -> ShardResolveResult {
|
||||
let tenants = self.tenants.read().unwrap();
|
||||
let mut want_shard = None;
|
||||
let mut any_in_progress = None;
|
||||
|
||||
match &*tenants {
|
||||
TenantsMap::Initializing => ShardResolveResult::NotFound,
|
||||
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {
|
||||
for slot in m.range(TenantShardId::tenant_range(*tenant_id)) {
|
||||
// Ignore all slots that don't contain an attached tenant
|
||||
let tenant = match &slot.1 {
|
||||
TenantSlot::Attached(t) => t,
|
||||
TenantSlot::InProgress(barrier) => {
|
||||
// We might still find a usable shard, but in case we don't, remember that
|
||||
// we saw at least one InProgress slot, so that we can distinguish this case
|
||||
// from a simple NotFound in our return value.
|
||||
any_in_progress = Some(barrier.clone());
|
||||
continue;
|
||||
}
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
match selector {
|
||||
ShardSelector::First => return ShardResolveResult::Found(tenant.clone()),
|
||||
ShardSelector::Zero if slot.0.shard_number == ShardNumber(0) => {
|
||||
return ShardResolveResult::Found(tenant.clone())
|
||||
}
|
||||
ShardSelector::Page(key) => {
|
||||
// First slot we see for this tenant, calculate the expected shard number
|
||||
// for the key: we will use this for checking if this and subsequent
|
||||
// slots contain the key, rather than recalculating the hash each time.
|
||||
if want_shard.is_none() {
|
||||
want_shard = Some(tenant.shard_identity.get_shard_number(&key));
|
||||
}
|
||||
|
||||
if Some(tenant.shard_identity.number) == want_shard {
|
||||
return ShardResolveResult::Found(tenant.clone());
|
||||
}
|
||||
}
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
|
||||
// Fall through: we didn't find a slot that was in Attached state & matched our selector. If
|
||||
// we found one or more InProgress slot, indicate to caller that they should retry later. Otherwise
|
||||
// this requested shard simply isn't found.
|
||||
if let Some(barrier) = any_in_progress {
|
||||
ShardResolveResult::InProgress(barrier)
|
||||
} else {
|
||||
ShardResolveResult::NotFound
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
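To summarize the selection rule implemented above: `First` accepts any attached shard, `Zero` only shard 0, and `Page` only the shard whose number the key maps to. The sketch below restates that rule with simplified placeholder types (the key is pre-hashed to a `u64`); it is not the real `ShardSelector`/`ShardIdentity` API.

// Simplified placeholder types; the real pageserver types differ.
#[derive(Clone, Copy, PartialEq)]
struct ShardNumber(u8);

struct ShardIdentity {
    number: ShardNumber,
    count: u8,
}

impl ShardIdentity {
    // Placeholder for the real key-to-shard mapping: hash modulo shard count.
    fn shard_for_key(&self, key_hash: u64) -> ShardNumber {
        ShardNumber((key_hash % self.count.max(1) as u64) as u8)
    }
}

enum Selector {
    First,
    Zero,
    Page(u64), // pre-hashed key, for simplicity
}

fn matches(selector: &Selector, shard: &ShardIdentity) -> bool {
    match selector {
        Selector::First => true,
        Selector::Zero => shard.number == ShardNumber(0),
        Selector::Page(key_hash) => shard.number == shard.shard_for_key(*key_hash),
    }
}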
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
@@ -2151,105 +2130,6 @@ pub(crate) enum GetActiveTenantError {
|
||||
Broken(String),
|
||||
}
|
||||
|
||||
/// Get a [`Tenant`] in its active state. If the tenant_id is currently in [`TenantSlot::InProgress`]
|
||||
/// state, then wait for up to `timeout`. If the [`Tenant`] is not currently in [`TenantState::Active`],
|
||||
/// then wait for up to `timeout` (minus however long we waited for the slot).
|
||||
pub(crate) async fn get_active_tenant_with_timeout(
|
||||
tenant_id: TenantId,
|
||||
shard_selector: ShardSelector,
|
||||
timeout: Duration,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<Arc<Tenant>, GetActiveTenantError> {
|
||||
enum WaitFor {
|
||||
Barrier(utils::completion::Barrier),
|
||||
Tenant(Arc<Tenant>),
|
||||
}
|
||||
|
||||
let wait_start = Instant::now();
|
||||
let deadline = wait_start + timeout;
|
||||
|
||||
let (wait_for, tenant_shard_id) = {
|
||||
let locked = TENANTS.read().unwrap();
|
||||
|
||||
// Resolve TenantId to TenantShardId
|
||||
let tenant_shard_id = locked
|
||||
.resolve_attached_shard(&tenant_id, shard_selector)
|
||||
.ok_or(GetActiveTenantError::NotFound(GetTenantError::NotFound(
|
||||
tenant_id,
|
||||
)))?;
|
||||
|
||||
let peek_slot = tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Read)
|
||||
.map_err(GetTenantError::MapState)?;
|
||||
match peek_slot {
|
||||
Some(TenantSlot::Attached(tenant)) => {
|
||||
match tenant.current_state() {
|
||||
TenantState::Active => {
|
||||
// Fast path: we don't need to do any async waiting.
|
||||
return Ok(tenant.clone());
|
||||
}
|
||||
_ => {
|
||||
tenant.activate_now();
|
||||
(WaitFor::Tenant(tenant.clone()), tenant_shard_id)
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(TenantSlot::Secondary(_)) => {
|
||||
return Err(GetActiveTenantError::NotFound(GetTenantError::NotActive(
|
||||
tenant_shard_id,
|
||||
)))
|
||||
}
|
||||
Some(TenantSlot::InProgress(barrier)) => {
|
||||
(WaitFor::Barrier(barrier.clone()), tenant_shard_id)
|
||||
}
|
||||
None => {
|
||||
return Err(GetActiveTenantError::NotFound(GetTenantError::NotFound(
|
||||
tenant_id,
|
||||
)))
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let tenant = match wait_for {
|
||||
WaitFor::Barrier(barrier) => {
|
||||
tracing::debug!("Waiting for tenant InProgress state to pass...");
|
||||
timeout_cancellable(
|
||||
deadline.duration_since(Instant::now()),
|
||||
cancel,
|
||||
barrier.wait(),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
TimeoutCancellableError::Timeout => GetActiveTenantError::WaitForActiveTimeout {
|
||||
latest_state: None,
|
||||
wait_time: wait_start.elapsed(),
|
||||
},
|
||||
TimeoutCancellableError::Cancelled => GetActiveTenantError::Cancelled,
|
||||
})?;
|
||||
{
|
||||
let locked = TENANTS.read().unwrap();
|
||||
let peek_slot =
|
||||
tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Read)
|
||||
.map_err(GetTenantError::MapState)?;
|
||||
match peek_slot {
|
||||
Some(TenantSlot::Attached(tenant)) => tenant.clone(),
|
||||
_ => {
|
||||
return Err(GetActiveTenantError::NotFound(GetTenantError::NotActive(
|
||||
tenant_shard_id,
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
WaitFor::Tenant(tenant) => tenant,
|
||||
};
|
||||
|
||||
tracing::debug!("Waiting for tenant to enter active state...");
|
||||
tenant
|
||||
.wait_to_become_active(deadline.duration_since(Instant::now()))
|
||||
.await?;
|
||||
Ok(tenant)
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub(crate) enum DeleteTimelineError {
|
||||
#[error("Tenant {0}")]
|
||||
@@ -2276,7 +2156,7 @@ pub(crate) async fn load_tenant(
|
||||
tenant_id: TenantId,
|
||||
generation: Generation,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
deletion_queue_client: DeletionQueueClient,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), TenantMapInsertError> {
|
||||
@@ -2880,86 +2760,73 @@ use {
|
||||
utils::http::error::ApiError,
|
||||
};
|
||||
|
||||
pub(crate) fn immediate_gc(
|
||||
#[instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id))]
|
||||
pub(crate) async fn immediate_gc(
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
gc_req: TimelineGcRequest,
|
||||
cancel: CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<tokio::sync::oneshot::Receiver<Result<GcResult, anyhow::Error>>, ApiError> {
|
||||
let guard = TENANTS.read().unwrap();
|
||||
|
||||
let tenant = guard
|
||||
.get(&tenant_shard_id)
|
||||
.cloned()
|
||||
.with_context(|| format!("tenant {tenant_shard_id}"))
|
||||
.map_err(|e| ApiError::NotFound(e.into()))?;
|
||||
) -> Result<GcResult, ApiError> {
|
||||
let tenant = {
|
||||
let guard = TENANTS.read().unwrap();
|
||||
guard
|
||||
.get(&tenant_shard_id)
|
||||
.cloned()
|
||||
.with_context(|| format!("tenant {tenant_shard_id}"))
|
||||
.map_err(|e| ApiError::NotFound(e.into()))?
|
||||
};
|
||||
|
||||
let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon());
|
||||
// Use tenant's pitr setting
|
||||
let pitr = tenant.get_pitr_interval();
|
||||
|
||||
tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
|
||||
|
||||
// Run in task_mgr to avoid race with tenant_detach operation
|
||||
let ctx = ctx.detached_child(TaskKind::GarbageCollector, DownloadBehavior::Download);
|
||||
let (task_done, wait_task_done) = tokio::sync::oneshot::channel();
|
||||
let span = info_span!("manual_gc", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id);
|
||||
let ctx: RequestContext =
|
||||
ctx.detached_child(TaskKind::GarbageCollector, DownloadBehavior::Download);
|
||||
|
||||
// TODO: spawning is redundant now, need to hold the gate
|
||||
task_mgr::spawn(
|
||||
&tokio::runtime::Handle::current(),
|
||||
TaskKind::GarbageCollector,
|
||||
Some(tenant_shard_id),
|
||||
Some(timeline_id),
|
||||
&format!("timeline_gc_handler garbage collection run for tenant {tenant_shard_id} timeline {timeline_id}"),
|
||||
false,
|
||||
async move {
|
||||
fail::fail_point!("immediate_gc_task_pre");
|
||||
let _gate_guard = tenant.gate.enter().map_err(|_| ApiError::ShuttingDown)?;
|
||||
|
||||
#[allow(unused_mut)]
|
||||
let mut result = tenant
|
||||
.gc_iteration(Some(timeline_id), gc_horizon, pitr, &cancel, &ctx)
|
||||
.await;
|
||||
// FIXME: `gc_iteration` can return an error for multiple reasons; we should handle it
|
||||
// better once the types support it.
|
||||
fail::fail_point!("immediate_gc_task_pre");
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
{
|
||||
// we need to synchronize with drop completion for python tests without polling for
|
||||
// log messages
|
||||
if let Ok(result) = result.as_mut() {
|
||||
let mut js = tokio::task::JoinSet::new();
|
||||
for layer in std::mem::take(&mut result.doomed_layers) {
|
||||
js.spawn(layer.wait_drop());
|
||||
}
|
||||
tracing::info!(total = js.len(), "starting to wait for the gc'd layers to be dropped");
|
||||
while let Some(res) = js.join_next().await {
|
||||
res.expect("wait_drop should not panic");
|
||||
}
|
||||
}
|
||||
#[allow(unused_mut)]
|
||||
let mut result = tenant
|
||||
.gc_iteration(Some(timeline_id), gc_horizon, pitr, &cancel, &ctx)
|
||||
.await;
|
||||
// FIXME: `gc_iteration` can return an error for multiple reasons; we should handle it
|
||||
// better once the types support it.
|
||||
|
||||
let timeline = tenant.get_timeline(timeline_id, false).ok();
|
||||
let rtc = timeline.as_ref().and_then(|x| x.remote_client.as_ref());
|
||||
|
||||
if let Some(rtc) = rtc {
|
||||
// layer drops schedule actions on remote timeline client to actually do the
|
||||
// deletions; don't care about the shutdown error, just exit fast
|
||||
drop(rtc.wait_completion().await);
|
||||
}
|
||||
#[cfg(feature = "testing")]
|
||||
{
|
||||
// we need to synchronize with drop completion for python tests without polling for
|
||||
// log messages
|
||||
if let Ok(result) = result.as_mut() {
|
||||
let mut js = tokio::task::JoinSet::new();
|
||||
for layer in std::mem::take(&mut result.doomed_layers) {
|
||||
js.spawn(layer.wait_drop());
|
||||
}
|
||||
|
||||
match task_done.send(result) {
|
||||
Ok(_) => (),
|
||||
Err(result) => error!("failed to send gc result: {result:?}"),
|
||||
tracing::info!(
|
||||
total = js.len(),
|
||||
"starting to wait for the gc'd layers to be dropped"
|
||||
);
|
||||
while let Some(res) = js.join_next().await {
|
||||
res.expect("wait_drop should not panic");
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
.instrument(span)
|
||||
);
|
||||
|
||||
// drop the guard until after we've spawned the task so that timeline shutdown will wait for the task
|
||||
drop(guard);
|
||||
let timeline = tenant.get_timeline(timeline_id, false).ok();
|
||||
let rtc = timeline.as_ref().map(|x| &x.remote_client);
|
||||
|
||||
Ok(wait_task_done)
|
||||
if let Some(rtc) = rtc {
|
||||
// layer drops schedule actions on remote timeline client to actually do the
|
||||
// deletions; don't care about the shutdown error, just exit fast
|
||||
drop(rtc.wait_completion().await);
|
||||
}
|
||||
}
|
||||
|
||||
result.map_err(ApiError::InternalServerError)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -240,7 +240,7 @@ use utils::id::{TenantId, TimelineId};
|
||||
use self::index::IndexPart;
|
||||
|
||||
use super::metadata::MetadataUpdate;
|
||||
use super::storage_layer::{Layer, LayerFileName, ResidentLayer};
|
||||
use super::storage_layer::{Layer, LayerName, ResidentLayer};
|
||||
use super::upload_queue::SetDeletedFlagProgress;
|
||||
use super::Generation;
|
||||
|
||||
@@ -317,7 +317,7 @@ pub struct RemoteTimelineClient {
|
||||
|
||||
upload_queue: Mutex<UploadQueue>,
|
||||
|
||||
metrics: Arc<RemoteTimelineClientMetrics>,
|
||||
pub(crate) metrics: Arc<RemoteTimelineClientMetrics>,
|
||||
|
||||
storage_impl: GenericRemoteStorage,
|
||||
|
||||
@@ -437,6 +437,19 @@ impl RemoteTimelineClient {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if this timeline was previously detached at this Lsn and the remote timeline
|
||||
/// client is currently initialized.
|
||||
pub(crate) fn is_previous_ancestor_lsn(&self, lsn: Lsn) -> bool {
|
||||
// technically this is a dirty read, but given how timeline detach ancestor is implemented
|
||||
// via tenant restart, the lineage has always been uploaded.
|
||||
self.upload_queue
|
||||
.lock()
|
||||
.unwrap()
|
||||
.initialized_mut()
|
||||
.map(|uq| uq.latest_lineage.is_previous_ancestor_lsn(lsn))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
fn update_remote_physical_size_gauge(&self, current_remote_index_part: Option<&IndexPart>) {
|
||||
let size: u64 = if let Some(current_remote_index_part) = current_remote_index_part {
|
||||
current_remote_index_part
|
||||
@@ -448,11 +461,11 @@ impl RemoteTimelineClient {
|
||||
} else {
|
||||
0
|
||||
};
|
||||
self.metrics.remote_physical_size_set(size);
|
||||
self.metrics.remote_physical_size_gauge.set(size);
|
||||
}
|
||||
|
||||
pub fn get_remote_physical_size(&self) -> u64 {
|
||||
self.metrics.remote_physical_size_get()
|
||||
self.metrics.remote_physical_size_gauge.get()
|
||||
}
|
||||
|
||||
//
|
||||
@@ -503,7 +516,7 @@ impl RemoteTimelineClient {
|
||||
/// On success, returns the size of the downloaded file.
|
||||
pub async fn download_layer_file(
|
||||
&self,
|
||||
layer_file_name: &LayerFileName,
|
||||
layer_file_name: &LayerName,
|
||||
layer_metadata: &LayerFileMetadata,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
@@ -628,7 +641,7 @@ impl RemoteTimelineClient {
|
||||
);
|
||||
|
||||
let index_part = IndexPart::from(&*upload_queue);
|
||||
let op = UploadOp::UploadMetadata(index_part, disk_consistent_lsn);
|
||||
let op = UploadOp::UploadMetadata(Box::new(index_part), disk_consistent_lsn);
|
||||
self.metric_begin(&op);
|
||||
upload_queue.queued_operations.push_back(op);
|
||||
upload_queue.latest_files_changes_since_metadata_upload_scheduled = 0;
|
||||
@@ -647,7 +660,14 @@ impl RemoteTimelineClient {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
|
||||
let Some(prev) = upload_queue.latest_metadata.ancestor_timeline() else {
|
||||
return Err(anyhow::anyhow!(
|
||||
"cannot reparent without a current ancestor"
|
||||
));
|
||||
};
|
||||
|
||||
upload_queue.latest_metadata.reparent(new_parent);
|
||||
upload_queue.latest_lineage.record_previous_ancestor(&prev);
|
||||
|
||||
self.schedule_index_upload(upload_queue);
|
||||
|
||||
@@ -670,14 +690,13 @@ impl RemoteTimelineClient {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
|
||||
upload_queue
|
||||
.latest_metadata
|
||||
.detach_from_ancestor(&adopted.0, &adopted.1);
|
||||
upload_queue.latest_metadata.detach_from_ancestor(&adopted);
|
||||
upload_queue.latest_lineage.record_detaching(&adopted);
|
||||
|
||||
for layer in layers {
|
||||
upload_queue
|
||||
.latest_files
|
||||
.insert(layer.layer_desc().filename(), layer.metadata());
|
||||
.insert(layer.layer_desc().layer_name(), layer.metadata());
|
||||
}
|
||||
|
||||
self.schedule_index_upload(upload_queue);
|
||||
@@ -713,7 +732,7 @@ impl RemoteTimelineClient {
|
||||
|
||||
upload_queue
|
||||
.latest_files
|
||||
.insert(layer.layer_desc().filename(), metadata.clone());
|
||||
.insert(layer.layer_desc().layer_name(), metadata.clone());
|
||||
upload_queue.latest_files_changes_since_metadata_upload_scheduled += 1;
|
||||
|
||||
info!(
|
||||
@@ -737,7 +756,7 @@ impl RemoteTimelineClient {
|
||||
/// successfully.
|
||||
pub fn schedule_layer_file_deletion(
|
||||
self: &Arc<Self>,
|
||||
names: &[LayerFileName],
|
||||
names: &[LayerName],
|
||||
) -> anyhow::Result<()> {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
@@ -765,7 +784,7 @@ impl RemoteTimelineClient {
|
||||
// the layer files as "dangling". this is fine, at worst case we create work for the
|
||||
// scrubber.
|
||||
|
||||
let names = gc_layers.iter().map(|x| x.layer_desc().filename());
|
||||
let names = gc_layers.iter().map(|x| x.layer_desc().layer_name());
|
||||
|
||||
self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names);
|
||||
|
||||
@@ -780,9 +799,9 @@ impl RemoteTimelineClient {
|
||||
self: &Arc<Self>,
|
||||
upload_queue: &mut UploadQueueInitialized,
|
||||
names: I,
|
||||
) -> Vec<(LayerFileName, LayerFileMetadata)>
|
||||
) -> Vec<(LayerName, LayerFileMetadata)>
|
||||
where
|
||||
I: IntoIterator<Item = LayerFileName>,
|
||||
I: IntoIterator<Item = LayerName>,
|
||||
{
|
||||
// Decorate our list of names with each name's metadata, dropping
|
||||
// names that are unexpectedly missing from our metadata. This metadata
|
||||
@@ -832,7 +851,7 @@ impl RemoteTimelineClient {
|
||||
/// `index_part.json` with [`Self::schedule_gc_update`] or [`Self::schedule_compaction_update`].
|
||||
pub(crate) fn schedule_deletion_of_unlinked(
|
||||
self: &Arc<Self>,
|
||||
layers: Vec<(LayerFileName, LayerFileMetadata)>,
|
||||
layers: Vec<(LayerName, LayerFileMetadata)>,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
@@ -845,7 +864,7 @@ impl RemoteTimelineClient {
|
||||
fn schedule_deletion_of_unlinked0(
|
||||
self: &Arc<Self>,
|
||||
upload_queue: &mut UploadQueueInitialized,
|
||||
mut with_metadata: Vec<(LayerFileName, LayerFileMetadata)>,
|
||||
mut with_metadata: Vec<(LayerName, LayerFileMetadata)>,
|
||||
) {
|
||||
// Filter out any layers which were not created by this tenant shard. These are
|
||||
// layers that originate from some ancestor shard after a split, and may still
|
||||
@@ -914,7 +933,7 @@ impl RemoteTimelineClient {
|
||||
self.schedule_layer_file_upload0(upload_queue, layer.clone());
|
||||
}
|
||||
|
||||
let names = compacted_from.iter().map(|x| x.layer_desc().filename());
|
||||
let names = compacted_from.iter().map(|x| x.layer_desc().layer_name());
|
||||
|
||||
self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names);
|
||||
self.launch_queued_tasks(upload_queue);
|
||||
@@ -1108,6 +1127,11 @@ impl RemoteTimelineClient {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn is_deleting(&self) -> bool {
|
||||
let mut locked = self.upload_queue.lock().unwrap();
|
||||
locked.stopped_mut().is_ok()
|
||||
}
|
||||
|
||||
pub(crate) async fn preserve_initdb_archive(
|
||||
self: &Arc<Self>,
|
||||
tenant_id: &TenantId,
|
||||
@@ -1144,7 +1168,7 @@ impl RemoteTimelineClient {
|
||||
&self.tenant_shard_id.tenant_id,
|
||||
&self.timeline_id,
|
||||
self.tenant_shard_id.to_index(),
|
||||
&uploaded.layer_desc().filename(),
|
||||
&uploaded.layer_desc().layer_name(),
|
||||
uploaded.metadata().generation,
|
||||
);
|
||||
|
||||
@@ -1185,7 +1209,7 @@ impl RemoteTimelineClient {
|
||||
.get_timeline_id()
|
||||
.expect("Source timeline should be alive"),
|
||||
self.tenant_shard_id.to_index(),
|
||||
&adopted.layer_desc().filename(),
|
||||
&adopted.layer_desc().layer_name(),
|
||||
adopted.metadata().generation,
|
||||
);
|
||||
|
||||
@@ -1193,7 +1217,7 @@ impl RemoteTimelineClient {
|
||||
&self.tenant_shard_id.tenant_id,
|
||||
&self.timeline_id,
|
||||
self.tenant_shard_id.to_index(),
|
||||
&adopted_as.layer_desc().filename(),
|
||||
&adopted_as.layer_desc().layer_name(),
|
||||
adopted_as.metadata().generation,
|
||||
);
|
||||
|
||||
@@ -1527,7 +1551,7 @@ impl RemoteTimelineClient {
|
||||
&self.tenant_shard_id.tenant_id,
|
||||
&self.timeline_id,
|
||||
layer_metadata.shard,
|
||||
&layer.layer_desc().filename(),
|
||||
&layer.layer_desc().layer_name(),
|
||||
layer_metadata.generation,
|
||||
);
|
||||
|
||||
@@ -1811,6 +1835,7 @@ impl RemoteTimelineClient {
|
||||
latest_files: initialized.latest_files.clone(),
|
||||
latest_files_changes_since_metadata_upload_scheduled: 0,
|
||||
latest_metadata: initialized.latest_metadata.clone(),
|
||||
latest_lineage: initialized.latest_lineage.clone(),
|
||||
projected_remote_consistent_lsn: None,
|
||||
visible_remote_consistent_lsn: initialized
|
||||
.visible_remote_consistent_lsn
|
||||
@@ -1896,14 +1921,14 @@ pub fn remote_layer_path(
|
||||
tenant_id: &TenantId,
|
||||
timeline_id: &TimelineId,
|
||||
shard: ShardIndex,
|
||||
layer_file_name: &LayerFileName,
|
||||
layer_file_name: &LayerName,
|
||||
generation: Generation,
|
||||
) -> RemotePath {
|
||||
// Generation-aware key format
|
||||
let path = format!(
|
||||
"tenants/{tenant_id}{0}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{1}{2}",
|
||||
shard.get_suffix(),
|
||||
layer_file_name.file_name(),
|
||||
layer_file_name,
|
||||
generation.get_suffix()
|
||||
);
|
||||
|
||||
@@ -2000,8 +2025,8 @@ mod tests {
|
||||
TimelineMetadata::from_bytes(&metadata.to_bytes().unwrap()).unwrap()
|
||||
}
|
||||
|
||||
fn assert_file_list(a: &HashSet<LayerFileName>, b: &[&str]) {
|
||||
let mut avec: Vec<String> = a.iter().map(|x| x.file_name()).collect();
|
||||
fn assert_file_list(a: &HashSet<LayerName>, b: &[&str]) {
|
||||
let mut avec: Vec<String> = a.iter().map(|x| x.to_string()).collect();
|
||||
avec.sort();
|
||||
|
||||
let mut bvec = b.to_vec();
|
||||
@@ -2112,7 +2137,7 @@ mod tests {
|
||||
tenant_ctx: _tenant_ctx,
|
||||
} = test_setup;
|
||||
|
||||
let client = timeline.remote_client.as_ref().unwrap();
|
||||
let client = &timeline.remote_client;
|
||||
|
||||
// Download back the index.json, and check that the list of files is correct
|
||||
let initial_index_part = match client
|
||||
@@ -2127,7 +2152,7 @@ mod tests {
|
||||
.layer_metadata
|
||||
.keys()
|
||||
.map(|f| f.to_owned())
|
||||
.collect::<HashSet<LayerFileName>>();
|
||||
.collect::<HashSet<LayerName>>();
|
||||
let initial_layer = {
|
||||
assert!(initial_layers.len() == 1);
|
||||
initial_layers.into_iter().next().unwrap()
|
||||
@@ -2153,7 +2178,7 @@ mod tests {
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59DA-00000000016B5A53".parse().unwrap(), dummy_contents("baz"))
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(name, contents): (LayerFileName, Vec<u8>)| {
|
||||
.map(|(name, contents): (LayerName, Vec<u8>)| {
|
||||
|
||||
let local_path = local_layer_path(
|
||||
harness.conf,
|
||||
@@ -2234,9 +2259,9 @@ mod tests {
|
||||
.map(|f| f.to_owned())
|
||||
.collect(),
|
||||
&[
|
||||
&initial_layer.file_name(),
|
||||
&layers[0].layer_desc().filename().file_name(),
|
||||
&layers[1].layer_desc().filename().file_name(),
|
||||
&initial_layer.to_string(),
|
||||
&layers[0].layer_desc().layer_name().to_string(),
|
||||
&layers[1].layer_desc().layer_name().to_string(),
|
||||
],
|
||||
);
|
||||
assert_eq!(index_part.metadata, metadata);
|
||||
@@ -2250,7 +2275,7 @@ mod tests {
|
||||
// keep using schedule_layer_file_deletion because we don't have a way to wait for the
|
||||
// spawn_blocking started by the drop.
|
||||
client
|
||||
.schedule_layer_file_deletion(&[layers[0].layer_desc().filename()])
|
||||
.schedule_layer_file_deletion(&[layers[0].layer_desc().layer_name()])
|
||||
.unwrap();
|
||||
{
|
||||
let mut guard = client.upload_queue.lock().unwrap();
|
||||
@@ -2268,9 +2293,9 @@ mod tests {
|
||||
}
|
||||
assert_remote_files(
|
||||
&[
|
||||
&initial_layer.file_name(),
|
||||
&layers[0].layer_desc().filename().file_name(),
|
||||
&layers[1].layer_desc().filename().file_name(),
|
||||
&initial_layer.to_string(),
|
||||
&layers[0].layer_desc().layer_name().to_string(),
|
||||
&layers[1].layer_desc().layer_name().to_string(),
|
||||
"index_part.json",
|
||||
],
|
||||
&remote_timeline_dir,
|
||||
@@ -2283,9 +2308,9 @@ mod tests {
|
||||
|
||||
assert_remote_files(
|
||||
&[
|
||||
&initial_layer.file_name(),
|
||||
&layers[1].layer_desc().filename().file_name(),
|
||||
&layers[2].layer_desc().filename().file_name(),
|
||||
&initial_layer.to_string(),
|
||||
&layers[1].layer_desc().layer_name().to_string(),
|
||||
&layers[2].layer_desc().layer_name().to_string(),
|
||||
"index_part.json",
|
||||
],
|
||||
&remote_timeline_dir,
|
||||
@@ -2303,9 +2328,9 @@ mod tests {
|
||||
timeline,
|
||||
..
|
||||
} = TestSetup::new("metrics").await.unwrap();
|
||||
let client = timeline.remote_client.as_ref().unwrap();
|
||||
let client = &timeline.remote_client;
|
||||
|
||||
let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap();
|
||||
let layer_file_name_1: LayerName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap();
|
||||
let local_path = local_layer_path(
|
||||
harness.conf,
|
||||
&timeline.tenant_shard_id,
|
||||
|
||||
@@ -22,7 +22,7 @@ use crate::context::RequestContext;
|
||||
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
|
||||
use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_path};
|
||||
use crate::tenant::storage_layer::layer::local_layer_path;
|
||||
use crate::tenant::storage_layer::LayerFileName;
|
||||
use crate::tenant::storage_layer::LayerName;
|
||||
use crate::tenant::Generation;
|
||||
use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile};
|
||||
use crate::TEMP_FILE_SUFFIX;
|
||||
@@ -48,7 +48,7 @@ pub async fn download_layer_file<'a>(
|
||||
storage: &'a GenericRemoteStorage,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
layer_file_name: &'a LayerFileName,
|
||||
layer_file_name: &'a LayerName,
|
||||
layer_metadata: &'a LayerFileMetadata,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
@@ -112,14 +112,17 @@ pub async fn download_layer_file<'a>(
|
||||
// We use fatal_err() below because the after the rename above,
|
||||
// the in-memory state of the filesystem already has the layer file in its final place,
|
||||
// and subsequent pageserver code could think it's durable while it really isn't.
|
||||
let work = async move {
|
||||
let timeline_dir = VirtualFile::open(&timeline_path)
|
||||
.await
|
||||
.fatal_err("VirtualFile::open for timeline dir fsync");
|
||||
timeline_dir
|
||||
.sync_all()
|
||||
.await
|
||||
.fatal_err("VirtualFile::sync_all timeline dir");
|
||||
let work = {
|
||||
let ctx = ctx.detached_child(ctx.task_kind(), ctx.download_behavior());
|
||||
async move {
|
||||
let timeline_dir = VirtualFile::open(&timeline_path, &ctx)
|
||||
.await
|
||||
.fatal_err("VirtualFile::open for timeline dir fsync");
|
||||
timeline_dir
|
||||
.sync_all()
|
||||
.await
|
||||
.fatal_err("VirtualFile::sync_all timeline dir");
|
||||
}
|
||||
};
|
||||
crate::virtual_file::io_engine::get()
|
||||
.spawn_blocking_and_block_on_if_std(work)
|
||||
@@ -196,7 +199,7 @@ async fn download_object<'a>(
|
||||
use crate::virtual_file::owned_buffers_io::{self, util::size_tracking_writer};
|
||||
use bytes::BytesMut;
|
||||
async {
|
||||
let destination_file = VirtualFile::create(dst_path)
|
||||
let destination_file = VirtualFile::create(dst_path, ctx)
|
||||
.await
|
||||
.with_context(|| format!("create a destination file for layer '{dst_path}'"))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
@@ -6,9 +6,10 @@ use std::collections::HashMap;
|
||||
|
||||
use chrono::NaiveDateTime;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utils::id::TimelineId;
|
||||
|
||||
use crate::tenant::metadata::TimelineMetadata;
|
||||
use crate::tenant::storage_layer::LayerFileName;
|
||||
use crate::tenant::storage_layer::LayerName;
|
||||
use crate::tenant::upload_queue::UploadQueueInitialized;
|
||||
use crate::tenant::Generation;
|
||||
use pageserver_api::shard::ShardIndex;
|
||||
@@ -75,7 +76,7 @@ pub struct IndexPart {
|
||||
///
|
||||
/// Older versions of `IndexPart` will not have this property or have only a part of metadata
|
||||
/// that latest version stores.
|
||||
pub layer_metadata: HashMap<LayerFileName, IndexLayerMetadata>,
|
||||
pub layer_metadata: HashMap<LayerName, IndexLayerMetadata>,
|
||||
|
||||
// 'disk_consistent_lsn' is a copy of the 'disk_consistent_lsn' in the metadata.
|
||||
// It's duplicated for convenience when reading the serialized structure, but is
|
||||
@@ -84,6 +85,9 @@ pub struct IndexPart {
|
||||
|
||||
#[serde(rename = "metadata_bytes")]
|
||||
pub metadata: TimelineMetadata,
|
||||
|
||||
#[serde(default)]
|
||||
pub(crate) lineage: Lineage,
|
||||
}
|
||||
|
||||
impl IndexPart {
|
||||
@@ -96,17 +100,19 @@
    /// - 3: no longer deserialize `timeline_layers` (serialized format is the same, but timeline_layers
    /// is always generated from the keys of `layer_metadata`)
    /// - 4: timeline_layers is fully removed.
-    const LATEST_VERSION: usize = 4;
+    /// - 5: lineage was added
+    const LATEST_VERSION: usize = 5;

    // Versions we may see when reading from a bucket.
-    pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4];
+    pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5];

    pub const FILE_NAME: &'static str = "index_part.json";

    fn new(
-        layers_and_metadata: &HashMap<LayerFileName, LayerFileMetadata>,
+        layers_and_metadata: &HashMap<LayerName, LayerFileMetadata>,
        disk_consistent_lsn: Lsn,
        metadata: TimelineMetadata,
+        lineage: Lineage,
    ) -> Self {
        let layer_metadata = layers_and_metadata
            .iter()
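For context on how these constants are meant to be consumed: `KNOWN_VERSIONS` is the allow-list a reader can check before trusting the rest of the document. The following is only an illustrative sketch, not the pageserver's actual reader; `RawIndex` and `check_version` are invented names, and a real reader may prefer to warn and best-effort parse instead of failing hard on an unknown version.

use serde::Deserialize;

const KNOWN_VERSIONS: &[usize] = &[1, 2, 3, 4, 5];

// Invented stand-in: only the version field is needed for the gate.
#[derive(Deserialize)]
struct RawIndex {
    version: usize,
    // remaining fields elided for the sketch; serde ignores unknown JSON fields by default
}

fn check_version(bytes: &[u8]) -> Result<RawIndex, String> {
    let raw: RawIndex =
        serde_json::from_slice(bytes).map_err(|e| format!("malformed index_part.json: {e}"))?;
    if !KNOWN_VERSIONS.contains(&raw.version) {
        return Err(format!("unknown index_part.json version {}", raw.version));
    }
    Ok(raw)
}

fn main() {
    // A known version parses; an unrecognized one is rejected by the gate.
    assert_eq!(check_version(br#"{"version":4,"layer_metadata":{}}"#).unwrap().version, 4);
    assert!(check_version(br#"{"version":9}"#).is_err());
}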
@@ -119,6 +125,7 @@ impl IndexPart {
            disk_consistent_lsn,
            metadata,
            deleted_at: None,
+            lineage,
        }
    }
@@ -147,6 +154,7 @@ impl IndexPart {
            &HashMap::new(),
            example_metadata.disk_consistent_lsn(),
            example_metadata,
+            Default::default(),
        )
    }
}
@@ -155,8 +163,9 @@ impl From<&UploadQueueInitialized> for IndexPart {
    fn from(uq: &UploadQueueInitialized) -> Self {
        let disk_consistent_lsn = uq.latest_metadata.disk_consistent_lsn();
        let metadata = uq.latest_metadata.clone();
+        let lineage = uq.latest_lineage.clone();

-        Self::new(&uq.latest_files, disk_consistent_lsn, metadata)
+        Self::new(&uq.latest_files, disk_consistent_lsn, metadata, lineage)
    }
}
@@ -184,8 +193,76 @@ impl From<&LayerFileMetadata> for IndexLayerMetadata {
    }
}

+/// Limited history of earlier ancestors.
+///
+/// A timeline can have more than 1 earlier ancestor, in the rare case that it was repeatedly
+/// reparented by having a later timeline be detached from its ancestor.
+#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Default)]
+pub(crate) struct Lineage {
+    /// Has the `reparenting_history` been truncated to [`Lineage::REMEMBER_AT_MOST`].
+    #[serde(skip_serializing_if = "is_false", default)]
+    reparenting_history_truncated: bool,
+
+    /// Earlier ancestors, truncated when [`Self::reparenting_history_truncated`]
+    ///
+    /// These are stored in case we want to support WAL based DR on the timeline. There can be many
+    /// of these and at most one [`Self::original_ancestor`]. There cannot be more reparentings
+    /// after [`Self::original_ancestor`] has been set.
+    #[serde(skip_serializing_if = "Vec::is_empty", default)]
+    reparenting_history: Vec<TimelineId>,
+
+    /// The ancestor from which this timeline has been detached, and when.
+    ///
+    /// If you are adding support for detaching from a hierarchy, consider changing the ancestry
+    /// into a `Vec<(TimelineId, Lsn)>` to be a path instead.
+    #[serde(skip_serializing_if = "Option::is_none", default)]
+    original_ancestor: Option<(TimelineId, Lsn, NaiveDateTime)>,
+}
+
+fn is_false(b: &bool) -> bool {
+    !b
+}
+
+impl Lineage {
+    const REMEMBER_AT_MOST: usize = 100;
+
+    pub(crate) fn record_previous_ancestor(&mut self, old_ancestor: &TimelineId) {
+        if self.reparenting_history.last() == Some(old_ancestor) {
+            // do not re-record it
+            return;
+        }
+
+        let drop_oldest = self.reparenting_history.len() + 1 >= Self::REMEMBER_AT_MOST;
+
+        self.reparenting_history_truncated |= drop_oldest;
+        if drop_oldest {
+            self.reparenting_history.remove(0);
+        }
+        self.reparenting_history.push(*old_ancestor);
+    }
+
+    pub(crate) fn record_detaching(&mut self, branchpoint: &(TimelineId, Lsn)) {
+        assert!(self.original_ancestor.is_none());
+
+        self.original_ancestor =
+            Some((branchpoint.0, branchpoint.1, chrono::Utc::now().naive_utc()));
+    }
+
+    /// The queried lsn is most likely the basebackup lsn, and this answers the question "is it allowed
+    /// to start a read/write primary at this lsn".
+    ///
+    /// Returns true if the Lsn was previously a branch point.
+    pub(crate) fn is_previous_ancestor_lsn(&self, lsn: Lsn) -> bool {
+        self.original_ancestor
+            .as_ref()
+            .is_some_and(|(_, ancestor_lsn, _)| lsn == *ancestor_lsn)
+    }
+}
+
#[cfg(test)]
mod tests {
+    use std::str::FromStr;
+
    use super::*;

    #[test]
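To make the truncation rule in `record_previous_ancestor` concrete, here is a minimal standalone sketch of the same bookkeeping. It is not the pageserver type: `TimelineId` is replaced with `String` so it runs without the crate, and the cap is shrunk from 100 to 3 to keep the run short.

// Standalone sketch of the bounded reparenting history; not the pageserver type itself.
#[derive(Default, Debug)]
struct LineageSketch {
    truncated: bool,
    reparenting_history: Vec<String>,
}

impl LineageSketch {
    const REMEMBER_AT_MOST: usize = 3; // the real constant is 100

    fn record_previous_ancestor(&mut self, old_ancestor: &str) {
        if self.reparenting_history.last().map(String::as_str) == Some(old_ancestor) {
            return; // do not re-record the same ancestor twice in a row
        }
        let drop_oldest = self.reparenting_history.len() + 1 >= Self::REMEMBER_AT_MOST;
        self.truncated |= drop_oldest;
        if drop_oldest {
            self.reparenting_history.remove(0);
        }
        self.reparenting_history.push(old_ancestor.to_string());
    }
}

fn main() {
    let mut lineage = LineageSketch::default();
    for ancestor in ["a", "b", "c", "d"] {
        lineage.record_previous_ancestor(ancestor);
    }
    // Oldest entries fall off once the cap is reached, and the truncation flag sticks.
    assert_eq!(lineage.reparenting_history, vec!["c", "d"]);
    assert!(lineage.truncated);
}

With the real cap of 100, the history stabilises at 99 entries, because the check fires as soon as the pre-push length reaches 99, and the truncated flag then stays set.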
@@ -221,6 +298,7 @@ mod tests {
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
metadata: TimelineMetadata::from_bytes(&[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(),
|
||||
deleted_at: None,
|
||||
lineage: Lineage::default(),
|
||||
};
|
||||
|
||||
let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
|
||||
@@ -261,6 +339,7 @@ mod tests {
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
metadata: TimelineMetadata::from_bytes(&[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(),
|
||||
deleted_at: None,
|
||||
lineage: Lineage::default(),
|
||||
};
|
||||
|
||||
let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
|
||||
@@ -302,7 +381,8 @@ mod tests {
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
metadata: TimelineMetadata::from_bytes(&[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(),
|
||||
deleted_at: Some(chrono::NaiveDateTime::parse_from_str(
|
||||
"2023-07-31T09:00:00.123000000", "%Y-%m-%dT%H:%M:%S.%f").unwrap())
|
||||
"2023-07-31T09:00:00.123000000", "%Y-%m-%dT%H:%M:%S.%f").unwrap()),
|
||||
lineage: Lineage::default(),
|
||||
};
|
||||
|
||||
let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
|
||||
@@ -347,6 +427,7 @@ mod tests {
|
||||
])
|
||||
.unwrap(),
|
||||
deleted_at: None,
|
||||
lineage: Lineage::default(),
|
||||
};
|
||||
|
||||
let empty_layers_parsed = IndexPart::from_s3_bytes(empty_layers_json.as_bytes()).unwrap();
|
||||
@@ -385,11 +466,58 @@ mod tests {
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
metadata: TimelineMetadata::from_bytes(&[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(),
|
||||
deleted_at: Some(chrono::NaiveDateTime::parse_from_str(
|
||||
"2023-07-31T09:00:00.123000000", "%Y-%m-%dT%H:%M:%S.%f").unwrap()),
|
||||
deleted_at: Some(parse_naive_datetime("2023-07-31T09:00:00.123000000")),
|
||||
lineage: Lineage::default(),
|
||||
};
|
||||
|
||||
let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
|
||||
assert_eq!(part, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn v5_indexpart_is_parsed() {
|
||||
let example = r#"{
|
||||
"version":5,
|
||||
"layer_metadata":{
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000014EF420-00000000014EF499":{"file_size":23289856,"generation":1},
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000014EF499-00000000015A7619":{"file_size":1015808,"generation":1}},
|
||||
"disk_consistent_lsn":"0/15A7618",
|
||||
"metadata_bytes":[226,88,25,241,0,46,0,4,0,0,0,0,1,90,118,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,78,244,32,0,0,0,0,1,78,244,32,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
|
||||
"lineage":{
|
||||
"original_ancestor":["e2bfd8c633d713d279e6fcd2bcc15b6d","0/15A7618","2024-05-07T18:52:36.322426563"],
|
||||
"reparenting_history":["e1bfd8c633d713d279e6fcd2bcc15b6d"]
|
||||
}
|
||||
}"#;
|
||||
|
||||
let expected = IndexPart {
|
||||
version: 5,
|
||||
layer_metadata: HashMap::from([
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000014EF420-00000000014EF499".parse().unwrap(), IndexLayerMetadata {
|
||||
file_size: 23289856,
|
||||
generation: Generation::new(1),
|
||||
shard: ShardIndex::unsharded(),
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000014EF499-00000000015A7619".parse().unwrap(), IndexLayerMetadata {
|
||||
file_size: 1015808,
|
||||
generation: Generation::new(1),
|
||||
shard: ShardIndex::unsharded(),
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: Lsn::from_str("0/15A7618").unwrap(),
|
||||
metadata: TimelineMetadata::from_bytes(&[226,88,25,241,0,46,0,4,0,0,0,0,1,90,118,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,78,244,32,0,0,0,0,1,78,244,32,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(),
|
||||
deleted_at: None,
|
||||
lineage: Lineage {
|
||||
reparenting_history_truncated: false,
|
||||
reparenting_history: vec![TimelineId::from_str("e1bfd8c633d713d279e6fcd2bcc15b6d").unwrap()],
|
||||
original_ancestor: Some((TimelineId::from_str("e2bfd8c633d713d279e6fcd2bcc15b6d").unwrap(), Lsn::from_str("0/15A7618").unwrap(), parse_naive_datetime("2024-05-07T18:52:36.322426563"))),
|
||||
},
|
||||
};
|
||||
|
||||
let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
|
||||
assert_eq!(part, expected);
|
||||
}
|
||||
|
||||
fn parse_naive_datetime(s: &str) -> NaiveDateTime {
|
||||
chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S.%f").unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ use super::{
|
||||
mgr::TenantManager,
|
||||
remote_timeline_client::LayerFileMetadata,
|
||||
span::debug_assert_current_span_has_tenant_id,
|
||||
storage_layer::{layer::local_layer_path, LayerFileName},
|
||||
storage_layer::{layer::local_layer_path, LayerName},
|
||||
};
|
||||
|
||||
use pageserver_api::{
|
||||
@@ -182,7 +182,7 @@ impl SecondaryTenant {
|
||||
self: &Arc<Self>,
|
||||
conf: &PageServerConf,
|
||||
timeline_id: TimelineId,
|
||||
name: LayerFileName,
|
||||
name: LayerName,
|
||||
metadata: LayerFileMetadata,
|
||||
) {
|
||||
debug_assert_current_span_has_tenant_id();
|
||||
|
||||
@@ -22,11 +22,11 @@ use crate::{
|
||||
FAILED_REMOTE_OP_RETRIES,
|
||||
},
|
||||
span::debug_assert_current_span_has_tenant_id,
|
||||
storage_layer::{layer::local_layer_path, LayerFileName},
|
||||
storage_layer::{layer::local_layer_path, LayerName},
|
||||
tasks::{warn_when_period_overrun, BackgroundLoopKind},
|
||||
},
|
||||
virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile},
|
||||
METADATA_FILE_NAME, TEMP_FILE_SUFFIX,
|
||||
TEMP_FILE_SUFFIX,
|
||||
};
|
||||
|
||||
use super::{
|
||||
@@ -45,10 +45,10 @@ use crate::tenant::{
|
||||
|
||||
use camino::Utf8PathBuf;
|
||||
use chrono::format::{DelayedFormat, StrftimeItems};
|
||||
use futures::Future;
|
||||
use futures::{Future, StreamExt};
|
||||
use pageserver_api::models::SecondaryProgress;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use remote_storage::{DownloadError, Etag, GenericRemoteStorage};
|
||||
use remote_storage::{DownloadError, Etag, GenericRemoteStorage, RemoteStorageActivity};
|
||||
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{info_span, instrument, warn, Instrument};
|
||||
@@ -71,6 +71,12 @@ use super::{
/// `<https://github.com/neondatabase/neon/issues/6200>`
const DOWNLOAD_FRESHEN_INTERVAL: Duration = Duration::from_millis(60000);

+/// Range of concurrency we may use when downloading layers within a timeline. This is independent
+/// for each tenant we're downloading: the concurrency of _tenants_ is defined separately in
+/// `PageServerConf::secondary_download_concurrency`
+const MAX_LAYER_CONCURRENCY: usize = 16;
+const MIN_LAYER_CONCURRENCY: usize = 1;
+
pub(super) async fn downloader_task(
    tenant_manager: Arc<TenantManager>,
    remote_storage: GenericRemoteStorage,
@@ -79,14 +85,15 @@ pub(super) async fn downloader_task(
|
||||
cancel: CancellationToken,
|
||||
root_ctx: RequestContext,
|
||||
) {
|
||||
let concurrency = tenant_manager.get_conf().secondary_download_concurrency;
|
||||
// How many tenants' secondary download operations we will run concurrently
|
||||
let tenant_concurrency = tenant_manager.get_conf().secondary_download_concurrency;
|
||||
|
||||
let generator = SecondaryDownloader {
|
||||
tenant_manager,
|
||||
remote_storage,
|
||||
root_ctx,
|
||||
};
|
||||
let mut scheduler = Scheduler::new(generator, concurrency);
|
||||
let mut scheduler = Scheduler::new(generator, tenant_concurrency);
|
||||
|
||||
scheduler
|
||||
.run(command_queue, background_jobs_can_start, cancel)
|
||||
@@ -111,7 +118,7 @@ impl OnDiskState {
|
||||
_conf: &'static PageServerConf,
|
||||
_tenant_shard_id: &TenantShardId,
|
||||
_imeline_id: &TimelineId,
|
||||
_ame: LayerFileName,
|
||||
_ame: LayerName,
|
||||
metadata: LayerFileMetadata,
|
||||
access_time: SystemTime,
|
||||
) -> Self {
|
||||
@@ -124,10 +131,10 @@ impl OnDiskState {
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub(super) struct SecondaryDetailTimeline {
|
||||
pub(super) on_disk_layers: HashMap<LayerFileName, OnDiskState>,
|
||||
pub(super) on_disk_layers: HashMap<LayerName, OnDiskState>,
|
||||
|
||||
/// We remember when layers were evicted, to prevent re-downloading them.
|
||||
pub(super) evicted_at: HashMap<LayerFileName, SystemTime>,
|
||||
pub(super) evicted_at: HashMap<LayerName, SystemTime>,
|
||||
}
|
||||
|
||||
/// This state is written by the secondary downloader, it is opaque
|
||||
@@ -792,6 +799,8 @@ impl<'a> TenantDownloader<'a> {
|
||||
|
||||
tracing::debug!(timeline_id=%timeline.timeline_id, "Downloading layers, {} in heatmap", timeline.layers.len());
|
||||
|
||||
let mut download_futs = Vec::new();
|
||||
|
||||
// Download heatmap layers that are not present on local disk, or update their
|
||||
// access time if they are already present.
|
||||
for layer in timeline.layers {
|
||||
@@ -874,67 +883,33 @@ impl<'a> TenantDownloader<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
// Failpoint for simulating slow remote storage
|
||||
failpoint_support::sleep_millis_async!(
|
||||
"secondary-layer-download-sleep",
|
||||
&self.secondary_state.cancel
|
||||
);
|
||||
|
||||
// Note: no backoff::retry wrapper here because download_layer_file does its own retries internally
|
||||
let downloaded_bytes = match download_layer_file(
|
||||
self.conf,
|
||||
self.remote_storage,
|
||||
*tenant_shard_id,
|
||||
timeline.timeline_id,
|
||||
&layer.name,
|
||||
&LayerFileMetadata::from(&layer.metadata),
|
||||
&self.secondary_state.cancel,
|
||||
download_futs.push(self.download_layer(
|
||||
tenant_shard_id,
|
||||
&timeline.timeline_id,
|
||||
layer,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(bytes) => bytes,
|
||||
Err(DownloadError::NotFound) => {
|
||||
// A heatmap might be out of date and refer to a layer that doesn't exist any more.
|
||||
// This is harmless: continue to download the next layer. It is expected during compaction
|
||||
// GC.
|
||||
tracing::debug!(
|
||||
"Skipped downloading missing layer {}, raced with compaction/gc?",
|
||||
layer.name
|
||||
);
|
||||
continue;
|
||||
));
|
||||
}
|
||||
|
||||
// Break up layer downloads into chunks, so that for each chunk we can re-check how much
|
||||
// concurrency to use based on activity level of remote storage.
|
||||
while !download_futs.is_empty() {
|
||||
let chunk =
|
||||
download_futs.split_off(download_futs.len().saturating_sub(MAX_LAYER_CONCURRENCY));
|
||||
|
||||
let concurrency = Self::layer_concurrency(self.remote_storage.activity());
|
||||
|
||||
let mut result_stream = futures::stream::iter(chunk).buffered(concurrency);
|
||||
let mut result_stream = std::pin::pin!(result_stream);
|
||||
while let Some(result) = result_stream.next().await {
|
||||
match result {
|
||||
Err(e) => return Err(e),
|
||||
Ok(None) => {
|
||||
// No error, but we didn't download the layer. Don't mark it touched
|
||||
}
|
||||
Ok(Some(layer)) => touched.push(layer),
|
||||
}
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
|
||||
if downloaded_bytes != layer.metadata.file_size {
|
||||
let local_path = local_layer_path(
|
||||
self.conf,
|
||||
tenant_shard_id,
|
||||
&timeline.timeline_id,
|
||||
&layer.name,
|
||||
&layer.metadata.generation,
|
||||
);
|
||||
|
||||
tracing::warn!(
|
||||
"Downloaded layer {} with unexpected size {} != {}. Removing download.",
|
||||
layer.name,
|
||||
downloaded_bytes,
|
||||
layer.metadata.file_size
|
||||
);
|
||||
|
||||
tokio::fs::remove_file(&local_path)
|
||||
.await
|
||||
.or_else(fs_ext::ignore_not_found)?;
|
||||
} else {
|
||||
tracing::info!("Downloaded layer {}, size {}", layer.name, downloaded_bytes);
|
||||
let mut progress = self.secondary_state.progress.lock().unwrap();
|
||||
progress.bytes_downloaded += downloaded_bytes;
|
||||
progress.layers_downloaded += 1;
|
||||
}
|
||||
|
||||
SECONDARY_MODE.download_layer.inc();
|
||||
touched.push(layer)
|
||||
}
|
||||
|
||||
// Write updates to state to record layers we just downloaded or touched.
|
||||
@@ -966,6 +941,90 @@ impl<'a> TenantDownloader<'a> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn download_layer(
|
||||
&self,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
timeline_id: &TimelineId,
|
||||
layer: HeatMapLayer,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Option<HeatMapLayer>, UpdateError> {
|
||||
// Failpoint for simulating slow remote storage
|
||||
failpoint_support::sleep_millis_async!(
|
||||
"secondary-layer-download-sleep",
|
||||
&self.secondary_state.cancel
|
||||
);
|
||||
|
||||
// Note: no backoff::retry wrapper here because download_layer_file does its own retries internally
|
||||
let downloaded_bytes = match download_layer_file(
|
||||
self.conf,
|
||||
self.remote_storage,
|
||||
*tenant_shard_id,
|
||||
*timeline_id,
|
||||
&layer.name,
|
||||
&LayerFileMetadata::from(&layer.metadata),
|
||||
&self.secondary_state.cancel,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(bytes) => bytes,
|
||||
Err(DownloadError::NotFound) => {
|
||||
// A heatmap might be out of date and refer to a layer that doesn't exist any more.
|
||||
// This is harmless: continue to download the next layer. It is expected during compaction
|
||||
// GC.
|
||||
tracing::debug!(
|
||||
"Skipped downloading missing layer {}, raced with compaction/gc?",
|
||||
layer.name
|
||||
);
|
||||
return Ok(None);
|
||||
}
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
|
||||
if downloaded_bytes != layer.metadata.file_size {
|
||||
let local_path = local_layer_path(
|
||||
self.conf,
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
&layer.name,
|
||||
&layer.metadata.generation,
|
||||
);
|
||||
|
||||
tracing::warn!(
|
||||
"Downloaded layer {} with unexpected size {} != {}. Removing download.",
|
||||
layer.name,
|
||||
downloaded_bytes,
|
||||
layer.metadata.file_size
|
||||
);
|
||||
|
||||
tokio::fs::remove_file(&local_path)
|
||||
.await
|
||||
.or_else(fs_ext::ignore_not_found)?;
|
||||
} else {
|
||||
tracing::info!("Downloaded layer {}, size {}", layer.name, downloaded_bytes);
|
||||
let mut progress = self.secondary_state.progress.lock().unwrap();
|
||||
progress.bytes_downloaded += downloaded_bytes;
|
||||
progress.layers_downloaded += 1;
|
||||
}
|
||||
|
||||
SECONDARY_MODE.download_layer.inc();
|
||||
|
||||
+        Ok(Some(layer))
+    }
+
+    /// Calculate the currently allowed parallelism of layer download tasks, based on activity level of the remote storage
+    fn layer_concurrency(activity: RemoteStorageActivity) -> usize {
+        // When less than 75% of units are available, use minimum concurrency. Else, do a linear mapping
+        // of our concurrency range to the units available within the remaining 25%.
+        let clamp_at = (activity.read_total * 3) / 4;
+        if activity.read_available > clamp_at {
+            (MAX_LAYER_CONCURRENCY * (activity.read_available - clamp_at))
+                / (activity.read_total - clamp_at)
+        } else {
+            MIN_LAYER_CONCURRENCY
+        }
+    }
}

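To see what the linear mapping in `layer_concurrency` does with the constants above (a range of 1 to 16, and a clamp at 75% of the total read units), a standalone re-derivation with plain integers reproduces the values asserted in the test module further down:

// Standalone re-derivation of layer_concurrency() with plain integers.
const MAX_LAYER_CONCURRENCY: usize = 16;
const MIN_LAYER_CONCURRENCY: usize = 1;

fn layer_concurrency(read_available: usize, read_total: usize) -> usize {
    let clamp_at = (read_total * 3) / 4;
    if read_available > clamp_at {
        (MAX_LAYER_CONCURRENCY * (read_available - clamp_at)) / (read_total - clamp_at)
    } else {
        MIN_LAYER_CONCURRENCY
    }
}

fn main() {
    assert_eq!(layer_concurrency(16, 16), 16); // idle storage: full concurrency
    assert_eq!(layer_concurrency(14, 16), 8);  // midpoint of the interpolated range
    assert_eq!(layer_concurrency(12, 16), 1);  // at the 75% clamp point: minimum
    assert_eq!(layer_concurrency(0, 16), 1);   // saturated: minimum
}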
/// Scan local storage and build up Layer objects based on the metadata in a HeatMapTimeline
|
||||
@@ -997,7 +1056,7 @@ async fn init_timeline_state(
|
||||
|
||||
// As we iterate through layers found on disk, we will look up their metadata from this map.
|
||||
// Layers not present in metadata will be discarded.
|
||||
let heatmap_metadata: HashMap<&LayerFileName, &HeatMapLayer> =
|
||||
let heatmap_metadata: HashMap<&LayerName, &HeatMapLayer> =
|
||||
heatmap.layers.iter().map(|l| (&l.name, l)).collect();
|
||||
|
||||
while let Some(dentry) = dir
|
||||
@@ -1015,11 +1074,7 @@ async fn init_timeline_state(
|
||||
.fatal_err(&format!("Read metadata on {}", file_path));
|
||||
|
||||
let file_name = file_path.file_name().expect("created it from the dentry");
|
||||
if file_name == METADATA_FILE_NAME {
|
||||
// Secondary mode doesn't use local metadata files, but they might have been left behind by an attached tenant.
|
||||
warn!(path=?dentry.path(), "found legacy metadata file, these should have been removed in load_tenant_config");
|
||||
continue;
|
||||
} else if crate::is_temporary(&file_path)
|
||||
if crate::is_temporary(&file_path)
|
||||
|| is_temp_download_file(&file_path)
|
||||
|| is_ephemeral_file(file_name)
|
||||
{
|
||||
@@ -1034,7 +1089,7 @@ async fn init_timeline_state(
|
||||
continue;
|
||||
}
|
||||
|
||||
match LayerFileName::from_str(file_name) {
|
||||
match LayerName::from_str(file_name) {
|
||||
Ok(name) => {
|
||||
let remote_meta = heatmap_metadata.get(&name);
|
||||
match remote_meta {
|
||||
@@ -1092,3 +1147,58 @@ async fn init_timeline_state(
|
||||
|
||||
detail
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn layer_concurrency() {
|
||||
// Totally idle
|
||||
assert_eq!(
|
||||
TenantDownloader::layer_concurrency(RemoteStorageActivity {
|
||||
read_available: 16,
|
||||
read_total: 16,
|
||||
write_available: 16,
|
||||
write_total: 16
|
||||
}),
|
||||
MAX_LAYER_CONCURRENCY
|
||||
);
|
||||
|
||||
// Totally busy
|
||||
assert_eq!(
|
||||
TenantDownloader::layer_concurrency(RemoteStorageActivity {
|
||||
read_available: 0,
|
||||
read_total: 16,
|
||||
|
||||
write_available: 16,
|
||||
write_total: 16
|
||||
}),
|
||||
MIN_LAYER_CONCURRENCY
|
||||
);
|
||||
|
||||
// Edge of the range at which we interpolate
|
||||
assert_eq!(
|
||||
TenantDownloader::layer_concurrency(RemoteStorageActivity {
|
||||
read_available: 12,
|
||||
read_total: 16,
|
||||
|
||||
write_available: 16,
|
||||
write_total: 16
|
||||
}),
|
||||
MIN_LAYER_CONCURRENCY
|
||||
);
|
||||
|
||||
// Midpoint of the range in which we interpolate
|
||||
assert_eq!(
|
||||
TenantDownloader::layer_concurrency(RemoteStorageActivity {
|
||||
read_available: 14,
|
||||
read_total: 16,
|
||||
|
||||
write_available: 16,
|
||||
write_total: 16
|
||||
}),
|
||||
MAX_LAYER_CONCURRENCY / 2
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
use std::time::SystemTime;
|
||||
|
||||
use crate::tenant::{
|
||||
remote_timeline_client::index::IndexLayerMetadata, storage_layer::LayerFileName,
|
||||
};
|
||||
use crate::tenant::{remote_timeline_client::index::IndexLayerMetadata, storage_layer::LayerName};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr, TimestampSeconds};
|
||||
@@ -17,6 +15,14 @@ pub(super) struct HeatMapTenant {
    pub(super) generation: Generation,

    pub(super) timelines: Vec<HeatMapTimeline>,
+
+    /// Uploaders provide their own upload period in the heatmap, as a hint to downloaders
+    /// of how frequently it is worthwhile to check for updates.
+    ///
+    /// This is optional for backward compat, and because we sometimes might upload
+    /// a heatmap explicitly via API for a tenant that has no periodic upload configured.
+    #[serde(default)]
+    pub(super) upload_period_ms: Option<u128>,
}

#[serde_as]
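The `#[serde(default)]` attribute is what delivers the backward compatibility described in the comment: heatmaps written before this field existed still deserialize, and the period simply comes back as `None`. A minimal sketch with a trimmed stand-in struct (only the two fields needed for the example, and `generation` simplified to a plain integer):

use serde::Deserialize;

// Trimmed stand-in for HeatMapTenant: only the fields relevant to the example.
#[derive(Deserialize, Debug)]
struct HeatMapSketch {
    generation: u32,
    #[serde(default)]
    upload_period_ms: Option<u128>,
}

fn main() {
    // An "old" heatmap with no upload_period_ms field still parses.
    let old: HeatMapSketch = serde_json::from_str(r#"{"generation": 7}"#).unwrap();
    assert_eq!(old.upload_period_ms, None);

    // A "new" heatmap carries the uploader's period as a hint to downloaders.
    let new: HeatMapSketch =
        serde_json::from_str(r#"{"generation": 7, "upload_period_ms": 60000}"#).unwrap();
    assert_eq!(new.upload_period_ms, Some(60000));
}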
@@ -31,7 +37,7 @@ pub(crate) struct HeatMapTimeline {
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub(crate) struct HeatMapLayer {
|
||||
pub(super) name: LayerFileName,
|
||||
pub(super) name: LayerName,
|
||||
pub(super) metadata: IndexLayerMetadata,
|
||||
|
||||
#[serde_as(as = "TimestampSeconds<i64>")]
|
||||
@@ -42,7 +48,7 @@ pub(crate) struct HeatMapLayer {
|
||||
|
||||
impl HeatMapLayer {
|
||||
pub(crate) fn new(
|
||||
name: LayerFileName,
|
||||
name: LayerName,
|
||||
metadata: IndexLayerMetadata,
|
||||
access_time: SystemTime,
|
||||
) -> Self {
|
||||
@@ -83,4 +89,21 @@ impl HeatMapTenant {
|
||||
|
||||
stats
|
||||
}
|
||||
|
||||
pub(crate) fn strip_atimes(self) -> Self {
|
||||
Self {
|
||||
timelines: self
|
||||
.timelines
|
||||
.into_iter()
|
||||
.map(|mut tl| {
|
||||
for layer in &mut tl.layers {
|
||||
layer.access_time = SystemTime::UNIX_EPOCH;
|
||||
}
|
||||
tl
|
||||
})
|
||||
.collect(),
|
||||
generation: self.generation,
|
||||
upload_period_ms: self.upload_period_ms,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,7 +80,7 @@ impl RunningJob for WriteInProgress {
|
||||
|
||||
struct UploadPending {
|
||||
tenant: Arc<Tenant>,
|
||||
last_digest: Option<md5::Digest>,
|
||||
last_upload: Option<LastUploadState>,
|
||||
target_time: Option<Instant>,
|
||||
period: Option<Duration>,
|
||||
}
|
||||
@@ -94,7 +94,7 @@ impl scheduler::PendingJob for UploadPending {
|
||||
struct WriteComplete {
|
||||
tenant_shard_id: TenantShardId,
|
||||
completed_at: Instant,
|
||||
digest: Option<md5::Digest>,
|
||||
uploaded: Option<LastUploadState>,
|
||||
next_upload: Option<Instant>,
|
||||
}
|
||||
|
||||
@@ -115,10 +115,7 @@ struct UploaderTenantState {
|
||||
tenant: Weak<Tenant>,
|
||||
|
||||
/// Digest of the serialized heatmap that we last successfully uploaded
|
||||
///
|
||||
/// md5 is generally a bad hash. We use it because it's convenient for interop with AWS S3's ETag,
|
||||
/// which is also an md5sum.
|
||||
last_digest: Option<md5::Digest>,
|
||||
last_upload_state: Option<LastUploadState>,
|
||||
|
||||
/// When the last upload attempt completed (may have been successful or failed)
|
||||
last_upload: Option<Instant>,
|
||||
@@ -187,7 +184,7 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
|
||||
tenant: Arc::downgrade(&tenant),
|
||||
last_upload: None,
|
||||
next_upload: Some(now.checked_add(period_warmup(period)).unwrap_or(now)),
|
||||
last_digest: None,
|
||||
last_upload_state: None,
|
||||
});
|
||||
|
||||
// Decline to do the upload if insufficient time has passed
|
||||
@@ -195,10 +192,10 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
|
||||
return;
|
||||
}
|
||||
|
||||
let last_digest = state.last_digest;
|
||||
let last_upload = state.last_upload_state.clone();
|
||||
result.jobs.push(UploadPending {
|
||||
tenant,
|
||||
last_digest,
|
||||
last_upload,
|
||||
target_time: state.next_upload,
|
||||
period: Some(period),
|
||||
});
|
||||
@@ -218,7 +215,7 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
|
||||
) {
|
||||
let UploadPending {
|
||||
tenant,
|
||||
last_digest,
|
||||
last_upload,
|
||||
target_time,
|
||||
period,
|
||||
} = job;
|
||||
@@ -231,16 +228,16 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
|
||||
let _completion = completion;
|
||||
|
||||
let started_at = Instant::now();
|
||||
let digest = match upload_tenant_heatmap(remote_storage, &tenant, last_digest).await {
|
||||
Ok(UploadHeatmapOutcome::Uploaded(digest)) => {
|
||||
let uploaded = match upload_tenant_heatmap(remote_storage, &tenant, last_upload.clone()).await {
|
||||
Ok(UploadHeatmapOutcome::Uploaded(uploaded)) => {
|
||||
let duration = Instant::now().duration_since(started_at);
|
||||
SECONDARY_MODE
|
||||
.upload_heatmap_duration
|
||||
.observe(duration.as_secs_f64());
|
||||
SECONDARY_MODE.upload_heatmap.inc();
|
||||
Some(digest)
|
||||
Some(uploaded)
|
||||
}
|
||||
Ok(UploadHeatmapOutcome::NoChange | UploadHeatmapOutcome::Skipped) => last_digest,
|
||||
Ok(UploadHeatmapOutcome::NoChange | UploadHeatmapOutcome::Skipped) => last_upload,
|
||||
Err(UploadHeatmapError::Upload(e)) => {
|
||||
tracing::warn!(
|
||||
"Failed to upload heatmap for tenant {}: {e:#}",
|
||||
@@ -251,11 +248,11 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
|
||||
.upload_heatmap_duration
|
||||
.observe(duration.as_secs_f64());
|
||||
SECONDARY_MODE.upload_heatmap_errors.inc();
|
||||
last_digest
|
||||
last_upload
|
||||
}
|
||||
Err(UploadHeatmapError::Cancelled) => {
|
||||
tracing::info!("Cancelled heatmap upload, shutting down");
|
||||
last_digest
|
||||
last_upload
|
||||
}
|
||||
};
|
||||
|
||||
@@ -277,7 +274,7 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
|
||||
WriteComplete {
|
||||
tenant_shard_id: *tenant.get_tenant_shard_id(),
|
||||
completed_at: now,
|
||||
digest,
|
||||
uploaded,
|
||||
next_upload,
|
||||
}
|
||||
}.instrument(info_span!(parent: None, "heatmap_upload", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()))))
|
||||
@@ -299,7 +296,7 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
|
||||
|
||||
Ok(UploadPending {
|
||||
// Ignore our state for last digest: this forces an upload even if nothing has changed
|
||||
last_digest: None,
|
||||
last_upload: None,
|
||||
tenant,
|
||||
target_time: None,
|
||||
period: None,
|
||||
@@ -312,7 +309,7 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
|
||||
let WriteComplete {
|
||||
tenant_shard_id,
|
||||
completed_at,
|
||||
digest,
|
||||
uploaded,
|
||||
next_upload,
|
||||
} = completion;
|
||||
use std::collections::hash_map::Entry;
|
||||
@@ -322,7 +319,7 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
|
||||
}
|
||||
Entry::Occupied(mut entry) => {
|
||||
entry.get_mut().last_upload = Some(completed_at);
|
||||
entry.get_mut().last_digest = digest;
|
||||
entry.get_mut().last_upload_state = uploaded;
|
||||
entry.get_mut().next_upload = next_upload
|
||||
}
|
||||
}
|
||||
@@ -331,7 +328,7 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
|
||||
|
||||
enum UploadHeatmapOutcome {
|
||||
/// We successfully wrote to remote storage, with this digest.
|
||||
Uploaded(md5::Digest),
|
||||
Uploaded(LastUploadState),
|
||||
/// We did not upload because the heatmap digest was unchanged since the last upload
|
||||
NoChange,
|
||||
/// We skipped the upload for some reason, such as tenant/timeline not ready
|
||||
@@ -347,12 +344,25 @@ enum UploadHeatmapError {
    Upload(#[from] anyhow::Error),
}

+/// Digests describing the heatmap we most recently uploaded successfully.
+///
+/// md5 is generally a bad hash. We use it because it's convenient for interop with AWS S3's ETag,
+/// which is also an md5sum.
+#[derive(Clone)]
+struct LastUploadState {
+    // Digest of json-encoded HeatMapTenant
+    uploaded_digest: md5::Digest,
+
+    // Digest without atimes set.
+    layers_only_digest: md5::Digest,
+}
+
/// The inner upload operation. This will skip if `last_digest` is Some and matches the digest
/// of the object we would have uploaded.
async fn upload_tenant_heatmap(
    remote_storage: GenericRemoteStorage,
    tenant: &Arc<Tenant>,
-    last_digest: Option<md5::Digest>,
+    last_upload: Option<LastUploadState>,
) -> Result<UploadHeatmapOutcome, UploadHeatmapError> {
    debug_assert_current_span_has_tenant_id();
@@ -368,6 +378,7 @@ async fn upload_tenant_heatmap(
|
||||
let mut heatmap = HeatMapTenant {
|
||||
timelines: Vec::new(),
|
||||
generation,
|
||||
upload_period_ms: tenant.get_heatmap_period().map(|p| p.as_millis()),
|
||||
};
|
||||
let timelines = tenant.timelines.lock().unwrap().clone();
|
||||
|
||||
@@ -396,15 +407,31 @@ async fn upload_tenant_heatmap(

    // Serialize the heatmap
    let bytes = serde_json::to_vec(&heatmap).map_err(|e| anyhow::anyhow!(e))?;
-    let bytes = bytes::Bytes::from(bytes);
-    let size = bytes.len();

    // Drop out early if nothing changed since our last upload
    let digest = md5::compute(&bytes);
-    if Some(digest) == last_digest {
+    if Some(&digest) == last_upload.as_ref().map(|d| &d.uploaded_digest) {
        return Ok(UploadHeatmapOutcome::NoChange);
    }

+    // Calculate a digest that omits atimes, so that we can distinguish actual changes in
+    // layers from changes only in atimes.
+    let heatmap_size_bytes = heatmap.get_stats().bytes;
+    let layers_only_bytes =
+        serde_json::to_vec(&heatmap.strip_atimes()).map_err(|e| anyhow::anyhow!(e))?;
+    let layers_only_digest = md5::compute(&layers_only_bytes);
+    if heatmap_size_bytes < tenant.get_checkpoint_distance() {
+        // For small tenants, skip upload if only atimes changed. This avoids doing frequent
+        // uploads from long-idle tenants whose atimes are just incremented by periodic
+        // size calculations.
+        if Some(&layers_only_digest) == last_upload.as_ref().map(|d| &d.layers_only_digest) {
+            return Ok(UploadHeatmapOutcome::NoChange);
+        }
+    }
+
+    let bytes = bytes::Bytes::from(bytes);
+    let size = bytes.len();
+
    let path = remote_heatmap_path(tenant.get_tenant_shard_id());

    let cancel = &tenant.cancel;
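The skip logic above boils down to two comparisons: first against the digest of the full serialization, then, only for tenants whose heatmap is smaller than the checkpoint distance, against a digest computed with access times stripped. The sketch below mirrors that decision but compares serialized bytes directly instead of md5 digests, which is a simplification; the `should_skip_upload` helper and its `small_tenant` flag are invented for the example.

// Simplified decision helper: the real code hashes both serializations with md5
// and stores the digests in LastUploadState; here we just keep the bytes.
struct LastUpload {
    full: Vec<u8>,        // serialized heatmap, including access times
    layers_only: Vec<u8>, // serialized heatmap with atimes stripped
}

fn should_skip_upload(
    full: &[u8],
    layers_only: &[u8],
    small_tenant: bool, // stands in for heatmap_size_bytes < checkpoint_distance
    last: Option<&LastUpload>,
) -> bool {
    let Some(last) = last else { return false };
    if last.full == full {
        return true; // nothing at all changed since the last upload
    }
    // For small tenants, atime-only churn is not worth re-uploading.
    small_tenant && last.layers_only == layers_only
}

fn main() {
    let last = LastUpload { full: b"v1-atime1".to_vec(), layers_only: b"v1".to_vec() };
    // Same layers, new atimes: skipped for a small tenant, uploaded for a large one.
    assert!(should_skip_upload(b"v1-atime2", b"v1", true, Some(&last)));
    assert!(!should_skip_upload(b"v1-atime2", b"v1", false, Some(&last)));
    // Layer set changed: always uploaded.
    assert!(!should_skip_upload(b"v2-atime2", b"v2", true, Some(&last)));
}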
@@ -436,5 +463,8 @@ async fn upload_tenant_heatmap(
|
||||
|
||||
tracing::info!("Successfully uploaded {size} byte heatmap to {path}");
|
||||
|
||||
Ok(UploadHeatmapOutcome::Uploaded(digest))
|
||||
Ok(UploadHeatmapOutcome::Uploaded(LastUploadState {
|
||||
uploaded_digest: digest,
|
||||
layers_only_digest,
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
//! Common traits and structs for layers
|
||||
|
||||
pub mod delta_layer;
|
||||
mod filename;
|
||||
pub mod image_layer;
|
||||
pub(crate) mod inmemory_layer;
|
||||
pub(crate) mod layer;
|
||||
mod layer_desc;
|
||||
mod layer_name;
|
||||
|
||||
use crate::context::{AccessStatsBehavior, RequestContext};
|
||||
use crate::repository::Value;
|
||||
@@ -34,10 +34,10 @@ use utils::rate_limit::RateLimit;
|
||||
use utils::{id::TimelineId, lsn::Lsn};
|
||||
|
||||
pub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef};
|
||||
pub use filename::{DeltaFileName, ImageFileName, LayerFileName};
|
||||
pub use image_layer::{ImageLayer, ImageLayerWriter};
|
||||
pub use inmemory_layer::InMemoryLayer;
|
||||
pub use layer_desc::{PersistentLayerDesc, PersistentLayerKey};
|
||||
pub use layer_name::{DeltaLayerName, ImageLayerName, LayerName};
|
||||
|
||||
pub(crate) use layer::{EvictionError, Layer, ResidentLayer};
|
||||
|
||||
@@ -646,8 +646,8 @@ pub mod tests {
|
||||
|
||||
use super::*;
|
||||
|
||||
impl From<DeltaFileName> for PersistentLayerDesc {
|
||||
fn from(value: DeltaFileName) -> Self {
|
||||
impl From<DeltaLayerName> for PersistentLayerDesc {
|
||||
fn from(value: DeltaLayerName) -> Self {
|
||||
PersistentLayerDesc::new_delta(
|
||||
TenantShardId::from([0; 18]),
|
||||
TimelineId::from_array([0; 16]),
|
||||
@@ -658,8 +658,8 @@ pub mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ImageFileName> for PersistentLayerDesc {
|
||||
fn from(value: ImageFileName) -> Self {
|
||||
impl From<ImageLayerName> for PersistentLayerDesc {
|
||||
fn from(value: ImageLayerName) -> Self {
|
||||
PersistentLayerDesc::new_img(
|
||||
TenantShardId::from([0; 18]),
|
||||
TimelineId::from_array([0; 16]),
|
||||
@@ -670,11 +670,11 @@ pub mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<LayerFileName> for PersistentLayerDesc {
|
||||
fn from(value: LayerFileName) -> Self {
|
||||
impl From<LayerName> for PersistentLayerDesc {
|
||||
fn from(value: LayerName) -> Self {
|
||||
match value {
|
||||
LayerFileName::Delta(d) => Self::from(d),
|
||||
LayerFileName::Image(i) => Self::from(i),
|
||||
LayerName::Delta(d) => Self::from(d),
|
||||
LayerName::Image(i) => Self::from(i),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -57,6 +57,7 @@ use std::fs::File;
|
||||
use std::io::SeekFrom;
|
||||
use std::ops::Range;
|
||||
use std::os::unix::fs::FileExt;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::OnceCell;
|
||||
use tracing::*;
|
||||
@@ -68,7 +69,8 @@ use utils::{
|
||||
};
|
||||
|
||||
use super::{
|
||||
AsLayerDesc, LayerAccessStats, PersistentLayerDesc, ResidentLayer, ValuesReconstructState,
|
||||
AsLayerDesc, LayerAccessStats, LayerName, PersistentLayerDesc, ResidentLayer,
|
||||
ValuesReconstructState,
|
||||
};
|
||||
|
||||
///
|
||||
@@ -309,13 +311,13 @@ impl DeltaLayer {
|
||||
.and_then(|res| res)?;
|
||||
|
||||
// not production code
|
||||
let actual_filename = path.file_name().unwrap().to_owned();
|
||||
let expected_filename = self.layer_desc().filename().file_name();
|
||||
let actual_layer_name = LayerName::from_str(path.file_name().unwrap()).unwrap();
|
||||
let expected_layer_name = self.layer_desc().layer_name();
|
||||
|
||||
if actual_filename != expected_filename {
|
||||
if actual_layer_name != expected_layer_name {
|
||||
println!("warning: filename does not match what is expected from in-file summary");
|
||||
println!("actual: {:?}", actual_filename);
|
||||
println!("expected: {:?}", expected_filename);
|
||||
println!("actual: {:?}", actual_layer_name.to_string());
|
||||
println!("expected: {:?}", expected_layer_name.to_string());
|
||||
}
|
||||
|
||||
Ok(Arc::new(loaded))
|
||||
@@ -392,6 +394,7 @@ impl DeltaLayerWriterInner {
|
||||
tenant_shard_id: TenantShardId,
|
||||
key_start: Key,
|
||||
lsn_range: Range<Lsn>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Self> {
|
||||
// Create the file initially with a temporary filename. We don't know
|
||||
// the end key yet, so we cannot form the final filename yet. We will
|
||||
@@ -402,7 +405,7 @@ impl DeltaLayerWriterInner {
|
||||
let path =
|
||||
DeltaLayer::temp_path_for(conf, &tenant_shard_id, &timeline_id, key_start, &lsn_range);
|
||||
|
||||
let mut file = VirtualFile::create(&path).await?;
|
||||
let mut file = VirtualFile::create(&path, ctx).await?;
|
||||
// make room for the header block
|
||||
file.seek(SeekFrom::Start(PAGE_SZ as u64)).await?;
|
||||
let blob_writer = BlobWriter::new(file, PAGE_SZ as u64);
|
||||
@@ -584,6 +587,7 @@ impl DeltaLayerWriter {
|
||||
tenant_shard_id: TenantShardId,
|
||||
key_start: Key,
|
||||
lsn_range: Range<Lsn>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Self> {
|
||||
Ok(Self {
|
||||
inner: Some(
|
||||
@@ -593,6 +597,7 @@ impl DeltaLayerWriter {
|
||||
tenant_shard_id,
|
||||
key_start,
|
||||
lsn_range,
|
||||
ctx,
|
||||
)
|
||||
.await?,
|
||||
),
|
||||
@@ -699,6 +704,7 @@ impl DeltaLayer {
|
||||
let mut file = VirtualFile::open_with_options(
|
||||
path,
|
||||
virtual_file::OpenOptions::new().read(true).write(true),
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to open file '{}'", path))?;
|
||||
@@ -732,7 +738,7 @@ impl DeltaLayerInner {
|
||||
max_vectored_read_bytes: Option<MaxVectoredReadBytes>,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Result<Self, anyhow::Error>, anyhow::Error> {
|
||||
let file = match VirtualFile::open(path).await {
|
||||
let file = match VirtualFile::open(path, ctx).await {
|
||||
Ok(file) => file,
|
||||
Err(e) => return Ok(Err(anyhow::Error::new(e).context("open layer file"))),
|
||||
};
|
||||
@@ -906,7 +912,7 @@ impl DeltaLayerInner {
|
||||
.await
|
||||
.map_err(GetVectoredError::Other)?;
|
||||
|
||||
self.do_reads_and_update_state(reads, reconstruct_state)
|
||||
self.do_reads_and_update_state(reads, reconstruct_state, ctx)
|
||||
.await;
|
||||
|
||||
reconstruct_state.on_lsn_advanced(&keyspace, self.lsn_range.start);
|
||||
@@ -1010,6 +1016,7 @@ impl DeltaLayerInner {
|
||||
&self,
|
||||
reads: Vec<VectoredRead>,
|
||||
reconstruct_state: &mut ValuesReconstructState,
|
||||
ctx: &RequestContext,
|
||||
) {
|
||||
let vectored_blob_reader = VectoredBlobReader::new(&self.file);
|
||||
let mut ignore_key_with_err = None;
|
||||
@@ -1027,7 +1034,7 @@ impl DeltaLayerInner {
|
||||
// track when a key is done.
|
||||
for read in reads.into_iter().rev() {
|
||||
let res = vectored_blob_reader
|
||||
.read_blobs(&read, buf.take().expect("Should have a buffer"))
|
||||
.read_blobs(&read, buf.take().expect("Should have a buffer"), ctx)
|
||||
.await;
|
||||
|
||||
let blobs_buf = match res {
|
||||
@@ -1272,7 +1279,7 @@ impl DeltaLayerInner {
|
||||
|
||||
buf.clear();
|
||||
buf.reserve(read.size());
|
||||
let res = reader.read_blobs(&read, buf).await?;
|
||||
let res = reader.read_blobs(&read, buf, ctx).await?;
|
||||
|
||||
for blob in res.blobs {
|
||||
let key = blob.meta.key;
|
||||
@@ -1789,6 +1796,7 @@ mod test {
|
||||
harness.tenant_shard_id,
|
||||
entries_meta.key_range.start,
|
||||
entries_meta.lsn_range.clone(),
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
@@ -1846,7 +1854,7 @@ mod test {
|
||||
|
||||
for read in vectored_reads {
|
||||
let blobs_buf = vectored_blob_reader
|
||||
.read_blobs(&read, buf.take().expect("Should have a buffer"))
|
||||
.read_blobs(&read, buf.take().expect("Should have a buffer"), &ctx)
|
||||
.await?;
|
||||
for meta in blobs_buf.blobs.iter() {
|
||||
let value = &blobs_buf.buf[meta.start..meta.end];
|
||||
@@ -1976,6 +1984,7 @@ mod test {
|
||||
tenant.tenant_shard_id,
|
||||
Key::MIN,
|
||||
Lsn(0x11)..truncate_at,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -54,6 +54,7 @@ use std::fs::File;
|
||||
use std::io::SeekFrom;
|
||||
use std::ops::Range;
|
||||
use std::os::unix::prelude::FileExt;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::OnceCell;
|
||||
use tokio_stream::StreamExt;
|
||||
@@ -65,8 +66,10 @@ use utils::{
|
||||
lsn::Lsn,
|
||||
};
|
||||
|
||||
use super::filename::ImageFileName;
|
||||
use super::{AsLayerDesc, Layer, PersistentLayerDesc, ResidentLayer, ValuesReconstructState};
|
||||
use super::layer_name::ImageLayerName;
|
||||
use super::{
|
||||
AsLayerDesc, Layer, LayerName, PersistentLayerDesc, ResidentLayer, ValuesReconstructState,
|
||||
};
|
||||
|
||||
///
|
||||
/// Header stored in the beginning of the file
|
||||
@@ -231,7 +234,7 @@ impl ImageLayer {
|
||||
conf: &PageServerConf,
|
||||
timeline_id: TimelineId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
fname: &ImageFileName,
|
||||
fname: &ImageLayerName,
|
||||
) -> Utf8PathBuf {
|
||||
let rand_string: String = rand::thread_rng()
|
||||
.sample_iter(&Alphanumeric)
|
||||
@@ -267,13 +270,13 @@ impl ImageLayer {
|
||||
.and_then(|res| res)?;
|
||||
|
||||
// not production code
|
||||
let actual_filename = path.file_name().unwrap().to_owned();
|
||||
let expected_filename = self.layer_desc().filename().file_name();
|
||||
let actual_layer_name = LayerName::from_str(path.file_name().unwrap()).unwrap();
|
||||
let expected_layer_name = self.layer_desc().layer_name();
|
||||
|
||||
if actual_filename != expected_filename {
|
||||
if actual_layer_name != expected_layer_name {
|
||||
println!("warning: filename does not match what is expected from in-file summary");
|
||||
println!("actual: {:?}", actual_filename);
|
||||
println!("expected: {:?}", expected_filename);
|
||||
println!("actual: {:?}", actual_layer_name.to_string());
|
||||
println!("expected: {:?}", expected_layer_name.to_string());
|
||||
}
|
||||
|
||||
Ok(loaded)
|
||||
@@ -340,6 +343,7 @@ impl ImageLayer {
|
||||
let mut file = VirtualFile::open_with_options(
|
||||
path,
|
||||
virtual_file::OpenOptions::new().read(true).write(true),
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to open file '{}'", path))?;
|
||||
@@ -374,7 +378,7 @@ impl ImageLayerInner {
|
||||
max_vectored_read_bytes: Option<MaxVectoredReadBytes>,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Result<Self, anyhow::Error>, anyhow::Error> {
|
||||
let file = match VirtualFile::open(path).await {
|
||||
let file = match VirtualFile::open(path, ctx).await {
|
||||
Ok(file) => file,
|
||||
Err(e) => return Ok(Err(anyhow::Error::new(e).context("open layer file"))),
|
||||
};
|
||||
@@ -471,7 +475,7 @@ impl ImageLayerInner {
|
||||
.await
|
||||
.map_err(GetVectoredError::Other)?;
|
||||
|
||||
self.do_reads_and_update_state(reads, reconstruct_state)
|
||||
self.do_reads_and_update_state(reads, reconstruct_state, ctx)
|
||||
.await;
|
||||
|
||||
Ok(())
|
||||
@@ -534,6 +538,7 @@ impl ImageLayerInner {
|
||||
&self,
|
||||
reads: Vec<VectoredRead>,
|
||||
reconstruct_state: &mut ValuesReconstructState,
|
||||
ctx: &RequestContext,
|
||||
) {
|
||||
let max_vectored_read_bytes = self
|
||||
.max_vectored_read_bytes
|
||||
@@ -562,7 +567,7 @@ impl ImageLayerInner {
|
||||
}
|
||||
|
||||
let buf = BytesMut::with_capacity(buf_size);
|
||||
let res = vectored_blob_reader.read_blobs(&read, buf).await;
|
||||
let res = vectored_blob_reader.read_blobs(&read, buf, ctx).await;
|
||||
|
||||
match res {
|
||||
Ok(blobs_buf) => {
|
||||
@@ -628,6 +633,7 @@ impl ImageLayerWriterInner {
|
||||
tenant_shard_id: TenantShardId,
|
||||
key_range: &Range<Key>,
|
||||
lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Self> {
|
||||
// Create the file initially with a temporary filename.
|
||||
// We'll atomically rename it to the final name when we're done.
|
||||
@@ -635,7 +641,7 @@ impl ImageLayerWriterInner {
|
||||
conf,
|
||||
timeline_id,
|
||||
tenant_shard_id,
|
||||
&ImageFileName {
|
||||
&ImageLayerName {
|
||||
key_range: key_range.clone(),
|
||||
lsn,
|
||||
},
|
||||
@@ -647,6 +653,7 @@ impl ImageLayerWriterInner {
|
||||
virtual_file::OpenOptions::new()
|
||||
.write(true)
|
||||
.create_new(true),
|
||||
ctx,
|
||||
)
|
||||
.await?
|
||||
};
|
||||
@@ -801,10 +808,11 @@ impl ImageLayerWriter {
|
||||
tenant_shard_id: TenantShardId,
|
||||
key_range: &Range<Key>,
|
||||
lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<ImageLayerWriter> {
|
||||
Ok(Self {
|
||||
inner: Some(
|
||||
ImageLayerWriterInner::new(conf, timeline_id, tenant_shard_id, key_range, lsn)
|
||||
ImageLayerWriterInner::new(conf, timeline_id, tenant_shard_id, key_range, lsn, ctx)
|
||||
.await?,
|
||||
),
|
||||
})
|
||||
|
||||
@@ -473,10 +473,11 @@ impl InMemoryLayer {
|
||||
timeline_id: TimelineId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
start_lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<InMemoryLayer> {
|
||||
trace!("initializing new empty InMemoryLayer for writing on timeline {timeline_id} at {start_lsn}");
|
||||
|
||||
let file = EphemeralFile::create(conf, tenant_shard_id, timeline_id).await?;
|
||||
let file = EphemeralFile::create(conf, tenant_shard_id, timeline_id, ctx).await?;
|
||||
let key = InMemoryLayerFileId(file.page_cache_file_id());
|
||||
|
||||
Ok(InMemoryLayer {
|
||||
@@ -642,6 +643,7 @@ impl InMemoryLayer {
|
||||
self.tenant_shard_id,
|
||||
Key::MIN,
|
||||
self.start_lsn..end_lsn,
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ use crate::tenant::{remote_timeline_client::LayerFileMetadata, Timeline};
|
||||
use super::delta_layer::{self, DeltaEntry};
|
||||
use super::image_layer;
|
||||
use super::{
|
||||
AsLayerDesc, LayerAccessStats, LayerAccessStatsReset, LayerFileName, PersistentLayerDesc,
|
||||
AsLayerDesc, LayerAccessStats, LayerAccessStatsReset, LayerName, PersistentLayerDesc,
|
||||
ValueReconstructResult, ValueReconstructState, ValuesReconstructState,
|
||||
};
|
||||
|
||||
@@ -128,19 +128,17 @@ pub(crate) fn local_layer_path(
conf: &PageServerConf,
tenant_shard_id: &TenantShardId,
timeline_id: &TimelineId,
layer_file_name: &LayerFileName,
_generation: &Generation,
layer_file_name: &LayerName,
generation: &Generation,
) -> Utf8PathBuf {
let timeline_path = conf.timeline_path(tenant_shard_id, timeline_id);
timeline_path.join(layer_file_name.file_name())
// TOOD: include generation in the name in now+1 releases.
// timeline_path.join(format!(
// "{}{}",
// layer_file_name.file_name(),
// generation.get_suffix()
// ))
if generation.is_none() {
// Without a generation, we may only use legacy path style
timeline_path.join(layer_file_name.to_string())
} else {
timeline_path.join(format!("{}-v1{}", layer_file_name, generation.get_suffix()))
}
}
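For illustration only, here is a minimal sketch of the two naming styles this function now chooses between. The timeline path, layer name and generation value are made up, and the `-v1-<hex>` shape of the qualifier is assumed from the parser tests further down rather than taken from `Generation::get_suffix` itself.

```rust
// Hedged sketch, not the real helper: legacy layers keep the bare LayerName,
// generation-aware layers get a "-v1-<generation>" qualifier, so one logical
// LayerName can map to several distinct physical files over time.
fn sketch_local_layer_path(
    timeline_path: &str,
    layer_name: &str,
    generation: Option<u32>,
) -> String {
    match generation {
        None => format!("{timeline_path}/{layer_name}"),
        Some(generation) => format!("{timeline_path}/{layer_name}-v1-{generation:08x}"),
    }
}

#[test]
fn sketch_paths_differ_only_in_the_qualifier() {
    let name = "000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58";
    assert_eq!(
        sketch_local_layer_path("timelines/t", name, None),
        format!("timelines/t/{name}")
    );
    assert_eq!(
        sketch_local_layer_path("timelines/t", name, Some(1)),
        format!("timelines/t/{name}-v1-00000001")
    );
}
```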
impl Layer {
|
||||
@@ -148,7 +146,7 @@ impl Layer {
|
||||
pub(crate) fn for_evicted(
|
||||
conf: &'static PageServerConf,
|
||||
timeline: &Arc<Timeline>,
|
||||
file_name: LayerFileName,
|
||||
file_name: LayerName,
|
||||
metadata: LayerFileMetadata,
|
||||
) -> Self {
|
||||
let local_path = local_layer_path(
|
||||
@@ -189,7 +187,7 @@ impl Layer {
|
||||
conf: &'static PageServerConf,
|
||||
timeline: &Arc<Timeline>,
|
||||
local_path: Utf8PathBuf,
|
||||
file_name: LayerFileName,
|
||||
file_name: LayerName,
|
||||
metadata: LayerFileMetadata,
|
||||
) -> ResidentLayer {
|
||||
let desc = PersistentLayerDesc::from_filename(
|
||||
@@ -261,7 +259,7 @@ impl Layer {
|
||||
conf,
|
||||
&timeline.tenant_shard_id,
|
||||
&timeline.timeline_id,
|
||||
&desc.filename(),
|
||||
&desc.layer_name(),
|
||||
&timeline.generation,
|
||||
);
|
||||
|
||||
@@ -587,9 +585,6 @@ struct LayerInner {
|
||||
/// [`Timeline::gate`] at the same time.
|
||||
timeline: Weak<Timeline>,
|
||||
|
||||
/// Cached knowledge of [`Timeline::remote_client`] being `Some`.
|
||||
have_remote_client: bool,
|
||||
|
||||
access_stats: LayerAccessStats,
|
||||
|
||||
/// This custom OnceCell is backed by std mutex, but only held for short time periods.
|
||||
@@ -689,7 +684,7 @@ impl Drop for LayerInner {
|
||||
let span = tracing::info_span!(parent: None, "layer_delete", tenant_id = %self.layer_desc().tenant_shard_id.tenant_id, shard_id=%self.layer_desc().tenant_shard_id.shard_slug(), timeline_id = %self.layer_desc().timeline_id);
|
||||
|
||||
let path = std::mem::take(&mut self.path);
|
||||
let file_name = self.layer_desc().filename();
|
||||
let file_name = self.layer_desc().layer_name();
|
||||
let file_size = self.layer_desc().file_size;
|
||||
let timeline = self.timeline.clone();
|
||||
let meta = self.metadata();
|
||||
@@ -734,23 +729,23 @@ impl Drop for LayerInner {
|
||||
if removed {
|
||||
timeline.metrics.resident_physical_size_sub(file_size);
|
||||
}
|
||||
if let Some(remote_client) = timeline.remote_client.as_ref() {
|
||||
let res = remote_client.schedule_deletion_of_unlinked(vec![(file_name, meta)]);
|
||||
let res = timeline
|
||||
.remote_client
|
||||
.schedule_deletion_of_unlinked(vec![(file_name, meta)]);
|
||||
|
||||
if let Err(e) = res {
|
||||
// test_timeline_deletion_with_files_stuck_in_upload_queue is good at
|
||||
// demonstrating this deadlock (without spawn_blocking): stop will drop
|
||||
// queued items, which will have ResidentLayer's, and those drops would try
|
||||
// to re-entrantly lock the RemoteTimelineClient inner state.
|
||||
if !timeline.is_active() {
|
||||
tracing::info!("scheduling deletion on drop failed: {e:#}");
|
||||
} else {
|
||||
tracing::warn!("scheduling deletion on drop failed: {e:#}");
|
||||
}
|
||||
LAYER_IMPL_METRICS.inc_deletes_failed(DeleteFailed::DeleteSchedulingFailed);
|
||||
if let Err(e) = res {
|
||||
// test_timeline_deletion_with_files_stuck_in_upload_queue is good at
|
||||
// demonstrating this deadlock (without spawn_blocking): stop will drop
|
||||
// queued items, which will have ResidentLayer's, and those drops would try
|
||||
// to re-entrantly lock the RemoteTimelineClient inner state.
|
||||
if !timeline.is_active() {
|
||||
tracing::info!("scheduling deletion on drop failed: {e:#}");
|
||||
} else {
|
||||
LAYER_IMPL_METRICS.inc_completed_deletes();
|
||||
tracing::warn!("scheduling deletion on drop failed: {e:#}");
|
||||
}
|
||||
LAYER_IMPL_METRICS.inc_deletes_failed(DeleteFailed::DeleteSchedulingFailed);
|
||||
} else {
|
||||
LAYER_IMPL_METRICS.inc_completed_deletes();
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -782,11 +777,12 @@ impl LayerInner {
|
||||
|
||||
LayerInner {
|
||||
conf,
|
||||
debug_str: { format!("timelines/{}/{}", timeline.timeline_id, desc.filename()).into() },
|
||||
debug_str: {
|
||||
format!("timelines/{}/{}", timeline.timeline_id, desc.layer_name()).into()
|
||||
},
|
||||
path: local_path,
|
||||
desc,
|
||||
timeline: Arc::downgrade(timeline),
|
||||
have_remote_client: timeline.remote_client.is_some(),
|
||||
access_stats,
|
||||
wanted_deleted: AtomicBool::new(false),
|
||||
inner,
|
||||
@@ -815,8 +811,6 @@ impl LayerInner {
|
||||
/// in a new attempt to evict OR join the previously started attempt.
|
||||
#[tracing::instrument(level = tracing::Level::DEBUG, skip_all, ret, err(level = tracing::Level::DEBUG), fields(layer=%self))]
|
||||
pub(crate) async fn evict_and_wait(&self, timeout: Duration) -> Result<(), EvictionError> {
|
||||
assert!(self.have_remote_client);
|
||||
|
||||
let mut rx = self.status.as_ref().unwrap().subscribe();
|
||||
|
||||
{
|
||||
@@ -973,10 +967,6 @@ impl LayerInner {
|
||||
return Err(DownloadError::NotFile(ft));
|
||||
}
|
||||
|
||||
if timeline.remote_client.as_ref().is_none() {
|
||||
return Err(DownloadError::NoRemoteStorage);
|
||||
}
|
||||
|
||||
if let Some(ctx) = ctx {
|
||||
self.check_expected_download(ctx)?;
|
||||
}
|
||||
@@ -1113,14 +1103,10 @@ impl LayerInner {
|
||||
permit: heavier_once_cell::InitPermit,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Arc<DownloadedLayer>> {
|
||||
let client = timeline
|
||||
let result = timeline
|
||||
.remote_client
|
||||
.as_ref()
|
||||
.expect("checked before download_init_and_wait");
|
||||
|
||||
let result = client
|
||||
.download_layer_file(
|
||||
&self.desc.filename(),
|
||||
&self.desc.layer_name(),
|
||||
&self.metadata(),
|
||||
&timeline.cancel,
|
||||
ctx,
|
||||
@@ -1257,7 +1243,7 @@ impl LayerInner {
|
||||
}
|
||||
|
||||
fn info(&self, reset: LayerAccessStatsReset) -> HistoricLayerInfo {
|
||||
let layer_file_name = self.desc.filename().file_name();
|
||||
let layer_name = self.desc.layer_name().to_string();
|
||||
|
||||
let resident = self
|
||||
.inner
|
||||
@@ -1271,7 +1257,7 @@ impl LayerInner {
|
||||
let lsn_range = &self.desc.lsn_range;
|
||||
|
||||
HistoricLayerInfo::Delta {
|
||||
layer_file_name,
|
||||
layer_file_name: layer_name,
|
||||
layer_file_size: self.desc.file_size,
|
||||
lsn_start: lsn_range.start,
|
||||
lsn_end: lsn_range.end,
|
||||
@@ -1282,7 +1268,7 @@ impl LayerInner {
|
||||
let lsn = self.desc.image_layer_lsn();
|
||||
|
||||
HistoricLayerInfo::Image {
|
||||
layer_file_name,
|
||||
layer_file_name: layer_name,
|
||||
layer_file_size: self.desc.file_size,
|
||||
lsn_start: lsn,
|
||||
remote: !resident,
|
||||
@@ -1293,20 +1279,10 @@ impl LayerInner {
|
||||
|
||||
/// `DownloadedLayer` is being dropped, so it calls this method.
|
||||
fn on_downloaded_layer_drop(self: Arc<LayerInner>, only_version: usize) {
|
||||
let can_evict = self.have_remote_client;
|
||||
|
||||
// we cannot know without inspecting LayerInner::inner if we should evict or not, even
|
||||
// though here it is very likely
|
||||
let span = tracing::info_span!(parent: None, "layer_evict", tenant_id = %self.desc.tenant_shard_id.tenant_id, shard_id = %self.desc.tenant_shard_id.shard_slug(), timeline_id = %self.desc.timeline_id, layer=%self, version=%only_version);
|
||||
|
||||
if !can_evict {
|
||||
// it would be nice to assert this case out, but we are in drop
|
||||
span.in_scope(|| {
|
||||
tracing::error!("bug in struct Layer: ResidentOrWantedEvicted has been downgraded while we have no remote storage");
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// NOTE: this scope *must* never call `self.inner.get` because evict_and_wait might
|
||||
// drop while the `self.inner` is being locked, leading to a deadlock.
|
||||
|
||||
@@ -1578,8 +1554,6 @@ pub(crate) enum EvictionError {
|
||||
pub(crate) enum DownloadError {
|
||||
#[error("timeline has already shutdown")]
|
||||
TimelineShutdown,
|
||||
#[error("no remote storage configured")]
|
||||
NoRemoteStorage,
|
||||
#[error("context denies downloading")]
|
||||
ContextAndConfigReallyDeniesDownloads,
|
||||
#[error("downloading is really required but not allowed by this method")]
|
||||
|
||||
@@ -145,7 +145,7 @@ async fn smoke_test() {
|
||||
.await
|
||||
.expect("the local layer file still exists");
|
||||
|
||||
let rtc = timeline.remote_client.as_ref().unwrap();
|
||||
let rtc = &timeline.remote_client;
|
||||
|
||||
{
|
||||
let layers = &[layer];
|
||||
@@ -761,13 +761,7 @@ async fn eviction_cancellation_on_drop() {
|
||||
timeline.freeze_and_flush().await.unwrap();
|
||||
|
||||
// wait for the upload to complete so our Arc::strong_count assertion holds
|
||||
timeline
|
||||
.remote_client
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.wait_completion()
|
||||
.await
|
||||
.unwrap();
|
||||
timeline.remote_client.wait_completion().await.unwrap();
|
||||
|
||||
let (evicted_layer, not_evicted) = {
|
||||
let mut layers = {
|
||||
|
||||
@@ -5,7 +5,7 @@ use utils::{id::TimelineId, lsn::Lsn};
|
||||
|
||||
use crate::repository::Key;
|
||||
|
||||
use super::{DeltaFileName, ImageFileName, LayerFileName};
|
||||
use super::{DeltaLayerName, ImageLayerName, LayerName};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
@@ -51,7 +51,7 @@ impl PersistentLayerDesc {
|
||||
}
|
||||
|
||||
pub fn short_id(&self) -> impl Display {
|
||||
self.filename()
|
||||
self.layer_name()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -103,14 +103,14 @@ impl PersistentLayerDesc {
|
||||
pub fn from_filename(
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
filename: LayerFileName,
|
||||
filename: LayerName,
|
||||
file_size: u64,
|
||||
) -> Self {
|
||||
match filename {
|
||||
LayerFileName::Image(i) => {
|
||||
LayerName::Image(i) => {
|
||||
Self::new_img(tenant_shard_id, timeline_id, i.key_range, i.lsn, file_size)
|
||||
}
|
||||
LayerFileName::Delta(d) => Self::new_delta(
|
||||
LayerName::Delta(d) => Self::new_delta(
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
d.key_range,
|
||||
@@ -132,34 +132,34 @@ impl PersistentLayerDesc {
|
||||
lsn..(lsn + 1)
|
||||
}
|
||||
|
||||
/// Get a delta file name for this layer.
|
||||
/// Get a delta layer name for this layer.
|
||||
///
|
||||
/// Panic: if this is not a delta layer.
|
||||
pub fn delta_file_name(&self) -> DeltaFileName {
|
||||
pub fn delta_layer_name(&self) -> DeltaLayerName {
|
||||
assert!(self.is_delta);
|
||||
DeltaFileName {
|
||||
DeltaLayerName {
|
||||
key_range: self.key_range.clone(),
|
||||
lsn_range: self.lsn_range.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a delta file name for this layer.
|
||||
/// Get a image layer name for this layer.
|
||||
///
|
||||
/// Panic: if this is not an image layer, or the lsn range is invalid
|
||||
pub fn image_file_name(&self) -> ImageFileName {
|
||||
pub fn image_layer_name(&self) -> ImageLayerName {
|
||||
assert!(!self.is_delta);
|
||||
assert!(self.lsn_range.start + 1 == self.lsn_range.end);
|
||||
ImageFileName {
|
||||
ImageLayerName {
|
||||
key_range: self.key_range.clone(),
|
||||
lsn: self.lsn_range.start,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn filename(&self) -> LayerFileName {
|
||||
pub fn layer_name(&self) -> LayerName {
|
||||
if self.is_delta {
|
||||
self.delta_file_name().into()
|
||||
self.delta_layer_name().into()
|
||||
} else {
|
||||
self.image_file_name().into()
|
||||
self.image_layer_name().into()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -15,29 +15,29 @@ use super::PersistentLayerDesc;
|
||||
|
||||
// Note: Timeline::load_layer_map() relies on this sort order
|
||||
#[derive(PartialEq, Eq, Clone, Hash)]
|
||||
pub struct DeltaFileName {
|
||||
pub struct DeltaLayerName {
|
||||
pub key_range: Range<Key>,
|
||||
pub lsn_range: Range<Lsn>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for DeltaFileName {
|
||||
impl std::fmt::Debug for DeltaLayerName {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
use super::RangeDisplayDebug;
|
||||
|
||||
f.debug_struct("DeltaFileName")
|
||||
f.debug_struct("DeltaLayerName")
|
||||
.field("key_range", &RangeDisplayDebug(&self.key_range))
|
||||
.field("lsn_range", &self.lsn_range)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for DeltaFileName {
|
||||
impl PartialOrd for DeltaLayerName {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for DeltaFileName {
|
||||
impl Ord for DeltaLayerName {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
let mut cmp = self.key_range.start.cmp(&other.key_range.start);
|
||||
if cmp != Ordering::Equal {
|
||||
@@ -57,16 +57,14 @@ impl Ord for DeltaFileName {
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents the filename of a DeltaLayer
|
||||
/// Represents the region of the LSN-Key space covered by a DeltaLayer
|
||||
///
|
||||
/// ```text
|
||||
/// <key start>-<key end>__<LSN start>-<LSN end>
|
||||
/// ```
|
||||
impl DeltaFileName {
|
||||
///
|
||||
/// Parse a string as a delta file name. Returns None if the filename does not
|
||||
/// match the expected pattern.
|
||||
///
|
||||
impl DeltaLayerName {
|
||||
/// Parse the part of a delta layer's file name that represents the LayerName. Returns None
|
||||
/// if the filename does not match the expected pattern.
|
||||
pub fn parse_str(fname: &str) -> Option<Self> {
|
||||
let mut parts = fname.split("__");
|
||||
let mut key_parts = parts.next()?.split('-');
|
||||
@@ -105,14 +103,14 @@ impl DeltaFileName {
|
||||
// or panic?
|
||||
}
|
||||
|
||||
Some(DeltaFileName {
|
||||
Some(DeltaLayerName {
|
||||
key_range: key_start..key_end,
|
||||
lsn_range: start_lsn..end_lsn,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for DeltaFileName {
|
||||
impl fmt::Display for DeltaLayerName {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
@@ -126,29 +124,29 @@ impl fmt::Display for DeltaFileName {
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq, Clone, Hash)]
|
||||
pub struct ImageFileName {
|
||||
pub struct ImageLayerName {
|
||||
pub key_range: Range<Key>,
|
||||
pub lsn: Lsn,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for ImageFileName {
|
||||
impl std::fmt::Debug for ImageLayerName {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
use super::RangeDisplayDebug;
|
||||
|
||||
f.debug_struct("ImageFileName")
|
||||
f.debug_struct("ImageLayerName")
|
||||
.field("key_range", &RangeDisplayDebug(&self.key_range))
|
||||
.field("lsn", &self.lsn)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for ImageFileName {
|
||||
impl PartialOrd for ImageLayerName {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for ImageFileName {
|
||||
impl Ord for ImageLayerName {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
let mut cmp = self.key_range.start.cmp(&other.key_range.start);
|
||||
if cmp != Ordering::Equal {
|
||||
@@ -164,7 +162,7 @@ impl Ord for ImageFileName {
|
||||
}
|
||||
}
|
||||
|
||||
impl ImageFileName {
|
||||
impl ImageLayerName {
|
||||
pub fn lsn_as_range(&self) -> Range<Lsn> {
|
||||
// Saves from having to copypaste this all over
|
||||
PersistentLayerDesc::image_layer_lsn_range(self.lsn)
|
||||
@@ -172,16 +170,14 @@ impl ImageFileName {
|
||||
}
|
||||
|
||||
///
|
||||
/// Represents the filename of an ImageLayer
|
||||
/// Represents the part of the Key-LSN space covered by an ImageLayer
|
||||
///
|
||||
/// ```text
|
||||
/// <key start>-<key end>__<LSN>
|
||||
/// ```
|
||||
impl ImageFileName {
|
||||
///
|
||||
/// Parse a string as an image file name. Returns None if the filename does not
|
||||
/// match the expected pattern.
|
||||
///
|
||||
impl ImageLayerName {
|
||||
/// Parse a string as the LayerName part of an image layer file name. Returns None if the
|
||||
/// filename does not match the expected pattern.
|
||||
pub fn parse_str(fname: &str) -> Option<Self> {
|
||||
let mut parts = fname.split("__");
|
||||
let mut key_parts = parts.next()?.split('-');
|
||||
@@ -202,14 +198,14 @@ impl ImageFileName {
|
||||
|
||||
let lsn = Lsn::from_hex(lsn_str).ok()?;
|
||||
|
||||
Some(ImageFileName {
|
||||
Some(ImageLayerName {
|
||||
key_range: key_start..key_end,
|
||||
lsn,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for ImageFileName {
|
||||
impl fmt::Display for ImageLayerName {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
@@ -220,21 +216,24 @@ impl fmt::Display for ImageFileName {
)
}
}
/// LayerName is the logical identity of a layer within a LayerMap at a moment in time. The
/// LayerName is not a unique filename, as the same LayerName may have multiple physical incarnations
/// over time (e.g. across shard splits or compression). The physical filenames of layers in local
/// storage and object names in remote storage consist of the LayerName plus some extra qualifiers
/// that uniquely identify the physical incarnation of a layer (see [crate::tenant::remote_timeline_client::remote_layer_path])
/// and [`crate::tenant::storage_layer::layer::local_layer_path`])
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub enum LayerFileName {
Image(ImageFileName),
Delta(DeltaFileName),
pub enum LayerName {
Image(ImageLayerName),
Delta(DeltaLayerName),
}
impl LayerFileName {
pub fn file_name(&self) -> String {
self.to_string()
}
impl LayerName {
/// Determines if this layer file is considered to be in future meaning we will discard these
/// layers during timeline initialization from the given disk_consistent_lsn.
pub(crate) fn is_in_future(&self, disk_consistent_lsn: Lsn) -> bool {
use LayerFileName::*;
use LayerName::*;
match self {
Image(file_name) if file_name.lsn > disk_consistent_lsn => true,
Delta(file_name) if file_name.lsn_range.end > disk_consistent_lsn + 1 => true,
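A small sketch of the boundary logic in `is_in_future` above, with plain integers standing in for `Lsn` and made-up numbers: an image layer carries a single LSN, while a delta layer's `lsn_range.end` is exclusive, which is why the delta check compares against `disk_consistent_lsn + 1` rather than `disk_consistent_lsn`.

```rust
// Image layers are "in the future" as soon as their LSN is past the flushed LSN.
fn image_in_future(lsn: u64, disk_consistent_lsn: u64) -> bool {
    lsn > disk_consistent_lsn
}

// Delta layers use an exclusive end: a delta covering WAL up to and including
// disk_consistent_lsn has end == disk_consistent_lsn + 1 and is NOT in the future.
fn delta_in_future(lsn_range_end: u64, disk_consistent_lsn: u64) -> bool {
    lsn_range_end > disk_consistent_lsn + 1
}

#[test]
fn boundary_cases() {
    assert!(!image_in_future(100, 100)); // image exactly at the flushed LSN is kept
    assert!(image_in_future(101, 100));
    assert!(!delta_in_future(101, 100)); // covers WAL up to LSN 100 inclusive
    assert!(delta_in_future(102, 100)); // contains WAL beyond the flushed LSN
}
```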
@@ -243,7 +242,7 @@ impl LayerFileName {
|
||||
}
|
||||
|
||||
pub(crate) fn kind(&self) -> &'static str {
|
||||
use LayerFileName::*;
|
||||
use LayerName::*;
|
||||
match self {
|
||||
Delta(_) => "delta",
|
||||
Image(_) => "image",
|
||||
@@ -251,7 +250,7 @@ impl LayerFileName {
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for LayerFileName {
|
||||
impl fmt::Display for LayerName {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Image(fname) => write!(f, "{fname}"),
|
||||
@@ -260,25 +259,25 @@ impl fmt::Display for LayerFileName {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ImageFileName> for LayerFileName {
|
||||
fn from(fname: ImageFileName) -> Self {
|
||||
impl From<ImageLayerName> for LayerName {
|
||||
fn from(fname: ImageLayerName) -> Self {
|
||||
Self::Image(fname)
|
||||
}
|
||||
}
|
||||
impl From<DeltaFileName> for LayerFileName {
|
||||
fn from(fname: DeltaFileName) -> Self {
|
||||
impl From<DeltaLayerName> for LayerName {
|
||||
fn from(fname: DeltaLayerName) -> Self {
|
||||
Self::Delta(fname)
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for LayerFileName {
|
||||
impl FromStr for LayerName {
|
||||
type Err = String;
|
||||
|
||||
/// Conversion from either a physical layer filename, or the string-ization of
|
||||
/// Self. When loading a physical layer filename, we drop any extra information
|
||||
/// not needed to build Self.
|
||||
fn from_str(value: &str) -> Result<Self, Self::Err> {
|
||||
let gen_suffix_regex = Regex::new("^(?<base>.+)-(?<gen>[0-9a-f]{8})$").unwrap();
|
||||
let gen_suffix_regex = Regex::new("^(?<base>.+)(?<gen>-v1-[0-9a-f]{8})$").unwrap();
|
||||
let file_name: Cow<str> = match gen_suffix_regex.captures(value) {
|
||||
Some(captures) => captures
|
||||
.name("base")
|
||||
@@ -288,8 +287,8 @@ impl FromStr for LayerFileName {
|
||||
None => value.into(),
|
||||
};
|
||||
|
||||
let delta = DeltaFileName::parse_str(&file_name);
|
||||
let image = ImageFileName::parse_str(&file_name);
|
||||
let delta = DeltaLayerName::parse_str(&file_name);
|
||||
let image = ImageLayerName::parse_str(&file_name);
|
||||
let ok = match (delta, image) {
|
||||
(None, None) => {
|
||||
return Err(format!(
|
||||
@@ -304,7 +303,7 @@ impl FromStr for LayerFileName {
|
||||
}
|
||||
}
|
||||
|
||||
impl serde::Serialize for LayerFileName {
|
||||
impl serde::Serialize for LayerName {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
@@ -316,19 +315,19 @@ impl serde::Serialize for LayerFileName {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> serde::Deserialize<'de> for LayerFileName {
|
||||
impl<'de> serde::Deserialize<'de> for LayerName {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
deserializer.deserialize_string(LayerFileNameVisitor)
|
||||
deserializer.deserialize_string(LayerNameVisitor)
|
||||
}
|
||||
}
|
||||
|
||||
struct LayerFileNameVisitor;
|
||||
struct LayerNameVisitor;
|
||||
|
||||
impl<'de> serde::de::Visitor<'de> for LayerFileNameVisitor {
|
||||
type Value = LayerFileName;
|
||||
impl<'de> serde::de::Visitor<'de> for LayerNameVisitor {
|
||||
type Value = LayerName;
|
||||
|
||||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(
|
||||
@@ -349,16 +348,16 @@ mod test {
|
||||
use super::*;
|
||||
#[test]
|
||||
fn image_layer_parse() -> anyhow::Result<()> {
|
||||
let expected = LayerFileName::Image(ImageFileName {
|
||||
let expected = LayerName::Image(ImageLayerName {
|
||||
key_range: Key::from_i128(0)
|
||||
..Key::from_hex("000000067F00000001000004DF0000000006").unwrap(),
|
||||
lsn: Lsn::from_hex("00000000014FED58").unwrap(),
|
||||
});
|
||||
let parsed = LayerFileName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-00000001").map_err(|s| anyhow::anyhow!(s))?;
|
||||
let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-v1-00000001").map_err(|s| anyhow::anyhow!(s))?;
|
||||
assert_eq!(parsed, expected,);
|
||||
|
||||
// Omitting generation suffix is valid
|
||||
let parsed = LayerFileName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58").map_err(|s| anyhow::anyhow!(s))?;
|
||||
let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58").map_err(|s| anyhow::anyhow!(s))?;
|
||||
assert_eq!(parsed, expected,);
|
||||
|
||||
Ok(())
|
||||
@@ -366,17 +365,17 @@ mod test {
|
||||
|
||||
#[test]
|
||||
fn delta_layer_parse() -> anyhow::Result<()> {
|
||||
let expected = LayerFileName::Delta(DeltaFileName {
|
||||
let expected = LayerName::Delta(DeltaLayerName {
|
||||
key_range: Key::from_i128(0)
|
||||
..Key::from_hex("000000067F00000001000004DF0000000006").unwrap(),
|
||||
lsn_range: Lsn::from_hex("00000000014FED58").unwrap()
|
||||
..Lsn::from_hex("000000000154C481").unwrap(),
|
||||
});
|
||||
let parsed = LayerFileName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481-00000001").map_err(|s| anyhow::anyhow!(s))?;
|
||||
let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481-v1-00000001").map_err(|s| anyhow::anyhow!(s))?;
|
||||
assert_eq!(parsed, expected);
|
||||
|
||||
// Omitting generation suffix is valid
|
||||
let parsed = LayerFileName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481").map_err(|s| anyhow::anyhow!(s))?;
|
||||
let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481").map_err(|s| anyhow::anyhow!(s))?;
|
||||
assert_eq!(parsed, expected);
|
||||
|
||||
Ok(())
|
||||
@@ -41,7 +41,7 @@ static CONCURRENT_BACKGROUND_TASKS: once_cell::sync::Lazy<tokio::sync::Semaphore
tokio::sync::Semaphore::new(permits)
});
#[derive(Debug, PartialEq, Eq, Clone, Copy, strum_macros::IntoStaticStr)]
#[derive(Debug, PartialEq, Eq, Clone, Copy, strum_macros::IntoStaticStr, enum_map::Enum)]
#[strum(serialize_all = "snake_case")]
pub(crate) enum BackgroundLoopKind {
Compaction,
@@ -57,19 +57,25 @@ pub(crate) enum BackgroundLoopKind {
impl BackgroundLoopKind {
fn as_static_str(&self) -> &'static str {
let s: &'static str = self.into();
s
self.into()
}
}
static PERMIT_GAUGES: once_cell::sync::Lazy<
enum_map::EnumMap<BackgroundLoopKind, metrics::IntCounterPair>,
> = once_cell::sync::Lazy::new(|| {
enum_map::EnumMap::from_array(std::array::from_fn(|i| {
let kind = <BackgroundLoopKind as enum_map::Enum>::from_usize(i);
crate::metrics::BACKGROUND_LOOP_SEMAPHORE_WAIT_GAUGE.with_label_values(&[kind.into()])
}))
});
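As a standalone illustration of the `enum_map` pattern used for `PERMIT_GAUGES` above: the map is built once with one entry per variant, so later lookups are plain array indexing instead of a per-call `with_label_values` label lookup. The `Color` enum and the `u64` payload are made-up stand-ins for `BackgroundLoopKind` and the gauge pair.

```rust
use enum_map::{Enum, EnumMap};

#[derive(Debug, Clone, Copy, Enum)]
enum Color {
    Red,
    Green,
    Blue,
}

fn main() {
    // Built once, one slot per variant, mirroring PERMIT_GAUGES above.
    let weights: EnumMap<Color, u64> = EnumMap::from_array(std::array::from_fn(|i| {
        let color = <Color as Enum>::from_usize(i);
        // Stand-in for the "expensive" per-label metric lookup.
        color as u64 * 10
    }));

    // Later lookups are infallible array indexing by variant.
    assert_eq!(weights[Color::Blue], 20);
}
```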
/// Cancellation safe.
|
||||
pub(crate) async fn concurrent_background_tasks_rate_limit_permit(
|
||||
loop_kind: BackgroundLoopKind,
|
||||
_ctx: &RequestContext,
|
||||
) -> tokio::sync::SemaphorePermit<'static> {
|
||||
let _guard = crate::metrics::BACKGROUND_LOOP_SEMAPHORE_WAIT_GAUGE
|
||||
.with_label_values(&[loop_kind.as_static_str()])
|
||||
.guard();
|
||||
let _guard = PERMIT_GAUGES[loop_kind].guard();
|
||||
|
||||
pausable_failpoint!(
|
||||
"initial-size-calculation-permit-pause",
|
||||
|
||||
@@ -60,10 +60,13 @@ use std::{
|
||||
ops::ControlFlow,
|
||||
};
|
||||
|
||||
use crate::tenant::storage_layer::layer::local_layer_path;
|
||||
use crate::tenant::{
|
||||
layer_map::{LayerMap, SearchResult},
|
||||
metadata::TimelineMetadata,
|
||||
use crate::tenant::timeline::init::LocalLayerFileMetadata;
|
||||
use crate::{
|
||||
aux_file::AuxFileSizeEstimator,
|
||||
tenant::{
|
||||
layer_map::{LayerMap, SearchResult},
|
||||
metadata::TimelineMetadata,
|
||||
},
|
||||
};
|
||||
use crate::{
|
||||
context::{DownloadBehavior, RequestContext},
|
||||
@@ -75,7 +78,7 @@ use crate::{
|
||||
disk_usage_eviction_task::finite_f32,
|
||||
tenant::storage_layer::{
|
||||
AsLayerDesc, DeltaLayerWriter, EvictionError, ImageLayerWriter, InMemoryLayer, Layer,
|
||||
LayerAccessStatsReset, LayerFileName, ResidentLayer, ValueReconstructResult,
|
||||
LayerAccessStatsReset, LayerName, ResidentLayer, ValueReconstructResult,
|
||||
ValueReconstructState, ValuesReconstructState,
|
||||
},
|
||||
};
|
||||
@@ -197,7 +200,7 @@ fn drop_wlock<T>(rlock: tokio::sync::RwLockWriteGuard<'_, T>) {
|
||||
|
||||
/// The outward-facing resources required to build a Timeline
|
||||
pub struct TimelineResources {
|
||||
pub remote_client: Option<RemoteTimelineClient>,
|
||||
pub remote_client: RemoteTimelineClient,
|
||||
pub deletion_queue_client: DeletionQueueClient,
|
||||
pub timeline_get_throttle: Arc<
|
||||
crate::tenant::throttle::Throttle<&'static crate::metrics::tenant_throttling::TimelineGet>,
|
||||
@@ -269,7 +272,7 @@ pub struct Timeline {
|
||||
|
||||
/// Remote storage client.
|
||||
/// See [`remote_timeline_client`](super::remote_timeline_client) module comment for details.
|
||||
pub remote_client: Option<Arc<RemoteTimelineClient>>,
|
||||
pub remote_client: Arc<RemoteTimelineClient>,
|
||||
|
||||
// What page versions do we hold in the repository? If we get a
|
||||
// request > last_record_lsn, we need to wait until we receive all
|
||||
@@ -409,6 +412,8 @@ pub struct Timeline {
|
||||
|
||||
/// Keep aux directory cache to avoid it's reconstruction on each update
|
||||
pub(crate) aux_files: tokio::sync::Mutex<AuxFilesState>,
|
||||
|
||||
pub(crate) aux_file_size_estimator: AuxFileSizeEstimator,
|
||||
}
|
||||
|
||||
pub struct WalReceiverInfo {
|
||||
@@ -1220,11 +1225,17 @@ impl Timeline {
}
reconstruct_timer.stop_and_record();
// Note that this is an approximation. Tracking the exact number of layers visited
// per key requires virtually unbounded memory usage and is inefficient
// (i.e. segment tree tracking each range queried from a layer)
crate::metrics::VEC_READ_NUM_LAYERS_VISITED
.observe(layers_visited as f64 / results.len() as f64);
// For aux file keys (v1 or v2) the vectored read path does not return an error
// when they're missing. Instead they are omitted from the resulting btree
// (this is a requirement, not a bug). Skip updating the metric in these cases
// to avoid infinite results.
if !results.is_empty() {
// Note that this is an approximation. Tracking the exact number of layers visited
// per key requires virtually unbounded memory usage and is inefficient
// (i.e. segment tree tracking each range queried from a layer)
crate::metrics::VEC_READ_NUM_LAYERS_VISITED
.observe(layers_visited as f64 / results.len() as f64);
}
Ok(results)
}
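To make the emptiness guard above concrete: dividing by a zero-length result set would feed `inf` into the histogram, which is the "infinite results" the comment warns about. A tiny sketch with made-up numbers:

```rust
// Average layers visited per key, guarded the same way as the code above.
fn avg_layers_visited(layers_visited: u32, results_len: usize) -> Option<f64> {
    if results_len == 0 {
        // e.g. an absent aux-file key: skip the observation entirely.
        None
    } else {
        Some(layers_visited as f64 / results_len as f64)
    }
}

#[test]
fn empty_result_sets_are_not_observed() {
    assert_eq!(avg_layers_visited(6, 3), Some(2.0));
    assert_eq!(avg_layers_visited(6, 0), None);
    // Without the guard, the division would not panic but would produce an
    // unusable observation:
    assert!((6f64 / 0f64).is_infinite());
}
```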
@@ -1364,22 +1375,14 @@ impl Timeline {
|
||||
/// not validated with control plane yet.
|
||||
/// See [`Self::get_remote_consistent_lsn_visible`].
|
||||
pub(crate) fn get_remote_consistent_lsn_projected(&self) -> Option<Lsn> {
|
||||
if let Some(remote_client) = &self.remote_client {
|
||||
remote_client.remote_consistent_lsn_projected()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
self.remote_client.remote_consistent_lsn_projected()
|
||||
}
|
||||
|
||||
/// remote_consistent_lsn which the tenant is guaranteed not to go backward from,
|
||||
/// i.e. a value of remote_consistent_lsn_projected which has undergone
|
||||
/// generation validation in the deletion queue.
|
||||
pub(crate) fn get_remote_consistent_lsn_visible(&self) -> Option<Lsn> {
|
||||
if let Some(remote_client) = &self.remote_client {
|
||||
remote_client.remote_consistent_lsn_visible()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
self.remote_client.remote_consistent_lsn_visible()
|
||||
}
|
||||
|
||||
/// The sum of the file size of all historic layers in the layer map.
|
||||
@@ -1749,16 +1752,14 @@ impl Timeline {
|
||||
match self.freeze_and_flush().await {
|
||||
Ok(_) => {
|
||||
// drain the upload queue
|
||||
if let Some(client) = self.remote_client.as_ref() {
|
||||
// if we did not wait for completion here, it might be our shutdown process
|
||||
// didn't wait for remote uploads to complete at all, as new tasks can forever
|
||||
// be spawned.
|
||||
//
|
||||
// what is problematic is the shutting down of RemoteTimelineClient, because
|
||||
// obviously it does not make sense to stop while we wait for it, but what
|
||||
// about corner cases like s3 suddenly hanging up?
|
||||
client.shutdown().await;
|
||||
}
|
||||
// if we did not wait for completion here, it might be our shutdown process
|
||||
// didn't wait for remote uploads to complete at all, as new tasks can forever
|
||||
// be spawned.
|
||||
//
|
||||
// what is problematic is the shutting down of RemoteTimelineClient, because
|
||||
// obviously it does not make sense to stop while we wait for it, but what
|
||||
// about corner cases like s3 suddenly hanging up?
|
||||
self.remote_client.shutdown().await;
|
||||
}
|
||||
Err(e) => {
|
||||
// Non-fatal. Shutdown is infallible. Failures to flush just mean that
|
||||
@@ -1774,18 +1775,16 @@ impl Timeline {
|
||||
|
||||
// Transition the remote_client into a state where it's only useful for timeline deletion.
|
||||
// (The deletion use case is why we can't just hook up remote_client to Self::cancel).)
|
||||
if let Some(remote_client) = self.remote_client.as_ref() {
|
||||
remote_client.stop();
|
||||
// As documented in remote_client.stop()'s doc comment, it's our responsibility
|
||||
// to shut down the upload queue tasks.
|
||||
// TODO: fix that, task management should be encapsulated inside remote_client.
|
||||
task_mgr::shutdown_tasks(
|
||||
Some(TaskKind::RemoteUploadTask),
|
||||
Some(self.tenant_shard_id),
|
||||
Some(self.timeline_id),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
self.remote_client.stop();
|
||||
// As documented in remote_client.stop()'s doc comment, it's our responsibility
|
||||
// to shut down the upload queue tasks.
|
||||
// TODO: fix that, task management should be encapsulated inside remote_client.
|
||||
task_mgr::shutdown_tasks(
|
||||
Some(TaskKind::RemoteUploadTask),
|
||||
Some(self.tenant_shard_id),
|
||||
Some(self.timeline_id),
|
||||
)
|
||||
.await;
|
||||
|
||||
// TODO: work toward making this a no-op. See this function's doc comment for more context.
|
||||
tracing::debug!("Waiting for tasks...");
|
||||
@@ -1905,16 +1904,12 @@ impl Timeline {
|
||||
#[instrument(skip_all, fields(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))]
|
||||
pub(crate) async fn download_layer(
|
||||
&self,
|
||||
layer_file_name: &LayerFileName,
|
||||
layer_file_name: &LayerName,
|
||||
) -> anyhow::Result<Option<bool>> {
|
||||
let Some(layer) = self.find_layer(layer_file_name).await else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
if self.remote_client.is_none() {
|
||||
return Ok(Some(false));
|
||||
}
|
||||
|
||||
layer.download().await?;
|
||||
|
||||
Ok(Some(true))
|
||||
@@ -1925,7 +1920,7 @@ impl Timeline {
|
||||
/// Returns `Ok(None)` in the case where the layer could not be found by its `layer_file_name`.
|
||||
pub(crate) async fn evict_layer(
|
||||
&self,
|
||||
layer_file_name: &LayerFileName,
|
||||
layer_file_name: &LayerName,
|
||||
) -> anyhow::Result<Option<bool>> {
|
||||
let _gate = self
|
||||
.gate
|
||||
@@ -2155,6 +2150,16 @@ impl Timeline {
|
||||
};
|
||||
|
||||
Arc::new_cyclic(|myself| {
|
||||
let metrics = TimelineMetrics::new(
|
||||
&tenant_shard_id,
|
||||
&timeline_id,
|
||||
crate::metrics::EvictionsWithLowResidenceDurationBuilder::new(
|
||||
"mtime",
|
||||
evictions_low_residence_duration_metric_threshold,
|
||||
),
|
||||
);
|
||||
let aux_file_metrics = metrics.aux_file_size_gauge.clone();
|
||||
|
||||
let mut result = Timeline {
|
||||
conf,
|
||||
tenant_conf,
|
||||
@@ -2169,7 +2174,7 @@ impl Timeline {
|
||||
walredo_mgr,
|
||||
walreceiver: Mutex::new(None),
|
||||
|
||||
remote_client: resources.remote_client.map(Arc::new),
|
||||
remote_client: Arc::new(resources.remote_client),
|
||||
|
||||
// initialize in-memory 'last_record_lsn' from 'disk_consistent_lsn'.
|
||||
last_record_lsn: SeqWait::new(RecordLsn {
|
||||
@@ -2186,14 +2191,7 @@ impl Timeline {
|
||||
ancestor_timeline: ancestor,
|
||||
ancestor_lsn: metadata.ancestor_lsn(),
|
||||
|
||||
metrics: TimelineMetrics::new(
|
||||
&tenant_shard_id,
|
||||
&timeline_id,
|
||||
crate::metrics::EvictionsWithLowResidenceDurationBuilder::new(
|
||||
"mtime",
|
||||
evictions_low_residence_duration_metric_threshold,
|
||||
),
|
||||
),
|
||||
metrics,
|
||||
|
||||
query_metrics: crate::metrics::SmgrQueryTimePerTimeline::new(
|
||||
&tenant_shard_id,
|
||||
@@ -2257,6 +2255,8 @@ impl Timeline {
|
||||
dir: None,
|
||||
n_deltas: 0,
|
||||
}),
|
||||
|
||||
aux_file_size_estimator: AuxFileSizeEstimator::new(aux_file_metrics),
|
||||
};
|
||||
result.repartition_threshold =
|
||||
result.get_checkpoint_distance() / REPARTITION_FREQ_IN_CHECKPOINT_DISTANCE;
|
||||
@@ -2387,13 +2387,13 @@ impl Timeline {
|
||||
index_part: Option<IndexPart>,
|
||||
) -> anyhow::Result<()> {
|
||||
use init::{Decision::*, Discovered, DismissedLayer};
|
||||
use LayerFileName::*;
|
||||
use LayerName::*;
|
||||
|
||||
let mut guard = self.layers.write().await;
|
||||
|
||||
let timer = self.metrics.load_layer_map_histo.start_timer();
|
||||
|
||||
// Scan timeline directory and create ImageFileName and DeltaFilename
|
||||
// Scan timeline directory and create ImageLayerName and DeltaFilename
|
||||
// structs representing all files on disk
|
||||
let timeline_path = self
|
||||
.conf
|
||||
@@ -2421,10 +2421,6 @@ impl Timeline {
|
||||
discovered_layers.push((layer_file_name, local_path, file_size));
|
||||
continue;
|
||||
}
|
||||
Discovered::Metadata => {
|
||||
warn!("found legacy metadata file, these should have been removed in load_tenant_config");
|
||||
continue;
|
||||
}
|
||||
Discovered::IgnoredBackup => {
|
||||
continue;
|
||||
}
|
||||
@@ -2463,33 +2459,37 @@ impl Timeline {
|
||||
let mut needs_cleanup = Vec::new();
|
||||
let mut total_physical_size = 0;
|
||||
|
||||
for (name, local_path, decision) in decided {
|
||||
for (name, decision) in decided {
|
||||
let decision = match decision {
|
||||
Ok(UseRemote { local, remote }) => {
|
||||
// Remote is authoritative, but we may still choose to retain
|
||||
// the local file if the contents appear to match
|
||||
if local.file_size() == remote.file_size() {
|
||||
if local.metadata.file_size() == remote.file_size() {
|
||||
// Use the local file, but take the remote metadata so that we pick up
|
||||
// the correct generation.
|
||||
UseLocal(remote)
|
||||
UseLocal(LocalLayerFileMetadata {
|
||||
metadata: remote,
|
||||
local_path: local.local_path,
|
||||
})
|
||||
} else {
|
||||
let local_path = local_path.as_ref().expect("Locally found layer must have path");
|
||||
init::cleanup_local_file_for_remote(local_path, &local, &remote)?;
|
||||
init::cleanup_local_file_for_remote(&local, &remote)?;
|
||||
UseRemote { local, remote }
|
||||
}
|
||||
}
|
||||
Ok(decision) => decision,
|
||||
Err(DismissedLayer::Future { local }) => {
|
||||
if local.is_some() {
|
||||
let local_path = local_path.expect("Locally found layer must have path");
|
||||
init::cleanup_future_layer(&local_path, &name, disk_consistent_lsn)?;
|
||||
if let Some(local) = local {
|
||||
init::cleanup_future_layer(
|
||||
&local.local_path,
|
||||
&name,
|
||||
disk_consistent_lsn,
|
||||
)?;
|
||||
}
|
||||
needs_cleanup.push(name);
|
||||
continue;
|
||||
}
|
||||
Err(DismissedLayer::LocalOnly(local)) => {
|
||||
let local_path = local_path.expect("Locally found layer must have path");
|
||||
init::cleanup_local_only_file(&local_path, &name, &local)?;
|
||||
init::cleanup_local_only_file(&name, &local)?;
|
||||
// this file never existed remotely, we will have to do rework
|
||||
continue;
|
||||
}
|
||||
@@ -2503,20 +2503,10 @@ impl Timeline {
|
||||
tracing::debug!(layer=%name, ?decision, "applied");
|
||||
|
||||
let layer = match decision {
|
||||
UseLocal(m) => {
|
||||
total_physical_size += m.file_size();
|
||||
|
||||
let local_path = local_path.unwrap_or_else(|| {
|
||||
local_layer_path(
|
||||
conf,
|
||||
&this.tenant_shard_id,
|
||||
&this.timeline_id,
|
||||
&name,
|
||||
&m.generation,
|
||||
)
|
||||
});
|
||||
|
||||
Layer::for_resident(conf, &this, local_path, name, m).drop_eviction_guard()
|
||||
UseLocal(local) => {
|
||||
total_physical_size += local.metadata.file_size();
|
||||
Layer::for_resident(conf, &this, local.local_path, name, local.metadata)
|
||||
.drop_eviction_guard()
|
||||
}
|
||||
Evicted(remote) | UseRemote { remote, .. } => {
|
||||
Layer::for_evicted(conf, &this, name, remote)
|
||||
@@ -2536,36 +2526,36 @@ impl Timeline {
|
||||
|
||||
guard.initialize_local_layers(loaded_layers, disk_consistent_lsn + 1);
|
||||
|
||||
if let Some(rtc) = self.remote_client.as_ref() {
|
||||
rtc.schedule_layer_file_deletion(&needs_cleanup)?;
|
||||
rtc.schedule_index_upload_for_file_changes()?;
|
||||
// This barrier orders above DELETEs before any later operations.
|
||||
// This is critical because code executing after the barrier might
|
||||
// create again objects with the same key that we just scheduled for deletion.
|
||||
// For example, if we just scheduled deletion of an image layer "from the future",
|
||||
// later compaction might run again and re-create the same image layer.
|
||||
// "from the future" here means an image layer whose LSN is > IndexPart::disk_consistent_lsn.
|
||||
// "same" here means same key range and LSN.
|
||||
//
|
||||
// Without a barrier between above DELETEs and the re-creation's PUTs,
|
||||
// the upload queue may execute the PUT first, then the DELETE.
|
||||
// In our example, we will end up with an IndexPart referencing a non-existent object.
|
||||
//
|
||||
// 1. a future image layer is created and uploaded
|
||||
// 2. ps restart
|
||||
// 3. the future layer from (1) is deleted during load layer map
|
||||
// 4. image layer is re-created and uploaded
|
||||
// 5. deletion queue would like to delete (1) but actually deletes (4)
|
||||
// 6. delete by name works as expected, but it now deletes the wrong (later) version
|
||||
//
|
||||
// See https://github.com/neondatabase/neon/issues/5878
|
||||
//
|
||||
// NB: generation numbers naturally protect against this because they disambiguate
|
||||
// (1) and (4)
|
||||
rtc.schedule_barrier()?;
|
||||
// Tenant::create_timeline will wait for these uploads to happen before returning, or
|
||||
// on retry.
|
||||
}
|
||||
self.remote_client
|
||||
.schedule_layer_file_deletion(&needs_cleanup)?;
|
||||
self.remote_client
|
||||
.schedule_index_upload_for_file_changes()?;
|
||||
// This barrier orders above DELETEs before any later operations.
|
||||
// This is critical because code executing after the barrier might
|
||||
// create again objects with the same key that we just scheduled for deletion.
|
||||
// For example, if we just scheduled deletion of an image layer "from the future",
|
||||
// later compaction might run again and re-create the same image layer.
|
||||
// "from the future" here means an image layer whose LSN is > IndexPart::disk_consistent_lsn.
|
||||
// "same" here means same key range and LSN.
|
||||
//
|
||||
// Without a barrier between above DELETEs and the re-creation's PUTs,
|
||||
// the upload queue may execute the PUT first, then the DELETE.
|
||||
// In our example, we will end up with an IndexPart referencing a non-existent object.
|
||||
//
|
||||
// 1. a future image layer is created and uploaded
|
||||
// 2. ps restart
|
||||
// 3. the future layer from (1) is deleted during load layer map
|
||||
// 4. image layer is re-created and uploaded
|
||||
// 5. deletion queue would like to delete (1) but actually deletes (4)
|
||||
// 6. delete by name works as expected, but it now deletes the wrong (later) version
|
||||
//
|
||||
// See https://github.com/neondatabase/neon/issues/5878
|
||||
//
|
||||
// NB: generation numbers naturally protect against this because they disambiguate
|
||||
// (1) and (4)
|
||||
self.remote_client.schedule_barrier()?;
|
||||
// Tenant::create_timeline will wait for these uploads to happen before returning, or
|
||||
// on retry.
|
||||
|
||||
info!(
|
||||
"loaded layer map with {} layers at {}, total physical size: {}",
|
||||
@@ -2624,6 +2614,7 @@ impl Timeline {
|
||||
// Don't make noise.
|
||||
} else {
|
||||
warn!("unexpected: cancel_wait_for_background_loop_concurrency_limit_semaphore not set, priority-boosting of logical size calculation will not work");
|
||||
debug_assert!(false);
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -2997,10 +2988,10 @@ impl Timeline {
|
||||
}
|
||||
}
|
||||
|
||||
async fn find_layer(&self, layer_name: &LayerFileName) -> Option<Layer> {
|
||||
async fn find_layer(&self, layer_name: &LayerName) -> Option<Layer> {
|
||||
let guard = self.layers.read().await;
|
||||
for historic_layer in guard.layer_map().iter_historic_layers() {
|
||||
let historic_layer_name = historic_layer.filename();
|
||||
let historic_layer_name = historic_layer.layer_name();
|
||||
if layer_name == &historic_layer_name {
|
||||
return Some(guard.get_from_desc(&historic_layer));
|
||||
}
|
||||
@@ -3017,9 +3008,6 @@ impl Timeline {
|
||||
/// should treat this as a cue to simply skip doing any heatmap uploading
|
||||
/// for this timeline.
|
||||
pub(crate) async fn generate_heatmap(&self) -> Option<HeatMapTimeline> {
|
||||
// no point in heatmaps without remote client
|
||||
let _remote_client = self.remote_client.as_ref()?;
|
||||
|
||||
if !self.is_active() {
|
||||
return None;
|
||||
}
|
||||
@@ -3030,7 +3018,7 @@ impl Timeline {
|
||||
let last_activity_ts = layer.access_stats().latest_activity_or_now();
|
||||
|
||||
HeatMapLayer::new(
|
||||
layer.layer_desc().filename(),
|
||||
layer.layer_desc().layer_name(),
|
||||
(&layer.metadata()).into(),
|
||||
last_activity_ts,
|
||||
)
|
||||
@@ -3040,6 +3028,15 @@ impl Timeline {
|
||||
|
||||
Some(HeatMapTimeline::new(self.timeline_id, layers))
|
||||
}
|
||||
|
||||
/// Returns true if the given lsn is or was an ancestor branchpoint.
pub(crate) fn is_ancestor_lsn(&self, lsn: Lsn) -> bool {
// upon timeline detach, we set the ancestor_lsn to Lsn::INVALID and store the original
// branchpoint in IndexPart::lineage
self.ancestor_lsn == lsn
|| (self.ancestor_lsn == Lsn::INVALID
&& self.remote_client.is_previous_ancestor_lsn(lsn))
}
}
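A hedged sketch of the two branches in `is_ancestor_lsn` above, with plain `u64`s in place of `Lsn` (assuming the usual zero sentinel for `Lsn::INVALID`) and a closure standing in for `remote_client.is_previous_ancestor_lsn`: an attached timeline answers from its live `ancestor_lsn`, while a detached one falls back to the branchpoint remembered in the index lineage.

```rust
// Zero stands in for the Lsn::INVALID sentinel in this sketch.
const INVALID: u64 = 0;

fn is_ancestor_lsn(ancestor_lsn: u64, lsn: u64, was_previous_ancestor: impl Fn(u64) -> bool) -> bool {
    ancestor_lsn == lsn || (ancestor_lsn == INVALID && was_previous_ancestor(lsn))
}

#[test]
fn detached_timelines_still_remember_their_branchpoint() {
    // Attached: the branchpoint is the live ancestor_lsn.
    assert!(is_ancestor_lsn(0x40, 0x40, |_| false));
    assert!(!is_ancestor_lsn(0x40, 0x50, |_| false));
    // Detached: ancestor_lsn is INVALID, but the stored lineage still answers.
    assert!(is_ancestor_lsn(INVALID, 0x40, |lsn| lsn == 0x40));
}
```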
type TraversalId = Arc<str>;
|
||||
@@ -3177,7 +3174,7 @@ impl Timeline {
|
||||
if let Some(open_layer) = &layers.open_layer {
|
||||
let start_lsn = open_layer.get_lsn_range().start;
|
||||
if cont_lsn > start_lsn {
|
||||
//info!("CHECKING for {} at {} on open layer {}", key, cont_lsn, open_layer.filename().display());
|
||||
//info!("CHECKING for {} at {} on open layer {}", key, cont_lsn, open_layer.layer_name().display());
|
||||
// Get all the data needed to reconstruct the page version from this layer.
|
||||
// But if we have an older cached page image, no need to go past that.
|
||||
let lsn_floor = max(cached_lsn + 1, start_lsn);
|
||||
@@ -3206,7 +3203,7 @@ impl Timeline {
|
||||
for frozen_layer in layers.frozen_layers.iter().rev() {
|
||||
let start_lsn = frozen_layer.get_lsn_range().start;
|
||||
if cont_lsn > start_lsn {
|
||||
//info!("CHECKING for {} at {} on frozen layer {}", key, cont_lsn, frozen_layer.filename().display());
|
||||
//info!("CHECKING for {} at {} on frozen layer {}", key, cont_lsn, frozen_layer.layer_name().display());
|
||||
let lsn_floor = max(cached_lsn + 1, start_lsn);
|
||||
|
||||
let frozen_layer = frozen_layer.clone();
|
||||
@@ -3551,7 +3548,11 @@ impl Timeline {
|
||||
///
|
||||
/// Get a handle to the latest layer for appending.
|
||||
///
|
||||
async fn get_layer_for_write(&self, lsn: Lsn) -> anyhow::Result<Arc<InMemoryLayer>> {
|
||||
async fn get_layer_for_write(
|
||||
&self,
|
||||
lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Arc<InMemoryLayer>> {
|
||||
let mut guard = self.layers.write().await;
|
||||
let layer = guard
|
||||
.get_layer_for_write(
|
||||
@@ -3560,6 +3561,7 @@ impl Timeline {
|
||||
self.conf,
|
||||
self.timeline_id,
|
||||
self.tenant_shard_id,
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
Ok(layer)
|
||||
@@ -3824,8 +3826,8 @@ impl Timeline {
);
self.create_delta_layer(
&frozen_layer,
ctx,
Some(metadata_keyspace.0.ranges[0].clone()),
ctx,
)
.await?
} else {
@@ -3854,7 +3856,7 @@ impl Timeline {
// Normal case, write out a L0 delta layer file.
// `create_delta_layer` will not modify the layer map.
// We will remove frozen layer and add delta layer in one atomic operation later.
let Some(layer) = self.create_delta_layer(&frozen_layer, ctx, None).await? else {
let Some(layer) = self.create_delta_layer(&frozen_layer, None, ctx).await? else {
panic!("delta layer cannot be empty if no filter is applied");
};
(
@@ -3953,29 +3955,23 @@ impl Timeline {
x.unwrap()
));

if let Some(remote_client) = &self.remote_client {
for layer in layers_to_upload {
remote_client.schedule_layer_file_upload(layer)?;
}
remote_client.schedule_index_upload_for_metadata_update(&update)?;
for layer in layers_to_upload {
self.remote_client.schedule_layer_file_upload(layer)?;
}
self.remote_client
.schedule_index_upload_for_metadata_update(&update)?;

Ok(())
}

pub(crate) async fn preserve_initdb_archive(&self) -> anyhow::Result<()> {
if let Some(remote_client) = &self.remote_client {
remote_client
.preserve_initdb_archive(
&self.tenant_shard_id.tenant_id,
&self.timeline_id,
&self.cancel,
)
.await?;
} else {
bail!("No remote storage configured, but was asked to backup the initdb archive for {} / {}", self.tenant_shard_id.tenant_id, self.timeline_id);
}
Ok(())
self.remote_client
.preserve_initdb_archive(
&self.tenant_shard_id.tenant_id,
&self.timeline_id,
&self.cancel,
)
.await
}

// Write out the given frozen in-memory layer as a new L0 delta file. This L0 file will not be tracked
@@ -3983,8 +3979,8 @@ impl Timeline {
async fn create_delta_layer(
self: &Arc<Self>,
frozen_layer: &Arc<InMemoryLayer>,
ctx: &RequestContext,
key_range: Option<Range<Key>>,
ctx: &RequestContext,
) -> anyhow::Result<Option<ResidentLayer>> {
let self_clone = Arc::clone(self);
let frozen_layer = Arc::clone(frozen_layer);
@@ -4007,6 +4003,7 @@ impl Timeline {
&self_clone
.conf
.timeline_path(&self_clone.tenant_shard_id, &self_clone.timeline_id),
&ctx,
)
.await
.fatal_err("VirtualFile::open for timeline dir fsync");
@@ -4200,6 +4197,7 @@ impl Timeline {
self.tenant_shard_id,
&img_range,
lsn,
ctx,
)
.await?;

@@ -4233,7 +4231,7 @@ impl Timeline {

// Maybe flush `key_rest_accum`
if key_request_accum.raw_size() >= Timeline::MAX_GET_VECTORED_KEYS
|| last_key_in_range
|| (last_key_in_range && key_request_accum.raw_size() > 0)
{
let results = self
.get_vectored(key_request_accum.consume_keyspace(), lsn, ctx)
@@ -4304,6 +4302,7 @@ impl Timeline {
&self
.conf
.timeline_path(&self.tenant_shard_id, &self.timeline_id),
ctx,
)
.await
.fatal_err("VirtualFile::open for timeline dir fsync");
@@ -4328,6 +4327,16 @@ impl Timeline {
/// this Timeline is shut down. Calling this function will cause the initial
/// logical size calculation to skip waiting for the background jobs barrier.
pub(crate) async fn await_initial_logical_size(self: Arc<Self>) {
if !self.shard_identity.is_shard_zero() {
// We don't populate logical size on shard >0: skip waiting for it.
return;
}

if self.remote_client.is_deleting() {
// The timeline was created in a deletion-resume state, we don't expect logical size to be populated
return;
}

if let Some(await_bg_cancel) = self
.current_logical_size
.cancel_wait_for_background_loop_concurrency_limit_semaphore
@@ -4339,9 +4348,10 @@ impl Timeline {
// the logical size cancellation to skip the concurrency limit semaphore.
// TODO: this is an unexpected case. We should restructure so that it
// can't happen.
tracing::info!(
tracing::warn!(
"await_initial_logical_size: can't get semaphore cancel token, skipping"
);
debug_assert!(false);
}

tokio::select!(
@@ -4357,7 +4367,6 @@ impl Timeline {
/// - has an ancestor to detach from
/// - the ancestor does not have an ancestor -- follows from the original RFC limitations, not
/// a technical requirement
/// - has prev_lsn in remote storage (temporary restriction)
///
/// After the operation has been started, it cannot be canceled. Upon restart it needs to be
/// polled again until completion.
@@ -4491,9 +4500,8 @@ impl Timeline {
// deletion will happen later, the layer file manager calls garbage_collect_on_drop
guard.finish_compact_l0(&remove_layers, &insert_layers, &self.metrics);

if let Some(remote_client) = self.remote_client.as_ref() {
remote_client.schedule_compaction_update(&remove_layers, new_deltas)?;
}
self.remote_client
.schedule_compaction_update(&remove_layers, new_deltas)?;

drop_wlock(guard);

@@ -4511,9 +4519,8 @@ impl Timeline {

let upload_layers: Vec<_> = replace_layers.into_iter().map(|r| r.1).collect();

if let Some(remote_client) = self.remote_client.as_ref() {
remote_client.schedule_compaction_update(&drop_layers, &upload_layers)?;
}
self.remote_client
.schedule_compaction_update(&drop_layers, &upload_layers)?;

Ok(())
}
@@ -4523,16 +4530,14 @@ impl Timeline {
self: &Arc<Self>,
new_images: impl IntoIterator<Item = ResidentLayer>,
) -> anyhow::Result<()> {
let Some(remote_client) = &self.remote_client else {
return Ok(());
};
for layer in new_images {
remote_client.schedule_layer_file_upload(layer)?;
self.remote_client.schedule_layer_file_upload(layer)?;
}
// should any new image layer been created, not uploading index_part will
// result in a mismatch between remote_physical_size and layermap calculated
// size, which will fail some tests, but should not be an issue otherwise.
remote_client.schedule_index_upload_for_file_changes()?;
self.remote_client
.schedule_index_upload_for_file_changes()?;
Ok(())
}

@@ -4630,11 +4635,9 @@ impl Timeline {
pub(super) async fn gc(&self) -> anyhow::Result<GcResult> {
// this is most likely the background tasks, but it might be the spawned task from
// immediate_gc
let cancel = crate::task_mgr::shutdown_token();
let _g = tokio::select! {
guard = self.gc_lock.lock() => guard,
_ = self.cancel.cancelled() => return Ok(GcResult::default()),
_ = cancel.cancelled() => return Ok(GcResult::default()),
};
let timer = self.metrics.garbage_collect_histo.start_timer();
@@ -4731,7 +4734,7 @@ impl Timeline {
if l.get_lsn_range().end > horizon_cutoff {
debug!(
"keeping {} because it's newer than horizon_cutoff {}",
l.filename(),
l.layer_name(),
horizon_cutoff,
);
result.layers_needed_by_cutoff += 1;
@@ -4742,7 +4745,7 @@ impl Timeline {
if l.get_lsn_range().end > pitr_cutoff {
debug!(
"keeping {} because it's newer than pitr_cutoff {}",
l.filename(),
l.layer_name(),
pitr_cutoff,
);
result.layers_needed_by_pitr += 1;
@@ -4761,7 +4764,7 @@ impl Timeline {
if &l.get_lsn_range().start <= retain_lsn {
debug!(
"keeping {} because it's still might be referenced by child branch forked at {} is_dropped: xx is_incremental: {}",
l.filename(),
l.layer_name(),
retain_lsn,
l.is_incremental(),
);
@@ -4792,7 +4795,7 @@ impl Timeline {
if !layers
.image_layer_exists(&l.get_key_range(), &(l.get_lsn_range().end..new_gc_cutoff))
{
debug!("keeping {} because it is the latest layer", l.filename());
debug!("keeping {} because it is the latest layer", l.layer_name());
result.layers_not_updated += 1;
continue 'outer;
}
@@ -4800,7 +4803,7 @@ impl Timeline {
// We didn't find any reason to keep this file, so remove it.
debug!(
"garbage collecting {} is_dropped: xx is_incremental: {}",
l.filename(),
l.layer_name(),
l.is_incremental(),
);
layers_to_remove.push(l);
@@ -4820,9 +4823,7 @@ impl Timeline {

result.layers_removed = gc_layers.len() as u64;

if let Some(remote_client) = self.remote_client.as_ref() {
remote_client.schedule_gc_update(&gc_layers)?;
}
self.remote_client.schedule_gc_update(&gc_layers)?;

guard.finish_gc_timeline(&gc_layers);

@@ -5206,7 +5207,7 @@ impl<'a> TimelineWriter<'a> {
let buf_size: u64 = buf.len().try_into().expect("oversized value buf");

let action = self.get_open_layer_action(lsn, buf_size);
let layer = self.handle_open_layer_action(lsn, action).await?;
let layer = self.handle_open_layer_action(lsn, action, ctx).await?;
let res = layer.put_value(key, lsn, &buf, ctx).await;

if res.is_ok() {
@@ -5229,14 +5230,15 @@ impl<'a> TimelineWriter<'a> {
&mut self,
at: Lsn,
action: OpenLayerAction,
ctx: &RequestContext,
) -> anyhow::Result<&Arc<InMemoryLayer>> {
match action {
OpenLayerAction::Roll => {
let freeze_at = self.write_guard.as_ref().unwrap().max_lsn.unwrap();
self.roll_layer(freeze_at).await?;
self.open_layer(at).await?;
self.open_layer(at, ctx).await?;
}
OpenLayerAction::Open => self.open_layer(at).await?,
OpenLayerAction::Open => self.open_layer(at, ctx).await?,
OpenLayerAction::None => {
assert!(self.write_guard.is_some());
}
@@ -5245,8 +5247,8 @@ impl<'a> TimelineWriter<'a> {
Ok(&self.write_guard.as_ref().unwrap().open_layer)
}

async fn open_layer(&mut self, at: Lsn) -> anyhow::Result<()> {
let layer = self.tl.get_layer_for_write(at).await?;
async fn open_layer(&mut self, at: Lsn, ctx: &RequestContext) -> anyhow::Result<()> {
let layer = self.tl.get_layer_for_write(at, ctx).await?;
let initial_size = layer.size().await?;

let last_freeze_at = self.last_freeze_at.load();
@@ -5323,10 +5325,14 @@ impl<'a> TimelineWriter<'a> {
Ok(())
}

pub(crate) async fn delete_batch(&mut self, batch: &[(Range<Key>, Lsn)]) -> anyhow::Result<()> {
pub(crate) async fn delete_batch(
&mut self,
batch: &[(Range<Key>, Lsn)],
ctx: &RequestContext,
) -> anyhow::Result<()> {
if let Some((_, lsn)) = batch.first() {
let action = self.get_open_layer_action(*lsn, 0);
let layer = self.handle_open_layer_action(*lsn, action).await?;
let layer = self.handle_open_layer_action(*lsn, action, ctx).await?;
layer.put_tombstones(batch).await?;
}
@@ -295,13 +295,11 @@ impl Timeline {
// Update the LayerMap so that readers will use the new layers, and enqueue it for writing to remote storage
self.rewrite_layers(replace_layers, drop_layers).await?;

if let Some(remote_client) = self.remote_client.as_ref() {
// We wait for all uploads to complete before finishing this compaction stage. This is not
// necessary for correctness, but it simplifies testing, and avoids proceeding with another
// Timeline's compaction while this timeline's uploads may be generating lots of disk I/O
// load.
remote_client.wait_completion().await?;
}
// We wait for all uploads to complete before finishing this compaction stage. This is not
// necessary for correctness, but it simplifies testing, and avoids proceeding with another
// Timeline's compaction while this timeline's uploads may be generating lots of disk I/O
// load.
self.remote_client.wait_completion().await?;

Ok(())
}
@@ -700,6 +698,7 @@ impl Timeline {
debug!("Create new layer {}..{}", lsn_range.start, lsn_range.end);
lsn_range.clone()
},
ctx,
)
.await?,
);
@@ -755,6 +754,7 @@ impl Timeline {
&self
.conf
.timeline_path(&self.tenant_shard_id, &self.timeline_id),
ctx,
)
.await
.fatal_err("VirtualFile::open for timeline dir fsync");
@@ -1093,6 +1093,7 @@ impl CompactionJobExecutor for TimelineAdaptor {
self.timeline.tenant_shard_id,
key_range.start,
lsn_range.clone(),
ctx,
)
.await?;

@@ -1167,6 +1168,7 @@ impl TimelineAdaptor {
self.timeline.tenant_shard_id,
key_range,
lsn,
ctx,
)
.await?;

@@ -26,19 +26,21 @@ use super::{Timeline, TimelineResources};
/// during attach or pageserver restart.
/// See comment in persist_index_part_with_deleted_flag.
async fn set_deleted_in_remote_index(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
if let Some(remote_client) = timeline.remote_client.as_ref() {
match remote_client.persist_index_part_with_deleted_flag().await {
// If we (now, or already) marked it successfully as deleted, we can proceed
Ok(()) | Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(_)) => (),
// Bail out otherwise
//
// AlreadyInProgress shouldn't happen, because the 'delete_lock' prevents
// two tasks from performing the deletion at the same time. The first task
// that starts deletion should run it to completion.
Err(e @ PersistIndexPartWithDeletedFlagError::AlreadyInProgress(_))
| Err(e @ PersistIndexPartWithDeletedFlagError::Other(_)) => {
return Err(DeleteTimelineError::Other(anyhow::anyhow!(e)));
}
match timeline
.remote_client
.persist_index_part_with_deleted_flag()
.await
{
// If we (now, or already) marked it successfully as deleted, we can proceed
Ok(()) | Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(_)) => (),
// Bail out otherwise
//
// AlreadyInProgress shouldn't happen, because the 'delete_lock' prevents
// two tasks from performing the deletion at the same time. The first task
// that starts deletion should run it to completion.
Err(e @ PersistIndexPartWithDeletedFlagError::AlreadyInProgress(_))
| Err(e @ PersistIndexPartWithDeletedFlagError::Other(_)) => {
return Err(DeleteTimelineError::Other(anyhow::anyhow!(e)));
}
}
Ok(())
@@ -117,11 +119,11 @@ pub(super) async fn delete_local_timeline_directory(

/// Removes remote layers and an index file after them.
async fn delete_remote_layers_and_index(timeline: &Timeline) -> anyhow::Result<()> {
if let Some(remote_client) = &timeline.remote_client {
remote_client.delete_all().await.context("delete_all")?
};

Ok(())
timeline
.remote_client
.delete_all()
.await
.context("delete_all")
}

// This function removs remaining traces of a timeline on disk.
@@ -260,7 +262,7 @@ impl DeleteTimelineFlow {
tenant: Arc<Tenant>,
timeline_id: TimelineId,
local_metadata: &TimelineMetadata,
remote_client: Option<RemoteTimelineClient>,
remote_client: RemoteTimelineClient,
deletion_queue_client: DeletionQueueClient,
) -> anyhow::Result<()> {
// Note: here we even skip populating layer map. Timeline is essentially uninitialized.
@@ -22,8 +22,6 @@ pub(crate) enum Error {
TooManyAncestors,
#[error("shutting down, please retry later")]
ShuttingDown,
#[error("detached timeline must receive writes before the operation")]
DetachedTimelineNeedsWrites,
#[error("flushing failed")]
FlushAncestor(#[source] anyhow::Error),
#[error("layer download failed")]
@@ -72,10 +70,6 @@ pub(super) async fn prepare(
) -> Result<(completion::Completion, PreparedTimelineDetach), Error> {
use Error::*;

if detached.remote_client.as_ref().is_none() {
unimplemented!("no new code for running without remote storage");
}

let Some((ancestor, ancestor_lsn)) = detached
.ancestor_timeline
.as_ref()
@@ -94,14 +88,6 @@ pub(super) async fn prepare(
return Err(TooManyAncestors);
}

if detached.get_prev_record_lsn() == Lsn::INVALID
|| detached.disk_consistent_lsn.load() == ancestor_lsn
{
// this is to avoid a problem that after detaching we would be unable to start up the
// compute because of "PREV_LSN: invalid".
return Err(DetachedTimelineNeedsWrites);
}

// before we acquire the gate, we must mark the ancestor as having a detach operation
// ongoing which will block other concurrent detach operations so we don't get to ackward
// situations where there would be two branches trying to reparent earlier branches.
@@ -225,6 +211,7 @@ pub(super) async fn prepare(
&detached
.conf
.timeline_path(&detached.tenant_shard_id, &detached.timeline_id),
ctx,
)
.await
.fatal_err("VirtualFile::open for timeline dir fsync");
@@ -324,8 +311,6 @@ async fn upload_rewritten_layer(
// FIXME: better shuttingdown error
target
.remote_client
.as_ref()
.unwrap()
.upload_layer_file(&copied, cancel)
.await
.map_err(UploadRewritten)?;
@@ -349,6 +334,7 @@ async fn copy_lsn_prefix(
target_timeline.tenant_shard_id,
layer.layer_desc().key_range.start,
layer.layer_desc().lsn_range.start..end_lsn,
ctx,
)
.await
.map_err(CopyDeltaPrefix)?;
@@ -407,15 +393,13 @@ async fn remote_copy(
let owned = crate::tenant::storage_layer::Layer::for_evicted(
adoptee.conf,
adoptee,
adopted.layer_desc().filename(),
adopted.layer_desc().layer_name(),
metadata,
);

// FIXME: better shuttingdown error
adoptee
.remote_client
.as_ref()
.unwrap()
.copy_timeline_layer(adopted, &owned, cancel)
.await
.map(move |()| owned)
@@ -429,11 +413,6 @@ pub(super) async fn complete(
prepared: PreparedTimelineDetach,
_ctx: &RequestContext,
) -> Result<Vec<TimelineId>, anyhow::Error> {
let rtc = detached
.remote_client
.as_ref()
.expect("has to have a remote timeline client for timeline ancestor detach");

let PreparedTimelineDetach { layers } = prepared;

let ancestor = detached
@@ -450,11 +429,13 @@ pub(super) async fn complete(
//
// this is not perfect, but it avoids us a retry happening after a compaction or gc on restart
// which could give us a completely wrong layer combination.
rtc.schedule_adding_existing_layers_to_index_detach_and_wait(
&layers,
(ancestor.timeline_id, ancestor_lsn),
)
.await?;
detached
.remote_client
.schedule_adding_existing_layers_to_index_detach_and_wait(
&layers,
(ancestor.timeline_id, ancestor_lsn),
)
.await?;

let mut tasks = tokio::task::JoinSet::new();

@@ -499,8 +480,6 @@ pub(super) async fn complete(
async move {
let res = timeline
.remote_client
.as_ref()
.expect("reparented has to have remote client because detached has one")
.schedule_reparenting_and_wait(&new_parent)
.await;
@@ -23,7 +23,7 @@ use std::{
use pageserver_api::models::{EvictionPolicy, EvictionPolicyLayerAccessThreshold};
use tokio::time::Instant;
use tokio_util::sync::CancellationToken;
use tracing::{debug, error, info, info_span, instrument, warn, Instrument};
use tracing::{debug, info, info_span, instrument, warn, Instrument};

use crate::{
context::{DownloadBehavior, RequestContext},
@@ -211,11 +211,6 @@ impl Timeline {

// So, we just need to deal with this.

if self.remote_client.is_none() {
error!("no remote storage configured, cannot evict layers");
return ControlFlow::Continue(());
}

let mut js = tokio::task::JoinSet::new();
{
let guard = self.layers.read().await;

@@ -6,10 +6,9 @@ use crate::{
self,
index::{IndexPart, LayerFileMetadata},
},
storage_layer::LayerFileName,
storage_layer::LayerName,
Generation,
},
METADATA_FILE_NAME,
};
use anyhow::Context;
use camino::{Utf8Path, Utf8PathBuf};
@@ -20,15 +19,13 @@ use utils::lsn::Lsn;
/// Identified files in the timeline directory.
pub(super) enum Discovered {
/// The only one we care about
Layer(LayerFileName, Utf8PathBuf, u64),
Layer(LayerName, Utf8PathBuf, u64),
/// Old ephmeral files from previous launches, should be removed
Ephemeral(String),
/// Old temporary timeline files, unsure what these really are, should be removed
Temporary(String),
/// Temporary on-demand download files, should be removed
TemporaryDownload(String),
/// "metadata" file we persist locally and include in `index_part.json`
Metadata,
/// Backup file from previously future layers
IgnoredBackup,
/// Unrecognized, warn about these
@@ -43,15 +40,13 @@ pub(super) fn scan_timeline_dir(path: &Utf8Path) -> anyhow::Result<Vec<Discovere
let direntry = direntry?;
let file_name = direntry.file_name().to_string();

let discovered = match LayerFileName::from_str(&file_name) {
let discovered = match LayerName::from_str(&file_name) {
Ok(file_name) => {
let file_size = direntry.metadata()?.len();
Discovered::Layer(file_name, direntry.path().to_owned(), file_size)
}
Err(_) => {
if file_name == METADATA_FILE_NAME {
Discovered::Metadata
} else if file_name.ends_with(".old") {
if file_name.ends_with(".old") {
// ignore these
Discovered::IgnoredBackup
} else if remote_timeline_client::is_temp_download_file(direntry.path()) {
@@ -72,6 +67,28 @@ pub(super) fn scan_timeline_dir(path: &Utf8Path) -> anyhow::Result<Vec<Discovere
Ok(ret)
}

/// Whereas `LayerFileMetadata` describes the metadata we would store in remote storage,
/// this structure extends it with metadata describing the layer's presence in local storage.
#[derive(Clone, Debug)]
pub(super) struct LocalLayerFileMetadata {
pub(super) metadata: LayerFileMetadata,
pub(super) local_path: Utf8PathBuf,
}

impl LocalLayerFileMetadata {
pub fn new(
local_path: Utf8PathBuf,
file_size: u64,
generation: Generation,
shard: ShardIndex,
) -> Self {
Self {
local_path,
metadata: LayerFileMetadata::new(file_size, generation, shard),
}
}
}

/// Decision on what to do with a layer file after considering its local and remote metadata.
#[derive(Clone, Debug)]
pub(super) enum Decision {
@@ -80,11 +97,11 @@ pub(super) enum Decision {
/// The layer is present locally, but local metadata does not match remote; we must
/// delete it and treat it as evicted.
UseRemote {
local: LayerFileMetadata,
local: LocalLayerFileMetadata,
remote: LayerFileMetadata,
},
/// The layer is present locally, and metadata matches.
UseLocal(LayerFileMetadata),
UseLocal(LocalLayerFileMetadata),
}

/// A layer needs to be left out of the layer map.
@@ -92,39 +109,29 @@ pub(super) enum Decision {
pub(super) enum DismissedLayer {
/// The related layer is is in future compared to disk_consistent_lsn, it must not be loaded.
Future {
/// The local metadata. `None` if the layer is only known through [`IndexPart`].
local: Option<LayerFileMetadata>,
/// `None` if the layer is only known through [`IndexPart`].
local: Option<LocalLayerFileMetadata>,
},
/// The layer only exists locally.
///
/// In order to make crash safe updates to layer map, we must dismiss layers which are only
/// found locally or not yet included in the remote `index_part.json`.
LocalOnly(LayerFileMetadata),
LocalOnly(LocalLayerFileMetadata),
}

/// Merges local discoveries and remote [`IndexPart`] to a collection of decisions.
pub(super) fn reconcile(
discovered: Vec<(LayerFileName, Utf8PathBuf, u64)>,
discovered: Vec<(LayerName, Utf8PathBuf, u64)>,
index_part: Option<&IndexPart>,
disk_consistent_lsn: Lsn,
generation: Generation,
shard: ShardIndex,
) -> Vec<(
LayerFileName,
Option<Utf8PathBuf>,
Result<Decision, DismissedLayer>,
)> {
) -> Vec<(LayerName, Result<Decision, DismissedLayer>)> {
use Decision::*;

// name => (local_path, local_metadata, remote_metadata)
type Collected = HashMap<
LayerFileName,
(
Option<Utf8PathBuf>,
Option<LayerFileMetadata>,
Option<LayerFileMetadata>,
),
>;
// name => (local_metadata, remote_metadata)
type Collected =
HashMap<LayerName, (Option<LocalLayerFileMetadata>, Option<LayerFileMetadata>)>;

let mut discovered = discovered
.into_iter()
@@ -135,8 +142,9 @@ pub(super) fn reconcile(
// it is not in IndexPart, in which case using our current generation makes sense
// because it will be uploaded in this generation.
(
Some(local_path),
Some(LayerFileMetadata::new(file_size, generation, shard)),
Some(LocalLayerFileMetadata::new(
local_path, file_size, generation, shard,
)),
None,
),
)
@@ -152,20 +160,20 @@ pub(super) fn reconcile(
.map(|(name, metadata)| (name, LayerFileMetadata::from(metadata)))
.for_each(|(name, metadata)| {
if let Some(existing) = discovered.get_mut(name) {
existing.2 = Some(metadata);
existing.1 = Some(metadata);
} else {
discovered.insert(name.to_owned(), (None, None, Some(metadata)));
discovered.insert(name.to_owned(), (None, Some(metadata)));
}
});

discovered
.into_iter()
.map(|(name, (local_path, local, remote))| {
.map(|(name, (local, remote))| {
let decision = if name.is_in_future(disk_consistent_lsn) {
Err(DismissedLayer::Future { local })
} else {
match (local, remote) {
(Some(local), Some(remote)) if local != remote => {
(Some(local), Some(remote)) if local.metadata != remote => {
Ok(UseRemote { local, remote })
}
(Some(x), Some(_)) => Ok(UseLocal(x)),
@@ -177,7 +185,7 @@ pub(super) fn reconcile(
}
};

(name, local_path, decision)
(name, decision)
})
.collect::<Vec<_>>()
}
@@ -189,12 +197,12 @@ pub(super) fn cleanup(path: &Utf8Path, kind: &str) -> anyhow::Result<()> {
}

pub(super) fn cleanup_local_file_for_remote(
path: &Utf8Path,
local: &LayerFileMetadata,
local: &LocalLayerFileMetadata,
remote: &LayerFileMetadata,
) -> anyhow::Result<()> {
let local_size = local.file_size();
let local_size = local.metadata.file_size();
let remote_size = remote.file_size();
let path = &local.local_path;

let file_name = path.file_name().expect("must be file path");
tracing::warn!("removing local file {file_name:?} because it has unexpected length {local_size}; length in remote index is {remote_size}");
@@ -211,7 +219,7 @@ pub(super) fn cleanup_local_file_for_remote(

pub(super) fn cleanup_future_layer(
path: &Utf8Path,
name: &LayerFileName,
name: &LayerName,
disk_consistent_lsn: Lsn,
) -> anyhow::Result<()> {
// future image layers are allowed to be produced always for not yet flushed to disk
@@ -223,12 +231,14 @@ pub(super) fn cleanup_future_layer(
}

pub(super) fn cleanup_local_only_file(
path: &Utf8Path,
name: &LayerFileName,
local: &LayerFileMetadata,
name: &LayerName,
local: &LocalLayerFileMetadata,
) -> anyhow::Result<()> {
let kind = name.kind();
tracing::info!("found local-only {kind} layer {name}, metadata {local:?}");
std::fs::remove_file(path)?;
tracing::info!(
"found local-only {kind} layer {name}, metadata {:?}",
local.metadata
);
std::fs::remove_file(&local.local_path)?;
Ok(())
}

@@ -9,6 +9,7 @@ use utils::{

use crate::{
config::PageServerConf,
context::RequestContext,
metrics::TimelineMetrics,
tenant::{
layer_map::{BatchedUpdates, LayerMap},
@@ -69,6 +70,7 @@ impl LayerManager {
conf: &'static PageServerConf,
timeline_id: TimelineId,
tenant_shard_id: TenantShardId,
ctx: &RequestContext,
) -> Result<Arc<InMemoryLayer>> {
ensure!(lsn.is_aligned());

@@ -105,7 +107,7 @@ impl LayerManager {
);

let new_layer =
InMemoryLayer::create(conf, timeline_id, tenant_shard_id, start_lsn).await?;
InMemoryLayer::create(conf, timeline_id, tenant_shard_id, start_lsn, ctx).await?;
let layer = Arc::new(new_layer);

self.layer_map.open_layer = Some(layer.clone());
@@ -294,7 +296,7 @@ impl<T: AsLayerDesc + Clone> LayerFileManager<T> {
// A layer's descriptor is present in the LayerMap => the LayerFileManager contains a layer for the descriptor.
self.0
.get(&desc.key())
.with_context(|| format!("get layer from desc: {}", desc.filename()))
.with_context(|| format!("get layer from desc: {}", desc.layer_name()))
.expect("not found")
.clone()
}

@@ -1,8 +1,9 @@
use super::storage_layer::LayerFileName;
use super::storage_layer::LayerName;
use super::storage_layer::ResidentLayer;
use crate::tenant::metadata::TimelineMetadata;
use crate::tenant::remote_timeline_client::index::IndexPart;
use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
use crate::tenant::remote_timeline_client::index::Lineage;
use std::collections::{HashMap, VecDeque};
use std::fmt::Debug;

@@ -45,7 +46,7 @@ pub(crate) struct UploadQueueInitialized {

/// All layer files stored in the remote storage, taking into account all
/// in-progress and queued operations
pub(crate) latest_files: HashMap<LayerFileName, LayerFileMetadata>,
pub(crate) latest_files: HashMap<LayerName, LayerFileMetadata>,

/// How many file uploads or deletions been scheduled, since the
/// last (scheduling of) metadata index upload?
@@ -56,6 +57,9 @@ pub(crate) struct UploadQueueInitialized {
/// DANGER: do not return to outside world, e.g., safekeepers.
pub(crate) latest_metadata: TimelineMetadata,

/// Part of the flattened "next" `index_part.json`.
pub(crate) latest_lineage: Lineage,

/// `disk_consistent_lsn` from the last metadata file that was successfully
/// uploaded. `Lsn(0)` if nothing was uploaded yet.
/// Unlike `latest_files` or `latest_metadata`, this value is never ahead.
@@ -89,7 +93,7 @@ pub(crate) struct UploadQueueInitialized {
/// Putting this behind a testing feature to catch problems in tests, but assuming we could have a
/// bug causing leaks, then it's better to not leave this enabled for production builds.
#[cfg(feature = "testing")]
pub(crate) dangling_files: HashMap<LayerFileName, Generation>,
pub(crate) dangling_files: HashMap<LayerName, Generation>,

/// Set to true when we have inserted the `UploadOp::Shutdown` into the `inprogress_tasks`.
pub(crate) shutting_down: bool,
@@ -171,6 +175,7 @@ impl UploadQueue {
latest_files: HashMap::new(),
latest_files_changes_since_metadata_upload_scheduled: 0,
latest_metadata: metadata.clone(),
latest_lineage: Lineage::default(),
projected_remote_consistent_lsn: None,
visible_remote_consistent_lsn: Arc::new(AtomicLsn::new(0)),
// what follows are boring default initializations
@@ -218,6 +223,7 @@ impl UploadQueue {
latest_files: files,
latest_files_changes_since_metadata_upload_scheduled: 0,
latest_metadata: index_part.metadata.clone(),
latest_lineage: index_part.lineage.clone(),
projected_remote_consistent_lsn: Some(index_part.metadata.disk_consistent_lsn()),
visible_remote_consistent_lsn: Arc::new(
index_part.metadata.disk_consistent_lsn().into(),
@@ -281,7 +287,7 @@ pub(crate) struct UploadTask {
/// for timeline deletion, which skips this queue and goes directly to DeletionQueue.
#[derive(Debug)]
pub(crate) struct Delete {
pub(crate) layers: Vec<(LayerFileName, LayerFileMetadata)>,
pub(crate) layers: Vec<(LayerName, LayerFileMetadata)>,
}

#[derive(Debug)]
@@ -290,7 +296,7 @@ pub(crate) enum UploadOp {
UploadLayer(ResidentLayer, LayerFileMetadata),

/// Upload the metadata file
UploadMetadata(IndexPart, Lsn),
UploadMetadata(Box<IndexPart>, Lsn),

/// Delete layer files
Delete(Delete),

@@ -23,6 +23,7 @@ use pageserver_api::key::Key;
use utils::lsn::Lsn;
use utils::vec_map::VecMap;

use crate::context::RequestContext;
use crate::virtual_file::VirtualFile;

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
@@ -285,6 +286,7 @@ impl<'a> VectoredBlobReader<'a> {
&self,
read: &VectoredRead,
buf: BytesMut,
ctx: &RequestContext,
) -> Result<VectoredBlobsBuf, std::io::Error> {
assert!(read.size() > 0);
assert!(
@@ -295,7 +297,7 @@ impl<'a> VectoredBlobReader<'a> {
);
let buf = self
.file
.read_exact_at_n(buf, read.start, read.size())
.read_exact_at_n(buf, read.start, read.size(), ctx)
.await?;

let blobs_at = read.blobs_at.as_slice();

@@ -344,16 +344,23 @@ macro_rules! with_file {

impl VirtualFile {
/// Open a file in read-only mode. Like File::open.
pub async fn open(path: &Utf8Path) -> Result<VirtualFile, std::io::Error> {
Self::open_with_options(path, OpenOptions::new().read(true)).await
pub async fn open(
path: &Utf8Path,
ctx: &RequestContext,
) -> Result<VirtualFile, std::io::Error> {
Self::open_with_options(path, OpenOptions::new().read(true), ctx).await
}

/// Create a new file for writing. If the file exists, it will be truncated.
/// Like File::create.
pub async fn create(path: &Utf8Path) -> Result<VirtualFile, std::io::Error> {
pub async fn create(
path: &Utf8Path,
ctx: &RequestContext,
) -> Result<VirtualFile, std::io::Error> {
Self::open_with_options(
path,
OpenOptions::new().write(true).create(true).truncate(true),
ctx,
)
.await
}
@@ -366,6 +373,7 @@ impl VirtualFile {
pub async fn open_with_options(
path: &Utf8Path,
open_options: &OpenOptions,
_ctx: &RequestContext, /* TODO: carry a pointer to the metrics in the RequestContext instead of the parsing https://github.com/neondatabase/neon/issues/6107 */
) -> Result<VirtualFile, std::io::Error> {
let path_str = path.to_string();
let parts = path_str.split('/').collect::<Vec<&str>>();
@@ -576,21 +584,34 @@ impl VirtualFile {
Ok(self.pos)
}

pub async fn read_exact_at<B>(&self, buf: B, offset: u64) -> Result<B, Error>
pub async fn read_exact_at<B>(
&self,
buf: B,
offset: u64,
ctx: &RequestContext,
) -> Result<B, Error>
where
B: IoBufMut + Send,
{
let (buf, res) =
read_exact_at_impl(buf, offset, None, |buf, offset| self.read_at(buf, offset)).await;
let (buf, res) = read_exact_at_impl(buf, offset, None, |buf, offset| {
self.read_at(buf, offset, ctx)
})
.await;
res.map(|()| buf)
}

pub async fn read_exact_at_n<B>(&self, buf: B, offset: u64, count: usize) -> Result<B, Error>
pub async fn read_exact_at_n<B>(
&self,
buf: B,
offset: u64,
count: usize,
ctx: &RequestContext,
) -> Result<B, Error>
where
B: IoBufMut + Send,
{
let (buf, res) = read_exact_at_impl(buf, offset, Some(count), |buf, offset| {
self.read_at(buf, offset)
self.read_at(buf, offset, ctx)
})
.await;
res.map(|()| buf)
@@ -601,12 +622,13 @@ impl VirtualFile {
&self,
page: PageWriteGuard<'static>,
offset: u64,
ctx: &RequestContext,
) -> Result<PageWriteGuard<'static>, Error> {
let buf = PageWriteGuardBuf {
page,
init_up_to: 0,
};
let res = self.read_exact_at(buf, offset).await;
let res = self.read_exact_at(buf, offset, ctx).await;
res.map(|PageWriteGuardBuf { page, .. }| page)
.map_err(|e| Error::new(ErrorKind::Other, e))
}
@@ -699,7 +721,12 @@ impl VirtualFile {
(buf, Ok(n))
}

pub(crate) async fn read_at<B>(&self, buf: B, offset: u64) -> (B, Result<usize, Error>)
pub(crate) async fn read_at<B>(
&self,
buf: B,
offset: u64,
_ctx: &RequestContext, /* TODO: use for metrics: https://github.com/neondatabase/neon/issues/6107 */
) -> (B, Result<usize, Error>)
where
B: tokio_epoll_uring::BoundedBufMut + Send,
{
@@ -1020,20 +1047,21 @@ impl VirtualFile {
pub(crate) async fn read_blk(
&self,
blknum: u32,
ctx: &RequestContext,
) -> Result<crate::tenant::block_io::BlockLease<'_>, std::io::Error> {
use crate::page_cache::PAGE_SZ;
let buf = vec![0; PAGE_SZ];
let buf = self
.read_exact_at(buf, blknum as u64 * (PAGE_SZ as u64))
.read_exact_at(buf, blknum as u64 * (PAGE_SZ as u64), ctx)
.await?;
Ok(crate::tenant::block_io::BlockLease::Vec(buf))
}

async fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<(), Error> {
async fn read_to_end(&mut self, buf: &mut Vec<u8>, ctx: &RequestContext) -> Result<(), Error> {
let mut tmp = vec![0; 128];
loop {
let res;
(tmp, res) = self.read_at(tmp, self.pos).await;
(tmp, res) = self.read_at(tmp, self.pos, ctx).await;
match res {
Ok(0) => return Ok(()),
Ok(n) => {
@@ -1159,7 +1187,6 @@ mod tests {
use rand::seq::SliceRandom;
use rand::thread_rng;
use rand::Rng;
use std::future::Future;
use std::io::Write;
use std::os::unix::fs::FileExt;
use std::sync::Arc;
@@ -1176,9 +1203,14 @@ mod tests {
}

impl MaybeVirtualFile {
async fn read_exact_at(&self, mut buf: Vec<u8>, offset: u64) -> Result<Vec<u8>, Error> {
async fn read_exact_at(
&self,
mut buf: Vec<u8>,
offset: u64,
ctx: &RequestContext,
) -> Result<Vec<u8>, Error> {
match self {
MaybeVirtualFile::VirtualFile(file) => file.read_exact_at(buf, offset).await,
MaybeVirtualFile::VirtualFile(file) => file.read_exact_at(buf, offset, ctx).await,
MaybeVirtualFile::File(file) => file.read_exact_at(&mut buf, offset).map(|()| buf),
}
}
@@ -1230,13 +1262,13 @@ mod tests {

// Helper function to slurp contents of a file, starting at the current position,
// into a string
async fn read_string(&mut self) -> Result<String, Error> {
async fn read_string(&mut self, ctx: &RequestContext) -> Result<String, Error> {
use std::io::Read;
let mut buf = String::new();
match self {
MaybeVirtualFile::VirtualFile(file) => {
let mut buf = Vec::new();
file.read_to_end(&mut buf).await?;
file.read_to_end(&mut buf, ctx).await?;
return Ok(String::from_utf8(buf).unwrap());
}
MaybeVirtualFile::File(file) => {
@@ -1247,9 +1279,14 @@ mod tests {
}

// Helper function to slurp a portion of a file into a string
async fn read_string_at(&mut self, pos: u64, len: usize) -> Result<String, Error> {
async fn read_string_at(
&mut self,
pos: u64,
len: usize,
ctx: &RequestContext,
) -> Result<String, Error> {
let buf = vec![0; len];
let buf = self.read_exact_at(buf, pos).await?;
let buf = self.read_exact_at(buf, pos, ctx).await?;
Ok(String::from_utf8(buf).unwrap())
}
}
@@ -1263,73 +1300,101 @@ mod tests {
// results with VirtualFiles as with native Files. (Except that with
// native files, you will run out of file descriptors if the ulimit
// is low enough.)
test_files("virtual_files", |path, open_options| async move {
let vf = VirtualFile::open_with_options(&path, &open_options).await?;
Ok(MaybeVirtualFile::VirtualFile(vf))
})
.await
struct A;

impl Adapter for A {
async fn open(
path: Utf8PathBuf,
opts: OpenOptions,
ctx: &RequestContext,
) -> Result<MaybeVirtualFile, anyhow::Error> {
let vf = VirtualFile::open_with_options(&path, &opts, ctx).await?;
Ok(MaybeVirtualFile::VirtualFile(vf))
}
}
test_files::<A>("virtual_files").await
}

#[tokio::test]
async fn test_physical_files() -> anyhow::Result<()> {
test_files("physical_files", |path, open_options| async move {
Ok(MaybeVirtualFile::File({
let owned_fd = open_options.open(path.as_std_path()).await?;
File::from(owned_fd)
}))
})
.await
struct B;

impl Adapter for B {
async fn open(
path: Utf8PathBuf,
opts: OpenOptions,
_ctx: &RequestContext,
) -> Result<MaybeVirtualFile, anyhow::Error> {
Ok(MaybeVirtualFile::File({
let owned_fd = opts.open(path.as_std_path()).await?;
File::from(owned_fd)
}))
}
}

test_files::<B>("physical_files").await
}

async fn test_files<OF, FT>(testname: &str, openfunc: OF) -> anyhow::Result<()>
/// This is essentially a closure which returns a MaybeVirtualFile, but because rust edition
/// 2024 is not yet out with new lifetime capture or outlives rules, this is a async function
/// in trait which benefits from the new lifetime capture rules already.
trait Adapter {
async fn open(
path: Utf8PathBuf,
opts: OpenOptions,
ctx: &RequestContext,
) -> Result<MaybeVirtualFile, anyhow::Error>;
}

async fn test_files<A>(testname: &str) -> anyhow::Result<()>
where
OF: Fn(Utf8PathBuf, OpenOptions) -> FT,
FT: Future<Output = Result<MaybeVirtualFile, std::io::Error>>,
A: Adapter,
{
let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);
let testdir = crate::config::PageServerConf::test_repo_dir(testname);
std::fs::create_dir_all(&testdir)?;

let path_a = testdir.join("file_a");
let mut file_a = openfunc(
let mut file_a = A::open(
path_a.clone(),
OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.to_owned(),
&ctx,
)
.await?;
file_a.write_all(b"foobar".to_vec(), &ctx).await?;

// cannot read from a file opened in write-only mode
let _ = file_a.read_string().await.unwrap_err();
let _ = file_a.read_string(&ctx).await.unwrap_err();

// Close the file and re-open for reading
let mut file_a = openfunc(path_a, OpenOptions::new().read(true).to_owned()).await?;
let mut file_a = A::open(path_a, OpenOptions::new().read(true).to_owned(), &ctx).await?;

// cannot write to a file opened in read-only mode
let _ = file_a.write_all(b"bar".to_vec(), &ctx).await.unwrap_err();

// Try simple read
assert_eq!("foobar", file_a.read_string().await?);
assert_eq!("foobar", file_a.read_string(&ctx).await?);

// It's positioned at the EOF now.
assert_eq!("", file_a.read_string().await?);
assert_eq!("", file_a.read_string(&ctx).await?);

// Test seeks.
assert_eq!(file_a.seek(SeekFrom::Start(1)).await?, 1);
assert_eq!("oobar", file_a.read_string().await?);
assert_eq!("oobar", file_a.read_string(&ctx).await?);

assert_eq!(file_a.seek(SeekFrom::End(-2)).await?, 4);
assert_eq!("ar", file_a.read_string().await?);
assert_eq!("ar", file_a.read_string(&ctx).await?);

assert_eq!(file_a.seek(SeekFrom::Start(1)).await?, 1);
assert_eq!(file_a.seek(SeekFrom::Current(2)).await?, 3);
assert_eq!("bar", file_a.read_string().await?);
assert_eq!("bar", file_a.read_string(&ctx).await?);

assert_eq!(file_a.seek(SeekFrom::Current(-5)).await?, 1);
assert_eq!("oobar", file_a.read_string().await?);
assert_eq!("oobar", file_a.read_string(&ctx).await?);

// Test erroneous seeks to before byte 0
file_a.seek(SeekFrom::End(-7)).await.unwrap_err();
@@ -1337,11 +1402,11 @@ mod tests {
file_a.seek(SeekFrom::Current(-2)).await.unwrap_err();

// the erroneous seek should have left the position unchanged
assert_eq!("oobar", file_a.read_string().await?);
assert_eq!("oobar", file_a.read_string(&ctx).await?);

// Create another test file, and try FileExt functions on it.
let path_b = testdir.join("file_b");
let mut file_b = openfunc(
let mut file_b = A::open(
path_b.clone(),
OpenOptions::new()
.read(true)
@@ -1349,12 +1414,13 @@ mod tests {
.create(true)
.truncate(true)
.to_owned(),
&ctx,
)
.await?;
file_b.write_all_at(b"BAR".to_vec(), 3, &ctx).await?;
file_b.write_all_at(b"FOO".to_vec(), 0, &ctx).await?;

assert_eq!(file_b.read_string_at(2, 3).await?, "OBA");
assert_eq!(file_b.read_string_at(2, 3, &ctx).await?, "OBA");

// Open a lot of files, enough to cause some evictions. (Or to be precise,
// open the same file many times. The effect is the same.)
@@ -1364,9 +1430,13 @@ mod tests {

let mut vfiles = Vec::new();
for _ in 0..100 {
let mut vfile =
openfunc(path_b.clone(), OpenOptions::new().read(true).to_owned()).await?;
assert_eq!("FOOBAR", vfile.read_string().await?);
let mut vfile = A::open(
path_b.clone(),
OpenOptions::new().read(true).to_owned(),
&ctx,
)
.await?;
assert_eq!("FOOBAR", vfile.read_string(&ctx).await?);
vfiles.push(vfile);
}

@@ -1375,13 +1445,13 @@ mod tests {

// The underlying file descriptor for 'file_a' should be closed now. Try to read
// from it again. We left the file positioned at offset 1 above.
assert_eq!("oobar", file_a.read_string().await?);
assert_eq!("oobar", file_a.read_string(&ctx).await?);

// Check that all the other FDs still work too. Use them in random order for
// good measure.
vfiles.as_mut_slice().shuffle(&mut thread_rng());
for vfile in vfiles.iter_mut() {
assert_eq!("OOBAR", vfile.read_string_at(1, 5).await?);
assert_eq!("OOBAR", vfile.read_string_at(1, 5, &ctx).await?);
}

Ok(())
@@ -1397,6 +1467,7 @@ mod tests {
const THREADS: usize = 100;
const SAMPLE: [u8; SIZE] = [0xADu8; SIZE];

let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);
let testdir = crate::config::PageServerConf::test_repo_dir("vfile_concurrency");
std::fs::create_dir_all(&testdir)?;

@@ -1410,8 +1481,12 @@ mod tests {
// Open the file many times.
let mut files = Vec::new();
for _ in 0..VIRTUAL_FILES {
let f = VirtualFile::open_with_options(&test_file_path, OpenOptions::new().read(true))
.await?;
let f = VirtualFile::open_with_options(
&test_file_path,
OpenOptions::new().read(true),
&ctx,
)
.await?;
files.push(f);
}
let files = Arc::new(files);
@@ -1425,12 +1500,13 @@ mod tests {
let mut hdls = Vec::new();
for _threadno in 0..THREADS {
let files = files.clone();
let ctx = ctx.detached_child(TaskKind::UnitTest, DownloadBehavior::Error);
let hdl = rt.spawn(async move {
let mut buf = vec![0u8; SIZE];
let mut rng = rand::rngs::OsRng;
for _ in 1..1000 {
let f = &files[rng.gen_range(0..files.len())];
buf = f.read_exact_at(buf, 0).await.unwrap();
buf = f.read_exact_at(buf, 0, &ctx).await.unwrap();
assert!(buf == SAMPLE);
}
});
@@ -1446,6 +1522,7 @@ mod tests {

#[tokio::test]
async fn test_atomic_overwrite_basic() {
let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);
let testdir = crate::config::PageServerConf::test_repo_dir("test_atomic_overwrite_basic");
std::fs::create_dir_all(&testdir).unwrap();

@@ -1455,8 +1532,8 @@ mod tests {
VirtualFile::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"foo".to_vec())
.await
.unwrap();
let mut file = MaybeVirtualFile::from(VirtualFile::open(&path).await.unwrap());
let post = file.read_string().await.unwrap();
let mut file = MaybeVirtualFile::from(VirtualFile::open(&path, &ctx).await.unwrap());
let post = file.read_string(&ctx).await.unwrap();
assert_eq!(post, "foo");
assert!(!tmp_path.exists());
drop(file);
@@ -1464,8 +1541,8 @@ mod tests {
VirtualFile::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"bar".to_vec())
.await
.unwrap();
let mut file = MaybeVirtualFile::from(VirtualFile::open(&path).await.unwrap());
let post = file.read_string().await.unwrap();
let mut file = MaybeVirtualFile::from(VirtualFile::open(&path, &ctx).await.unwrap());
let post = file.read_string(&ctx).await.unwrap();
assert_eq!(post, "bar");
assert!(!tmp_path.exists());
drop(file);
@@ -1473,6 +1550,7 @@ mod tests {

#[tokio::test]
async fn test_atomic_overwrite_preexisting_tmp() {
let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);
let testdir =
crate::config::PageServerConf::test_repo_dir("test_atomic_overwrite_preexisting_tmp");
std::fs::create_dir_all(&testdir).unwrap();
@@ -1487,8 +1565,8 @@ mod tests {
.await
.unwrap();

let mut file = MaybeVirtualFile::from(VirtualFile::open(&path).await.unwrap());
let post = file.read_string().await.unwrap();
let mut file = MaybeVirtualFile::from(VirtualFile::open(&path, &ctx).await.unwrap());
let post = file.read_string(&ctx).await.unwrap();
assert_eq!(post, "foo");
assert!(!tmp_path.exists());
drop(file);

@@ -153,10 +153,7 @@ impl PostgresRedoManager {
process: self
.redo_process
.get()
.map(|p| WalRedoManagerProcessStatus {
pid: p.id(),
kind: std::borrow::Cow::Borrowed(p.kind().into()),
}),
.map(|p| WalRedoManagerProcessStatus { pid: p.id() }),
}
}
}

@@ -1,7 +1,10 @@
/// Layer of indirection previously used to support multiple implementations.
/// Subject to removal: <https://github.com/neondatabase/neon/issues/7753>
use std::time::Duration;

use bytes::Bytes;
use pageserver_api::{reltag::RelTag, shard::TenantShardId};
use tracing::warn;
use utils::lsn::Lsn;

use crate::{config::PageServerConf, walrecord::NeonWalRecord};
@@ -12,7 +15,6 @@ mod protocol;

mod process_impl {
pub(super) mod process_async;
pub(super) mod process_std;
}

#[derive(
@@ -34,10 +36,7 @@ pub enum Kind {
Async,
}

pub(crate) enum Process {
Sync(process_impl::process_std::WalRedoProcess),
Async(process_impl::process_async::WalRedoProcess),
}
pub(crate) struct Process(process_impl::process_async::WalRedoProcess);

impl Process {
#[inline(always)]
@@ -46,18 +45,17 @@ impl Process {
|
||||
tenant_shard_id: TenantShardId,
|
||||
pg_version: u32,
|
||||
) -> anyhow::Result<Self> {
|
||||
Ok(match conf.walredo_process_kind {
|
||||
Kind::Sync => Self::Sync(process_impl::process_std::WalRedoProcess::launch(
|
||||
conf,
|
||||
tenant_shard_id,
|
||||
pg_version,
|
||||
)?),
|
||||
Kind::Async => Self::Async(process_impl::process_async::WalRedoProcess::launch(
|
||||
conf,
|
||||
tenant_shard_id,
|
||||
pg_version,
|
||||
)?),
|
||||
})
|
||||
if conf.walredo_process_kind != Kind::Async {
|
||||
warn!(
|
||||
configured = %conf.walredo_process_kind,
|
||||
"the walredo_process_kind setting has been turned into a no-op, using async implementation"
|
||||
);
|
||||
}
|
||||
Ok(Self(process_impl::process_async::WalRedoProcess::launch(
|
||||
conf,
|
||||
tenant_shard_id,
|
||||
pg_version,
|
||||
)?))
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
@@ -69,29 +67,12 @@ impl Process {
|
||||
records: &[(Lsn, NeonWalRecord)],
|
||||
wal_redo_timeout: Duration,
|
||||
) -> anyhow::Result<Bytes> {
|
||||
match self {
|
||||
Process::Sync(p) => {
|
||||
p.apply_wal_records(rel, blknum, base_img, records, wal_redo_timeout)
|
||||
.await
|
||||
}
|
||||
Process::Async(p) => {
|
||||
p.apply_wal_records(rel, blknum, base_img, records, wal_redo_timeout)
|
||||
.await
|
||||
}
|
||||
}
|
||||
self.0
|
||||
.apply_wal_records(rel, blknum, base_img, records, wal_redo_timeout)
|
||||
.await
|
||||
}
|
||||
|
||||
pub(crate) fn id(&self) -> u32 {
|
||||
match self {
|
||||
Process::Sync(p) => p.id(),
|
||||
Process::Async(p) => p.id(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn kind(&self) -> Kind {
|
||||
match self {
|
||||
Process::Sync(_) => Kind::Sync,
|
||||
Process::Async(_) => Kind::Async,
|
||||
}
|
||||
self.0.id()
|
||||
}
|
||||
}
|
||||
|
||||
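The hunks above collapse the former Sync/Async walredo Process enum into a single async-only wrapper. A minimal sketch of how a caller drives the resulting API, based only on the signatures visible in this diff (the helper name replay_one_page, the pg_version literal and the timeout are illustrative; the real manager caches the launched process rather than launching per call; imports elided):

// Hypothetical caller inside pageserver's walredo module; illustrative only.
async fn replay_one_page(
    conf: &'static PageServerConf,
    shard: TenantShardId,
    rel: RelTag,
    blknum: u32,
    base_img: Option<Bytes>,
    records: Vec<(Lsn, NeonWalRecord)>,
) -> anyhow::Result<Bytes> {
    // launch() now always starts the async implementation and merely warns
    // if a non-default walredo_process_kind is still configured.
    let process = Process::launch(conf, shard, 16)?;
    // apply_wal_records() delegates straight to the single wrapped
    // process_async::WalRedoProcess.
    process
        .apply_wal_records(rel, blknum, &base_img, &records, Duration::from_secs(10))
        .await
}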
@@ -1,405 +0,0 @@
|
||||
use self::no_leak_child::NoLeakChild;
|
||||
use crate::{
|
||||
config::PageServerConf,
|
||||
metrics::{WalRedoKillCause, WAL_REDO_PROCESS_COUNTERS, WAL_REDO_RECORD_COUNTER},
|
||||
walrecord::NeonWalRecord,
|
||||
walredo::process::{no_leak_child, protocol},
|
||||
};
|
||||
use anyhow::Context;
|
||||
use bytes::Bytes;
|
||||
use nix::poll::{PollFd, PollFlags};
|
||||
use pageserver_api::{reltag::RelTag, shard::TenantShardId};
|
||||
use postgres_ffi::BLCKSZ;
|
||||
use std::os::fd::AsRawFd;
|
||||
#[cfg(feature = "testing")]
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::{
|
||||
collections::VecDeque,
|
||||
io::{Read, Write},
|
||||
process::{ChildStdin, ChildStdout, Command, Stdio},
|
||||
sync::{Mutex, MutexGuard},
|
||||
time::Duration,
|
||||
};
|
||||
use tracing::{debug, error, instrument, Instrument};
|
||||
use utils::{lsn::Lsn, nonblock::set_nonblock};
|
||||
|
||||
pub struct WalRedoProcess {
|
||||
#[allow(dead_code)]
|
||||
conf: &'static PageServerConf,
|
||||
tenant_shard_id: TenantShardId,
|
||||
// Some() on construction, only becomes None on Drop.
|
||||
child: Option<NoLeakChild>,
|
||||
stdout: Mutex<ProcessOutput>,
|
||||
stdin: Mutex<ProcessInput>,
|
||||
/// Counter to separate same sized walredo inputs failing at the same millisecond.
|
||||
#[cfg(feature = "testing")]
|
||||
dump_sequence: AtomicUsize,
|
||||
}
|
||||
|
||||
struct ProcessInput {
|
||||
stdin: ChildStdin,
|
||||
n_requests: usize,
|
||||
}
|
||||
|
||||
struct ProcessOutput {
|
||||
stdout: ChildStdout,
|
||||
pending_responses: VecDeque<Option<Bytes>>,
|
||||
n_processed_responses: usize,
|
||||
}
|
||||
|
||||
impl WalRedoProcess {
|
||||
//
|
||||
// Start postgres binary in special WAL redo mode.
|
||||
//
|
||||
#[instrument(skip_all,fields(pg_version=pg_version))]
|
||||
pub(crate) fn launch(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_shard_id: TenantShardId,
|
||||
pg_version: u32,
|
||||
) -> anyhow::Result<Self> {
|
||||
crate::span::debug_assert_current_span_has_tenant_id();
|
||||
|
||||
let pg_bin_dir_path = conf.pg_bin_dir(pg_version).context("pg_bin_dir")?; // TODO these should be infallible.
|
||||
let pg_lib_dir_path = conf.pg_lib_dir(pg_version).context("pg_lib_dir")?;
|
||||
|
||||
use no_leak_child::NoLeakChildCommandExt;
|
||||
// Start postgres itself
|
||||
let child = Command::new(pg_bin_dir_path.join("postgres"))
|
||||
// the first arg must be --wal-redo so the child process enters into walredo mode
|
||||
.arg("--wal-redo")
|
||||
// the child doesn't process this arg, but, having it in the argv helps indentify the
|
||||
// walredo process for a particular tenant when debugging a pagserver
|
||||
.args(["--tenant-shard-id", &format!("{tenant_shard_id}")])
|
||||
.stdin(Stdio::piped())
|
||||
.stderr(Stdio::piped())
|
||||
.stdout(Stdio::piped())
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &pg_lib_dir_path)
|
||||
.env("DYLD_LIBRARY_PATH", &pg_lib_dir_path)
|
||||
// NB: The redo process is not trusted after we sent it the first
|
||||
// walredo work. Before that, it is trusted. Specifically, we trust
|
||||
// it to
|
||||
// 1. close all file descriptors except stdin, stdout, stderr because
|
||||
// pageserver might not be 100% diligent in setting FD_CLOEXEC on all
|
||||
// the files it opens, and
|
||||
// 2. to use seccomp to sandbox itself before processing the first
|
||||
// walredo request.
|
||||
.spawn_no_leak_child(tenant_shard_id)
|
||||
.context("spawn process")?;
|
||||
WAL_REDO_PROCESS_COUNTERS.started.inc();
|
||||
let mut child = scopeguard::guard(child, |child| {
|
||||
error!("killing wal-redo-postgres process due to a problem during launch");
|
||||
child.kill_and_wait(WalRedoKillCause::Startup);
|
||||
});
|
||||
|
||||
let stdin = child.stdin.take().unwrap();
|
||||
let stdout = child.stdout.take().unwrap();
|
||||
let stderr = child.stderr.take().unwrap();
|
||||
let stderr = tokio::process::ChildStderr::from_std(stderr)
|
||||
.context("convert to tokio::ChildStderr")?;
|
||||
macro_rules! set_nonblock_or_log_err {
|
||||
($file:ident) => {{
|
||||
let res = set_nonblock($file.as_raw_fd());
|
||||
if let Err(e) = &res {
|
||||
error!(error = %e, file = stringify!($file), pid = child.id(), "set_nonblock failed");
|
||||
}
|
||||
res
|
||||
}};
|
||||
}
|
||||
set_nonblock_or_log_err!(stdin)?;
|
||||
set_nonblock_or_log_err!(stdout)?;
|
||||
|
||||
// all fallible operations post-spawn are complete, so get rid of the guard
|
||||
let child = scopeguard::ScopeGuard::into_inner(child);
|
||||
|
||||
tokio::spawn(
|
||||
async move {
|
||||
scopeguard::defer! {
|
||||
debug!("wal-redo-postgres stderr_logger_task finished");
|
||||
crate::metrics::WAL_REDO_PROCESS_COUNTERS.active_stderr_logger_tasks_finished.inc();
|
||||
}
|
||||
debug!("wal-redo-postgres stderr_logger_task started");
|
||||
crate::metrics::WAL_REDO_PROCESS_COUNTERS.active_stderr_logger_tasks_started.inc();
|
||||
|
||||
use tokio::io::AsyncBufReadExt;
|
||||
let mut stderr_lines = tokio::io::BufReader::new(stderr);
|
||||
let mut buf = Vec::new();
|
||||
let res = loop {
|
||||
buf.clear();
|
||||
// TODO we don't trust the process to cap its stderr length.
|
||||
// Currently it can do unbounded Vec allocation.
|
||||
match stderr_lines.read_until(b'\n', &mut buf).await {
|
||||
Ok(0) => break Ok(()), // eof
|
||||
Ok(num_bytes) => {
|
||||
let output = String::from_utf8_lossy(&buf[..num_bytes]);
|
||||
error!(%output, "received output");
|
||||
}
|
||||
Err(e) => {
|
||||
break Err(e);
|
||||
}
|
||||
}
|
||||
};
|
||||
match res {
|
||||
Ok(()) => (),
|
||||
Err(e) => {
|
||||
error!(error=?e, "failed to read from walredo stderr");
|
||||
}
|
||||
}
|
||||
}.instrument(tracing::info_span!(parent: None, "wal-redo-postgres-stderr", pid = child.id(), tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %pg_version))
|
||||
);
|
||||
|
||||
Ok(Self {
|
||||
conf,
|
||||
tenant_shard_id,
|
||||
child: Some(child),
|
||||
stdin: Mutex::new(ProcessInput {
|
||||
stdin,
|
||||
n_requests: 0,
|
||||
}),
|
||||
stdout: Mutex::new(ProcessOutput {
|
||||
stdout,
|
||||
pending_responses: VecDeque::new(),
|
||||
n_processed_responses: 0,
|
||||
}),
|
||||
#[cfg(feature = "testing")]
|
||||
dump_sequence: AtomicUsize::default(),
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn id(&self) -> u32 {
|
||||
self.child
|
||||
.as_ref()
|
||||
.expect("must not call this during Drop")
|
||||
.id()
|
||||
}
|
||||
|
||||
// Apply given WAL records ('records') over an old page image. Returns
|
||||
// new page image.
|
||||
//
|
||||
#[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), pid=%self.id()))]
|
||||
pub(crate) async fn apply_wal_records(
|
||||
&self,
|
||||
rel: RelTag,
|
||||
blknum: u32,
|
||||
base_img: &Option<Bytes>,
|
||||
records: &[(Lsn, NeonWalRecord)],
|
||||
wal_redo_timeout: Duration,
|
||||
) -> anyhow::Result<Bytes> {
|
||||
let tag = protocol::BufferTag { rel, blknum };
|
||||
let input = self.stdin.lock().unwrap();
|
||||
|
||||
// Serialize all the messages to send the WAL redo process first.
|
||||
//
|
||||
// This could be problematic if there are millions of records to replay,
|
||||
// but in practice the number of records is usually so small that it doesn't
|
||||
// matter, and it's better to keep this code simple.
|
||||
//
|
||||
// Most requests start with a before-image with BLCKSZ bytes, followed by
|
||||
// by some other WAL records. Start with a buffer that can hold that
|
||||
// comfortably.
|
||||
let mut writebuf: Vec<u8> = Vec::with_capacity((BLCKSZ as usize) * 3);
|
||||
protocol::build_begin_redo_for_block_msg(tag, &mut writebuf);
|
||||
if let Some(img) = base_img {
|
||||
protocol::build_push_page_msg(tag, img, &mut writebuf);
|
||||
}
|
||||
for (lsn, rec) in records.iter() {
|
||||
if let NeonWalRecord::Postgres {
|
||||
will_init: _,
|
||||
rec: postgres_rec,
|
||||
} = rec
|
||||
{
|
||||
protocol::build_apply_record_msg(*lsn, postgres_rec, &mut writebuf);
|
||||
} else {
|
||||
anyhow::bail!("tried to pass neon wal record to postgres WAL redo");
|
||||
}
|
||||
}
|
||||
protocol::build_get_page_msg(tag, &mut writebuf);
|
||||
WAL_REDO_RECORD_COUNTER.inc_by(records.len() as u64);
|
||||
|
||||
let res = self.apply_wal_records0(&writebuf, input, wal_redo_timeout);
|
||||
|
||||
if res.is_err() {
|
||||
// not all of these can be caused by this particular input, however these are so rare
|
||||
// in tests so capture all.
|
||||
self.record_and_log(&writebuf);
|
||||
}
|
||||
|
||||
res
|
||||
}
|
||||
|
||||
fn apply_wal_records0(
|
||||
&self,
|
||||
writebuf: &[u8],
|
||||
input: MutexGuard<ProcessInput>,
|
||||
wal_redo_timeout: Duration,
|
||||
) -> anyhow::Result<Bytes> {
|
||||
let mut proc = { input }; // TODO: remove this legacy rename, but this keep the patch small.
|
||||
let mut nwrite = 0usize;
|
||||
|
||||
while nwrite < writebuf.len() {
|
||||
let mut stdin_pollfds = [PollFd::new(&proc.stdin, PollFlags::POLLOUT)];
|
||||
let n = loop {
|
||||
match nix::poll::poll(&mut stdin_pollfds[..], wal_redo_timeout.as_millis() as i32) {
|
||||
Err(nix::errno::Errno::EINTR) => continue,
|
||||
res => break res,
|
||||
}
|
||||
}?;
|
||||
|
||||
if n == 0 {
|
||||
anyhow::bail!("WAL redo timed out");
|
||||
}
|
||||
|
||||
// If 'stdin' is writeable, do write.
|
||||
let in_revents = stdin_pollfds[0].revents().unwrap();
|
||||
if in_revents & (PollFlags::POLLERR | PollFlags::POLLOUT) != PollFlags::empty() {
|
||||
nwrite += proc.stdin.write(&writebuf[nwrite..])?;
|
||||
}
|
||||
if in_revents.contains(PollFlags::POLLHUP) {
|
||||
// We still have more data to write, but the process closed the pipe.
|
||||
anyhow::bail!("WAL redo process closed its stdin unexpectedly");
|
||||
}
|
||||
}
|
||||
let request_no = proc.n_requests;
|
||||
proc.n_requests += 1;
|
||||
drop(proc);
|
||||
|
||||
// To improve walredo performance we separate sending requests and receiving
|
||||
// responses. Them are protected by different mutexes (output and input).
|
||||
// If thread T1, T2, T3 send requests D1, D2, D3 to walredo process
|
||||
// then there is not warranty that T1 will first granted output mutex lock.
|
||||
// To address this issue we maintain number of sent requests, number of processed
|
||||
// responses and ring buffer with pending responses. After sending response
|
||||
// (under input mutex), threads remembers request number. Then it releases
|
||||
// input mutex, locks output mutex and fetch in ring buffer all responses until
|
||||
// its stored request number. The it takes correspondent element from
|
||||
// pending responses ring buffer and truncate all empty elements from the front,
|
||||
// advancing processed responses number.
|
||||
|
||||
let mut output = self.stdout.lock().unwrap();
|
||||
let n_processed_responses = output.n_processed_responses;
|
||||
while n_processed_responses + output.pending_responses.len() <= request_no {
|
||||
// We expect the WAL redo process to respond with an 8k page image. We read it
|
||||
// into this buffer.
|
||||
let mut resultbuf = vec![0; BLCKSZ.into()];
|
||||
let mut nresult: usize = 0; // # of bytes read into 'resultbuf' so far
|
||||
while nresult < BLCKSZ.into() {
|
||||
let mut stdout_pollfds = [PollFd::new(&output.stdout, PollFlags::POLLIN)];
|
||||
// We do two things simultaneously: reading response from stdout
|
||||
// and forward any logging information that the child writes to its stderr to the page server's log.
|
||||
let n = loop {
|
||||
match nix::poll::poll(
|
||||
&mut stdout_pollfds[..],
|
||||
wal_redo_timeout.as_millis() as i32,
|
||||
) {
|
||||
Err(nix::errno::Errno::EINTR) => continue,
|
||||
res => break res,
|
||||
}
|
||||
}?;
|
||||
|
||||
if n == 0 {
|
||||
anyhow::bail!("WAL redo timed out");
|
||||
}
|
||||
|
||||
// If we have some data in stdout, read it to the result buffer.
|
||||
let out_revents = stdout_pollfds[0].revents().unwrap();
|
||||
if out_revents & (PollFlags::POLLERR | PollFlags::POLLIN) != PollFlags::empty() {
|
||||
nresult += output.stdout.read(&mut resultbuf[nresult..])?;
|
||||
}
|
||||
if out_revents.contains(PollFlags::POLLHUP) {
|
||||
anyhow::bail!("WAL redo process closed its stdout unexpectedly");
|
||||
}
|
||||
}
|
||||
output
|
||||
.pending_responses
|
||||
.push_back(Some(Bytes::from(resultbuf)));
|
||||
}
|
||||
// Replace our request's response with None in `pending_responses`.
|
||||
// Then make space in the ring buffer by clearing out any seqence of contiguous
|
||||
// `None`'s from the front of `pending_responses`.
|
||||
// NB: We can't pop_front() because other requests' responses because another
|
||||
// requester might have grabbed the output mutex before us:
|
||||
// T1: grab input mutex
|
||||
// T1: send request_no 23
|
||||
// T1: release input mutex
|
||||
// T2: grab input mutex
|
||||
// T2: send request_no 24
|
||||
// T2: release input mutex
|
||||
// T2: grab output mutex
|
||||
// T2: n_processed_responses + output.pending_responses.len() <= request_no
|
||||
// 23 0 24
|
||||
// T2: enters poll loop that reads stdout
|
||||
// T2: put response for 23 into pending_responses
|
||||
// T2: put response for 24 into pending_resposnes
|
||||
// pending_responses now looks like this: Front Some(response_23) Some(response_24) Back
|
||||
// T2: takes its response_24
|
||||
// pending_responses now looks like this: Front Some(response_23) None Back
|
||||
// T2: does the while loop below
|
||||
// pending_responses now looks like this: Front Some(response_23) None Back
|
||||
// T2: releases output mutex
|
||||
// T1: grabs output mutex
|
||||
// T1: n_processed_responses + output.pending_responses.len() > request_no
|
||||
// 23 2 23
|
||||
// T1: skips poll loop that reads stdout
|
||||
// T1: takes its response_23
|
||||
// pending_responses now looks like this: Front None None Back
|
||||
// T2: does the while loop below
|
||||
// pending_responses now looks like this: Front Back
|
||||
// n_processed_responses now has value 25
|
||||
let res = output.pending_responses[request_no - n_processed_responses]
|
||||
.take()
|
||||
.expect("we own this request_no, nobody else is supposed to take it");
|
||||
while let Some(front) = output.pending_responses.front() {
|
||||
if front.is_none() {
|
||||
output.pending_responses.pop_front();
|
||||
output.n_processed_responses += 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
fn record_and_log(&self, writebuf: &[u8]) {
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
let millis = std::time::SystemTime::now()
|
||||
.duration_since(std::time::SystemTime::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_millis();
|
||||
|
||||
let seq = self.dump_sequence.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
// these files will be collected to an allure report
|
||||
let filename = format!("walredo-{millis}-{}-{seq}.walredo", writebuf.len());
|
||||
|
||||
let path = self.conf.tenant_path(&self.tenant_shard_id).join(&filename);
|
||||
|
||||
let res = std::fs::OpenOptions::new()
|
||||
.write(true)
|
||||
.create_new(true)
|
||||
.read(true)
|
||||
.open(path)
|
||||
.and_then(|mut f| f.write_all(writebuf));
|
||||
|
||||
// trip up allowed_errors
|
||||
if let Err(e) = res {
|
||||
tracing::error!(target=%filename, length=writebuf.len(), "failed to write out the walredo errored input: {e}");
|
||||
} else {
|
||||
tracing::error!(filename, "erroring walredo input saved");
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "testing"))]
|
||||
fn record_and_log(&self, _: &[u8]) {}
|
||||
}
|
||||
|
||||
impl Drop for WalRedoProcess {
|
||||
fn drop(&mut self) {
|
||||
self.child
|
||||
.take()
|
||||
.expect("we only do this once")
|
||||
.kill_and_wait(WalRedoKillCause::WalRedoProcessDrop);
|
||||
// no way to wait for stderr_logger_task from Drop because that is async only
|
||||
}
|
||||
}
|
||||
@@ -49,7 +49,7 @@ char *neon_auth_token;
int readahead_buffer_size = 128;
int flush_every_n_requests = 8;

int neon_protocol_version = 2;
int neon_protocol_version = 1;

static int n_reconnect_attempts = 0;
static int max_reconnect_attempts = 60;
@@ -860,7 +860,7 @@ pg_init_libpagestore(void)
"Version of compute<->page server protocol",
NULL,
&neon_protocol_version,
2, /* use protocol version 2 */
1, /* default to old protocol for now */
1, /* min */
2, /* max */
PGC_SU_BACKEND,

@@ -237,18 +237,50 @@ extern void neon_zeroextend(SMgrRelation reln, ForkNumber forknum,
extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);

/*
 * LSN values associated with each request to the pageserver
 */
typedef struct
{
/*
 * 'request_lsn' is the main value that determines which page version to
 * fetch.
 */
XLogRecPtr request_lsn;

/*
 * A hint to the pageserver that the requested page hasn't been modified
 * between this LSN and 'request_lsn'. That allows the pageserver to
 * return the page faster, without waiting for 'request_lsn' to arrive in
 * the pageserver, as long as 'not_modified_since' has arrived.
 */
XLogRecPtr not_modified_since;

/*
 * 'effective_request_lsn' is not included in the request that's sent to
 * the pageserver, but is used to keep track of the latest LSN of when the
 * request was made. In a standby server, this is always the same as the
 * 'request_lsn', but in the primary we use UINT64_MAX as the
 * 'request_lsn' to request the latest page version, so we need this
 * separate field to remember that latest LSN was when the request was
 * made. It's needed to manage prefetch request, to verify if the response
 * to a prefetched request is still valid.
 */
XLogRecPtr effective_request_lsn;
} neon_request_lsns;

#if PG_MAJORVERSION_NUM < 16
extern void neon_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer);
extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno,
XLogRecPtr request_lsn, XLogRecPtr not_modified_since, char *buffer);
neon_request_lsns request_lsns, char *buffer);
extern void neon_write(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
#else
extern void neon_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
void *buffer);
extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno,
XLogRecPtr request_lsn, XLogRecPtr not_modified_since, void *buffer);
neon_request_lsns request_lsns, void *buffer);
extern void neon_write(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, const void *buffer, bool skipFsync);
#endif

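Further down, pagestore_smgr.c uses these three LSNs to decide whether a previously issued prefetch response can still satisfy a new read: the (not_modified_since, effective_request_lsn] ranges of the old and new request must overlap. A small self-contained sketch of that predicate, re-expressed in Rust for illustration (the extension implements this in C inside neon_prefetch_response_usable(); the struct and function names here only mirror the header above):

// Illustrative model of the prefetch-response reuse check; not the extension's C code.
#[derive(Clone, Copy)]
struct RequestLsns {
    request_lsn: u64,           // sent to the pageserver; u64::MAX on a primary
    not_modified_since: u64,    // "page unchanged since this LSN" hint
    effective_request_lsn: u64, // LSN the request was logically made at
}

// An old response is reusable when the new request is not older than the
// prefetched one and the two (not_modified_since, effective_request_lsn]
// ranges overlap, i.e. the page is known unmodified across their union.
fn prefetch_response_usable(new: RequestLsns, old: RequestLsns) -> bool {
    if new.effective_request_lsn < old.effective_request_lsn
        || new.not_modified_since < old.not_modified_since
    {
        return false;
    }
    new.not_modified_since <= old.effective_request_lsn
}

fn main() {
    // Old prefetch covered (100, 500]; a new read claiming "unmodified since 200"
    // overlaps that range, so the prefetched page can be reused.
    let old = RequestLsns { request_lsn: u64::MAX, not_modified_since: 100, effective_request_lsn: 500 };
    let new = RequestLsns { request_lsn: u64::MAX, not_modified_since: 200, effective_request_lsn: 600 };
    assert!(prefetch_response_usable(new, old));
}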
@@ -168,8 +168,7 @@ typedef enum PrefetchStatus
|
||||
typedef struct PrefetchRequest
|
||||
{
|
||||
BufferTag buftag; /* must be first entry in the struct */
|
||||
XLogRecPtr request_lsn;
|
||||
XLogRecPtr not_modified_since;
|
||||
neon_request_lsns request_lsns;
|
||||
NeonResponse *response; /* may be null */
|
||||
PrefetchStatus status;
|
||||
shardno_t shard_no;
|
||||
@@ -271,16 +270,15 @@ static PrefetchState *MyPState;
|
||||
|
||||
static bool compact_prefetch_buffers(void);
|
||||
static void consume_prefetch_responses(void);
|
||||
static uint64 prefetch_register_buffer(BufferTag tag, XLogRecPtr *force_request_lsn, XLogRecPtr *force_not_modified_since);
|
||||
static uint64 prefetch_register_buffer(BufferTag tag, neon_request_lsns *force_request_lsns);
|
||||
static bool prefetch_read(PrefetchRequest *slot);
|
||||
static void prefetch_do_request(PrefetchRequest *slot, XLogRecPtr *force_request_lsn, XLogRecPtr *force_not_modified_since);
|
||||
static void prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns);
|
||||
static bool prefetch_wait_for(uint64 ring_index);
|
||||
static void prefetch_cleanup_trailing_unused(void);
|
||||
static inline void prefetch_set_unused(uint64 ring_index);
|
||||
|
||||
static void neon_get_request_lsn(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
|
||||
XLogRecPtr *request_lsn, XLogRecPtr *not_modified_since);
|
||||
static bool neon_prefetch_response_usable(XLogRecPtr request_lsn, XLogRecPtr not_modified_since,
|
||||
static neon_request_lsns neon_get_request_lsns(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno);
|
||||
static bool neon_prefetch_response_usable(neon_request_lsns request_lsns,
|
||||
PrefetchRequest *slot);
|
||||
|
||||
static bool
|
||||
@@ -338,8 +336,7 @@ compact_prefetch_buffers(void)
|
||||
target_slot->shard_no = source_slot->shard_no;
|
||||
target_slot->status = source_slot->status;
|
||||
target_slot->response = source_slot->response;
|
||||
target_slot->request_lsn = source_slot->request_lsn;
|
||||
target_slot->not_modified_since = source_slot->not_modified_since;
|
||||
target_slot->request_lsns = source_slot->request_lsns;
|
||||
target_slot->my_ring_index = empty_ring_index;
|
||||
|
||||
prfh_delete(MyPState->prf_hash, source_slot);
|
||||
@@ -358,8 +355,9 @@ compact_prefetch_buffers(void)
|
||||
};
|
||||
source_slot->response = NULL;
|
||||
source_slot->my_ring_index = 0;
|
||||
source_slot->request_lsn = InvalidXLogRecPtr;
|
||||
source_slot->not_modified_since = InvalidXLogRecPtr;
|
||||
source_slot->request_lsns = (neon_request_lsns) {
|
||||
InvalidXLogRecPtr, InvalidXLogRecPtr, InvalidXLogRecPtr
|
||||
};
|
||||
|
||||
/* update bookkeeping */
|
||||
n_moved++;
|
||||
@@ -689,7 +687,7 @@ prefetch_set_unused(uint64 ring_index)
|
||||
* prefetch_wait_for().
|
||||
*/
|
||||
static void
|
||||
prefetch_do_request(PrefetchRequest *slot, XLogRecPtr *force_request_lsn, XLogRecPtr *force_not_modified_since)
|
||||
prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns)
|
||||
{
|
||||
bool found;
|
||||
NeonGetPageRequest request = {
|
||||
@@ -700,23 +698,14 @@ prefetch_do_request(PrefetchRequest *slot, XLogRecPtr *force_request_lsn, XLogRe
|
||||
.blkno = slot->buftag.blockNum,
|
||||
};
|
||||
|
||||
Assert(((force_request_lsn != NULL) == (force_not_modified_since != NULL)));
|
||||
|
||||
if (force_request_lsn)
|
||||
{
|
||||
request.req.lsn = *force_request_lsn;
|
||||
request.req.not_modified_since = *force_not_modified_since;
|
||||
}
|
||||
if (force_request_lsns)
|
||||
slot->request_lsns = *force_request_lsns;
|
||||
else
|
||||
{
|
||||
neon_get_request_lsn(BufTagGetNRelFileInfo(slot->buftag),
|
||||
slot->buftag.forkNum,
|
||||
slot->buftag.blockNum,
|
||||
&request.req.lsn,
|
||||
&request.req.not_modified_since);
|
||||
}
|
||||
slot->request_lsn = request.req.lsn;
|
||||
slot->not_modified_since = request.req.not_modified_since;
|
||||
slot->request_lsns = neon_get_request_lsns(BufTagGetNRelFileInfo(slot->buftag),
|
||||
slot->buftag.forkNum,
|
||||
slot->buftag.blockNum);
|
||||
request.req.lsn = slot->request_lsns.request_lsn;
|
||||
request.req.not_modified_since = slot->request_lsns.not_modified_since;
|
||||
|
||||
Assert(slot->response == NULL);
|
||||
Assert(slot->my_ring_index == MyPState->ring_unused);
|
||||
@@ -742,25 +731,22 @@ prefetch_do_request(PrefetchRequest *slot, XLogRecPtr *force_request_lsn, XLogRe
|
||||
*
|
||||
* Register that we may want the contents of BufferTag in the near future.
|
||||
*
|
||||
* If force_request_lsn and force_not_modified_since are not NULL, those
|
||||
* values are sent to the pageserver. If they are NULL, we utilize the
|
||||
* lastWrittenLsn -infrastructure to fill them in.
|
||||
* If force_request_lsns is not NULL, those values are sent to the
|
||||
* pageserver. If NULL, we utilize the lastWrittenLsn -infrastructure
|
||||
* to calculate the LSNs to send.
|
||||
*
|
||||
* NOTE: this function may indirectly update MyPState->pfs_hash; which
|
||||
* invalidates any active pointers into the hash table.
|
||||
*/
|
||||
|
||||
static uint64
|
||||
prefetch_register_buffer(BufferTag tag, XLogRecPtr *force_request_lsn,
|
||||
XLogRecPtr *force_not_modified_since)
|
||||
prefetch_register_buffer(BufferTag tag, neon_request_lsns *force_request_lsns)
|
||||
{
|
||||
uint64 ring_index;
|
||||
PrefetchRequest req;
|
||||
PrefetchRequest *slot;
|
||||
PrfHashEntry *entry;
|
||||
|
||||
Assert(((force_request_lsn != NULL) == (force_not_modified_since != NULL)));
|
||||
|
||||
/* use an intermediate PrefetchRequest struct to ensure correct alignment */
|
||||
req.buftag = tag;
|
||||
Retry:
|
||||
@@ -781,10 +767,9 @@ Retry:
|
||||
* If the caller specified a request LSN to use, only accept prefetch
|
||||
* responses that satisfy that request.
|
||||
*/
|
||||
if (force_request_lsn)
|
||||
if (force_request_lsns)
|
||||
{
|
||||
if (!neon_prefetch_response_usable(*force_request_lsn,
|
||||
*force_not_modified_since, slot))
|
||||
if (!neon_prefetch_response_usable(*force_request_lsns, slot))
|
||||
{
|
||||
/* Wait for the old request to finish and discard it */
|
||||
if (!prefetch_wait_for(ring_index))
|
||||
@@ -886,7 +871,7 @@ Retry:
|
||||
slot->shard_no = get_shard_number(&tag);
|
||||
slot->my_ring_index = ring_index;
|
||||
|
||||
prefetch_do_request(slot, force_request_lsn, force_not_modified_since);
|
||||
prefetch_do_request(slot, force_request_lsns);
|
||||
Assert(slot->status == PRFS_REQUESTED);
|
||||
Assert(MyPState->ring_last <= ring_index &&
|
||||
ring_index < MyPState->ring_unused);
|
||||
@@ -1529,11 +1514,11 @@ nm_adjust_lsn(XLogRecPtr lsn)
|
||||
/*
|
||||
* Return LSN for requesting pages and number of blocks from page server
|
||||
*/
|
||||
static void
|
||||
neon_get_request_lsn(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
|
||||
XLogRecPtr *request_lsn, XLogRecPtr *not_modified_since)
|
||||
static neon_request_lsns
|
||||
neon_get_request_lsns(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno)
|
||||
{
|
||||
XLogRecPtr last_written_lsn;
|
||||
neon_request_lsns result;
|
||||
|
||||
last_written_lsn = GetLastWrittenLSN(rinfo, forknum, blkno);
|
||||
last_written_lsn = nm_adjust_lsn(last_written_lsn);
|
||||
@@ -1542,12 +1527,13 @@ neon_get_request_lsn(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
|
||||
if (RecoveryInProgress())
|
||||
{
|
||||
/* Request the page at the last replayed LSN. */
|
||||
*request_lsn = GetXLogReplayRecPtr(NULL);
|
||||
*not_modified_since = last_written_lsn;
|
||||
Assert(last_written_lsn <= *request_lsn);
|
||||
result.request_lsn = GetXLogReplayRecPtr(NULL);
|
||||
result.not_modified_since = last_written_lsn;
|
||||
result.effective_request_lsn = result.request_lsn;
|
||||
Assert(last_written_lsn <= result.request_lsn);
|
||||
|
||||
neon_log(DEBUG1, "neon_get_request_lsn request lsn %X/%X, not_modified_since %X/%X",
|
||||
LSN_FORMAT_ARGS(*request_lsn), LSN_FORMAT_ARGS(*not_modified_since));
|
||||
neon_log(DEBUG1, "neon_get_request_lsns request lsn %X/%X, not_modified_since %X/%X",
|
||||
LSN_FORMAT_ARGS(result.request_lsn), LSN_FORMAT_ARGS(result.not_modified_since));
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1559,7 +1545,7 @@ neon_get_request_lsn(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
|
||||
* must still in the buffer cache, so our request cannot concern
|
||||
* those.
|
||||
*/
|
||||
neon_log(DEBUG1, "neon_get_request_lsn GetLastWrittenLSN lsn %X/%X ",
|
||||
neon_log(DEBUG1, "neon_get_request_lsns GetLastWrittenLSN lsn %X/%X",
|
||||
LSN_FORMAT_ARGS(last_written_lsn));
|
||||
|
||||
/*
|
||||
@@ -1585,16 +1571,33 @@ neon_get_request_lsn(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
|
||||
}
|
||||
|
||||
/*
|
||||
* Request the latest version of the page. The most up-to-date request
|
||||
* LSN we could use would be the current insert LSN, but to avoid the
|
||||
* overhead of looking it up, use 'flushlsn' instead. This relies on
|
||||
* the assumption that if the page was modified since the last WAL
|
||||
* flush, it should still be in the buffer cache, and we wouldn't be
|
||||
* requesting it.
|
||||
* Request the very latest version of the page. In principle we
|
||||
* want to read the page at the current insert LSN, and we could
|
||||
* use that value in the request. However, there's a corner case
|
||||
* with pageserver's garbage collection. If the GC horizon is
|
||||
* set to a very small value, it's possible that by the time
|
||||
* that the pageserver processes our request, the GC horizon has
|
||||
* already moved past the LSN we calculate here. Standby servers
|
||||
* always have that problem as the can always lag behind the
|
||||
* primary, but for the primary we can avoid it by always
|
||||
* requesting the latest page, by setting request LSN to
|
||||
* UINT64_MAX.
|
||||
*
|
||||
* Remember the current LSN, however, so that we can later
|
||||
* correctly determine if the response to the request is still
|
||||
* valid. The most up-to-date LSN we could use for that purpose
|
||||
* would be the current insert LSN, but to avoid the overhead of
|
||||
* looking it up, use 'flushlsn' instead. This relies on the
|
||||
* assumption that if the page was modified since the last WAL
|
||||
* flush, it should still be in the buffer cache, and we
|
||||
* wouldn't be requesting it.
|
||||
*/
|
||||
*request_lsn = flushlsn;
|
||||
*not_modified_since = last_written_lsn;
|
||||
result.request_lsn = UINT64_MAX;
|
||||
result.not_modified_since = last_written_lsn;
|
||||
result.effective_request_lsn = flushlsn;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1604,12 +1607,16 @@ neon_get_request_lsn(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
|
||||
* satisfy a page read now.
|
||||
*/
|
||||
static bool
|
||||
neon_prefetch_response_usable(XLogRecPtr request_lsn, XLogRecPtr not_modified_since,
|
||||
neon_prefetch_response_usable(neon_request_lsns request_lsns,
|
||||
PrefetchRequest *slot)
|
||||
{
|
||||
/* sanity check the LSN's on the old and the new request */
|
||||
Assert(request_lsn >= not_modified_since);
|
||||
Assert(slot->request_lsn >= slot->not_modified_since);
|
||||
Assert(request_lsns.request_lsn >= request_lsns.not_modified_since);
|
||||
Assert(request_lsns.effective_request_lsn >= request_lsns.not_modified_since);
|
||||
Assert(request_lsns.effective_request_lsn <= request_lsns.request_lsn);
|
||||
Assert(slot->request_lsns.request_lsn >= slot->request_lsns.not_modified_since);
|
||||
Assert(slot->request_lsns.effective_request_lsn >= slot->request_lsns.not_modified_since);
|
||||
Assert(slot->request_lsns.effective_request_lsn <= slot->request_lsns.request_lsn);
|
||||
Assert(slot->status != PRFS_UNUSED);
|
||||
|
||||
/*
|
||||
@@ -1627,26 +1634,40 @@ neon_prefetch_response_usable(XLogRecPtr request_lsn, XLogRecPtr not_modified_si
|
||||
* calculate LSNs "out of order" with each other, but the prefetch queue
|
||||
* is backend-private at the moment.)
|
||||
*/
|
||||
if (request_lsn < slot->request_lsn || not_modified_since < slot->not_modified_since)
|
||||
if (request_lsns.effective_request_lsn < slot->request_lsns.effective_request_lsn ||
|
||||
request_lsns.not_modified_since < slot->request_lsns.not_modified_since)
|
||||
{
|
||||
ereport(LOG,
|
||||
(errcode(ERRCODE_IO_ERROR),
|
||||
errmsg(NEON_TAG "request with unexpected LSN after prefetch"),
|
||||
errdetail("Request %X/%X not_modified_since %X/%X, prefetch %X/%X not_modified_since %X/%X)",
|
||||
LSN_FORMAT_ARGS(request_lsn), LSN_FORMAT_ARGS(not_modified_since),
|
||||
LSN_FORMAT_ARGS(slot->request_lsn), LSN_FORMAT_ARGS(slot->not_modified_since))));
|
||||
LSN_FORMAT_ARGS(request_lsns.effective_request_lsn),
|
||||
LSN_FORMAT_ARGS(request_lsns.not_modified_since),
|
||||
LSN_FORMAT_ARGS(slot->request_lsns.effective_request_lsn),
|
||||
LSN_FORMAT_ARGS(slot->request_lsns.not_modified_since))));
|
||||
return false;
|
||||
}
|
||||
|
||||
/*---
|
||||
* Each request to the pageserver carries two LSN values:
|
||||
* `not_modified_since` and `request_lsn`. The (not_modified_since,
|
||||
* request_lsn] range of each request is effectively a claim that the page
|
||||
* has not been modified between those LSNs. If the range of the old
|
||||
* request in the queue overlaps with the new request, we know that the
|
||||
* page hasn't been modified in the union of the ranges. We can use the
|
||||
* response to old request to satisfy the new request in that case. For
|
||||
* example:
|
||||
* Each request to the pageserver has three LSN values associated with it:
|
||||
* `not_modified_since`, `request_lsn`, and 'effective_request_lsn'.
|
||||
* `not_modified_since` and `request_lsn` are sent to the pageserver, but
|
||||
* in the primary node, we always use UINT64_MAX as the `request_lsn`, so
|
||||
* we remember `effective_request_lsn` separately. In a primary,
|
||||
* `effective_request_lsn` is the last flush WAL position when the request
|
||||
* was sent to the pageserver. That's logically the LSN that we are
|
||||
* requesting the page at, but we send UINT64_MAX to the pageserver so
|
||||
* that if the GC horizon advances past that position, we still get a
|
||||
* valid response instead of an error.
|
||||
*
|
||||
* To determine whether a response to a GetPage request issued earlier is
|
||||
* still valid to satisfy a new page read, we look at the
|
||||
* (not_modified_since, effective_request_lsn] range of the request. It is
|
||||
* effectively a claim that the page has not been modified between those
|
||||
* LSNs. If the range of the old request in the queue overlaps with the
|
||||
* new request, we know that the page hasn't been modified in the union of
|
||||
* the ranges. We can use the response to old request to satisfy the new
|
||||
* request in that case. For example:
|
||||
*
|
||||
* 100 500
|
||||
* Old request: +--------+
|
||||
@@ -1675,9 +1696,9 @@ neon_prefetch_response_usable(XLogRecPtr request_lsn, XLogRecPtr not_modified_si
|
||||
*/
|
||||
|
||||
/* this follows from the checks above */
|
||||
Assert(request_lsn >= slot->not_modified_since);
|
||||
Assert(request_lsns.effective_request_lsn >= slot->request_lsns.not_modified_since);
|
||||
|
||||
return not_modified_since <= slot->request_lsn;
|
||||
return request_lsns.not_modified_since <= slot->request_lsns.effective_request_lsn;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1689,8 +1710,7 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
|
||||
bool exists;
|
||||
NeonResponse *resp;
|
||||
BlockNumber n_blocks;
|
||||
XLogRecPtr request_lsn;
|
||||
XLogRecPtr not_modified_since;
|
||||
neon_request_lsns request_lsns;
|
||||
|
||||
switch (reln->smgr_relpersistence)
|
||||
{
|
||||
@@ -1745,15 +1765,15 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
|
||||
return false;
|
||||
}
|
||||
|
||||
neon_get_request_lsn(InfoFromSMgrRel(reln), forkNum, REL_METADATA_PSEUDO_BLOCKNO,
|
||||
&request_lsn, ¬_modified_since);
|
||||
request_lsns = neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum, REL_METADATA_PSEUDO_BLOCKNO);
|
||||
{
|
||||
NeonExistsRequest request = {
|
||||
.req.tag = T_NeonExistsRequest,
|
||||
.req.lsn = request_lsn,
|
||||
.req.not_modified_since = not_modified_since,
|
||||
.req.lsn = request_lsns.request_lsn,
|
||||
.req.not_modified_since = request_lsns.not_modified_since,
|
||||
.rinfo = InfoFromSMgrRel(reln),
|
||||
.forknum = forkNum};
|
||||
.forknum = forkNum
|
||||
};
|
||||
|
||||
resp = page_server_request(&request);
|
||||
}
|
||||
@@ -1770,7 +1790,7 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
|
||||
errmsg(NEON_TAG "could not read relation existence of rel %u/%u/%u.%u from page server at lsn %X/%08X",
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forkNum,
|
||||
(uint32) (request_lsn >> 32), (uint32) request_lsn),
|
||||
LSN_FORMAT_ARGS(request_lsns.effective_request_lsn)),
|
||||
errdetail("page server returned error: %s",
|
||||
((NeonErrorResponse *) resp)->message)));
|
||||
break;
|
||||
@@ -2135,7 +2155,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
|
||||
|
||||
CopyNRelFileInfoToBufTag(tag, InfoFromSMgrRel(reln));
|
||||
|
||||
ring_index = prefetch_register_buffer(tag, NULL, NULL);
|
||||
ring_index = prefetch_register_buffer(tag, NULL);
|
||||
|
||||
Assert(ring_index < MyPState->ring_unused &&
|
||||
MyPState->ring_last <= ring_index);
|
||||
@@ -2188,10 +2208,10 @@ neon_writeback(SMgrRelation reln, ForkNumber forknum,
|
||||
void
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
XLogRecPtr request_lsn, XLogRecPtr not_modified_since, char *buffer)
|
||||
neon_request_lsns request_lsns, char *buffer)
|
||||
#else
|
||||
neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
XLogRecPtr request_lsn, XLogRecPtr not_modified_since, void *buffer)
|
||||
neon_request_lsns request_lsns, void *buffer)
|
||||
#endif
|
||||
{
|
||||
NeonResponse *resp;
|
||||
@@ -2223,7 +2243,7 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
* value of the LwLsn cache when the entry is not found.
|
||||
*/
|
||||
if (RecoveryInProgress() && !(MyBackendType == B_STARTUP))
|
||||
XLogWaitForReplayOf(request_lsn);
|
||||
XLogWaitForReplayOf(request_lsns.request_lsn);
|
||||
|
||||
/*
|
||||
* Try to find prefetched page in the list of received pages.
|
||||
@@ -2234,7 +2254,7 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
if (entry != NULL)
|
||||
{
|
||||
slot = entry->slot;
|
||||
if (neon_prefetch_response_usable(request_lsn, not_modified_since, slot))
|
||||
if (neon_prefetch_response_usable(request_lsns, slot))
|
||||
{
|
||||
ring_index = slot->my_ring_index;
|
||||
pgBufferUsage.prefetch.hits += 1;
|
||||
@@ -2268,8 +2288,7 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
{
|
||||
pgBufferUsage.prefetch.misses += 1;
|
||||
|
||||
ring_index = prefetch_register_buffer(buftag, &request_lsn,
|
||||
¬_modified_since);
|
||||
ring_index = prefetch_register_buffer(buftag, &request_lsns);
|
||||
slot = GetPrfSlot(ring_index);
|
||||
}
|
||||
else
|
||||
@@ -2310,7 +2329,7 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
slot->shard_no, blkno,
|
||||
RelFileInfoFmt(rinfo),
|
||||
forkNum,
|
||||
(uint32) (request_lsn >> 32), (uint32) request_lsn),
|
||||
LSN_FORMAT_ARGS(request_lsns.effective_request_lsn)),
|
||||
errdetail("page server returned error: %s",
|
||||
((NeonErrorResponse *) resp)->message)));
|
||||
break;
|
||||
@@ -2333,8 +2352,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, char *buffer
|
||||
neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer)
|
||||
#endif
|
||||
{
|
||||
XLogRecPtr request_lsn;
|
||||
XLogRecPtr not_modified_since;
|
||||
neon_request_lsns request_lsns;
|
||||
|
||||
switch (reln->smgr_relpersistence)
|
||||
{
|
||||
@@ -2359,9 +2377,8 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
|
||||
return;
|
||||
}
|
||||
|
||||
neon_get_request_lsn(InfoFromSMgrRel(reln), forkNum, blkno,
|
||||
&request_lsn, ¬_modified_since);
|
||||
neon_read_at_lsn(InfoFromSMgrRel(reln), forkNum, blkno, request_lsn, not_modified_since, buffer);
|
||||
request_lsns = neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum, blkno);
|
||||
neon_read_at_lsn(InfoFromSMgrRel(reln), forkNum, blkno, request_lsns, buffer);
|
||||
|
||||
#ifdef DEBUG_COMPARE_LOCAL
|
||||
if (forkNum == MAIN_FORKNUM && IS_LOCAL_REL(reln))
|
||||
@@ -2530,8 +2547,7 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
|
||||
{
|
||||
NeonResponse *resp;
|
||||
BlockNumber n_blocks;
|
||||
XLogRecPtr request_lsn;
|
||||
XLogRecPtr not_modified_since;
|
||||
neon_request_lsns request_lsns;
|
||||
|
||||
switch (reln->smgr_relpersistence)
|
||||
{
|
||||
@@ -2558,13 +2574,12 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
|
||||
return n_blocks;
|
||||
}
|
||||
|
||||
neon_get_request_lsn(InfoFromSMgrRel(reln), forknum, REL_METADATA_PSEUDO_BLOCKNO,
|
||||
&request_lsn, ¬_modified_since);
|
||||
request_lsns = neon_get_request_lsns(InfoFromSMgrRel(reln), forknum, REL_METADATA_PSEUDO_BLOCKNO);
|
||||
{
|
||||
NeonNblocksRequest request = {
|
||||
.req.tag = T_NeonNblocksRequest,
|
||||
.req.lsn = request_lsn,
|
||||
.req.not_modified_since = not_modified_since,
|
||||
.req.lsn = request_lsns.request_lsn,
|
||||
.req.not_modified_since = request_lsns.not_modified_since,
|
||||
.rinfo = InfoFromSMgrRel(reln),
|
||||
.forknum = forknum,
|
||||
};
|
||||
@@ -2584,7 +2599,7 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
|
||||
errmsg(NEON_TAG "could not read relation size of rel %u/%u/%u.%u from page server at lsn %X/%08X",
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forknum,
|
||||
(uint32) (request_lsn >> 32), (uint32) request_lsn),
|
||||
LSN_FORMAT_ARGS(request_lsns.effective_request_lsn)),
|
||||
errdetail("page server returned error: %s",
|
||||
((NeonErrorResponse *) resp)->message)));
|
||||
break;
|
||||
@@ -2595,10 +2610,10 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
|
||||
update_cached_relsize(InfoFromSMgrRel(reln), forknum, n_blocks);
|
||||
|
||||
neon_log(SmgrTrace, "neon_nblocks: rel %u/%u/%u fork %u (request LSN %X/%08X): %u blocks",
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forknum,
|
||||
(uint32) (request_lsn >> 32), (uint32) request_lsn,
|
||||
n_blocks);
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forknum,
|
||||
LSN_FORMAT_ARGS(request_lsns.effective_request_lsn),
|
||||
n_blocks);
|
||||
|
||||
pfree(resp);
|
||||
return n_blocks;
|
||||
@@ -2612,17 +2627,15 @@ neon_dbsize(Oid dbNode)
|
||||
{
|
||||
NeonResponse *resp;
|
||||
int64 db_size;
|
||||
XLogRecPtr request_lsn,
|
||||
not_modified_since;
|
||||
neon_request_lsns request_lsns;
|
||||
NRelFileInfo dummy_node = {0};
|
||||
|
||||
neon_get_request_lsn(dummy_node, MAIN_FORKNUM, REL_METADATA_PSEUDO_BLOCKNO,
|
||||
&request_lsn, ¬_modified_since);
|
||||
request_lsns = neon_get_request_lsns(dummy_node, MAIN_FORKNUM, REL_METADATA_PSEUDO_BLOCKNO);
|
||||
{
|
||||
NeonDbSizeRequest request = {
|
||||
.req.tag = T_NeonDbSizeRequest,
|
||||
.req.lsn = request_lsn,
|
||||
.req.not_modified_since = not_modified_since,
|
||||
.req.lsn = request_lsns.request_lsn,
|
||||
.req.not_modified_since = request_lsns.not_modified_since,
|
||||
.dbNode = dbNode,
|
||||
};
|
||||
|
||||
@@ -2639,8 +2652,7 @@ neon_dbsize(Oid dbNode)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_IO_ERROR),
|
||||
errmsg(NEON_TAG "could not read db size of db %u from page server at lsn %X/%08X",
|
||||
dbNode,
|
||||
(uint32) (request_lsn >> 32), (uint32) request_lsn),
|
||||
dbNode, LSN_FORMAT_ARGS(request_lsns.effective_request_lsn)),
|
||||
errdetail("page server returned error: %s",
|
||||
((NeonErrorResponse *) resp)->message)));
|
||||
break;
|
||||
@@ -2650,9 +2662,7 @@ neon_dbsize(Oid dbNode)
|
||||
}
|
||||
|
||||
neon_log(SmgrTrace, "neon_dbsize: db %u (request LSN %X/%08X): %ld bytes",
|
||||
dbNode,
|
||||
(uint32) (request_lsn >> 32), (uint32) request_lsn,
|
||||
db_size);
|
||||
dbNode, LSN_FORMAT_ARGS(request_lsns.effective_request_lsn), db_size);
|
||||
|
||||
pfree(resp);
|
||||
return db_size;
|
||||
@@ -2897,6 +2907,10 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
|
||||
XLogRecPtr request_lsn,
|
||||
not_modified_since;
|
||||
|
||||
/*
|
||||
* Compute a request LSN to use, similar to neon_get_request_lsns() but the
|
||||
* logic is a bit simpler.
|
||||
*/
|
||||
if (RecoveryInProgress())
|
||||
{
|
||||
request_lsn = GetXLogReplayRecPtr(NULL);
|
||||
@@ -2908,10 +2922,10 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
|
||||
*/
|
||||
request_lsn = GetRedoStartLsn();
|
||||
}
|
||||
request_lsn = nm_adjust_lsn(request_lsn);
|
||||
}
|
||||
else
|
||||
request_lsn = GetXLogInsertRecPtr();
|
||||
request_lsn = nm_adjust_lsn(request_lsn);
|
||||
request_lsn = UINT64_MAX;
|
||||
|
||||
/*
|
||||
* GetRedoStartLsn() returns LSN of basebackup. We know that the SLRU
|
||||
|
||||
@@ -48,10 +48,10 @@ PG_FUNCTION_INFO_V1(neon_xlogflush);
|
||||
*/
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
typedef void (*neon_read_at_lsn_type) (NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
XLogRecPtr request_lsn, XLogRecPtr not_modified_since, char *buffer);
|
||||
neon_request_lsns request_lsns, char *buffer);
|
||||
#else
|
||||
typedef void (*neon_read_at_lsn_type) (NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
XLogRecPtr request_lsn, XLogRecPtr not_modified_since, void *buffer);
|
||||
neon_request_lsns request_lsns, void *buffer);
|
||||
#endif
|
||||
|
||||
static neon_read_at_lsn_type neon_read_at_lsn_ptr;
|
||||
@@ -298,9 +298,7 @@ get_raw_page_at_lsn(PG_FUNCTION_ARGS)
|
||||
text *relname;
|
||||
text *forkname;
|
||||
uint32 blkno;
|
||||
|
||||
XLogRecPtr request_lsn;
|
||||
XLogRecPtr not_modified_since;
|
||||
neon_request_lsns request_lsns;
|
||||
|
||||
if (PG_NARGS() != 5)
|
||||
elog(ERROR, "unexpected number of arguments in SQL function signature");
|
||||
@@ -312,8 +310,15 @@ get_raw_page_at_lsn(PG_FUNCTION_ARGS)
|
||||
forkname = PG_GETARG_TEXT_PP(1);
|
||||
blkno = PG_GETARG_UINT32(2);
|
||||
|
||||
request_lsn = PG_ARGISNULL(3) ? GetXLogInsertRecPtr() : PG_GETARG_LSN(3);
|
||||
not_modified_since = PG_ARGISNULL(4) ? request_lsn : PG_GETARG_LSN(4);
|
||||
request_lsns.request_lsn = PG_ARGISNULL(3) ? GetXLogInsertRecPtr() : PG_GETARG_LSN(3);
|
||||
request_lsns.not_modified_since = PG_ARGISNULL(4) ? request_lsns.request_lsn : PG_GETARG_LSN(4);
|
||||
/*
|
||||
* For the time being, use the same LSN for request and
|
||||
* effective request LSN. If any test needed to use UINT64_MAX
|
||||
* as the request LSN, we'd need to add effective_request_lsn
|
||||
* as a new argument.
|
||||
*/
|
||||
request_lsns.effective_request_lsn = request_lsns.request_lsn;
|
||||
|
||||
if (!superuser())
|
||||
ereport(ERROR,
|
||||
@@ -367,7 +372,8 @@ get_raw_page_at_lsn(PG_FUNCTION_ARGS)
|
||||
SET_VARSIZE(raw_page, BLCKSZ + VARHDRSZ);
|
||||
raw_page_data = VARDATA(raw_page);
|
||||
|
||||
neon_read_at_lsn(InfoFromRelation(rel), forknum, blkno, request_lsn, not_modified_since, raw_page_data);
|
||||
neon_read_at_lsn(InfoFromRelation(rel), forknum, blkno, request_lsns,
|
||||
raw_page_data);
|
||||
|
||||
relation_close(rel, AccessShareLock);
|
||||
|
||||
@@ -413,19 +419,25 @@ get_raw_page_at_lsn_ex(PG_FUNCTION_ARGS)
|
||||
|
||||
ForkNumber forknum = PG_GETARG_UINT32(3);
|
||||
uint32 blkno = PG_GETARG_UINT32(4);
|
||||
XLogRecPtr request_lsn;
|
||||
XLogRecPtr not_modified_since;
|
||||
neon_request_lsns request_lsns;
|
||||
|
||||
/* Initialize buffer to copy to */
|
||||
bytea *raw_page = (bytea *) palloc(BLCKSZ + VARHDRSZ);
|
||||
|
||||
request_lsn = PG_ARGISNULL(5) ? GetXLogInsertRecPtr() : PG_GETARG_LSN(5);
|
||||
not_modified_since = PG_ARGISNULL(6) ? request_lsn : PG_GETARG_LSN(6);
|
||||
request_lsns.request_lsn = PG_ARGISNULL(5) ? GetXLogInsertRecPtr() : PG_GETARG_LSN(5);
|
||||
request_lsns.not_modified_since = PG_ARGISNULL(6) ? request_lsns.request_lsn : PG_GETARG_LSN(6);
|
||||
/*
|
||||
* For the time being, use the same LSN for request
|
||||
* and effective request LSN. If any test needed to
|
||||
* use UINT64_MAX as the request LSN, we'd need to add
|
||||
* effective_request_lsn as a new argument.
|
||||
*/
|
||||
request_lsns.effective_request_lsn = request_lsns.request_lsn;
|
||||
|
||||
SET_VARSIZE(raw_page, BLCKSZ + VARHDRSZ);
|
||||
raw_page_data = VARDATA(raw_page);
|
||||
|
||||
neon_read_at_lsn(rinfo, forknum, blkno, request_lsn, not_modified_since, raw_page_data);
|
||||
neon_read_at_lsn(rinfo, forknum, blkno, request_lsns, raw_page_data);
|
||||
PG_RETURN_BYTEA_P(raw_page);
|
||||
}
|
||||
}
|
||||
|
||||
poetry.lock: 585 changes (generated)
@@ -158,6 +158,28 @@ files = [
|
||||
attrs = ">=16.0.0"
|
||||
pluggy = ">=0.4.0"
|
||||
|
||||
[[package]]
|
||||
name = "annotated-types"
|
||||
version = "0.6.0"
|
||||
description = "Reusable constraint types to use with typing.Annotated"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"},
|
||||
{file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "antlr4-python3-runtime"
|
||||
version = "4.13.1"
|
||||
description = "ANTLR 4.13.1 runtime for Python 3"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "antlr4-python3-runtime-4.13.1.tar.gz", hash = "sha256:3cd282f5ea7cfb841537fe01f143350fdb1c0b1ce7981443a2fa8513fddb6d1a"},
|
||||
{file = "antlr4_python3_runtime-4.13.1-py3-none-any.whl", hash = "sha256:78ec57aad12c97ac039ca27403ad61cb98aaec8a3f9bb8144f889aa0fa28b943"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyio"
|
||||
version = "4.3.0"
|
||||
@@ -267,22 +289,23 @@ tests-no-zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy"
|
||||
|
||||
[[package]]
|
||||
name = "aws-sam-translator"
|
||||
version = "1.48.0"
|
||||
version = "1.88.0"
|
||||
description = "AWS SAM Translator is a library that transform SAM templates into AWS CloudFormation templates"
|
||||
optional = false
|
||||
python-versions = ">=3.7, <=4.0, !=4.0"
|
||||
python-versions = "!=4.0,<=4.0,>=3.8"
|
||||
files = [
|
||||
{file = "aws-sam-translator-1.48.0.tar.gz", hash = "sha256:7171037323dfa30f8f73e9bccb9210e4c384a585e087219a9518a5204f0a2c44"},
|
||||
{file = "aws_sam_translator-1.48.0-py2-none-any.whl", hash = "sha256:be18dfa3dfe7ab291d281667c5f73ac62dbe6bfe86df7d122e4258b906b736f0"},
|
||||
{file = "aws_sam_translator-1.48.0-py3-none-any.whl", hash = "sha256:ca4f8f9910d7713aeaba59346775bfb3198f6acb47c6704572f9bd3fc0fb5bf0"},
|
||||
{file = "aws_sam_translator-1.88.0-py3-none-any.whl", hash = "sha256:aa93d498d8de3fb3d485c316155b1628144b823bbc176099a20de06df666fcac"},
|
||||
{file = "aws_sam_translator-1.88.0.tar.gz", hash = "sha256:e77c65f3488566122277accd44a0f1ec018e37403e0d5fe25120d96e537e91a7"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
boto3 = ">=1.19.5,<2.dev0"
|
||||
jsonschema = ">=3.2,<4.0"
|
||||
jsonschema = ">=3.2,<5"
|
||||
pydantic = ">=1.8,<3"
|
||||
typing-extensions = ">=4.4"
|
||||
|
||||
[package.extras]
|
||||
dev = ["black (==20.8b1)", "boto3 (>=1.23,<2)", "click (>=7.1,<8.0)", "coverage (>=5.3,<6.0)", "dateparser (>=0.7,<1.0)", "docopt (>=0.6.2,<0.7.0)", "flake8 (>=3.8.4,<3.9.0)", "parameterized (>=0.7.4,<0.8.0)", "pylint (>=2.9.0,<2.10.0)", "pytest (>=6.2.5,<6.3.0)", "pytest-cov (>=2.10.1,<2.11.0)", "pytest-env (>=0.6.2,<0.7.0)", "pytest-xdist (>=2.5,<3.0)", "pyyaml (>=5.4,<6.0)", "requests (>=2.24.0,<2.25.0)", "tenacity (>=7.0.0,<7.1.0)", "tox (>=3.24,<4.0)"]
|
||||
dev = ["black (==24.3.0)", "boto3 (>=1.23,<2)", "boto3-stubs[appconfig,serverlessrepo] (>=1.19.5,<2.dev0)", "coverage (>=5.3,<8)", "dateparser (>=1.1,<2.0)", "mypy (>=1.3.0,<1.4.0)", "parameterized (>=0.7,<1.0)", "pytest (>=6.2,<8)", "pytest-cov (>=2.10,<5)", "pytest-env (>=0.6,<1)", "pytest-rerunfailures (>=9.1,<12)", "pytest-xdist (>=2.5,<4)", "pyyaml (>=6.0,<7.0)", "requests (>=2.28,<3.0)", "ruamel.yaml (==0.17.21)", "ruff (>=0.1.0,<0.2.0)", "tenacity (>=8.0,<9.0)", "types-PyYAML (>=6.0,<7.0)", "types-jsonschema (>=3.2,<4.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "aws-xray-sdk"
|
||||
@@ -798,24 +821,26 @@ pycparser = "*"
|
||||
|
||||
[[package]]
|
||||
name = "cfn-lint"
|
||||
version = "0.61.3"
|
||||
version = "0.87.1"
|
||||
description = "Checks CloudFormation templates for practices and behaviour that could potentially be improved"
|
||||
optional = false
|
||||
python-versions = ">=3.6, <=4.0, !=4.0"
|
||||
python-versions = "!=4.0,<=4.0,>=3.8"
|
||||
files = [
|
||||
{file = "cfn-lint-0.61.3.tar.gz", hash = "sha256:3806e010d77901f5e935496df690c10e39676434a738fce1a1161cf9c7bd36a2"},
|
||||
{file = "cfn_lint-0.61.3-py3-none-any.whl", hash = "sha256:8e9522fad0c7c98b31ecbdd4724f8d8a5787457cc0f71e62ae0d11104d6e52ab"},
|
||||
{file = "cfn_lint-0.87.1-py3-none-any.whl", hash = "sha256:d450f450635fc223b6f66880ccac52a5fd1a52966fa1705f1ba52b88dfed3071"},
|
||||
{file = "cfn_lint-0.87.1.tar.gz", hash = "sha256:b3ce9d3e5e0eadcea5d584c8ccaa00bf2a990a36a64d7ffd8683bc60b7e4f06f"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
aws-sam-translator = ">=1.47.0"
|
||||
aws-sam-translator = ">=1.87.0"
|
||||
jschema-to-python = ">=1.2.3,<1.3.0"
|
||||
jsonpatch = "*"
|
||||
jsonschema = ">=3.0,<4.0"
|
||||
jsonschema = ">=3.0,<5"
|
||||
junit-xml = ">=1.9,<2.0"
|
||||
networkx = ">=2.4,<3.0"
|
||||
networkx = ">=2.4,<4"
|
||||
pyyaml = ">5.4"
|
||||
regex = ">=2021.7.1"
|
||||
sarif-om = ">=1.0.4,<1.1.0"
|
||||
sympy = ">=1.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "charset-normalizer"
|
||||
@@ -931,24 +956,6 @@ websocket-client = ">=0.32.0"
|
||||
ssh = ["paramiko (>=2.4.2)"]
|
||||
tls = ["cryptography (>=1.3.4)", "idna (>=2.0.0)", "pyOpenSSL (>=17.5.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "ecdsa"
|
||||
version = "0.18.0"
|
||||
description = "ECDSA cryptographic signature library (pure python)"
|
||||
optional = false
|
||||
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
|
||||
files = [
|
||||
{file = "ecdsa-0.18.0-py2.py3-none-any.whl", hash = "sha256:80600258e7ed2f16b9aa1d7c295bd70194109ad5a30fdee0eaeefef1d4c559dd"},
|
||||
{file = "ecdsa-0.18.0.tar.gz", hash = "sha256:190348041559e21b22a1d65cee485282ca11a6f81d503fddb84d5017e9ed1e49"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
six = ">=1.9.0"
|
||||
|
||||
[package.extras]
|
||||
gmpy = ["gmpy"]
|
||||
gmpy2 = ["gmpy2"]
|
||||
|
||||
[[package]]
|
||||
name = "exceptiongroup"
|
||||
version = "1.1.1"
|
||||
@@ -1268,6 +1275,23 @@ files = [
{file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"},
]

[[package]]
name = "joserfc"
version = "0.9.0"
description = "The ultimate Python library for JOSE RFCs, including JWS, JWE, JWK, JWA, JWT"
optional = false
python-versions = ">=3.8"
files = [
{file = "joserfc-0.9.0-py3-none-any.whl", hash = "sha256:4026bdbe2c196cd40574e916fa1e28874d99649412edaab0e373dec3077153fb"},
{file = "joserfc-0.9.0.tar.gz", hash = "sha256:eebca7f587b1761ce43a98ffd5327f2b600b9aa5bb0a77b947687f503ad43bc0"},
]

[package.dependencies]
cryptography = "*"

[package.extras]
drafts = ["pycryptodome"]

[[package]]
name = "jschema-to-python"
version = "1.2.3"
@@ -1309,6 +1333,20 @@ files = [
[package.dependencies]
jsonpointer = ">=1.9"

[[package]]
name = "jsonpath-ng"
version = "1.6.1"
description = "A final implementation of JSONPath for Python that aims to be standard compliant, including arithmetic and binary comparison operators and providing clear AST for metaprogramming."
optional = false
python-versions = "*"
files = [
{file = "jsonpath-ng-1.6.1.tar.gz", hash = "sha256:086c37ba4917304850bd837aeab806670224d3f038fe2833ff593a672ef0a5fa"},
{file = "jsonpath_ng-1.6.1-py3-none-any.whl", hash = "sha256:8f22cd8273d7772eea9aaa84d922e0841aa36fdb8a2c6b7f6c3791a16a9bc0be"},
]

[package.dependencies]
ply = "*"

[[package]]
name = "jsonpickle"
version = "2.2.0"
@@ -1338,24 +1376,39 @@ files = [

[[package]]
name = "jsonschema"
version = "3.2.0"
version = "4.17.3"
description = "An implementation of JSON Schema validation for Python"
optional = false
python-versions = "*"
python-versions = ">=3.7"
files = [
{file = "jsonschema-3.2.0-py2.py3-none-any.whl", hash = "sha256:4e5b3cf8216f577bee9ce139cbe72eca3ea4f292ec60928ff24758ce626cd163"},
{file = "jsonschema-3.2.0.tar.gz", hash = "sha256:c8a85b28d377cc7737e46e2d9f2b4f44ee3c0e1deac6bf46ddefc7187d30797a"},
{file = "jsonschema-4.17.3-py3-none-any.whl", hash = "sha256:a870ad254da1a8ca84b6a2905cac29d265f805acc57af304784962a2aa6508f6"},
{file = "jsonschema-4.17.3.tar.gz", hash = "sha256:0f864437ab8b6076ba6707453ef8f98a6a0d512a80e93f8abdb676f737ecb60d"},
]

[package.dependencies]
attrs = ">=17.4.0"
pyrsistent = ">=0.14.0"
setuptools = "*"
six = ">=1.11.0"
pyrsistent = ">=0.14.0,<0.17.0 || >0.17.0,<0.17.1 || >0.17.1,<0.17.2 || >0.17.2"

[package.extras]
format = ["idna", "jsonpointer (>1.13)", "rfc3987", "strict-rfc3339", "webcolors"]
format-nongpl = ["idna", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "webcolors"]
format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"]
format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"]

[[package]]
name = "jsonschema-spec"
version = "0.1.6"
description = "JSONSchema Spec with object-oriented paths"
optional = false
python-versions = ">=3.7.0,<4.0.0"
files = [
{file = "jsonschema_spec-0.1.6-py3-none-any.whl", hash = "sha256:f2206d18c89d1824c1f775ba14ed039743b41a9167bd2c5bdb774b66b3ca0bbf"},
{file = "jsonschema_spec-0.1.6.tar.gz", hash = "sha256:90215863b56e212086641956b20127ccbf6d8a3a38343dad01d6a74d19482f76"},
]

[package.dependencies]
jsonschema = ">=4.0.0,<4.18.0"
pathable = ">=0.4.1,<0.5.0"
PyYAML = ">=5.1"
requests = ">=2.31.0,<3.0.0"

[[package]]
name = "junit-xml"
@@ -1371,6 +1424,52 @@ files = [
[package.dependencies]
six = "*"

[[package]]
|
||||
name = "lazy-object-proxy"
|
||||
version = "1.10.0"
|
||||
description = "A fast and thorough lazy object proxy."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "lazy-object-proxy-1.10.0.tar.gz", hash = "sha256:78247b6d45f43a52ef35c25b5581459e85117225408a4128a3daf8bf9648ac69"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:855e068b0358ab916454464a884779c7ffa312b8925c6f7401e952dcf3b89977"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab7004cf2e59f7c2e4345604a3e6ea0d92ac44e1c2375527d56492014e690c3"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc0d2fc424e54c70c4bc06787e4072c4f3b1aa2f897dfdc34ce1013cf3ceef05"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e2adb09778797da09d2b5ebdbceebf7dd32e2c96f79da9052b2e87b6ea495895"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b1f711e2c6dcd4edd372cf5dec5c5a30d23bba06ee012093267b3376c079ec83"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp310-cp310-win32.whl", hash = "sha256:76a095cfe6045c7d0ca77db9934e8f7b71b14645f0094ffcd842349ada5c5fb9"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:b4f87d4ed9064b2628da63830986c3d2dca7501e6018347798313fcf028e2fd4"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fec03caabbc6b59ea4a638bee5fce7117be8e99a4103d9d5ad77f15d6f81020c"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02c83f957782cbbe8136bee26416686a6ae998c7b6191711a04da776dc9e47d4"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:009e6bb1f1935a62889ddc8541514b6a9e1fcf302667dcb049a0be5c8f613e56"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:75fc59fc450050b1b3c203c35020bc41bd2695ed692a392924c6ce180c6f1dc9"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:782e2c9b2aab1708ffb07d4bf377d12901d7a1d99e5e410d648d892f8967ab1f"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp311-cp311-win32.whl", hash = "sha256:edb45bb8278574710e68a6b021599a10ce730d156e5b254941754a9cc0b17d03"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:e271058822765ad5e3bca7f05f2ace0de58a3f4e62045a8c90a0dfd2f8ad8cc6"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e98c8af98d5707dcdecc9ab0863c0ea6e88545d42ca7c3feffb6b4d1e370c7ba"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:952c81d415b9b80ea261d2372d2a4a2332a3890c2b83e0535f263ddfe43f0d43"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80b39d3a151309efc8cc48675918891b865bdf742a8616a337cb0090791a0de9"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e221060b701e2aa2ea991542900dd13907a5c90fa80e199dbf5a03359019e7a3"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:92f09ff65ecff3108e56526f9e2481b8116c0b9e1425325e13245abfd79bdb1b"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp312-cp312-win32.whl", hash = "sha256:3ad54b9ddbe20ae9f7c1b29e52f123120772b06dbb18ec6be9101369d63a4074"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:127a789c75151db6af398b8972178afe6bda7d6f68730c057fbbc2e96b08d282"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4ed0518a14dd26092614412936920ad081a424bdcb54cc13349a8e2c6d106a"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ad9e6ed739285919aa9661a5bbed0aaf410aa60231373c5579c6b4801bd883c"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fc0a92c02fa1ca1e84fc60fa258458e5bf89d90a1ddaeb8ed9cc3147f417255"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0aefc7591920bbd360d57ea03c995cebc204b424524a5bd78406f6e1b8b2a5d8"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5faf03a7d8942bb4476e3b62fd0f4cf94eaf4618e304a19865abf89a35c0bbee"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp38-cp38-win32.whl", hash = "sha256:e333e2324307a7b5d86adfa835bb500ee70bfcd1447384a822e96495796b0ca4"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:cb73507defd385b7705c599a94474b1d5222a508e502553ef94114a143ec6696"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:366c32fe5355ef5fc8a232c5436f4cc66e9d3e8967c01fb2e6302fd6627e3d94"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2297f08f08a2bb0d32a4265e98a006643cd7233fb7983032bd61ac7a02956b3b"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18dd842b49456aaa9a7cf535b04ca4571a302ff72ed8740d06b5adcd41fe0757"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:217138197c170a2a74ca0e05bddcd5f1796c735c37d0eee33e43259b192aa424"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9a3a87cf1e133e5b1994144c12ca4aa3d9698517fe1e2ca82977781b16955658"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp39-cp39-win32.whl", hash = "sha256:30b339b2a743c5288405aa79a69e706a06e02958eab31859f7f3c04980853b70"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:a899b10e17743683b293a729d3a11f2f399e8a90c73b089e29f5d0fe3509f0dd"},
|
||||
{file = "lazy_object_proxy-1.10.0-pp310.pp311.pp312.pp38.pp39-none-any.whl", hash = "sha256:80fa48bd89c8f2f456fc0765c11c23bf5af827febacd2f523ca5bc1893fcc09d"},
|
||||
]
|
||||
|
||||
[[package]]
name = "markupsafe"
version = "2.1.1"
@@ -1422,64 +1521,80 @@ files = [

[[package]]
name = "moto"
version = "4.1.2"
version = "5.0.6"
description = ""
optional = false
python-versions = ">=3.7"
python-versions = ">=3.8"
files = [
{file = "moto-4.1.2-py2.py3-none-any.whl", hash = "sha256:1b361ece638c74a657325378a259276f368aafce2f8be84f8143e69fa93ce8ec"},
{file = "moto-4.1.2.tar.gz", hash = "sha256:63431733d2a02c7bd652ad71ec1da442a0e0d580cbac5eeb50d440a2ce066eac"},
{file = "moto-5.0.6-py2.py3-none-any.whl", hash = "sha256:ca1e22831a741733b581ff2ef4d6ae2e1c6db1eab97af1b78b86ca2c6e88c609"},
{file = "moto-5.0.6.tar.gz", hash = "sha256:ad8b23f2b555ad694da8b2432a42b6d96beaaf67a4e7d932196a72193a2eee2c"},
]

[package.dependencies]
antlr4-python3-runtime = {version = "*", optional = true, markers = "extra == \"server\""}
aws-xray-sdk = {version = ">=0.93,<0.96 || >0.96", optional = true, markers = "extra == \"server\""}
boto3 = ">=1.9.201"
botocore = ">=1.12.201"
botocore = ">=1.14.0"
cfn-lint = {version = ">=0.40.0", optional = true, markers = "extra == \"server\""}
cryptography = ">=3.3.1"
docker = {version = ">=2.5.1", optional = true, markers = "extra == \"server\""}
ecdsa = {version = "!=0.15", optional = true, markers = "extra == \"server\""}
docker = {version = ">=3.0.0", optional = true, markers = "extra == \"server\""}
flask = {version = "<2.2.0 || >2.2.0,<2.2.1 || >2.2.1", optional = true, markers = "extra == \"server\""}
flask-cors = {version = "*", optional = true, markers = "extra == \"server\""}
graphql-core = {version = "*", optional = true, markers = "extra == \"server\""}
Jinja2 = ">=2.10.1"
joserfc = {version = ">=0.9.0", optional = true, markers = "extra == \"server\""}
jsondiff = {version = ">=1.1.2", optional = true, markers = "extra == \"server\""}
openapi-spec-validator = {version = ">=0.2.8", optional = true, markers = "extra == \"server\""}
jsonpath-ng = {version = "*", optional = true, markers = "extra == \"server\""}
openapi-spec-validator = {version = ">=0.5.0", optional = true, markers = "extra == \"server\""}
py-partiql-parser = {version = "0.5.4", optional = true, markers = "extra == \"server\""}
pyparsing = {version = ">=3.0.7", optional = true, markers = "extra == \"server\""}
python-dateutil = ">=2.1,<3.0.0"
python-jose = {version = ">=3.1.0,<4.0.0", extras = ["cryptography"], optional = true, markers = "extra == \"server\""}
PyYAML = {version = ">=5.1", optional = true, markers = "extra == \"server\""}
requests = ">=2.5"
responses = ">=0.13.0"
responses = ">=0.15.0"
setuptools = {version = "*", optional = true, markers = "extra == \"server\""}
sshpubkeys = {version = ">=3.1.0", optional = true, markers = "extra == \"server\""}
werkzeug = ">=0.5,<2.2.0 || >2.2.0,<2.2.1 || >2.2.1"
xmltodict = "*"

[package.extras]
all = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=2.5.1)", "ecdsa (!=0.15)", "graphql-core", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.2.8)", "pyparsing (>=3.0.7)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "setuptools", "sshpubkeys (>=3.1.0)"]
apigateway = ["PyYAML (>=5.1)", "ecdsa (!=0.15)", "openapi-spec-validator (>=0.2.8)", "python-jose[cryptography] (>=3.1.0,<4.0.0)"]
apigatewayv2 = ["PyYAML (>=5.1)"]
all = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "jsonpath-ng", "multipart", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.4)", "pyparsing (>=3.0.7)", "setuptools"]
apigateway = ["PyYAML (>=5.1)", "joserfc (>=0.9.0)", "openapi-spec-validator (>=0.5.0)"]
apigatewayv2 = ["PyYAML (>=5.1)", "openapi-spec-validator (>=0.5.0)"]
appsync = ["graphql-core"]
awslambda = ["docker (>=2.5.1)"]
batch = ["docker (>=2.5.1)"]
cloudformation = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=2.5.1)", "ecdsa (!=0.15)", "graphql-core", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.2.8)", "pyparsing (>=3.0.7)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "setuptools", "sshpubkeys (>=3.1.0)"]
cognitoidp = ["ecdsa (!=0.15)", "python-jose[cryptography] (>=3.1.0,<4.0.0)"]
ds = ["sshpubkeys (>=3.1.0)"]
dynamodb = ["docker (>=2.5.1)"]
dynamodbstreams = ["docker (>=2.5.1)"]
ebs = ["sshpubkeys (>=3.1.0)"]
ec2 = ["sshpubkeys (>=3.1.0)"]
efs = ["sshpubkeys (>=3.1.0)"]
eks = ["sshpubkeys (>=3.1.0)"]
awslambda = ["docker (>=3.0.0)"]
batch = ["docker (>=3.0.0)"]
cloudformation = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.4)", "pyparsing (>=3.0.7)", "setuptools"]
cognitoidp = ["joserfc (>=0.9.0)"]
dynamodb = ["docker (>=3.0.0)", "py-partiql-parser (==0.5.4)"]
dynamodbstreams = ["docker (>=3.0.0)", "py-partiql-parser (==0.5.4)"]
glue = ["pyparsing (>=3.0.7)"]
iotdata = ["jsondiff (>=1.1.2)"]
route53resolver = ["sshpubkeys (>=3.1.0)"]
s3 = ["PyYAML (>=5.1)"]
server = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=2.5.1)", "ecdsa (!=0.15)", "flask (!=2.2.0,!=2.2.1)", "flask-cors", "graphql-core", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.2.8)", "pyparsing (>=3.0.7)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "setuptools", "sshpubkeys (>=3.1.0)"]
proxy = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=2.5.1)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "jsonpath-ng", "multipart", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.4)", "pyparsing (>=3.0.7)", "setuptools"]
resourcegroupstaggingapi = ["PyYAML (>=5.1)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.4)", "pyparsing (>=3.0.7)"]
s3 = ["PyYAML (>=5.1)", "py-partiql-parser (==0.5.4)"]
s3crc32c = ["PyYAML (>=5.1)", "crc32c", "py-partiql-parser (==0.5.4)"]
server = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "flask (!=2.2.0,!=2.2.1)", "flask-cors", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "jsonpath-ng", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.4)", "pyparsing (>=3.0.7)", "setuptools"]
ssm = ["PyYAML (>=5.1)"]
stepfunctions = ["antlr4-python3-runtime", "jsonpath-ng"]
xray = ["aws-xray-sdk (>=0.93,!=0.96)", "setuptools"]

[[package]]
name = "mpmath"
version = "1.3.0"
description = "Python library for arbitrary-precision floating-point arithmetic"
optional = false
python-versions = "*"
files = [
{file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"},
{file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"},
]

[package.extras]
develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"]
docs = ["sphinx"]
gmpy = ["gmpy2 (>=2.1.0a4)"]
tests = ["pytest (>=4.6)"]

[[package]]
name = "multidict"
version = "6.0.4"
@@ -1654,42 +1769,38 @@ test = ["codecov (>=2.1)", "pytest (>=7.1)", "pytest-cov (>=3.0)"]

[[package]]
name = "openapi-schema-validator"
version = "0.2.3"
version = "0.4.4"
description = "OpenAPI schema validation for Python"
optional = false
python-versions = ">=3.7.0,<4.0.0"
files = [
{file = "openapi-schema-validator-0.2.3.tar.gz", hash = "sha256:2c64907728c3ef78e23711c8840a423f0b241588c9ed929855e4b2d1bb0cf5f2"},
{file = "openapi_schema_validator-0.2.3-py3-none-any.whl", hash = "sha256:9bae709212a19222892cabcc60cafd903cbf4b220223f48583afa3c0e3cc6fc4"},
{file = "openapi_schema_validator-0.4.4-py3-none-any.whl", hash = "sha256:79f37f38ef9fd5206b924ed7a6f382cea7b649b3b56383c47f1906082b7b9015"},
{file = "openapi_schema_validator-0.4.4.tar.gz", hash = "sha256:c573e2be2c783abae56c5a1486ab716ca96e09d1c3eab56020d1dc680aa57bf8"},
]

[package.dependencies]
jsonschema = ">=3.0.0,<5.0.0"
jsonschema = ">=4.0.0,<4.18.0"
rfc3339-validator = "*"

[package.extras]
isodate = ["isodate"]
rfc3339-validator = ["rfc3339-validator"]
strict-rfc3339 = ["strict-rfc3339"]
docs = ["sphinx (>=5.3.0,<6.0.0)", "sphinx-immaterial (>=0.11.0,<0.12.0)"]

[[package]]
name = "openapi-spec-validator"
version = "0.4.0"
description = "OpenAPI 2.0 (aka Swagger) and OpenAPI 3.0 spec validator"
version = "0.5.7"
description = "OpenAPI 2.0 (aka Swagger) and OpenAPI 3 spec validator"
optional = false
python-versions = ">=3.7.0,<4.0.0"
files = [
{file = "openapi-spec-validator-0.4.0.tar.gz", hash = "sha256:97f258850afc97b048f7c2653855e0f88fa66ac103c2be5077c7960aca2ad49a"},
{file = "openapi_spec_validator-0.4.0-py3-none-any.whl", hash = "sha256:06900ac4d546a1df3642a779da0055be58869c598e3042a2fef067cfd99d04d0"},
{file = "openapi_spec_validator-0.5.7-py3-none-any.whl", hash = "sha256:8712d2879db7692974ef89c47a3ebfc79436442921ec3a826ac0ce80cde8c549"},
{file = "openapi_spec_validator-0.5.7.tar.gz", hash = "sha256:6c2d42180045a80fd6314de848b94310bdb0fa4949f4b099578b69f79d9fa5ac"},
]

[package.dependencies]
jsonschema = ">=3.2.0,<5.0.0"
openapi-schema-validator = ">=0.2.0,<0.3.0"
PyYAML = ">=5.1"
setuptools = "*"

[package.extras]
requests = ["requests"]
jsonschema = ">=4.0.0,<4.18.0"
jsonschema-spec = ">=0.1.1,<0.2.0"
lazy-object-proxy = ">=1.7.1,<2.0.0"
openapi-schema-validator = ">=0.4.2,<0.5.0"

[[package]]
name = "packaging"
@@ -1702,6 +1813,17 @@ files = [
{file = "packaging-23.0.tar.gz", hash = "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"},
]

[[package]]
name = "pathable"
version = "0.4.3"
description = "Object-oriented paths"
optional = false
python-versions = ">=3.7.0,<4.0.0"
files = [
{file = "pathable-0.4.3-py3-none-any.whl", hash = "sha256:cdd7b1f9d7d5c8b8d3315dbf5a86b2596053ae845f056f57d97c0eefff84da14"},
{file = "pathable-0.4.3.tar.gz", hash = "sha256:5c869d315be50776cc8a993f3af43e0c60dc01506b399643f919034ebf4cdcab"},
]

[[package]]
name = "pbr"
version = "5.9.0"
@@ -1728,6 +1850,17 @@ files = [
dev = ["pre-commit", "tox"]
testing = ["pytest", "pytest-benchmark"]

[[package]]
name = "ply"
version = "3.11"
description = "Python Lex & Yacc"
optional = false
python-versions = "*"
files = [
{file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"},
{file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"},
]

[[package]]
name = "prometheus-client"
version = "0.14.1"
@@ -1840,16 +1973,19 @@ files = [
]

[[package]]
name = "pyasn1"
version = "0.4.8"
description = "ASN.1 types and codecs"
name = "py-partiql-parser"
version = "0.5.4"
description = "Pure Python PartiQL Parser"
optional = false
python-versions = "*"
files = [
{file = "pyasn1-0.4.8-py2.py3-none-any.whl", hash = "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d"},
{file = "pyasn1-0.4.8.tar.gz", hash = "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba"},
{file = "py_partiql_parser-0.5.4-py2.py3-none-any.whl", hash = "sha256:3dc4295a47da9587681a96b35c6e151886fdbd0a4acbe0d97c4c68e5f689d315"},
{file = "py_partiql_parser-0.5.4.tar.gz", hash = "sha256:72e043919538fa63edae72fb59afc7e3fd93adbde656718a7d2b4666f23dd114"},
]

[package.extras]
dev = ["black (==22.6.0)", "flake8", "mypy", "pytest"]

[[package]]
name = "pycparser"
version = "2.21"
@@ -1861,6 +1997,116 @@ files = [
{file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"},
]
|
||||
[[package]]
|
||||
name = "pydantic"
|
||||
version = "2.7.1"
|
||||
description = "Data validation using Python type hints"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "pydantic-2.7.1-py3-none-any.whl", hash = "sha256:e029badca45266732a9a79898a15ae2e8b14840b1eabbb25844be28f0b33f3d5"},
|
||||
{file = "pydantic-2.7.1.tar.gz", hash = "sha256:e9dbb5eada8abe4d9ae5f46b9939aead650cd2b68f249bb3a8139dbe125803cc"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
annotated-types = ">=0.4.0"
|
||||
pydantic-core = "2.18.2"
|
||||
typing-extensions = ">=4.6.1"
|
||||
|
||||
[package.extras]
|
||||
email = ["email-validator (>=2.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "pydantic-core"
|
||||
version = "2.18.2"
|
||||
description = "Core functionality for Pydantic validation and serialization"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "pydantic_core-2.18.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9e08e867b306f525802df7cd16c44ff5ebbe747ff0ca6cf3fde7f36c05a59a81"},
|
||||
{file = "pydantic_core-2.18.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f0a21cbaa69900cbe1a2e7cad2aa74ac3cf21b10c3efb0fa0b80305274c0e8a2"},
|
||||
{file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0680b1f1f11fda801397de52c36ce38ef1c1dc841a0927a94f226dea29c3ae3d"},
|
||||
{file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:95b9d5e72481d3780ba3442eac863eae92ae43a5f3adb5b4d0a1de89d42bb250"},
|
||||
{file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fcf5cd9c4b655ad666ca332b9a081112cd7a58a8b5a6ca7a3104bc950f2038"},
|
||||
{file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b5155ff768083cb1d62f3e143b49a8a3432e6789a3abee8acd005c3c7af1c74"},
|
||||
{file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:553ef617b6836fc7e4df130bb851e32fe357ce36336d897fd6646d6058d980af"},
|
||||
{file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89ed9eb7d616ef5714e5590e6cf7f23b02d0d539767d33561e3675d6f9e3857"},
|
||||
{file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:75f7e9488238e920ab6204399ded280dc4c307d034f3924cd7f90a38b1829563"},
|
||||
{file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ef26c9e94a8c04a1b2924149a9cb081836913818e55681722d7f29af88fe7b38"},
|
||||
{file = "pydantic_core-2.18.2-cp310-none-win32.whl", hash = "sha256:182245ff6b0039e82b6bb585ed55a64d7c81c560715d1bad0cbad6dfa07b4027"},
|
||||
{file = "pydantic_core-2.18.2-cp310-none-win_amd64.whl", hash = "sha256:e23ec367a948b6d812301afc1b13f8094ab7b2c280af66ef450efc357d2ae543"},
|
||||
{file = "pydantic_core-2.18.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:219da3f096d50a157f33645a1cf31c0ad1fe829a92181dd1311022f986e5fbe3"},
|
||||
{file = "pydantic_core-2.18.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cc1cfd88a64e012b74e94cd00bbe0f9c6df57049c97f02bb07d39e9c852e19a4"},
|
||||
{file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05b7133a6e6aeb8df37d6f413f7705a37ab4031597f64ab56384c94d98fa0e90"},
|
||||
{file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:224c421235f6102e8737032483f43c1a8cfb1d2f45740c44166219599358c2cd"},
|
||||
{file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b14d82cdb934e99dda6d9d60dc84a24379820176cc4a0d123f88df319ae9c150"},
|
||||
{file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2728b01246a3bba6de144f9e3115b532ee44bd6cf39795194fb75491824a1413"},
|
||||
{file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:470b94480bb5ee929f5acba6995251ada5e059a5ef3e0dfc63cca287283ebfa6"},
|
||||
{file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:997abc4df705d1295a42f95b4eec4950a37ad8ae46d913caeee117b6b198811c"},
|
||||
{file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:75250dbc5290e3f1a0f4618db35e51a165186f9034eff158f3d490b3fed9f8a0"},
|
||||
{file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4456f2dca97c425231d7315737d45239b2b51a50dc2b6f0c2bb181fce6207664"},
|
||||
{file = "pydantic_core-2.18.2-cp311-none-win32.whl", hash = "sha256:269322dcc3d8bdb69f054681edff86276b2ff972447863cf34c8b860f5188e2e"},
|
||||
{file = "pydantic_core-2.18.2-cp311-none-win_amd64.whl", hash = "sha256:800d60565aec896f25bc3cfa56d2277d52d5182af08162f7954f938c06dc4ee3"},
|
||||
{file = "pydantic_core-2.18.2-cp311-none-win_arm64.whl", hash = "sha256:1404c69d6a676245199767ba4f633cce5f4ad4181f9d0ccb0577e1f66cf4c46d"},
|
||||
{file = "pydantic_core-2.18.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:fb2bd7be70c0fe4dfd32c951bc813d9fe6ebcbfdd15a07527796c8204bd36242"},
|
||||
{file = "pydantic_core-2.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6132dd3bd52838acddca05a72aafb6eab6536aa145e923bb50f45e78b7251043"},
|
||||
{file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d904828195733c183d20a54230c0df0eb46ec746ea1a666730787353e87182"},
|
||||
{file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c9bd70772c720142be1020eac55f8143a34ec9f82d75a8e7a07852023e46617f"},
|
||||
{file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b8ed04b3582771764538f7ee7001b02e1170223cf9b75dff0bc698fadb00cf3"},
|
||||
{file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6dac87ddb34aaec85f873d737e9d06a3555a1cc1a8e0c44b7f8d5daeb89d86f"},
|
||||
{file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ca4ae5a27ad7a4ee5170aebce1574b375de390bc01284f87b18d43a3984df72"},
|
||||
{file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:886eec03591b7cf058467a70a87733b35f44707bd86cf64a615584fd72488b7c"},
|
||||
{file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ca7b0c1f1c983e064caa85f3792dd2fe3526b3505378874afa84baf662e12241"},
|
||||
{file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4b4356d3538c3649337df4074e81b85f0616b79731fe22dd11b99499b2ebbdf3"},
|
||||
{file = "pydantic_core-2.18.2-cp312-none-win32.whl", hash = "sha256:8b172601454f2d7701121bbec3425dd71efcb787a027edf49724c9cefc14c038"},
|
||||
{file = "pydantic_core-2.18.2-cp312-none-win_amd64.whl", hash = "sha256:b1bd7e47b1558ea872bd16c8502c414f9e90dcf12f1395129d7bb42a09a95438"},
|
||||
{file = "pydantic_core-2.18.2-cp312-none-win_arm64.whl", hash = "sha256:98758d627ff397e752bc339272c14c98199c613f922d4a384ddc07526c86a2ec"},
|
||||
{file = "pydantic_core-2.18.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:9fdad8e35f278b2c3eb77cbdc5c0a49dada440657bf738d6905ce106dc1de439"},
|
||||
{file = "pydantic_core-2.18.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1d90c3265ae107f91a4f279f4d6f6f1d4907ac76c6868b27dc7fb33688cfb347"},
|
||||
{file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:390193c770399861d8df9670fb0d1874f330c79caaca4642332df7c682bf6b91"},
|
||||
{file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:82d5d4d78e4448683cb467897fe24e2b74bb7b973a541ea1dcfec1d3cbce39fb"},
|
||||
{file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4774f3184d2ef3e14e8693194f661dea5a4d6ca4e3dc8e39786d33a94865cefd"},
|
||||
{file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4d938ec0adf5167cb335acb25a4ee69a8107e4984f8fbd2e897021d9e4ca21b"},
|
||||
{file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0e8b1be28239fc64a88a8189d1df7fad8be8c1ae47fcc33e43d4be15f99cc70"},
|
||||
{file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:868649da93e5a3d5eacc2b5b3b9235c98ccdbfd443832f31e075f54419e1b96b"},
|
||||
{file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:78363590ef93d5d226ba21a90a03ea89a20738ee5b7da83d771d283fd8a56761"},
|
||||
{file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:852e966fbd035a6468fc0a3496589b45e2208ec7ca95c26470a54daed82a0788"},
|
||||
{file = "pydantic_core-2.18.2-cp38-none-win32.whl", hash = "sha256:6a46e22a707e7ad4484ac9ee9f290f9d501df45954184e23fc29408dfad61350"},
|
||||
{file = "pydantic_core-2.18.2-cp38-none-win_amd64.whl", hash = "sha256:d91cb5ea8b11607cc757675051f61b3d93f15eca3cefb3e6c704a5d6e8440f4e"},
|
||||
{file = "pydantic_core-2.18.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ae0a8a797a5e56c053610fa7be147993fe50960fa43609ff2a9552b0e07013e8"},
|
||||
{file = "pydantic_core-2.18.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:042473b6280246b1dbf530559246f6842b56119c2926d1e52b631bdc46075f2a"},
|
||||
{file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a388a77e629b9ec814c1b1e6b3b595fe521d2cdc625fcca26fbc2d44c816804"},
|
||||
{file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25add29b8f3b233ae90ccef2d902d0ae0432eb0d45370fe315d1a5cf231004b"},
|
||||
{file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f459a5ce8434614dfd39bbebf1041952ae01da6bed9855008cb33b875cb024c0"},
|
||||
{file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eff2de745698eb46eeb51193a9f41d67d834d50e424aef27df2fcdee1b153845"},
|
||||
{file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8309f67285bdfe65c372ea3722b7a5642680f3dba538566340a9d36e920b5f0"},
|
||||
{file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f93a8a2e3938ff656a7c1bc57193b1319960ac015b6e87d76c76bf14fe0244b4"},
|
||||
{file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:22057013c8c1e272eb8d0eebc796701167d8377441ec894a8fed1af64a0bf399"},
|
||||
{file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cfeecd1ac6cc1fb2692c3d5110781c965aabd4ec5d32799773ca7b1456ac636b"},
|
||||
{file = "pydantic_core-2.18.2-cp39-none-win32.whl", hash = "sha256:0d69b4c2f6bb3e130dba60d34c0845ba31b69babdd3f78f7c0c8fae5021a253e"},
|
||||
{file = "pydantic_core-2.18.2-cp39-none-win_amd64.whl", hash = "sha256:d9319e499827271b09b4e411905b24a426b8fb69464dfa1696258f53a3334641"},
|
||||
{file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a1874c6dd4113308bd0eb568418e6114b252afe44319ead2b4081e9b9521fe75"},
|
||||
{file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:ccdd111c03bfd3666bd2472b674c6899550e09e9f298954cfc896ab92b5b0e6d"},
|
||||
{file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e18609ceaa6eed63753037fc06ebb16041d17d28199ae5aba0052c51449650a9"},
|
||||
{file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e5c584d357c4e2baf0ff7baf44f4994be121e16a2c88918a5817331fc7599d7"},
|
||||
{file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43f0f463cf89ace478de71a318b1b4f05ebc456a9b9300d027b4b57c1a2064fb"},
|
||||
{file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e1b395e58b10b73b07b7cf740d728dd4ff9365ac46c18751bf8b3d8cca8f625a"},
|
||||
{file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:0098300eebb1c837271d3d1a2cd2911e7c11b396eac9661655ee524a7f10587b"},
|
||||
{file = "pydantic_core-2.18.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:36789b70d613fbac0a25bb07ab3d9dba4d2e38af609c020cf4d888d165ee0bf3"},
|
||||
{file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3f9a801e7c8f1ef8718da265bba008fa121243dfe37c1cea17840b0944dfd72c"},
|
||||
{file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:3a6515ebc6e69d85502b4951d89131ca4e036078ea35533bb76327f8424531ce"},
|
||||
{file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20aca1e2298c56ececfd8ed159ae4dde2df0781988c97ef77d5c16ff4bd5b400"},
|
||||
{file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:223ee893d77a310a0391dca6df00f70bbc2f36a71a895cecd9a0e762dc37b349"},
|
||||
{file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2334ce8c673ee93a1d6a65bd90327588387ba073c17e61bf19b4fd97d688d63c"},
|
||||
{file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cbca948f2d14b09d20268cda7b0367723d79063f26c4ffc523af9042cad95592"},
|
||||
{file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b3ef08e20ec49e02d5c6717a91bb5af9b20f1805583cb0adfe9ba2c6b505b5ae"},
|
||||
{file = "pydantic_core-2.18.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c6fdc8627910eed0c01aed6a390a252fe3ea6d472ee70fdde56273f198938374"},
|
||||
{file = "pydantic_core-2.18.2.tar.gz", hash = "sha256:2e29d20810dfc3043ee13ac7d9e25105799817683348823f305ab3f349b9386e"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
|
||||
|
||||
[[package]]
|
||||
name = "pyjwt"
|
||||
version = "2.4.0"
|
||||
@@ -2115,28 +2361,6 @@ files = [
[package.dependencies]
six = ">=1.5"

[[package]]
name = "python-jose"
version = "3.3.0"
description = "JOSE implementation in Python"
optional = false
python-versions = "*"
files = [
{file = "python-jose-3.3.0.tar.gz", hash = "sha256:55779b5e6ad599c6336191246e95eb2293a9ddebd555f796a65f838f07e5d78a"},
{file = "python_jose-3.3.0-py2.py3-none-any.whl", hash = "sha256:9b1376b023f8b298536eedd47ae1089bcdb848f1535ab30555cd92002d78923a"},
]

[package.dependencies]
cryptography = {version = ">=3.4.0", optional = true, markers = "extra == \"cryptography\""}
ecdsa = "!=0.15"
pyasn1 = "*"
rsa = "*"

[package.extras]
cryptography = ["cryptography (>=3.4.0)"]
pycrypto = ["pyasn1", "pycrypto (>=2.6.0,<2.7.0)"]
pycryptodome = ["pyasn1", "pycryptodome (>=3.3.1,<4.0.0)"]

[[package]]
name = "pywin32"
version = "301"
@@ -2181,7 +2405,6 @@ files = [
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
|
||||
@@ -2216,6 +2439,94 @@ files = [
|
||||
{file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "2024.4.28"
|
||||
description = "Alternative regular expression module, to replace re."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "regex-2024.4.28-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd196d056b40af073d95a2879678585f0b74ad35190fac04ca67954c582c6b61"},
|
||||
{file = "regex-2024.4.28-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8bb381f777351bd534462f63e1c6afb10a7caa9fa2a421ae22c26e796fe31b1f"},
|
||||
{file = "regex-2024.4.28-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:47af45b6153522733aa6e92543938e97a70ce0900649ba626cf5aad290b737b6"},
|
||||
{file = "regex-2024.4.28-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99d6a550425cc51c656331af0e2b1651e90eaaa23fb4acde577cf15068e2e20f"},
|
||||
{file = "regex-2024.4.28-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bf29304a8011feb58913c382902fde3395957a47645bf848eea695839aa101b7"},
|
||||
{file = "regex-2024.4.28-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:92da587eee39a52c91aebea8b850e4e4f095fe5928d415cb7ed656b3460ae79a"},
|
||||
{file = "regex-2024.4.28-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6277d426e2f31bdbacb377d17a7475e32b2d7d1f02faaecc48d8e370c6a3ff31"},
|
||||
{file = "regex-2024.4.28-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:28e1f28d07220c0f3da0e8fcd5a115bbb53f8b55cecf9bec0c946eb9a059a94c"},
|
||||
{file = "regex-2024.4.28-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:aaa179975a64790c1f2701ac562b5eeb733946eeb036b5bcca05c8d928a62f10"},
|
||||
{file = "regex-2024.4.28-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6f435946b7bf7a1b438b4e6b149b947c837cb23c704e780c19ba3e6855dbbdd3"},
|
||||
{file = "regex-2024.4.28-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:19d6c11bf35a6ad077eb23852827f91c804eeb71ecb85db4ee1386825b9dc4db"},
|
||||
{file = "regex-2024.4.28-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:fdae0120cddc839eb8e3c15faa8ad541cc6d906d3eb24d82fb041cfe2807bc1e"},
|
||||
{file = "regex-2024.4.28-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:e672cf9caaf669053121f1766d659a8813bd547edef6e009205378faf45c67b8"},
|
||||
{file = "regex-2024.4.28-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f57515750d07e14743db55d59759893fdb21d2668f39e549a7d6cad5d70f9fea"},
|
||||
{file = "regex-2024.4.28-cp310-cp310-win32.whl", hash = "sha256:a1409c4eccb6981c7baabc8888d3550df518add6e06fe74fa1d9312c1838652d"},
|
||||
{file = "regex-2024.4.28-cp310-cp310-win_amd64.whl", hash = "sha256:1f687a28640f763f23f8a9801fe9e1b37338bb1ca5d564ddd41619458f1f22d1"},
|
||||
{file = "regex-2024.4.28-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:84077821c85f222362b72fdc44f7a3a13587a013a45cf14534df1cbbdc9a6796"},
|
||||
{file = "regex-2024.4.28-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b45d4503de8f4f3dc02f1d28a9b039e5504a02cc18906cfe744c11def942e9eb"},
|
||||
{file = "regex-2024.4.28-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:457c2cd5a646dd4ed536c92b535d73548fb8e216ebee602aa9f48e068fc393f3"},
|
||||
{file = "regex-2024.4.28-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b51739ddfd013c6f657b55a508de8b9ea78b56d22b236052c3a85a675102dc6"},
|
||||
{file = "regex-2024.4.28-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:459226445c7d7454981c4c0ce0ad1a72e1e751c3e417f305722bbcee6697e06a"},
|
||||
{file = "regex-2024.4.28-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:670fa596984b08a4a769491cbdf22350431970d0112e03d7e4eeaecaafcd0fec"},
|
||||
{file = "regex-2024.4.28-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe00f4fe11c8a521b173e6324d862ee7ee3412bf7107570c9b564fe1119b56fb"},
|
||||
{file = "regex-2024.4.28-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:36f392dc7763fe7924575475736bddf9ab9f7a66b920932d0ea50c2ded2f5636"},
|
||||
{file = "regex-2024.4.28-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:23a412b7b1a7063f81a742463f38821097b6a37ce1e5b89dd8e871d14dbfd86b"},
|
||||
{file = "regex-2024.4.28-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:f1d6e4b7b2ae3a6a9df53efbf199e4bfcff0959dbdb5fd9ced34d4407348e39a"},
|
||||
{file = "regex-2024.4.28-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:499334ad139557de97cbc4347ee921c0e2b5e9c0f009859e74f3f77918339257"},
|
||||
{file = "regex-2024.4.28-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:0940038bec2fe9e26b203d636c44d31dd8766abc1fe66262da6484bd82461ccf"},
|
||||
{file = "regex-2024.4.28-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:66372c2a01782c5fe8e04bff4a2a0121a9897e19223d9eab30c54c50b2ebeb7f"},
|
||||
{file = "regex-2024.4.28-cp311-cp311-win32.whl", hash = "sha256:c77d10ec3c1cf328b2f501ca32583625987ea0f23a0c2a49b37a39ee5c4c4630"},
|
||||
{file = "regex-2024.4.28-cp311-cp311-win_amd64.whl", hash = "sha256:fc0916c4295c64d6890a46e02d4482bb5ccf33bf1a824c0eaa9e83b148291f90"},
|
||||
{file = "regex-2024.4.28-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:08a1749f04fee2811c7617fdd46d2e46d09106fa8f475c884b65c01326eb15c5"},
|
||||
{file = "regex-2024.4.28-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b8eb28995771c087a73338f695a08c9abfdf723d185e57b97f6175c5051ff1ae"},
|
||||
{file = "regex-2024.4.28-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:dd7ef715ccb8040954d44cfeff17e6b8e9f79c8019daae2fd30a8806ef5435c0"},
|
||||
{file = "regex-2024.4.28-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb0315a2b26fde4005a7c401707c5352df274460f2f85b209cf6024271373013"},
|
||||
{file = "regex-2024.4.28-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f2fc053228a6bd3a17a9b0a3f15c3ab3cf95727b00557e92e1cfe094b88cc662"},
|
||||
{file = "regex-2024.4.28-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7fe9739a686dc44733d52d6e4f7b9c77b285e49edf8570754b322bca6b85b4cc"},
|
||||
{file = "regex-2024.4.28-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a74fcf77d979364f9b69fcf8200849ca29a374973dc193a7317698aa37d8b01c"},
|
||||
{file = "regex-2024.4.28-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:965fd0cf4694d76f6564896b422724ec7b959ef927a7cb187fc6b3f4e4f59833"},
|
||||
{file = "regex-2024.4.28-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:2fef0b38c34ae675fcbb1b5db760d40c3fc3612cfa186e9e50df5782cac02bcd"},
|
||||
{file = "regex-2024.4.28-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bc365ce25f6c7c5ed70e4bc674f9137f52b7dd6a125037f9132a7be52b8a252f"},
|
||||
{file = "regex-2024.4.28-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:ac69b394764bb857429b031d29d9604842bc4cbfd964d764b1af1868eeebc4f0"},
|
||||
{file = "regex-2024.4.28-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:144a1fc54765f5c5c36d6d4b073299832aa1ec6a746a6452c3ee7b46b3d3b11d"},
|
||||
{file = "regex-2024.4.28-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2630ca4e152c221072fd4a56d4622b5ada876f668ecd24d5ab62544ae6793ed6"},
|
||||
{file = "regex-2024.4.28-cp312-cp312-win32.whl", hash = "sha256:7f3502f03b4da52bbe8ba962621daa846f38489cae5c4a7b5d738f15f6443d17"},
|
||||
{file = "regex-2024.4.28-cp312-cp312-win_amd64.whl", hash = "sha256:0dd3f69098511e71880fb00f5815db9ed0ef62c05775395968299cb400aeab82"},
|
||||
{file = "regex-2024.4.28-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:374f690e1dd0dbdcddea4a5c9bdd97632cf656c69113f7cd6a361f2a67221cb6"},
|
||||
{file = "regex-2024.4.28-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:25f87ae6b96374db20f180eab083aafe419b194e96e4f282c40191e71980c666"},
|
||||
{file = "regex-2024.4.28-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5dbc1bcc7413eebe5f18196e22804a3be1bfdfc7e2afd415e12c068624d48247"},
|
||||
{file = "regex-2024.4.28-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f85151ec5a232335f1be022b09fbbe459042ea1951d8a48fef251223fc67eee1"},
|
||||
{file = "regex-2024.4.28-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:57ba112e5530530fd175ed550373eb263db4ca98b5f00694d73b18b9a02e7185"},
|
||||
{file = "regex-2024.4.28-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:224803b74aab56aa7be313f92a8d9911dcade37e5f167db62a738d0c85fdac4b"},
|
||||
{file = "regex-2024.4.28-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a54a047b607fd2d2d52a05e6ad294602f1e0dec2291152b745870afc47c1397"},
|
||||
{file = "regex-2024.4.28-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a2a512d623f1f2d01d881513af9fc6a7c46e5cfffb7dc50c38ce959f9246c94"},
|
||||
{file = "regex-2024.4.28-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c06bf3f38f0707592898428636cbb75d0a846651b053a1cf748763e3063a6925"},
|
||||
{file = "regex-2024.4.28-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:1031a5e7b048ee371ab3653aad3030ecfad6ee9ecdc85f0242c57751a05b0ac4"},
|
||||
{file = "regex-2024.4.28-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d7a353ebfa7154c871a35caca7bfd8f9e18666829a1dc187115b80e35a29393e"},
|
||||
{file = "regex-2024.4.28-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:7e76b9cfbf5ced1aca15a0e5b6f229344d9b3123439ffce552b11faab0114a02"},
|
||||
{file = "regex-2024.4.28-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:5ce479ecc068bc2a74cb98dd8dba99e070d1b2f4a8371a7dfe631f85db70fe6e"},
|
||||
{file = "regex-2024.4.28-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:7d77b6f63f806578c604dca209280e4c54f0fa9a8128bb8d2cc5fb6f99da4150"},
|
||||
{file = "regex-2024.4.28-cp38-cp38-win32.whl", hash = "sha256:d84308f097d7a513359757c69707ad339da799e53b7393819ec2ea36bc4beb58"},
|
||||
{file = "regex-2024.4.28-cp38-cp38-win_amd64.whl", hash = "sha256:2cc1b87bba1dd1a898e664a31012725e48af826bf3971e786c53e32e02adae6c"},
|
||||
{file = "regex-2024.4.28-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7413167c507a768eafb5424413c5b2f515c606be5bb4ef8c5dee43925aa5718b"},
|
||||
{file = "regex-2024.4.28-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:108e2dcf0b53a7c4ab8986842a8edcb8ab2e59919a74ff51c296772e8e74d0ae"},
|
||||
{file = "regex-2024.4.28-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f1c5742c31ba7d72f2dedf7968998730664b45e38827637e0f04a2ac7de2f5f1"},
|
||||
{file = "regex-2024.4.28-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecc6148228c9ae25ce403eade13a0961de1cb016bdb35c6eafd8e7b87ad028b1"},
|
||||
{file = "regex-2024.4.28-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b7d893c8cf0e2429b823ef1a1d360a25950ed11f0e2a9df2b5198821832e1947"},
|
||||
{file = "regex-2024.4.28-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4290035b169578ffbbfa50d904d26bec16a94526071ebec3dadbebf67a26b25e"},
|
||||
{file = "regex-2024.4.28-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44a22ae1cfd82e4ffa2066eb3390777dc79468f866f0625261a93e44cdf6482b"},
|
||||
{file = "regex-2024.4.28-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fd24fd140b69f0b0bcc9165c397e9b2e89ecbeda83303abf2a072609f60239e2"},
|
||||
{file = "regex-2024.4.28-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:39fb166d2196413bead229cd64a2ffd6ec78ebab83fff7d2701103cf9f4dfd26"},
|
||||
{file = "regex-2024.4.28-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9301cc6db4d83d2c0719f7fcda37229691745168bf6ae849bea2e85fc769175d"},
|
||||
{file = "regex-2024.4.28-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7c3d389e8d76a49923683123730c33e9553063d9041658f23897f0b396b2386f"},
|
||||
{file = "regex-2024.4.28-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:99ef6289b62042500d581170d06e17f5353b111a15aa6b25b05b91c6886df8fc"},
|
||||
{file = "regex-2024.4.28-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:b91d529b47798c016d4b4c1d06cc826ac40d196da54f0de3c519f5a297c5076a"},
|
||||
{file = "regex-2024.4.28-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:43548ad74ea50456e1c68d3c67fff3de64c6edb85bcd511d1136f9b5376fc9d1"},
|
||||
{file = "regex-2024.4.28-cp39-cp39-win32.whl", hash = "sha256:05d9b6578a22db7dedb4df81451f360395828b04f4513980b6bd7a1412c679cc"},
|
||||
{file = "regex-2024.4.28-cp39-cp39-win_amd64.whl", hash = "sha256:3986217ec830c2109875be740531feb8ddafe0dfa49767cdcd072ed7e8927962"},
|
||||
{file = "regex-2024.4.28.tar.gz", hash = "sha256:83ab366777ea45d58f72593adf35d36ca911ea8bd838483c1823b883a121b0e4"},
|
||||
]
|
||||
|
||||
[[package]]
name = "requests"
version = "2.31.0"
@@ -2256,18 +2567,18 @@ urllib3 = ">=1.25.10"
tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asyncio", "pytest-cov", "pytest-localserver", "types-mock", "types-requests"]

[[package]]
name = "rsa"
version = "4.9"
description = "Pure-Python RSA implementation"
name = "rfc3339-validator"
version = "0.1.4"
description = "A pure python RFC3339 validator"
optional = false
python-versions = ">=3.6,<4"
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
files = [
{file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"},
{file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"},
{file = "rfc3339_validator-0.1.4-py2.py3-none-any.whl", hash = "sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa"},
{file = "rfc3339_validator-0.1.4.tar.gz", hash = "sha256:138a2abdf93304ad60530167e51d2dfb9549521a836871b88d7f4695d0022f6b"},
]

[package.dependencies]
pyasn1 = ">=0.1.3"
six = "*"

[[package]]
name = "ruff"
@@ -2366,22 +2677,18 @@ files = [
]

[[package]]
name = "sshpubkeys"
version = "3.3.1"
description = "SSH public key parser"
name = "sympy"
version = "1.12"
description = "Computer algebra system (CAS) in Python"
optional = false
python-versions = ">=3"
python-versions = ">=3.8"
files = [
{file = "sshpubkeys-3.3.1-py2.py3-none-any.whl", hash = "sha256:946f76b8fe86704b0e7c56a00d80294e39bc2305999844f079a217885060b1ac"},
{file = "sshpubkeys-3.3.1.tar.gz", hash = "sha256:3020ed4f8c846849299370fbe98ff4157b0ccc1accec105e07cfa9ae4bb55064"},
{file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"},
{file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"},
]

[package.dependencies]
cryptography = ">=2.1.4"
ecdsa = ">=0.13"

[package.extras]
dev = ["twine", "wheel", "yapf"]
mpmath = ">=0.19"

[[package]]
name = "toml"
@@ -2652,16 +2959,6 @@ files = [
{file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"},
{file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"},
{file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"},
{file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"},
{file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"},
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"},
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"},
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"},
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"},
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"},
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"},
{file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"},
{file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"},
{file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"},
{file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"},
{file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"},
@@ -2899,4 +3196,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.9"
content-hash = "496d6d9f722983bda4d1265370bc8ba75560da74ab5d6b68c94a03290815e1eb"
content-hash = "dcde14c58a32bda5f123319a069352c458b3719f3c62977991eebb9803a46a9e"
@@ -13,7 +13,7 @@ use tokio_postgres::config::AuthKeys;
use tracing::{info, warn};

use crate::auth::credentials::check_peer_addr_is_in_list;
use crate::auth::validate_password_and_exchange;
use crate::auth::{validate_password_and_exchange, AuthError};
use crate::cache::Cached;
use crate::console::errors::GetAuthInfoError;
use crate::console::provider::{CachedRoleSecret, ConsoleBackend};
@@ -23,7 +23,7 @@ use crate::intern::EndpointIdInt;
use crate::metrics::Metrics;
use crate::proxy::connect_compute::ComputeConnectBackend;
use crate::proxy::NeonOptions;
use crate::rate_limiter::{BucketRateLimiter, RateBucketInfo};
use crate::rate_limiter::{BucketRateLimiter, EndpointRateLimiter, RateBucketInfo};
use crate::stream::Stream;
use crate::{
auth::{self, ComputeUserInfoMaybeEndpoint},
@@ -280,6 +280,7 @@ async fn auth_quirks(
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
allow_cleartext: bool,
config: &'static AuthenticationConfig,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> auth::Result<ComputeCredentials> {
// If there's no project so far, that entails that client doesn't
// support SNI or other means of passing the endpoint (project) name.
@@ -305,6 +306,10 @@ async fn auth_quirks(
if !check_peer_addr_is_in_list(&ctx.peer_addr, &allowed_ips) {
return Err(auth::AuthError::ip_address_not_allowed(ctx.peer_addr));
}

if !endpoint_rate_limiter.check(info.endpoint.clone().into(), 1) {
return Err(AuthError::too_many_connections());
}
let cached_secret = match maybe_secret {
Some(secret) => secret,
None => api.get_role_secret(ctx, &info).await?,
@@ -417,6 +422,7 @@ impl<'a> BackendType<'a, ComputeUserInfoMaybeEndpoint, &()> {
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
allow_cleartext: bool,
config: &'static AuthenticationConfig,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> auth::Result<BackendType<'a, ComputeCredentials, NodeInfo>> {
use BackendType::*;

@@ -428,8 +434,16 @@ impl<'a> BackendType<'a, ComputeUserInfoMaybeEndpoint, &()> {
"performing authentication using the console"
);

let credentials =
auth_quirks(ctx, &*api, user_info, client, allow_cleartext, config).await?;
let credentials = auth_quirks(
ctx,
&*api,
user_info,
client,
allow_cleartext,
config,
endpoint_rate_limiter,
)
.await?;
BackendType::Console(api, credentials)
}
// NOTE: this auth backend doesn't use client credentials.
@@ -539,7 +553,7 @@ mod tests {
},
context::RequestMonitoring,
proxy::NeonOptions,
rate_limiter::RateBucketInfo,
rate_limiter::{EndpointRateLimiter, RateBucketInfo},
scram::ServerSecret,
stream::{PqStream, Stream},
};
@@ -699,10 +713,20 @@ mod tests {
_ => panic!("wrong message"),
}
});
let endpoint_rate_limiter =
Arc::new(EndpointRateLimiter::new(&RateBucketInfo::DEFAULT_AUTH_SET));

let _creds = auth_quirks(&mut ctx, &api, user_info, &mut stream, false, &CONFIG)
.await
.unwrap();
let _creds = auth_quirks(
&mut ctx,
&api,
user_info,
&mut stream,
false,
&CONFIG,
endpoint_rate_limiter,
)
.await
.unwrap();

handle.await.unwrap();
}
@@ -739,10 +763,20 @@ mod tests {
frontend::password_message(b"my-secret-password", &mut write).unwrap();
client.write_all(&write).await.unwrap();
});
let endpoint_rate_limiter =
Arc::new(EndpointRateLimiter::new(&RateBucketInfo::DEFAULT_AUTH_SET));

let _creds = auth_quirks(&mut ctx, &api, user_info, &mut stream, true, &CONFIG)
.await
.unwrap();
let _creds = auth_quirks(
&mut ctx,
&api,
user_info,
&mut stream,
true,
&CONFIG,
endpoint_rate_limiter,
)
.await
.unwrap();

handle.await.unwrap();
}
@@ -780,9 +814,20 @@ mod tests {
client.write_all(&write).await.unwrap();
});

let creds = auth_quirks(&mut ctx, &api, user_info, &mut stream, true, &CONFIG)
.await
.unwrap();
let endpoint_rate_limiter =
Arc::new(EndpointRateLimiter::new(&RateBucketInfo::DEFAULT_AUTH_SET));

let creds = auth_quirks(
&mut ctx,
&api,
user_info,
&mut stream,
true,
&CONFIG,
endpoint_rate_limiter,
)
.await
.unwrap();

assert_eq!(creds.info.endpoint, "my-endpoint");
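The hunks above thread an `Arc<EndpointRateLimiter>` into `auth_quirks` and reject a throttled endpoint with `AuthError::too_many_connections()` before the role secret is fetched. Below is a minimal, self-contained sketch of that idea; it is not the proxy's `EndpointRateLimiter`, just a single-threaded per-endpoint window counter with hypothetical names (`SimpleEndpointLimiter`, `check`).

```rust
use std::collections::HashMap;
use std::time::{Duration, Instant};

/// Minimal per-key window counter: at most `max` tokens per `interval`,
/// tracked independently for each endpoint name.
struct SimpleEndpointLimiter {
    max: u32,
    interval: Duration,
    buckets: HashMap<String, (Instant, u32)>,
}

impl SimpleEndpointLimiter {
    fn new(max: u32, interval: Duration) -> Self {
        Self { max, interval, buckets: HashMap::new() }
    }

    /// Returns false once the endpoint has exhausted its budget for the
    /// current window, mirroring the "reject before fetching the role
    /// secret" placement in the diff above.
    fn check(&mut self, endpoint: &str, tokens: u32) -> bool {
        let now = Instant::now();
        let entry = self.buckets.entry(endpoint.to_string()).or_insert((now, 0));
        if now.duration_since(entry.0) >= self.interval {
            *entry = (now, 0); // window expired, start a fresh budget
        }
        if entry.1 + tokens > self.max {
            return false;
        }
        entry.1 += tokens;
        true
    }
}

fn main() {
    let mut limiter = SimpleEndpointLimiter::new(2, Duration::from_secs(1));
    assert!(limiter.check("my-endpoint", 1));
    assert!(limiter.check("my-endpoint", 1));
    // third request in the same window is rejected, analogous to
    // `AuthError::too_many_connections()` in the proxy code
    assert!(!limiter.check("my-endpoint", 1));
}
```

Placing the check before the secret lookup, as the diff does, means a throttled endpoint never triggers a console request in the first place.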
@@ -144,6 +144,9 @@ struct ProxyCliArgs {
/// Can be given multiple times for different bucket sizes.
#[clap(long, default_values_t = RateBucketInfo::DEFAULT_ENDPOINT_SET)]
endpoint_rps_limit: Vec<RateBucketInfo>,
/// Wake compute rate limiter max number of requests per second.
#[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]
wake_compute_limit: Vec<RateBucketInfo>,
/// Whether the auth rate limiter actually takes effect (for testing)
#[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
auth_rate_limit_enabled: bool,
@@ -154,7 +157,7 @@ struct ProxyCliArgs {
#[clap(long, default_value_t = 64)]
auth_rate_limit_ip_subnet: u8,
/// Redis rate limiter max number of requests per second.
#[clap(long, default_values_t = RateBucketInfo::DEFAULT_ENDPOINT_SET)]
#[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]
redis_rps_limit: Vec<RateBucketInfo>,
/// cache for `allowed_ips` (use `size=0` to disable)
#[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
@@ -365,6 +368,10 @@ async fn main() -> anyhow::Result<()> {
proxy::metrics::CancellationSource::FromClient,
));

let mut endpoint_rps_limit = args.endpoint_rps_limit.clone();
RateBucketInfo::validate(&mut endpoint_rps_limit)?;
let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new(endpoint_rps_limit));

// client facing tasks. these will exit on error or on cancellation
// cancellation returns Ok(())
let mut client_tasks = JoinSet::new();
@@ -373,6 +380,7 @@ async fn main() -> anyhow::Result<()> {
proxy_listener,
cancellation_token.clone(),
cancellation_handler.clone(),
endpoint_rate_limiter.clone(),
));

// TODO: rename the argument to something like serverless.
@@ -387,6 +395,7 @@ async fn main() -> anyhow::Result<()> {
serverless_listener,
cancellation_token.clone(),
cancellation_handler.clone(),
endpoint_rate_limiter.clone(),
));
}
@@ -559,11 +568,16 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
let url = args.auth_endpoint.parse()?;
let endpoint = http::Endpoint::new(url, http::new_client());

let mut endpoint_rps_limit = args.endpoint_rps_limit.clone();
RateBucketInfo::validate(&mut endpoint_rps_limit)?;
let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new(endpoint_rps_limit));
let api =
console::provider::neon::Api::new(endpoint, caches, locks, endpoint_rate_limiter);
let mut wake_compute_rps_limit = args.wake_compute_limit.clone();
RateBucketInfo::validate(&mut wake_compute_rps_limit)?;
let wake_compute_endpoint_rate_limiter =
Arc::new(EndpointRateLimiter::new(wake_compute_rps_limit));
let api = console::provider::neon::Api::new(
endpoint,
caches,
locks,
wake_compute_endpoint_rate_limiter,
);
let api = console::provider::ConsoleBackend::Console(api);
auth::BackendType::Console(MaybeOwned::Owned(api), ())
}
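These wiring hunks build one `EndpointRateLimiter` in `main` and hand a clone of the `Arc` to each listener task, while `build_config` now builds a separate limiter for the wake-compute path. The snippet below is a generic sketch of that sharing pattern with a hypothetical `CountingLimiter` stand-in (assuming the `tokio` crate with `rt` and `macros` features); it is not the proxy's own type.

```rust
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;

/// Stand-in for an internally synchronized limiter: because it is
/// `Send + Sync`, one `Arc` clone per task is all the sharing needed.
struct CountingLimiter {
    checks: AtomicU64,
}

impl CountingLimiter {
    fn check(&self) -> bool {
        self.checks.fetch_add(1, Ordering::Relaxed);
        true
    }
}

#[tokio::main]
async fn main() {
    let limiter = Arc::new(CountingLimiter { checks: AtomicU64::new(0) });

    // Mirror of the wiring in `main`: one limiter built once, then a clone
    // handed to each listener task (TCP proxy, serverless/WebSocket, ...).
    let tcp = tokio::spawn({
        let limiter = limiter.clone();
        async move { limiter.check() }
    });
    let serverless = tokio::spawn({
        let limiter = limiter.clone();
        async move { limiter.check() }
    });

    assert!(tcp.await.unwrap() && serverless.await.unwrap());
    assert_eq!(limiter.checks.load(Ordering::Relaxed), 2);
}
```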
@@ -1,7 +1,7 @@
use crate::{
auth::parse_endpoint_param,
cancellation::CancelClosure,
console::{errors::WakeComputeError, messages::MetricsAuxInfo},
console::{errors::WakeComputeError, messages::MetricsAuxInfo, provider::ApiLockError},
context::RequestMonitoring,
error::{ReportableError, UserFacingError},
metrics::{Metrics, NumDbConnectionsGuard},
@@ -34,6 +34,9 @@ pub enum ConnectionError {

#[error("{COULD_NOT_CONNECT}: {0}")]
WakeComputeError(#[from] WakeComputeError),

#[error("error acquiring resource permit: {0}")]
TooManyConnectionAttempts(#[from] ApiLockError),
}

impl UserFacingError for ConnectionError {
@@ -57,6 +60,9 @@ impl UserFacingError for ConnectionError {
None => err.to_string(),
},
WakeComputeError(err) => err.to_string_client(),
TooManyConnectionAttempts(_) => {
"Failed to acquire permit to connect to the database. Too many database connection attempts are currently ongoing.".to_owned()
}
_ => COULD_NOT_CONNECT.to_owned(),
}
}
@@ -72,6 +78,7 @@ impl ReportableError for ConnectionError {
ConnectionError::CouldNotConnect(_) => crate::error::ErrorKind::Compute,
ConnectionError::TlsError(_) => crate::error::ErrorKind::Compute,
ConnectionError::WakeComputeError(e) => e.get_error_kind(),
ConnectionError::TooManyConnectionAttempts(e) => e.get_error_kind(),
}
}
}
@@ -12,6 +12,7 @@ use crate::{
compute,
config::{CacheOptions, EndpointCacheConfig, ProjectInfoCacheOptions},
context::RequestMonitoring,
error::ReportableError,
intern::ProjectIdInt,
metrics::ApiLockMetrics,
scram, EndpointCacheKey,
@@ -30,6 +31,8 @@ pub mod errors {
};
use thiserror::Error;

use super::ApiLockError;

/// A go-to error message which doesn't leak any detail.
const REQUEST_FAILED: &str = "Console request failed";

@@ -211,8 +214,8 @@ pub mod errors {
#[error("Too many connections attempts")]
TooManyConnections,

#[error("Timeout waiting to acquire wake compute lock")]
TimeoutError,
#[error("error acquiring resource permit: {0}")]
TooManyConnectionAttempts(#[from] ApiLockError),
}

// This allows more useful interactions than `#[from]`.
@@ -222,17 +225,6 @@ pub mod errors {
}
}

impl From<tokio::sync::AcquireError> for WakeComputeError {
fn from(_: tokio::sync::AcquireError) -> Self {
WakeComputeError::TimeoutError
}
}
impl From<tokio::time::error::Elapsed> for WakeComputeError {
fn from(_: tokio::time::error::Elapsed) -> Self {
WakeComputeError::TimeoutError
}
}

impl UserFacingError for WakeComputeError {
fn to_string_client(&self) -> String {
use WakeComputeError::*;
@@ -245,7 +237,9 @@ pub mod errors {

TooManyConnections => self.to_string(),

TimeoutError => "timeout while acquiring the compute resource lock".to_owned(),
TooManyConnectionAttempts(_) => {
"Failed to acquire permit to connect to the database. Too many database connection attempts are currently ongoing.".to_owned()
}
}
}
}
@@ -256,7 +250,7 @@ pub mod errors {
WakeComputeError::BadComputeAddress(_) => crate::error::ErrorKind::ControlPlane,
WakeComputeError::ApiError(e) => e.get_error_kind(),
WakeComputeError::TooManyConnections => crate::error::ErrorKind::RateLimit,
WakeComputeError::TimeoutError => crate::error::ErrorKind::ServiceRateLimit,
WakeComputeError::TooManyConnectionAttempts(e) => e.get_error_kind(),
}
}
}
@@ -456,6 +450,23 @@ pub struct ApiLocks<K> {
metrics: &'static ApiLockMetrics,
}

#[derive(Debug, thiserror::Error)]
pub enum ApiLockError {
#[error("lock was closed")]
AcquireError(#[from] tokio::sync::AcquireError),
#[error("permit could not be acquired")]
TimeoutError(#[from] tokio::time::error::Elapsed),
}

impl ReportableError for ApiLockError {
fn get_error_kind(&self) -> crate::error::ErrorKind {
match self {
ApiLockError::AcquireError(_) => crate::error::ErrorKind::Service,
ApiLockError::TimeoutError(_) => crate::error::ErrorKind::RateLimit,
}
}
}

impl<K: Hash + Eq + Clone> ApiLocks<K> {
pub fn new(
name: &'static str,
@@ -475,7 +486,7 @@ impl<K: Hash + Eq + Clone> ApiLocks<K> {
})
}

pub async fn get_permit(&self, key: &K) -> Result<WakeComputePermit, errors::WakeComputeError> {
pub async fn get_permit(&self, key: &K) -> Result<WakeComputePermit, ApiLockError> {
if self.permits == 0 {
return Ok(WakeComputePermit { permit: None });
}
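The `ApiLockError` introduced here replaces the old blanket `From` conversions into `WakeComputeError::TimeoutError` with a dedicated error that keeps the two failure modes (semaphore closed vs. timeout) distinct. Below is a standalone sketch of the same `thiserror` + `#[from]` pattern, assuming the `thiserror` crate and `tokio` with the `sync`, `time`, `rt`, and `macros` features; the names `PermitError` and `get_permit_within` are illustrative, not the proxy's API.

```rust
use std::time::Duration;

use thiserror::Error;
use tokio::sync::{Semaphore, SemaphorePermit};

/// Two distinct failure modes of a timed permit acquisition, each
/// convertible via `#[from]` so `?` works directly on both layers.
#[derive(Debug, Error)]
pub enum PermitError {
    #[error("lock was closed")]
    Acquire(#[from] tokio::sync::AcquireError),
    #[error("permit could not be acquired")]
    Timeout(#[from] tokio::time::error::Elapsed),
}

/// Try to take a permit within `timeout`. The outer `?` maps a timeout to
/// `PermitError::Timeout`, the inner one maps a closed semaphore to
/// `PermitError::Acquire`.
pub async fn get_permit_within(
    semaphore: &Semaphore,
    timeout: Duration,
) -> Result<SemaphorePermit<'_>, PermitError> {
    Ok(tokio::time::timeout(timeout, semaphore.acquire()).await??)
}

#[tokio::main]
async fn main() -> Result<(), PermitError> {
    let semaphore = Semaphore::new(1);
    let _permit = get_permit_within(&semaphore, Duration::from_millis(50)).await?;
    println!("permit acquired");
    Ok(())
}
```

Keeping the two variants separate is what lets `get_error_kind` classify a closed lock as a service error and a timeout as a rate limit, as the diff does.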
@@ -26,7 +26,7 @@ pub struct Api {
endpoint: http::Endpoint,
pub caches: &'static ApiCaches,
pub locks: &'static ApiLocks<EndpointCacheKey>,
pub endpoint_rate_limiter: Arc<EndpointRateLimiter>,
pub wake_compute_endpoint_rate_limiter: Arc<EndpointRateLimiter>,
jwt: String,
}

@@ -36,7 +36,7 @@ impl Api {
endpoint: http::Endpoint,
caches: &'static ApiCaches,
locks: &'static ApiLocks<EndpointCacheKey>,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
wake_compute_endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> Self {
let jwt: String = match std::env::var("NEON_PROXY_TO_CONTROLPLANE_TOKEN") {
Ok(v) => v,
@@ -46,7 +46,7 @@ impl Api {
endpoint,
caches,
locks,
endpoint_rate_limiter,
wake_compute_endpoint_rate_limiter,
jwt,
}
}
@@ -283,7 +283,7 @@ impl super::Api for Api {

// check rate limit
if !self
.endpoint_rate_limiter
.wake_compute_endpoint_rate_limiter
.check(user_info.endpoint.normalize().into(), 1)
{
return Err(WakeComputeError::TooManyConnections);
@@ -19,6 +19,7 @@ use crate::{
metrics::{Metrics, NumClientConnectionsGuard},
protocol2::read_proxy_protocol,
proxy::handshake::{handshake, HandshakeData},
rate_limiter::EndpointRateLimiter,
stream::{PqStream, Stream},
EndpointCacheKey,
};
@@ -61,6 +62,7 @@ pub async fn task_main(
listener: tokio::net::TcpListener,
cancellation_token: CancellationToken,
cancellation_handler: Arc<CancellationHandlerMain>,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> anyhow::Result<()> {
scopeguard::defer! {
info!("proxy has shut down");
@@ -86,6 +88,7 @@ pub async fn task_main(
let cancellation_handler = Arc::clone(&cancellation_handler);

tracing::info!(protocol = "tcp", %session_id, "accepted new TCP connection");
let endpoint_rate_limiter2 = endpoint_rate_limiter.clone();

connections.spawn(async move {
let (socket, peer_addr) = match read_proxy_protocol(socket).await{
@@ -123,6 +126,7 @@ pub async fn task_main(
cancellation_handler,
socket,
ClientMode::Tcp,
endpoint_rate_limiter2,
conn_gauge,
)
.instrument(span.clone())
@@ -234,6 +238,7 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
cancellation_handler: Arc<CancellationHandlerMain>,
stream: S,
mode: ClientMode,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
conn_gauge: NumClientConnectionsGuard<'static>,
) -> Result<Option<ProxyPassthrough<CancellationHandlerMainInternal, S>>, ClientRequestError> {
info!(
@@ -243,7 +248,6 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(

let metrics = &Metrics::get().proxy;
let proto = ctx.protocol;
// let _client_gauge = metrics.client_connections.guard(proto);
let _request_gauge = metrics.connection_requests.guard(proto);

let tls = config.tls_config.as_ref();
@@ -286,6 +290,7 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
&mut stream,
mode.allow_cleartext(),
&config.authentication_config,
endpoint_rate_limiter,
)
.await
{
@@ -86,6 +86,8 @@ impl ShouldRetry for compute::ConnectionError {
match self {
compute::ConnectionError::Postgres(err) => err.should_retry_database_address(),
compute::ConnectionError::CouldNotConnect(err) => err.should_retry_database_address(),
// the cache entry was not checked for validity
compute::ConnectionError::TooManyConnectionAttempts(_) => false,
_ => true,
}
}
@@ -119,7 +119,7 @@ fn report_error(e: &WakeComputeError, retry: bool) {
WakeupFailureKind::ApiConsoleOtherError
}
WakeComputeError::TooManyConnections => WakeupFailureKind::ApiConsoleLocked,
WakeComputeError::TimeoutError => WakeupFailureKind::TimeoutError,
WakeComputeError::TooManyConnectionAttempts(_) => WakeupFailureKind::TimeoutError,
};
Metrics::get()
.proxy
@@ -128,12 +128,18 @@ impl std::str::FromStr for RateBucketInfo {
}

impl RateBucketInfo {
pub const DEFAULT_ENDPOINT_SET: [Self; 3] = [
pub const DEFAULT_SET: [Self; 3] = [
Self::new(300, Duration::from_secs(1)),
Self::new(200, Duration::from_secs(60)),
Self::new(100, Duration::from_secs(600)),
];

pub const DEFAULT_ENDPOINT_SET: [Self; 3] = [
Self::new(500, Duration::from_secs(1)),
Self::new(300, Duration::from_secs(60)),
Self::new(200, Duration::from_secs(600)),
];

pub fn validate(info: &mut [Self]) -> anyhow::Result<()> {
info.sort_unstable_by_key(|info| info.interval);
let invalid = info
@@ -266,7 +272,7 @@ mod tests {

#[test]
fn default_rate_buckets() {
let mut defaults = RateBucketInfo::DEFAULT_ENDPOINT_SET;
let mut defaults = RateBucketInfo::DEFAULT_SET;
RateBucketInfo::validate(&mut defaults[..]).unwrap();
}

@@ -333,11 +339,8 @@ mod tests {
let rand = rand::rngs::StdRng::from_seed([1; 32]);
let hasher = BuildHasherDefault::<FxHasher>::default();

let limiter = BucketRateLimiter::new_with_rand_and_hasher(
&RateBucketInfo::DEFAULT_ENDPOINT_SET,
rand,
hasher,
);
let limiter =
BucketRateLimiter::new_with_rand_and_hasher(&RateBucketInfo::DEFAULT_SET, rand, hasher);
for i in 0..1_000_000 {
limiter.check(i, 1);
}
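This hunk splits the old `DEFAULT_ENDPOINT_SET` into a general `DEFAULT_SET` and a looser per-endpoint set, and `validate` sorts the buckets by interval before checking them. Below is a hedged sketch of one plausible validation invariant; the names and the exact rule are assumptions chosen only to be consistent with the defaults shown (the allowed average rate shrinks as the window grows), and the real `RateBucketInfo::validate` may differ.

```rust
use std::time::Duration;

/// Requests-per-second budget measured over `interval`, mirroring the
/// shape of `RateBucketInfo` in the diff (field names are assumptions).
#[derive(Clone, Copy, Debug)]
struct BucketInfo {
    rps: u64,
    interval: Duration,
}

/// Sort buckets by interval and require the allowed average rate to be
/// non-increasing as the window grows, which the default sets above
/// satisfy (300/1s, 200/60s, 100/600s).
fn validate(buckets: &mut [BucketInfo]) -> Result<(), String> {
    buckets.sort_unstable_by_key(|b| b.interval);
    for pair in buckets.windows(2) {
        if pair[1].rps > pair[0].rps {
            return Err(format!(
                "bucket over {:?} allows a higher rate than the shorter {:?} bucket",
                pair[1].interval, pair[0].interval
            ));
        }
    }
    Ok(())
}

fn main() {
    let mut ok = [
        BucketInfo { rps: 300, interval: Duration::from_secs(1) },
        BucketInfo { rps: 200, interval: Duration::from_secs(60) },
        BucketInfo { rps: 100, interval: Duration::from_secs(600) },
    ];
    assert!(validate(&mut ok).is_ok());

    let mut bad = [
        BucketInfo { rps: 100, interval: Duration::from_secs(1) },
        BucketInfo { rps: 500, interval: Duration::from_secs(60) },
    ];
    assert!(validate(&mut bad).is_err());
}
```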
@@ -36,6 +36,7 @@ use crate::context::RequestMonitoring;
use crate::metrics::Metrics;
use crate::protocol2::read_proxy_protocol;
use crate::proxy::run_until_cancelled;
use crate::rate_limiter::EndpointRateLimiter;
use crate::serverless::backend::PoolingBackend;
use crate::serverless::http_util::{api_error_into_response, json_response};

@@ -54,6 +55,7 @@ pub async fn task_main(
ws_listener: TcpListener,
cancellation_token: CancellationToken,
cancellation_handler: Arc<CancellationHandlerMain>,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> anyhow::Result<()> {
scopeguard::defer! {
info!("websocket server has shut down");
@@ -82,6 +84,7 @@ pub async fn task_main(
let backend = Arc::new(PoolingBackend {
pool: Arc::clone(&conn_pool),
config,
endpoint_rate_limiter: Arc::clone(&endpoint_rate_limiter),
});

let tls_config = match config.tls_config.as_ref() {
@@ -129,6 +132,7 @@ pub async fn task_main(
backend.clone(),
connections.clone(),
cancellation_handler.clone(),
endpoint_rate_limiter.clone(),
conn_token.clone(),
server.clone(),
tls_acceptor.clone(),
@@ -162,6 +166,7 @@ async fn connection_handler(
backend: Arc<PoolingBackend>,
connections: TaskTracker,
cancellation_handler: Arc<CancellationHandlerMain>,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
cancellation_token: CancellationToken,
server: Builder<TokioExecutor>,
tls_acceptor: TlsAcceptor,
@@ -245,6 +250,7 @@ async fn connection_handler(
session_id,
peer_addr,
http_request_token,
endpoint_rate_limiter.clone(),
)
.in_current_span()
.map_ok_or_else(api_error_into_response, |r| r),
@@ -285,6 +291,7 @@ async fn request_handler(
peer_addr: IpAddr,
// used to cancel in-flight HTTP requests. not used to cancel websockets
http_cancellation_token: CancellationToken,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> Result<Response<Full<Bytes>>, ApiError> {
let host = request
.headers()
@@ -310,9 +317,15 @@ async fn request_handler(

ws_connections.spawn(
async move {
if let Err(e) =
websocket::serve_websocket(config, ctx, websocket, cancellation_handler, host)
.await
if let Err(e) = websocket::serve_websocket(
config,
ctx,
websocket,
cancellation_handler,
endpoint_rate_limiter,
host,
)
.await
{
error!("error in websocket connection: {e:#}");
}
@@ -10,11 +10,13 @@ use crate::{
console::{
errors::{GetAuthInfoError, WakeComputeError},
locks::ApiLocks,
provider::ApiLockError,
CachedNodeInfo,
},
context::RequestMonitoring,
error::{ErrorKind, ReportableError, UserFacingError},
proxy::{connect_compute::ConnectMechanism, retry::ShouldRetry},
rate_limiter::EndpointRateLimiter,
Host,
};

@@ -23,6 +25,7 @@ use super::conn_pool::{poll_client, Client, ConnInfo, GlobalConnPool};
pub struct PoolingBackend {
pub pool: Arc<GlobalConnPool<tokio_postgres::Client>>,
pub config: &'static ProxyConfig,
pub endpoint_rate_limiter: Arc<EndpointRateLimiter>,
}

impl PoolingBackend {
@@ -38,6 +41,12 @@ impl PoolingBackend {
if !check_peer_addr_is_in_list(&ctx.peer_addr, &allowed_ips) {
return Err(AuthError::ip_address_not_allowed(ctx.peer_addr));
}
if !self
.endpoint_rate_limiter
.check(conn_info.user_info.endpoint.clone().into(), 1)
{
return Err(AuthError::too_many_connections());
}
let cached_secret = match maybe_secret {
Some(secret) => secret,
None => backend.get_role_secret(ctx).await?,
@@ -131,6 +140,8 @@ pub enum HttpConnError {
AuthError(#[from] AuthError),
#[error("wake_compute returned error")]
WakeCompute(#[from] WakeComputeError),
#[error("error acquiring resource permit: {0}")]
TooManyConnectionAttempts(#[from] ApiLockError),
}

impl ReportableError for HttpConnError {
@@ -141,6 +152,7 @@ impl ReportableError for HttpConnError {
HttpConnError::GetAuthInfo(a) => a.get_error_kind(),
HttpConnError::AuthError(a) => a.get_error_kind(),
HttpConnError::WakeCompute(w) => w.get_error_kind(),
HttpConnError::TooManyConnectionAttempts(w) => w.get_error_kind(),
}
}
}
@@ -153,6 +165,9 @@ impl UserFacingError for HttpConnError {
HttpConnError::GetAuthInfo(c) => c.to_string_client(),
HttpConnError::AuthError(c) => c.to_string_client(),
HttpConnError::WakeCompute(c) => c.to_string_client(),
HttpConnError::TooManyConnectionAttempts(_) => {
"Failed to acquire permit to connect to the database. Too many database connection attempts are currently ongoing.".to_owned()
}
}
}
}
@@ -165,6 +180,15 @@ impl ShouldRetry for HttpConnError {
HttpConnError::GetAuthInfo(_) => false,
HttpConnError::AuthError(_) => false,
HttpConnError::WakeCompute(_) => false,
HttpConnError::TooManyConnectionAttempts(_) => false,
}
}
fn should_retry_database_address(&self) -> bool {
match self {
HttpConnError::ConnectionError(e) => e.should_retry_database_address(),
// we never checked cache validity
HttpConnError::TooManyConnectionAttempts(_) => false,
_ => true,
}
}
}
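Across `compute.rs`, the retry module, and this file, the new `TooManyConnectionAttempts` variant is consistently classified as non-retryable. The sketch below illustrates that classification pattern in isolation; `ConnError` and `could_retry` are illustrative stand-ins and not the proxy's actual `ShouldRetry` trait.

```rust
use std::io::{Error as IoError, ErrorKind};

/// Simplified error set: each variant says whether retrying the same
/// compute address could plausibly help.
#[derive(Debug)]
enum ConnError {
    Io(IoError),
    AuthFailed,
    TooManyConnectionAttempts,
}

trait RetryClassify {
    fn could_retry(&self) -> bool;
}

impl RetryClassify for ConnError {
    fn could_retry(&self) -> bool {
        match self {
            // transient network trouble: worth another attempt
            ConnError::Io(_) => true,
            // bad credentials or an exhausted permit won't fix themselves
            ConnError::AuthFailed => false,
            ConnError::TooManyConnectionAttempts => false,
        }
    }
}

fn main() {
    let io = ConnError::Io(IoError::new(ErrorKind::ConnectionReset, "reset"));
    assert!(io.could_retry());
    assert!(!ConnError::AuthFailed.could_retry());
    assert!(!ConnError::TooManyConnectionAttempts.could_retry());
}
```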
Some files were not shown because too many files have changed in this diff.