Compare commits

...

12 Commits

Author SHA1 Message Date
Konstantin Knizhnik
8d04f3c0a0 Add redo filter for read-only replica which enables redo only for locally available pages 2023-02-03 19:27:22 +02:00
bojanserafimov
be81db21b9 Revert accidental change (#3538) 2023-02-03 17:54:12 +02:00
Joonas Koivunen
f2d89761c2 feat: LayerMap::replace (#3513)
Cc: #3486

Adds a method to replace a particular layer in the LayerMap for the
purposes of remote layer download and layer eviction. In those use cases
the read lock on the layer map needs to be released after the initial
search, but other operations could modify the layer map before the
replacing thread gets to run.

Co-authored-by: bojanserafimov <bojan.serafimov7@gmail.com>
2023-02-03 15:33:46 +02:00
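As a hedged illustration of how a caller might drive the new API (the names `LayerMap`, `BatchedUpdates::replace_historic`, and the `Replacement` enum are taken from the diff further below; the eviction wrapper itself is hypothetical):

```rust
use std::sync::Arc;

use pageserver::tenant::layer_map::{LayerMap, Replacement};
use pageserver::tenant::storage_layer::Layer;

/// Hypothetical eviction step: swap the resident `local` layer for its
/// `remote` counterpart, tolerating concurrent layer map changes.
fn swap_layer<L: Layer + ?Sized>(
    map: &mut LayerMap<L>,
    local: &Arc<L>,
    remote: Arc<L>,
) -> anyhow::Result<bool> {
    let mut updates = map.batch_update();
    match updates.replace_historic(local, remote)? {
        Replacement::Replaced { .. } => Ok(true),
        // Some other task removed or replaced the layer after our initial
        // search released the read lock; report failure instead of retrying.
        Replacement::NotFound | Replacement::RemovalBuffered | Replacement::Unexpected(_) => {
            Ok(false)
        }
    }
}
```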
Sergey Melnikov
a0372158a0 Add build_info metric to storage broker (#3525)
Add a libmetrics_build_info metric with the commit SHA to storage_broker
/metrics, to match the behaviour of proxy, pageserver and safekeeper.
2023-02-03 12:23:27 +01:00
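A build-info metric of this kind is conventionally a constant gauge whose payload lives in its labels. A minimal sketch with the `prometheus` crate (only the `libmetrics_build_info` name comes from the message above; the exact label set and helper are assumptions):

```rust
use once_cell::sync::Lazy;
use prometheus::{register_int_gauge_vec, IntGaugeVec};

// The gauge value is always 1; the commit SHA travels in the label.
static BUILD_INFO: Lazy<IntGaugeVec> = Lazy::new(|| {
    register_int_gauge_vec!(
        "libmetrics_build_info",
        "Build/version information",
        &["revision"]
    )
    .expect("failed to register libmetrics_build_info")
});

pub fn expose_build_info(commit_sha: &str) {
    BUILD_INFO.with_label_values(&[commit_sha]).set(1);
}
```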
Anastasia Lubennikova
83048a4adc Handle errors during metric collection. (#3521)
Don't exit the loop if one of the tenants fails to scrape its metrics.
fixes #3490
2023-02-03 12:37:34 +02:00
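Schematically, the fix replaces `?` propagation with a log-and-continue per tenant (the real change is in the `collect_metrics_iteration` diff below; `scrape_tenant` and `push_metrics` here are hypothetical stand-ins):

```rust
// Per-tenant failures are logged and skipped, so one broken tenant
// cannot abort the whole metrics collection iteration.
async fn collect_all(tenants: Vec<TenantId>) {
    for tenant_id in tenants {
        match scrape_tenant(tenant_id).await {
            Ok(metrics) => push_metrics(tenant_id, metrics),
            Err(err) => {
                tracing::error!("metrics collection failed for tenant {tenant_id}: {err:?}");
                continue;
            }
        }
    }
}
```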
Konstantin Knizhnik
f71b1b174d Check correctness of file_cache_size_limit (#3530)
2023-02-03 08:01:26 +02:00
Dmitry Ivanov
96e78394f5 [proxy] Fix project (aka endpoint) init in the password hack handler (#3529)
The project/endpoint should be set in the original (non-as_ref'd) creds,
because we call `wake_compute` not only in `try_password_hack` but also
later in the connection retry logic.

This PR also removes the obsolete `as_ref` method and makes the code
simpler because we no longer need this complication after a recent
refactoring.

Further action points: finally introduce typestate in creds (planned).
2023-02-02 22:56:15 +02:00
bojanserafimov
ada933eb42 Pageserver read trace utils (#2795)
List, dump, and analyze read traces.
2023-02-02 15:33:40 -05:00
Kirill Bulatov
f6a10f4693 Use regular layer names instead of special test ones (#3524)
We do not need a special enum variant for testing file names, nor its
special handling across the code.
The current tests are able to create regular layers with normal layer
names, as this PR shows.
2023-02-02 14:52:17 +02:00
Vadim Kharitonov
d25307dced Compile postgres with ICU support 2023-02-02 13:33:38 +01:00
Kirill Bulatov
2759f1a22e Evict layers on demand (#3486)
Closes https://github.com/neondatabase/neon/issues/3439

Adds a set of commands to manipulate the layer map:
* dump the layer map contents
* evict a layer from the layer map (remove the local file and put the
remote layer in the layer map instead)
* download the layer (the operation reversing the eviction)

The commands will change later, when statistics are added on top, so
the swagger schema is not adjusted.

The commands might have issues with a large number of layers: no pagination
is done for the dump command, and the eviction and download commands look for
the layer to evict/download by iterating over all layers sequentially and
comparing the layer names.
For now, that seems tolerable (a "big" number of layers is ~2_000)
and further experiments are needed.

---------

Co-authored-by: Christian Schwarz <christian@neon.tech>
2023-02-02 12:14:44 +02:00
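A hedged sketch of exercising the three new endpoints (the routes match those registered in the pageserver HTTP diff below; the management port, tenant/timeline IDs and layer file name are illustrative placeholders, and reqwest's `blocking` and `json` features are assumed):

```rust
use serde_json::Value;

fn main() -> anyhow::Result<()> {
    // Illustrative pageserver management API base URL.
    let base = "http://127.0.0.1:9898/v1/tenant/<tenant_id>/timeline/<timeline_id>";
    let client = reqwest::blocking::Client::new();

    // Dump the layer map contents.
    let layer_map: Value = client.get(format!("{base}/layer")).send()?.json()?;
    println!("{layer_map:#}");

    // Evict a layer: remove the local file, keep a remote layer in the map.
    client.delete(format!("{base}/layer/<layer_file_name>")).send()?;

    // Download it again, reversing the eviction.
    client.get(format!("{base}/layer/<layer_file_name>")).send()?;
    Ok(())
}
```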
Kirill Bulatov
f474495ba0 Publish build stats that are easy to browse (#3514)
Adds two new tags, `run-extra-build-macos` and `run-extra-build-stats`,
to trigger the corresponding build jobs on any PR.

On every build for `main` or PR with `run-extra-build-stats` tag, publish a GitHub commit status with the link to the `cargo build --all --release --timings` report.
2023-02-02 11:18:42 +02:00
31 changed files with 1699 additions and 416 deletions

View File

@@ -15,6 +15,7 @@
!proxy/
!safekeeper/
!storage_broker/
!trace/
!vendor/postgres-v14/
!vendor/postgres-v15/
!workspace_hack/

View File

@@ -4,6 +4,7 @@ on:
push:
branches:
- main
pull_request:
defaults:
run:
@@ -20,6 +21,7 @@ env:
jobs:
check-macos-build:
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')
timeout-minutes: 90
runs-on: macos-latest
@@ -93,11 +95,16 @@ jobs:
run: ./run_clippy.sh
gather-rust-build-stats:
timeout-minutes: 90
runs-on: ubuntu-latest
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats')
runs-on: [ self-hosted, gen3, large ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
env:
BUILD_TYPE: release
# remove the cachepot wrapper and build without crate caches
RUSTC_WRAPPER: ""
# builds with incremental compilation produce partial results,
# so do not attempt to cache this build; also disable incremental compilation
CARGO_INCREMENTAL: 0
@@ -109,11 +116,6 @@ jobs:
submodules: true
fetch-depth: 1
- name: Install Ubuntu postgres dependencies
run: |
sudo apt update
sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev libssl-dev protobuf-compiler
# Some of our rust modules use FFI and need those to be checked
- name: Get postgres headers
run: make postgres-headers -j$(nproc)
@@ -122,7 +124,31 @@ jobs:
run: cargo build --all --release --timings
- name: Upload the build stats
uses: actions/upload-artifact@v3
id: upload-stats
env:
BUCKET: neon-github-public-dev
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
run: |
REPORT_URL=https://${BUCKET}.s3.amazonaws.com/build-stats/${SHA}/${GITHUB_RUN_ID}/cargo-timing.html
aws s3 cp --only-show-errors ./target/cargo-timings/cargo-timing.html "s3://${BUCKET}/build-stats/${SHA}/${GITHUB_RUN_ID}/"
echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
- name: Publish build stats report
uses: actions/github-script@v6
env:
REPORT_URL: ${{ steps.upload-stats.outputs.report-url }}
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
with:
name: neon-${{ runner.os }}-release-build-stats
path: ./target/cargo-timings/
script: |
const { REPORT_URL, SHA } = process.env
await github.rest.repos.createCommitStatus({
owner: context.repo.owner,
repo: context.repo.repo,
sha: `${SHA}`,
state: 'success',
target_url: `${REPORT_URL}`,
context: `Build stats (release)`,
})

Cargo.lock (generated)
View File

@@ -30,9 +30,9 @@ dependencies = [
[[package]]
name = "ahash"
version = "0.8.2"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf6ccdb167abbf410dcb915cabd428929d7f6a04980b54a11f26a39f1c7f7107"
checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f"
dependencies = [
"cfg-if",
"once_cell",
@@ -143,9 +143,9 @@ dependencies = [
[[package]]
name = "async-trait"
version = "0.1.61"
version = "0.1.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "705339e0e4a9690e2908d2b3d049d85682cf19fbd5782494498fbf7003a6a282"
checksum = "1cd7fce9ba8c3c042128ce72d8b2ddbf3a05747efb67ea0313c635e10bda47a2"
dependencies = [
"proc-macro2",
"quote",
@@ -498,9 +498,9 @@ dependencies = [
[[package]]
name = "axum"
version = "0.6.2"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1304eab461cf02bd70b083ed8273388f9724c549b316ba3d1e213ce0e9e7fb7e"
checksum = "e5694b64066a2459918d8074c2ce0d5a88f409431994c2356617c8ae0c4721fc"
dependencies = [
"async-trait",
"axum-core",
@@ -527,9 +527,9 @@ dependencies = [
[[package]]
name = "axum-core"
version = "0.3.1"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f487e40dc9daee24d8a1779df88522f159a54a980f99cfbe43db0be0bd3444a8"
checksum = "1cae3e661676ffbacb30f1a824089a8c9150e71017f7e1e38f2aa32009188d34"
dependencies = [
"async-trait",
"bytes",
@@ -623,9 +623,9 @@ dependencies = [
[[package]]
name = "bstr"
version = "1.1.0"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b45ea9b00a7b3f2988e9a65ad3917e62123c38dba709b666506207be96d1790b"
checksum = "b7f0778972c64420fdedc63f09919c8a88bda7b25135357fd25a5d9f3257e832"
dependencies = [
"memchr",
"once_cell",
@@ -647,9 +647,9 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
[[package]]
name = "bytes"
version = "1.3.0"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfb24e866b15a1af2a1b663f10c6b6b8f397a84aadb828f12e5b289ec23a3a3c"
checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be"
dependencies = [
"serde",
]
@@ -672,9 +672,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cc"
version = "1.0.78"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a20104e2335ce8a659d6dd92a51a767a0c062599c73b343fd152cb401e828c3d"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
[[package]]
name = "cexpr"
@@ -756,9 +756,9 @@ dependencies = [
[[package]]
name = "clap"
version = "4.1.1"
version = "4.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ec7a4128863c188deefe750ac1d1dfe66c236909f845af04beed823638dc1b2"
checksum = "f13b9c79b5d1dd500d20ef541215a6423c75829ef43117e1b4d17fd8af0b5d76"
dependencies = [
"bitflags",
"clap_derive",
@@ -838,7 +838,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"clap 4.1.1",
"clap 4.1.4",
"futures",
"hyper",
"notify",
@@ -896,7 +896,7 @@ name = "control_plane"
version = "0.1.0"
dependencies = [
"anyhow",
"clap 4.1.1",
"clap 4.1.4",
"comfy-table",
"git-version",
"nix",
@@ -1077,9 +1077,9 @@ dependencies = [
[[package]]
name = "cxx"
version = "1.0.86"
version = "1.0.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d1075c37807dcf850c379432f0df05ba52cc30f279c5cfc43cc221ce7f8579"
checksum = "bc831ee6a32dd495436e317595e639a587aa9907bef96fe6e6abc290ab6204e9"
dependencies = [
"cc",
"cxxbridge-flags",
@@ -1089,9 +1089,9 @@ dependencies = [
[[package]]
name = "cxx-build"
version = "1.0.86"
version = "1.0.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5044281f61b27bc598f2f6647d480aed48d2bf52d6eb0b627d84c0361b17aa70"
checksum = "94331d54f1b1a8895cd81049f7eaaaef9d05a7dcb4d1fd08bf3ff0806246789d"
dependencies = [
"cc",
"codespan-reporting",
@@ -1104,15 +1104,15 @@ dependencies = [
[[package]]
name = "cxxbridge-flags"
version = "1.0.86"
version = "1.0.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61b50bc93ba22c27b0d31128d2d130a0a6b3d267ae27ef7e4fae2167dfe8781c"
checksum = "48dcd35ba14ca9b40d6e4b4b39961f23d835dbb8eed74565ded361d93e1feb8a"
[[package]]
name = "cxxbridge-macro"
version = "1.0.86"
version = "1.0.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39e61fda7e62115119469c7b3591fd913ecca96fb766cfd3f2e2502ab7bc87a5"
checksum = "81bbeb29798b407ccd82a3324ade1a7286e0d29851475990b612670f6f5124d2"
dependencies = [
"proc-macro2",
"quote",
@@ -1221,15 +1221,15 @@ dependencies = [
[[package]]
name = "either"
version = "1.8.0"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797"
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
[[package]]
name = "encoding_rs"
version = "0.8.31"
version = "0.8.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9852635589dc9f9ea1b6fe9f05b50ef208c85c834a562f0c6abb1c475736ec2b"
checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394"
dependencies = [
"cfg-if",
]
@@ -1303,7 +1303,7 @@ dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"windows-sys",
"windows-sys 0.42.0",
]
[[package]]
@@ -1348,9 +1348,9 @@ dependencies = [
[[package]]
name = "futures"
version = "0.3.25"
version = "0.3.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38390104763dc37a5145a53c29c63c1290b5d316d6086ec32c293f6736051bb0"
checksum = "13e2792b0ff0340399d58445b88fd9770e3489eff258a4cbc1523418f12abf84"
dependencies = [
"futures-channel",
"futures-core",
@@ -1363,9 +1363,9 @@ dependencies = [
[[package]]
name = "futures-channel"
version = "0.3.25"
version = "0.3.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52ba265a92256105f45b719605a571ffe2d1f0fea3807304b522c1d778f79eed"
checksum = "2e5317663a9089767a1ec00a487df42e0ca174b61b4483213ac24448e4664df5"
dependencies = [
"futures-core",
"futures-sink",
@@ -1373,15 +1373,15 @@ dependencies = [
[[package]]
name = "futures-core"
version = "0.3.25"
version = "0.3.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04909a7a7e4633ae6c4a9ab280aeb86da1236243a77b694a49eacd659a4bd3ac"
checksum = "ec90ff4d0fe1f57d600049061dc6bb68ed03c7d2fbd697274c41805dcb3f8608"
[[package]]
name = "futures-executor"
version = "0.3.25"
version = "0.3.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7acc85df6714c176ab5edf386123fafe217be88c0840ec11f199441134a074e2"
checksum = "e8de0a35a6ab97ec8869e32a2473f4b1324459e14c29275d14b10cb1fd19b50e"
dependencies = [
"futures-core",
"futures-task",
@@ -1390,15 +1390,15 @@ dependencies = [
[[package]]
name = "futures-io"
version = "0.3.25"
version = "0.3.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "00f5fb52a06bdcadeb54e8d3671f8888a39697dcb0b81b23b55174030427f4eb"
checksum = "bfb8371b6fb2aeb2d280374607aeabfc99d95c72edfe51692e42d3d7f0d08531"
[[package]]
name = "futures-macro"
version = "0.3.25"
version = "0.3.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bdfb8ce053d86b91919aad980c220b1fb8401a9394410e1c289ed7e66b61835d"
checksum = "95a73af87da33b5acf53acfebdc339fe592ecf5357ac7c0a7734ab9d8c876a70"
dependencies = [
"proc-macro2",
"quote",
@@ -1407,15 +1407,15 @@ dependencies = [
[[package]]
name = "futures-sink"
version = "0.3.25"
version = "0.3.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39c15cf1a4aa79df40f1bb462fb39676d0ad9e366c2a33b590d7c66f4f81fcf9"
checksum = "f310820bb3e8cfd46c80db4d7fb8353e15dfff853a127158425f31e0be6c8364"
[[package]]
name = "futures-task"
version = "0.3.25"
version = "0.3.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ffb393ac5d9a6eaa9d3fdf37ae2776656b706e200c8e16b1bdb227f5198e6ea"
checksum = "dcf79a1bf610b10f42aea489289c5a2c478a786509693b80cd39c44ccd936366"
[[package]]
name = "futures-timer"
@@ -1425,9 +1425,9 @@ checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c"
[[package]]
name = "futures-util"
version = "0.3.25"
version = "0.3.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "197676987abd2f9cadff84926f410af1c183608d36641465df73ae8211dc65d6"
checksum = "9c1d6de3acfef38d2be4b1f543f553131788603495be83da675e180c8d6b7bd1"
dependencies = [
"futures-channel",
"futures-core",
@@ -1464,9 +1464,9 @@ dependencies = [
[[package]]
name = "gimli"
version = "0.27.0"
version = "0.27.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dec7af912d60cdbd3677c1af9352ebae6fb8394d165568a2234df0fa00f87793"
checksum = "221996f774192f0f718773def8201c4ae31f02616a54ccfc2d358bb0e5cefdec"
[[package]]
name = "git-version"
@@ -1536,7 +1536,7 @@ version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
dependencies = [
"ahash 0.8.2",
"ahash 0.8.3",
]
[[package]]
@@ -1550,9 +1550,9 @@ dependencies = [
[[package]]
name = "heck"
version = "0.4.0"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "hermit-abi"
@@ -1814,7 +1814,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7d6c6f8c91b4b9ed43484ad1a938e393caf35960fce7f82a040497207bd8e9e"
dependencies = [
"libc",
"windows-sys",
"windows-sys 0.42.0",
]
[[package]]
@@ -1832,7 +1832,7 @@ dependencies = [
"hermit-abi 0.2.6",
"io-lifetimes",
"rustix",
"windows-sys",
"windows-sys 0.42.0",
]
[[package]]
@@ -1852,9 +1852,9 @@ checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440"
[[package]]
name = "js-sys"
version = "0.3.60"
version = "0.3.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47"
checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730"
dependencies = [
"wasm-bindgen",
]
@@ -2039,9 +2039,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
version = "0.6.2"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa"
checksum = "f2e212582ede878b109755efd0773a4f0f4ec851584cf0aefbeb4d9ecc114822"
dependencies = [
"adler",
]
@@ -2055,7 +2055,7 @@ dependencies = [
"libc",
"log",
"wasi",
"windows-sys",
"windows-sys 0.42.0",
]
[[package]]
@@ -2099,9 +2099,9 @@ dependencies = [
[[package]]
name = "notify"
version = "5.0.0"
version = "5.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed2c66da08abae1c024c01d635253e402341b4060a12e99b31c7594063bf490a"
checksum = "58ea850aa68a06e48fdb069c0ec44d0d64c8dbffa49bf3b6f7f0a901fdea1ba9"
dependencies = [
"bitflags",
"crossbeam-channel",
@@ -2112,7 +2112,7 @@ dependencies = [
"libc",
"mio",
"walkdir",
"winapi",
"windows-sys 0.42.0",
]
[[package]]
@@ -2167,9 +2167,9 @@ dependencies = [
[[package]]
name = "object"
version = "0.30.2"
version = "0.30.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b8c786513eb403643f2a88c244c2aaa270ef2153f55094587d0c48a3cf22a83"
checksum = "ea86265d3d3dcb6a27fc51bd29a4bf387fae9d2986b823079d4986af253eb439"
dependencies = [
"memchr",
]
@@ -2305,9 +2305,9 @@ dependencies = [
[[package]]
name = "os_info"
version = "3.5.1"
version = "3.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4750134fb6a5d49afc80777394ad5d95b04bc12068c6abb92fae8f43817270f"
checksum = "5c424bc68d15e0778838ac013b5b3449544d8133633d8016319e7e05a820b8c0"
dependencies = [
"log",
"serde",
@@ -2336,7 +2336,7 @@ dependencies = [
"byteorder",
"bytes",
"chrono",
"clap 4.1.1",
"clap 4.1.4",
"close_fds",
"const_format",
"consumption_metrics",
@@ -2418,15 +2418,15 @@ dependencies = [
[[package]]
name = "parking_lot_core"
version = "0.9.6"
version = "0.9.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba1ef8814b5c993410bb3adfad7a5ed269563e4a2f90c41f5d85be7fb47133bf"
checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-sys",
"windows-sys 0.45.0",
]
[[package]]
@@ -2782,7 +2782,7 @@ dependencies = [
"bstr",
"bytes",
"chrono",
"clap 4.1.1",
"clap 4.1.4",
"consumption_metrics",
"futures",
"git-version",
@@ -2882,9 +2882,9 @@ dependencies = [
[[package]]
name = "rayon-core"
version = "1.10.1"
version = "1.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cac410af5d00ab6884528b4ab69d1e8e146e8d471201800fa1b4524126de6ad3"
checksum = "356a0625f1954f730c0201cdab48611198dc6ce21f4acff55089b5a78e6e835b"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
@@ -2974,11 +2974,11 @@ dependencies = [
[[package]]
name = "reqwest"
version = "0.11.13"
version = "0.11.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68cc60575865c7831548863cc02356512e3f1dc2f3f82cb837d7fc4cc8f3c97c"
checksum = "21eed90ec8570952d53b772ecf8f206aa1ec9a3d76b2521c56c42973f2d91ee9"
dependencies = [
"base64 0.13.1",
"base64 0.21.0",
"bytes",
"encoding_rs",
"futures-core",
@@ -3115,7 +3115,7 @@ dependencies = [
"io-lifetimes",
"libc",
"linux-raw-sys",
"windows-sys",
"windows-sys 0.42.0",
]
[[package]]
@@ -3181,7 +3181,7 @@ dependencies = [
"async-trait",
"byteorder",
"bytes",
"clap 4.1.1",
"clap 4.1.4",
"const_format",
"crc32c",
"fs2",
@@ -3242,7 +3242,7 @@ version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "713cfb06c7059f3588fb8044c0fad1d09e3c01d225e25b9220dbfdcf16dbb1b3"
dependencies = [
"windows-sys",
"windows-sys 0.42.0",
]
[[package]]
@@ -3269,9 +3269,9 @@ dependencies = [
[[package]]
name = "security-framework"
version = "2.7.0"
version = "2.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bc1bb97804af6631813c55739f771071e0f2ed33ee20b68c86ec505d906356c"
checksum = "a332be01508d814fed64bf28f798a146d73792121129962fdf335bb3c49a4254"
dependencies = [
"bitflags",
"core-foundation",
@@ -3282,9 +3282,9 @@ dependencies = [
[[package]]
name = "security-framework-sys"
version = "2.6.1"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0160a13a177a45bfb43ce71c01580998474f556ad854dcbca936dd2841a5c556"
checksum = "31c9bb296072e961fcbd8853511dd39c2d8be2deb1e17c6860b1d30732b323b4"
dependencies = [
"core-foundation-sys",
"libc",
@@ -3298,9 +3298,9 @@ checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a"
[[package]]
name = "sentry"
version = "0.29.1"
version = "0.29.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17ad137b9df78294b98cab1a650bef237cc6c950e82e5ce164655e674d07c5cc"
checksum = "a6097dc270a9c4555c5d6222ed243eaa97ff38e29299ed7c5cb36099033c604e"
dependencies = [
"httpdate",
"reqwest",
@@ -3316,9 +3316,9 @@ dependencies = [
[[package]]
name = "sentry-backtrace"
version = "0.29.1"
version = "0.29.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "afe4800806552aab314129761d5d3b3d422284eca3de2ab59e9fd133636cbd3d"
checksum = "9d92d1e4d591534ae4f872d6142f3b500f4ffc179a6aed8a3e86c7cc96d10a6a"
dependencies = [
"backtrace",
"once_cell",
@@ -3328,9 +3328,9 @@ dependencies = [
[[package]]
name = "sentry-contexts"
version = "0.29.1"
version = "0.29.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a42938426670f6e7974989cd1417837a96dd8bbb01567094f567d6acb360bf88"
checksum = "3afa877b1898ff67dd9878cf4bec4e53cef7d3be9f14b1fc9e4fcdf36f8e4259"
dependencies = [
"hostname",
"libc",
@@ -3342,9 +3342,9 @@ dependencies = [
[[package]]
name = "sentry-core"
version = "0.29.1"
version = "0.29.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4df9b9d8de2658a1ecd4e45f7b06c80c5dd97b891bfbc7c501186189b7e9bbdf"
checksum = "fc43eb7e4e3a444151a0fe8a0e9ce60eabd905dae33d66e257fa26f1b509c1bd"
dependencies = [
"once_cell",
"rand",
@@ -3355,9 +3355,9 @@ dependencies = [
[[package]]
name = "sentry-panic"
version = "0.29.1"
version = "0.29.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0af37b8500f273e511ebd6eb0d342ff7937d64ce3f134764b2b4653112d48cb4"
checksum = "ccab4fab11e3e63c45f4524bee2e75cde39cdf164cb0b0cbe6ccd1948ceddf66"
dependencies = [
"sentry-backtrace",
"sentry-core",
@@ -3365,9 +3365,9 @@ dependencies = [
[[package]]
name = "sentry-types"
version = "0.29.1"
version = "0.29.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccc95faa4078768a6bf8df45e2b894bbf372b3dbbfb364e9429c1c58ab7545c6"
checksum = "f63708ec450b6bdcb657af760c447416d69c38ce421f34e5e2e9ce8118410bc7"
dependencies = [
"debugid",
"getrandom",
@@ -3580,7 +3580,7 @@ dependencies = [
"anyhow",
"async-stream",
"bytes",
"clap 4.1.1",
"clap 4.1.4",
"const_format",
"futures",
"futures-core",
@@ -3661,9 +3661,9 @@ dependencies = [
[[package]]
name = "sync_wrapper"
version = "0.1.1"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20518fe4a4c9acf048008599e464deb21beeae3d3578418951a189c235a7a9a8"
checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
[[package]]
name = "synstructure"
@@ -3822,9 +3822,9 @@ dependencies = [
[[package]]
name = "tokio"
version = "1.24.2"
version = "1.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "597a12a59981d9e3c38d216785b0c37399f6e415e8d0712047620f189371b0bb"
checksum = "c8e00990ebabbe4c14c08aca901caed183ecd5c09562a12c824bb53d3c3fd3af"
dependencies = [
"autocfg",
"bytes",
@@ -3836,7 +3836,7 @@ dependencies = [
"signal-hook-registry",
"socket2",
"tokio-macros",
"windows-sys",
"windows-sys 0.42.0",
]
[[package]]
@@ -3961,18 +3961,18 @@ dependencies = [
[[package]]
name = "toml"
version = "0.5.10"
version = "0.5.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1333c76748e868a4d9d1017b5ab53171dfd095f70c712fdb4653a406547f598f"
checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234"
dependencies = [
"serde",
]
[[package]]
name = "toml_datetime"
version = "0.5.0"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "808b51e57d0ef8f71115d8f3a01e7d3750d01c79cac4b3eda910f4389fdf92fd"
checksum = "4553f467ac8e3d374bc9a177a26801e5d0f9b211aa1673fb137a403afd1c9cf5"
dependencies = [
"serde",
]
@@ -4089,6 +4089,17 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52"
[[package]]
name = "trace"
version = "0.1.0"
dependencies = [
"anyhow",
"clap 4.1.4",
"pageserver_api",
"utils",
"workspace_hack",
]
[[package]]
name = "tracing"
version = "0.1.37"
@@ -4247,9 +4258,9 @@ dependencies = [
[[package]]
name = "unicode-bidi"
version = "0.3.8"
version = "0.3.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992"
checksum = "d54675592c1dbefd78cbd98db9bacd89886e1ca50692a0692baefffdeb92dd58"
[[package]]
name = "unicode-ident"
@@ -4367,9 +4378,9 @@ dependencies = [
[[package]]
name = "uuid"
version = "1.2.2"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "422ee0de9031b5b948b97a8fc04e3aa35230001a722ddd27943e0be31564ce4c"
checksum = "1674845326ee10d37ca60470760d4288a6f80f304007d92e5c53bab78c9cfd79"
dependencies = [
"getrandom",
"serde",
@@ -4392,7 +4403,7 @@ name = "wal_craft"
version = "0.1.0"
dependencies = [
"anyhow",
"clap 4.1.1",
"clap 4.1.4",
"env_logger",
"log",
"once_cell",
@@ -4431,9 +4442,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasm-bindgen"
version = "0.2.83"
version = "0.2.84"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268"
checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
@@ -4441,9 +4452,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.83"
version = "0.2.84"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142"
checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9"
dependencies = [
"bumpalo",
"log",
@@ -4456,9 +4467,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.33"
version = "0.4.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23639446165ca5a5de86ae1d8896b737ae80319560fbaa4c2887b7da6e7ebd7d"
checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454"
dependencies = [
"cfg-if",
"js-sys",
@@ -4468,9 +4479,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.83"
version = "0.2.84"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810"
checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@@ -4478,9 +4489,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.83"
version = "0.2.84"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c"
checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6"
dependencies = [
"proc-macro2",
"quote",
@@ -4491,15 +4502,15 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.83"
version = "0.2.84"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f"
checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d"
[[package]]
name = "web-sys"
version = "0.3.60"
version = "0.3.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bcda906d8be16e728fd5adc5b729afad4e444e106ab28cd1c7256e54fa61510f"
checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97"
dependencies = [
"js-sys",
"wasm-bindgen",
@@ -4526,9 +4537,9 @@ dependencies = [
[[package]]
name = "which"
version = "4.3.0"
version = "4.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c831fbbee9e129a8cf93e7747a82da9d95ba8e16621cae60ec2cdc849bacb7b"
checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269"
dependencies = [
"either",
"libc",
@@ -4581,6 +4592,30 @@ dependencies = [
"windows_x86_64_msvc",
]
[[package]]
name = "windows-sys"
version = "0.45.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.42.1"
@@ -4639,14 +4674,13 @@ dependencies = [
"anyhow",
"bytes",
"chrono",
"clap 4.1.1",
"clap 4.1.4",
"crossbeam-utils",
"either",
"fail",
"futures",
"futures-channel",
"futures-executor",
"futures-task",
"futures-util",
"hashbrown 0.12.3",
"indexmap",

View File

@@ -7,6 +7,7 @@ members = [
"safekeeper",
"storage_broker",
"workspace_hack",
"trace",
"libs/*",
]
@@ -31,7 +32,7 @@ bstr = "1.0"
byteorder = "1.4"
bytes = "1.0"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
clap = "4.0"
clap = { version = "4.0", features = ["derive"] }
close_fds = "0.3.2"
comfy-table = "6.1"
const_format = "0.2"

View File

@@ -10,7 +10,8 @@ ARG TAG=pinned
FROM debian:bullseye-slim AS build-deps
RUN apt update && \
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev \
libicu-dev
#########################################################################################
#
@@ -22,7 +23,7 @@ FROM build-deps AS pg-build
ARG PG_VERSION
COPY vendor/postgres-${PG_VERSION} postgres
RUN cd postgres && \
./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp && \
./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp --with-icu && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
# Install headers
@@ -234,10 +235,13 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
# Install:
# libreadline8 for psql
# libicu67, locales for collations (including ICU)
# libossp-uuid16 for extension ossp-uuid
# libgeos, libgdal, libsfcgal1, libproj and libprotobuf-c1 for PostGIS
RUN apt update && \
apt install --no-install-recommends -y \
locales \
libicu67 \
libreadline8 \
libossp-uuid16 \
libgeos-c1v5 \
@@ -246,7 +250,9 @@ RUN apt update && \
libprotobuf-c1 \
libsfcgal1 \
gdb && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
ENV LANG en_US.utf8
USER postgres
ENTRYPOINT ["/usr/local/bin/compute_ctl"]

View File

@@ -227,6 +227,52 @@ pub struct TimelineInfo {
pub state: TimelineState,
}
#[derive(Debug, Clone, Serialize)]
pub struct LayerMapInfo {
pub in_memory_layers: Vec<InMemoryLayerInfo>,
pub historic_layers: Vec<HistoricLayerInfo>,
}
#[serde_as]
#[derive(Debug, Clone, Serialize)]
#[serde(tag = "kind")]
pub enum InMemoryLayerInfo {
Open {
#[serde_as(as = "DisplayFromStr")]
lsn_start: Lsn,
},
Frozen {
#[serde_as(as = "DisplayFromStr")]
lsn_start: Lsn,
#[serde_as(as = "DisplayFromStr")]
lsn_end: Lsn,
},
}
#[serde_as]
#[derive(Debug, Clone, Serialize)]
#[serde(tag = "kind")]
pub enum HistoricLayerInfo {
Delta {
layer_file_name: String,
layer_file_size: Option<u64>,
#[serde_as(as = "DisplayFromStr")]
lsn_start: Lsn,
#[serde_as(as = "DisplayFromStr")]
lsn_end: Lsn,
remote: bool,
},
Image {
layer_file_name: String,
layer_file_size: Option<u64>,
#[serde_as(as = "DisplayFromStr")]
lsn_start: Lsn,
remote: bool,
},
}
#[derive(Debug, Serialize, Deserialize)]
pub struct DownloadRemoteLayersTaskSpawnRequest {
pub max_concurrent_downloads: NonZeroUsize,
@@ -267,7 +313,7 @@ pub struct TimelineGcRequest {
}
// Wrapped in libpq CopyData
#[derive(PartialEq, Eq)]
#[derive(PartialEq, Eq, Debug)]
pub enum PagestreamFeMessage {
Exists(PagestreamExistsRequest),
Nblocks(PagestreamNblocksRequest),
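Given the `#[serde(tag = "kind")]` and `DisplayFromStr` attributes above, a hedged sketch of what one entry serializes to (the field values are illustrative; `Lsn` renders in its usual `hi/lo` hex form):

```rust
use pageserver_api::models::HistoricLayerInfo;
use utils::lsn::Lsn;

fn main() {
    let info = HistoricLayerInfo::Image {
        layer_file_name: "illustrative-image-layer-name".to_string(),
        layer_file_size: Some(1024),
        lsn_start: Lsn(0x53424D69),
        remote: true,
    };
    // Prints:
    // {"kind":"Image","layer_file_name":"illustrative-image-layer-name",
    //  "layer_file_size":1024,"lsn_start":"0/53424D69","remote":true}
    println!("{}", serde_json::to_string(&info).unwrap());
}
```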

View File

@@ -1,8 +1,7 @@
use pageserver::keyspace::{KeyPartitioning, KeySpace};
use pageserver::repository::Key;
use pageserver::tenant::layer_map::LayerMap;
use pageserver::tenant::storage_layer::Layer;
use pageserver::tenant::storage_layer::{DeltaFileName, ImageFileName, LayerDescriptor};
use pageserver::tenant::storage_layer::{Layer, LayerDescriptor, LayerFileName};
use rand::prelude::{SeedableRng, SliceRandom, StdRng};
use std::cmp::{max, min};
use std::fs::File;
@@ -26,30 +25,15 @@ fn build_layer_map(filename_dump: PathBuf) -> LayerMap<LayerDescriptor> {
let mut updates = layer_map.batch_update();
for fname in filenames {
let fname = &fname.unwrap();
if let Some(imgfilename) = ImageFileName::parse_str(fname) {
let layer = LayerDescriptor {
key: imgfilename.key_range,
lsn: imgfilename.lsn..(imgfilename.lsn + 1),
is_incremental: false,
short_id: fname.to_string(),
};
updates.insert_historic(Arc::new(layer));
min_lsn = min(min_lsn, imgfilename.lsn);
max_lsn = max(max_lsn, imgfilename.lsn);
} else if let Some(deltafilename) = DeltaFileName::parse_str(fname) {
let layer = LayerDescriptor {
key: deltafilename.key_range.clone(),
lsn: deltafilename.lsn_range.clone(),
is_incremental: true,
short_id: fname.to_string(),
};
updates.insert_historic(Arc::new(layer));
min_lsn = min(min_lsn, deltafilename.lsn_range.start);
max_lsn = max(max_lsn, deltafilename.lsn_range.end);
} else {
panic!("unexpected filename {fname}");
}
let fname = fname.unwrap();
let fname = LayerFileName::from_str(&fname).unwrap();
let layer = LayerDescriptor::from(fname);
let lsn_range = layer.get_lsn_range();
min_lsn = min(min_lsn, lsn_range.start);
max_lsn = max(max_lsn, Lsn(lsn_range.end.0 - 1));
updates.insert_historic(Arc::new(layer));
}
println!("min: {min_lsn}, max: {max_lsn}");

View File

@@ -83,10 +83,7 @@ pub async fn collect_metrics(
return Ok(());
},
_ = ticker.tick() => {
if let Err(err) = collect_metrics_iteration(&client, &mut cached_metrics, metric_collection_endpoint, node_id, &ctx).await
{
error!("metrics collection failed: {err:?}");
}
collect_metrics_iteration(&client, &mut cached_metrics, metric_collection_endpoint, node_id, &ctx).await;
}
}
}
@@ -97,17 +94,18 @@ pub async fn collect_metrics(
/// Gather per-tenant and per-timeline metrics and send them to the `metric_collection_endpoint`.
/// Cache metrics to avoid sending the same metrics multiple times.
///
/// This function handles all errors internally
/// and doesn't break iteration if just one tenant fails.
///
/// TODO
/// - refactor this function (chunking+sending part) to reuse it in proxy module;
/// - improve error handling. Now if one tenant fails to collect metrics,
/// the whole iteration fails and metrics for other tenants are not collected.
pub async fn collect_metrics_iteration(
client: &reqwest::Client,
cached_metrics: &mut HashMap<PageserverConsumptionMetricsKey, u64>,
metric_collection_endpoint: &reqwest::Url,
node_id: NodeId,
ctx: &RequestContext,
) -> anyhow::Result<()> {
) {
let mut current_metrics: Vec<(PageserverConsumptionMetricsKey, u64)> = Vec::new();
trace!(
"starting collect_metrics_iteration. metric_collection_endpoint: {}",
@@ -115,7 +113,13 @@ pub async fn collect_metrics_iteration(
);
// get list of tenants
let tenants = mgr::list_tenants().await?;
let tenants = match mgr::list_tenants().await {
Ok(tenants) => tenants,
Err(err) => {
error!("failed to list tenants: {:?}", err);
return;
}
};
// iterate through list of Active tenants and collect metrics
for (tenant_id, tenant_state) in tenants {
@@ -123,7 +127,15 @@ pub async fn collect_metrics_iteration(
continue;
}
let tenant = mgr::get_tenant(tenant_id, true).await?;
let tenant = match mgr::get_tenant(tenant_id, true).await {
Ok(tenant) => tenant,
Err(err) => {
// It is possible that tenant was deleted between
// `list_tenants` and `get_tenant`, so just warn about it.
warn!("failed to get tenant {tenant_id:?}: {err:?}");
continue;
}
};
let mut tenant_resident_size = 0;
@@ -142,29 +154,51 @@ pub async fn collect_metrics_iteration(
timeline_written_size,
));
let (timeline_logical_size, is_exact) = timeline.get_current_logical_size(ctx)?;
// Only send timeline logical size when it is fully calculated.
if is_exact {
current_metrics.push((
PageserverConsumptionMetricsKey {
tenant_id,
timeline_id: Some(timeline.timeline_id),
metric: TIMELINE_LOGICAL_SIZE,
},
timeline_logical_size,
));
}
match timeline.get_current_logical_size(ctx) {
// Only send timeline logical size when it is fully calculated.
Ok((size, is_exact)) if is_exact => {
current_metrics.push((
PageserverConsumptionMetricsKey {
tenant_id,
timeline_id: Some(timeline.timeline_id),
metric: TIMELINE_LOGICAL_SIZE,
},
size,
));
}
Ok((_, _)) => {}
Err(err) => {
error!(
"failed to get current logical size for timeline {}: {err:?}",
timeline.timeline_id
);
continue;
}
};
}
let timeline_resident_size = timeline.get_resident_physical_size();
tenant_resident_size += timeline_resident_size;
}
let tenant_remote_size = tenant.get_remote_size().await?;
debug!(
"collected current metrics for tenant: {}: state={:?} resident_size={} remote_size={}",
tenant_id, tenant_state, tenant_resident_size, tenant_remote_size
);
match tenant.get_remote_size().await {
Ok(tenant_remote_size) => {
current_metrics.push((
PageserverConsumptionMetricsKey {
tenant_id,
timeline_id: None,
metric: REMOTE_STORAGE_SIZE,
},
tenant_remote_size,
));
}
Err(err) => {
error!(
"failed to get remote size for tenant {}: {err:?}",
tenant_id
);
}
}
current_metrics.push((
PageserverConsumptionMetricsKey {
@@ -175,15 +209,6 @@ pub async fn collect_metrics_iteration(
tenant_resident_size,
));
current_metrics.push((
PageserverConsumptionMetricsKey {
tenant_id,
timeline_id: None,
metric: REMOTE_STORAGE_SIZE,
},
tenant_remote_size,
));
// Note that this metric is calculated in a separate bgworker
// Here we only use cached value, which may lag behind the real latest one
let tenant_synthetic_size = tenant.get_cached_synthetic_size();
@@ -205,7 +230,7 @@ pub async fn collect_metrics_iteration(
if current_metrics.is_empty() {
trace!("no new metrics to send");
return Ok(());
return;
}
// Send metrics.
@@ -256,8 +281,6 @@ pub async fn collect_metrics_iteration(
}
}
}
Ok(())
}
/// Caclculate synthetic size for each active tenant

View File

@@ -7,7 +7,7 @@ use pageserver_api::models::DownloadRemoteLayersTaskSpawnRequest;
use remote_storage::GenericRemoteStorage;
use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::http::request::{must_get_query_param, parse_query_param};
use utils::http::request::{get_request_param, must_get_query_param, parse_query_param};
use super::models::{
StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse, TenantInfo,
@@ -317,10 +317,7 @@ async fn get_lsn_by_timestamp_handler(request: Request<Body>) -> Result<Response
let timestamp_pg = postgres_ffi::to_pg_timestamp(timestamp);
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let timeline = mgr::get_tenant(tenant_id, true)
.await
.and_then(|tenant| tenant.get_timeline(timeline_id, true))
.map_err(ApiError::NotFound)?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let result = timeline
.find_lsn_for_timestamp(timestamp_pg, &ctx)
.await
@@ -528,6 +525,62 @@ async fn tenant_size_handler(request: Request<Body>) -> Result<Response<Body>, A
)
}
async fn layer_map_info_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_id))?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let layer_map_info = timeline.layer_map_info();
json_response(StatusCode::OK, layer_map_info)
}
async fn layer_download_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let layer_file_name = get_request_param(&request, "layer_file_name")?;
check_permission(&request, Some(tenant_id))?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let downloaded = timeline
.download_layer(layer_file_name)
.await
.map_err(ApiError::InternalServerError)?;
match downloaded {
Some(true) => json_response(StatusCode::OK, ()),
Some(false) => json_response(StatusCode::NOT_MODIFIED, ()),
None => json_response(
StatusCode::BAD_REQUEST,
format!("Layer {tenant_id}/{timeline_id}/{layer_file_name} not found"),
),
}
}
async fn evict_timeline_layer_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let layer_file_name = get_request_param(&request, "layer_file_name")?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let evicted = timeline
.evict_layer(layer_file_name)
.await
.map_err(ApiError::InternalServerError)?;
match evicted {
Some(true) => json_response(StatusCode::OK, ()),
Some(false) => json_response(StatusCode::NOT_MODIFIED, ()),
None => json_response(
StatusCode::BAD_REQUEST,
format!("Layer {tenant_id}/{timeline_id}/{layer_file_name} not found"),
),
}
}
// Helper function to standardize the error messages we produce on bad durations
//
// Intended to be used with anyhow's `with_context`, e.g.:
@@ -804,12 +857,7 @@ async fn timeline_checkpoint_handler(request: Request<Body>) -> Result<Response<
check_permission(&request, Some(tenant_id))?;
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let tenant = mgr::get_tenant(tenant_id, true)
.await
.map_err(ApiError::NotFound)?;
let timeline = tenant
.get_timeline(timeline_id, true)
.map_err(ApiError::NotFound)?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
timeline
.freeze_and_flush()
.await
@@ -830,12 +878,7 @@ async fn timeline_download_remote_layers_handler_post(
let body: DownloadRemoteLayersTaskSpawnRequest = json_request(&mut request).await?;
check_permission(&request, Some(tenant_id))?;
let tenant = mgr::get_tenant(tenant_id, true)
.await
.map_err(ApiError::NotFound)?;
let timeline = tenant
.get_timeline(timeline_id, true)
.map_err(ApiError::NotFound)?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
match timeline.spawn_download_all_remote_layers(body).await {
Ok(st) => json_response(StatusCode::ACCEPTED, st),
Err(st) => json_response(StatusCode::CONFLICT, st),
@@ -846,15 +889,10 @@ async fn timeline_download_remote_layers_handler_get(
request: Request<Body>,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_id))?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let tenant = mgr::get_tenant(tenant_id, true)
.await
.map_err(ApiError::NotFound)?;
let timeline = tenant
.get_timeline(timeline_id, true)
.map_err(ApiError::NotFound)?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let info = timeline
.get_download_all_remote_layers_task_info()
.context("task never started since last pageserver process start")
@@ -862,6 +900,18 @@ async fn timeline_download_remote_layers_handler_get(
json_response(StatusCode::OK, info)
}
async fn active_timeline_of_active_tenant(
tenant_id: TenantId,
timeline_id: TimelineId,
) -> Result<Arc<Timeline>, ApiError> {
let tenant = mgr::get_tenant(tenant_id, true)
.await
.map_err(ApiError::NotFound)?;
tenant
.get_timeline(timeline_id, true)
.map_err(ApiError::NotFound)
}
async fn handler_404(_: Request<Body>) -> Result<Response<Body>, ApiError> {
json_response(
StatusCode::NOT_FOUND,
@@ -958,5 +1008,17 @@ pub fn make_router(
"/v1/tenant/:tenant_id/timeline/:timeline_id",
timeline_delete_handler,
)
.get(
"/v1/tenant/:tenant_id/timeline/:timeline_id/layer",
layer_map_info_handler,
)
.get(
"/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
layer_download_handler,
)
.delete(
"/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
evict_timeline_layer_handler,
)
.any(handler_404))
}

View File

@@ -59,6 +59,7 @@ use std::sync::Arc;
use utils::lsn::Lsn;
use historic_layer_coverage::BufferedHistoricLayerCoverage;
pub use historic_layer_coverage::Replacement;
use super::storage_layer::range_eq;
@@ -138,6 +139,24 @@ where
self.layer_map.remove_historic_noflush(layer)
}
/// Replaces the existing layer iff it is the `expected` one.
///
/// If the expected layer has already been removed, the new one will not be inserted by this function.
///
/// The returned `Replacement` describes a successful replacement or the reason why it could not
/// be done.
///
/// TODO replacement can be done without buffering and rebuilding layer map updates.
/// One way to do that is to add a layer of indirection for returned values, so
/// that we can replace values only by updating a hashmap.
pub fn replace_historic(
&mut self,
expected: &Arc<L>,
new: Arc<L>,
) -> anyhow::Result<Replacement<Arc<L>>> {
self.layer_map.replace_historic_noflush(expected, new)
}
// We will flush on drop anyway, but this method makes it
// more explicit that there is some work being done.
/// Apply all updates
@@ -254,14 +273,8 @@ where
/// Helper function for BatchedUpdates::insert_historic
///
pub(self) fn insert_historic_noflush(&mut self, layer: Arc<L>) {
let kr = layer.get_key_range();
let lr = layer.get_lsn_range();
self.historic.insert(
historic_layer_coverage::LayerKey {
key: kr.start.to_i128()..kr.end.to_i128(),
lsn: lr.start.0..lr.end.0,
is_image: !layer.is_incremental(),
},
historic_layer_coverage::LayerKey::from(&*layer),
Arc::clone(&layer),
);
@@ -278,30 +291,72 @@ where
/// Helper function for BatchedUpdates::remove_historic
///
pub fn remove_historic_noflush(&mut self, layer: Arc<L>) {
let kr = layer.get_key_range();
let lr = layer.get_lsn_range();
self.historic.remove(historic_layer_coverage::LayerKey {
key: kr.start.to_i128()..kr.end.to_i128(),
lsn: lr.start.0..lr.end.0,
is_image: !layer.is_incremental(),
});
self.historic
.remove(historic_layer_coverage::LayerKey::from(&*layer));
if Self::is_l0(&layer) {
let len_before = self.l0_delta_layers.len();
// FIXME: ptr_eq might fail to return true for 'dyn'
// references. Clippy complains about this. In practice it
// seems to work, the assertion below would be triggered
// otherwise but this ought to be fixed.
#[allow(clippy::vtable_address_comparisons)]
self.l0_delta_layers
.retain(|other| !Arc::ptr_eq(other, &layer));
assert_eq!(self.l0_delta_layers.len(), len_before - 1);
.retain(|other| !Self::compare_arced_layers(other, &layer));
// this assertion is related to use of Arc::ptr_eq in Self::compare_arced_layers,
// there's a chance that the comparison fails at runtime due to it comparing (pointer,
// vtable) pairs.
assert_eq!(
self.l0_delta_layers.len(),
len_before - 1,
"failed to locate removed historic layer from l0_delta_layers"
);
}
NUM_ONDISK_LAYERS.dec();
}
pub(self) fn replace_historic_noflush(
&mut self,
expected: &Arc<L>,
new: Arc<L>,
) -> anyhow::Result<Replacement<Arc<L>>> {
let key = historic_layer_coverage::LayerKey::from(&**expected);
let other = historic_layer_coverage::LayerKey::from(&*new);
let expected_l0 = Self::is_l0(expected);
let new_l0 = Self::is_l0(&new);
anyhow::ensure!(
key == other,
"expected and new must have equal LayerKeys: {key:?} != {other:?}"
);
anyhow::ensure!(
expected_l0 == new_l0,
"expected and new must both be l0 deltas or neither should be: {expected_l0} != {new_l0}"
);
let l0_index = if expected_l0 {
// find the index in case replace worked, we need to replace that as well
Some(
self.l0_delta_layers
.iter()
.position(|slot| Self::compare_arced_layers(slot, expected))
.ok_or_else(|| anyhow::anyhow!("existing l0 delta layer was not found"))?,
)
} else {
None
};
let replaced = self.historic.replace(&key, new.clone(), |existing| {
Self::compare_arced_layers(existing, expected)
});
if let Replacement::Replaced { .. } = &replaced {
if let Some(index) = l0_index {
self.l0_delta_layers[index] = new;
}
}
Ok(replaced)
}
/// Helper function for BatchedUpdates::drop.
pub(self) fn flush_updates(&mut self) {
self.historic.rebuild();
@@ -675,4 +730,105 @@ where
println!("End dump LayerMap");
Ok(())
}
#[inline(always)]
fn compare_arced_layers(left: &Arc<L>, right: &Arc<L>) -> bool {
// FIXME: ptr_eq might fail to return true for 'dyn' references because multiple vtables
// can be created during compilation. Clippy complains about this. In practice it seems to
// work.
//
// In future rust versions this might become Arc::as_ptr(left) as *const () ==
// Arc::as_ptr(right) as *const (), we could change to that before.
#[allow(clippy::vtable_address_comparisons)]
Arc::ptr_eq(left, right)
}
}
#[cfg(test)]
mod tests {
use super::{LayerMap, Replacement};
use crate::tenant::storage_layer::{Layer, LayerDescriptor, LayerFileName};
use std::str::FromStr;
use std::sync::Arc;
mod l0_delta_layers_updated {
use super::*;
#[test]
fn for_full_range_delta() {
// l0_delta_layers are used by compaction, and should observe all buffered updates
l0_delta_layers_updated_scenario(
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000053423C21-0000000053424D69",
true
)
}
#[test]
fn for_non_full_range_delta() {
// has minimal uncovered areas compared to l0_delta_layers_updated_on_insert_replace_remove_for_full_range_delta
l0_delta_layers_updated_scenario(
"000000000000000000000000000000000001-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE__0000000053423C21-0000000053424D69",
// because not full range
false
)
}
#[test]
fn for_image() {
l0_delta_layers_updated_scenario(
"000000000000000000000000000000000000-000000000000000000000000000000010000__0000000053424D69",
// code only checks if it is a full range layer, doesn't care about images, which must
// mean we should in practice never have full range images
false
)
}
fn l0_delta_layers_updated_scenario(layer_name: &str, expected_l0: bool) {
let name = LayerFileName::from_str(layer_name).unwrap();
let skeleton = LayerDescriptor::from(name);
let remote: Arc<dyn Layer> = Arc::new(skeleton.clone());
let downloaded: Arc<dyn Layer> = Arc::new(skeleton);
let mut map = LayerMap::default();
// two disjoint Arcs in different lifecycle phases.
assert!(!LayerMap::compare_arced_layers(&remote, &downloaded));
let expected_in_counts = (1, usize::from(expected_l0));
map.batch_update().insert_historic(remote.clone());
assert_eq!(count_layer_in(&map, &remote), expected_in_counts);
let replaced = map
.batch_update()
.replace_historic(&remote, downloaded.clone())
.expect("name derived attributes are the same");
assert!(
matches!(replaced, Replacement::Replaced { .. }),
"{replaced:?}"
);
assert_eq!(count_layer_in(&map, &downloaded), expected_in_counts);
map.batch_update().remove_historic(downloaded.clone());
assert_eq!(count_layer_in(&map, &downloaded), (0, 0));
}
fn count_layer_in(map: &LayerMap<dyn Layer>, layer: &Arc<dyn Layer>) -> (usize, usize) {
let historic = map
.iter_historic_layers()
.filter(|x| LayerMap::compare_arced_layers(x, layer))
.count();
let l0s = map
.get_level0_deltas()
.expect("why does this return a result");
let l0 = l0s
.iter()
.filter(|x| LayerMap::compare_arced_layers(x, layer))
.count();
(historic, l0)
}
}
}

View File

@@ -41,6 +41,18 @@ impl Ord for LayerKey {
}
}
impl<'a, L: crate::tenant::storage_layer::Layer + ?Sized> From<&'a L> for LayerKey {
fn from(layer: &'a L) -> Self {
let kr = layer.get_key_range();
let lr = layer.get_lsn_range();
LayerKey {
key: kr.start.to_i128()..kr.end.to_i128(),
lsn: lr.start.0..lr.end.0,
is_image: !layer.is_incremental(),
}
}
}
/// Efficiently queryable layer coverage for each LSN.
///
/// Allows answering layer map queries very efficiently,
@@ -82,15 +94,13 @@ impl<Value: Clone> HistoricLayerCoverage<Value> {
}
// Insert into data structure
if layer_key.is_image {
self.head
.image_coverage
.insert(layer_key.key, layer_key.lsn.clone(), value);
let target = if layer_key.is_image {
&mut self.head.image_coverage
} else {
self.head
.delta_coverage
.insert(layer_key.key, layer_key.lsn.clone(), value);
}
&mut self.head.delta_coverage
};
target.insert(layer_key.key, layer_key.lsn.clone(), value);
// Remember history. Clone is O(1)
self.historic.insert(layer_key.lsn.start, self.head.clone());
@@ -415,6 +425,59 @@ impl<Value: Clone> BufferedHistoricLayerCoverage<Value> {
self.buffer.insert(layer_key, None);
}
/// Replaces a previous layer with a new layer value.
///
/// The replacement is conditional on:
/// - there is an existing `LayerKey` record
/// - there is no buffered removal for the given `LayerKey`
/// - the given closure returns true for the current `Value`
///
/// The closure is used to compare the latest value (buffered insert, or existing layer)
/// against some expectation. This allows using `Arc::ptr_eq` or similar, which would be
/// inaccessible via the `PartialEq` trait.
///
/// Returns a `Replacement` value describing the outcome; only the case of
/// `Replacement::Replaced` modifies the map and requires a rebuild.
pub fn replace<F>(
&mut self,
layer_key: &LayerKey,
new: Value,
check_expected: F,
) -> Replacement<Value>
where
F: FnOnce(&Value) -> bool,
{
let (slot, in_buffered) = match self.buffer.get(layer_key) {
Some(inner @ Some(_)) => {
// we compare against the buffered version, because there will be a later
// rebuild before querying
(inner.as_ref(), true)
}
Some(None) => {
// buffer has removal for this key; it will not be equivalent by any check_expected.
return Replacement::RemovalBuffered;
}
None => {
// no pending modification for the key, check layers
(self.layers.get(layer_key), false)
}
};
match slot {
Some(existing) if !check_expected(existing) => {
// unfortunate clone here, but otherwise the nll borrowck grows the region of
// 'a to cover the whole function, and we could not mutate in the other
// Some(existing) branch
Replacement::Unexpected(existing.clone())
}
None => Replacement::NotFound,
Some(_existing) => {
self.insert(layer_key.to_owned(), new);
Replacement::Replaced { in_buffered }
}
}
}
pub fn rebuild(&mut self) {
// Find the first LSN that needs to be rebuilt
let rebuild_since: u64 = match self.buffer.iter().next() {
@@ -483,6 +546,22 @@ impl<Value: Clone> BufferedHistoricLayerCoverage<Value> {
}
}
/// Outcome of the replace operation.
#[derive(Debug)]
pub enum Replacement<Value> {
/// Previous value was replaced with the new value.
Replaced {
/// Replacement happened for a scheduled insert.
in_buffered: bool,
},
/// Key was not found in buffered updates or existing layers.
NotFound,
/// Key has been scheduled for removal, it was not replaced.
RemovalBuffered,
/// Previous value was rejected by the closure.
Unexpected(Value),
}
#[test]
fn test_retroactive_regression_1() {
let mut map = BufferedHistoricLayerCoverage::new();
@@ -548,7 +627,7 @@ fn test_retroactive_simple() {
LayerKey {
key: 2..5,
lsn: 105..106,
is_image: true,
is_image: false,
},
"Delta 1".to_string(),
);
@@ -556,17 +635,24 @@ fn test_retroactive_simple() {
// Rebuild so we can start querying
map.rebuild();
// Query key 4
let version = map.get().unwrap().get_version(90);
assert!(version.is_none());
let version = map.get().unwrap().get_version(102).unwrap();
assert_eq!(version.image_coverage.query(4), Some("Image 1".to_string()));
let version = map.get().unwrap().get_version(107).unwrap();
assert_eq!(version.image_coverage.query(4), Some("Delta 1".to_string()));
let version = map.get().unwrap().get_version(115).unwrap();
assert_eq!(version.image_coverage.query(4), Some("Image 2".to_string()));
let version = map.get().unwrap().get_version(125).unwrap();
assert_eq!(version.image_coverage.query(4), Some("Image 3".to_string()));
{
let map = map.get().expect("rebuilt");
let version = map.get_version(90);
assert!(version.is_none());
let version = map.get_version(102).unwrap();
assert_eq!(version.image_coverage.query(4), Some("Image 1".to_string()));
let version = map.get_version(107).unwrap();
assert_eq!(version.image_coverage.query(4), Some("Image 1".to_string()));
assert_eq!(version.delta_coverage.query(4), Some("Delta 1".to_string()));
let version = map.get_version(115).unwrap();
assert_eq!(version.image_coverage.query(4), Some("Image 2".to_string()));
let version = map.get_version(125).unwrap();
assert_eq!(version.image_coverage.query(4), Some("Image 3".to_string()));
}
// Remove Image 3
map.remove(LayerKey {
@@ -576,8 +662,147 @@ fn test_retroactive_simple() {
});
map.rebuild();
// Check deletion worked
let version = map.get().unwrap().get_version(125).unwrap();
assert_eq!(version.image_coverage.query(4), Some("Image 2".to_string()));
assert_eq!(version.image_coverage.query(8), Some("Image 4".to_string()));
{
// Check deletion worked
let map = map.get().expect("rebuilt");
let version = map.get_version(125).unwrap();
assert_eq!(version.image_coverage.query(4), Some("Image 2".to_string()));
assert_eq!(version.image_coverage.query(8), Some("Image 4".to_string()));
}
}
#[test]
fn test_retroactive_replacement() {
let mut map = BufferedHistoricLayerCoverage::new();
let keys = [
LayerKey {
key: 0..5,
lsn: 100..101,
is_image: true,
},
LayerKey {
key: 3..9,
lsn: 110..111,
is_image: true,
},
LayerKey {
key: 4..6,
lsn: 120..121,
is_image: true,
},
];
let layers = [
"Image 1".to_string(),
"Image 2".to_string(),
"Image 3".to_string(),
];
for (key, layer) in keys.iter().zip(layers.iter()) {
map.insert(key.to_owned(), layer.to_owned());
}
// rebuild is not necessary here, because replace works for both buffered updates and existing
// layers.
for (key, orig_layer) in keys.iter().zip(layers.iter()) {
let replacement = format!("Remote {orig_layer}");
// evict
let ret = map.replace(key, replacement.clone(), |l| l == orig_layer);
assert!(
matches!(ret, Replacement::Replaced { .. }),
"replace {orig_layer}: {ret:?}"
);
map.rebuild();
let at = key.lsn.end + 1;
let version = map.get().expect("rebuilt").get_version(at).unwrap();
assert_eq!(
version.image_coverage.query(4).as_deref(),
Some(replacement.as_str()),
"query for 4 at version {at} after eviction",
);
// download
let ret = map.replace(key, orig_layer.clone(), |l| l == &replacement);
assert!(
matches!(ret, Replacement::Replaced { .. }),
"replace {orig_layer} back: {ret:?}"
);
map.rebuild();
let version = map.get().expect("rebuilt").get_version(at).unwrap();
assert_eq!(
version.image_coverage.query(4).as_deref(),
Some(orig_layer.as_str()),
"query for 4 at version {at} after download",
);
}
}
#[test]
fn missing_key_is_not_inserted_with_replace() {
let mut map = BufferedHistoricLayerCoverage::new();
let key = LayerKey {
key: 0..5,
lsn: 100..101,
is_image: true,
};
let ret = map.replace(&key, "should not replace", |_| true);
assert!(matches!(ret, Replacement::NotFound), "{ret:?}");
map.rebuild();
assert!(map
.get()
.expect("no changes to rebuild")
.get_version(102)
.is_none());
}
#[test]
fn replacing_buffered_insert_and_remove() {
let mut map = BufferedHistoricLayerCoverage::new();
let key = LayerKey {
key: 0..5,
lsn: 100..101,
is_image: true,
};
map.insert(key.clone(), "Image 1");
let ret = map.replace(&key, "Remote Image 1", |&l| l == "Image 1");
assert!(
matches!(ret, Replacement::Replaced { in_buffered: true }),
"{ret:?}"
);
map.rebuild();
assert_eq!(
map.get()
.expect("rebuilt")
.get_version(102)
.unwrap()
.image_coverage
.query(4),
Some("Remote Image 1")
);
map.remove(key.clone());
let ret = map.replace(&key, "should not replace", |_| true);
assert!(
matches!(ret, Replacement::RemovalBuffered),
"cannot replace after scheduled remove: {ret:?}"
);
map.rebuild();
let ret = map.replace(&key, "should not replace", |_| true);
assert!(
matches!(ret, Replacement::NotFound),
"cannot replace after remove + rebuild: {ret:?}"
);
let at_version = map.get().expect("rebuilt").get_version(102);
assert!(at_version.is_none());
}


@@ -1135,18 +1135,29 @@ mod tests {
client.init_upload_queue_for_empty_remote(&metadata)?;
// Create a couple of dummy files, schedule upload for them
let content_foo = dummy_contents("foo");
let content_bar = dummy_contents("bar");
std::fs::write(timeline_path.join("foo"), &content_foo)?;
std::fs::write(timeline_path.join("bar"), &content_bar)?;
let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap();
let layer_file_name_2: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D9-00000000016B5A52".parse().unwrap();
let layer_file_name_3: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59DA-00000000016B5A53".parse().unwrap();
let content_1 = dummy_contents("foo");
let content_2 = dummy_contents("bar");
let content_3 = dummy_contents("baz");
std::fs::write(
timeline_path.join(layer_file_name_1.file_name()),
&content_1,
)?;
std::fs::write(
timeline_path.join(layer_file_name_2.file_name()),
&content_2,
)?;
std::fs::write(timeline_path.join(layer_file_name_3.file_name()), content_3)?;
client.schedule_layer_file_upload(
&LayerFileName::Test("foo".to_owned()),
&LayerFileMetadata::new(content_foo.len() as u64),
&layer_file_name_1,
&LayerFileMetadata::new(content_1.len() as u64),
)?;
client.schedule_layer_file_upload(
&LayerFileName::Test("bar".to_owned()),
&LayerFileMetadata::new(content_bar.len() as u64),
&layer_file_name_2,
&LayerFileMetadata::new(content_2.len() as u64),
)?;
// Check that they are started immediately, not queued
@@ -1183,7 +1194,13 @@ mod tests {
// Download back the index.json, and check that the list of files is correct
let index_part = runtime.block_on(client.download_index_file())?;
assert_file_list(&index_part.timeline_layers, &["foo", "bar"]);
assert_file_list(
&index_part.timeline_layers,
&[
&layer_file_name_1.file_name(),
&layer_file_name_2.file_name(),
],
);
let downloaded_metadata = index_part.parse_metadata()?;
assert_eq!(downloaded_metadata, metadata);
@@ -1191,10 +1208,10 @@ mod tests {
let content_baz = dummy_contents("baz");
std::fs::write(timeline_path.join("baz"), &content_baz)?;
client.schedule_layer_file_upload(
&LayerFileName::Test("baz".to_owned()),
&layer_file_name_3,
&LayerFileMetadata::new(content_baz.len() as u64),
)?;
client.schedule_layer_file_deletion(&[LayerFileName::Test("foo".to_owned())])?;
client.schedule_layer_file_deletion(&[layer_file_name_1.clone()])?;
{
let mut guard = client.upload_queue.lock().unwrap();
let upload_queue = guard.initialized_mut().unwrap();
@@ -1206,12 +1223,26 @@ mod tests {
assert!(upload_queue.num_inprogress_deletions == 0);
assert!(upload_queue.latest_files_changes_since_metadata_upload_scheduled == 0);
}
assert_remote_files(&["foo", "bar", "index_part.json"], &remote_timeline_dir);
assert_remote_files(
&[
&layer_file_name_1.file_name(),
&layer_file_name_2.file_name(),
"index_part.json",
],
&remote_timeline_dir,
);
// Finish them
runtime.block_on(client.wait_completion())?;
assert_remote_files(&["bar", "baz", "index_part.json"], &remote_timeline_dir);
assert_remote_files(
&[
&layer_file_name_2.file_name(),
&layer_file_name_3.file_name(),
"index_part.json",
],
&remote_timeline_dir,
);
Ok(())
}


@@ -8,7 +8,8 @@ use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use tracing::warn;
use crate::tenant::{metadata::TimelineMetadata, storage_layer::LayerFileName};
use crate::tenant::metadata::TimelineMetadata;
use crate::tenant::storage_layer::LayerFileName;
use utils::lsn::Lsn;
@@ -274,7 +275,7 @@ mod tests {
"timeline_layers":["000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9"],
"layer_metadata":{
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
"LAYER_FILE_NAME::test/not_a_real_layer_but_adding_coverage": { "file_size": 9007199254741001 }
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51": { "file_size": 9007199254741001 }
},
"disk_consistent_lsn":"0/16960E8",
"metadata_bytes":[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
@@ -288,7 +289,7 @@ mod tests {
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
file_size: Some(25600000),
}),
(LayerFileName::new_test("not_a_real_layer_but_adding_coverage"), IndexLayerMetadata {
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata {
// serde_json should always parse this, but other tools may read it as a double;
// e.g. in jq. The value is 2^53 + 9, which is not exactly representable as an f64.
file_size: Some(9007199254741001),
@@ -312,7 +313,7 @@ mod tests {
"missing_layers":["This shouldn't fail deserialization"],
"layer_metadata":{
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
"LAYER_FILE_NAME::test/not_a_real_layer_but_adding_coverage": { "file_size": 9007199254741001 }
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51": { "file_size": 9007199254741001 }
},
"disk_consistent_lsn":"0/16960E8",
"metadata_bytes":[112,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
@@ -326,7 +327,7 @@ mod tests {
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
file_size: Some(25600000),
}),
(LayerFileName::new_test("not_a_real_layer_but_adding_coverage"), IndexLayerMetadata {
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata {
// serde_json should always parse this, but other tools may read it as a double;
// e.g. in jq. The value is 2^53 + 9, which is not exactly representable as an f64.
file_size: Some(9007199254741001),


@@ -6,11 +6,13 @@ mod image_layer;
mod inmemory_layer;
mod remote_layer;
use crate::config::PageServerConf;
use crate::context::RequestContext;
use crate::repository::{Key, Value};
use crate::walrecord::NeonWalRecord;
use anyhow::Result;
use bytes::Bytes;
use pageserver_api::models::HistoricLayerInfo;
use std::ops::Range;
use std::path::PathBuf;
use std::sync::Arc;
@@ -21,7 +23,7 @@ use utils::{
};
pub use delta_layer::{DeltaLayer, DeltaLayerWriter};
pub use filename::{DeltaFileName, ImageFileName, LayerFileName, PathOrConf};
pub use filename::{DeltaFileName, ImageFileName, LayerFileName};
pub use image_layer::{ImageLayer, ImageLayerWriter};
pub use inmemory_layer::InMemoryLayer;
pub use remote_layer::RemoteLayer;
@@ -187,6 +189,8 @@ pub trait PersistentLayer: Layer {
/// Should not change over the lifetime of the layer object because
/// current_physical_size is computed as the sum of this value across layers.
fn file_size(&self) -> Option<u64>;
fn info(&self) -> HistoricLayerInfo;
}
pub fn downcast_remote_layer(
@@ -208,6 +212,10 @@ impl std::fmt::Debug for dyn Layer {
}
/// Holds metadata about a layer without any content. Used mostly for testing.
///
/// To use filenames as fixtures, parse them as [`LayerFileName`] and convert from that
/// into a `LayerDescriptor`.
#[derive(Clone)]
pub struct LayerDescriptor {
pub key: Range<Key>,
pub lsn: Range<Lsn>,
@@ -246,3 +254,50 @@ impl Layer for LayerDescriptor {
todo!()
}
}
impl From<DeltaFileName> for LayerDescriptor {
fn from(value: DeltaFileName) -> Self {
let short_id = value.to_string();
LayerDescriptor {
key: value.key_range,
lsn: value.lsn_range,
is_incremental: true,
short_id,
}
}
}
impl From<ImageFileName> for LayerDescriptor {
fn from(value: ImageFileName) -> Self {
let short_id = value.to_string();
let lsn = value.lsn_as_range();
LayerDescriptor {
key: value.key_range,
lsn,
is_incremental: false,
short_id,
}
}
}
impl From<LayerFileName> for LayerDescriptor {
fn from(value: LayerFileName) -> Self {
match value {
LayerFileName::Delta(d) => Self::from(d),
LayerFileName::Image(i) => Self::from(i),
}
}
}
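// An illustrative sketch, not part of this change, of the fixture path described above;
// the file name is one of the real delta layer names used elsewhere in these tests:
//
//     let name: LayerFileName =
//         "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51"
//             .parse()
//             .expect("valid delta layer file name");
//     let fixture = LayerDescriptor::from(name);
//     assert!(fixture.is_incremental); // delta layers are incremental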
/// Helper enum to hold a PageServerConf, or a path
///
/// This is used by DeltaLayer and ImageLayer. Normally, this holds a reference to the
/// global config, and paths to layer files are constructed using the tenant/timeline
/// path from the config. But in the 'pageserver_binutils' binary, we need to construct a Layer
/// struct for a file on disk, without having a page server running, so that we have no
/// config. In that case, we use the Path variant to hold the full path to the file on
/// disk.
enum PathOrConf {
Path(PathBuf),
Conf(&'static PageServerConf),
}


@@ -37,6 +37,7 @@ use crate::virtual_file::VirtualFile;
use crate::{walrecord, TEMP_FILE_SUFFIX};
use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION};
use anyhow::{bail, ensure, Context, Result};
use pageserver_api::models::HistoricLayerInfo;
use rand::{distributions::Alphanumeric, Rng};
use serde::{Deserialize, Serialize};
use std::fs::{self, File};
@@ -417,6 +418,19 @@ impl PersistentLayer for DeltaLayer {
fn file_size(&self) -> Option<u64> {
Some(self.file_size)
}
fn info(&self) -> HistoricLayerInfo {
let layer_file_name = self.filename().file_name();
let lsn_range = self.get_lsn_range();
HistoricLayerInfo::Delta {
layer_file_name,
layer_file_size: Some(self.file_size),
lsn_start: lsn_range.start,
lsn_end: lsn_range.end,
remote: false,
}
}
}
impl DeltaLayer {


@@ -1,12 +1,10 @@
//!
//! Helper functions for dealing with filenames of the image and delta layer files.
//!
use crate::config::PageServerConf;
use crate::repository::Key;
use std::cmp::Ordering;
use std::fmt;
use std::ops::Range;
use std::path::PathBuf;
use std::str::FromStr;
use utils::lsn::Lsn;
@@ -130,6 +128,13 @@ impl Ord for ImageFileName {
}
}
impl ImageFileName {
pub fn lsn_as_range(&self) -> Range<Lsn> {
// Saves us from having to copy-paste this everywhere; an image covers a single LSN, so its range is lsn..lsn+1
self.lsn..(self.lsn + 1)
}
}
///
/// Represents the filename of an ImageLayer
///
@@ -177,49 +182,32 @@ impl fmt::Display for ImageFileName {
pub enum LayerFileName {
Image(ImageFileName),
Delta(DeltaFileName),
#[cfg(test)]
Test(String),
}
impl LayerFileName {
pub fn file_name(&self) -> String {
match self {
LayerFileName::Image(fname) => format!("{fname}"),
LayerFileName::Delta(fname) => format!("{fname}"),
#[cfg(test)]
LayerFileName::Test(fname) => fname.to_string(),
Self::Image(fname) => fname.to_string(),
Self::Delta(fname) => fname.to_string(),
}
}
#[cfg(test)]
pub(crate) fn new_test(name: &str) -> LayerFileName {
LayerFileName::Test(name.to_owned())
}
}
impl From<ImageFileName> for LayerFileName {
fn from(fname: ImageFileName) -> Self {
LayerFileName::Image(fname)
Self::Image(fname)
}
}
impl From<DeltaFileName> for LayerFileName {
fn from(fname: DeltaFileName) -> Self {
LayerFileName::Delta(fname)
Self::Delta(fname)
}
}
// include a `/` in the name as an additional layer of robustness
// because `/` chars are not allowed in UNIX paths
#[cfg(test)]
const LAYER_FILE_NAME_TEST_PREFIX: &str = "LAYER_FILE_NAME::test/";
impl FromStr for LayerFileName {
type Err = String;
fn from_str(value: &str) -> Result<Self, Self::Err> {
#[cfg(test)]
if let Some(value) = value.strip_prefix(LAYER_FILE_NAME_TEST_PREFIX) {
return Ok(LayerFileName::Test(value.to_owned()));
}
let delta = DeltaFileName::parse_str(value);
let image = ImageFileName::parse_str(value);
let ok = match (delta, image) {
@@ -228,8 +216,8 @@ impl FromStr for LayerFileName {
"neither delta nor image layer file name: {value:?}"
))
}
(Some(delta), None) => LayerFileName::Delta(delta),
(None, Some(image)) => LayerFileName::Image(image),
(Some(delta), None) => Self::Delta(delta),
(None, Some(image)) => Self::Image(image),
(Some(_), Some(_)) => unreachable!(),
};
Ok(ok)
@@ -242,12 +230,8 @@ impl serde::Serialize for LayerFileName {
S: serde::Serializer,
{
match self {
LayerFileName::Image(fname) => serializer.serialize_str(&format!("{}", fname)),
LayerFileName::Delta(fname) => serializer.serialize_str(&format!("{}", fname)),
#[cfg(test)]
LayerFileName::Test(t) => {
serializer.serialize_str(&format!("{LAYER_FILE_NAME_TEST_PREFIX}{t}"))
}
Self::Image(fname) => serializer.serialize_str(&fname.to_string()),
Self::Delta(fname) => serializer.serialize_str(&fname.to_string()),
}
}
}
@@ -270,16 +254,3 @@ impl<'de> serde::de::Visitor<'de> for LayerFileNameVisitor {
v.parse().map_err(|e| E::custom(e))
}
}
/// Helper enum to hold a PageServerConf, or a path
///
/// This is used by DeltaLayer and ImageLayer. Normally, this holds a reference to the
/// global config, and paths to layer files are constructed using the tenant/timeline
/// path from the config. But in the 'pageserver_binutils' binary, we need to construct a Layer
/// struct for a file on disk, without having a page server running, so that we have no
/// config. In that case, we use the Path variant to hold the full path to the file on
/// disk.
pub enum PathOrConf {
Path(PathBuf),
Conf(&'static PageServerConf),
}


@@ -34,6 +34,7 @@ use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};
use anyhow::{bail, ensure, Context, Result};
use bytes::Bytes;
use hex;
use pageserver_api::models::HistoricLayerInfo;
use rand::{distributions::Alphanumeric, Rng};
use serde::{Deserialize, Serialize};
use std::fs::{self, File};
@@ -51,8 +52,8 @@ use utils::{
lsn::Lsn,
};
use super::filename::{ImageFileName, LayerFileName, PathOrConf};
use super::{Layer, LayerIter};
use super::filename::{ImageFileName, LayerFileName};
use super::{Layer, LayerIter, PathOrConf};
///
/// Header stored in the beginning of the file
@@ -235,6 +236,18 @@ impl PersistentLayer for ImageLayer {
fn file_size(&self) -> Option<u64> {
Some(self.file_size)
}
fn info(&self) -> HistoricLayerInfo {
let layer_file_name = self.filename().file_name();
let lsn_range = self.get_lsn_range();
HistoricLayerInfo::Image {
layer_file_name,
layer_file_size: Some(self.file_size),
lsn_start: lsn_range.start,
remote: false,
}
}
}
impl ImageLayer {


@@ -13,6 +13,7 @@ use crate::tenant::ephemeral_file::EphemeralFile;
use crate::tenant::storage_layer::{ValueReconstructResult, ValueReconstructState};
use crate::walrecord;
use anyhow::{ensure, Result};
use pageserver_api::models::InMemoryLayerInfo;
use std::cell::RefCell;
use std::collections::HashMap;
use tracing::*;
@@ -80,6 +81,16 @@ impl InMemoryLayer {
pub fn get_timeline_id(&self) -> TimelineId {
self.timeline_id
}
pub fn info(&self) -> InMemoryLayerInfo {
let lsn_start = self.start_lsn;
let lsn_end = self.inner.read().unwrap().end_lsn;
match lsn_end {
Some(lsn_end) => InMemoryLayerInfo::Frozen { lsn_start, lsn_end },
None => InMemoryLayerInfo::Open { lsn_start },
}
}
}
impl Layer for InMemoryLayer {


@@ -7,6 +7,7 @@ use crate::repository::Key;
use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
use crate::tenant::storage_layer::{Layer, ValueReconstructResult, ValueReconstructState};
use anyhow::{bail, Result};
use pageserver_api::models::HistoricLayerInfo;
use std::ops::Range;
use std::path::PathBuf;
use std::sync::Arc;
@@ -136,6 +137,28 @@ impl PersistentLayer for RemoteLayer {
fn file_size(&self) -> Option<u64> {
self.layer_metadata.file_size()
}
fn info(&self) -> HistoricLayerInfo {
let layer_file_name = self.filename().file_name();
let lsn_range = self.get_lsn_range();
if self.is_delta {
HistoricLayerInfo::Delta {
layer_file_name,
layer_file_size: self.layer_metadata.file_size(),
lsn_start: lsn_range.start,
lsn_end: lsn_range.end,
remote: true,
}
} else {
HistoricLayerInfo::Image {
layer_file_name,
layer_file_size: self.layer_metadata.file_size(),
lsn_start: lsn_range.start,
remote: true,
}
}
}
}
impl RemoteLayer {
@@ -149,7 +172,7 @@ impl RemoteLayer {
tenantid,
timelineid,
key_range: fname.key_range.clone(),
lsn_range: fname.lsn..(fname.lsn + 1),
lsn_range: fname.lsn_as_range(),
is_delta: false,
is_incremental: false,
file_name: fname.to_owned().into(),


@@ -10,7 +10,7 @@ use itertools::Itertools;
use once_cell::sync::OnceCell;
use pageserver_api::models::{
DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest,
DownloadRemoteLayersTaskState, TimelineState,
DownloadRemoteLayersTaskState, LayerMapInfo, TimelineState,
};
use tokio::sync::{oneshot, watch, Semaphore, TryAcquireError};
use tokio_util::sync::CancellationToken;
@@ -69,6 +69,7 @@ use crate::ZERO_PAGE;
use crate::{is_temporary, task_mgr};
use walreceiver::spawn_connection_manager_task;
use super::layer_map::BatchedUpdates;
use super::remote_timeline_client::index::IndexPart;
use super::remote_timeline_client::RemoteTimelineClient;
use super::storage_layer::{DeltaLayer, ImageLayer, Layer};
@@ -91,7 +92,7 @@ pub struct Timeline {
pub pg_version: u32,
pub layers: RwLock<LayerMap<dyn PersistentLayer>>,
pub(super) layers: RwLock<LayerMap<dyn PersistentLayer>>,
last_freeze_at: AtomicLsn,
// Atomic would be more appropriate here.
@@ -663,7 +664,7 @@ impl Timeline {
// Below are functions compact_level0() and create_image_layers()
// but they are a bit ad hoc and don't quite work like it's explained
// above. Rewrite it.
let _layer_removal_cs = self.layer_removal_cs.lock().await;
let layer_removal_cs = self.layer_removal_cs.lock().await;
// Is the timeline being deleted?
let state = *self.state.borrow();
if state == TimelineState::Stopping {
@@ -696,7 +697,8 @@ impl Timeline {
// 3. Compact
let timer = self.metrics.compact_time_histo.start_timer();
self.compact_level0(target_file_size, ctx).await?;
self.compact_level0(&layer_removal_cs, target_file_size, ctx)
.await?;
timer.stop_and_record();
// If `create_image_layers' or `compact_level0` scheduled any
@@ -832,6 +834,83 @@ impl Timeline {
pub fn subscribe_for_state_updates(&self) -> watch::Receiver<TimelineState> {
self.state.subscribe()
}
pub fn layer_map_info(&self) -> LayerMapInfo {
let layer_map = self.layers.read().unwrap();
let mut in_memory_layers = Vec::with_capacity(layer_map.frozen_layers.len() + 1);
if let Some(open_layer) = &layer_map.open_layer {
in_memory_layers.push(open_layer.info());
}
for frozen_layer in &layer_map.frozen_layers {
in_memory_layers.push(frozen_layer.info());
}
let mut historic_layers = Vec::new();
for historic_layer in layer_map.iter_historic_layers() {
historic_layers.push(historic_layer.info());
}
LayerMapInfo {
in_memory_layers,
historic_layers,
}
}
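/// Downloads the layer with the given file name, if it is currently a `RemoteLayer`.
///
/// Returns `None` if no historic layer with that file name exists, `Some(false)` if the
/// layer is already resident locally or there is no remote client, and `Some(true)` after
/// a successful download.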
pub async fn download_layer(&self, layer_file_name: &str) -> anyhow::Result<Option<bool>> {
let Some(layer) = self.find_layer(layer_file_name) else { return Ok(None) };
let Some(remote_layer) = layer.downcast_remote_layer() else { return Ok(Some(false)) };
if self.remote_client.is_none() {
return Ok(Some(false));
}
self.download_remote_layer(remote_layer).await?;
Ok(Some(true))
}
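/// Evicts the layer with the given file name: waits for pending uploads to complete,
/// removes the local file, and replaces the layer map entry with a `RemoteLayer`.
///
/// Returns `None` if no historic layer with that file name exists, `Some(false)` if the
/// layer is already remote or there is no remote client, and `Some(true)` on success.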
pub async fn evict_layer(&self, layer_file_name: &str) -> anyhow::Result<Option<bool>> {
let Some(local_layer) = self.find_layer(layer_file_name) else { return Ok(None) };
if local_layer.is_remote_layer() {
return Ok(Some(false));
}
let Some(remote_client) = &self.remote_client else { return Ok(Some(false)) };
// make sure the layer is uploaded before removing the local copy
remote_client
.wait_completion()
.await
.context("wait for layer upload ops to complete")?;
let layer_metadata = LayerFileMetadata::new(
local_layer
.file_size()
.expect("Local layer should have a file size"),
);
let new_remote_layer = Arc::new(match local_layer.filename() {
LayerFileName::Image(image_name) => RemoteLayer::new_img(
self.tenant_id,
self.timeline_id,
&image_name,
&layer_metadata,
),
LayerFileName::Delta(delta_name) => RemoteLayer::new_delta(
self.tenant_id,
self.timeline_id,
&delta_name,
&layer_metadata,
),
});
let gc_lock = self.layer_removal_cs.lock().await;
let mut layers = self.layers.write().unwrap();
let mut updates = layers.batch_update();
self.delete_historic_layer(&gc_lock, local_layer, &mut updates)?;
updates.insert_historic(new_remote_layer);
updates.flush();
drop(layers);
drop(gc_lock);
Ok(Some(true))
}
}
// Private functions
@@ -1289,8 +1368,6 @@ impl Timeline {
let remote_layer = Arc::new(remote_layer);
updates.insert_historic(remote_layer);
}
#[cfg(test)]
LayerFileName::Test(_) => unreachable!(),
}
}
@@ -1621,6 +1698,43 @@ impl Timeline {
Err(e) => error!("Failed to compute current logical size for metrics update: {e:?}"),
}
}
fn find_layer(&self, layer_file_name: &str) -> Option<Arc<dyn PersistentLayer>> {
for historic_layer in self.layers.read().unwrap().iter_historic_layers() {
let historic_layer_name = historic_layer.filename().file_name();
if layer_file_name == historic_layer_name {
return Some(historic_layer);
}
}
None
}
/// Removes the layer from local FS (if present) and from memory.
/// Remote storage is not affected by this operation.
fn delete_historic_layer(
&self,
// we cannot remove layers otherwise, since gc and compaction will race
_layer_removal_cs: &tokio::sync::MutexGuard<'_, ()>,
layer: Arc<dyn PersistentLayer>,
updates: &mut BatchedUpdates<'_, dyn PersistentLayer>,
) -> anyhow::Result<()> {
let layer_size = layer.file_size();
layer.delete()?;
if let Some(layer_size) = layer_size {
self.metrics.resident_physical_size_gauge.sub(layer_size);
}
// TODO Removing from the bottom of the layer map is expensive.
// Maybe instead discard all layer map historic versions that
// won't be needed for page reconstruction for this timeline,
// and mark what we can't delete yet as deleted from the layer
// map index without actually rebuilding the index.
updates.remove_historic(layer);
Ok(())
}
}
type TraversalId = String;
@@ -2727,6 +2841,7 @@ impl Timeline {
///
async fn compact_level0(
&self,
layer_removal_cs: &tokio::sync::MutexGuard<'_, ()>,
target_file_size: u64,
ctx: &RequestContext,
) -> anyhow::Result<()> {
@@ -2780,14 +2895,8 @@ impl Timeline {
// delete the old ones
let mut layer_names_to_delete = Vec::with_capacity(deltas_to_compact.len());
for l in deltas_to_compact {
if let Some(path) = l.local_path() {
self.metrics
.resident_physical_size_gauge
.sub(path.metadata()?.len());
}
layer_names_to_delete.push(l.filename());
l.delete()?;
updates.remove_historic(l);
self.delete_historic_layer(layer_removal_cs, l, &mut updates)?;
}
updates.flush();
drop(layers);
@@ -2907,7 +3016,7 @@ impl Timeline {
fail_point!("before-timeline-gc");
let _layer_removal_cs = self.layer_removal_cs.lock().await;
let layer_removal_cs = self.layer_removal_cs.lock().await;
// Is the timeline being deleted?
let state = *self.state.borrow();
if state == TimelineState::Stopping {
@@ -2926,7 +3035,13 @@ impl Timeline {
let new_gc_cutoff = Lsn::min(horizon_cutoff, pitr_cutoff);
let res = self
.gc_timeline(horizon_cutoff, pitr_cutoff, retain_lsns, new_gc_cutoff)
.gc_timeline(
&layer_removal_cs,
horizon_cutoff,
pitr_cutoff,
retain_lsns,
new_gc_cutoff,
)
.instrument(
info_span!("gc_timeline", timeline = %self.timeline_id, cutoff = %new_gc_cutoff),
)
@@ -2940,6 +3055,7 @@ impl Timeline {
async fn gc_timeline(
&self,
layer_removal_cs: &tokio::sync::MutexGuard<'_, ()>,
horizon_cutoff: Lsn,
pitr_cutoff: Lsn,
retain_lsns: Vec<Lsn>,
@@ -3095,22 +3211,12 @@ impl Timeline {
// (couldn't do this in the loop above, because you cannot modify a collection
// while iterating it. BTreeMap::retain() would be another option)
let mut layer_names_to_delete = Vec::with_capacity(layers_to_remove.len());
for doomed_layer in layers_to_remove {
if let Some(path) = doomed_layer.local_path() {
self.metrics
.resident_physical_size_gauge
.sub(path.metadata()?.len());
}
layer_names_to_delete.push(doomed_layer.filename());
doomed_layer.delete()?; // FIXME: schedule succeeded deletions before returning?
// TODO Removing from the bottom of the layer map is expensive.
// Maybe instead discard all layer map historic versions that
// won't be needed for page reconstruction for this timeline,
// and mark what we can't delete yet as deleted from the layer
// map index without actually rebuilding the index.
updates.remove_historic(doomed_layer);
result.layers_removed += 1;
}
{
for doomed_layer in layers_to_remove {
layer_names_to_delete.push(doomed_layer.filename());
self.delete_historic_layer(layer_removal_cs, doomed_layer, &mut updates)?; // FIXME: schedule succeeded deletions before returning?
result.layers_removed += 1;
}
}
if result.layers_removed != 0 {
@@ -3280,10 +3386,42 @@ impl Timeline {
let mut layers = self_clone.layers.write().unwrap();
let mut updates = layers.batch_update();
{
use crate::tenant::layer_map::Replacement;
let l: Arc<dyn PersistentLayer> = remote_layer.clone();
updates.remove_historic(l);
match updates.replace_historic(&l, new_layer) {
Ok(Replacement::Replaced { .. }) => { /* expected */ }
Ok(Replacement::NotFound) => {
// TODO: the downloaded file should probably be removed, otherwise
// it will be added to the layermap on next load? we should
// probably restart any get_reconstruct_data search as well.
//
// See: https://github.com/neondatabase/neon/issues/3533
error!("replacing downloaded layer into layermap failed because layer was not found");
}
Ok(Replacement::RemovalBuffered) => {
unreachable!("current implementation does not remove anything")
}
Ok(Replacement::Unexpected(other)) => {
// if the other layer has the same pointer value as the
// expected one, they differ only in their vtables.
//
// otherwise there is no known reason for this to happen, as
// compacted layers should have a different covering rectangle,
// which would produce Replacement::NotFound instead.
error!(
expected.ptr = ?Arc::as_ptr(&l),
other.ptr = ?Arc::as_ptr(&other),
"replacing downloaded layer into layermap failed because another layer was found instead of expected"
);
}
Err(e) => {
// this is a precondition failure: the attributes derived from the
// layer filename did not match up, which seems unlikely.
error!("replacing downloaded layer into layermap failed: {e:#?}")
}
}
}
updates.insert_historic(new_layer);
updates.flush();
drop(layers);


@@ -132,7 +132,7 @@ lfc_shmem_request(void)
RequestNamedLWLockTranche("lfc_lock", 1);
}
bool
static bool
lfc_check_limit_hook(int *newval, void **extra, GucSource source)
{
if (*newval > lfc_max_size)
@@ -143,7 +143,7 @@ lfc_check_limit_hook(int *newval, void **extra, GucSource source)
return true;
}
void
static void
lfc_change_limit_hook(int newval, void *extra)
{
uint32 new_size = SIZE_MB_TO_CHUNKS(newval);
@@ -213,7 +213,7 @@ lfc_init(void)
INT_MAX,
PGC_SIGHUP,
GUC_UNIT_MB,
NULL,
lfc_check_limit_hook,
lfc_change_limit_hook,
NULL);
@@ -472,7 +472,6 @@ local_cache_pages(PG_FUNCTION_ARGS)
HASH_SEQ_STATUS status;
FileCacheEntry* entry;
uint32 n_pages = 0;
uint32 i;
funcctx = SRF_FIRSTCALL_INIT();


@@ -49,6 +49,7 @@
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "access/xlog_internal.h"
#include "access/xlogutils.h"
#include "access/xlogdefs.h"
#include "catalog/pg_class.h"
#include "common/hashfn.h"
@@ -1310,6 +1311,62 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, ch
SetLastWrittenLSNForBlock(lsn, reln->smgr_rnode.node, forknum, blocknum);
}
static bool
replica_redo_block_filter(XLogReaderState *record, uint8 block_id)
{
BufferTag tag;
uint32 hash; /* hash value for tag */
LWLock *partition_lock; /* buffer partition lock for it */
int buf_id;
XLogRecPtr lsn = record->EndRecPtr;
#if PG_VERSION_NUM >= 150000
XLogRecGetBlockTag(record, block_id,
&tag.rnode, &tag.forkNum, &tag.blockNum);
#else
if (!XLogRecGetBlockTag(record, block_id,
&tag.rnode, &tag.forkNum, &tag.blockNum))
{
/* Caller specified a bogus block_id */
elog(PANIC, "failed to locate backup block with ID %d", block_id);
}
#endif
/* We need to set the last written LSN prior to the page lookup: if some other
* backend tries to load this page concurrently, it should use the proper request LSN.
*/
SetLastWrittenLSNForBlock(lsn, tag.rnode, tag.forkNum, tag.blockNum);
/* determine its hash code and partition lock ID */
hash = BufTableHashCode(&tag);
partition_lock = BufMappingPartitionLock(hash);
/* see if the block is in the buffer pool already */
LWLockAcquire(partition_lock, LW_SHARED);
buf_id = BufTableLookup(&tag, hash);
LWLockRelease(partition_lock);
/* If the page is present in shared buffers, then perform redo */
if (buf_id >= 0)
return true;
/*
* Also perform redo if the page is present in the local file cache.
* Note that it is unsafe to skip redo without also invalidating the cache entry.
* There is no synchronization here at all, but every race is still safe:
* if the page was evicted from the cache just before this check, it is OK to
* skip redo, because nobody will read the outdated page image.
* If some other backend concurrently requests this page, downloads it from the
* pageserver, and saves it in shared buffers (and maybe in the local file cache),
* then updating the last written LSN cache with the end LSN of this record
* guarantees that the loaded page already includes this WAL record, so skipping
* its redo is safe.
* And if we find the page in shared buffers or the local file cache and it is
* evicted afterwards, that is also safe: redo will then read the page from the
* pageserver, which may be suboptimal for performance, but is safe.
*/
return lfc_cache_contains(tag.rnode, tag.forkNum, tag.blockNum);
}
/*
* neon_init() -- Initialize private state
*/
@@ -1325,6 +1382,9 @@ neon_init(void)
sizeof(PrefetchRequest) * readahead_buffer_size
);
/* Replica should try to recover only pages present in shared buffers */
redo_read_buffer_filter = replica_redo_block_filter;
MyPState = MemoryContextAllocZero(TopMemoryContext, prfs_size);
MyPState->n_unused = readahead_buffer_size;


@@ -149,36 +149,32 @@ impl<'l> BackendType<'l, ClientCredentials<'_>> {
};
// TODO: find a proper way to merge those very similar blocks.
let (mut node, payload) = match self {
let (mut node, password) = match self {
Console(api, creds) if creds.project.is_none() => {
let payload = fetch_magic_payload(client).await?;
let mut creds = creds.as_ref();
creds.project = Some(payload.project.as_str().into());
let node = api.wake_compute(extra, &creds).await?;
(node, payload)
creds.project = Some(payload.project.into());
let node = api.wake_compute(extra, creds).await?;
(node, payload.password)
}
// This is a hack to allow cleartext password in secure connections (wss).
Console(api, creds) if creds.use_cleartext_password_flow => {
let payload = fetch_plaintext_password(client).await?;
let node = api.wake_compute(extra, creds).await?;
(node, payload)
(node, payload.password)
}
Postgres(api, creds) if creds.project.is_none() => {
let payload = fetch_magic_payload(client).await?;
let mut creds = creds.as_ref();
creds.project = Some(payload.project.as_str().into());
let node = api.wake_compute(extra, &creds).await?;
(node, payload)
creds.project = Some(payload.project.into());
let node = api.wake_compute(extra, creds).await?;
(node, payload.password)
}
_ => return Ok(None),
};
node.config.password(payload.password);
node.config.password(password);
Ok(Some(AuthSuccess {
reported_auth_ok: false,
value: node,


@@ -47,18 +47,6 @@ impl ClientCredentials<'_> {
}
}
impl<'a> ClientCredentials<'a> {
#[inline]
pub fn as_ref(&'a self) -> ClientCredentials<'a> {
Self {
user: self.user,
dbname: self.dbname,
project: self.project().map(Cow::Borrowed),
use_cleartext_password_flow: self.use_cleartext_password_flow,
}
}
}
impl<'a> ClientCredentials<'a> {
pub fn parse(
params: &'a StartupMessageParams,


@@ -431,6 +431,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
logging::init(LogFormat::from_config(&args.log_format)?)?;
info!("version: {GIT_VERSION}");
::metrics::set_build_info_metric(GIT_VERSION);
let registry = Registry {
shared_state: Arc::new(RwLock::new(SharedState::new(args.all_keys_chan_size))),


@@ -97,10 +97,17 @@ class NeonCompare(PgCompare):
self._pg_bin = pg_bin
self.pageserver_http_client = self.env.pageserver.http_client()
# We only use one branch and one timeline
self.env.neon_cli.create_branch(branch_name, "empty")
self._pg = self.env.postgres.create_start(branch_name)
self.timeline = self.pg.safe_psql("SHOW neon.timeline_id")[0][0]
# Create tenant
tenant_conf: Dict[str, str] = {}
if False: # TODO add pytest setting for this
tenant_conf["trace_read_requests"] = "true"
self.tenant, _ = self.env.neon_cli.create_tenant(conf=tenant_conf)
# Create timeline
self.timeline = self.env.neon_cli.create_timeline(branch_name, tenant_id=self.tenant)
# Start pg
self._pg = self.env.postgres.create_start(branch_name, "main", self.tenant)
@property
def pg(self) -> PgProtocol:
@@ -115,11 +122,11 @@ class NeonCompare(PgCompare):
return self._pg_bin
def flush(self):
self.pageserver_http_client.timeline_checkpoint(self.env.initial_tenant, self.timeline)
self.pageserver_http_client.timeline_gc(self.env.initial_tenant, self.timeline, 0)
self.pageserver_http_client.timeline_checkpoint(self.tenant, self.timeline)
self.pageserver_http_client.timeline_gc(self.tenant, self.timeline, 0)
def compact(self):
self.pageserver_http_client.timeline_compact(self.env.initial_tenant, self.timeline)
self.pageserver_http_client.timeline_compact(self.tenant, self.timeline)
def report_peak_memory_use(self):
self.zenbenchmark.record(
@@ -131,13 +138,13 @@ class NeonCompare(PgCompare):
def report_size(self):
timeline_size = self.zenbenchmark.get_timeline_size(
self.env.repo_dir, self.env.initial_tenant, self.timeline
self.env.repo_dir, self.tenant, self.timeline
)
self.zenbenchmark.record(
"size", timeline_size / (1024 * 1024), "MB", report=MetricReport.LOWER_IS_BETTER
)
params = f'{{tenant_id="{self.env.initial_tenant}",timeline_id="{self.timeline}"}}'
params = f'{{tenant_id="{self.tenant}",timeline_id="{self.timeline}"}}'
total_files = self.zenbenchmark.get_int_counter_value(
self.env.pageserver, "pageserver_created_persistent_files_total" + params
)


@@ -1472,6 +1472,91 @@ class PageserverHttpClient(requests.Session):
assert len(relevant) == 1
return relevant[0].lstrip(name).strip()
def layer_map_info(
self,
tenant_id: TenantId,
timeline_id: TimelineId,
) -> LayerMapInfo:
res = self.get(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/layer/",
)
self.verbose_error(res)
return LayerMapInfo.from_json(res.json())
def download_layer(self, tenant_id: TenantId, timeline_id: TimelineId, layer_name: str):
res = self.get(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/layer/{layer_name}",
)
self.verbose_error(res)
assert res.status_code == 200
def evict_layer(self, tenant_id: TenantId, timeline_id: TimelineId, layer_name: str):
res = self.delete(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/layer/{layer_name}",
)
self.verbose_error(res)
assert res.status_code == 200
@dataclass
class LayerMapInfo:
in_memory_layers: List[InMemoryLayerInfo]
historic_layers: List[HistoricLayerInfo]
@classmethod
def from_json(cls, d: Dict[str, Any]) -> LayerMapInfo:
info = LayerMapInfo(in_memory_layers=[], historic_layers=[])
json_in_memory_layers = d["in_memory_layers"]
assert isinstance(json_in_memory_layers, List)
for json_in_memory_layer in json_in_memory_layers:
info.in_memory_layers.append(InMemoryLayerInfo.from_json(json_in_memory_layer))
json_historic_layers = d["historic_layers"]
assert isinstance(json_historic_layers, List)
for json_historic_layer in json_historic_layers:
info.historic_layers.append(HistoricLayerInfo.from_json(json_historic_layer))
return info
@dataclass
class InMemoryLayerInfo:
kind: str
lsn_start: str
lsn_end: Optional[str]
@classmethod
def from_json(cls, d: Dict[str, Any]) -> InMemoryLayerInfo:
return InMemoryLayerInfo(
kind=d["kind"],
lsn_start=d["lsn_start"],
lsn_end=d.get("lsn_end"),
)
@dataclass
class HistoricLayerInfo:
kind: str
layer_file_name: str
layer_file_size: Optional[int]
lsn_start: str
lsn_end: Optional[str]
remote: bool
@classmethod
def from_json(cls, d: Dict[str, Any]) -> HistoricLayerInfo:
return HistoricLayerInfo(
kind=d["kind"],
layer_file_name=d["layer_file_name"],
layer_file_size=d.get("layer_file_size"),
lsn_start=d["lsn_start"],
lsn_end=d.get("lsn_end"),
remote=d["remote"],
)
@dataclass
class PageserverPort:


@@ -0,0 +1,140 @@
import pytest
from fixtures.neon_fixtures import (
NeonEnvBuilder,
RemoteStorageKind,
wait_for_last_record_lsn,
wait_for_upload,
)
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import query_scalar
# Creates a few layers, ensures that we can evict them (removing them locally while still keeping track of them)
# and then download them back.
@pytest.mark.parametrize("remote_storage_kind", [RemoteStorageKind.LOCAL_FS])
def test_basic_eviction(
neon_env_builder: NeonEnvBuilder,
remote_storage_kind: RemoteStorageKind,
):
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storage_kind,
test_name="test_download_remote_layers_api",
)
env = neon_env_builder.init_start()
client = env.pageserver.http_client()
pg = env.postgres.create_start("main")
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
# Create a number of layers in the tenant
with pg.cursor() as cur:
cur.execute("CREATE TABLE foo (t text)")
cur.execute(
"""
INSERT INTO foo
SELECT 'long string to consume some space' || g
FROM generate_series(1, 5000000) g
"""
)
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
wait_for_last_record_lsn(client, tenant_id, timeline_id, current_lsn)
client.timeline_checkpoint(tenant_id, timeline_id)
wait_for_upload(client, tenant_id, timeline_id, current_lsn)
timeline_path = env.repo_dir / "tenants" / str(tenant_id) / "timelines" / str(timeline_id)
initial_local_layers = sorted(
list(filter(lambda path: path.name != "metadata", timeline_path.glob("*")))
)
assert (
len(initial_local_layers) > 1
), f"Should create multiple layers for timeline, but got {initial_local_layers}"
# Compare layer map dump with the local layers, ensure everything's present locally and matches
initial_layer_map_info = client.layer_map_info(tenant_id=tenant_id, timeline_id=timeline_id)
assert (
not initial_layer_map_info.in_memory_layers
), "Should have no in memory layers after flushing"
assert len(initial_local_layers) == len(
initial_layer_map_info.historic_layers
), "Should have the same layers in memory and on disk"
for returned_layer in initial_layer_map_info.historic_layers:
assert (
returned_layer.kind == "Delta"
), f"Did not create and expect image layers, but got {returned_layer}"
assert (
not returned_layer.remote
), f"All created layers should be present locally, but got {returned_layer}"
local_layers = list(
filter(lambda layer: layer.name == returned_layer.layer_file_name, initial_local_layers)
)
assert (
len(local_layers) == 1
), f"Did not find returned layer {returned_layer} in local layers {initial_local_layers}"
local_layer = local_layers[0]
assert (
returned_layer.layer_file_size == local_layer.stat().st_size
), f"Returned layer {returned_layer} has a different file size than local layer {local_layer}"
# Evict all layers, ensure they are no longer in the local FS but are still dumped as part of the layer map
for local_layer in initial_local_layers:
client.evict_layer(
tenant_id=tenant_id, timeline_id=timeline_id, layer_name=local_layer.name
)
assert not any(
new_local_layer.name == local_layer.name for new_local_layer in timeline_path.glob("*")
), f"Did not expect to find {local_layer} layer after evicting"
empty_layers = list(filter(lambda path: path.name != "metadata", timeline_path.glob("*")))
assert (
not empty_layers
), f"After evicting all layers, timeline {tenant_id}/{timeline_id} should have no layers locally, but got: {empty_layers}"
evicted_layer_map_info = client.layer_map_info(tenant_id=tenant_id, timeline_id=timeline_id)
assert (
not evicted_layer_map_info.in_memory_layers
), "Should have no in memory layers after flushing and evicting"
assert len(initial_local_layers) == len(
evicted_layer_map_info.historic_layers
), "Should have the same layers in memory and on disk initially"
for returned_layer in evicted_layer_map_info.historic_layers:
assert (
returned_layer.kind == "Delta"
), f"Did not create and expect image layers, but got {returned_layer}"
assert (
returned_layer.remote
), f"All layers should be evicted and not present locally, but got {returned_layer}"
assert any(
local_layer.name == returned_layer.layer_file_name
for local_layer in initial_local_layers
), f"Did not find returned layer {returned_layer} in local layers {initial_local_layers}"
# redownload all evicted layers and ensure the initial state is restored
for local_layer in initial_local_layers:
client.download_layer(
tenant_id=tenant_id, timeline_id=timeline_id, layer_name=local_layer.name
)
client.timeline_download_remote_layers(
tenant_id,
timeline_id,
# allow some concurrency to unveil potential concurrency bugs
max_concurrent_downloads=10,
errors_ok=False,
at_least_one_download=False,
)
redownloaded_layers = sorted(
list(filter(lambda path: path.name != "metadata", timeline_path.glob("*")))
)
assert (
redownloaded_layers == initial_local_layers
), "Should have the same layers locally after redownloading the evicted layers"
redownloaded_layer_map_info = client.layer_map_info(
tenant_id=tenant_id, timeline_id=timeline_id
)
assert (
redownloaded_layer_map_info == initial_layer_map_info
), "Should have the same layer map after redownloading the evicted layers"

trace/Cargo.toml (new file)

@@ -0,0 +1,15 @@
[package]
name = "trace"
version = "0.1.0"
edition.workspace = true
license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
clap.workspace = true
anyhow.workspace = true
pageserver_api.workspace = true
utils.workspace = true
workspace_hack.workspace = true

trace/src/main.rs (new file)

@@ -0,0 +1,172 @@
//! A tool for working with read traces generated by the pageserver.
use std::collections::HashMap;
use std::path::PathBuf;
use std::str::FromStr;
use std::{
fs::{read_dir, File},
io::BufReader,
};
use pageserver_api::models::{PagestreamFeMessage, PagestreamGetPageRequest};
use utils::id::{ConnectionId, TenantId, TimelineId};
use clap::{Parser, Subcommand};
/// Utils for working with pageserver read traces. For generating
/// traces, see the `trace_read_requests` tenant config option.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
/// Path of trace directory
#[arg(short, long)]
path: PathBuf,
#[command(subcommand)]
command: Command,
}
/// What to do with the read trace
#[derive(Subcommand, Debug)]
enum Command {
/// List traces in the directory
List,
/// Print the traces in text format
Dump,
/// Print stats and anomalies about the traces
Analyze,
/// Draw the traces in svg format
Draw,
/// Send the read requests to a pageserver
Replay,
}
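// Example invocations (the traces path below is hypothetical; `trace` is this crate's
// binary name):
//
//     cargo run -p trace -- --path /path/to/traces list
//     cargo run -p trace -- --path /path/to/traces analyze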
// HACK This function will change and improve as we see what kind of analysis is useful.
// Currently it collects the difference in blkno of consecutive GetPage requests,
// and counts the frequency of each value. This information is useful in order to:
// - see how sequential a workload is by seeing how often the delta is 1
// - detect any prefetching anomalies by looking for negative deltas during seqscan
fn analyze_trace<R: std::io::Read>(mut reader: R) {
let mut total = 0; // Total requests traced
let mut cross_rel = 0; // Requests that ask for different rel than previous request
let mut deltas = HashMap::<i32, u32>::new(); // Consecutive blkno differences
let mut prev: Option<PagestreamGetPageRequest> = None;
// Compute stats
while let Ok(msg) = PagestreamFeMessage::parse(&mut reader) {
match msg {
PagestreamFeMessage::Exists(_) => {}
PagestreamFeMessage::Nblocks(_) => {}
PagestreamFeMessage::GetPage(req) => {
total += 1;
if let Some(prev) = prev {
if prev.rel == req.rel {
let delta = (req.blkno as i32) - (prev.blkno as i32);
deltas.entry(delta).and_modify(|c| *c += 1).or_insert(1);
} else {
cross_rel += 1;
}
}
prev = Some(req);
}
PagestreamFeMessage::DbSize(_) => {}
};
}
// Print stats.
let mut other = deltas.len();
deltas.retain(|_, count| *count > 300);
other -= deltas.len();
dbg!(total);
dbg!(cross_rel);
dbg!(other);
dbg!(deltas);
}
fn dump_trace<R: std::io::Read>(mut reader: R) {
while let Ok(msg) = PagestreamFeMessage::parse(&mut reader) {
println!("{msg:?}");
}
}
#[derive(Debug)]
struct TraceFile {
#[allow(dead_code)]
pub tenant_id: TenantId,
#[allow(dead_code)]
pub timeline_id: TimelineId,
#[allow(dead_code)]
pub connection_id: ConnectionId,
pub path: PathBuf,
}
fn get_trace_files(traces_dir: &PathBuf) -> anyhow::Result<Vec<TraceFile>> {
let mut trace_files = Vec::<TraceFile>::new();
// Trace files are organized as {tenant_id}/{timeline_id}/{connection_id}
for tenant_dir in read_dir(traces_dir)? {
let entry = tenant_dir?;
let path = entry.path();
let tenant_id = TenantId::from_str(path.file_name().unwrap().to_str().unwrap())?;
for timeline_dir in read_dir(path)? {
let entry = timeline_dir?;
let path = entry.path();
let timeline_id = TimelineId::from_str(path.file_name().unwrap().to_str().unwrap())?;
for trace_dir in read_dir(path)? {
let entry = trace_dir?;
let path = entry.path();
let connection_id =
ConnectionId::from_str(path.file_name().unwrap().to_str().unwrap())?;
trace_files.push(TraceFile {
tenant_id,
timeline_id,
connection_id,
path,
});
}
}
}
Ok(trace_files)
}
fn main() -> anyhow::Result<()> {
let args = Args::parse();
match args.command {
Command::List => {
for trace_file in get_trace_files(&args.path)? {
println!("{trace_file:?}");
}
}
Command::Dump => {
for trace_file in get_trace_files(&args.path)? {
let file = File::open(trace_file.path.clone())?;
let reader = BufReader::new(file);
dump_trace(reader);
}
}
Command::Analyze => {
for trace_file in get_trace_files(&args.path)? {
println!("analyzing {trace_file:?}");
let file = File::open(trace_file.path.clone())?;
let reader = BufReader::new(file);
analyze_trace(reader);
}
}
Command::Draw => todo!(),
Command::Replay => todo!(),
}
Ok(())
}


@@ -23,7 +23,6 @@ fail = { version = "0.5", default-features = false, features = ["failpoints"] }
futures = { version = "0.3" }
futures-channel = { version = "0.3", features = ["sink"] }
futures-executor = { version = "0.3" }
futures-task = { version = "0.3", default-features = false, features = ["std"] }
futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
hashbrown = { version = "0.12", features = ["raw"] }
indexmap = { version = "1", default-features = false, features = ["std"] }