mirror of
https://github.com/neondatabase/neon.git
synced 2026-07-04 04:30:38 +00:00
Compare commits
445 Commits
use_debug_
...
release-pr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7f4f1785a8 | ||
|
|
0efff1db26 | ||
|
|
5eecde461d | ||
|
|
85164422d0 | ||
|
|
6c3aba7c44 | ||
|
|
68a175d545 | ||
|
|
5e2c444525 | ||
|
|
8d711229c1 | ||
|
|
0e490f3be7 | ||
|
|
7e41ef1bec | ||
|
|
7916aa26e0 | ||
|
|
52ab8f3e65 | ||
|
|
3d822dbbde | ||
|
|
af46b5286f | ||
|
|
47f7efee06 | ||
|
|
868c38f522 | ||
|
|
c8b2ac93cf | ||
|
|
b2954d16ff | ||
|
|
79485e7c3a | ||
|
|
eaf1ab21c4 | ||
|
|
6508f4e5c1 | ||
|
|
a298d2c29b | ||
|
|
8b197de7ff | ||
|
|
15d079cd41 | ||
|
|
dc1625cd8e | ||
|
|
a6d4de25cd | ||
|
|
bf2a21567d | ||
|
|
24053ff4ca | ||
|
|
b147439d6b | ||
|
|
54433c0839 | ||
|
|
40bb9ff62a | ||
|
|
4688b815b1 | ||
|
|
0982ca4636 | ||
|
|
7272d9f7b3 | ||
|
|
37d555aa59 | ||
|
|
cae3e2976b | ||
|
|
51ecd1bb37 | ||
|
|
1e6bb48076 | ||
|
|
1470af0b42 | ||
|
|
f92f92b91b | ||
|
|
dbb205ae92 | ||
|
|
85072b715f | ||
|
|
6c86fe7143 | ||
|
|
66d5fe7f5b | ||
|
|
a1b9528757 | ||
|
|
1423bb8aa2 | ||
|
|
332f064a42 | ||
|
|
c962f2b447 | ||
|
|
446b3f9d28 | ||
|
|
23352dc2e9 | ||
|
|
c65fc5a955 | ||
|
|
3e624581cd | ||
|
|
fedf4f169c | ||
|
|
86d5798108 | ||
|
|
8b4088dd8a | ||
|
|
c91905e643 | ||
|
|
44b4e355a2 | ||
|
|
03666a1f37 | ||
|
|
9c92242ca0 | ||
|
|
a354071dd0 | ||
|
|
758680d4f8 | ||
|
|
1738fd0a96 | ||
|
|
87b7edfc72 | ||
|
|
def05700d5 | ||
|
|
b547681e08 | ||
|
|
0fd211537b | ||
|
|
a83bd4e81c | ||
|
|
ecdad5e6d5 | ||
|
|
d028929945 | ||
|
|
7b0e3db868 | ||
|
|
088eb72dd7 | ||
|
|
d550e3f626 | ||
|
|
8c6b41daf5 | ||
|
|
bbb050459b | ||
|
|
cab498c787 | ||
|
|
6359342ffb | ||
|
|
13285c2a5e | ||
|
|
33790d14a3 | ||
|
|
709b8cd371 | ||
|
|
1c9bbf1a92 | ||
|
|
16163fb850 | ||
|
|
73ccc2b08c | ||
|
|
c719be6474 | ||
|
|
718645e56c | ||
|
|
fbc8c36983 | ||
|
|
5519e42612 | ||
|
|
4157eaf4c5 | ||
|
|
60241127e2 | ||
|
|
f7d5322e8b | ||
|
|
41bb9c5280 | ||
|
|
69c0d61c5c | ||
|
|
63cb8ce975 | ||
|
|
907e4aa3c4 | ||
|
|
0a2a84b766 | ||
|
|
85b12ddd52 | ||
|
|
dd76f1eeee | ||
|
|
8963ac85f9 | ||
|
|
4a488b3e24 | ||
|
|
c4987b0b13 | ||
|
|
84b4821118 | ||
|
|
32ba9811f9 | ||
|
|
a0cd64c4d3 | ||
|
|
84687b743d | ||
|
|
b6f93dcec9 | ||
|
|
4f6c594973 | ||
|
|
a750c14735 | ||
|
|
9ce0dd4e55 | ||
|
|
0e1a336607 | ||
|
|
7fc2912d06 | ||
|
|
fdf231c237 | ||
|
|
1e08b5dccc | ||
|
|
030810ed3e | ||
|
|
62b74bdc2c | ||
|
|
8b7e9ed820 | ||
|
|
5dad89acd4 | ||
|
|
547b2d2827 | ||
|
|
93f29a0065 | ||
|
|
4f36494615 | ||
|
|
0a550f3e7d | ||
|
|
4bb9554e4a | ||
|
|
008616cfe6 | ||
|
|
e61ec94fbc | ||
|
|
e5152551ad | ||
|
|
b0822a5499 | ||
|
|
1fb6ab59e8 | ||
|
|
e16439400d | ||
|
|
e401f66698 | ||
|
|
2fa461b668 | ||
|
|
03d90bc0b3 | ||
|
|
268bc890ea | ||
|
|
8a6ee79f6f | ||
|
|
9052c32b46 | ||
|
|
995e729ebe | ||
|
|
76077e1ddf | ||
|
|
0467d88f06 | ||
|
|
f5eec194e7 | ||
|
|
7e00be391d | ||
|
|
d56599df2a | ||
|
|
9d9aab3680 | ||
|
|
a202b1b5cc | ||
|
|
90f731f3b1 | ||
|
|
7736b748d3 | ||
|
|
9c23333cb3 | ||
|
|
66a99009ba | ||
|
|
5d4c57491f | ||
|
|
73935ea3a2 | ||
|
|
32e595d4dd | ||
|
|
b0d69acb07 | ||
|
|
98355a419a | ||
|
|
cfb03d6cf0 | ||
|
|
d81ef3f962 | ||
|
|
5d62c67e75 | ||
|
|
53d53d5b1e | ||
|
|
29fe6ea47a | ||
|
|
640327ccb3 | ||
|
|
7cf0f6b37e | ||
|
|
03c2c569be | ||
|
|
eff6d4538a | ||
|
|
5ef7782e9c | ||
|
|
73101db8c4 | ||
|
|
bccdfc6d39 | ||
|
|
99595813bb | ||
|
|
fe07b54758 | ||
|
|
a42d173e7b | ||
|
|
e07f689238 | ||
|
|
7831eddc88 | ||
|
|
943b1bc80c | ||
|
|
95a184e9b7 | ||
|
|
3fa17e9d17 | ||
|
|
55e0fd9789 | ||
|
|
2a88889f44 | ||
|
|
5bad8126dc | ||
|
|
27bc242085 | ||
|
|
192b49cc6d | ||
|
|
e1b60f3693 | ||
|
|
2804f5323b | ||
|
|
676adc6b32 | ||
|
|
96a4e8de66 | ||
|
|
01180666b0 | ||
|
|
6c94269c32 | ||
|
|
edc691647d | ||
|
|
855d7b4781 | ||
|
|
c49c9707ce | ||
|
|
2227540a0d | ||
|
|
f1347f2417 | ||
|
|
30b295b017 | ||
|
|
1cef395266 | ||
|
|
78d160f76d | ||
|
|
b9238059d6 | ||
|
|
d0cb4b88c8 | ||
|
|
1ec3e39d4e | ||
|
|
a1a74eef2c | ||
|
|
90e689adda | ||
|
|
f0b2d4b053 | ||
|
|
299d9474c9 | ||
|
|
7234208b36 | ||
|
|
93450f11f5 | ||
|
|
2f0f9edf33 | ||
|
|
d424f2b7c8 | ||
|
|
21315e80bc | ||
|
|
483b66d383 | ||
|
|
aa72a22661 | ||
|
|
5c0264b591 | ||
|
|
9f13277729 | ||
|
|
54aa319805 | ||
|
|
4a227484bf | ||
|
|
2f83f85291 | ||
|
|
d6cfcb0d93 | ||
|
|
392843ad2a | ||
|
|
bd4dae8f4a | ||
|
|
b05fe53cfd | ||
|
|
c13a2f0df1 | ||
|
|
39be366fc5 | ||
|
|
6eda0a3158 | ||
|
|
306c7a1813 | ||
|
|
80be423a58 | ||
|
|
5dcfef82f2 | ||
|
|
e67b8f69c0 | ||
|
|
e546872ab4 | ||
|
|
322ea1cf7c | ||
|
|
3633742de9 | ||
|
|
079d3a37ba | ||
|
|
a46e77b476 | ||
|
|
a92702b01e | ||
|
|
8ff3253f20 | ||
|
|
04b82c92a7 | ||
|
|
e5bf423e68 | ||
|
|
60af392e45 | ||
|
|
661fc41e71 | ||
|
|
702c488f32 | ||
|
|
45c5122754 | ||
|
|
558394f710 | ||
|
|
73b0898608 | ||
|
|
e65be4c2dc | ||
|
|
40087b8164 | ||
|
|
c762b59483 | ||
|
|
5d71601ca9 | ||
|
|
a113c3e433 | ||
|
|
e81fc598f4 | ||
|
|
48b845fa76 | ||
|
|
27096858dc | ||
|
|
4430d0ae7d | ||
|
|
6e183aa0de | ||
|
|
fd6d0b7635 | ||
|
|
3710c32aae | ||
|
|
be83bee49d | ||
|
|
cf28e5922a | ||
|
|
7d384d6953 | ||
|
|
4b3b37b912 | ||
|
|
1d8d200f4d | ||
|
|
0d80d6ce18 | ||
|
|
f653ee039f | ||
|
|
e614a95853 | ||
|
|
850db4cc13 | ||
|
|
8a316b1277 | ||
|
|
4d13bae449 | ||
|
|
49377abd98 | ||
|
|
a6b2f4e54e | ||
|
|
face60d50b | ||
|
|
9768aa27f2 | ||
|
|
96b2e575e1 | ||
|
|
7222777784 | ||
|
|
5469fdede0 | ||
|
|
72aa6b9fdd | ||
|
|
ae0634b7be | ||
|
|
70711f32fa | ||
|
|
52a88af0aa | ||
|
|
b7a43bf817 | ||
|
|
dce91b33a4 | ||
|
|
23ee4f3050 | ||
|
|
46857e8282 | ||
|
|
368ab0ce54 | ||
|
|
a5987eebfd | ||
|
|
6686ede30f | ||
|
|
373c7057cc | ||
|
|
7d6ec16166 | ||
|
|
0e6fdc8a58 | ||
|
|
521438a5c6 | ||
|
|
07d7874bc8 | ||
|
|
1804111a02 | ||
|
|
cd0178efed | ||
|
|
333574be57 | ||
|
|
79a799a143 | ||
|
|
9da06af6c9 | ||
|
|
ce1753d036 | ||
|
|
67db8432b4 | ||
|
|
4e2e44e524 | ||
|
|
ed786104f3 | ||
|
|
84b74f2bd1 | ||
|
|
fec2ad6283 | ||
|
|
98eebd4682 | ||
|
|
2f74287c9b | ||
|
|
aee1bf95e3 | ||
|
|
b9de9d75ff | ||
|
|
7943b709e6 | ||
|
|
d7d066d493 | ||
|
|
e78ac22107 | ||
|
|
76a8f2bb44 | ||
|
|
8d59a8581f | ||
|
|
b1ddd01289 | ||
|
|
6eae4fc9aa | ||
|
|
765455bca2 | ||
|
|
4204960942 | ||
|
|
67345d66ea | ||
|
|
2266ee5971 | ||
|
|
b58445d855 | ||
|
|
36050e7f3d | ||
|
|
33360ed96d | ||
|
|
39a28d1108 | ||
|
|
efa6aa134f | ||
|
|
2c724e56e2 | ||
|
|
feff887c6f | ||
|
|
353d915fcf | ||
|
|
2e38098cbc | ||
|
|
a6fe5ea1ac | ||
|
|
05b0aed0c1 | ||
|
|
cd1705357d | ||
|
|
6bc7561290 | ||
|
|
fbd3ac14b5 | ||
|
|
e437787c8f | ||
|
|
3460dbf90b | ||
|
|
6b89d99677 | ||
|
|
6cc8ea86e4 | ||
|
|
e62a492d6f | ||
|
|
a475cdf642 | ||
|
|
7002c79a47 | ||
|
|
ee6cf357b4 | ||
|
|
e5c2086b5f | ||
|
|
5f1208296a | ||
|
|
88e8e473cd | ||
|
|
b0a77844f6 | ||
|
|
1baf464307 | ||
|
|
e9b8e81cea | ||
|
|
85d6194aa4 | ||
|
|
333a7a68ef | ||
|
|
6aa4e41bee | ||
|
|
840183e51f | ||
|
|
cbccc94b03 | ||
|
|
fce227df22 | ||
|
|
bd787e800f | ||
|
|
4a7704b4a3 | ||
|
|
ff1119da66 | ||
|
|
4c3ba1627b | ||
|
|
1407174fb2 | ||
|
|
ec9dcb1889 | ||
|
|
d11d781afc | ||
|
|
4e44565b71 | ||
|
|
4ed51ad33b | ||
|
|
1c1ebe5537 | ||
|
|
c19cb7f386 | ||
|
|
4b97d31b16 | ||
|
|
923ade3dd7 | ||
|
|
b04e711975 | ||
|
|
afd0a6b39a | ||
|
|
99752286d8 | ||
|
|
15df93363c | ||
|
|
bc0ab741af | ||
|
|
51d9dfeaa3 | ||
|
|
f63cb18155 | ||
|
|
0de603d88e | ||
|
|
240913912a | ||
|
|
91a4ea0de2 | ||
|
|
8608704f49 | ||
|
|
efef68ce99 | ||
|
|
8daefd24da | ||
|
|
46cc8b7982 | ||
|
|
38cd90dd0c | ||
|
|
a51b269f15 | ||
|
|
43bf6d0a0f | ||
|
|
15273a9b66 | ||
|
|
78aca668d0 | ||
|
|
acbf4148ea | ||
|
|
6508540561 | ||
|
|
a41b5244a8 | ||
|
|
2b3189be95 | ||
|
|
248563c595 | ||
|
|
14cd6ca933 | ||
|
|
eb36403e71 | ||
|
|
3c6f779698 | ||
|
|
f67f0c1c11 | ||
|
|
edb02d3299 | ||
|
|
664a69e65b | ||
|
|
478322ebf9 | ||
|
|
802f174072 | ||
|
|
47f9890bae | ||
|
|
262265daad | ||
|
|
300da5b872 | ||
|
|
7b22b5c433 | ||
|
|
ffca97bc1e | ||
|
|
cb356f3259 | ||
|
|
c85374295f | ||
|
|
4992160677 | ||
|
|
bd535b3371 | ||
|
|
d90c5a03af | ||
|
|
2d02cc9079 | ||
|
|
49ad94b99f | ||
|
|
948a217398 | ||
|
|
125381eae7 | ||
|
|
cd01bbc715 | ||
|
|
d8b5e3b88d | ||
|
|
06d25f2186 | ||
|
|
f759b561f3 | ||
|
|
ece0555600 | ||
|
|
73ea0a0b01 | ||
|
|
d8f6d6fd6f | ||
|
|
d24de169a7 | ||
|
|
0816168296 | ||
|
|
277b44d57a | ||
|
|
68c2c3880e | ||
|
|
49da498f65 | ||
|
|
2c76ba3dd7 | ||
|
|
dbe3dc69ad | ||
|
|
8e5bb3ed49 | ||
|
|
ab0be7b8da | ||
|
|
b4c55f5d24 | ||
|
|
ede70d833c | ||
|
|
70c3d18bb0 | ||
|
|
7a491f52c4 | ||
|
|
323c4ecb4f | ||
|
|
3d2466607e | ||
|
|
ed478b39f4 | ||
|
|
91585a558d | ||
|
|
93467eae1f | ||
|
|
f3aac81d19 | ||
|
|
979ad60c19 | ||
|
|
9316cb1b1f | ||
|
|
e7939a527a | ||
|
|
36d26665e1 | ||
|
|
873347f977 | ||
|
|
e814ac16f9 | ||
|
|
ad3055d386 | ||
|
|
94e03eb452 | ||
|
|
380f26ef79 | ||
|
|
3c5b7f59d7 | ||
|
|
fee89f80b5 | ||
|
|
41cce8eaf1 | ||
|
|
f88fe0218d | ||
|
|
cc856eca85 | ||
|
|
cf350c6002 | ||
|
|
0ce6b6a0a3 | ||
|
|
73f247d537 | ||
|
|
960be82183 | ||
|
|
806e5a6c19 | ||
|
|
8d5df07cce | ||
|
|
df7a9d1407 |
67
.github/workflows/_build-and-test-locally.yml
vendored
67
.github/workflows/_build-and-test-locally.yml
vendored
@@ -104,11 +104,10 @@ jobs:
|
|||||||
|
|
||||||
# Set some environment variables used by all the steps.
|
# Set some environment variables used by all the steps.
|
||||||
#
|
#
|
||||||
# CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
|
# CARGO_FLAGS is extra options to pass to all "cargo" subcommands.
|
||||||
# It also includes --features, if any
|
|
||||||
#
|
#
|
||||||
# CARGO_FEATURES is passed to "cargo metadata". It is separate from CARGO_FLAGS,
|
# CARGO_PROFILE is passed to "cargo build", "cargo test" etc, but not to
|
||||||
# because "cargo metadata" doesn't accept --release or --debug options
|
# "cargo metadata", because it doesn't accept --release or --debug options.
|
||||||
#
|
#
|
||||||
# We run tests with addtional features, that are turned off by default (e.g. in release builds), see
|
# We run tests with addtional features, that are turned off by default (e.g. in release builds), see
|
||||||
# corresponding Cargo.toml files for their descriptions.
|
# corresponding Cargo.toml files for their descriptions.
|
||||||
@@ -117,16 +116,16 @@ jobs:
|
|||||||
ARCH: ${{ inputs.arch }}
|
ARCH: ${{ inputs.arch }}
|
||||||
SANITIZERS: ${{ inputs.sanitizers }}
|
SANITIZERS: ${{ inputs.sanitizers }}
|
||||||
run: |
|
run: |
|
||||||
CARGO_FEATURES="--features testing"
|
CARGO_FLAGS="--locked --features testing"
|
||||||
if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then
|
if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then
|
||||||
cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
|
cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
|
||||||
CARGO_FLAGS="--locked"
|
CARGO_PROFILE=""
|
||||||
elif [[ $BUILD_TYPE == "debug" ]]; then
|
elif [[ $BUILD_TYPE == "debug" ]]; then
|
||||||
cov_prefix=""
|
cov_prefix=""
|
||||||
CARGO_FLAGS="--locked"
|
CARGO_PROFILE=""
|
||||||
elif [[ $BUILD_TYPE == "release" ]]; then
|
elif [[ $BUILD_TYPE == "release" ]]; then
|
||||||
cov_prefix=""
|
cov_prefix=""
|
||||||
CARGO_FLAGS="--locked --release"
|
CARGO_PROFILE="--release"
|
||||||
fi
|
fi
|
||||||
if [[ $SANITIZERS == 'enabled' ]]; then
|
if [[ $SANITIZERS == 'enabled' ]]; then
|
||||||
make_vars="WITH_SANITIZERS=yes"
|
make_vars="WITH_SANITIZERS=yes"
|
||||||
@@ -136,8 +135,8 @@ jobs:
|
|||||||
{
|
{
|
||||||
echo "cov_prefix=${cov_prefix}"
|
echo "cov_prefix=${cov_prefix}"
|
||||||
echo "make_vars=${make_vars}"
|
echo "make_vars=${make_vars}"
|
||||||
echo "CARGO_FEATURES=${CARGO_FEATURES}"
|
|
||||||
echo "CARGO_FLAGS=${CARGO_FLAGS}"
|
echo "CARGO_FLAGS=${CARGO_FLAGS}"
|
||||||
|
echo "CARGO_PROFILE=${CARGO_PROFILE}"
|
||||||
echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo"
|
echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo"
|
||||||
} >> $GITHUB_ENV
|
} >> $GITHUB_ENV
|
||||||
|
|
||||||
@@ -189,34 +188,18 @@ jobs:
|
|||||||
path: pg_install/v17
|
path: pg_install/v17
|
||||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
|
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
|
||||||
|
|
||||||
- name: Build postgres v14
|
- name: Build all
|
||||||
if: steps.cache_pg_14.outputs.cache-hit != 'true'
|
# Note: the Makefile picks up BUILD_TYPE and CARGO_PROFILE from the env variables
|
||||||
run: mold -run make ${make_vars} postgres-v14 -j$(nproc)
|
run: mold -run make ${make_vars} all -j$(nproc) CARGO_BUILD_FLAGS="$CARGO_FLAGS"
|
||||||
|
|
||||||
- name: Build postgres v15
|
|
||||||
if: steps.cache_pg_15.outputs.cache-hit != 'true'
|
|
||||||
run: mold -run make ${make_vars} postgres-v15 -j$(nproc)
|
|
||||||
|
|
||||||
- name: Build postgres v16
|
|
||||||
if: steps.cache_pg_16.outputs.cache-hit != 'true'
|
|
||||||
run: mold -run make ${make_vars} postgres-v16 -j$(nproc)
|
|
||||||
|
|
||||||
- name: Build postgres v17
|
|
||||||
if: steps.cache_pg_17.outputs.cache-hit != 'true'
|
|
||||||
run: mold -run make ${make_vars} postgres-v17 -j$(nproc)
|
|
||||||
|
|
||||||
- name: Build neon extensions
|
|
||||||
run: mold -run make ${make_vars} neon-pg-ext -j$(nproc)
|
|
||||||
|
|
||||||
- name: Build walproposer-lib
|
- name: Build walproposer-lib
|
||||||
run: mold -run make ${make_vars} walproposer-lib -j$(nproc)
|
run: mold -run make ${make_vars} walproposer-lib -j$(nproc)
|
||||||
|
|
||||||
- name: Run cargo build
|
- name: Build unit tests
|
||||||
env:
|
if: inputs.sanitizers != 'enabled'
|
||||||
WITH_TESTS: ${{ inputs.sanitizers != 'enabled' && '--tests' || '' }}
|
|
||||||
run: |
|
run: |
|
||||||
export ASAN_OPTIONS=detect_leaks=0
|
export ASAN_OPTIONS=detect_leaks=0
|
||||||
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins ${WITH_TESTS}
|
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_PROFILE --tests
|
||||||
|
|
||||||
# Do install *before* running rust tests because they might recompile the
|
# Do install *before* running rust tests because they might recompile the
|
||||||
# binaries with different features/flags.
|
# binaries with different features/flags.
|
||||||
@@ -228,7 +211,7 @@ jobs:
|
|||||||
# Install target binaries
|
# Install target binaries
|
||||||
mkdir -p /tmp/neon/bin/
|
mkdir -p /tmp/neon/bin/
|
||||||
binaries=$(
|
binaries=$(
|
||||||
${cov_prefix} cargo metadata $CARGO_FEATURES --format-version=1 --no-deps |
|
${cov_prefix} cargo metadata $CARGO_FLAGS --format-version=1 --no-deps |
|
||||||
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
|
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
|
||||||
)
|
)
|
||||||
for bin in $binaries; do
|
for bin in $binaries; do
|
||||||
@@ -245,7 +228,7 @@ jobs:
|
|||||||
mkdir -p /tmp/neon/test_bin/
|
mkdir -p /tmp/neon/test_bin/
|
||||||
|
|
||||||
test_exe_paths=$(
|
test_exe_paths=$(
|
||||||
${cov_prefix} cargo test $CARGO_FLAGS $CARGO_FEATURES --message-format=json --no-run |
|
${cov_prefix} cargo test $CARGO_FLAGS $CARGO_PROFILE --message-format=json --no-run |
|
||||||
jq -r '.executable | select(. != null)'
|
jq -r '.executable | select(. != null)'
|
||||||
)
|
)
|
||||||
for bin in $test_exe_paths; do
|
for bin in $test_exe_paths; do
|
||||||
@@ -279,10 +262,10 @@ jobs:
|
|||||||
export LD_LIBRARY_PATH
|
export LD_LIBRARY_PATH
|
||||||
|
|
||||||
#nextest does not yet support running doctests
|
#nextest does not yet support running doctests
|
||||||
${cov_prefix} cargo test --doc $CARGO_FLAGS $CARGO_FEATURES
|
${cov_prefix} cargo test --doc $CARGO_FLAGS $CARGO_PROFILE
|
||||||
|
|
||||||
# run all non-pageserver tests
|
# run all non-pageserver tests
|
||||||
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E '!package(pageserver)'
|
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_PROFILE -E '!package(pageserver)'
|
||||||
|
|
||||||
# run pageserver tests
|
# run pageserver tests
|
||||||
# (When developing new pageserver features gated by config fields, we commonly make the rust
|
# (When developing new pageserver features gated by config fields, we commonly make the rust
|
||||||
@@ -291,13 +274,13 @@ jobs:
|
|||||||
# pageserver tests from non-pageserver tests cuts down the time it takes for this CI step.)
|
# pageserver tests from non-pageserver tests cuts down the time it takes for this CI step.)
|
||||||
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=tokio-epoll-uring \
|
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=tokio-epoll-uring \
|
||||||
${cov_prefix} \
|
${cov_prefix} \
|
||||||
cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
|
cargo nextest run $CARGO_FLAGS $CARGO_PROFILE -E 'package(pageserver)'
|
||||||
|
|
||||||
# Run separate tests for real S3
|
# Run separate tests for real S3
|
||||||
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
|
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
|
||||||
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
|
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
|
||||||
export REMOTE_STORAGE_S3_REGION=eu-central-1
|
export REMOTE_STORAGE_S3_REGION=eu-central-1
|
||||||
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_s3)'
|
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_PROFILE -E 'package(remote_storage)' -E 'test(test_real_s3)'
|
||||||
|
|
||||||
# Run separate tests for real Azure Blob Storage
|
# Run separate tests for real Azure Blob Storage
|
||||||
# XXX: replace region with `eu-central-1`-like region
|
# XXX: replace region with `eu-central-1`-like region
|
||||||
@@ -306,17 +289,17 @@ jobs:
|
|||||||
export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
|
export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
|
||||||
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
|
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
|
||||||
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
|
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
|
||||||
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)'
|
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_PROFILE -E 'package(remote_storage)' -E 'test(test_real_azure)'
|
||||||
|
|
||||||
- name: Install postgres binaries
|
- name: Install postgres binaries
|
||||||
run: |
|
run: |
|
||||||
# Use tar to copy files matching the pattern, preserving the paths in the destionation
|
# Use tar to copy files matching the pattern, preserving the paths in the destionation
|
||||||
tar c \
|
tar c \
|
||||||
pg_install/v* \
|
pg_install/v* \
|
||||||
pg_install/build/*/src/test/regress/*.so \
|
build/*/src/test/regress/*.so \
|
||||||
pg_install/build/*/src/test/regress/pg_regress \
|
build/*/src/test/regress/pg_regress \
|
||||||
pg_install/build/*/src/test/isolation/isolationtester \
|
build/*/src/test/isolation/isolationtester \
|
||||||
pg_install/build/*/src/test/isolation/pg_isolation_regress \
|
build/*/src/test/isolation/pg_isolation_regress \
|
||||||
| tar x -C /tmp/neon
|
| tar x -C /tmp/neon
|
||||||
|
|
||||||
- name: Upload Neon artifact
|
- name: Upload Neon artifact
|
||||||
|
|||||||
16
.github/workflows/build-macos.yml
vendored
16
.github/workflows/build-macos.yml
vendored
@@ -110,7 +110,7 @@ jobs:
|
|||||||
|
|
||||||
build-walproposer-lib:
|
build-walproposer-lib:
|
||||||
if: |
|
if: |
|
||||||
inputs.pg_versions != '[]' || inputs.rebuild_everything ||
|
contains(inputs.pg_versions, 'v17') || inputs.rebuild_everything ||
|
||||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||||
github.ref_name == 'main'
|
github.ref_name == 'main'
|
||||||
@@ -144,7 +144,7 @@ jobs:
|
|||||||
id: cache_walproposer_lib
|
id: cache_walproposer_lib
|
||||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||||
with:
|
with:
|
||||||
path: pg_install/build/walproposer-lib
|
path: build/walproposer-lib
|
||||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||||
|
|
||||||
- name: Checkout submodule vendor/postgres-v17
|
- name: Checkout submodule vendor/postgres-v17
|
||||||
@@ -169,11 +169,11 @@ jobs:
|
|||||||
run:
|
run:
|
||||||
make walproposer-lib -j$(sysctl -n hw.ncpu)
|
make walproposer-lib -j$(sysctl -n hw.ncpu)
|
||||||
|
|
||||||
- name: Upload "pg_install/build/walproposer-lib" artifact
|
- name: Upload "build/walproposer-lib" artifact
|
||||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||||
with:
|
with:
|
||||||
name: pg_install--build--walproposer-lib
|
name: build--walproposer-lib
|
||||||
path: pg_install/build/walproposer-lib
|
path: build/walproposer-lib
|
||||||
# The artifact is supposed to be used by the next job in the same workflow,
|
# The artifact is supposed to be used by the next job in the same workflow,
|
||||||
# so there’s no need to store it for too long.
|
# so there’s no need to store it for too long.
|
||||||
retention-days: 1
|
retention-days: 1
|
||||||
@@ -226,11 +226,11 @@ jobs:
|
|||||||
name: pg_install--v17
|
name: pg_install--v17
|
||||||
path: pg_install/v17
|
path: pg_install/v17
|
||||||
|
|
||||||
- name: Download "pg_install/build/walproposer-lib" artifact
|
- name: Download "build/walproposer-lib" artifact
|
||||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||||
with:
|
with:
|
||||||
name: pg_install--build--walproposer-lib
|
name: build--walproposer-lib
|
||||||
path: pg_install/build/walproposer-lib
|
path: build/walproposer-lib
|
||||||
|
|
||||||
# `actions/download-artifact` doesn't preserve permissions:
|
# `actions/download-artifact` doesn't preserve permissions:
|
||||||
# https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss
|
# https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss
|
||||||
|
|||||||
4
.github/workflows/build_and_test.yml
vendored
4
.github/workflows/build_and_test.yml
vendored
@@ -670,7 +670,7 @@ jobs:
|
|||||||
ghcr.io/neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-arm64
|
ghcr.io/neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-arm64
|
||||||
|
|
||||||
compute-node-image-arch:
|
compute-node-image-arch:
|
||||||
needs: [ check-permissions, build-build-tools-image, meta ]
|
needs: [ check-permissions, meta ]
|
||||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||||
permissions:
|
permissions:
|
||||||
id-token: write # aws-actions/configure-aws-credentials
|
id-token: write # aws-actions/configure-aws-credentials
|
||||||
@@ -743,7 +743,6 @@ jobs:
|
|||||||
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
|
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
|
||||||
PG_VERSION=${{ matrix.version.pg }}
|
PG_VERSION=${{ matrix.version.pg }}
|
||||||
BUILD_TAG=${{ needs.meta.outputs.release-tag || needs.meta.outputs.build-tag }}
|
BUILD_TAG=${{ needs.meta.outputs.release-tag || needs.meta.outputs.build-tag }}
|
||||||
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }}
|
|
||||||
DEBIAN_VERSION=${{ matrix.version.debian }}
|
DEBIAN_VERSION=${{ matrix.version.debian }}
|
||||||
provenance: false
|
provenance: false
|
||||||
push: true
|
push: true
|
||||||
@@ -763,7 +762,6 @@ jobs:
|
|||||||
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
|
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
|
||||||
PG_VERSION=${{ matrix.version.pg }}
|
PG_VERSION=${{ matrix.version.pg }}
|
||||||
BUILD_TAG=${{ needs.meta.outputs.release-tag || needs.meta.outputs.build-tag }}
|
BUILD_TAG=${{ needs.meta.outputs.release-tag || needs.meta.outputs.build-tag }}
|
||||||
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }}
|
|
||||||
DEBIAN_VERSION=${{ matrix.version.debian }}
|
DEBIAN_VERSION=${{ matrix.version.debian }}
|
||||||
provenance: false
|
provenance: false
|
||||||
push: true
|
push: true
|
||||||
|
|||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,4 +1,5 @@
|
|||||||
/artifact_cache
|
/artifact_cache
|
||||||
|
/build
|
||||||
/pg_install
|
/pg_install
|
||||||
/target
|
/target
|
||||||
/tmp_check
|
/tmp_check
|
||||||
|
|||||||
3
Cargo.lock
generated
3
Cargo.lock
generated
@@ -4255,6 +4255,7 @@ dependencies = [
|
|||||||
"tokio-util",
|
"tokio-util",
|
||||||
"tonic 0.13.1",
|
"tonic 0.13.1",
|
||||||
"tracing",
|
"tracing",
|
||||||
|
"url",
|
||||||
"utils",
|
"utils",
|
||||||
"workspace_hack",
|
"workspace_hack",
|
||||||
]
|
]
|
||||||
@@ -4472,6 +4473,8 @@ dependencies = [
|
|||||||
"pageserver_api",
|
"pageserver_api",
|
||||||
"postgres_ffi",
|
"postgres_ffi",
|
||||||
"prost 0.13.5",
|
"prost 0.13.5",
|
||||||
|
"strum",
|
||||||
|
"strum_macros",
|
||||||
"thiserror 1.0.69",
|
"thiserror 1.0.69",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tonic 0.13.1",
|
"tonic 0.13.1",
|
||||||
|
|||||||
@@ -45,7 +45,6 @@ COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
|
|||||||
ENV BUILD_TYPE=release
|
ENV BUILD_TYPE=release
|
||||||
RUN set -e \
|
RUN set -e \
|
||||||
&& mold -run make -j $(nproc) -s neon-pg-ext \
|
&& mold -run make -j $(nproc) -s neon-pg-ext \
|
||||||
&& rm -rf pg_install/build \
|
|
||||||
&& tar -C pg_install -czf /home/nonroot/postgres_install.tar.gz .
|
&& tar -C pg_install -czf /home/nonroot/postgres_install.tar.gz .
|
||||||
|
|
||||||
# Prepare cargo-chef recipe
|
# Prepare cargo-chef recipe
|
||||||
|
|||||||
109
Makefile
109
Makefile
@@ -1,8 +1,18 @@
|
|||||||
ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
|
ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
|
||||||
|
|
||||||
# Where to install Postgres, default is ./pg_install, maybe useful for package managers
|
# Where to install Postgres, default is ./pg_install, maybe useful for package
|
||||||
|
# managers.
|
||||||
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/
|
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/
|
||||||
|
|
||||||
|
# CARGO_BUILD_FLAGS: Extra flags to pass to `cargo build`. `--locked`
|
||||||
|
# and `--features testing` are popular examples.
|
||||||
|
#
|
||||||
|
# CARGO_PROFILE: You can also set to override the cargo profile to
|
||||||
|
# use. By default, it is derived from BUILD_TYPE.
|
||||||
|
|
||||||
|
# All intermediate build artifacts are stored here.
|
||||||
|
BUILD_DIR := build
|
||||||
|
|
||||||
ICU_PREFIX_DIR := /usr/local/icu
|
ICU_PREFIX_DIR := /usr/local/icu
|
||||||
|
|
||||||
#
|
#
|
||||||
@@ -16,12 +26,12 @@ ifeq ($(BUILD_TYPE),release)
|
|||||||
PG_CONFIGURE_OPTS = --enable-debug --with-openssl
|
PG_CONFIGURE_OPTS = --enable-debug --with-openssl
|
||||||
PG_CFLAGS += -O2 -g3 $(CFLAGS)
|
PG_CFLAGS += -O2 -g3 $(CFLAGS)
|
||||||
PG_LDFLAGS = $(LDFLAGS)
|
PG_LDFLAGS = $(LDFLAGS)
|
||||||
# Unfortunately, `--profile=...` is a nightly feature
|
CARGO_PROFILE ?= --profile=release
|
||||||
CARGO_BUILD_FLAGS += --release
|
|
||||||
else ifeq ($(BUILD_TYPE),debug)
|
else ifeq ($(BUILD_TYPE),debug)
|
||||||
PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
|
PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
|
||||||
PG_CFLAGS += -O0 -g3 $(CFLAGS)
|
PG_CFLAGS += -O0 -g3 $(CFLAGS)
|
||||||
PG_LDFLAGS = $(LDFLAGS)
|
PG_LDFLAGS = $(LDFLAGS)
|
||||||
|
CARGO_PROFILE ?= --profile=dev
|
||||||
else
|
else
|
||||||
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
|
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
|
||||||
endif
|
endif
|
||||||
@@ -93,7 +103,7 @@ all: neon postgres neon-pg-ext
|
|||||||
.PHONY: neon
|
.PHONY: neon
|
||||||
neon: postgres-headers walproposer-lib cargo-target-dir
|
neon: postgres-headers walproposer-lib cargo-target-dir
|
||||||
+@echo "Compiling Neon"
|
+@echo "Compiling Neon"
|
||||||
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
|
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS) $(CARGO_PROFILE)
|
||||||
.PHONY: cargo-target-dir
|
.PHONY: cargo-target-dir
|
||||||
cargo-target-dir:
|
cargo-target-dir:
|
||||||
# https://github.com/rust-lang/cargo/issues/14281
|
# https://github.com/rust-lang/cargo/issues/14281
|
||||||
@@ -104,21 +114,20 @@ cargo-target-dir:
|
|||||||
# Some rules are duplicated for Postgres v14 and 15. We may want to refactor
|
# Some rules are duplicated for Postgres v14 and 15. We may want to refactor
|
||||||
# to avoid the duplication in the future, but it's tolerable for now.
|
# to avoid the duplication in the future, but it's tolerable for now.
|
||||||
#
|
#
|
||||||
$(POSTGRES_INSTALL_DIR)/build/%/config.status:
|
$(BUILD_DIR)/%/config.status:
|
||||||
|
mkdir -p $(BUILD_DIR)
|
||||||
mkdir -p $(POSTGRES_INSTALL_DIR)
|
test -e $(BUILD_DIR)/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > $(BUILD_DIR)/CACHEDIR.TAG
|
||||||
test -e $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG
|
|
||||||
|
|
||||||
+@echo "Configuring Postgres $* build"
|
+@echo "Configuring Postgres $* build"
|
||||||
@test -s $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure || { \
|
@test -s $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure || { \
|
||||||
echo "\nPostgres submodule not found in $(ROOT_PROJECT_DIR)/vendor/postgres-$*/, execute "; \
|
echo "\nPostgres submodule not found in $(ROOT_PROJECT_DIR)/vendor/postgres-$*/, execute "; \
|
||||||
echo "'git submodule update --init --recursive --depth 2 --progress .' in project root.\n"; \
|
echo "'git submodule update --init --recursive --depth 2 --progress .' in project root.\n"; \
|
||||||
exit 1; }
|
exit 1; }
|
||||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/$*
|
mkdir -p $(BUILD_DIR)/$*
|
||||||
|
|
||||||
VERSION=$*; \
|
VERSION=$*; \
|
||||||
EXTRA_VERSION=$$(cd $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION && git rev-parse HEAD); \
|
EXTRA_VERSION=$$(cd $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION && git rev-parse HEAD); \
|
||||||
(cd $(POSTGRES_INSTALL_DIR)/build/$$VERSION && \
|
(cd $(BUILD_DIR)/$$VERSION && \
|
||||||
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION/configure \
|
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION/configure \
|
||||||
CFLAGS='$(PG_CFLAGS)' LDFLAGS='$(PG_LDFLAGS)' \
|
CFLAGS='$(PG_CFLAGS)' LDFLAGS='$(PG_LDFLAGS)' \
|
||||||
$(PG_CONFIGURE_OPTS) --with-extra-version=" ($$EXTRA_VERSION)" \
|
$(PG_CONFIGURE_OPTS) --with-extra-version=" ($$EXTRA_VERSION)" \
|
||||||
@@ -130,74 +139,54 @@ $(POSTGRES_INSTALL_DIR)/build/%/config.status:
|
|||||||
# the "build-all-versions" entry points) where direct mention of PostgreSQL
|
# the "build-all-versions" entry points) where direct mention of PostgreSQL
|
||||||
# versions is used.
|
# versions is used.
|
||||||
.PHONY: postgres-configure-v17
|
.PHONY: postgres-configure-v17
|
||||||
postgres-configure-v17: $(POSTGRES_INSTALL_DIR)/build/v17/config.status
|
postgres-configure-v17: $(BUILD_DIR)/v17/config.status
|
||||||
.PHONY: postgres-configure-v16
|
.PHONY: postgres-configure-v16
|
||||||
postgres-configure-v16: $(POSTGRES_INSTALL_DIR)/build/v16/config.status
|
postgres-configure-v16: $(BUILD_DIR)/v16/config.status
|
||||||
.PHONY: postgres-configure-v15
|
.PHONY: postgres-configure-v15
|
||||||
postgres-configure-v15: $(POSTGRES_INSTALL_DIR)/build/v15/config.status
|
postgres-configure-v15: $(BUILD_DIR)/v15/config.status
|
||||||
.PHONY: postgres-configure-v14
|
.PHONY: postgres-configure-v14
|
||||||
postgres-configure-v14: $(POSTGRES_INSTALL_DIR)/build/v14/config.status
|
postgres-configure-v14: $(BUILD_DIR)/v14/config.status
|
||||||
|
|
||||||
# Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/<version>/include
|
# Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/<version>/include
|
||||||
.PHONY: postgres-headers-%
|
.PHONY: postgres-headers-%
|
||||||
postgres-headers-%: postgres-configure-%
|
postgres-headers-%: postgres-configure-%
|
||||||
+@echo "Installing PostgreSQL $* headers"
|
+@echo "Installing PostgreSQL $* headers"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/include MAKELEVEL=0 install
|
$(MAKE) -C $(BUILD_DIR)/$*/src/include MAKELEVEL=0 install
|
||||||
|
|
||||||
# Compile and install PostgreSQL
|
# Compile and install PostgreSQL
|
||||||
.PHONY: postgres-%
|
.PHONY: postgres-%
|
||||||
postgres-%: postgres-configure-% \
|
postgres-%: postgres-configure-% \
|
||||||
postgres-headers-% # to prevent `make install` conflicts with neon's `postgres-headers`
|
postgres-headers-% # to prevent `make install` conflicts with neon's `postgres-headers`
|
||||||
+@echo "Compiling PostgreSQL $*"
|
+@echo "Compiling PostgreSQL $*"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 install
|
$(MAKE) -C $(BUILD_DIR)/$* MAKELEVEL=0 install
|
||||||
+@echo "Compiling libpq $*"
|
+@echo "Compiling libpq $*"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/interfaces/libpq install
|
$(MAKE) -C $(BUILD_DIR)/$*/src/interfaces/libpq install
|
||||||
+@echo "Compiling pg_prewarm $*"
|
+@echo "Compiling pg_prewarm $*"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_prewarm install
|
$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_prewarm install
|
||||||
+@echo "Compiling pg_buffercache $*"
|
+@echo "Compiling pg_buffercache $*"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_buffercache install
|
$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_buffercache install
|
||||||
+@echo "Compiling pg_visibility $*"
|
+@echo "Compiling pg_visibility $*"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_visibility install
|
$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_visibility install
|
||||||
+@echo "Compiling pageinspect $*"
|
+@echo "Compiling pageinspect $*"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install
|
$(MAKE) -C $(BUILD_DIR)/$*/contrib/pageinspect install
|
||||||
+@echo "Compiling pg_trgm $*"
|
+@echo "Compiling pg_trgm $*"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_trgm install
|
$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_trgm install
|
||||||
+@echo "Compiling amcheck $*"
|
+@echo "Compiling amcheck $*"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/amcheck install
|
$(MAKE) -C $(BUILD_DIR)/$*/contrib/amcheck install
|
||||||
+@echo "Compiling test_decoding $*"
|
+@echo "Compiling test_decoding $*"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/test_decoding install
|
$(MAKE) -C $(BUILD_DIR)/$*/contrib/test_decoding install
|
||||||
|
|
||||||
.PHONY: postgres-check-%
|
.PHONY: postgres-check-%
|
||||||
postgres-check-%: postgres-%
|
postgres-check-%: postgres-%
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 check
|
$(MAKE) -C $(BUILD_DIR)/$* MAKELEVEL=0 check
|
||||||
|
|
||||||
.PHONY: neon-pg-ext-%
|
.PHONY: neon-pg-ext-%
|
||||||
neon-pg-ext-%: postgres-%
|
neon-pg-ext-%: postgres-%
|
||||||
+@echo "Compiling neon $*"
|
+@echo "Compiling neon-specific Postgres extensions for $*"
|
||||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-$*
|
mkdir -p $(BUILD_DIR)/pgxn-$*
|
||||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
|
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
|
||||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
|
-C $(BUILD_DIR)/pgxn-$*\
|
||||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install
|
-f $(ROOT_PROJECT_DIR)/pgxn/Makefile install
|
||||||
+@echo "Compiling neon_walredo $*"
|
|
||||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$*
|
|
||||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
|
|
||||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
|
|
||||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install
|
|
||||||
+@echo "Compiling neon_rmgr $*"
|
|
||||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$*
|
|
||||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
|
|
||||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$* \
|
|
||||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon_rmgr/Makefile install
|
|
||||||
+@echo "Compiling neon_test_utils $*"
|
|
||||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$*
|
|
||||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
|
|
||||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
|
|
||||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install
|
|
||||||
+@echo "Compiling neon_utils $*"
|
|
||||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-utils-$*
|
|
||||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
|
|
||||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
|
|
||||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install
|
|
||||||
|
|
||||||
# Build walproposer as a static library. walproposer source code is located
|
# Build walproposer as a static library. walproposer source code is located
|
||||||
# in the pgxn/neon directory.
|
# in the pgxn/neon directory.
|
||||||
@@ -211,15 +200,15 @@ neon-pg-ext-%: postgres-%
|
|||||||
.PHONY: walproposer-lib
|
.PHONY: walproposer-lib
|
||||||
walproposer-lib: neon-pg-ext-v17
|
walproposer-lib: neon-pg-ext-v17
|
||||||
+@echo "Compiling walproposer-lib"
|
+@echo "Compiling walproposer-lib"
|
||||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
|
mkdir -p $(BUILD_DIR)/walproposer-lib
|
||||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \
|
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \
|
||||||
-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
|
-C $(BUILD_DIR)/walproposer-lib \
|
||||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib
|
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib
|
||||||
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
|
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(BUILD_DIR)/walproposer-lib
|
||||||
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgcommon.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
|
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgcommon.a $(BUILD_DIR)/walproposer-lib
|
||||||
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgport.a \
|
$(AR) d $(BUILD_DIR)/walproposer-lib/libpgport.a \
|
||||||
pg_strong_random.o
|
pg_strong_random.o
|
||||||
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
|
$(AR) d $(BUILD_DIR)/walproposer-lib/libpgcommon.a \
|
||||||
checksum_helper.o \
|
checksum_helper.o \
|
||||||
cryptohash_openssl.o \
|
cryptohash_openssl.o \
|
||||||
hmac_openssl.o \
|
hmac_openssl.o \
|
||||||
@@ -227,7 +216,7 @@ walproposer-lib: neon-pg-ext-v17
|
|||||||
parse_manifest.o \
|
parse_manifest.o \
|
||||||
scram-common.o
|
scram-common.o
|
||||||
ifeq ($(UNAME_S),Linux)
|
ifeq ($(UNAME_S),Linux)
|
||||||
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
|
$(AR) d $(BUILD_DIR)/walproposer-lib/libpgcommon.a \
|
||||||
pg_crc32c.o
|
pg_crc32c.o
|
||||||
endif
|
endif
|
||||||
|
|
||||||
@@ -272,7 +261,7 @@ fmt:
|
|||||||
|
|
||||||
postgres-%-pg-bsd-indent: postgres-%
|
postgres-%-pg-bsd-indent: postgres-%
|
||||||
+@echo "Compiling pg_bsd_indent"
|
+@echo "Compiling pg_bsd_indent"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/tools/pg_bsd_indent/
|
$(MAKE) -C $(BUILD_DIR)/$*/src/tools/pg_bsd_indent/
|
||||||
|
|
||||||
# Create typedef list for the core. Note that generally it should be combined with
|
# Create typedef list for the core. Note that generally it should be combined with
|
||||||
# buildfarm one to cover platform specific stuff.
|
# buildfarm one to cover platform specific stuff.
|
||||||
@@ -291,7 +280,7 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
|
|||||||
cat $(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/typedefs.list |\
|
cat $(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/typedefs.list |\
|
||||||
cat - postgres-$*-typedefs.list | sort | uniq > postgres-$*-typedefs-full.list
|
cat - postgres-$*-typedefs.list | sort | uniq > postgres-$*-typedefs-full.list
|
||||||
+@echo note: you might want to run it on selected files/dirs instead.
|
+@echo note: you might want to run it on selected files/dirs instead.
|
||||||
INDENT=$(POSTGRES_INSTALL_DIR)/build/$*/src/tools/pg_bsd_indent/pg_bsd_indent \
|
INDENT=$(BUILD_DIR)/$*/src/tools/pg_bsd_indent/pg_bsd_indent \
|
||||||
$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/pgindent --typedefs postgres-$*-typedefs-full.list \
|
$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/pgindent --typedefs postgres-$*-typedefs-full.list \
|
||||||
$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/ \
|
$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/ \
|
||||||
--excludes $(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/exclude_file_patterns
|
--excludes $(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/exclude_file_patterns
|
||||||
@@ -302,9 +291,9 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
|
|||||||
neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
|
neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
|
||||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \
|
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \
|
||||||
FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
|
FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
|
||||||
INDENT=$(POSTGRES_INSTALL_DIR)/build/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
|
INDENT=$(BUILD_DIR)/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
|
||||||
PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \
|
PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \
|
||||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-v17 \
|
-C $(BUILD_DIR)/neon-v17 \
|
||||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent
|
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -77,9 +77,6 @@
|
|||||||
# build_and_test.yml github workflow for how that's done.
|
# build_and_test.yml github workflow for how that's done.
|
||||||
|
|
||||||
ARG PG_VERSION
|
ARG PG_VERSION
|
||||||
ARG REPOSITORY=ghcr.io/neondatabase
|
|
||||||
ARG IMAGE=build-tools
|
|
||||||
ARG TAG=pinned
|
|
||||||
ARG BUILD_TAG
|
ARG BUILD_TAG
|
||||||
ARG DEBIAN_VERSION=bookworm
|
ARG DEBIAN_VERSION=bookworm
|
||||||
ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim
|
ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim
|
||||||
@@ -150,6 +147,7 @@ RUN case $DEBIAN_VERSION in \
|
|||||||
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget ca-certificates pkg-config libssl-dev \
|
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget ca-certificates pkg-config libssl-dev \
|
||||||
libicu-dev libxslt1-dev liblz4-dev libzstd-dev zstd curl unzip g++ \
|
libicu-dev libxslt1-dev liblz4-dev libzstd-dev zstd curl unzip g++ \
|
||||||
libclang-dev \
|
libclang-dev \
|
||||||
|
jsonnet \
|
||||||
$VERSION_INSTALLS \
|
$VERSION_INSTALLS \
|
||||||
&& apt clean && rm -rf /var/lib/apt/lists/* && \
|
&& apt clean && rm -rf /var/lib/apt/lists/* && \
|
||||||
useradd -ms /bin/bash nonroot -b /home
|
useradd -ms /bin/bash nonroot -b /home
|
||||||
@@ -1634,18 +1632,7 @@ FROM pg-build AS neon-ext-build
|
|||||||
ARG PG_VERSION
|
ARG PG_VERSION
|
||||||
|
|
||||||
COPY pgxn/ pgxn/
|
COPY pgxn/ pgxn/
|
||||||
RUN make -j $(getconf _NPROCESSORS_ONLN) \
|
RUN make -j $(getconf _NPROCESSORS_ONLN) -C pgxn -s install-compute
|
||||||
-C pgxn/neon \
|
|
||||||
-s install && \
|
|
||||||
make -j $(getconf _NPROCESSORS_ONLN) \
|
|
||||||
-C pgxn/neon_utils \
|
|
||||||
-s install && \
|
|
||||||
make -j $(getconf _NPROCESSORS_ONLN) \
|
|
||||||
-C pgxn/neon_test_utils \
|
|
||||||
-s install && \
|
|
||||||
make -j $(getconf _NPROCESSORS_ONLN) \
|
|
||||||
-C pgxn/neon_rmgr \
|
|
||||||
-s install
|
|
||||||
|
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
#
|
#
|
||||||
@@ -1735,7 +1722,7 @@ FROM extensions-${EXTENSIONS} AS neon-pg-ext-build
|
|||||||
# Compile the Neon-specific `compute_ctl`, `fast_import`, and `local_proxy` binaries
|
# Compile the Neon-specific `compute_ctl`, `fast_import`, and `local_proxy` binaries
|
||||||
#
|
#
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
|
FROM build-deps-with-cargo AS compute-tools
|
||||||
ARG BUILD_TAG
|
ARG BUILD_TAG
|
||||||
ENV BUILD_TAG=$BUILD_TAG
|
ENV BUILD_TAG=$BUILD_TAG
|
||||||
|
|
||||||
@@ -1745,7 +1732,7 @@ COPY --chown=nonroot . .
|
|||||||
RUN --mount=type=cache,uid=1000,target=/home/nonroot/.cargo/registry \
|
RUN --mount=type=cache,uid=1000,target=/home/nonroot/.cargo/registry \
|
||||||
--mount=type=cache,uid=1000,target=/home/nonroot/.cargo/git \
|
--mount=type=cache,uid=1000,target=/home/nonroot/.cargo/git \
|
||||||
--mount=type=cache,uid=1000,target=/home/nonroot/target \
|
--mount=type=cache,uid=1000,target=/home/nonroot/target \
|
||||||
mold -run cargo build --locked --profile release-line-debug-size-lto --bin compute_ctl --bin fast_import --bin local_proxy && \
|
cargo build --locked --profile release-line-debug-size-lto --bin compute_ctl --bin fast_import --bin local_proxy && \
|
||||||
mkdir target-bin && \
|
mkdir target-bin && \
|
||||||
cp target/release-line-debug-size-lto/compute_ctl \
|
cp target/release-line-debug-size-lto/compute_ctl \
|
||||||
target/release-line-debug-size-lto/fast_import \
|
target/release-line-debug-size-lto/fast_import \
|
||||||
@@ -1839,10 +1826,11 @@ RUN rm /usr/local/pgsql/lib/lib*.a
|
|||||||
# Preprocess the sql_exporter configuration files
|
# Preprocess the sql_exporter configuration files
|
||||||
#
|
#
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
FROM $REPOSITORY/$IMAGE:$TAG AS sql_exporter_preprocessor
|
FROM build-deps AS sql_exporter_preprocessor
|
||||||
ARG PG_VERSION
|
ARG PG_VERSION
|
||||||
|
|
||||||
USER nonroot
|
USER nonroot
|
||||||
|
WORKDIR /home/nonroot
|
||||||
|
|
||||||
COPY --chown=nonroot compute compute
|
COPY --chown=nonroot compute compute
|
||||||
|
|
||||||
|
|||||||
@@ -408,7 +408,9 @@ impl ComputeNode {
|
|||||||
// N.B. keep it in sync with `ZENITH_OPTIONS` in `get_maintenance_client()`.
|
// N.B. keep it in sync with `ZENITH_OPTIONS` in `get_maintenance_client()`.
|
||||||
const EXTRA_OPTIONS: &str = "-c role=cloud_admin -c default_transaction_read_only=off -c search_path=public -c statement_timeout=0";
|
const EXTRA_OPTIONS: &str = "-c role=cloud_admin -c default_transaction_read_only=off -c search_path=public -c statement_timeout=0";
|
||||||
let options = match conn_conf.get_options() {
|
let options = match conn_conf.get_options() {
|
||||||
Some(options) => format!("{} {}", options, EXTRA_OPTIONS),
|
// Allow the control plane to override any options set by the
|
||||||
|
// compute
|
||||||
|
Some(options) => format!("{} {}", EXTRA_OPTIONS, options),
|
||||||
None => EXTRA_OPTIONS.to_string(),
|
None => EXTRA_OPTIONS.to_string(),
|
||||||
};
|
};
|
||||||
conn_conf.options(&options);
|
conn_conf.options(&options);
|
||||||
|
|||||||
@@ -209,6 +209,10 @@ pub struct NeonStorageControllerConf {
|
|||||||
pub use_https_safekeeper_api: bool,
|
pub use_https_safekeeper_api: bool,
|
||||||
|
|
||||||
pub use_local_compute_notifications: bool,
|
pub use_local_compute_notifications: bool,
|
||||||
|
|
||||||
|
pub timeline_safekeeper_count: Option<i64>,
|
||||||
|
|
||||||
|
pub kick_secondary_downloads: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl NeonStorageControllerConf {
|
impl NeonStorageControllerConf {
|
||||||
@@ -239,6 +243,8 @@ impl Default for NeonStorageControllerConf {
|
|||||||
timelines_onto_safekeepers: true,
|
timelines_onto_safekeepers: true,
|
||||||
use_https_safekeeper_api: false,
|
use_https_safekeeper_api: false,
|
||||||
use_local_compute_notifications: true,
|
use_local_compute_notifications: true,
|
||||||
|
timeline_safekeeper_count: None,
|
||||||
|
kick_secondary_downloads: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -557,6 +557,10 @@ impl StorageController {
|
|||||||
args.push("--use-local-compute-notifications".to_string());
|
args.push("--use-local-compute-notifications".to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(value) = self.config.kick_secondary_downloads {
|
||||||
|
args.push(format!("--kick-secondary-downloads={value}"));
|
||||||
|
}
|
||||||
|
|
||||||
if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() {
|
if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() {
|
||||||
args.push(format!("--ssl-ca-file={}", ssl_ca_file.to_str().unwrap()));
|
args.push(format!("--ssl-ca-file={}", ssl_ca_file.to_str().unwrap()));
|
||||||
}
|
}
|
||||||
@@ -628,6 +632,10 @@ impl StorageController {
|
|||||||
args.push("--timelines-onto-safekeepers".to_string());
|
args.push("--timelines-onto-safekeepers".to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(sk_cnt) = self.config.timeline_safekeeper_count {
|
||||||
|
args.push(format!("--timeline-safekeeper-count={sk_cnt}"));
|
||||||
|
}
|
||||||
|
|
||||||
println!("Starting storage controller");
|
println!("Starting storage controller");
|
||||||
|
|
||||||
background_process::start_process(
|
background_process::start_process(
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ pub static COMPUTE_AUDIENCE: &str = "compute";
|
|||||||
pub enum ComputeClaimsScope {
|
pub enum ComputeClaimsScope {
|
||||||
/// An admin-scoped token allows access to all of `compute_ctl`'s authorized
|
/// An admin-scoped token allows access to all of `compute_ctl`'s authorized
|
||||||
/// facilities.
|
/// facilities.
|
||||||
|
#[serde(rename = "compute_ctl:admin")]
|
||||||
Admin,
|
Admin,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -24,7 +25,7 @@ impl FromStr for ComputeClaimsScope {
|
|||||||
|
|
||||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
match s {
|
match s {
|
||||||
"admin" => Ok(ComputeClaimsScope::Admin),
|
"compute_ctl:admin" => Ok(ComputeClaimsScope::Admin),
|
||||||
_ => Err(anyhow::anyhow!("invalid compute claims scope \"{s}\"")),
|
_ => Err(anyhow::anyhow!("invalid compute claims scope \"{s}\"")),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -80,3 +81,23 @@ pub struct SetRoleGrantsRequest {
|
|||||||
pub privileges: Vec<Privilege>,
|
pub privileges: Vec<Privilege>,
|
||||||
pub role: PgIdent,
|
pub role: PgIdent,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
use crate::requests::ComputeClaimsScope;
|
||||||
|
|
||||||
|
/// Confirm that whether we parse the scope by string or through serde, the
|
||||||
|
/// same values parse to the same enum variant.
|
||||||
|
#[test]
|
||||||
|
fn compute_request_scopes() {
|
||||||
|
const ADMIN_SCOPE: &str = "compute_ctl:admin";
|
||||||
|
|
||||||
|
let from_serde: ComputeClaimsScope =
|
||||||
|
serde_json::from_str(&format!("\"{ADMIN_SCOPE}\"")).unwrap();
|
||||||
|
let from_str = ComputeClaimsScope::from_str(ADMIN_SCOPE).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(from_serde, from_str);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -76,6 +76,10 @@ pub struct PostHogConfig {
|
|||||||
pub private_api_url: String,
|
pub private_api_url: String,
|
||||||
/// Public API URL
|
/// Public API URL
|
||||||
pub public_api_url: String,
|
pub public_api_url: String,
|
||||||
|
/// Refresh interval for the feature flag spec
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
#[serde(with = "humantime_serde")]
|
||||||
|
pub refresh_interval: Option<Duration>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// `pageserver.toml`
|
/// `pageserver.toml`
|
||||||
@@ -816,7 +820,7 @@ pub mod tenant_conf_defaults {
|
|||||||
// By default ingest enough WAL for two new L0 layers before checking if new image
|
// By default ingest enough WAL for two new L0 layers before checking if new image
|
||||||
// image layers should be created.
|
// image layers should be created.
|
||||||
pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2;
|
pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2;
|
||||||
pub const DEFAULT_GC_COMPACTION_ENABLED: bool = false;
|
pub const DEFAULT_GC_COMPACTION_ENABLED: bool = true;
|
||||||
pub const DEFAULT_GC_COMPACTION_VERIFICATION: bool = true;
|
pub const DEFAULT_GC_COMPACTION_VERIFICATION: bool = true;
|
||||||
pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 5 * 1024 * 1024; // 5GB
|
pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 5 * 1024 * 1024; // 5GB
|
||||||
pub const DEFAULT_GC_COMPACTION_RATIO_PERCENT: u64 = 100;
|
pub const DEFAULT_GC_COMPACTION_RATIO_PERCENT: u64 = 100;
|
||||||
|
|||||||
@@ -23,22 +23,12 @@ pub struct ReAttachRequest {
|
|||||||
pub register: Option<NodeRegisterRequest>,
|
pub register: Option<NodeRegisterRequest>,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn default_mode() -> LocationConfigMode {
|
|
||||||
LocationConfigMode::AttachedSingle
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Debug)]
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
pub struct ReAttachResponseTenant {
|
pub struct ReAttachResponseTenant {
|
||||||
pub id: TenantShardId,
|
pub id: TenantShardId,
|
||||||
/// Mandatory if LocationConfigMode is None or set to an Attached* mode
|
/// Mandatory if LocationConfigMode is None or set to an Attached* mode
|
||||||
pub r#gen: Option<u32>,
|
pub r#gen: Option<u32>,
|
||||||
|
|
||||||
/// Default value only for backward compat: this field should be set
|
|
||||||
#[serde(default = "default_mode")]
|
|
||||||
pub mode: LocationConfigMode,
|
pub mode: LocationConfigMode,
|
||||||
|
|
||||||
// Default value only for backward compat: this field should be set
|
|
||||||
#[serde(default = "ShardStripeSize::default")]
|
|
||||||
pub stripe_size: ShardStripeSize,
|
pub stripe_size: ShardStripeSize,
|
||||||
}
|
}
|
||||||
#[derive(Serialize, Deserialize)]
|
#[derive(Serialize, Deserialize)]
|
||||||
|
|||||||
@@ -36,7 +36,10 @@ impl FeatureResolverBackgroundLoop {
|
|||||||
// Main loop of updating the feature flags.
|
// Main loop of updating the feature flags.
|
||||||
handle.spawn(
|
handle.spawn(
|
||||||
async move {
|
async move {
|
||||||
tracing::info!("Starting PostHog feature resolver");
|
tracing::info!(
|
||||||
|
"Starting PostHog feature resolver with refresh period: {:?}",
|
||||||
|
refresh_period
|
||||||
|
);
|
||||||
let mut ticker = tokio::time::interval(refresh_period);
|
let mut ticker = tokio::time::interval(refresh_period);
|
||||||
ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
|
ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
|
||||||
loop {
|
loop {
|
||||||
|
|||||||
@@ -1,5 +1,3 @@
|
|||||||
use std::io;
|
|
||||||
|
|
||||||
use tokio::net::TcpStream;
|
use tokio::net::TcpStream;
|
||||||
|
|
||||||
use crate::client::SocketConfig;
|
use crate::client::SocketConfig;
|
||||||
@@ -8,7 +6,7 @@ use crate::tls::MakeTlsConnect;
|
|||||||
use crate::{Error, cancel_query_raw, connect_socket};
|
use crate::{Error, cancel_query_raw, connect_socket};
|
||||||
|
|
||||||
pub(crate) async fn cancel_query<T>(
|
pub(crate) async fn cancel_query<T>(
|
||||||
config: Option<SocketConfig>,
|
config: SocketConfig,
|
||||||
ssl_mode: SslMode,
|
ssl_mode: SslMode,
|
||||||
tls: T,
|
tls: T,
|
||||||
process_id: i32,
|
process_id: i32,
|
||||||
@@ -17,16 +15,6 @@ pub(crate) async fn cancel_query<T>(
|
|||||||
where
|
where
|
||||||
T: MakeTlsConnect<TcpStream>,
|
T: MakeTlsConnect<TcpStream>,
|
||||||
{
|
{
|
||||||
let config = match config {
|
|
||||||
Some(config) => config,
|
|
||||||
None => {
|
|
||||||
return Err(Error::connect(io::Error::new(
|
|
||||||
io::ErrorKind::InvalidInput,
|
|
||||||
"unknown host",
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let hostname = match &config.host {
|
let hostname = match &config.host {
|
||||||
Host::Tcp(host) => &**host,
|
Host::Tcp(host) => &**host,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -7,11 +7,16 @@ use crate::config::SslMode;
|
|||||||
use crate::tls::{MakeTlsConnect, TlsConnect};
|
use crate::tls::{MakeTlsConnect, TlsConnect};
|
||||||
use crate::{Error, cancel_query, cancel_query_raw};
|
use crate::{Error, cancel_query, cancel_query_raw};
|
||||||
|
|
||||||
/// The capability to request cancellation of in-progress queries on a
|
/// A cancellation token that allows easy cancellation of a query.
|
||||||
/// connection.
|
#[derive(Clone)]
|
||||||
#[derive(Clone, Serialize, Deserialize)]
|
|
||||||
pub struct CancelToken {
|
pub struct CancelToken {
|
||||||
pub socket_config: Option<SocketConfig>,
|
pub socket_config: SocketConfig,
|
||||||
|
pub raw: RawCancelToken,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A raw cancellation token that allows cancellation of a query, given a fresh connection to postgres.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct RawCancelToken {
|
||||||
pub ssl_mode: SslMode,
|
pub ssl_mode: SslMode,
|
||||||
pub process_id: i32,
|
pub process_id: i32,
|
||||||
pub secret_key: i32,
|
pub secret_key: i32,
|
||||||
@@ -36,14 +41,16 @@ impl CancelToken {
|
|||||||
{
|
{
|
||||||
cancel_query::cancel_query(
|
cancel_query::cancel_query(
|
||||||
self.socket_config.clone(),
|
self.socket_config.clone(),
|
||||||
self.ssl_mode,
|
self.raw.ssl_mode,
|
||||||
tls,
|
tls,
|
||||||
self.process_id,
|
self.raw.process_id,
|
||||||
self.secret_key,
|
self.raw.secret_key,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RawCancelToken {
|
||||||
/// Like `cancel_query`, but uses a stream which is already connected to the server rather than opening a new
|
/// Like `cancel_query`, but uses a stream which is already connected to the server rather than opening a new
|
||||||
/// connection itself.
|
/// connection itself.
|
||||||
pub async fn cancel_query_raw<S, T>(&self, stream: S, tls: T) -> Result<(), Error>
|
pub async fn cancel_query_raw<S, T>(&self, stream: S, tls: T) -> Result<(), Error>
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ use postgres_protocol2::message::frontend;
|
|||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use tokio::sync::mpsc;
|
use tokio::sync::mpsc;
|
||||||
|
|
||||||
|
use crate::cancel_token::RawCancelToken;
|
||||||
use crate::codec::{BackendMessages, FrontendMessage};
|
use crate::codec::{BackendMessages, FrontendMessage};
|
||||||
use crate::config::{Host, SslMode};
|
use crate::config::{Host, SslMode};
|
||||||
use crate::query::RowStream;
|
use crate::query::RowStream;
|
||||||
@@ -331,10 +332,12 @@ impl Client {
|
|||||||
/// connection associated with this client.
|
/// connection associated with this client.
|
||||||
pub fn cancel_token(&self) -> CancelToken {
|
pub fn cancel_token(&self) -> CancelToken {
|
||||||
CancelToken {
|
CancelToken {
|
||||||
socket_config: Some(self.socket_config.clone()),
|
socket_config: self.socket_config.clone(),
|
||||||
ssl_mode: self.ssl_mode,
|
raw: RawCancelToken {
|
||||||
process_id: self.process_id,
|
ssl_mode: self.ssl_mode,
|
||||||
secret_key: self.secret_key,
|
process_id: self.process_id,
|
||||||
|
secret_key: self.secret_key,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
use postgres_protocol2::message::backend::ReadyForQueryBody;
|
use postgres_protocol2::message::backend::ReadyForQueryBody;
|
||||||
|
|
||||||
pub use crate::cancel_token::CancelToken;
|
pub use crate::cancel_token::{CancelToken, RawCancelToken};
|
||||||
pub use crate::client::{Client, SocketConfig};
|
pub use crate::client::{Client, SocketConfig};
|
||||||
pub use crate::config::Config;
|
pub use crate::config::Config;
|
||||||
pub use crate::connect_raw::RawConnection;
|
pub use crate::connect_raw::RawConnection;
|
||||||
|
|||||||
@@ -13,22 +13,24 @@ fn main() -> anyhow::Result<()> {
|
|||||||
// Tell cargo to invalidate the built crate whenever the wrapper changes
|
// Tell cargo to invalidate the built crate whenever the wrapper changes
|
||||||
println!("cargo:rerun-if-changed=bindgen_deps.h");
|
println!("cargo:rerun-if-changed=bindgen_deps.h");
|
||||||
|
|
||||||
|
let root_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../..");
|
||||||
|
|
||||||
// Finding the location of built libraries and Postgres C headers:
|
// Finding the location of built libraries and Postgres C headers:
|
||||||
// - if POSTGRES_INSTALL_DIR is set look into it, otherwise look into `<project_root>/pg_install`
|
// - if POSTGRES_INSTALL_DIR is set look into it, otherwise look into `<project_root>/pg_install`
|
||||||
// - if there's a `bin/pg_config` file use it for getting include server, otherwise use `<project_root>/pg_install/{PG_MAJORVERSION}/include/postgresql/server`
|
// - if there's a `bin/pg_config` file use it for getting include server, otherwise use `<project_root>/pg_install/{PG_MAJORVERSION}/include/postgresql/server`
|
||||||
let pg_install_dir = if let Some(postgres_install_dir) = env::var_os("POSTGRES_INSTALL_DIR") {
|
let pg_install_dir = if let Some(postgres_install_dir) = env::var_os("POSTGRES_INSTALL_DIR") {
|
||||||
postgres_install_dir.into()
|
postgres_install_dir.into()
|
||||||
} else {
|
} else {
|
||||||
PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../pg_install")
|
root_path.join("pg_install")
|
||||||
};
|
};
|
||||||
|
|
||||||
let pg_install_abs = std::fs::canonicalize(pg_install_dir)?;
|
let pg_install_abs = std::fs::canonicalize(pg_install_dir)?;
|
||||||
let walproposer_lib_dir = pg_install_abs.join("build/walproposer-lib");
|
let walproposer_lib_dir = root_path.join("build/walproposer-lib");
|
||||||
let walproposer_lib_search_str = walproposer_lib_dir
|
let walproposer_lib_search_str = walproposer_lib_dir
|
||||||
.to_str()
|
.to_str()
|
||||||
.ok_or(anyhow!("Bad non-UTF path"))?;
|
.ok_or(anyhow!("Bad non-UTF path"))?;
|
||||||
|
|
||||||
let pgxn_neon = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../pgxn/neon");
|
let pgxn_neon = root_path.join("pgxn/neon");
|
||||||
let pgxn_neon = std::fs::canonicalize(pgxn_neon)?;
|
let pgxn_neon = std::fs::canonicalize(pgxn_neon)?;
|
||||||
let pgxn_neon = pgxn_neon.to_str().ok_or(anyhow!("Bad non-UTF path"))?;
|
let pgxn_neon = pgxn_neon.to_str().ok_or(anyhow!("Bad non-UTF path"))?;
|
||||||
|
|
||||||
|
|||||||
@@ -12,6 +12,9 @@ testing = ["fail/failpoints", "pageserver_api/testing", "wal_decoder/testing", "
|
|||||||
|
|
||||||
fuzz-read-path = ["testing"]
|
fuzz-read-path = ["testing"]
|
||||||
|
|
||||||
|
# Enables benchmarking only APIs
|
||||||
|
benchmarking = []
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow.workspace = true
|
anyhow.workspace = true
|
||||||
arc-swap.workspace = true
|
arc-swap.workspace = true
|
||||||
@@ -127,6 +130,7 @@ harness = false
|
|||||||
[[bench]]
|
[[bench]]
|
||||||
name = "bench_ingest"
|
name = "bench_ingest"
|
||||||
harness = false
|
harness = false
|
||||||
|
required-features = ["benchmarking"]
|
||||||
|
|
||||||
[[bench]]
|
[[bench]]
|
||||||
name = "upload_queue"
|
name = "upload_queue"
|
||||||
|
|||||||
@@ -1,22 +1,29 @@
|
|||||||
use std::env;
|
use std::env;
|
||||||
use std::num::NonZeroUsize;
|
use std::num::NonZeroUsize;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use bytes::Bytes;
|
use bytes::Bytes;
|
||||||
use camino::Utf8PathBuf;
|
use camino::Utf8PathBuf;
|
||||||
use criterion::{Criterion, criterion_group, criterion_main};
|
use criterion::{Criterion, criterion_group, criterion_main};
|
||||||
|
use futures::stream::FuturesUnordered;
|
||||||
use pageserver::config::PageServerConf;
|
use pageserver::config::PageServerConf;
|
||||||
use pageserver::context::{DownloadBehavior, RequestContext};
|
use pageserver::context::{DownloadBehavior, RequestContext};
|
||||||
|
use pageserver::keyspace::KeySpace;
|
||||||
use pageserver::l0_flush::{L0FlushConfig, L0FlushGlobalState};
|
use pageserver::l0_flush::{L0FlushConfig, L0FlushGlobalState};
|
||||||
use pageserver::task_mgr::TaskKind;
|
use pageserver::task_mgr::TaskKind;
|
||||||
use pageserver::tenant::storage_layer::InMemoryLayer;
|
use pageserver::tenant::storage_layer::IoConcurrency;
|
||||||
|
use pageserver::tenant::storage_layer::{InMemoryLayer, ValuesReconstructState};
|
||||||
use pageserver::{page_cache, virtual_file};
|
use pageserver::{page_cache, virtual_file};
|
||||||
|
use pageserver_api::config::GetVectoredConcurrentIo;
|
||||||
use pageserver_api::key::Key;
|
use pageserver_api::key::Key;
|
||||||
use pageserver_api::models::virtual_file::IoMode;
|
use pageserver_api::models::virtual_file::IoMode;
|
||||||
use pageserver_api::shard::TenantShardId;
|
use pageserver_api::shard::TenantShardId;
|
||||||
use strum::IntoEnumIterator;
|
use tokio_stream::StreamExt;
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use utils::bin_ser::BeSer;
|
use utils::bin_ser::BeSer;
|
||||||
use utils::id::{TenantId, TimelineId};
|
use utils::id::{TenantId, TimelineId};
|
||||||
|
use utils::lsn::Lsn;
|
||||||
|
use utils::sync::gate::Gate;
|
||||||
use wal_decoder::models::value::Value;
|
use wal_decoder::models::value::Value;
|
||||||
use wal_decoder::serialized_batch::SerializedValueBatch;
|
use wal_decoder::serialized_batch::SerializedValueBatch;
|
||||||
|
|
||||||
@@ -30,7 +37,7 @@ fn murmurhash32(mut h: u32) -> u32 {
|
|||||||
h
|
h
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(serde::Serialize, Clone, Copy, Debug)]
|
#[derive(serde::Serialize, Clone, Copy, Debug, PartialEq)]
|
||||||
enum KeyLayout {
|
enum KeyLayout {
|
||||||
/// Sequential unique keys
|
/// Sequential unique keys
|
||||||
Sequential,
|
Sequential,
|
||||||
@@ -40,19 +47,30 @@ enum KeyLayout {
|
|||||||
RandomReuse(u32),
|
RandomReuse(u32),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(serde::Serialize, Clone, Copy, Debug)]
|
#[derive(serde::Serialize, Clone, Copy, Debug, PartialEq)]
|
||||||
enum WriteDelta {
|
enum WriteDelta {
|
||||||
Yes,
|
Yes,
|
||||||
No,
|
No,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(serde::Serialize, Clone, Copy, Debug, PartialEq)]
|
||||||
|
enum ConcurrentReads {
|
||||||
|
Yes,
|
||||||
|
No,
|
||||||
|
}
|
||||||
|
|
||||||
async fn ingest(
|
async fn ingest(
|
||||||
conf: &'static PageServerConf,
|
conf: &'static PageServerConf,
|
||||||
put_size: usize,
|
put_size: usize,
|
||||||
put_count: usize,
|
put_count: usize,
|
||||||
key_layout: KeyLayout,
|
key_layout: KeyLayout,
|
||||||
write_delta: WriteDelta,
|
write_delta: WriteDelta,
|
||||||
|
concurrent_reads: ConcurrentReads,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
|
if concurrent_reads == ConcurrentReads::Yes {
|
||||||
|
assert_eq!(key_layout, KeyLayout::Sequential);
|
||||||
|
}
|
||||||
|
|
||||||
let mut lsn = utils::lsn::Lsn(1000);
|
let mut lsn = utils::lsn::Lsn(1000);
|
||||||
let mut key = Key::from_i128(0x0);
|
let mut key = Key::from_i128(0x0);
|
||||||
|
|
||||||
@@ -68,16 +86,18 @@ async fn ingest(
|
|||||||
let gate = utils::sync::gate::Gate::default();
|
let gate = utils::sync::gate::Gate::default();
|
||||||
let cancel = CancellationToken::new();
|
let cancel = CancellationToken::new();
|
||||||
|
|
||||||
let layer = InMemoryLayer::create(
|
let layer = Arc::new(
|
||||||
conf,
|
InMemoryLayer::create(
|
||||||
timeline_id,
|
conf,
|
||||||
tenant_shard_id,
|
timeline_id,
|
||||||
lsn,
|
tenant_shard_id,
|
||||||
&gate,
|
lsn,
|
||||||
&cancel,
|
&gate,
|
||||||
&ctx,
|
&cancel,
|
||||||
)
|
&ctx,
|
||||||
.await?;
|
)
|
||||||
|
.await?,
|
||||||
|
);
|
||||||
|
|
||||||
let data = Value::Image(Bytes::from(vec![0u8; put_size]));
|
let data = Value::Image(Bytes::from(vec![0u8; put_size]));
|
||||||
let data_ser_size = data.serialized_size().unwrap() as usize;
|
let data_ser_size = data.serialized_size().unwrap() as usize;
|
||||||
@@ -86,6 +106,61 @@ async fn ingest(
|
|||||||
pageserver::context::DownloadBehavior::Download,
|
pageserver::context::DownloadBehavior::Download,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
const READ_BATCH_SIZE: u32 = 32;
|
||||||
|
let (tx, mut rx) = tokio::sync::watch::channel::<Option<Key>>(None);
|
||||||
|
let reader_cancel = CancellationToken::new();
|
||||||
|
let reader_handle = if concurrent_reads == ConcurrentReads::Yes {
|
||||||
|
Some(tokio::task::spawn({
|
||||||
|
let cancel = reader_cancel.clone();
|
||||||
|
let layer = layer.clone();
|
||||||
|
let ctx = ctx.attached_child();
|
||||||
|
async move {
|
||||||
|
let gate = Gate::default();
|
||||||
|
let gate_guard = gate.enter().unwrap();
|
||||||
|
let io_concurrency = IoConcurrency::spawn_from_conf(
|
||||||
|
GetVectoredConcurrentIo::SidecarTask,
|
||||||
|
gate_guard,
|
||||||
|
);
|
||||||
|
|
||||||
|
rx.wait_for(|key| key.is_some()).await.unwrap();
|
||||||
|
|
||||||
|
while !cancel.is_cancelled() {
|
||||||
|
let key = match *rx.borrow() {
|
||||||
|
Some(some) => some,
|
||||||
|
None => unreachable!(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut start_key = key;
|
||||||
|
start_key.field6 = key.field6.saturating_sub(READ_BATCH_SIZE);
|
||||||
|
let key_range = start_key..key.next();
|
||||||
|
|
||||||
|
let mut reconstruct_state = ValuesReconstructState::new(io_concurrency.clone());
|
||||||
|
|
||||||
|
layer
|
||||||
|
.get_values_reconstruct_data(
|
||||||
|
KeySpace::single(key_range),
|
||||||
|
Lsn(1)..Lsn(u64::MAX),
|
||||||
|
&mut reconstruct_state,
|
||||||
|
&ctx,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let mut collect_futs = std::mem::take(&mut reconstruct_state.keys)
|
||||||
|
.into_values()
|
||||||
|
.map(|state| state.sink_pending_ios())
|
||||||
|
.collect::<FuturesUnordered<_>>();
|
||||||
|
while collect_futs.next().await.is_some() {}
|
||||||
|
}
|
||||||
|
|
||||||
|
drop(io_concurrency);
|
||||||
|
gate.close().await;
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
const BATCH_SIZE: usize = 16;
|
const BATCH_SIZE: usize = 16;
|
||||||
let mut batch = Vec::new();
|
let mut batch = Vec::new();
|
||||||
|
|
||||||
@@ -113,19 +188,27 @@ async fn ingest(
|
|||||||
|
|
||||||
batch.push((key.to_compact(), lsn, data_ser_size, data.clone()));
|
batch.push((key.to_compact(), lsn, data_ser_size, data.clone()));
|
||||||
if batch.len() >= BATCH_SIZE {
|
if batch.len() >= BATCH_SIZE {
|
||||||
|
let last_key = Key::from_compact(batch.last().unwrap().0);
|
||||||
|
|
||||||
let this_batch = std::mem::take(&mut batch);
|
let this_batch = std::mem::take(&mut batch);
|
||||||
let serialized = SerializedValueBatch::from_values(this_batch);
|
let serialized = SerializedValueBatch::from_values(this_batch);
|
||||||
layer.put_batch(serialized, &ctx).await?;
|
layer.put_batch(serialized, &ctx).await?;
|
||||||
|
|
||||||
|
tx.send(Some(last_key)).unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !batch.is_empty() {
|
if !batch.is_empty() {
|
||||||
|
let last_key = Key::from_compact(batch.last().unwrap().0);
|
||||||
|
|
||||||
let this_batch = std::mem::take(&mut batch);
|
let this_batch = std::mem::take(&mut batch);
|
||||||
let serialized = SerializedValueBatch::from_values(this_batch);
|
let serialized = SerializedValueBatch::from_values(this_batch);
|
||||||
layer.put_batch(serialized, &ctx).await?;
|
layer.put_batch(serialized, &ctx).await?;
|
||||||
|
|
||||||
|
tx.send(Some(last_key)).unwrap();
|
||||||
}
|
}
|
||||||
layer.freeze(lsn + 1).await;
|
layer.freeze(lsn + 1).await;
|
||||||
|
|
||||||
if matches!(write_delta, WriteDelta::Yes) {
|
if write_delta == WriteDelta::Yes {
|
||||||
let l0_flush_state = L0FlushGlobalState::new(L0FlushConfig::Direct {
|
let l0_flush_state = L0FlushGlobalState::new(L0FlushConfig::Direct {
|
||||||
max_concurrency: NonZeroUsize::new(1).unwrap(),
|
max_concurrency: NonZeroUsize::new(1).unwrap(),
|
||||||
});
|
});
|
||||||
@@ -136,6 +219,11 @@ async fn ingest(
|
|||||||
tokio::fs::remove_file(path).await?;
|
tokio::fs::remove_file(path).await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
reader_cancel.cancel();
|
||||||
|
if let Some(handle) = reader_handle {
|
||||||
|
handle.await.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -147,6 +235,7 @@ fn ingest_main(
|
|||||||
put_count: usize,
|
put_count: usize,
|
||||||
key_layout: KeyLayout,
|
key_layout: KeyLayout,
|
||||||
write_delta: WriteDelta,
|
write_delta: WriteDelta,
|
||||||
|
concurrent_reads: ConcurrentReads,
|
||||||
) {
|
) {
|
||||||
pageserver::virtual_file::set_io_mode(io_mode);
|
pageserver::virtual_file::set_io_mode(io_mode);
|
||||||
|
|
||||||
@@ -156,7 +245,15 @@ fn ingest_main(
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
runtime.block_on(async move {
|
runtime.block_on(async move {
|
||||||
let r = ingest(conf, put_size, put_count, key_layout, write_delta).await;
|
let r = ingest(
|
||||||
|
conf,
|
||||||
|
put_size,
|
||||||
|
put_count,
|
||||||
|
key_layout,
|
||||||
|
write_delta,
|
||||||
|
concurrent_reads,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
if let Err(e) = r {
|
if let Err(e) = r {
|
||||||
panic!("{e:?}");
|
panic!("{e:?}");
|
||||||
}
|
}
|
||||||
@@ -195,6 +292,7 @@ fn criterion_benchmark(c: &mut Criterion) {
|
|||||||
key_size: usize,
|
key_size: usize,
|
||||||
key_layout: KeyLayout,
|
key_layout: KeyLayout,
|
||||||
write_delta: WriteDelta,
|
write_delta: WriteDelta,
|
||||||
|
concurrent_reads: ConcurrentReads,
|
||||||
}
|
}
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
struct HandPickedParameters {
|
struct HandPickedParameters {
|
||||||
@@ -245,7 +343,7 @@ fn criterion_benchmark(c: &mut Criterion) {
|
|||||||
];
|
];
|
||||||
let exploded_parameters = {
|
let exploded_parameters = {
|
||||||
let mut out = Vec::new();
|
let mut out = Vec::new();
|
||||||
for io_mode in IoMode::iter() {
|
for concurrent_reads in [ConcurrentReads::Yes, ConcurrentReads::No] {
|
||||||
for param in expect.clone() {
|
for param in expect.clone() {
|
||||||
let HandPickedParameters {
|
let HandPickedParameters {
|
||||||
volume_mib,
|
volume_mib,
|
||||||
@@ -253,12 +351,18 @@ fn criterion_benchmark(c: &mut Criterion) {
|
|||||||
key_layout,
|
key_layout,
|
||||||
write_delta,
|
write_delta,
|
||||||
} = param;
|
} = param;
|
||||||
|
|
||||||
|
if key_layout != KeyLayout::Sequential && concurrent_reads == ConcurrentReads::Yes {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
out.push(ExplodedParameters {
|
out.push(ExplodedParameters {
|
||||||
io_mode,
|
io_mode: IoMode::DirectRw,
|
||||||
volume_mib,
|
volume_mib,
|
||||||
key_size,
|
key_size,
|
||||||
key_layout,
|
key_layout,
|
||||||
write_delta,
|
write_delta,
|
||||||
|
concurrent_reads,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -272,9 +376,10 @@ fn criterion_benchmark(c: &mut Criterion) {
|
|||||||
key_size,
|
key_size,
|
||||||
key_layout,
|
key_layout,
|
||||||
write_delta,
|
write_delta,
|
||||||
|
concurrent_reads,
|
||||||
} = self;
|
} = self;
|
||||||
format!(
|
format!(
|
||||||
"io_mode={io_mode:?} volume_mib={volume_mib:?} key_size_bytes={key_size:?} key_layout={key_layout:?} write_delta={write_delta:?}"
|
"io_mode={io_mode:?} volume_mib={volume_mib:?} key_size_bytes={key_size:?} key_layout={key_layout:?} write_delta={write_delta:?} concurrent_reads={concurrent_reads:?}"
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -287,12 +392,23 @@ fn criterion_benchmark(c: &mut Criterion) {
|
|||||||
key_size,
|
key_size,
|
||||||
key_layout,
|
key_layout,
|
||||||
write_delta,
|
write_delta,
|
||||||
|
concurrent_reads,
|
||||||
} = params;
|
} = params;
|
||||||
let put_count = volume_mib * 1024 * 1024 / key_size;
|
let put_count = volume_mib * 1024 * 1024 / key_size;
|
||||||
group.throughput(criterion::Throughput::Bytes((key_size * put_count) as u64));
|
group.throughput(criterion::Throughput::Bytes((key_size * put_count) as u64));
|
||||||
group.sample_size(10);
|
group.sample_size(10);
|
||||||
group.bench_function(id, |b| {
|
group.bench_function(id, |b| {
|
||||||
b.iter(|| ingest_main(conf, io_mode, key_size, put_count, key_layout, write_delta))
|
b.iter(|| {
|
||||||
|
ingest_main(
|
||||||
|
conf,
|
||||||
|
io_mode,
|
||||||
|
key_size,
|
||||||
|
put_count,
|
||||||
|
key_layout,
|
||||||
|
write_delta,
|
||||||
|
concurrent_reads,
|
||||||
|
)
|
||||||
|
})
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,6 +11,8 @@ futures.workspace = true
|
|||||||
pageserver_api.workspace = true
|
pageserver_api.workspace = true
|
||||||
postgres_ffi.workspace = true
|
postgres_ffi.workspace = true
|
||||||
prost.workspace = true
|
prost.workspace = true
|
||||||
|
strum.workspace = true
|
||||||
|
strum_macros.workspace = true
|
||||||
thiserror.workspace = true
|
thiserror.workspace = true
|
||||||
tokio.workspace = true
|
tokio.workspace = true
|
||||||
tonic.workspace = true
|
tonic.workspace = true
|
||||||
|
|||||||
@@ -121,7 +121,7 @@ impl Client {
|
|||||||
pub async fn get_base_backup(
|
pub async fn get_base_backup(
|
||||||
&mut self,
|
&mut self,
|
||||||
req: model::GetBaseBackupRequest,
|
req: model::GetBaseBackupRequest,
|
||||||
) -> Result<impl Stream<Item = Result<Bytes, tonic::Status>>, tonic::Status> {
|
) -> Result<impl Stream<Item = Result<Bytes, tonic::Status>> + 'static, tonic::Status> {
|
||||||
let proto_req = proto::GetBaseBackupRequest::from(req);
|
let proto_req = proto::GetBaseBackupRequest::from(req);
|
||||||
|
|
||||||
let response_stream: Streaming<proto::GetBaseBackupResponseChunk> =
|
let response_stream: Streaming<proto::GetBaseBackupResponseChunk> =
|
||||||
|
|||||||
@@ -459,7 +459,7 @@ impl GetPageResponse {
|
|||||||
/// These are effectively equivalent to gRPC statuses. However, we use a bidirectional stream
|
/// These are effectively equivalent to gRPC statuses. However, we use a bidirectional stream
|
||||||
/// (potentially shared by many backends), and a gRPC status response would terminate the stream so
|
/// (potentially shared by many backends), and a gRPC status response would terminate the stream so
|
||||||
/// we send GetPageResponse messages with these codes instead.
|
/// we send GetPageResponse messages with these codes instead.
|
||||||
#[derive(Clone, Copy, Debug)]
|
#[derive(Clone, Copy, Debug, PartialEq, strum_macros::Display)]
|
||||||
pub enum GetPageStatusCode {
|
pub enum GetPageStatusCode {
|
||||||
/// Unknown status. For forwards compatibility: used when an older client version receives a new
|
/// Unknown status. For forwards compatibility: used when an older client version receives a new
|
||||||
/// status code from a newer server version.
|
/// status code from a newer server version.
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ tokio.workspace = true
|
|||||||
tokio-stream.workspace = true
|
tokio-stream.workspace = true
|
||||||
tokio-util.workspace = true
|
tokio-util.workspace = true
|
||||||
tonic.workspace = true
|
tonic.workspace = true
|
||||||
|
url.workspace = true
|
||||||
|
|
||||||
pageserver_client.workspace = true
|
pageserver_client.workspace = true
|
||||||
pageserver_api.workspace = true
|
pageserver_api.workspace = true
|
||||||
|
|||||||
@@ -1,20 +1,29 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::num::NonZeroUsize;
|
use std::num::NonZeroUsize;
|
||||||
use std::ops::Range;
|
use std::ops::Range;
|
||||||
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
|
use std::pin::Pin;
|
||||||
|
use std::sync::atomic::{AtomicU64, Ordering};
|
||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::anyhow;
|
||||||
|
use futures::TryStreamExt as _;
|
||||||
use pageserver_api::shard::TenantShardId;
|
use pageserver_api::shard::TenantShardId;
|
||||||
use pageserver_client::mgmt_api::ForceAwaitLogicalSize;
|
use pageserver_client::mgmt_api::ForceAwaitLogicalSize;
|
||||||
use pageserver_client::page_service::BasebackupRequest;
|
use pageserver_client::page_service::BasebackupRequest;
|
||||||
|
use pageserver_page_api as page_api;
|
||||||
use rand::prelude::*;
|
use rand::prelude::*;
|
||||||
|
use tokio::io::AsyncRead;
|
||||||
use tokio::sync::Barrier;
|
use tokio::sync::Barrier;
|
||||||
use tokio::task::JoinSet;
|
use tokio::task::JoinSet;
|
||||||
|
use tokio_util::compat::{TokioAsyncReadCompatExt as _, TokioAsyncWriteCompatExt as _};
|
||||||
|
use tokio_util::io::StreamReader;
|
||||||
|
use tonic::async_trait;
|
||||||
use tracing::{info, instrument};
|
use tracing::{info, instrument};
|
||||||
|
use url::Url;
|
||||||
use utils::id::TenantTimelineId;
|
use utils::id::TenantTimelineId;
|
||||||
use utils::lsn::Lsn;
|
use utils::lsn::Lsn;
|
||||||
|
use utils::shard::ShardIndex;
|
||||||
|
|
||||||
use crate::util::tokio_thread_local_stats::AllThreadLocalStats;
|
use crate::util::tokio_thread_local_stats::AllThreadLocalStats;
|
||||||
use crate::util::{request_stats, tokio_thread_local_stats};
|
use crate::util::{request_stats, tokio_thread_local_stats};
|
||||||
@@ -24,14 +33,15 @@ use crate::util::{request_stats, tokio_thread_local_stats};
|
|||||||
pub(crate) struct Args {
|
pub(crate) struct Args {
|
||||||
#[clap(long, default_value = "http://localhost:9898")]
|
#[clap(long, default_value = "http://localhost:9898")]
|
||||||
mgmt_api_endpoint: String,
|
mgmt_api_endpoint: String,
|
||||||
#[clap(long, default_value = "postgres://postgres@localhost:64000")]
|
/// The Pageserver to connect to. Use postgresql:// for libpq, or grpc:// for gRPC.
|
||||||
|
#[clap(long, default_value = "postgresql://postgres@localhost:64000")]
|
||||||
page_service_connstring: String,
|
page_service_connstring: String,
|
||||||
#[clap(long)]
|
#[clap(long)]
|
||||||
pageserver_jwt: Option<String>,
|
pageserver_jwt: Option<String>,
|
||||||
#[clap(long, default_value = "1")]
|
#[clap(long, default_value = "1")]
|
||||||
num_clients: NonZeroUsize,
|
num_clients: NonZeroUsize,
|
||||||
#[clap(long, default_value = "1.0")]
|
#[clap(long)]
|
||||||
gzip_probability: f64,
|
no_compression: bool,
|
||||||
#[clap(long)]
|
#[clap(long)]
|
||||||
runtime: Option<humantime::Duration>,
|
runtime: Option<humantime::Duration>,
|
||||||
#[clap(long)]
|
#[clap(long)]
|
||||||
@@ -146,12 +156,27 @@ async fn main_impl(
|
|||||||
|
|
||||||
let mut work_senders = HashMap::new();
|
let mut work_senders = HashMap::new();
|
||||||
let mut tasks = Vec::new();
|
let mut tasks = Vec::new();
|
||||||
for tl in &timelines {
|
let scheme = match Url::parse(&args.page_service_connstring) {
|
||||||
|
Ok(url) => url.scheme().to_lowercase().to_string(),
|
||||||
|
Err(url::ParseError::RelativeUrlWithoutBase) => "postgresql".to_string(),
|
||||||
|
Err(err) => return Err(anyhow!("invalid connstring: {err}")),
|
||||||
|
};
|
||||||
|
for &tl in &timelines {
|
||||||
let (sender, receiver) = tokio::sync::mpsc::channel(1); // TODO: not sure what the implications of this are
|
let (sender, receiver) = tokio::sync::mpsc::channel(1); // TODO: not sure what the implications of this are
|
||||||
work_senders.insert(tl, sender);
|
work_senders.insert(tl, sender);
|
||||||
tasks.push(tokio::spawn(client(
|
|
||||||
args,
|
let client: Box<dyn Client> = match scheme.as_str() {
|
||||||
*tl,
|
"postgresql" | "postgres" => Box::new(
|
||||||
|
LibpqClient::new(&args.page_service_connstring, tl, !args.no_compression).await?,
|
||||||
|
),
|
||||||
|
"grpc" => Box::new(
|
||||||
|
GrpcClient::new(&args.page_service_connstring, tl, !args.no_compression).await?,
|
||||||
|
),
|
||||||
|
scheme => return Err(anyhow!("invalid scheme {scheme}")),
|
||||||
|
};
|
||||||
|
|
||||||
|
tasks.push(tokio::spawn(run_worker(
|
||||||
|
client,
|
||||||
Arc::clone(&start_work_barrier),
|
Arc::clone(&start_work_barrier),
|
||||||
receiver,
|
receiver,
|
||||||
Arc::clone(&all_work_done_barrier),
|
Arc::clone(&all_work_done_barrier),
|
||||||
@@ -166,13 +191,7 @@ async fn main_impl(
|
|||||||
let mut rng = rand::thread_rng();
|
let mut rng = rand::thread_rng();
|
||||||
let target = all_targets.choose(&mut rng).unwrap();
|
let target = all_targets.choose(&mut rng).unwrap();
|
||||||
let lsn = target.lsn_range.clone().map(|r| rng.gen_range(r));
|
let lsn = target.lsn_range.clone().map(|r| rng.gen_range(r));
|
||||||
(
|
(target.timeline, Work { lsn })
|
||||||
target.timeline,
|
|
||||||
Work {
|
|
||||||
lsn,
|
|
||||||
gzip: rng.gen_bool(args.gzip_probability),
|
|
||||||
},
|
|
||||||
)
|
|
||||||
};
|
};
|
||||||
let sender = work_senders.get(&timeline).unwrap();
|
let sender = work_senders.get(&timeline).unwrap();
|
||||||
// TODO: what if this blocks?
|
// TODO: what if this blocks?
|
||||||
@@ -216,13 +235,11 @@ async fn main_impl(
|
|||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
struct Work {
|
struct Work {
|
||||||
lsn: Option<Lsn>,
|
lsn: Option<Lsn>,
|
||||||
gzip: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[instrument(skip_all)]
|
#[instrument(skip_all)]
|
||||||
async fn client(
|
async fn run_worker(
|
||||||
args: &'static Args,
|
mut client: Box<dyn Client>,
|
||||||
timeline: TenantTimelineId,
|
|
||||||
start_work_barrier: Arc<Barrier>,
|
start_work_barrier: Arc<Barrier>,
|
||||||
mut work: tokio::sync::mpsc::Receiver<Work>,
|
mut work: tokio::sync::mpsc::Receiver<Work>,
|
||||||
all_work_done_barrier: Arc<Barrier>,
|
all_work_done_barrier: Arc<Barrier>,
|
||||||
@@ -230,37 +247,14 @@ async fn client(
|
|||||||
) {
|
) {
|
||||||
start_work_barrier.wait().await;
|
start_work_barrier.wait().await;
|
||||||
|
|
||||||
let client = pageserver_client::page_service::Client::new(args.page_service_connstring.clone())
|
while let Some(Work { lsn }) = work.recv().await {
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
while let Some(Work { lsn, gzip }) = work.recv().await {
|
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
let copy_out_stream = client
|
let stream = client.basebackup(lsn).await.unwrap();
|
||||||
.basebackup(&BasebackupRequest {
|
|
||||||
tenant_id: timeline.tenant_id,
|
|
||||||
timeline_id: timeline.timeline_id,
|
|
||||||
lsn,
|
|
||||||
gzip,
|
|
||||||
})
|
|
||||||
.await
|
|
||||||
.with_context(|| format!("start basebackup for {timeline}"))
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
use futures::StreamExt;
|
let size = futures::io::copy(stream.compat(), &mut tokio::io::sink().compat_write())
|
||||||
let size = Arc::new(AtomicUsize::new(0));
|
.await
|
||||||
copy_out_stream
|
.unwrap();
|
||||||
.for_each({
|
info!("basebackup size is {size} bytes");
|
||||||
|r| {
|
|
||||||
let size = Arc::clone(&size);
|
|
||||||
async move {
|
|
||||||
let size = Arc::clone(&size);
|
|
||||||
size.fetch_add(r.unwrap().len(), Ordering::Relaxed);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.await;
|
|
||||||
info!("basebackup size is {} bytes", size.load(Ordering::Relaxed));
|
|
||||||
let elapsed = start.elapsed();
|
let elapsed = start.elapsed();
|
||||||
live_stats.inc();
|
live_stats.inc();
|
||||||
STATS.with(|stats| {
|
STATS.with(|stats| {
|
||||||
@@ -270,3 +264,94 @@ async fn client(
|
|||||||
|
|
||||||
all_work_done_barrier.wait().await;
|
all_work_done_barrier.wait().await;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A basebackup client. This allows switching out the client protocol implementation.
|
||||||
|
#[async_trait]
|
||||||
|
trait Client: Send {
|
||||||
|
async fn basebackup(
|
||||||
|
&mut self,
|
||||||
|
lsn: Option<Lsn>,
|
||||||
|
) -> anyhow::Result<Pin<Box<dyn AsyncRead + Send>>>;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A libpq-based Pageserver client.
|
||||||
|
struct LibpqClient {
|
||||||
|
inner: pageserver_client::page_service::Client,
|
||||||
|
ttid: TenantTimelineId,
|
||||||
|
compression: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LibpqClient {
|
||||||
|
async fn new(
|
||||||
|
connstring: &str,
|
||||||
|
ttid: TenantTimelineId,
|
||||||
|
compression: bool,
|
||||||
|
) -> anyhow::Result<Self> {
|
||||||
|
Ok(Self {
|
||||||
|
inner: pageserver_client::page_service::Client::new(connstring.to_string()).await?,
|
||||||
|
ttid,
|
||||||
|
compression,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Client for LibpqClient {
|
||||||
|
async fn basebackup(
|
||||||
|
&mut self,
|
||||||
|
lsn: Option<Lsn>,
|
||||||
|
) -> anyhow::Result<Pin<Box<dyn AsyncRead + Send + 'static>>> {
|
||||||
|
let req = BasebackupRequest {
|
||||||
|
tenant_id: self.ttid.tenant_id,
|
||||||
|
timeline_id: self.ttid.timeline_id,
|
||||||
|
lsn,
|
||||||
|
gzip: self.compression,
|
||||||
|
};
|
||||||
|
let stream = self.inner.basebackup(&req).await?;
|
||||||
|
Ok(Box::pin(StreamReader::new(
|
||||||
|
stream.map_err(std::io::Error::other),
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A gRPC Pageserver client.
|
||||||
|
struct GrpcClient {
|
||||||
|
inner: page_api::Client,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl GrpcClient {
|
||||||
|
async fn new(
|
||||||
|
connstring: &str,
|
||||||
|
ttid: TenantTimelineId,
|
||||||
|
compression: bool,
|
||||||
|
) -> anyhow::Result<Self> {
|
||||||
|
let inner = page_api::Client::new(
|
||||||
|
connstring.to_string(),
|
||||||
|
ttid.tenant_id,
|
||||||
|
ttid.timeline_id,
|
||||||
|
ShardIndex::unsharded(),
|
||||||
|
None,
|
||||||
|
compression.then_some(tonic::codec::CompressionEncoding::Zstd),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
Ok(Self { inner })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Client for GrpcClient {
|
||||||
|
async fn basebackup(
|
||||||
|
&mut self,
|
||||||
|
lsn: Option<Lsn>,
|
||||||
|
) -> anyhow::Result<Pin<Box<dyn AsyncRead + Send + 'static>>> {
|
||||||
|
let req = page_api::GetBaseBackupRequest {
|
||||||
|
lsn,
|
||||||
|
replica: false,
|
||||||
|
full: false,
|
||||||
|
};
|
||||||
|
let stream = self.inner.get_base_backup(req).await?;
|
||||||
|
Ok(Box::pin(StreamReader::new(
|
||||||
|
stream.map_err(std::io::Error::other),
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -10,33 +10,31 @@ use anyhow::Context;
|
|||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use bytes::Bytes;
|
use bytes::Bytes;
|
||||||
use camino::Utf8PathBuf;
|
use camino::Utf8PathBuf;
|
||||||
|
use futures::{Stream, StreamExt as _};
|
||||||
use pageserver_api::key::Key;
|
use pageserver_api::key::Key;
|
||||||
use pageserver_api::keyspace::KeySpaceAccum;
|
use pageserver_api::keyspace::KeySpaceAccum;
|
||||||
use pageserver_api::pagestream_api::{PagestreamGetPageRequest, PagestreamRequest};
|
use pageserver_api::pagestream_api::{PagestreamGetPageRequest, PagestreamRequest};
|
||||||
use pageserver_api::reltag::RelTag;
|
use pageserver_api::reltag::RelTag;
|
||||||
use pageserver_api::shard::TenantShardId;
|
use pageserver_api::shard::TenantShardId;
|
||||||
use pageserver_page_api::proto;
|
use pageserver_page_api as page_api;
|
||||||
use rand::prelude::*;
|
use rand::prelude::*;
|
||||||
use tokio::task::JoinSet;
|
use tokio::task::JoinSet;
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
use url::Url;
|
||||||
use utils::id::TenantTimelineId;
|
use utils::id::TenantTimelineId;
|
||||||
use utils::lsn::Lsn;
|
use utils::lsn::Lsn;
|
||||||
|
use utils::shard::ShardIndex;
|
||||||
|
|
||||||
use crate::util::tokio_thread_local_stats::AllThreadLocalStats;
|
use crate::util::tokio_thread_local_stats::AllThreadLocalStats;
|
||||||
use crate::util::{request_stats, tokio_thread_local_stats};
|
use crate::util::{request_stats, tokio_thread_local_stats};
|
||||||
|
|
||||||
#[derive(clap::ValueEnum, Clone, Debug)]
|
|
||||||
enum Protocol {
|
|
||||||
Libpq,
|
|
||||||
Grpc,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// GetPage@LatestLSN, uniformly distributed across the compute-accessible keyspace.
|
/// GetPage@LatestLSN, uniformly distributed across the compute-accessible keyspace.
|
||||||
#[derive(clap::Parser)]
|
#[derive(clap::Parser)]
|
||||||
pub(crate) struct Args {
|
pub(crate) struct Args {
|
||||||
#[clap(long, default_value = "http://localhost:9898")]
|
#[clap(long, default_value = "http://localhost:9898")]
|
||||||
mgmt_api_endpoint: String,
|
mgmt_api_endpoint: String,
|
||||||
|
/// Pageserver connection string. Supports postgresql:// and grpc:// protocols.
|
||||||
#[clap(long, default_value = "postgres://postgres@localhost:64000")]
|
#[clap(long, default_value = "postgres://postgres@localhost:64000")]
|
||||||
page_service_connstring: String,
|
page_service_connstring: String,
|
||||||
#[clap(long)]
|
#[clap(long)]
|
||||||
@@ -45,8 +43,9 @@ pub(crate) struct Args {
|
|||||||
num_clients: NonZeroUsize,
|
num_clients: NonZeroUsize,
|
||||||
#[clap(long)]
|
#[clap(long)]
|
||||||
runtime: Option<humantime::Duration>,
|
runtime: Option<humantime::Duration>,
|
||||||
#[clap(long, value_enum, default_value = "libpq")]
|
/// If true, enable compression (only for gRPC).
|
||||||
protocol: Protocol,
|
#[clap(long)]
|
||||||
|
compression: bool,
|
||||||
/// Each client sends requests at the given rate.
|
/// Each client sends requests at the given rate.
|
||||||
///
|
///
|
||||||
/// If a request takes too long and we should be issuing a new request already,
|
/// If a request takes too long and we should be issuing a new request already,
|
||||||
@@ -325,18 +324,32 @@ async fn main_impl(
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
Box::pin(async move {
|
Box::pin(async move {
|
||||||
let client: Box<dyn Client> = match args.protocol {
|
let scheme = match Url::parse(&args.page_service_connstring) {
|
||||||
Protocol::Libpq => Box::new(
|
Ok(url) => url.scheme().to_lowercase().to_string(),
|
||||||
LibpqClient::new(args.page_service_connstring.clone(), worker_id.timeline)
|
Err(url::ParseError::RelativeUrlWithoutBase) => "postgresql".to_string(),
|
||||||
.await
|
Err(err) => panic!("invalid connstring: {err}"),
|
||||||
.unwrap(),
|
};
|
||||||
|
let client: Box<dyn Client> = match scheme.as_str() {
|
||||||
|
"postgresql" | "postgres" => {
|
||||||
|
assert!(!args.compression, "libpq does not support compression");
|
||||||
|
Box::new(
|
||||||
|
LibpqClient::new(&args.page_service_connstring, worker_id.timeline)
|
||||||
|
.await
|
||||||
|
.unwrap(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
"grpc" => Box::new(
|
||||||
|
GrpcClient::new(
|
||||||
|
&args.page_service_connstring,
|
||||||
|
worker_id.timeline,
|
||||||
|
args.compression,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap(),
|
||||||
),
|
),
|
||||||
|
|
||||||
Protocol::Grpc => Box::new(
|
scheme => panic!("unsupported scheme {scheme}"),
|
||||||
GrpcClient::new(args.page_service_connstring.clone(), worker_id.timeline)
|
|
||||||
.await
|
|
||||||
.unwrap(),
|
|
||||||
),
|
|
||||||
};
|
};
|
||||||
run_worker(args, client, ss, cancel, rps_period, ranges, weights).await
|
run_worker(args, client, ss, cancel, rps_period, ranges, weights).await
|
||||||
})
|
})
|
||||||
@@ -543,8 +556,8 @@ struct LibpqClient {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl LibpqClient {
|
impl LibpqClient {
|
||||||
async fn new(connstring: String, ttid: TenantTimelineId) -> anyhow::Result<Self> {
|
async fn new(connstring: &str, ttid: TenantTimelineId) -> anyhow::Result<Self> {
|
||||||
let inner = pageserver_client::page_service::Client::new(connstring)
|
let inner = pageserver_client::page_service::Client::new(connstring.to_string())
|
||||||
.await?
|
.await?
|
||||||
.pagestream(ttid.tenant_id, ttid.timeline_id)
|
.pagestream(ttid.tenant_id, ttid.timeline_id)
|
||||||
.await?;
|
.await?;
|
||||||
@@ -600,34 +613,36 @@ impl Client for LibpqClient {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A gRPC client using the raw, no-frills gRPC client.
|
/// A gRPC Pageserver client.
|
||||||
struct GrpcClient {
|
struct GrpcClient {
|
||||||
req_tx: tokio::sync::mpsc::Sender<proto::GetPageRequest>,
|
req_tx: tokio::sync::mpsc::Sender<page_api::GetPageRequest>,
|
||||||
resp_rx: tonic::Streaming<proto::GetPageResponse>,
|
resp_rx: Pin<Box<dyn Stream<Item = Result<page_api::GetPageResponse, tonic::Status>> + Send>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl GrpcClient {
|
impl GrpcClient {
|
||||||
async fn new(connstring: String, ttid: TenantTimelineId) -> anyhow::Result<Self> {
|
async fn new(
|
||||||
let mut client = pageserver_page_api::proto::PageServiceClient::connect(connstring).await?;
|
connstring: &str,
|
||||||
|
ttid: TenantTimelineId,
|
||||||
|
compression: bool,
|
||||||
|
) -> anyhow::Result<Self> {
|
||||||
|
let mut client = page_api::Client::new(
|
||||||
|
connstring.to_string(),
|
||||||
|
ttid.tenant_id,
|
||||||
|
ttid.timeline_id,
|
||||||
|
ShardIndex::unsharded(),
|
||||||
|
None,
|
||||||
|
compression.then_some(tonic::codec::CompressionEncoding::Zstd),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
// The channel has a buffer size of 1, since 0 is not allowed. It does not matter, since the
|
// The channel has a buffer size of 1, since 0 is not allowed. It does not matter, since the
|
||||||
// benchmark will control the queue depth (i.e. in-flight requests) anyway, and requests are
|
// benchmark will control the queue depth (i.e. in-flight requests) anyway, and requests are
|
||||||
// buffered by Tonic and the OS too.
|
// buffered by Tonic and the OS too.
|
||||||
let (req_tx, req_rx) = tokio::sync::mpsc::channel(1);
|
let (req_tx, req_rx) = tokio::sync::mpsc::channel(1);
|
||||||
let req_stream = tokio_stream::wrappers::ReceiverStream::new(req_rx);
|
let req_stream = tokio_stream::wrappers::ReceiverStream::new(req_rx);
|
||||||
let mut req = tonic::Request::new(req_stream);
|
let resp_rx = Box::pin(client.get_pages(req_stream).await?);
|
||||||
let metadata = req.metadata_mut();
|
|
||||||
metadata.insert("neon-tenant-id", ttid.tenant_id.to_string().try_into()?);
|
|
||||||
metadata.insert("neon-timeline-id", ttid.timeline_id.to_string().try_into()?);
|
|
||||||
metadata.insert("neon-shard-id", "0000".try_into()?);
|
|
||||||
|
|
||||||
let resp = client.get_pages(req).await?;
|
Ok(Self { req_tx, resp_rx })
|
||||||
let resp_stream = resp.into_inner();
|
|
||||||
|
|
||||||
Ok(Self {
|
|
||||||
req_tx,
|
|
||||||
resp_rx: resp_stream,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -641,27 +656,27 @@ impl Client for GrpcClient {
|
|||||||
rel: RelTag,
|
rel: RelTag,
|
||||||
blks: Vec<u32>,
|
blks: Vec<u32>,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
let req = proto::GetPageRequest {
|
let req = page_api::GetPageRequest {
|
||||||
request_id: req_id,
|
request_id: req_id,
|
||||||
request_class: proto::GetPageClass::Normal as i32,
|
request_class: page_api::GetPageClass::Normal,
|
||||||
read_lsn: Some(proto::ReadLsn {
|
read_lsn: page_api::ReadLsn {
|
||||||
request_lsn: req_lsn.0,
|
request_lsn: req_lsn,
|
||||||
not_modified_since_lsn: mod_lsn.0,
|
not_modified_since_lsn: Some(mod_lsn),
|
||||||
}),
|
},
|
||||||
rel: Some(rel.into()),
|
rel,
|
||||||
block_number: blks,
|
block_numbers: blks,
|
||||||
};
|
};
|
||||||
self.req_tx.send(req).await?;
|
self.req_tx.send(req).await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn recv_get_page(&mut self) -> anyhow::Result<(u64, Vec<Bytes>)> {
|
async fn recv_get_page(&mut self) -> anyhow::Result<(u64, Vec<Bytes>)> {
|
||||||
let resp = self.resp_rx.message().await?.unwrap();
|
let resp = self.resp_rx.next().await.unwrap().unwrap();
|
||||||
anyhow::ensure!(
|
anyhow::ensure!(
|
||||||
resp.status_code == proto::GetPageStatusCode::Ok as i32,
|
resp.status_code == page_api::GetPageStatusCode::Ok,
|
||||||
"unexpected status code: {}",
|
"unexpected status code: {}",
|
||||||
resp.status_code
|
resp.status_code,
|
||||||
);
|
);
|
||||||
Ok((resp.request_id, resp.page_image))
|
Ok((resp.request_id, resp.page_images))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -583,7 +583,7 @@ fn start_pageserver(
|
|||||||
deletion_queue_client,
|
deletion_queue_client,
|
||||||
l0_flush_global_state,
|
l0_flush_global_state,
|
||||||
basebackup_prepare_sender,
|
basebackup_prepare_sender,
|
||||||
feature_resolver,
|
feature_resolver: feature_resolver.clone(),
|
||||||
},
|
},
|
||||||
shutdown_pageserver.clone(),
|
shutdown_pageserver.clone(),
|
||||||
);
|
);
|
||||||
@@ -715,6 +715,7 @@ fn start_pageserver(
|
|||||||
disk_usage_eviction_state,
|
disk_usage_eviction_state,
|
||||||
deletion_queue.new_client(),
|
deletion_queue.new_client(),
|
||||||
secondary_controller,
|
secondary_controller,
|
||||||
|
feature_resolver,
|
||||||
)
|
)
|
||||||
.context("Failed to initialize router state")?,
|
.context("Failed to initialize router state")?,
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
use std::{collections::HashMap, sync::Arc, time::Duration};
|
use std::{collections::HashMap, sync::Arc, time::Duration};
|
||||||
|
|
||||||
|
use arc_swap::ArcSwap;
|
||||||
use pageserver_api::config::NodeMetadata;
|
use pageserver_api::config::NodeMetadata;
|
||||||
use posthog_client_lite::{
|
use posthog_client_lite::{
|
||||||
CaptureEvent, FeatureResolverBackgroundLoop, PostHogClientConfig, PostHogEvaluationError,
|
CaptureEvent, FeatureResolverBackgroundLoop, PostHogClientConfig, PostHogEvaluationError,
|
||||||
@@ -12,10 +13,13 @@ use utils::id::TenantId;
|
|||||||
|
|
||||||
use crate::{config::PageServerConf, metrics::FEATURE_FLAG_EVALUATION};
|
use crate::{config::PageServerConf, metrics::FEATURE_FLAG_EVALUATION};
|
||||||
|
|
||||||
|
const DEFAULT_POSTHOG_REFRESH_INTERVAL: Duration = Duration::from_secs(600);
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct FeatureResolver {
|
pub struct FeatureResolver {
|
||||||
inner: Option<Arc<FeatureResolverBackgroundLoop>>,
|
inner: Option<Arc<FeatureResolverBackgroundLoop>>,
|
||||||
internal_properties: Option<Arc<HashMap<String, PostHogFlagFilterPropertyValue>>>,
|
internal_properties: Option<Arc<HashMap<String, PostHogFlagFilterPropertyValue>>>,
|
||||||
|
force_overrides_for_testing: Arc<ArcSwap<HashMap<String, String>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FeatureResolver {
|
impl FeatureResolver {
|
||||||
@@ -23,6 +27,7 @@ impl FeatureResolver {
|
|||||||
Self {
|
Self {
|
||||||
inner: None,
|
inner: None,
|
||||||
internal_properties: None,
|
internal_properties: None,
|
||||||
|
force_overrides_for_testing: Arc::new(ArcSwap::new(Arc::new(HashMap::new()))),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -139,18 +144,23 @@ impl FeatureResolver {
|
|||||||
}
|
}
|
||||||
tenants
|
tenants
|
||||||
};
|
};
|
||||||
// TODO: make refresh period configurable
|
inner.clone().spawn(
|
||||||
inner
|
handle,
|
||||||
.clone()
|
posthog_config
|
||||||
.spawn(handle, Duration::from_secs(60), fake_tenants);
|
.refresh_interval
|
||||||
|
.unwrap_or(DEFAULT_POSTHOG_REFRESH_INTERVAL),
|
||||||
|
fake_tenants,
|
||||||
|
);
|
||||||
Ok(FeatureResolver {
|
Ok(FeatureResolver {
|
||||||
inner: Some(inner),
|
inner: Some(inner),
|
||||||
internal_properties: Some(internal_properties),
|
internal_properties: Some(internal_properties),
|
||||||
|
force_overrides_for_testing: Arc::new(ArcSwap::new(Arc::new(HashMap::new()))),
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
Ok(FeatureResolver {
|
Ok(FeatureResolver {
|
||||||
inner: None,
|
inner: None,
|
||||||
internal_properties: None,
|
internal_properties: None,
|
||||||
|
force_overrides_for_testing: Arc::new(ArcSwap::new(Arc::new(HashMap::new()))),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -190,6 +200,11 @@ impl FeatureResolver {
|
|||||||
flag_key: &str,
|
flag_key: &str,
|
||||||
tenant_id: TenantId,
|
tenant_id: TenantId,
|
||||||
) -> Result<String, PostHogEvaluationError> {
|
) -> Result<String, PostHogEvaluationError> {
|
||||||
|
let force_overrides = self.force_overrides_for_testing.load();
|
||||||
|
if let Some(value) = force_overrides.get(flag_key) {
|
||||||
|
return Ok(value.clone());
|
||||||
|
}
|
||||||
|
|
||||||
if let Some(inner) = &self.inner {
|
if let Some(inner) = &self.inner {
|
||||||
let res = inner.feature_store().evaluate_multivariate(
|
let res = inner.feature_store().evaluate_multivariate(
|
||||||
flag_key,
|
flag_key,
|
||||||
@@ -228,6 +243,15 @@ impl FeatureResolver {
|
|||||||
flag_key: &str,
|
flag_key: &str,
|
||||||
tenant_id: TenantId,
|
tenant_id: TenantId,
|
||||||
) -> Result<(), PostHogEvaluationError> {
|
) -> Result<(), PostHogEvaluationError> {
|
||||||
|
let force_overrides = self.force_overrides_for_testing.load();
|
||||||
|
if let Some(value) = force_overrides.get(flag_key) {
|
||||||
|
return if value == "true" {
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(PostHogEvaluationError::NoConditionGroupMatched)
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
if let Some(inner) = &self.inner {
|
if let Some(inner) = &self.inner {
|
||||||
let res = inner.feature_store().evaluate_boolean(
|
let res = inner.feature_store().evaluate_boolean(
|
||||||
flag_key,
|
flag_key,
|
||||||
@@ -259,8 +283,22 @@ impl FeatureResolver {
|
|||||||
inner.feature_store().is_feature_flag_boolean(flag_key)
|
inner.feature_store().is_feature_flag_boolean(flag_key)
|
||||||
} else {
|
} else {
|
||||||
Err(PostHogEvaluationError::NotAvailable(
|
Err(PostHogEvaluationError::NotAvailable(
|
||||||
"PostHog integration is not enabled".to_string(),
|
"PostHog integration is not enabled, cannot auto-determine the flag type"
|
||||||
|
.to_string(),
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Force override a feature flag for testing. This is only for testing purposes. Assume the caller only call it
|
||||||
|
/// from a single thread so it won't race.
|
||||||
|
pub fn force_override_for_testing(&self, flag_key: &str, value: Option<&str>) {
|
||||||
|
let mut force_overrides = self.force_overrides_for_testing.load().as_ref().clone();
|
||||||
|
if let Some(value) = value {
|
||||||
|
force_overrides.insert(flag_key.to_string(), value.to_string());
|
||||||
|
} else {
|
||||||
|
force_overrides.remove(flag_key);
|
||||||
|
}
|
||||||
|
self.force_overrides_for_testing
|
||||||
|
.store(Arc::new(force_overrides));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -59,6 +59,7 @@ use crate::config::PageServerConf;
|
|||||||
use crate::context;
|
use crate::context;
|
||||||
use crate::context::{DownloadBehavior, RequestContext, RequestContextBuilder};
|
use crate::context::{DownloadBehavior, RequestContext, RequestContextBuilder};
|
||||||
use crate::deletion_queue::DeletionQueueClient;
|
use crate::deletion_queue::DeletionQueueClient;
|
||||||
|
use crate::feature_resolver::FeatureResolver;
|
||||||
use crate::pgdatadir_mapping::LsnForTimestamp;
|
use crate::pgdatadir_mapping::LsnForTimestamp;
|
||||||
use crate::task_mgr::TaskKind;
|
use crate::task_mgr::TaskKind;
|
||||||
use crate::tenant::config::LocationConf;
|
use crate::tenant::config::LocationConf;
|
||||||
@@ -107,6 +108,7 @@ pub struct State {
|
|||||||
deletion_queue_client: DeletionQueueClient,
|
deletion_queue_client: DeletionQueueClient,
|
||||||
secondary_controller: SecondaryController,
|
secondary_controller: SecondaryController,
|
||||||
latest_utilization: tokio::sync::Mutex<Option<(std::time::Instant, bytes::Bytes)>>,
|
latest_utilization: tokio::sync::Mutex<Option<(std::time::Instant, bytes::Bytes)>>,
|
||||||
|
feature_resolver: FeatureResolver,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl State {
|
impl State {
|
||||||
@@ -120,6 +122,7 @@ impl State {
|
|||||||
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
|
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
|
||||||
deletion_queue_client: DeletionQueueClient,
|
deletion_queue_client: DeletionQueueClient,
|
||||||
secondary_controller: SecondaryController,
|
secondary_controller: SecondaryController,
|
||||||
|
feature_resolver: FeatureResolver,
|
||||||
) -> anyhow::Result<Self> {
|
) -> anyhow::Result<Self> {
|
||||||
let allowlist_routes = &[
|
let allowlist_routes = &[
|
||||||
"/v1/status",
|
"/v1/status",
|
||||||
@@ -140,6 +143,7 @@ impl State {
|
|||||||
deletion_queue_client,
|
deletion_queue_client,
|
||||||
secondary_controller,
|
secondary_controller,
|
||||||
latest_utilization: Default::default(),
|
latest_utilization: Default::default(),
|
||||||
|
feature_resolver,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -3675,8 +3679,8 @@ async fn tenant_evaluate_feature_flag(
|
|||||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||||
|
|
||||||
let flag: String = must_parse_query_param(&request, "flag")?;
|
let flag: String = parse_request_param(&request, "flag_key")?;
|
||||||
let as_type: String = must_parse_query_param(&request, "as")?;
|
let as_type: Option<String> = parse_query_param(&request, "as")?;
|
||||||
|
|
||||||
let state = get_state(&request);
|
let state = get_state(&request);
|
||||||
|
|
||||||
@@ -3685,11 +3689,11 @@ async fn tenant_evaluate_feature_flag(
|
|||||||
.tenant_manager
|
.tenant_manager
|
||||||
.get_attached_tenant_shard(tenant_shard_id)?;
|
.get_attached_tenant_shard(tenant_shard_id)?;
|
||||||
let properties = tenant.feature_resolver.collect_properties(tenant_shard_id.tenant_id);
|
let properties = tenant.feature_resolver.collect_properties(tenant_shard_id.tenant_id);
|
||||||
if as_type == "boolean" {
|
if as_type.as_deref() == Some("boolean") {
|
||||||
let result = tenant.feature_resolver.evaluate_boolean(&flag, tenant_shard_id.tenant_id);
|
let result = tenant.feature_resolver.evaluate_boolean(&flag, tenant_shard_id.tenant_id);
|
||||||
let result = result.map(|_| true).map_err(|e| e.to_string());
|
let result = result.map(|_| true).map_err(|e| e.to_string());
|
||||||
json_response(StatusCode::OK, json!({ "result": result, "properties": properties }))
|
json_response(StatusCode::OK, json!({ "result": result, "properties": properties }))
|
||||||
} else if as_type == "multivariate" {
|
} else if as_type.as_deref() == Some("multivariate") {
|
||||||
let result = tenant.feature_resolver.evaluate_multivariate(&flag, tenant_shard_id.tenant_id).map_err(|e| e.to_string());
|
let result = tenant.feature_resolver.evaluate_multivariate(&flag, tenant_shard_id.tenant_id).map_err(|e| e.to_string());
|
||||||
json_response(StatusCode::OK, json!({ "result": result, "properties": properties }))
|
json_response(StatusCode::OK, json!({ "result": result, "properties": properties }))
|
||||||
} else {
|
} else {
|
||||||
@@ -3709,6 +3713,35 @@ async fn tenant_evaluate_feature_flag(
|
|||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn force_override_feature_flag_for_testing_put(
|
||||||
|
request: Request<Body>,
|
||||||
|
_cancel: CancellationToken,
|
||||||
|
) -> Result<Response<Body>, ApiError> {
|
||||||
|
check_permission(&request, None)?;
|
||||||
|
|
||||||
|
let flag: String = parse_request_param(&request, "flag_key")?;
|
||||||
|
let value: String = must_parse_query_param(&request, "value")?;
|
||||||
|
let state = get_state(&request);
|
||||||
|
state
|
||||||
|
.feature_resolver
|
||||||
|
.force_override_for_testing(&flag, Some(&value));
|
||||||
|
json_response(StatusCode::OK, ())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn force_override_feature_flag_for_testing_delete(
|
||||||
|
request: Request<Body>,
|
||||||
|
_cancel: CancellationToken,
|
||||||
|
) -> Result<Response<Body>, ApiError> {
|
||||||
|
check_permission(&request, None)?;
|
||||||
|
|
||||||
|
let flag: String = parse_request_param(&request, "flag_key")?;
|
||||||
|
let state = get_state(&request);
|
||||||
|
state
|
||||||
|
.feature_resolver
|
||||||
|
.force_override_for_testing(&flag, None);
|
||||||
|
json_response(StatusCode::OK, ())
|
||||||
|
}
|
||||||
|
|
||||||
/// Common functionality of all the HTTP API handlers.
|
/// Common functionality of all the HTTP API handlers.
|
||||||
///
|
///
|
||||||
/// - Adds a tracing span to each request (by `request_span`)
|
/// - Adds a tracing span to each request (by `request_span`)
|
||||||
@@ -4085,8 +4118,14 @@ pub fn make_router(
|
|||||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/activate_post_import",
|
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/activate_post_import",
|
||||||
|r| api_handler(r, activate_post_import_handler),
|
|r| api_handler(r, activate_post_import_handler),
|
||||||
)
|
)
|
||||||
.get("/v1/tenant/:tenant_shard_id/feature_flag", |r| {
|
.get("/v1/tenant/:tenant_shard_id/feature_flag/:flag_key", |r| {
|
||||||
api_handler(r, tenant_evaluate_feature_flag)
|
api_handler(r, tenant_evaluate_feature_flag)
|
||||||
})
|
})
|
||||||
|
.put("/v1/feature_flag/:flag_key", |r| {
|
||||||
|
testing_api_handler("force override feature flag - put", r, force_override_feature_flag_for_testing_put)
|
||||||
|
})
|
||||||
|
.delete("/v1/feature_flag/:flag_key", |r| {
|
||||||
|
testing_api_handler("force override feature flag - delete", r, force_override_feature_flag_for_testing_delete)
|
||||||
|
})
|
||||||
.any(handler_404))
|
.any(handler_404))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3426,7 +3426,7 @@ impl TimelineMetrics {
|
|||||||
pub fn dec_frozen_layer(&self, layer: &InMemoryLayer) {
|
pub fn dec_frozen_layer(&self, layer: &InMemoryLayer) {
|
||||||
assert!(matches!(layer.info(), InMemoryLayerInfo::Frozen { .. }));
|
assert!(matches!(layer.info(), InMemoryLayerInfo::Frozen { .. }));
|
||||||
let labels = self.make_frozen_layer_labels(layer);
|
let labels = self.make_frozen_layer_labels(layer);
|
||||||
let size = layer.try_len().expect("frozen layer should have no writer");
|
let size = layer.len();
|
||||||
TIMELINE_LAYER_COUNT
|
TIMELINE_LAYER_COUNT
|
||||||
.get_metric_with_label_values(&labels)
|
.get_metric_with_label_values(&labels)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
@@ -3441,7 +3441,7 @@ impl TimelineMetrics {
|
|||||||
pub fn inc_frozen_layer(&self, layer: &InMemoryLayer) {
|
pub fn inc_frozen_layer(&self, layer: &InMemoryLayer) {
|
||||||
assert!(matches!(layer.info(), InMemoryLayerInfo::Frozen { .. }));
|
assert!(matches!(layer.info(), InMemoryLayerInfo::Frozen { .. }));
|
||||||
let labels = self.make_frozen_layer_labels(layer);
|
let labels = self.make_frozen_layer_labels(layer);
|
||||||
let size = layer.try_len().expect("frozen layer should have no writer");
|
let size = layer.len();
|
||||||
TIMELINE_LAYER_COUNT
|
TIMELINE_LAYER_COUNT
|
||||||
.get_metric_with_label_values(&labels)
|
.get_metric_with_label_values(&labels)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
|
|||||||
@@ -3544,8 +3544,9 @@ impl proto::PageService for GrpcPageServiceHandler {
|
|||||||
&self,
|
&self,
|
||||||
req: tonic::Request<proto::GetBaseBackupRequest>,
|
req: tonic::Request<proto::GetBaseBackupRequest>,
|
||||||
) -> Result<tonic::Response<Self::GetBaseBackupStream>, tonic::Status> {
|
) -> Result<tonic::Response<Self::GetBaseBackupStream>, tonic::Status> {
|
||||||
// Send 64 KB chunks to avoid large memory allocations.
|
// Send chunks of 256 KB to avoid large memory allocations. pagebench basebackup shows this
|
||||||
const CHUNK_SIZE: usize = 64 * 1024;
|
// to be the sweet spot where throughput is saturated.
|
||||||
|
const CHUNK_SIZE: usize = 256 * 1024;
|
||||||
|
|
||||||
let timeline = self.get_request_timeline(&req).await?;
|
let timeline = self.get_request_timeline(&req).await?;
|
||||||
let ctx = self.ctx.with_scope_timeline(&timeline);
|
let ctx = self.ctx.with_scope_timeline(&timeline);
|
||||||
|
|||||||
@@ -61,8 +61,10 @@ pub(crate) struct LocationConf {
|
|||||||
/// The detailed shard identity. This structure is already scoped within
|
/// The detailed shard identity. This structure is already scoped within
|
||||||
/// a TenantShardId, but we need the full ShardIdentity to enable calculating
|
/// a TenantShardId, but we need the full ShardIdentity to enable calculating
|
||||||
/// key->shard mappings.
|
/// key->shard mappings.
|
||||||
// TODO(vlad): Remove this default once all configs have a shard identity on disk.
|
///
|
||||||
#[serde(default = "ShardIdentity::unsharded")]
|
/// NB: we store this even for unsharded tenants, so that we agree with storcon on the intended
|
||||||
|
/// stripe size. Otherwise, a split request that does not specify a stripe size may use a
|
||||||
|
/// different default than storcon, which can lead to incorrect stripe sizes and corruption.
|
||||||
pub(crate) shard: ShardIdentity,
|
pub(crate) shard: ShardIdentity,
|
||||||
|
|
||||||
/// The pan-cluster tenant configuration, the same on all locations
|
/// The pan-cluster tenant configuration, the same on all locations
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::sync::atomic::AtomicU64;
|
use std::sync::atomic::{AtomicU64, Ordering};
|
||||||
|
|
||||||
use camino::Utf8PathBuf;
|
use camino::Utf8PathBuf;
|
||||||
use num_traits::Num;
|
use num_traits::Num;
|
||||||
@@ -18,6 +18,7 @@ use crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64};
|
|||||||
use crate::config::PageServerConf;
|
use crate::config::PageServerConf;
|
||||||
use crate::context::RequestContext;
|
use crate::context::RequestContext;
|
||||||
use crate::page_cache;
|
use crate::page_cache;
|
||||||
|
use crate::tenant::storage_layer::inmemory_layer::GlobalResourceUnits;
|
||||||
use crate::tenant::storage_layer::inmemory_layer::vectored_dio_read::File;
|
use crate::tenant::storage_layer::inmemory_layer::vectored_dio_read::File;
|
||||||
use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAlignedMut;
|
use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAlignedMut;
|
||||||
use crate::virtual_file::owned_buffers_io::slice::SliceMutExt;
|
use crate::virtual_file::owned_buffers_io::slice::SliceMutExt;
|
||||||
@@ -30,9 +31,13 @@ pub struct EphemeralFile {
|
|||||||
_tenant_shard_id: TenantShardId,
|
_tenant_shard_id: TenantShardId,
|
||||||
_timeline_id: TimelineId,
|
_timeline_id: TimelineId,
|
||||||
page_cache_file_id: page_cache::FileId,
|
page_cache_file_id: page_cache::FileId,
|
||||||
bytes_written: u64,
|
|
||||||
file: TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter,
|
file: TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter,
|
||||||
buffered_writer: BufferedWriter,
|
|
||||||
|
buffered_writer: tokio::sync::RwLock<BufferedWriter>,
|
||||||
|
|
||||||
|
bytes_written: AtomicU64,
|
||||||
|
|
||||||
|
resource_units: std::sync::Mutex<GlobalResourceUnits>,
|
||||||
}
|
}
|
||||||
|
|
||||||
type BufferedWriter = owned_buffers_io::write::BufferedWriter<
|
type BufferedWriter = owned_buffers_io::write::BufferedWriter<
|
||||||
@@ -94,9 +99,8 @@ impl EphemeralFile {
|
|||||||
_tenant_shard_id: tenant_shard_id,
|
_tenant_shard_id: tenant_shard_id,
|
||||||
_timeline_id: timeline_id,
|
_timeline_id: timeline_id,
|
||||||
page_cache_file_id,
|
page_cache_file_id,
|
||||||
bytes_written: 0,
|
|
||||||
file: file.clone(),
|
file: file.clone(),
|
||||||
buffered_writer: BufferedWriter::new(
|
buffered_writer: tokio::sync::RwLock::new(BufferedWriter::new(
|
||||||
file,
|
file,
|
||||||
0,
|
0,
|
||||||
|| IoBufferMut::with_capacity(TAIL_SZ),
|
|| IoBufferMut::with_capacity(TAIL_SZ),
|
||||||
@@ -104,7 +108,9 @@ impl EphemeralFile {
|
|||||||
cancel.child_token(),
|
cancel.child_token(),
|
||||||
ctx,
|
ctx,
|
||||||
info_span!(parent: None, "ephemeral_file_buffered_writer", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), timeline_id=%timeline_id, path = %filename),
|
info_span!(parent: None, "ephemeral_file_buffered_writer", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), timeline_id=%timeline_id, path = %filename),
|
||||||
),
|
)),
|
||||||
|
bytes_written: AtomicU64::new(0),
|
||||||
|
resource_units: std::sync::Mutex::new(GlobalResourceUnits::new()),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -151,15 +157,17 @@ impl std::ops::Deref for TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter
|
|||||||
|
|
||||||
#[derive(Debug, thiserror::Error)]
|
#[derive(Debug, thiserror::Error)]
|
||||||
pub(crate) enum EphemeralFileWriteError {
|
pub(crate) enum EphemeralFileWriteError {
|
||||||
#[error("{0}")]
|
|
||||||
TooLong(String),
|
|
||||||
#[error("cancelled")]
|
#[error("cancelled")]
|
||||||
Cancelled,
|
Cancelled,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl EphemeralFile {
|
impl EphemeralFile {
|
||||||
pub(crate) fn len(&self) -> u64 {
|
pub(crate) fn len(&self) -> u64 {
|
||||||
self.bytes_written
|
// TODO(vlad): The value returned here is not always correct if
|
||||||
|
// we have more than one concurrent writer. Writes are always
|
||||||
|
// sequenced, but we could grab the buffered writer lock if we wanted
|
||||||
|
// to.
|
||||||
|
self.bytes_written.load(Ordering::Acquire)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn page_cache_file_id(&self) -> page_cache::FileId {
|
pub(crate) fn page_cache_file_id(&self) -> page_cache::FileId {
|
||||||
@@ -186,7 +194,7 @@ impl EphemeralFile {
|
|||||||
/// Panics if the write is short because there's no way we can recover from that.
|
/// Panics if the write is short because there's no way we can recover from that.
|
||||||
/// TODO: make upstack handle this as an error.
|
/// TODO: make upstack handle this as an error.
|
||||||
pub(crate) async fn write_raw(
|
pub(crate) async fn write_raw(
|
||||||
&mut self,
|
&self,
|
||||||
srcbuf: &[u8],
|
srcbuf: &[u8],
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> Result<u64, EphemeralFileWriteError> {
|
) -> Result<u64, EphemeralFileWriteError> {
|
||||||
@@ -198,22 +206,13 @@ impl EphemeralFile {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn write_raw_controlled(
|
async fn write_raw_controlled(
|
||||||
&mut self,
|
&self,
|
||||||
srcbuf: &[u8],
|
srcbuf: &[u8],
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> Result<(u64, Option<owned_buffers_io::write::FlushControl>), EphemeralFileWriteError> {
|
) -> Result<(u64, Option<owned_buffers_io::write::FlushControl>), EphemeralFileWriteError> {
|
||||||
let pos = self.bytes_written;
|
let mut writer = self.buffered_writer.write().await;
|
||||||
|
|
||||||
let new_bytes_written = pos.checked_add(srcbuf.len().into_u64()).ok_or_else(|| {
|
let (nwritten, control) = writer
|
||||||
EphemeralFileWriteError::TooLong(format!(
|
|
||||||
"write would grow EphemeralFile beyond u64::MAX: len={pos} writen={srcbuf_len}",
|
|
||||||
srcbuf_len = srcbuf.len(),
|
|
||||||
))
|
|
||||||
})?;
|
|
||||||
|
|
||||||
// Write the payload
|
|
||||||
let (nwritten, control) = self
|
|
||||||
.buffered_writer
|
|
||||||
.write_buffered_borrowed_controlled(srcbuf, ctx)
|
.write_buffered_borrowed_controlled(srcbuf, ctx)
|
||||||
.await
|
.await
|
||||||
.map_err(|e| match e {
|
.map_err(|e| match e {
|
||||||
@@ -225,43 +224,69 @@ impl EphemeralFile {
|
|||||||
"buffered writer has no short writes"
|
"buffered writer has no short writes"
|
||||||
);
|
);
|
||||||
|
|
||||||
self.bytes_written = new_bytes_written;
|
// There's no realistic risk of overflow here. We won't have exabytes sized files on disk.
|
||||||
|
let pos = self
|
||||||
|
.bytes_written
|
||||||
|
.fetch_add(srcbuf.len().into_u64(), Ordering::AcqRel);
|
||||||
|
|
||||||
|
let mut resource_units = self.resource_units.lock().unwrap();
|
||||||
|
resource_units.maybe_publish_size(self.bytes_written.load(Ordering::Relaxed));
|
||||||
|
|
||||||
Ok((pos, control))
|
Ok((pos, control))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn tick(&self) -> Option<u64> {
|
||||||
|
let mut resource_units = self.resource_units.lock().unwrap();
|
||||||
|
let len = self.bytes_written.load(Ordering::Relaxed);
|
||||||
|
resource_units.publish_size(len)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl super::storage_layer::inmemory_layer::vectored_dio_read::File for EphemeralFile {
|
impl super::storage_layer::inmemory_layer::vectored_dio_read::File for EphemeralFile {
|
||||||
async fn read_exact_at_eof_ok<B: IoBufAlignedMut + Send>(
|
async fn read_exact_at_eof_ok<B: IoBufAlignedMut + Send>(
|
||||||
&self,
|
&self,
|
||||||
start: u64,
|
start: u64,
|
||||||
dst: tokio_epoll_uring::Slice<B>,
|
mut dst: tokio_epoll_uring::Slice<B>,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> std::io::Result<(tokio_epoll_uring::Slice<B>, usize)> {
|
) -> std::io::Result<(tokio_epoll_uring::Slice<B>, usize)> {
|
||||||
let submitted_offset = self.buffered_writer.bytes_submitted();
|
// We will fill the slice in back to front. Hence, we need
|
||||||
|
// the slice to be fully initialized.
|
||||||
|
// TODO(vlad): Is there a nicer way of doing this?
|
||||||
|
dst.as_mut_rust_slice_full_zeroed();
|
||||||
|
|
||||||
let mutable = match self.buffered_writer.inspect_mutable() {
|
let writer = self.buffered_writer.read().await;
|
||||||
Some(mutable) => &mutable[0..mutable.pending()],
|
|
||||||
None => {
|
|
||||||
// Timeline::cancel and hence buffered writer flush was cancelled.
|
|
||||||
// Remain read-available while timeline is shutting down.
|
|
||||||
&[]
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let maybe_flushed = self.buffered_writer.inspect_maybe_flushed();
|
// Read bytes written while under lock. This is a hack to deal with concurrent
|
||||||
|
// writes updating the number of bytes written. `bytes_written` is not DIO aligned
|
||||||
|
// but we may end the read there.
|
||||||
|
//
|
||||||
|
// TODO(vlad): Feels like there's a nicer path where we align the end if it
|
||||||
|
// shoots over the end of the file.
|
||||||
|
let bytes_written = self.bytes_written.load(Ordering::Acquire);
|
||||||
|
|
||||||
let dst_cap = dst.bytes_total().into_u64();
|
let dst_cap = dst.bytes_total().into_u64();
|
||||||
let end = {
|
let end = {
|
||||||
// saturating_add is correct here because the max file size is u64::MAX, so,
|
// saturating_add is correct here because the max file size is u64::MAX, so,
|
||||||
// if start + dst.len() > u64::MAX, then we know it will be a short read
|
// if start + dst.len() > u64::MAX, then we know it will be a short read
|
||||||
let mut end: u64 = start.saturating_add(dst_cap);
|
let mut end: u64 = start.saturating_add(dst_cap);
|
||||||
if end > self.bytes_written {
|
if end > bytes_written {
|
||||||
end = self.bytes_written;
|
end = bytes_written;
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let submitted_offset = writer.bytes_submitted();
|
||||||
|
let maybe_flushed = writer.inspect_maybe_flushed();
|
||||||
|
|
||||||
|
let mutable = match writer.inspect_mutable() {
|
||||||
|
Some(mutable) => &mutable[0..mutable.pending()],
|
||||||
|
None => {
|
||||||
|
// Timeline::cancel and hence buffered writer flush was cancelled.
|
||||||
|
// Remain read-available while timeline is shutting down.
|
||||||
|
&[]
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// inclusive, exclusive
|
// inclusive, exclusive
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
struct Range<N>(N, N);
|
struct Range<N>(N, N);
|
||||||
@@ -306,13 +331,33 @@ impl super::storage_layer::inmemory_layer::vectored_dio_read::File for Ephemeral
|
|||||||
|
|
||||||
let mutable_range = Range(std::cmp::max(start, submitted_offset), end);
|
let mutable_range = Range(std::cmp::max(start, submitted_offset), end);
|
||||||
|
|
||||||
let dst = if written_range.len() > 0 {
|
// There are three sources from which we might have to read data:
|
||||||
|
// 1. The file itself
|
||||||
|
// 2. The buffer which contains changes currently being flushed
|
||||||
|
// 3. The buffer which contains changes yet to be flushed
|
||||||
|
//
|
||||||
|
// For better concurrency, we do them in reverse order: perform the in-memory
|
||||||
|
// reads while holding the writer lock, drop the writer lock and read from the
|
||||||
|
// file if required.
|
||||||
|
|
||||||
|
let dst = if mutable_range.len() > 0 {
|
||||||
|
let offset_in_buffer = mutable_range
|
||||||
|
.0
|
||||||
|
.checked_sub(submitted_offset)
|
||||||
|
.unwrap()
|
||||||
|
.into_usize();
|
||||||
|
let to_copy =
|
||||||
|
&mutable[offset_in_buffer..(offset_in_buffer + mutable_range.len().into_usize())];
|
||||||
let bounds = dst.bounds();
|
let bounds = dst.bounds();
|
||||||
let slice = self
|
let mut view = dst.slice({
|
||||||
.file
|
let start =
|
||||||
.read_exact_at(dst.slice(0..written_range.len().into_usize()), start, ctx)
|
written_range.len().into_usize() + maybe_flushed_range.len().into_usize();
|
||||||
.await?;
|
let end = start.checked_add(mutable_range.len().into_usize()).unwrap();
|
||||||
Slice::from_buf_bounds(Slice::into_inner(slice), bounds)
|
start..end
|
||||||
|
});
|
||||||
|
view.as_mut_rust_slice_full_zeroed()
|
||||||
|
.copy_from_slice(to_copy);
|
||||||
|
Slice::from_buf_bounds(Slice::into_inner(view), bounds)
|
||||||
} else {
|
} else {
|
||||||
dst
|
dst
|
||||||
};
|
};
|
||||||
@@ -342,24 +387,15 @@ impl super::storage_layer::inmemory_layer::vectored_dio_read::File for Ephemeral
|
|||||||
dst
|
dst
|
||||||
};
|
};
|
||||||
|
|
||||||
let dst = if mutable_range.len() > 0 {
|
drop(writer);
|
||||||
let offset_in_buffer = mutable_range
|
|
||||||
.0
|
let dst = if written_range.len() > 0 {
|
||||||
.checked_sub(submitted_offset)
|
|
||||||
.unwrap()
|
|
||||||
.into_usize();
|
|
||||||
let to_copy =
|
|
||||||
&mutable[offset_in_buffer..(offset_in_buffer + mutable_range.len().into_usize())];
|
|
||||||
let bounds = dst.bounds();
|
let bounds = dst.bounds();
|
||||||
let mut view = dst.slice({
|
let slice = self
|
||||||
let start =
|
.file
|
||||||
written_range.len().into_usize() + maybe_flushed_range.len().into_usize();
|
.read_exact_at(dst.slice(0..written_range.len().into_usize()), start, ctx)
|
||||||
let end = start.checked_add(mutable_range.len().into_usize()).unwrap();
|
.await?;
|
||||||
start..end
|
Slice::from_buf_bounds(Slice::into_inner(slice), bounds)
|
||||||
});
|
|
||||||
view.as_mut_rust_slice_full_zeroed()
|
|
||||||
.copy_from_slice(to_copy);
|
|
||||||
Slice::from_buf_bounds(Slice::into_inner(view), bounds)
|
|
||||||
} else {
|
} else {
|
||||||
dst
|
dst
|
||||||
};
|
};
|
||||||
@@ -460,13 +496,15 @@ mod tests {
|
|||||||
let gate = utils::sync::gate::Gate::default();
|
let gate = utils::sync::gate::Gate::default();
|
||||||
let cancel = CancellationToken::new();
|
let cancel = CancellationToken::new();
|
||||||
|
|
||||||
let mut file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &cancel, &ctx)
|
let file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &cancel, &ctx)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let mutable = file.buffered_writer.mutable();
|
let writer = file.buffered_writer.read().await;
|
||||||
|
let mutable = writer.mutable();
|
||||||
let cap = mutable.capacity();
|
let cap = mutable.capacity();
|
||||||
let align = mutable.align();
|
let align = mutable.align();
|
||||||
|
drop(writer);
|
||||||
|
|
||||||
let write_nbytes = cap * 2 + cap / 2;
|
let write_nbytes = cap * 2 + cap / 2;
|
||||||
|
|
||||||
@@ -504,10 +542,11 @@ mod tests {
|
|||||||
let file_contents = std::fs::read(file.file.path()).unwrap();
|
let file_contents = std::fs::read(file.file.path()).unwrap();
|
||||||
assert!(file_contents == content[0..cap * 2]);
|
assert!(file_contents == content[0..cap * 2]);
|
||||||
|
|
||||||
let maybe_flushed_buffer_contents = file.buffered_writer.inspect_maybe_flushed().unwrap();
|
let writer = file.buffered_writer.read().await;
|
||||||
|
let maybe_flushed_buffer_contents = writer.inspect_maybe_flushed().unwrap();
|
||||||
assert_eq!(&maybe_flushed_buffer_contents[..], &content[cap..cap * 2]);
|
assert_eq!(&maybe_flushed_buffer_contents[..], &content[cap..cap * 2]);
|
||||||
|
|
||||||
let mutable_buffer_contents = file.buffered_writer.mutable();
|
let mutable_buffer_contents = writer.mutable();
|
||||||
assert_eq!(mutable_buffer_contents, &content[cap * 2..write_nbytes]);
|
assert_eq!(mutable_buffer_contents, &content[cap * 2..write_nbytes]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -517,12 +556,14 @@ mod tests {
|
|||||||
|
|
||||||
let gate = utils::sync::gate::Gate::default();
|
let gate = utils::sync::gate::Gate::default();
|
||||||
let cancel = CancellationToken::new();
|
let cancel = CancellationToken::new();
|
||||||
let mut file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &cancel, &ctx)
|
let file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &cancel, &ctx)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
// mutable buffer and maybe_flushed buffer each has `cap` bytes.
|
// mutable buffer and maybe_flushed buffer each has `cap` bytes.
|
||||||
let cap = file.buffered_writer.mutable().capacity();
|
let writer = file.buffered_writer.read().await;
|
||||||
|
let cap = writer.mutable().capacity();
|
||||||
|
drop(writer);
|
||||||
|
|
||||||
let content: Vec<u8> = rand::thread_rng()
|
let content: Vec<u8> = rand::thread_rng()
|
||||||
.sample_iter(rand::distributions::Standard)
|
.sample_iter(rand::distributions::Standard)
|
||||||
@@ -540,12 +581,13 @@ mod tests {
|
|||||||
2 * cap.into_u64(),
|
2 * cap.into_u64(),
|
||||||
"buffered writer requires one write to be flushed if we write 2.5x buffer capacity"
|
"buffered writer requires one write to be flushed if we write 2.5x buffer capacity"
|
||||||
);
|
);
|
||||||
|
let writer = file.buffered_writer.read().await;
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&file.buffered_writer.inspect_maybe_flushed().unwrap()[0..cap],
|
&writer.inspect_maybe_flushed().unwrap()[0..cap],
|
||||||
&content[cap..cap * 2]
|
&content[cap..cap * 2]
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&file.buffered_writer.mutable()[0..cap / 2],
|
&writer.mutable()[0..cap / 2],
|
||||||
&content[cap * 2..cap * 2 + cap / 2]
|
&content[cap * 2..cap * 2 + cap / 2]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -563,13 +605,15 @@ mod tests {
|
|||||||
let gate = utils::sync::gate::Gate::default();
|
let gate = utils::sync::gate::Gate::default();
|
||||||
let cancel = CancellationToken::new();
|
let cancel = CancellationToken::new();
|
||||||
|
|
||||||
let mut file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &cancel, &ctx)
|
let file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &cancel, &ctx)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let mutable = file.buffered_writer.mutable();
|
let writer = file.buffered_writer.read().await;
|
||||||
|
let mutable = writer.mutable();
|
||||||
let cap = mutable.capacity();
|
let cap = mutable.capacity();
|
||||||
let align = mutable.align();
|
let align = mutable.align();
|
||||||
|
drop(writer);
|
||||||
let content: Vec<u8> = rand::thread_rng()
|
let content: Vec<u8> = rand::thread_rng()
|
||||||
.sample_iter(rand::distributions::Standard)
|
.sample_iter(rand::distributions::Standard)
|
||||||
.take(cap * 2 + cap / 2)
|
.take(cap * 2 + cap / 2)
|
||||||
|
|||||||
@@ -109,7 +109,7 @@ pub(crate) enum OnDiskValue {
|
|||||||
|
|
||||||
/// Reconstruct data accumulated for a single key during a vectored get
|
/// Reconstruct data accumulated for a single key during a vectored get
|
||||||
#[derive(Debug, Default)]
|
#[derive(Debug, Default)]
|
||||||
pub(crate) struct VectoredValueReconstructState {
|
pub struct VectoredValueReconstructState {
|
||||||
pub(crate) on_disk_values: Vec<(Lsn, OnDiskValueIoWaiter)>,
|
pub(crate) on_disk_values: Vec<(Lsn, OnDiskValueIoWaiter)>,
|
||||||
|
|
||||||
pub(crate) situation: ValueReconstructSituation,
|
pub(crate) situation: ValueReconstructSituation,
|
||||||
@@ -244,13 +244,60 @@ impl VectoredValueReconstructState {
|
|||||||
|
|
||||||
res
|
res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Benchmarking utility to await for the completion of all pending ios
|
||||||
|
///
|
||||||
|
/// # Cancel-Safety
|
||||||
|
///
|
||||||
|
/// Technically fine to stop polling this future, but, the IOs will still
|
||||||
|
/// be executed to completion by the sidecar task and hold on to / consume resources.
|
||||||
|
/// Better not do it to make reasoning about the system easier.
|
||||||
|
#[cfg(feature = "benchmarking")]
|
||||||
|
pub async fn sink_pending_ios(self) -> Result<(), std::io::Error> {
|
||||||
|
let mut res = Ok(());
|
||||||
|
|
||||||
|
// We should try hard not to bail early, so that by the time we return from this
|
||||||
|
// function, all IO for this value is done. It's not required -- we could totally
|
||||||
|
// stop polling the IO futures in the sidecar task, they need to support that,
|
||||||
|
// but just stopping to poll doesn't reduce the IO load on the disk. It's easier
|
||||||
|
// to reason about the system if we just wait for all IO to complete, even if
|
||||||
|
// we're no longer interested in the result.
|
||||||
|
//
|
||||||
|
// Revisit this when IO futures are replaced with a more sophisticated IO system
|
||||||
|
// and an IO scheduler, where we know which IOs were submitted and which ones
|
||||||
|
// just queued. Cf the comment on IoConcurrency::spawn_io.
|
||||||
|
for (_lsn, waiter) in self.on_disk_values {
|
||||||
|
let value_recv_res = waiter
|
||||||
|
.wait_completion()
|
||||||
|
// we rely on the caller to poll us to completion, so this is not a bail point
|
||||||
|
.await;
|
||||||
|
|
||||||
|
match (&mut res, value_recv_res) {
|
||||||
|
(Err(_), _) => {
|
||||||
|
// We've already failed, no need to process more.
|
||||||
|
}
|
||||||
|
(Ok(_), Err(_wait_err)) => {
|
||||||
|
// This shouldn't happen - likely the sidecar task panicked.
|
||||||
|
unreachable!();
|
||||||
|
}
|
||||||
|
(Ok(_), Ok(Err(err))) => {
|
||||||
|
let err: std::io::Error = err;
|
||||||
|
res = Err(err);
|
||||||
|
}
|
||||||
|
(Ok(_ok), Ok(Ok(OnDiskValue::RawImage(_img)))) => {}
|
||||||
|
(Ok(_ok), Ok(Ok(OnDiskValue::WalRecordOrImage(_buf)))) => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
res
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Bag of data accumulated during a vectored get..
|
/// Bag of data accumulated during a vectored get..
|
||||||
pub(crate) struct ValuesReconstructState {
|
pub struct ValuesReconstructState {
|
||||||
/// The keys will be removed after `get_vectored` completes. The caller outside `Timeline`
|
/// The keys will be removed after `get_vectored` completes. The caller outside `Timeline`
|
||||||
/// should not expect to get anything from this hashmap.
|
/// should not expect to get anything from this hashmap.
|
||||||
pub(crate) keys: HashMap<Key, VectoredValueReconstructState>,
|
pub keys: HashMap<Key, VectoredValueReconstructState>,
|
||||||
/// The keys which are already retrieved
|
/// The keys which are already retrieved
|
||||||
keys_done: KeySpaceRandomAccum,
|
keys_done: KeySpaceRandomAccum,
|
||||||
|
|
||||||
@@ -272,7 +319,7 @@ pub(crate) struct ValuesReconstructState {
|
|||||||
/// The desired end state is that we always do parallel IO.
|
/// The desired end state is that we always do parallel IO.
|
||||||
/// This struct and the dispatching in the impl will be removed once
|
/// This struct and the dispatching in the impl will be removed once
|
||||||
/// we've built enough confidence.
|
/// we've built enough confidence.
|
||||||
pub(crate) enum IoConcurrency {
|
pub enum IoConcurrency {
|
||||||
Sequential,
|
Sequential,
|
||||||
SidecarTask {
|
SidecarTask {
|
||||||
task_id: usize,
|
task_id: usize,
|
||||||
@@ -317,10 +364,7 @@ impl IoConcurrency {
|
|||||||
Self::spawn(SelectedIoConcurrency::Sequential)
|
Self::spawn(SelectedIoConcurrency::Sequential)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn spawn_from_conf(
|
pub fn spawn_from_conf(conf: GetVectoredConcurrentIo, gate_guard: GateGuard) -> IoConcurrency {
|
||||||
conf: GetVectoredConcurrentIo,
|
|
||||||
gate_guard: GateGuard,
|
|
||||||
) -> IoConcurrency {
|
|
||||||
let selected = match conf {
|
let selected = match conf {
|
||||||
GetVectoredConcurrentIo::Sequential => SelectedIoConcurrency::Sequential,
|
GetVectoredConcurrentIo::Sequential => SelectedIoConcurrency::Sequential,
|
||||||
GetVectoredConcurrentIo::SidecarTask => SelectedIoConcurrency::SidecarTask(gate_guard),
|
GetVectoredConcurrentIo::SidecarTask => SelectedIoConcurrency::SidecarTask(gate_guard),
|
||||||
@@ -425,16 +469,6 @@ impl IoConcurrency {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn clone(&self) -> Self {
|
|
||||||
match self {
|
|
||||||
IoConcurrency::Sequential => IoConcurrency::Sequential,
|
|
||||||
IoConcurrency::SidecarTask { task_id, ios_tx } => IoConcurrency::SidecarTask {
|
|
||||||
task_id: *task_id,
|
|
||||||
ios_tx: ios_tx.clone(),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Submit an IO to be executed in the background. DEADLOCK RISK, read the full doc string.
|
/// Submit an IO to be executed in the background. DEADLOCK RISK, read the full doc string.
|
||||||
///
|
///
|
||||||
/// The IO is represented as an opaque future.
|
/// The IO is represented as an opaque future.
|
||||||
@@ -573,6 +607,18 @@ impl IoConcurrency {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Clone for IoConcurrency {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
match self {
|
||||||
|
IoConcurrency::Sequential => IoConcurrency::Sequential,
|
||||||
|
IoConcurrency::SidecarTask { task_id, ios_tx } => IoConcurrency::SidecarTask {
|
||||||
|
task_id: *task_id,
|
||||||
|
ios_tx: ios_tx.clone(),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Make noise in case the [`ValuesReconstructState`] gets dropped while
|
/// Make noise in case the [`ValuesReconstructState`] gets dropped while
|
||||||
/// there are still IOs in flight.
|
/// there are still IOs in flight.
|
||||||
/// Refer to `collect_pending_ios` for why we prefer not to do that.
|
/// Refer to `collect_pending_ios` for why we prefer not to do that.
|
||||||
@@ -603,7 +649,7 @@ impl Drop for ValuesReconstructState {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl ValuesReconstructState {
|
impl ValuesReconstructState {
|
||||||
pub(crate) fn new(io_concurrency: IoConcurrency) -> Self {
|
pub fn new(io_concurrency: IoConcurrency) -> Self {
|
||||||
Self {
|
Self {
|
||||||
keys: HashMap::new(),
|
keys: HashMap::new(),
|
||||||
keys_done: KeySpaceRandomAccum::new(),
|
keys_done: KeySpaceRandomAccum::new(),
|
||||||
|
|||||||
@@ -70,23 +70,15 @@ pub struct InMemoryLayer {
|
|||||||
/// We use a separate lock for the index to reduce the critical section
|
/// We use a separate lock for the index to reduce the critical section
|
||||||
/// during which reads cannot be planned.
|
/// during which reads cannot be planned.
|
||||||
///
|
///
|
||||||
/// If you need access to both the index and the underlying file at the same time,
|
/// Note that the file backing [`InMemoryLayer::file`] is append-only,
|
||||||
/// respect the following locking order to avoid deadlocks:
|
/// so it is not necessary to hold a lock on the index while reading or writing from the file.
|
||||||
/// 1. [`InMemoryLayer::inner`]
|
|
||||||
/// 2. [`InMemoryLayer::index`]
|
|
||||||
///
|
|
||||||
/// Note that the file backing [`InMemoryLayer::inner`] is append-only,
|
|
||||||
/// so it is not necessary to hold simultaneous locks on index.
|
|
||||||
/// This avoids holding index locks across IO, and is crucial for avoiding read tail latency.
|
|
||||||
/// In particular:
|
/// In particular:
|
||||||
/// 1. It is safe to read and release [`InMemoryLayer::index`] before locking and reading from [`InMemoryLayer::inner`].
|
/// 1. It is safe to read and release [`InMemoryLayer::index`] before reading from [`InMemoryLayer::file`].
|
||||||
/// 2. It is safe to write and release [`InMemoryLayer::inner`] before locking and updating [`InMemoryLayer::index`].
|
/// 2. It is safe to write to [`InMemoryLayer::file`] before locking and updating [`InMemoryLayer::index`].
|
||||||
index: RwLock<BTreeMap<CompactKey, VecMap<Lsn, IndexEntry>>>,
|
index: RwLock<BTreeMap<CompactKey, VecMap<Lsn, IndexEntry>>>,
|
||||||
|
|
||||||
/// The above fields never change, except for `end_lsn`, which is only set once,
|
/// Wrapper for the actual on-disk file. Uses interior mutability for concurrent reads/writes.
|
||||||
/// and `index` (see rationale there).
|
file: EphemeralFile,
|
||||||
/// All other changing parts are in `inner`, and protected by a mutex.
|
|
||||||
inner: RwLock<InMemoryLayerInner>,
|
|
||||||
|
|
||||||
estimated_in_mem_size: AtomicU64,
|
estimated_in_mem_size: AtomicU64,
|
||||||
}
|
}
|
||||||
@@ -96,20 +88,10 @@ impl std::fmt::Debug for InMemoryLayer {
|
|||||||
f.debug_struct("InMemoryLayer")
|
f.debug_struct("InMemoryLayer")
|
||||||
.field("start_lsn", &self.start_lsn)
|
.field("start_lsn", &self.start_lsn)
|
||||||
.field("end_lsn", &self.end_lsn)
|
.field("end_lsn", &self.end_lsn)
|
||||||
.field("inner", &self.inner)
|
|
||||||
.finish()
|
.finish()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct InMemoryLayerInner {
|
|
||||||
/// The values are stored in a serialized format in this file.
|
|
||||||
/// Each serialized Value is preceded by a 'u32' length field.
|
|
||||||
/// PerSeg::page_versions map stores offsets into this file.
|
|
||||||
file: EphemeralFile,
|
|
||||||
|
|
||||||
resource_units: GlobalResourceUnits,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Support the same max blob length as blob_io, because ultimately
|
/// Support the same max blob length as blob_io, because ultimately
|
||||||
/// all the InMemoryLayer contents end up being written into a delta layer,
|
/// all the InMemoryLayer contents end up being written into a delta layer,
|
||||||
/// using the [`crate::tenant::blob_io`].
|
/// using the [`crate::tenant::blob_io`].
|
||||||
@@ -258,12 +240,6 @@ struct IndexEntryUnpacked {
|
|||||||
pos: u64,
|
pos: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Debug for InMemoryLayerInner {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
f.debug_struct("InMemoryLayerInner").finish()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// State shared by all in-memory (ephemeral) layers. Updated infrequently during background ticks in Timeline,
|
/// State shared by all in-memory (ephemeral) layers. Updated infrequently during background ticks in Timeline,
|
||||||
/// to minimize contention.
|
/// to minimize contention.
|
||||||
///
|
///
|
||||||
@@ -280,7 +256,7 @@ pub(crate) struct GlobalResources {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Per-timeline RAII struct for its contribution to [`GlobalResources`]
|
// Per-timeline RAII struct for its contribution to [`GlobalResources`]
|
||||||
struct GlobalResourceUnits {
|
pub(crate) struct GlobalResourceUnits {
|
||||||
// How many dirty bytes have I added to the global dirty_bytes: this guard object is responsible
|
// How many dirty bytes have I added to the global dirty_bytes: this guard object is responsible
|
||||||
// for decrementing the global counter by this many bytes when dropped.
|
// for decrementing the global counter by this many bytes when dropped.
|
||||||
dirty_bytes: u64,
|
dirty_bytes: u64,
|
||||||
@@ -292,7 +268,7 @@ impl GlobalResourceUnits {
|
|||||||
// updated when the Timeline "ticks" in the background.
|
// updated when the Timeline "ticks" in the background.
|
||||||
const MAX_SIZE_DRIFT: u64 = 10 * 1024 * 1024;
|
const MAX_SIZE_DRIFT: u64 = 10 * 1024 * 1024;
|
||||||
|
|
||||||
fn new() -> Self {
|
pub(crate) fn new() -> Self {
|
||||||
GLOBAL_RESOURCES
|
GLOBAL_RESOURCES
|
||||||
.dirty_layers
|
.dirty_layers
|
||||||
.fetch_add(1, AtomicOrdering::Relaxed);
|
.fetch_add(1, AtomicOrdering::Relaxed);
|
||||||
@@ -304,7 +280,7 @@ impl GlobalResourceUnits {
|
|||||||
///
|
///
|
||||||
/// Returns the effective layer size limit that should be applied, if any, to keep
|
/// Returns the effective layer size limit that should be applied, if any, to keep
|
||||||
/// the total number of dirty bytes below the configured maximum.
|
/// the total number of dirty bytes below the configured maximum.
|
||||||
fn publish_size(&mut self, size: u64) -> Option<u64> {
|
pub(crate) fn publish_size(&mut self, size: u64) -> Option<u64> {
|
||||||
let new_global_dirty_bytes = match size.cmp(&self.dirty_bytes) {
|
let new_global_dirty_bytes = match size.cmp(&self.dirty_bytes) {
|
||||||
Ordering::Equal => GLOBAL_RESOURCES.dirty_bytes.load(AtomicOrdering::Relaxed),
|
Ordering::Equal => GLOBAL_RESOURCES.dirty_bytes.load(AtomicOrdering::Relaxed),
|
||||||
Ordering::Greater => {
|
Ordering::Greater => {
|
||||||
@@ -349,7 +325,7 @@ impl GlobalResourceUnits {
|
|||||||
|
|
||||||
// Call publish_size if the input size differs from last published size by more than
|
// Call publish_size if the input size differs from last published size by more than
|
||||||
// the drift limit
|
// the drift limit
|
||||||
fn maybe_publish_size(&mut self, size: u64) {
|
pub(crate) fn maybe_publish_size(&mut self, size: u64) {
|
||||||
let publish = match size.cmp(&self.dirty_bytes) {
|
let publish = match size.cmp(&self.dirty_bytes) {
|
||||||
Ordering::Equal => false,
|
Ordering::Equal => false,
|
||||||
Ordering::Greater => size - self.dirty_bytes > Self::MAX_SIZE_DRIFT,
|
Ordering::Greater => size - self.dirty_bytes > Self::MAX_SIZE_DRIFT,
|
||||||
@@ -398,8 +374,8 @@ impl InMemoryLayer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn try_len(&self) -> Option<u64> {
|
pub(crate) fn len(&self) -> u64 {
|
||||||
self.inner.try_read().map(|i| i.file.len()).ok()
|
self.file.len()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn assert_writable(&self) {
|
pub(crate) fn assert_writable(&self) {
|
||||||
@@ -430,7 +406,7 @@ impl InMemoryLayer {
|
|||||||
|
|
||||||
// Look up the keys in the provided keyspace and update
|
// Look up the keys in the provided keyspace and update
|
||||||
// the reconstruct state with whatever is found.
|
// the reconstruct state with whatever is found.
|
||||||
pub(crate) async fn get_values_reconstruct_data(
|
pub async fn get_values_reconstruct_data(
|
||||||
self: &Arc<InMemoryLayer>,
|
self: &Arc<InMemoryLayer>,
|
||||||
keyspace: KeySpace,
|
keyspace: KeySpace,
|
||||||
lsn_range: Range<Lsn>,
|
lsn_range: Range<Lsn>,
|
||||||
@@ -479,14 +455,13 @@ impl InMemoryLayer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
drop(index); // release the lock before we spawn the IO; if it's serial-mode IO we will deadlock on the read().await below
|
drop(index); // release the lock before we spawn the IO
|
||||||
let read_from = Arc::clone(self);
|
let read_from = Arc::clone(self);
|
||||||
let read_ctx = ctx.attached_child();
|
let read_ctx = ctx.attached_child();
|
||||||
reconstruct_state
|
reconstruct_state
|
||||||
.spawn_io(async move {
|
.spawn_io(async move {
|
||||||
let inner = read_from.inner.read().await;
|
|
||||||
let f = vectored_dio_read::execute(
|
let f = vectored_dio_read::execute(
|
||||||
&inner.file,
|
&read_from.file,
|
||||||
reads
|
reads
|
||||||
.iter()
|
.iter()
|
||||||
.flat_map(|(_, value_reads)| value_reads.iter().map(|v| &v.read)),
|
.flat_map(|(_, value_reads)| value_reads.iter().map(|v| &v.read)),
|
||||||
@@ -518,7 +493,6 @@ impl InMemoryLayer {
|
|||||||
// This is kinda forced for InMemoryLayer because we need to inner.read() anyway,
|
// This is kinda forced for InMemoryLayer because we need to inner.read() anyway,
|
||||||
// but it's less obvious for DeltaLayer and ImageLayer. So, keep this explicit
|
// but it's less obvious for DeltaLayer and ImageLayer. So, keep this explicit
|
||||||
// drop for consistency among all three layer types.
|
// drop for consistency among all three layer types.
|
||||||
drop(inner);
|
|
||||||
drop(read_from);
|
drop(read_from);
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
@@ -549,12 +523,6 @@ impl std::fmt::Display for InMemoryLayer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl InMemoryLayer {
|
impl InMemoryLayer {
|
||||||
/// Get layer size.
|
|
||||||
pub async fn size(&self) -> Result<u64> {
|
|
||||||
let inner = self.inner.read().await;
|
|
||||||
Ok(inner.file.len())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn estimated_in_mem_size(&self) -> u64 {
|
pub fn estimated_in_mem_size(&self) -> u64 {
|
||||||
self.estimated_in_mem_size.load(AtomicOrdering::Relaxed)
|
self.estimated_in_mem_size.load(AtomicOrdering::Relaxed)
|
||||||
}
|
}
|
||||||
@@ -587,10 +555,7 @@ impl InMemoryLayer {
|
|||||||
end_lsn: OnceLock::new(),
|
end_lsn: OnceLock::new(),
|
||||||
opened_at: Instant::now(),
|
opened_at: Instant::now(),
|
||||||
index: RwLock::new(BTreeMap::new()),
|
index: RwLock::new(BTreeMap::new()),
|
||||||
inner: RwLock::new(InMemoryLayerInner {
|
file,
|
||||||
file,
|
|
||||||
resource_units: GlobalResourceUnits::new(),
|
|
||||||
}),
|
|
||||||
estimated_in_mem_size: AtomicU64::new(0),
|
estimated_in_mem_size: AtomicU64::new(0),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -599,41 +564,37 @@ impl InMemoryLayer {
|
|||||||
///
|
///
|
||||||
/// Errors are not retryable, the [`InMemoryLayer`] must be discarded, and not be read from.
|
/// Errors are not retryable, the [`InMemoryLayer`] must be discarded, and not be read from.
|
||||||
/// The reason why it's not retryable is that the [`EphemeralFile`] writes are not retryable.
|
/// The reason why it's not retryable is that the [`EphemeralFile`] writes are not retryable.
|
||||||
|
///
|
||||||
|
/// This method shall not be called concurrently. We enforce this property via [`crate::tenant::Timeline::write_lock`].
|
||||||
|
///
|
||||||
/// TODO: it can be made retryable if we aborted the process on EphemeralFile write errors.
|
/// TODO: it can be made retryable if we aborted the process on EphemeralFile write errors.
|
||||||
pub async fn put_batch(
|
pub async fn put_batch(
|
||||||
&self,
|
&self,
|
||||||
serialized_batch: SerializedValueBatch,
|
serialized_batch: SerializedValueBatch,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
let (base_offset, metadata) = {
|
self.assert_writable();
|
||||||
let mut inner = self.inner.write().await;
|
|
||||||
self.assert_writable();
|
|
||||||
|
|
||||||
let base_offset = inner.file.len();
|
let base_offset = self.file.len();
|
||||||
|
|
||||||
let SerializedValueBatch {
|
let SerializedValueBatch {
|
||||||
raw,
|
raw,
|
||||||
metadata,
|
metadata,
|
||||||
max_lsn: _,
|
max_lsn: _,
|
||||||
len: _,
|
len: _,
|
||||||
} = serialized_batch;
|
} = serialized_batch;
|
||||||
|
|
||||||
// Write the batch to the file
|
// Write the batch to the file
|
||||||
inner.file.write_raw(&raw, ctx).await?;
|
self.file.write_raw(&raw, ctx).await?;
|
||||||
let new_size = inner.file.len();
|
let new_size = self.file.len();
|
||||||
|
|
||||||
let expected_new_len = base_offset
|
let expected_new_len = base_offset
|
||||||
.checked_add(raw.len().into_u64())
|
.checked_add(raw.len().into_u64())
|
||||||
// write_raw would error if we were to overflow u64.
|
// write_raw would error if we were to overflow u64.
|
||||||
// also IndexEntry and higher levels in
|
// also IndexEntry and higher levels in
|
||||||
//the code don't allow the file to grow that large
|
//the code don't allow the file to grow that large
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(new_size, expected_new_len);
|
assert_eq!(new_size, expected_new_len);
|
||||||
|
|
||||||
inner.resource_units.maybe_publish_size(new_size);
|
|
||||||
|
|
||||||
(base_offset, metadata)
|
|
||||||
};
|
|
||||||
|
|
||||||
// Update the index with the new entries
|
// Update the index with the new entries
|
||||||
let mut index = self.index.write().await;
|
let mut index = self.index.write().await;
|
||||||
@@ -686,10 +647,8 @@ impl InMemoryLayer {
|
|||||||
self.opened_at
|
self.opened_at
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) async fn tick(&self) -> Option<u64> {
|
pub(crate) fn tick(&self) -> Option<u64> {
|
||||||
let mut inner = self.inner.write().await;
|
self.file.tick()
|
||||||
let size = inner.file.len();
|
|
||||||
inner.resource_units.publish_size(size)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) async fn put_tombstones(&self, _key_ranges: &[(Range<Key>, Lsn)]) -> Result<()> {
|
pub(crate) async fn put_tombstones(&self, _key_ranges: &[(Range<Key>, Lsn)]) -> Result<()> {
|
||||||
@@ -753,12 +712,6 @@ impl InMemoryLayer {
|
|||||||
gate: &utils::sync::gate::Gate,
|
gate: &utils::sync::gate::Gate,
|
||||||
cancel: CancellationToken,
|
cancel: CancellationToken,
|
||||||
) -> Result<Option<(PersistentLayerDesc, Utf8PathBuf)>> {
|
) -> Result<Option<(PersistentLayerDesc, Utf8PathBuf)>> {
|
||||||
// Grab the lock in read-mode. We hold it over the I/O, but because this
|
|
||||||
// layer is not writeable anymore, no one should be trying to acquire the
|
|
||||||
// write lock on it, so we shouldn't block anyone. See the comment on
|
|
||||||
// [`InMemoryLayer::freeze`] to understand how locking between the append path
|
|
||||||
// and layer flushing works.
|
|
||||||
let inner = self.inner.read().await;
|
|
||||||
let index = self.index.read().await;
|
let index = self.index.read().await;
|
||||||
|
|
||||||
use l0_flush::Inner;
|
use l0_flush::Inner;
|
||||||
@@ -793,7 +746,7 @@ impl InMemoryLayer {
|
|||||||
|
|
||||||
match l0_flush_global_state {
|
match l0_flush_global_state {
|
||||||
l0_flush::Inner::Direct { .. } => {
|
l0_flush::Inner::Direct { .. } => {
|
||||||
let file_contents = inner.file.load_to_io_buf(ctx).await?;
|
let file_contents = self.file.load_to_io_buf(ctx).await?;
|
||||||
let file_contents = file_contents.freeze();
|
let file_contents = file_contents.freeze();
|
||||||
|
|
||||||
for (key, vec_map) in index.iter() {
|
for (key, vec_map) in index.iter() {
|
||||||
|
|||||||
@@ -816,7 +816,7 @@ impl From<layer_manager::Shutdown> for FlushLayerError {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(thiserror::Error, Debug)]
|
#[derive(thiserror::Error, Debug)]
|
||||||
pub(crate) enum GetVectoredError {
|
pub enum GetVectoredError {
|
||||||
#[error("timeline shutting down")]
|
#[error("timeline shutting down")]
|
||||||
Cancelled,
|
Cancelled,
|
||||||
|
|
||||||
@@ -849,7 +849,7 @@ impl From<GetReadyAncestorError> for GetVectoredError {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(thiserror::Error, Debug)]
|
#[derive(thiserror::Error, Debug)]
|
||||||
pub(crate) enum GetReadyAncestorError {
|
pub enum GetReadyAncestorError {
|
||||||
#[error("ancestor LSN wait error")]
|
#[error("ancestor LSN wait error")]
|
||||||
AncestorLsnTimeout(#[from] WaitLsnError),
|
AncestorLsnTimeout(#[from] WaitLsnError),
|
||||||
|
|
||||||
@@ -939,7 +939,7 @@ impl std::fmt::Debug for Timeline {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(thiserror::Error, Debug, Clone)]
|
#[derive(thiserror::Error, Debug, Clone)]
|
||||||
pub(crate) enum WaitLsnError {
|
pub enum WaitLsnError {
|
||||||
// Called on a timeline which is shutting down
|
// Called on a timeline which is shutting down
|
||||||
#[error("Shutdown")]
|
#[error("Shutdown")]
|
||||||
Shutdown,
|
Shutdown,
|
||||||
@@ -1902,16 +1902,11 @@ impl Timeline {
|
|||||||
return;
|
return;
|
||||||
};
|
};
|
||||||
|
|
||||||
let Some(current_size) = open_layer.try_len() else {
|
let current_size = open_layer.len();
|
||||||
// Unexpected: since we hold the write guard, nobody else should be writing to this layer, so
|
|
||||||
// read lock to get size should always succeed.
|
|
||||||
tracing::warn!("Lock conflict while reading size of open layer");
|
|
||||||
return;
|
|
||||||
};
|
|
||||||
|
|
||||||
let current_lsn = self.get_last_record_lsn();
|
let current_lsn = self.get_last_record_lsn();
|
||||||
|
|
||||||
let checkpoint_distance_override = open_layer.tick().await;
|
let checkpoint_distance_override = open_layer.tick();
|
||||||
|
|
||||||
if let Some(size_override) = checkpoint_distance_override {
|
if let Some(size_override) = checkpoint_distance_override {
|
||||||
if current_size > size_override {
|
if current_size > size_override {
|
||||||
@@ -6543,7 +6538,7 @@ impl Timeline {
|
|||||||
|
|
||||||
debug!("retain_lsns: {:?}", retain_lsns);
|
debug!("retain_lsns: {:?}", retain_lsns);
|
||||||
|
|
||||||
let mut layers_to_remove = Vec::new();
|
let max_retain_lsn = retain_lsns.iter().max();
|
||||||
|
|
||||||
// Scan all layers in the timeline (remote or on-disk).
|
// Scan all layers in the timeline (remote or on-disk).
|
||||||
//
|
//
|
||||||
@@ -6553,108 +6548,110 @@ impl Timeline {
|
|||||||
// 3. it doesn't need to be retained for 'retain_lsns';
|
// 3. it doesn't need to be retained for 'retain_lsns';
|
||||||
// 4. it does not need to be kept for LSNs holding valid leases.
|
// 4. it does not need to be kept for LSNs holding valid leases.
|
||||||
// 5. newer on-disk image layers cover the layer's whole key range
|
// 5. newer on-disk image layers cover the layer's whole key range
|
||||||
//
|
let layers_to_remove = {
|
||||||
// TODO holding a write lock is too agressive and avoidable
|
let mut layers_to_remove = Vec::new();
|
||||||
let mut guard = self
|
|
||||||
.layers
|
|
||||||
.write(LayerManagerLockHolder::GarbageCollection)
|
|
||||||
.await;
|
|
||||||
let layers = guard.layer_map()?;
|
|
||||||
'outer: for l in layers.iter_historic_layers() {
|
|
||||||
result.layers_total += 1;
|
|
||||||
|
|
||||||
// 1. Is it newer than GC horizon cutoff point?
|
let guard = self
|
||||||
if l.get_lsn_range().end > space_cutoff {
|
.layers
|
||||||
info!(
|
.read(LayerManagerLockHolder::GarbageCollection)
|
||||||
"keeping {} because it's newer than space_cutoff {}",
|
.await;
|
||||||
l.layer_name(),
|
let layers = guard.layer_map()?;
|
||||||
space_cutoff,
|
'outer: for l in layers.iter_historic_layers() {
|
||||||
);
|
result.layers_total += 1;
|
||||||
result.layers_needed_by_cutoff += 1;
|
|
||||||
continue 'outer;
|
|
||||||
}
|
|
||||||
|
|
||||||
// 2. It is newer than PiTR cutoff point?
|
// 1. Is it newer than GC horizon cutoff point?
|
||||||
if l.get_lsn_range().end > time_cutoff {
|
if l.get_lsn_range().end > space_cutoff {
|
||||||
info!(
|
debug!(
|
||||||
"keeping {} because it's newer than time_cutoff {}",
|
"keeping {} because it's newer than space_cutoff {}",
|
||||||
l.layer_name(),
|
|
||||||
time_cutoff,
|
|
||||||
);
|
|
||||||
result.layers_needed_by_pitr += 1;
|
|
||||||
continue 'outer;
|
|
||||||
}
|
|
||||||
|
|
||||||
// 3. Is it needed by a child branch?
|
|
||||||
// NOTE With that we would keep data that
|
|
||||||
// might be referenced by child branches forever.
|
|
||||||
// We can track this in child timeline GC and delete parent layers when
|
|
||||||
// they are no longer needed. This might be complicated with long inheritance chains.
|
|
||||||
//
|
|
||||||
// TODO Vec is not a great choice for `retain_lsns`
|
|
||||||
for retain_lsn in &retain_lsns {
|
|
||||||
// start_lsn is inclusive
|
|
||||||
if &l.get_lsn_range().start <= retain_lsn {
|
|
||||||
info!(
|
|
||||||
"keeping {} because it's still might be referenced by child branch forked at {} is_dropped: xx is_incremental: {}",
|
|
||||||
l.layer_name(),
|
l.layer_name(),
|
||||||
retain_lsn,
|
space_cutoff,
|
||||||
l.is_incremental(),
|
|
||||||
);
|
);
|
||||||
result.layers_needed_by_branches += 1;
|
result.layers_needed_by_cutoff += 1;
|
||||||
continue 'outer;
|
continue 'outer;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// 4. Is there a valid lease that requires us to keep this layer?
|
// 2. It is newer than PiTR cutoff point?
|
||||||
if let Some(lsn) = &max_lsn_with_valid_lease {
|
if l.get_lsn_range().end > time_cutoff {
|
||||||
// keep if layer start <= any of the lease
|
debug!(
|
||||||
if &l.get_lsn_range().start <= lsn {
|
"keeping {} because it's newer than time_cutoff {}",
|
||||||
info!(
|
|
||||||
"keeping {} because there is a valid lease preventing GC at {}",
|
|
||||||
l.layer_name(),
|
l.layer_name(),
|
||||||
lsn,
|
time_cutoff,
|
||||||
);
|
);
|
||||||
result.layers_needed_by_leases += 1;
|
result.layers_needed_by_pitr += 1;
|
||||||
continue 'outer;
|
continue 'outer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 3. Is it needed by a child branch?
|
||||||
|
// NOTE With that we would keep data that
|
||||||
|
// might be referenced by child branches forever.
|
||||||
|
// We can track this in child timeline GC and delete parent layers when
|
||||||
|
// they are no longer needed. This might be complicated with long inheritance chains.
|
||||||
|
if let Some(retain_lsn) = max_retain_lsn {
|
||||||
|
// start_lsn is inclusive
|
||||||
|
if &l.get_lsn_range().start <= retain_lsn {
|
||||||
|
debug!(
|
||||||
|
"keeping {} because it's still might be referenced by child branch forked at {} is_dropped: xx is_incremental: {}",
|
||||||
|
l.layer_name(),
|
||||||
|
retain_lsn,
|
||||||
|
l.is_incremental(),
|
||||||
|
);
|
||||||
|
result.layers_needed_by_branches += 1;
|
||||||
|
continue 'outer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Is there a valid lease that requires us to keep this layer?
|
||||||
|
if let Some(lsn) = &max_lsn_with_valid_lease {
|
||||||
|
// keep if layer start <= any of the lease
|
||||||
|
if &l.get_lsn_range().start <= lsn {
|
||||||
|
debug!(
|
||||||
|
"keeping {} because there is a valid lease preventing GC at {}",
|
||||||
|
l.layer_name(),
|
||||||
|
lsn,
|
||||||
|
);
|
||||||
|
result.layers_needed_by_leases += 1;
|
||||||
|
continue 'outer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 5. Is there a later on-disk layer for this relation?
|
||||||
|
//
|
||||||
|
// The end-LSN is exclusive, while disk_consistent_lsn is
|
||||||
|
// inclusive. For example, if disk_consistent_lsn is 100, it is
|
||||||
|
// OK for a delta layer to have end LSN 101, but if the end LSN
|
||||||
|
// is 102, then it might not have been fully flushed to disk
|
||||||
|
// before crash.
|
||||||
|
//
|
||||||
|
// For example, imagine that the following layers exist:
|
||||||
|
//
|
||||||
|
// 1000 - image (A)
|
||||||
|
// 1000-2000 - delta (B)
|
||||||
|
// 2000 - image (C)
|
||||||
|
// 2000-3000 - delta (D)
|
||||||
|
// 3000 - image (E)
|
||||||
|
//
|
||||||
|
// If GC horizon is at 2500, we can remove layers A and B, but
|
||||||
|
// we cannot remove C, even though it's older than 2500, because
|
||||||
|
// the delta layer 2000-3000 depends on it.
|
||||||
|
if !layers
|
||||||
|
.image_layer_exists(&l.get_key_range(), &(l.get_lsn_range().end..new_gc_cutoff))
|
||||||
|
{
|
||||||
|
debug!("keeping {} because it is the latest layer", l.layer_name());
|
||||||
|
result.layers_not_updated += 1;
|
||||||
|
continue 'outer;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We didn't find any reason to keep this file, so remove it.
|
||||||
|
info!(
|
||||||
|
"garbage collecting {} is_dropped: xx is_incremental: {}",
|
||||||
|
l.layer_name(),
|
||||||
|
l.is_incremental(),
|
||||||
|
);
|
||||||
|
layers_to_remove.push(l);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 5. Is there a later on-disk layer for this relation?
|
layers_to_remove
|
||||||
//
|
};
|
||||||
// The end-LSN is exclusive, while disk_consistent_lsn is
|
|
||||||
// inclusive. For example, if disk_consistent_lsn is 100, it is
|
|
||||||
// OK for a delta layer to have end LSN 101, but if the end LSN
|
|
||||||
// is 102, then it might not have been fully flushed to disk
|
|
||||||
// before crash.
|
|
||||||
//
|
|
||||||
// For example, imagine that the following layers exist:
|
|
||||||
//
|
|
||||||
// 1000 - image (A)
|
|
||||||
// 1000-2000 - delta (B)
|
|
||||||
// 2000 - image (C)
|
|
||||||
// 2000-3000 - delta (D)
|
|
||||||
// 3000 - image (E)
|
|
||||||
//
|
|
||||||
// If GC horizon is at 2500, we can remove layers A and B, but
|
|
||||||
// we cannot remove C, even though it's older than 2500, because
|
|
||||||
// the delta layer 2000-3000 depends on it.
|
|
||||||
if !layers
|
|
||||||
.image_layer_exists(&l.get_key_range(), &(l.get_lsn_range().end..new_gc_cutoff))
|
|
||||||
{
|
|
||||||
info!("keeping {} because it is the latest layer", l.layer_name());
|
|
||||||
result.layers_not_updated += 1;
|
|
||||||
continue 'outer;
|
|
||||||
}
|
|
||||||
|
|
||||||
// We didn't find any reason to keep this file, so remove it.
|
|
||||||
info!(
|
|
||||||
"garbage collecting {} is_dropped: xx is_incremental: {}",
|
|
||||||
l.layer_name(),
|
|
||||||
l.is_incremental(),
|
|
||||||
);
|
|
||||||
layers_to_remove.push(l);
|
|
||||||
}
|
|
||||||
|
|
||||||
if !layers_to_remove.is_empty() {
|
if !layers_to_remove.is_empty() {
|
||||||
// Persist the new GC cutoff value before we actually remove anything.
|
// Persist the new GC cutoff value before we actually remove anything.
|
||||||
@@ -6670,15 +6667,19 @@ impl Timeline {
|
|||||||
}
|
}
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
|
let mut guard = self
|
||||||
|
.layers
|
||||||
|
.write(LayerManagerLockHolder::GarbageCollection)
|
||||||
|
.await;
|
||||||
|
|
||||||
let gc_layers = layers_to_remove
|
let gc_layers = layers_to_remove
|
||||||
.iter()
|
.iter()
|
||||||
.map(|x| guard.get_from_desc(x))
|
.flat_map(|desc| guard.try_get_from_key(&desc.key()).cloned())
|
||||||
.collect::<Vec<Layer>>();
|
.collect::<Vec<Layer>>();
|
||||||
|
|
||||||
result.layers_removed = gc_layers.len() as u64;
|
result.layers_removed = gc_layers.len() as u64;
|
||||||
|
|
||||||
self.remote_client.schedule_gc_update(&gc_layers)?;
|
self.remote_client.schedule_gc_update(&gc_layers)?;
|
||||||
|
|
||||||
guard.open_mut()?.finish_gc_timeline(&gc_layers);
|
guard.open_mut()?.finish_gc_timeline(&gc_layers);
|
||||||
|
|
||||||
#[cfg(feature = "testing")]
|
#[cfg(feature = "testing")]
|
||||||
@@ -7366,7 +7367,7 @@ impl TimelineWriter<'_> {
|
|||||||
.tl
|
.tl
|
||||||
.get_layer_for_write(at, &self.write_guard, ctx)
|
.get_layer_for_write(at, &self.write_guard, ctx)
|
||||||
.await?;
|
.await?;
|
||||||
let initial_size = layer.size().await?;
|
let initial_size = layer.len();
|
||||||
|
|
||||||
let last_freeze_at = self.last_freeze_at.load();
|
let last_freeze_at = self.last_freeze_at.load();
|
||||||
self.write_guard.replace(TimelineWriterState::new(
|
self.write_guard.replace(TimelineWriterState::new(
|
||||||
|
|||||||
28
pgxn/Makefile
Normal file
28
pgxn/Makefile
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# This makefile assumes that 'pg_config' is in the path, or is passed in the
|
||||||
|
# PG_CONFIG variable.
|
||||||
|
#
|
||||||
|
# This is used in two different ways:
|
||||||
|
#
|
||||||
|
# 1. The main makefile calls this, when you invoke the `make neon-pg-ext-%`
|
||||||
|
# target. It passes PG_CONFIG pointing to pg_install/%/bin/pg_config.
|
||||||
|
# This is a VPATH build; the current directory is build/pgxn-%, and
|
||||||
|
# the path to the Makefile is passed with the -f argument.
|
||||||
|
#
|
||||||
|
# 2. compute-node.Dockerfile invokes this to build the compute extensions
|
||||||
|
# for the specific Postgres version. It relies on pg_config already
|
||||||
|
# being in $(PATH).
|
||||||
|
|
||||||
|
srcdir = $(dir $(firstword $(MAKEFILE_LIST)))
|
||||||
|
|
||||||
|
PG_CONFIG = pg_config
|
||||||
|
|
||||||
|
subdirs = neon neon_rmgr neon_walredo neon_utils neon_test_utils
|
||||||
|
|
||||||
|
.PHONY: install install-compute install-storage $(subdirs)
|
||||||
|
install: $(subdirs)
|
||||||
|
install-compute: neon neon_utils neon_test_utils neon_rmgr
|
||||||
|
install-storage: neon_rmgr neon_walredo
|
||||||
|
|
||||||
|
$(subdirs): %:
|
||||||
|
mkdir -p $*
|
||||||
|
$(MAKE) PG_CONFIG=$(PG_CONFIG) -C $* -f $(abspath $(srcdir)/$@/Makefile) install
|
||||||
@@ -82,7 +82,7 @@ typedef PGAlignedBlock PGIOAlignedBlock;
|
|||||||
* read, compare the versions we read from local disk and Page Server,
|
* read, compare the versions we read from local disk and Page Server,
|
||||||
* and Assert that they are identical.
|
* and Assert that they are identical.
|
||||||
*/
|
*/
|
||||||
#define DEBUG_COMPARE_LOCAL
|
/* #define DEBUG_COMPARE_LOCAL */
|
||||||
|
|
||||||
#ifdef DEBUG_COMPARE_LOCAL
|
#ifdef DEBUG_COMPARE_LOCAL
|
||||||
#include "access/nbtree.h"
|
#include "access/nbtree.h"
|
||||||
@@ -1519,8 +1519,6 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
|||||||
#ifdef DEBUG_COMPARE_LOCAL
|
#ifdef DEBUG_COMPARE_LOCAL
|
||||||
compare_with_localv(reln, forknum, blocknum, buffers, nblocks, request_lsns, read_pages);
|
compare_with_localv(reln, forknum, blocknum, buffers, nblocks, request_lsns, read_pages);
|
||||||
memset(read_pages, 0, sizeof(read_pages));
|
memset(read_pages, 0, sizeof(read_pages));
|
||||||
if (prefetch_result == nblocks)
|
|
||||||
neon_log(DEBUG5, "Prefetch hit");
|
|
||||||
#else
|
#else
|
||||||
if (prefetch_result == nblocks)
|
if (prefetch_result == nblocks)
|
||||||
return;
|
return;
|
||||||
|
|||||||
27
poetry.lock
generated
27
poetry.lock
generated
@@ -746,23 +746,23 @@ xray = ["mypy-boto3-xray (>=1.26.0,<1.27.0)"]
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "botocore"
|
name = "botocore"
|
||||||
version = "1.34.11"
|
version = "1.34.162"
|
||||||
description = "Low-level, data-driven core of boto 3."
|
description = "Low-level, data-driven core of boto 3."
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">= 3.8"
|
python-versions = ">=3.8"
|
||||||
groups = ["main"]
|
groups = ["main"]
|
||||||
files = [
|
files = [
|
||||||
{file = "botocore-1.34.11-py3-none-any.whl", hash = "sha256:1ff1398b6ea670e1c01ac67a33af3da854f8e700d3528289c04f319c330d8250"},
|
{file = "botocore-1.34.162-py3-none-any.whl", hash = "sha256:2d918b02db88d27a75b48275e6fb2506e9adaaddbec1ffa6a8a0898b34e769be"},
|
||||||
{file = "botocore-1.34.11.tar.gz", hash = "sha256:51905c3d623c60df5dc5794387de7caf886d350180a01a3dfa762e903edb45a9"},
|
{file = "botocore-1.34.162.tar.gz", hash = "sha256:adc23be4fb99ad31961236342b7cbf3c0bfc62532cd02852196032e8c0d682f3"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
jmespath = ">=0.7.1,<2.0.0"
|
jmespath = ">=0.7.1,<2.0.0"
|
||||||
python-dateutil = ">=2.1,<3.0.0"
|
python-dateutil = ">=2.1,<3.0.0"
|
||||||
urllib3 = {version = ">=1.25.4,<2.1", markers = "python_version >= \"3.10\""}
|
urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
crt = ["awscrt (==0.19.19)"]
|
crt = ["awscrt (==0.21.2)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "botocore-stubs"
|
name = "botocore-stubs"
|
||||||
@@ -3422,20 +3422,21 @@ files = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "urllib3"
|
name = "urllib3"
|
||||||
version = "1.26.19"
|
version = "2.5.0"
|
||||||
description = "HTTP library with thread-safe connection pooling, file post, and more."
|
description = "HTTP library with thread-safe connection pooling, file post, and more."
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
|
python-versions = ">=3.9"
|
||||||
groups = ["main"]
|
groups = ["main"]
|
||||||
files = [
|
files = [
|
||||||
{file = "urllib3-1.26.19-py2.py3-none-any.whl", hash = "sha256:37a0344459b199fce0e80b0d3569837ec6b6937435c5244e7fd73fa6006830f3"},
|
{file = "urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc"},
|
||||||
{file = "urllib3-1.26.19.tar.gz", hash = "sha256:3e3d753a8618b86d7de333b4223005f68720bcd6a7d2bcb9fbd2229ec7c1e429"},
|
{file = "urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
brotli = ["brotli (==1.0.9) ; os_name != \"nt\" and python_version < \"3\" and platform_python_implementation == \"CPython\"", "brotli (>=1.0.9) ; python_version >= \"3\" and platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; (os_name != \"nt\" or python_version >= \"3\") and platform_python_implementation != \"CPython\"", "brotlipy (>=0.6.0) ; os_name == \"nt\" and python_version < \"3\""]
|
brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""]
|
||||||
secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress ; python_version == \"2.7\"", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"]
|
h2 = ["h2 (>=4,<5)"]
|
||||||
socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
|
socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
|
||||||
|
zstd = ["zstandard (>=0.18.0)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "websockets"
|
name = "websockets"
|
||||||
|
|||||||
146
proxy/src/batch.rs
Normal file
146
proxy/src/batch.rs
Normal file
@@ -0,0 +1,146 @@
|
|||||||
|
//! Batch processing system based on intrusive linked lists.
|
||||||
|
//!
|
||||||
|
//! Enqueuing a batch job requires no allocations, with
|
||||||
|
//! direct support for cancelling jobs early.
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::pin::pin;
|
||||||
|
use std::sync::Mutex;
|
||||||
|
|
||||||
|
use futures::future::Either;
|
||||||
|
use scopeguard::ScopeGuard;
|
||||||
|
use tokio::sync::oneshot::error::TryRecvError;
|
||||||
|
|
||||||
|
use crate::ext::LockExt;
|
||||||
|
|
||||||
|
pub trait QueueProcessing: Send + 'static {
|
||||||
|
type Req: Send + 'static;
|
||||||
|
type Res: Send;
|
||||||
|
|
||||||
|
/// Get the desired batch size.
|
||||||
|
fn batch_size(&self, queue_size: usize) -> usize;
|
||||||
|
|
||||||
|
/// This applies a full batch of events.
|
||||||
|
/// Must respond with a full batch of replies.
|
||||||
|
///
|
||||||
|
/// If this apply can error, it's expected that errors be forwarded to each Self::Res.
|
||||||
|
///
|
||||||
|
/// Batching does not need to happen atomically.
|
||||||
|
fn apply(&mut self, req: Vec<Self::Req>) -> impl Future<Output = Vec<Self::Res>> + Send;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct BatchQueue<P: QueueProcessing> {
|
||||||
|
processor: tokio::sync::Mutex<P>,
|
||||||
|
inner: Mutex<BatchQueueInner<P>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct BatchJob<P: QueueProcessing> {
|
||||||
|
req: P::Req,
|
||||||
|
res: tokio::sync::oneshot::Sender<P::Res>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<P: QueueProcessing> BatchQueue<P> {
|
||||||
|
pub fn new(p: P) -> Self {
|
||||||
|
Self {
|
||||||
|
processor: tokio::sync::Mutex::new(p),
|
||||||
|
inner: Mutex::new(BatchQueueInner {
|
||||||
|
version: 0,
|
||||||
|
queue: BTreeMap::new(),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn call(&self, req: P::Req) -> P::Res {
|
||||||
|
let (id, mut rx) = self.inner.lock_propagate_poison().register_job(req);
|
||||||
|
let guard = scopeguard::guard(id, move |id| {
|
||||||
|
let mut inner = self.inner.lock_propagate_poison();
|
||||||
|
if inner.queue.remove(&id).is_some() {
|
||||||
|
tracing::debug!("batched task cancelled before completion");
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
let resp = loop {
|
||||||
|
// try become the leader, or try wait for success.
|
||||||
|
let mut processor = match futures::future::select(rx, pin!(self.processor.lock())).await
|
||||||
|
{
|
||||||
|
// we got the resp.
|
||||||
|
Either::Left((resp, _)) => break resp.ok(),
|
||||||
|
// we are the leader.
|
||||||
|
Either::Right((p, rx_)) => {
|
||||||
|
rx = rx_;
|
||||||
|
p
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let (reqs, resps) = self.inner.lock_propagate_poison().get_batch(&processor);
|
||||||
|
|
||||||
|
// apply a batch.
|
||||||
|
let values = processor.apply(reqs).await;
|
||||||
|
|
||||||
|
// send response values.
|
||||||
|
for (tx, value) in std::iter::zip(resps, values) {
|
||||||
|
// sender hung up but that's fine.
|
||||||
|
drop(tx.send(value));
|
||||||
|
}
|
||||||
|
|
||||||
|
match rx.try_recv() {
|
||||||
|
Ok(resp) => break Some(resp),
|
||||||
|
Err(TryRecvError::Closed) => break None,
|
||||||
|
// edge case - there was a race condition where
|
||||||
|
// we became the leader but were not in the batch.
|
||||||
|
//
|
||||||
|
// Example:
|
||||||
|
// thread 1: register job id=1
|
||||||
|
// thread 2: register job id=2
|
||||||
|
// thread 2: processor.lock().await
|
||||||
|
// thread 1: processor.lock().await
|
||||||
|
// thread 2: becomes leader, batch_size=1, jobs=[1].
|
||||||
|
Err(TryRecvError::Empty) => {}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// already removed.
|
||||||
|
ScopeGuard::into_inner(guard);
|
||||||
|
|
||||||
|
resp.expect("no response found. batch processer should not panic")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct BatchQueueInner<P: QueueProcessing> {
|
||||||
|
version: u64,
|
||||||
|
queue: BTreeMap<u64, BatchJob<P>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<P: QueueProcessing> BatchQueueInner<P> {
|
||||||
|
fn register_job(&mut self, req: P::Req) -> (u64, tokio::sync::oneshot::Receiver<P::Res>) {
|
||||||
|
let (tx, rx) = tokio::sync::oneshot::channel();
|
||||||
|
|
||||||
|
let id = self.version;
|
||||||
|
|
||||||
|
// Overflow concern:
|
||||||
|
// This is a u64, and we might enqueue 2^16 tasks per second.
|
||||||
|
// This gives us 2^48 seconds (9 million years).
|
||||||
|
// Even if this does overflow, it will not break, but some
|
||||||
|
// jobs with the higher version might never get prioritised.
|
||||||
|
self.version += 1;
|
||||||
|
|
||||||
|
self.queue.insert(id, BatchJob { req, res: tx });
|
||||||
|
|
||||||
|
(id, rx)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_batch(&mut self, p: &P) -> (Vec<P::Req>, Vec<tokio::sync::oneshot::Sender<P::Res>>) {
|
||||||
|
let batch_size = p.batch_size(self.queue.len());
|
||||||
|
let mut reqs = Vec::with_capacity(batch_size);
|
||||||
|
let mut resps = Vec::with_capacity(batch_size);
|
||||||
|
|
||||||
|
while reqs.len() < batch_size {
|
||||||
|
let Some((_, job)) = self.queue.pop_first() else {
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
reqs.push(job.req);
|
||||||
|
resps.push(job.res);
|
||||||
|
}
|
||||||
|
|
||||||
|
(reqs, resps)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -201,7 +201,7 @@ pub async fn run() -> anyhow::Result<()> {
|
|||||||
auth_backend,
|
auth_backend,
|
||||||
http_listener,
|
http_listener,
|
||||||
shutdown.clone(),
|
shutdown.clone(),
|
||||||
Arc::new(CancellationHandler::new(&config.connect_to_compute, None)),
|
Arc::new(CancellationHandler::new(&config.connect_to_compute)),
|
||||||
endpoint_rate_limiter,
|
endpoint_rate_limiter,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
@@ -23,7 +23,8 @@ use utils::{project_build_tag, project_git_version};
|
|||||||
|
|
||||||
use crate::auth::backend::jwt::JwkCache;
|
use crate::auth::backend::jwt::JwkCache;
|
||||||
use crate::auth::backend::{ConsoleRedirectBackend, MaybeOwned};
|
use crate::auth::backend::{ConsoleRedirectBackend, MaybeOwned};
|
||||||
use crate::cancellation::{CancellationHandler, handle_cancel_messages};
|
use crate::batch::BatchQueue;
|
||||||
|
use crate::cancellation::{CancellationHandler, CancellationProcessor};
|
||||||
use crate::config::{
|
use crate::config::{
|
||||||
self, AuthenticationConfig, CacheOptions, ComputeConfig, HttpConfig, ProjectInfoCacheOptions,
|
self, AuthenticationConfig, CacheOptions, ComputeConfig, HttpConfig, ProjectInfoCacheOptions,
|
||||||
ProxyConfig, ProxyProtocolV2, remote_storage_from_toml,
|
ProxyConfig, ProxyProtocolV2, remote_storage_from_toml,
|
||||||
@@ -392,13 +393,7 @@ pub async fn run() -> anyhow::Result<()> {
|
|||||||
.as_ref()
|
.as_ref()
|
||||||
.map(|redis_publisher| RedisKVClient::new(redis_publisher.clone(), redis_rps_limit));
|
.map(|redis_publisher| RedisKVClient::new(redis_publisher.clone(), redis_rps_limit));
|
||||||
|
|
||||||
// channel size should be higher than redis client limit to avoid blocking
|
let cancellation_handler = Arc::new(CancellationHandler::new(&config.connect_to_compute));
|
||||||
let cancel_ch_size = args.cancellation_ch_size;
|
|
||||||
let (tx_cancel, rx_cancel) = tokio::sync::mpsc::channel(cancel_ch_size);
|
|
||||||
let cancellation_handler = Arc::new(CancellationHandler::new(
|
|
||||||
&config.connect_to_compute,
|
|
||||||
Some(tx_cancel),
|
|
||||||
));
|
|
||||||
|
|
||||||
let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new_with_shards(
|
let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new_with_shards(
|
||||||
RateBucketInfo::to_leaky_bucket(&args.endpoint_rps_limit)
|
RateBucketInfo::to_leaky_bucket(&args.endpoint_rps_limit)
|
||||||
@@ -530,21 +525,11 @@ pub async fn run() -> anyhow::Result<()> {
|
|||||||
match redis_kv_client.try_connect().await {
|
match redis_kv_client.try_connect().await {
|
||||||
Ok(()) => {
|
Ok(()) => {
|
||||||
info!("Connected to Redis KV client");
|
info!("Connected to Redis KV client");
|
||||||
maintenance_tasks.spawn(async move {
|
cancellation_handler.init_tx(BatchQueue::new(CancellationProcessor {
|
||||||
handle_cancel_messages(
|
client: redis_kv_client,
|
||||||
&mut redis_kv_client,
|
batch_size: args.cancellation_batch_size,
|
||||||
rx_cancel,
|
}));
|
||||||
args.cancellation_batch_size,
|
|
||||||
)
|
|
||||||
.await?;
|
|
||||||
|
|
||||||
drop(redis_kv_client);
|
|
||||||
|
|
||||||
// `handle_cancel_messages` was terminated due to the tx_cancel
|
|
||||||
// being dropped. this is not worthy of an error, and this task can only return `Err`,
|
|
||||||
// so let's wait forever instead.
|
|
||||||
std::future::pending().await
|
|
||||||
});
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
|
|||||||
@@ -1,19 +1,23 @@
|
|||||||
|
use std::convert::Infallible;
|
||||||
use std::net::{IpAddr, SocketAddr};
|
use std::net::{IpAddr, SocketAddr};
|
||||||
use std::sync::Arc;
|
use std::sync::{Arc, OnceLock};
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
use anyhow::{Context, anyhow};
|
use anyhow::anyhow;
|
||||||
|
use futures::FutureExt;
|
||||||
use ipnet::{IpNet, Ipv4Net, Ipv6Net};
|
use ipnet::{IpNet, Ipv4Net, Ipv6Net};
|
||||||
use postgres_client::CancelToken;
|
use postgres_client::RawCancelToken;
|
||||||
use postgres_client::tls::MakeTlsConnect;
|
use postgres_client::tls::MakeTlsConnect;
|
||||||
use redis::{Cmd, FromRedisValue, Value};
|
use redis::{Cmd, FromRedisValue, Value};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use tokio::net::TcpStream;
|
use tokio::net::TcpStream;
|
||||||
use tokio::sync::{mpsc, oneshot};
|
use tokio::time::timeout;
|
||||||
use tracing::{debug, error, info, warn};
|
use tracing::{debug, error, info};
|
||||||
|
|
||||||
use crate::auth::AuthError;
|
use crate::auth::AuthError;
|
||||||
use crate::auth::backend::ComputeUserInfo;
|
use crate::auth::backend::ComputeUserInfo;
|
||||||
|
use crate::batch::{BatchQueue, QueueProcessing};
|
||||||
use crate::config::ComputeConfig;
|
use crate::config::ComputeConfig;
|
||||||
use crate::context::RequestContext;
|
use crate::context::RequestContext;
|
||||||
use crate::control_plane::ControlPlaneApi;
|
use crate::control_plane::ControlPlaneApi;
|
||||||
@@ -27,46 +31,36 @@ use crate::redis::kv_ops::RedisKVClient;
|
|||||||
|
|
||||||
type IpSubnetKey = IpNet;
|
type IpSubnetKey = IpNet;
|
||||||
|
|
||||||
const CANCEL_KEY_TTL: i64 = 1_209_600; // 2 weeks cancellation key expire time
|
const CANCEL_KEY_TTL: std::time::Duration = std::time::Duration::from_secs(600);
|
||||||
|
const CANCEL_KEY_REFRESH: std::time::Duration = std::time::Duration::from_secs(570);
|
||||||
|
|
||||||
// Message types for sending through mpsc channel
|
// Message types for sending through mpsc channel
|
||||||
pub enum CancelKeyOp {
|
pub enum CancelKeyOp {
|
||||||
StoreCancelKey {
|
StoreCancelKey {
|
||||||
key: String,
|
key: CancelKeyData,
|
||||||
field: String,
|
value: Box<str>,
|
||||||
value: String,
|
expire: std::time::Duration,
|
||||||
resp_tx: Option<oneshot::Sender<anyhow::Result<()>>>,
|
|
||||||
_guard: CancelChannelSizeGuard<'static>,
|
|
||||||
expire: i64, // TTL for key
|
|
||||||
},
|
},
|
||||||
GetCancelData {
|
GetCancelData {
|
||||||
key: String,
|
key: CancelKeyData,
|
||||||
resp_tx: oneshot::Sender<anyhow::Result<Vec<(String, String)>>>,
|
|
||||||
_guard: CancelChannelSizeGuard<'static>,
|
|
||||||
},
|
|
||||||
RemoveCancelKey {
|
|
||||||
key: String,
|
|
||||||
field: String,
|
|
||||||
resp_tx: Option<oneshot::Sender<anyhow::Result<()>>>,
|
|
||||||
_guard: CancelChannelSizeGuard<'static>,
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Pipeline {
|
pub struct Pipeline {
|
||||||
inner: redis::Pipeline,
|
inner: redis::Pipeline,
|
||||||
replies: Vec<CancelReplyOp>,
|
replies: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Pipeline {
|
impl Pipeline {
|
||||||
fn with_capacity(n: usize) -> Self {
|
fn with_capacity(n: usize) -> Self {
|
||||||
Self {
|
Self {
|
||||||
inner: redis::Pipeline::with_capacity(n),
|
inner: redis::Pipeline::with_capacity(n),
|
||||||
replies: Vec::with_capacity(n),
|
replies: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn execute(&mut self, client: &mut RedisKVClient) {
|
async fn execute(self, client: &mut RedisKVClient) -> Vec<anyhow::Result<Value>> {
|
||||||
let responses = self.replies.len();
|
let responses = self.replies;
|
||||||
let batch_size = self.inner.len();
|
let batch_size = self.inner.len();
|
||||||
|
|
||||||
match client.query(&self.inner).await {
|
match client.query(&self.inner).await {
|
||||||
@@ -76,176 +70,73 @@ impl Pipeline {
|
|||||||
batch_size,
|
batch_size,
|
||||||
responses, "successfully completed cancellation jobs",
|
responses, "successfully completed cancellation jobs",
|
||||||
);
|
);
|
||||||
for (value, reply) in std::iter::zip(values, self.replies.drain(..)) {
|
values.into_iter().map(Ok).collect()
|
||||||
reply.send_value(value);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Ok(value) => {
|
Ok(value) => {
|
||||||
error!(batch_size, ?value, "unexpected redis return value");
|
error!(batch_size, ?value, "unexpected redis return value");
|
||||||
for reply in self.replies.drain(..) {
|
std::iter::repeat_with(|| Err(anyhow!("incorrect response type from redis")))
|
||||||
reply.send_err(anyhow!("incorrect response type from redis"));
|
.take(responses)
|
||||||
}
|
.collect()
|
||||||
}
|
}
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
for reply in self.replies.drain(..) {
|
std::iter::repeat_with(|| Err(anyhow!("could not send cmd to redis: {err}")))
|
||||||
reply.send_err(anyhow!("could not send cmd to redis: {err}"));
|
.take(responses)
|
||||||
}
|
.collect()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
self.inner.clear();
|
|
||||||
self.replies.clear();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn add_command_with_reply(&mut self, cmd: Cmd, reply: CancelReplyOp) {
|
fn add_command_with_reply(&mut self, cmd: Cmd) {
|
||||||
self.inner.add_command(cmd);
|
self.inner.add_command(cmd);
|
||||||
self.replies.push(reply);
|
self.replies += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn add_command_no_reply(&mut self, cmd: Cmd) {
|
fn add_command_no_reply(&mut self, cmd: Cmd) {
|
||||||
self.inner.add_command(cmd).ignore();
|
self.inner.add_command(cmd).ignore();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn add_command(&mut self, cmd: Cmd, reply: Option<CancelReplyOp>) {
|
|
||||||
match reply {
|
|
||||||
Some(reply) => self.add_command_with_reply(cmd, reply),
|
|
||||||
None => self.add_command_no_reply(cmd),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CancelKeyOp {
|
impl CancelKeyOp {
|
||||||
fn register(self, pipe: &mut Pipeline) {
|
fn register(&self, pipe: &mut Pipeline) {
|
||||||
#[allow(clippy::used_underscore_binding)]
|
#[allow(clippy::used_underscore_binding)]
|
||||||
match self {
|
match self {
|
||||||
CancelKeyOp::StoreCancelKey {
|
CancelKeyOp::StoreCancelKey { key, value, expire } => {
|
||||||
key,
|
let key = KeyPrefix::Cancel(*key).build_redis_key();
|
||||||
field,
|
pipe.add_command_with_reply(Cmd::hset(&key, "data", &**value));
|
||||||
value,
|
pipe.add_command_no_reply(Cmd::expire(&key, expire.as_secs() as i64));
|
||||||
resp_tx,
|
|
||||||
_guard,
|
|
||||||
expire,
|
|
||||||
} => {
|
|
||||||
let reply =
|
|
||||||
resp_tx.map(|resp_tx| CancelReplyOp::StoreCancelKey { resp_tx, _guard });
|
|
||||||
pipe.add_command(Cmd::hset(&key, field, value), reply);
|
|
||||||
pipe.add_command_no_reply(Cmd::expire(key, expire));
|
|
||||||
}
|
}
|
||||||
CancelKeyOp::GetCancelData {
|
CancelKeyOp::GetCancelData { key } => {
|
||||||
key,
|
let key = KeyPrefix::Cancel(*key).build_redis_key();
|
||||||
resp_tx,
|
pipe.add_command_with_reply(Cmd::hget(key, "data"));
|
||||||
_guard,
|
|
||||||
} => {
|
|
||||||
let reply = CancelReplyOp::GetCancelData { resp_tx, _guard };
|
|
||||||
pipe.add_command_with_reply(Cmd::hgetall(key), reply);
|
|
||||||
}
|
|
||||||
CancelKeyOp::RemoveCancelKey {
|
|
||||||
key,
|
|
||||||
field,
|
|
||||||
resp_tx,
|
|
||||||
_guard,
|
|
||||||
} => {
|
|
||||||
let reply =
|
|
||||||
resp_tx.map(|resp_tx| CancelReplyOp::RemoveCancelKey { resp_tx, _guard });
|
|
||||||
pipe.add_command(Cmd::hdel(key, field), reply);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Message types for sending through mpsc channel
|
pub struct CancellationProcessor {
|
||||||
pub enum CancelReplyOp {
|
pub client: RedisKVClient,
|
||||||
StoreCancelKey {
|
pub batch_size: usize,
|
||||||
resp_tx: oneshot::Sender<anyhow::Result<()>>,
|
|
||||||
_guard: CancelChannelSizeGuard<'static>,
|
|
||||||
},
|
|
||||||
GetCancelData {
|
|
||||||
resp_tx: oneshot::Sender<anyhow::Result<Vec<(String, String)>>>,
|
|
||||||
_guard: CancelChannelSizeGuard<'static>,
|
|
||||||
},
|
|
||||||
RemoveCancelKey {
|
|
||||||
resp_tx: oneshot::Sender<anyhow::Result<()>>,
|
|
||||||
_guard: CancelChannelSizeGuard<'static>,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CancelReplyOp {
|
impl QueueProcessing for CancellationProcessor {
|
||||||
fn send_err(self, e: anyhow::Error) {
|
type Req = (CancelChannelSizeGuard<'static>, CancelKeyOp);
|
||||||
match self {
|
type Res = anyhow::Result<redis::Value>;
|
||||||
CancelReplyOp::StoreCancelKey { resp_tx, _guard } => {
|
|
||||||
resp_tx
|
fn batch_size(&self, _queue_size: usize) -> usize {
|
||||||
.send(Err(e))
|
self.batch_size
|
||||||
.inspect_err(|_| tracing::debug!("could not send reply"))
|
|
||||||
.ok();
|
|
||||||
}
|
|
||||||
CancelReplyOp::GetCancelData { resp_tx, _guard } => {
|
|
||||||
resp_tx
|
|
||||||
.send(Err(e))
|
|
||||||
.inspect_err(|_| tracing::debug!("could not send reply"))
|
|
||||||
.ok();
|
|
||||||
}
|
|
||||||
CancelReplyOp::RemoveCancelKey { resp_tx, _guard } => {
|
|
||||||
resp_tx
|
|
||||||
.send(Err(e))
|
|
||||||
.inspect_err(|_| tracing::debug!("could not send reply"))
|
|
||||||
.ok();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn send_value(self, v: redis::Value) {
|
async fn apply(&mut self, batch: Vec<Self::Req>) -> Vec<Self::Res> {
|
||||||
match self {
|
let mut pipeline = Pipeline::with_capacity(batch.len());
|
||||||
CancelReplyOp::StoreCancelKey { resp_tx, _guard } => {
|
|
||||||
let send =
|
|
||||||
FromRedisValue::from_owned_redis_value(v).context("could not parse value");
|
|
||||||
resp_tx
|
|
||||||
.send(send)
|
|
||||||
.inspect_err(|_| tracing::debug!("could not send reply"))
|
|
||||||
.ok();
|
|
||||||
}
|
|
||||||
CancelReplyOp::GetCancelData { resp_tx, _guard } => {
|
|
||||||
let send =
|
|
||||||
FromRedisValue::from_owned_redis_value(v).context("could not parse value");
|
|
||||||
resp_tx
|
|
||||||
.send(send)
|
|
||||||
.inspect_err(|_| tracing::debug!("could not send reply"))
|
|
||||||
.ok();
|
|
||||||
}
|
|
||||||
CancelReplyOp::RemoveCancelKey { resp_tx, _guard } => {
|
|
||||||
let send =
|
|
||||||
FromRedisValue::from_owned_redis_value(v).context("could not parse value");
|
|
||||||
resp_tx
|
|
||||||
.send(send)
|
|
||||||
.inspect_err(|_| tracing::debug!("could not send reply"))
|
|
||||||
.ok();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Running as a separate task to accept messages through the rx channel
|
|
||||||
pub async fn handle_cancel_messages(
|
|
||||||
client: &mut RedisKVClient,
|
|
||||||
mut rx: mpsc::Receiver<CancelKeyOp>,
|
|
||||||
batch_size: usize,
|
|
||||||
) -> anyhow::Result<()> {
|
|
||||||
let mut batch = Vec::with_capacity(batch_size);
|
|
||||||
let mut pipeline = Pipeline::with_capacity(batch_size);
|
|
||||||
|
|
||||||
loop {
|
|
||||||
if rx.recv_many(&mut batch, batch_size).await == 0 {
|
|
||||||
warn!("shutting down cancellation queue");
|
|
||||||
break Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
let batch_size = batch.len();
|
let batch_size = batch.len();
|
||||||
debug!(batch_size, "running cancellation jobs");
|
debug!(batch_size, "running cancellation jobs");
|
||||||
|
|
||||||
for msg in batch.drain(..) {
|
for (_, op) in &batch {
|
||||||
msg.register(&mut pipeline);
|
op.register(&mut pipeline);
|
||||||
}
|
}
|
||||||
|
|
||||||
pipeline.execute(client).await;
|
pipeline.execute(&mut self.client).await
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -256,7 +147,7 @@ pub struct CancellationHandler {
|
|||||||
compute_config: &'static ComputeConfig,
|
compute_config: &'static ComputeConfig,
|
||||||
// rate limiter of cancellation requests
|
// rate limiter of cancellation requests
|
||||||
limiter: Arc<std::sync::Mutex<LeakyBucketRateLimiter<IpSubnetKey>>>,
|
limiter: Arc<std::sync::Mutex<LeakyBucketRateLimiter<IpSubnetKey>>>,
|
||||||
tx: Option<mpsc::Sender<CancelKeyOp>>, // send messages to the redis KV client task
|
tx: OnceLock<BatchQueue<CancellationProcessor>>, // send messages to the redis KV client task
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
@@ -296,13 +187,10 @@ impl ReportableError for CancelError {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl CancellationHandler {
|
impl CancellationHandler {
|
||||||
pub fn new(
|
pub fn new(compute_config: &'static ComputeConfig) -> Self {
|
||||||
compute_config: &'static ComputeConfig,
|
|
||||||
tx: Option<mpsc::Sender<CancelKeyOp>>,
|
|
||||||
) -> Self {
|
|
||||||
Self {
|
Self {
|
||||||
compute_config,
|
compute_config,
|
||||||
tx,
|
tx: OnceLock::new(),
|
||||||
limiter: Arc::new(std::sync::Mutex::new(
|
limiter: Arc::new(std::sync::Mutex::new(
|
||||||
LeakyBucketRateLimiter::<IpSubnetKey>::new_with_shards(
|
LeakyBucketRateLimiter::<IpSubnetKey>::new_with_shards(
|
||||||
LeakyBucketRateLimiter::<IpSubnetKey>::DEFAULT,
|
LeakyBucketRateLimiter::<IpSubnetKey>::DEFAULT,
|
||||||
@@ -312,7 +200,14 @@ impl CancellationHandler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn get_key(self: &Arc<Self>) -> Session {
|
pub fn init_tx(&self, queue: BatchQueue<CancellationProcessor>) {
|
||||||
|
self.tx
|
||||||
|
.set(queue)
|
||||||
|
.map_err(|_| {})
|
||||||
|
.expect("cancellation queue should be registered once");
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn get_key(self: Arc<Self>) -> Session {
|
||||||
// we intentionally generate a random "backend pid" and "secret key" here.
|
// we intentionally generate a random "backend pid" and "secret key" here.
|
||||||
// we use the corresponding u64 as an identifier for the
|
// we use the corresponding u64 as an identifier for the
|
||||||
// actual endpoint+pid+secret for postgres/pgbouncer.
|
// actual endpoint+pid+secret for postgres/pgbouncer.
|
||||||
@@ -322,14 +217,10 @@ impl CancellationHandler {
|
|||||||
|
|
||||||
let key: CancelKeyData = rand::random();
|
let key: CancelKeyData = rand::random();
|
||||||
|
|
||||||
let prefix_key: KeyPrefix = KeyPrefix::Cancel(key);
|
|
||||||
let redis_key = prefix_key.build_redis_key();
|
|
||||||
|
|
||||||
debug!("registered new query cancellation key {key}");
|
debug!("registered new query cancellation key {key}");
|
||||||
Session {
|
Session {
|
||||||
key,
|
key,
|
||||||
redis_key,
|
cancellation_handler: self,
|
||||||
cancellation_handler: Arc::clone(self),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -337,62 +228,43 @@ impl CancellationHandler {
|
|||||||
&self,
|
&self,
|
||||||
key: CancelKeyData,
|
key: CancelKeyData,
|
||||||
) -> Result<Option<CancelClosure>, CancelError> {
|
) -> Result<Option<CancelClosure>, CancelError> {
|
||||||
let prefix_key: KeyPrefix = KeyPrefix::Cancel(key);
|
let guard = Metrics::get()
|
||||||
let redis_key = prefix_key.build_redis_key();
|
.proxy
|
||||||
|
.cancel_channel_size
|
||||||
|
.guard(RedisMsgKind::HGet);
|
||||||
|
let op = CancelKeyOp::GetCancelData { key };
|
||||||
|
|
||||||
let (resp_tx, resp_rx) = tokio::sync::oneshot::channel();
|
let Some(tx) = self.tx.get() else {
|
||||||
let op = CancelKeyOp::GetCancelData {
|
|
||||||
key: redis_key,
|
|
||||||
resp_tx,
|
|
||||||
_guard: Metrics::get()
|
|
||||||
.proxy
|
|
||||||
.cancel_channel_size
|
|
||||||
.guard(RedisMsgKind::HGetAll),
|
|
||||||
};
|
|
||||||
|
|
||||||
let Some(tx) = &self.tx else {
|
|
||||||
tracing::warn!("cancellation handler is not available");
|
tracing::warn!("cancellation handler is not available");
|
||||||
return Err(CancelError::InternalError);
|
return Err(CancelError::InternalError);
|
||||||
};
|
};
|
||||||
|
|
||||||
tx.try_send(op)
|
const TIMEOUT: Duration = Duration::from_secs(5);
|
||||||
|
let result = timeout(TIMEOUT, tx.call((guard, op)))
|
||||||
|
.await
|
||||||
|
.map_err(|_| {
|
||||||
|
tracing::warn!("timed out waiting to receive GetCancelData response");
|
||||||
|
CancelError::RateLimit
|
||||||
|
})?
|
||||||
.map_err(|e| {
|
.map_err(|e| {
|
||||||
tracing::warn!("failed to send GetCancelData for {key}: {e}");
|
tracing::warn!("failed to receive GetCancelData response: {e}");
|
||||||
})
|
CancelError::InternalError
|
||||||
.map_err(|()| CancelError::InternalError)?;
|
})?;
|
||||||
|
|
||||||
let result = resp_rx.await.map_err(|e| {
|
let cancel_state_str = String::from_owned_redis_value(result).map_err(|e| {
|
||||||
tracing::warn!("failed to receive GetCancelData response: {e}");
|
tracing::warn!("failed to receive GetCancelData response: {e}");
|
||||||
CancelError::InternalError
|
CancelError::InternalError
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
let cancel_state_str: Option<String> = match result {
|
let cancel_closure: CancelClosure =
|
||||||
Ok(mut state) => {
|
serde_json::from_str(&cancel_state_str).map_err(|e| {
|
||||||
if state.len() == 1 {
|
tracing::warn!("failed to deserialize cancel state: {e}");
|
||||||
Some(state.remove(0).1)
|
CancelError::InternalError
|
||||||
} else {
|
})?;
|
||||||
tracing::warn!("unexpected number of entries in cancel state: {state:?}");
|
|
||||||
return Err(CancelError::InternalError);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
tracing::warn!("failed to receive cancel state from redis: {e}");
|
|
||||||
return Err(CancelError::InternalError);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let cancel_state: Option<CancelClosure> = match cancel_state_str {
|
Ok(Some(cancel_closure))
|
||||||
Some(state) => {
|
|
||||||
let cancel_closure: CancelClosure = serde_json::from_str(&state).map_err(|e| {
|
|
||||||
tracing::warn!("failed to deserialize cancel state: {e}");
|
|
||||||
CancelError::InternalError
|
|
||||||
})?;
|
|
||||||
Some(cancel_closure)
|
|
||||||
}
|
|
||||||
None => None,
|
|
||||||
};
|
|
||||||
Ok(cancel_state)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Try to cancel a running query for the corresponding connection.
|
/// Try to cancel a running query for the corresponding connection.
|
||||||
/// If the cancellation key is not found, it will be published to Redis.
|
/// If the cancellation key is not found, it will be published to Redis.
|
||||||
/// check_allowed - if true, check if the IP is allowed to cancel the query.
|
/// check_allowed - if true, check if the IP is allowed to cancel the query.
|
||||||
@@ -467,10 +339,10 @@ impl CancellationHandler {
|
|||||||
/// This should've been a [`std::future::Future`], but
|
/// This should've been a [`std::future::Future`], but
|
||||||
/// it's impossible to name a type of an unboxed future
|
/// it's impossible to name a type of an unboxed future
|
||||||
/// (we'd need something like `#![feature(type_alias_impl_trait)]`).
|
/// (we'd need something like `#![feature(type_alias_impl_trait)]`).
|
||||||
#[derive(Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct CancelClosure {
|
pub struct CancelClosure {
|
||||||
socket_addr: SocketAddr,
|
socket_addr: SocketAddr,
|
||||||
cancel_token: CancelToken,
|
cancel_token: RawCancelToken,
|
||||||
hostname: String, // for pg_sni router
|
hostname: String, // for pg_sni router
|
||||||
user_info: ComputeUserInfo,
|
user_info: ComputeUserInfo,
|
||||||
}
|
}
|
||||||
@@ -478,7 +350,7 @@ pub struct CancelClosure {
|
|||||||
impl CancelClosure {
|
impl CancelClosure {
|
||||||
pub(crate) fn new(
|
pub(crate) fn new(
|
||||||
socket_addr: SocketAddr,
|
socket_addr: SocketAddr,
|
||||||
cancel_token: CancelToken,
|
cancel_token: RawCancelToken,
|
||||||
hostname: String,
|
hostname: String,
|
||||||
user_info: ComputeUserInfo,
|
user_info: ComputeUserInfo,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
@@ -491,7 +363,7 @@ impl CancelClosure {
|
|||||||
}
|
}
|
||||||
/// Cancels the query running on user's compute node.
|
/// Cancels the query running on user's compute node.
|
||||||
pub(crate) async fn try_cancel_query(
|
pub(crate) async fn try_cancel_query(
|
||||||
self,
|
&self,
|
||||||
compute_config: &ComputeConfig,
|
compute_config: &ComputeConfig,
|
||||||
) -> Result<(), CancelError> {
|
) -> Result<(), CancelError> {
|
||||||
let socket = TcpStream::connect(self.socket_addr).await?;
|
let socket = TcpStream::connect(self.socket_addr).await?;
|
||||||
@@ -512,7 +384,6 @@ impl CancelClosure {
|
|||||||
pub(crate) struct Session {
|
pub(crate) struct Session {
|
||||||
/// The user-facing key identifying this session.
|
/// The user-facing key identifying this session.
|
||||||
key: CancelKeyData,
|
key: CancelKeyData,
|
||||||
redis_key: String,
|
|
||||||
cancellation_handler: Arc<CancellationHandler>,
|
cancellation_handler: Arc<CancellationHandler>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -521,60 +392,66 @@ impl Session {
|
|||||||
&self.key
|
&self.key
|
||||||
}
|
}
|
||||||
|
|
||||||
// Send the store key op to the cancellation handler and set TTL for the key
|
/// Ensure the cancel key is continuously refreshed,
|
||||||
pub(crate) fn write_cancel_key(
|
/// but stop when the channel is dropped.
|
||||||
|
pub(crate) async fn maintain_cancel_key(
|
||||||
&self,
|
&self,
|
||||||
cancel_closure: CancelClosure,
|
session_id: uuid::Uuid,
|
||||||
) -> Result<(), CancelError> {
|
cancel: tokio::sync::oneshot::Receiver<Infallible>,
|
||||||
let Some(tx) = &self.cancellation_handler.tx else {
|
cancel_closure: &CancelClosure,
|
||||||
tracing::warn!("cancellation handler is not available");
|
compute_config: &ComputeConfig,
|
||||||
return Err(CancelError::InternalError);
|
) {
|
||||||
};
|
futures::future::select(
|
||||||
|
std::pin::pin!(self.maintain_redis_cancel_key(cancel_closure)),
|
||||||
|
cancel,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
let closure_json = serde_json::to_string(&cancel_closure).map_err(|e| {
|
if let Err(err) = cancel_closure
|
||||||
tracing::warn!("failed to serialize cancel closure: {e}");
|
.try_cancel_query(compute_config)
|
||||||
CancelError::InternalError
|
.boxed()
|
||||||
})?;
|
.await
|
||||||
|
{
|
||||||
let op = CancelKeyOp::StoreCancelKey {
|
tracing::warn!(
|
||||||
key: self.redis_key.clone(),
|
?session_id,
|
||||||
field: "data".to_string(),
|
?err,
|
||||||
value: closure_json,
|
"could not cancel the query in the database"
|
||||||
resp_tx: None,
|
);
|
||||||
_guard: Metrics::get()
|
}
|
||||||
.proxy
|
|
||||||
.cancel_channel_size
|
|
||||||
.guard(RedisMsgKind::HSet),
|
|
||||||
expire: CANCEL_KEY_TTL,
|
|
||||||
};
|
|
||||||
|
|
||||||
let _ = tx.try_send(op).map_err(|e| {
|
|
||||||
let key = self.key;
|
|
||||||
tracing::warn!("failed to send StoreCancelKey for {key}: {e}");
|
|
||||||
});
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn remove_cancel_key(&self) -> Result<(), CancelError> {
|
// Ensure the cancel key is continuously refreshed.
|
||||||
let Some(tx) = &self.cancellation_handler.tx else {
|
async fn maintain_redis_cancel_key(&self, cancel_closure: &CancelClosure) -> ! {
|
||||||
|
let Some(tx) = self.cancellation_handler.tx.get() else {
|
||||||
tracing::warn!("cancellation handler is not available");
|
tracing::warn!("cancellation handler is not available");
|
||||||
return Err(CancelError::InternalError);
|
// don't exit, as we only want to exit if cancelled externally.
|
||||||
|
std::future::pending().await
|
||||||
};
|
};
|
||||||
|
|
||||||
let op = CancelKeyOp::RemoveCancelKey {
|
let closure_json = serde_json::to_string(&cancel_closure)
|
||||||
key: self.redis_key.clone(),
|
.expect("serialising to json string should not fail")
|
||||||
field: "data".to_string(),
|
.into_boxed_str();
|
||||||
resp_tx: None,
|
|
||||||
_guard: Metrics::get()
|
loop {
|
||||||
|
let guard = Metrics::get()
|
||||||
.proxy
|
.proxy
|
||||||
.cancel_channel_size
|
.cancel_channel_size
|
||||||
.guard(RedisMsgKind::HDel),
|
.guard(RedisMsgKind::HSet);
|
||||||
};
|
let op = CancelKeyOp::StoreCancelKey {
|
||||||
|
key: self.key,
|
||||||
|
value: closure_json.clone(),
|
||||||
|
expire: CANCEL_KEY_TTL,
|
||||||
|
};
|
||||||
|
|
||||||
let _ = tx.try_send(op).map_err(|e| {
|
tracing::debug!(
|
||||||
let key = self.key;
|
src=%self.key,
|
||||||
tracing::warn!("failed to send RemoveCancelKey for {key}: {e}");
|
dest=?cancel_closure.cancel_token,
|
||||||
});
|
"registering cancellation key"
|
||||||
Ok(())
|
);
|
||||||
|
|
||||||
|
if tx.call((guard, op)).await.is_ok() {
|
||||||
|
tokio::time::sleep(CANCEL_KEY_REFRESH).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ use itertools::Itertools;
|
|||||||
use postgres_client::config::{AuthKeys, SslMode};
|
use postgres_client::config::{AuthKeys, SslMode};
|
||||||
use postgres_client::maybe_tls_stream::MaybeTlsStream;
|
use postgres_client::maybe_tls_stream::MaybeTlsStream;
|
||||||
use postgres_client::tls::MakeTlsConnect;
|
use postgres_client::tls::MakeTlsConnect;
|
||||||
use postgres_client::{CancelToken, NoTls, RawConnection};
|
use postgres_client::{NoTls, RawCancelToken, RawConnection};
|
||||||
use postgres_protocol::message::backend::NoticeResponseBody;
|
use postgres_protocol::message::backend::NoticeResponseBody;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use tokio::net::{TcpStream, lookup_host};
|
use tokio::net::{TcpStream, lookup_host};
|
||||||
@@ -265,7 +265,8 @@ impl ConnectInfo {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type RustlsStream = <ComputeConfig as MakeTlsConnect<tokio::net::TcpStream>>::Stream;
|
pub type RustlsStream = <ComputeConfig as MakeTlsConnect<tokio::net::TcpStream>>::Stream;
|
||||||
|
pub type MaybeRustlsStream = MaybeTlsStream<tokio::net::TcpStream, RustlsStream>;
|
||||||
|
|
||||||
pub(crate) struct PostgresConnection {
|
pub(crate) struct PostgresConnection {
|
||||||
/// Socket connected to a compute node.
|
/// Socket connected to a compute node.
|
||||||
@@ -279,7 +280,7 @@ pub(crate) struct PostgresConnection {
|
|||||||
/// Notices received from compute after authenticating
|
/// Notices received from compute after authenticating
|
||||||
pub(crate) delayed_notice: Vec<NoticeResponseBody>,
|
pub(crate) delayed_notice: Vec<NoticeResponseBody>,
|
||||||
|
|
||||||
_guage: NumDbConnectionsGuard<'static>,
|
pub(crate) guage: NumDbConnectionsGuard<'static>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ConnectInfo {
|
impl ConnectInfo {
|
||||||
@@ -327,8 +328,7 @@ impl ConnectInfo {
|
|||||||
// Yet another reason to rework the connection establishing code.
|
// Yet another reason to rework the connection establishing code.
|
||||||
let cancel_closure = CancelClosure::new(
|
let cancel_closure = CancelClosure::new(
|
||||||
socket_addr,
|
socket_addr,
|
||||||
CancelToken {
|
RawCancelToken {
|
||||||
socket_config: None,
|
|
||||||
ssl_mode: self.ssl_mode,
|
ssl_mode: self.ssl_mode,
|
||||||
process_id,
|
process_id,
|
||||||
secret_key,
|
secret_key,
|
||||||
@@ -343,7 +343,7 @@ impl ConnectInfo {
|
|||||||
delayed_notice,
|
delayed_notice,
|
||||||
cancel_closure,
|
cancel_closure,
|
||||||
aux,
|
aux,
|
||||||
_guage: Metrics::get().proxy.db_connections.guard(ctx.protocol()),
|
guage: Metrics::get().proxy.db_connections.guard(ctx.protocol()),
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(connection)
|
Ok(connection)
|
||||||
|
|||||||
@@ -120,7 +120,7 @@ pub async fn task_main(
|
|||||||
Ok(Some(p)) => {
|
Ok(Some(p)) => {
|
||||||
ctx.set_success();
|
ctx.set_success();
|
||||||
let _disconnect = ctx.log_connect();
|
let _disconnect = ctx.log_connect();
|
||||||
match p.proxy_pass(&config.connect_to_compute).await {
|
match p.proxy_pass().await {
|
||||||
Ok(()) => {}
|
Ok(()) => {}
|
||||||
Err(ErrorSource::Client(e)) => {
|
Err(ErrorSource::Client(e)) => {
|
||||||
error!(
|
error!(
|
||||||
@@ -232,22 +232,35 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send>(
|
|||||||
.or_else(|e| async { Err(stream.throw_error(e, Some(ctx)).await) })
|
.or_else(|e| async { Err(stream.throw_error(e, Some(ctx)).await) })
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
let cancellation_handler_clone = Arc::clone(&cancellation_handler);
|
let session = cancellation_handler.get_key();
|
||||||
let session = cancellation_handler_clone.get_key();
|
|
||||||
|
|
||||||
session.write_cancel_key(node.cancel_closure.clone())?;
|
|
||||||
|
|
||||||
prepare_client_connection(&node, *session.key(), &mut stream);
|
prepare_client_connection(&node, *session.key(), &mut stream);
|
||||||
let stream = stream.flush_and_into_inner().await?;
|
let stream = stream.flush_and_into_inner().await?;
|
||||||
|
|
||||||
|
let session_id = ctx.session_id();
|
||||||
|
let (cancel_on_shutdown, cancel) = tokio::sync::oneshot::channel();
|
||||||
|
tokio::spawn(async move {
|
||||||
|
session
|
||||||
|
.maintain_cancel_key(
|
||||||
|
session_id,
|
||||||
|
cancel,
|
||||||
|
&node.cancel_closure,
|
||||||
|
&config.connect_to_compute,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
});
|
||||||
|
|
||||||
Ok(Some(ProxyPassthrough {
|
Ok(Some(ProxyPassthrough {
|
||||||
client: stream,
|
client: stream,
|
||||||
aux: node.aux.clone(),
|
compute: node.stream,
|
||||||
|
|
||||||
|
aux: node.aux,
|
||||||
private_link_id: None,
|
private_link_id: None,
|
||||||
compute: node,
|
|
||||||
session_id: ctx.session_id(),
|
_cancel_on_shutdown: cancel_on_shutdown,
|
||||||
cancel: session,
|
|
||||||
_req: request_gauge,
|
_req: request_gauge,
|
||||||
_conn: conn_gauge,
|
_conn: conn_gauge,
|
||||||
|
_db_conn: node.guage,
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -75,6 +75,7 @@
|
|||||||
pub mod binary;
|
pub mod binary;
|
||||||
|
|
||||||
mod auth;
|
mod auth;
|
||||||
|
mod batch;
|
||||||
mod cache;
|
mod cache;
|
||||||
mod cancellation;
|
mod cancellation;
|
||||||
mod compute;
|
mod compute;
|
||||||
|
|||||||
@@ -1,15 +1,17 @@
|
|||||||
use futures::FutureExt;
|
use std::convert::Infallible;
|
||||||
|
|
||||||
use smol_str::SmolStr;
|
use smol_str::SmolStr;
|
||||||
use tokio::io::{AsyncRead, AsyncWrite};
|
use tokio::io::{AsyncRead, AsyncWrite};
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
use utils::measured_stream::MeasuredStream;
|
use utils::measured_stream::MeasuredStream;
|
||||||
|
|
||||||
use super::copy_bidirectional::ErrorSource;
|
use super::copy_bidirectional::ErrorSource;
|
||||||
use crate::cancellation;
|
use crate::compute::MaybeRustlsStream;
|
||||||
use crate::compute::PostgresConnection;
|
|
||||||
use crate::config::ComputeConfig;
|
|
||||||
use crate::control_plane::messages::MetricsAuxInfo;
|
use crate::control_plane::messages::MetricsAuxInfo;
|
||||||
use crate::metrics::{Direction, Metrics, NumClientConnectionsGuard, NumConnectionRequestsGuard};
|
use crate::metrics::{
|
||||||
|
Direction, Metrics, NumClientConnectionsGuard, NumConnectionRequestsGuard,
|
||||||
|
NumDbConnectionsGuard,
|
||||||
|
};
|
||||||
use crate::stream::Stream;
|
use crate::stream::Stream;
|
||||||
use crate::usage_metrics::{Ids, MetricCounterRecorder, USAGE_METRICS};
|
use crate::usage_metrics::{Ids, MetricCounterRecorder, USAGE_METRICS};
|
||||||
|
|
||||||
@@ -64,40 +66,20 @@ pub(crate) async fn proxy_pass(
|
|||||||
|
|
||||||
pub(crate) struct ProxyPassthrough<S> {
|
pub(crate) struct ProxyPassthrough<S> {
|
||||||
pub(crate) client: Stream<S>,
|
pub(crate) client: Stream<S>,
|
||||||
pub(crate) compute: PostgresConnection,
|
pub(crate) compute: MaybeRustlsStream,
|
||||||
|
|
||||||
pub(crate) aux: MetricsAuxInfo,
|
pub(crate) aux: MetricsAuxInfo,
|
||||||
pub(crate) session_id: uuid::Uuid,
|
|
||||||
pub(crate) private_link_id: Option<SmolStr>,
|
pub(crate) private_link_id: Option<SmolStr>,
|
||||||
pub(crate) cancel: cancellation::Session,
|
|
||||||
|
pub(crate) _cancel_on_shutdown: tokio::sync::oneshot::Sender<Infallible>,
|
||||||
|
|
||||||
pub(crate) _req: NumConnectionRequestsGuard<'static>,
|
pub(crate) _req: NumConnectionRequestsGuard<'static>,
|
||||||
pub(crate) _conn: NumClientConnectionsGuard<'static>,
|
pub(crate) _conn: NumClientConnectionsGuard<'static>,
|
||||||
|
pub(crate) _db_conn: NumDbConnectionsGuard<'static>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<S: AsyncRead + AsyncWrite + Unpin> ProxyPassthrough<S> {
|
impl<S: AsyncRead + AsyncWrite + Unpin> ProxyPassthrough<S> {
|
||||||
pub(crate) async fn proxy_pass(
|
pub(crate) async fn proxy_pass(self) -> Result<(), ErrorSource> {
|
||||||
self,
|
proxy_pass(self.client, self.compute, self.aux, self.private_link_id).await
|
||||||
compute_config: &ComputeConfig,
|
|
||||||
) -> Result<(), ErrorSource> {
|
|
||||||
let res = proxy_pass(
|
|
||||||
self.client,
|
|
||||||
self.compute.stream,
|
|
||||||
self.aux,
|
|
||||||
self.private_link_id,
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
if let Err(err) = self
|
|
||||||
.compute
|
|
||||||
.cancel_closure
|
|
||||||
.try_cancel_query(compute_config)
|
|
||||||
.boxed()
|
|
||||||
.await
|
|
||||||
{
|
|
||||||
tracing::warn!(session_id = ?self.session_id, ?err, "could not cancel the query in the database");
|
|
||||||
}
|
|
||||||
|
|
||||||
drop(self.cancel.remove_cancel_key()); // we don't need a result. If the queue is full, we just log the error
|
|
||||||
|
|
||||||
res
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -155,7 +155,7 @@ pub async fn task_main(
|
|||||||
Ok(Some(p)) => {
|
Ok(Some(p)) => {
|
||||||
ctx.set_success();
|
ctx.set_success();
|
||||||
let _disconnect = ctx.log_connect();
|
let _disconnect = ctx.log_connect();
|
||||||
match p.proxy_pass(&config.connect_to_compute).await {
|
match p.proxy_pass().await {
|
||||||
Ok(()) => {}
|
Ok(()) => {}
|
||||||
Err(ErrorSource::Client(e)) => {
|
Err(ErrorSource::Client(e)) => {
|
||||||
warn!(
|
warn!(
|
||||||
@@ -372,13 +372,24 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send>(
|
|||||||
Err(e) => Err(stream.throw_error(e, Some(ctx)).await)?,
|
Err(e) => Err(stream.throw_error(e, Some(ctx)).await)?,
|
||||||
};
|
};
|
||||||
|
|
||||||
let cancellation_handler_clone = Arc::clone(&cancellation_handler);
|
let session = cancellation_handler.get_key();
|
||||||
let session = cancellation_handler_clone.get_key();
|
|
||||||
|
|
||||||
session.write_cancel_key(node.cancel_closure.clone())?;
|
|
||||||
prepare_client_connection(&node, *session.key(), &mut stream);
|
prepare_client_connection(&node, *session.key(), &mut stream);
|
||||||
let stream = stream.flush_and_into_inner().await?;
|
let stream = stream.flush_and_into_inner().await?;
|
||||||
|
|
||||||
|
let session_id = ctx.session_id();
|
||||||
|
let (cancel_on_shutdown, cancel) = tokio::sync::oneshot::channel();
|
||||||
|
tokio::spawn(async move {
|
||||||
|
session
|
||||||
|
.maintain_cancel_key(
|
||||||
|
session_id,
|
||||||
|
cancel,
|
||||||
|
&node.cancel_closure,
|
||||||
|
&config.connect_to_compute,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
});
|
||||||
|
|
||||||
let private_link_id = match ctx.extra() {
|
let private_link_id = match ctx.extra() {
|
||||||
Some(ConnectionInfoExtra::Aws { vpce_id }) => Some(vpce_id.clone()),
|
Some(ConnectionInfoExtra::Aws { vpce_id }) => Some(vpce_id.clone()),
|
||||||
Some(ConnectionInfoExtra::Azure { link_id }) => Some(link_id.to_smolstr()),
|
Some(ConnectionInfoExtra::Azure { link_id }) => Some(link_id.to_smolstr()),
|
||||||
@@ -387,13 +398,16 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send>(
|
|||||||
|
|
||||||
Ok(Some(ProxyPassthrough {
|
Ok(Some(ProxyPassthrough {
|
||||||
client: stream,
|
client: stream,
|
||||||
aux: node.aux.clone(),
|
compute: node.stream,
|
||||||
|
|
||||||
|
aux: node.aux,
|
||||||
private_link_id,
|
private_link_id,
|
||||||
compute: node,
|
|
||||||
session_id: ctx.session_id(),
|
_cancel_on_shutdown: cancel_on_shutdown,
|
||||||
cancel: session,
|
|
||||||
_req: request_gauge,
|
_req: request_gauge,
|
||||||
_conn: conn_gauge,
|
_conn: conn_gauge,
|
||||||
|
_db_conn: node.guage,
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,4 @@
|
|||||||
use std::io::ErrorKind;
|
use crate::pqproto::CancelKeyData;
|
||||||
|
|
||||||
use anyhow::Ok;
|
|
||||||
|
|
||||||
use crate::pqproto::{CancelKeyData, id_to_cancel_key};
|
|
||||||
|
|
||||||
pub mod keyspace {
|
pub mod keyspace {
|
||||||
pub const CANCEL_PREFIX: &str = "cancel";
|
pub const CANCEL_PREFIX: &str = "cancel";
|
||||||
@@ -23,39 +19,12 @@ impl KeyPrefix {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(dead_code)]
|
|
||||||
pub(crate) fn as_str(&self) -> &'static str {
|
|
||||||
match self {
|
|
||||||
KeyPrefix::Cancel(_) => keyspace::CANCEL_PREFIX,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[allow(dead_code)]
|
|
||||||
pub(crate) fn parse_redis_key(key: &str) -> anyhow::Result<KeyPrefix> {
|
|
||||||
let (prefix, key_str) = key.split_once(':').ok_or_else(|| {
|
|
||||||
anyhow::anyhow!(std::io::Error::new(
|
|
||||||
ErrorKind::InvalidData,
|
|
||||||
"missing prefix"
|
|
||||||
))
|
|
||||||
})?;
|
|
||||||
|
|
||||||
match prefix {
|
|
||||||
keyspace::CANCEL_PREFIX => {
|
|
||||||
let id = u64::from_str_radix(key_str, 16)?;
|
|
||||||
|
|
||||||
Ok(KeyPrefix::Cancel(id_to_cancel_key(id)))
|
|
||||||
}
|
|
||||||
_ => Err(anyhow::anyhow!(std::io::Error::new(
|
|
||||||
ErrorKind::InvalidData,
|
|
||||||
"unknown prefix"
|
|
||||||
))),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use crate::pqproto::id_to_cancel_key;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -65,16 +34,4 @@ mod tests {
|
|||||||
let redis_key = cancel_key.build_redis_key();
|
let redis_key = cancel_key.build_redis_key();
|
||||||
assert_eq!(redis_key, "cancel:30390000d431");
|
assert_eq!(redis_key, "cancel:30390000d431");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_parse_redis_key() {
|
|
||||||
let redis_key = "cancel:30390000d431";
|
|
||||||
let key: KeyPrefix = parse_redis_key(redis_key).expect("Failed to parse key");
|
|
||||||
|
|
||||||
let ref_key = id_to_cancel_key(12345 << 32 | 54321);
|
|
||||||
|
|
||||||
assert_eq!(key.as_str(), KeyPrefix::Cancel(ref_key).as_str());
|
|
||||||
let KeyPrefix::Cancel(cancel_key) = key;
|
|
||||||
assert_eq!(ref_key, cancel_key);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use futures::FutureExt;
|
||||||
use redis::aio::ConnectionLike;
|
use redis::aio::ConnectionLike;
|
||||||
use redis::{Cmd, FromRedisValue, Pipeline, RedisResult};
|
use redis::{Cmd, FromRedisValue, Pipeline, RedisResult};
|
||||||
|
|
||||||
@@ -35,14 +38,11 @@ impl RedisKVClient {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub async fn try_connect(&mut self) -> anyhow::Result<()> {
|
pub async fn try_connect(&mut self) -> anyhow::Result<()> {
|
||||||
match self.client.connect().await {
|
self.client
|
||||||
Ok(()) => {}
|
.connect()
|
||||||
Err(e) => {
|
.boxed()
|
||||||
tracing::error!("failed to connect to redis: {e}");
|
.await
|
||||||
return Err(e);
|
.inspect_err(|e| tracing::error!("failed to connect to redis: {e}"))
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) async fn query<T: FromRedisValue>(
|
pub(crate) async fn query<T: FromRedisValue>(
|
||||||
@@ -54,15 +54,25 @@ impl RedisKVClient {
|
|||||||
return Err(anyhow::anyhow!("Rate limit exceeded"));
|
return Err(anyhow::anyhow!("Rate limit exceeded"));
|
||||||
}
|
}
|
||||||
|
|
||||||
match q.query(&mut self.client).await {
|
let e = match q.query(&mut self.client).await {
|
||||||
Ok(t) => return Ok(t),
|
Ok(t) => return Ok(t),
|
||||||
Err(e) => {
|
Err(e) => e,
|
||||||
tracing::error!("failed to run query: {e}");
|
};
|
||||||
|
|
||||||
|
tracing::error!("failed to run query: {e}");
|
||||||
|
match e.retry_method() {
|
||||||
|
redis::RetryMethod::Reconnect => {
|
||||||
|
tracing::info!("Redis client is disconnected. Reconnecting...");
|
||||||
|
self.try_connect().await?;
|
||||||
}
|
}
|
||||||
|
redis::RetryMethod::RetryImmediately => {}
|
||||||
|
redis::RetryMethod::WaitAndRetry => {
|
||||||
|
// somewhat arbitrary.
|
||||||
|
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||||
|
}
|
||||||
|
_ => Err(e)?,
|
||||||
}
|
}
|
||||||
|
|
||||||
tracing::info!("Redis client is disconnected. Reconnecting...");
|
|
||||||
self.try_connect().await?;
|
|
||||||
Ok(q.query(&mut self.client).await?)
|
Ok(q.query(&mut self.client).await?)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -167,7 +167,7 @@ pub(crate) async fn serve_websocket(
|
|||||||
Ok(Some(p)) => {
|
Ok(Some(p)) => {
|
||||||
ctx.set_success();
|
ctx.set_success();
|
||||||
ctx.log_connect();
|
ctx.log_connect();
|
||||||
match p.proxy_pass(&config.connect_to_compute).await {
|
match p.proxy_pass().await {
|
||||||
Ok(()) => Ok(()),
|
Ok(()) => Ok(()),
|
||||||
Err(ErrorSource::Client(err)) => Err(err).context("client"),
|
Err(ErrorSource::Client(err)) => Err(err).context("client"),
|
||||||
Err(ErrorSource::Compute(err)) => Err(err).context("compute"),
|
Err(ErrorSource::Compute(err)) => Err(err).context("compute"),
|
||||||
|
|||||||
@@ -5,6 +5,9 @@ use std::time::Duration;
|
|||||||
|
|
||||||
use anyhow::{Context, anyhow};
|
use anyhow::{Context, anyhow};
|
||||||
use camino::Utf8PathBuf;
|
use camino::Utf8PathBuf;
|
||||||
|
|
||||||
|
#[cfg(feature = "testing")]
|
||||||
|
use clap::ArgAction;
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use futures::future::OptionFuture;
|
use futures::future::OptionFuture;
|
||||||
use http_utils::tls_certs::ReloadingCertificateResolver;
|
use http_utils::tls_certs::ReloadingCertificateResolver;
|
||||||
@@ -207,6 +210,19 @@ struct Cli {
|
|||||||
/// the compute notification directly (instead of via control plane).
|
/// the compute notification directly (instead of via control plane).
|
||||||
#[arg(long, default_value = "false")]
|
#[arg(long, default_value = "false")]
|
||||||
use_local_compute_notifications: bool,
|
use_local_compute_notifications: bool,
|
||||||
|
|
||||||
|
/// Number of safekeepers to choose for a timeline when creating it.
|
||||||
|
/// Safekeepers will be chosen from different availability zones.
|
||||||
|
/// This option exists primarily for testing purposes.
|
||||||
|
#[arg(long, default_value = "3", value_parser = clap::value_parser!(i64).range(1..))]
|
||||||
|
timeline_safekeeper_count: i64,
|
||||||
|
|
||||||
|
/// When set, actively checks and initiates heatmap downloads/uploads during reconciliation.
|
||||||
|
/// This speeds up migrations by avoiding the default wait for the heatmap download interval.
|
||||||
|
/// Primarily useful for testing to reduce test execution time.
|
||||||
|
#[cfg(feature = "testing")]
|
||||||
|
#[arg(long, default_value = "true", action=ArgAction::Set)]
|
||||||
|
kick_secondary_downloads: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
enum StrictMode {
|
enum StrictMode {
|
||||||
@@ -371,6 +387,11 @@ async fn async_main() -> anyhow::Result<()> {
|
|||||||
StrictMode::Strict if args.use_local_compute_notifications => {
|
StrictMode::Strict if args.use_local_compute_notifications => {
|
||||||
anyhow::bail!("`--use-local-compute-notifications` is only permitted in `--dev` mode");
|
anyhow::bail!("`--use-local-compute-notifications` is only permitted in `--dev` mode");
|
||||||
}
|
}
|
||||||
|
StrictMode::Strict if args.timeline_safekeeper_count < 3 => {
|
||||||
|
anyhow::bail!(
|
||||||
|
"Running with less than 3 safekeepers per timeline is only permitted in `--dev` mode"
|
||||||
|
);
|
||||||
|
}
|
||||||
StrictMode::Strict => {
|
StrictMode::Strict => {
|
||||||
tracing::info!("Starting in strict mode: configuration is OK.")
|
tracing::info!("Starting in strict mode: configuration is OK.")
|
||||||
}
|
}
|
||||||
@@ -433,6 +454,9 @@ async fn async_main() -> anyhow::Result<()> {
|
|||||||
ssl_ca_certs,
|
ssl_ca_certs,
|
||||||
timelines_onto_safekeepers: args.timelines_onto_safekeepers,
|
timelines_onto_safekeepers: args.timelines_onto_safekeepers,
|
||||||
use_local_compute_notifications: args.use_local_compute_notifications,
|
use_local_compute_notifications: args.use_local_compute_notifications,
|
||||||
|
timeline_safekeeper_count: args.timeline_safekeeper_count,
|
||||||
|
#[cfg(feature = "testing")]
|
||||||
|
kick_secondary_downloads: args.kick_secondary_downloads,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Validate that we can connect to the database
|
// Validate that we can connect to the database
|
||||||
|
|||||||
@@ -856,6 +856,7 @@ impl Reconciler {
|
|||||||
&self.shard,
|
&self.shard,
|
||||||
&self.config,
|
&self.config,
|
||||||
&self.placement_policy,
|
&self.placement_policy,
|
||||||
|
self.intent.secondary.len(),
|
||||||
);
|
);
|
||||||
match self.observed.locations.get(&node.get_id()) {
|
match self.observed.locations.get(&node.get_id()) {
|
||||||
Some(conf) if conf.conf.as_ref() == Some(&wanted_conf) => {
|
Some(conf) if conf.conf.as_ref() == Some(&wanted_conf) => {
|
||||||
@@ -1235,11 +1236,11 @@ pub(crate) fn attached_location_conf(
|
|||||||
shard: &ShardIdentity,
|
shard: &ShardIdentity,
|
||||||
config: &TenantConfig,
|
config: &TenantConfig,
|
||||||
policy: &PlacementPolicy,
|
policy: &PlacementPolicy,
|
||||||
|
secondary_count: usize,
|
||||||
) -> LocationConfig {
|
) -> LocationConfig {
|
||||||
let has_secondaries = match policy {
|
let has_secondaries = match policy {
|
||||||
PlacementPolicy::Attached(0) | PlacementPolicy::Detached | PlacementPolicy::Secondary => {
|
PlacementPolicy::Detached | PlacementPolicy::Secondary => false,
|
||||||
false
|
PlacementPolicy::Attached(0) => secondary_count > 0,
|
||||||
}
|
|
||||||
PlacementPolicy::Attached(_) => true,
|
PlacementPolicy::Attached(_) => true,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -825,6 +825,7 @@ impl Scheduler {
|
|||||||
struct AzScore {
|
struct AzScore {
|
||||||
home_shard_count: usize,
|
home_shard_count: usize,
|
||||||
scheduleable: bool,
|
scheduleable: bool,
|
||||||
|
node_count: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut azs: HashMap<&AvailabilityZone, AzScore> = HashMap::new();
|
let mut azs: HashMap<&AvailabilityZone, AzScore> = HashMap::new();
|
||||||
@@ -832,6 +833,7 @@ impl Scheduler {
|
|||||||
let az = azs.entry(&node.az).or_default();
|
let az = azs.entry(&node.az).or_default();
|
||||||
az.home_shard_count += node.home_shard_count;
|
az.home_shard_count += node.home_shard_count;
|
||||||
az.scheduleable |= matches!(node.may_schedule, MaySchedule::Yes(_));
|
az.scheduleable |= matches!(node.may_schedule, MaySchedule::Yes(_));
|
||||||
|
az.node_count += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If any AZs are schedulable, then filter out the non-schedulable ones (i.e. AZs where
|
// If any AZs are schedulable, then filter out the non-schedulable ones (i.e. AZs where
|
||||||
@@ -840,10 +842,20 @@ impl Scheduler {
|
|||||||
azs.retain(|_, i| i.scheduleable);
|
azs.retain(|_, i| i.scheduleable);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We will multiply up shard counts by the max node count for scoring, before dividing
|
||||||
|
// by per-node max node count, to get a normalized score that doesn't collapse to zero
|
||||||
|
// when the absolute shard count is less than the node count.
|
||||||
|
let max_node_count = azs.values().map(|i| i.node_count).max().unwrap_or(0);
|
||||||
|
|
||||||
// Find the AZ with the lowest number of shards currently allocated
|
// Find the AZ with the lowest number of shards currently allocated
|
||||||
Some(
|
Some(
|
||||||
azs.into_iter()
|
azs.into_iter()
|
||||||
.min_by_key(|i| (i.1.home_shard_count, i.0))
|
.min_by_key(|i| {
|
||||||
|
(
|
||||||
|
(i.1.home_shard_count * max_node_count) / i.1.node_count,
|
||||||
|
i.0,
|
||||||
|
)
|
||||||
|
})
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.0
|
.0
|
||||||
.clone(),
|
.clone(),
|
||||||
|
|||||||
@@ -466,6 +466,13 @@ pub struct Config {
|
|||||||
pub timelines_onto_safekeepers: bool,
|
pub timelines_onto_safekeepers: bool,
|
||||||
|
|
||||||
pub use_local_compute_notifications: bool,
|
pub use_local_compute_notifications: bool,
|
||||||
|
|
||||||
|
/// Number of safekeepers to choose for a timeline when creating it.
|
||||||
|
/// Safekeepers will be chosen from different availability zones.
|
||||||
|
pub timeline_safekeeper_count: i64,
|
||||||
|
|
||||||
|
#[cfg(feature = "testing")]
|
||||||
|
pub kick_secondary_downloads: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<DatabaseError> for ApiError {
|
impl From<DatabaseError> for ApiError {
|
||||||
@@ -2060,6 +2067,7 @@ impl Service {
|
|||||||
&tenant_shard.shard,
|
&tenant_shard.shard,
|
||||||
&tenant_shard.config,
|
&tenant_shard.config,
|
||||||
&PlacementPolicy::Attached(0),
|
&PlacementPolicy::Attached(0),
|
||||||
|
tenant_shard.intent.get_secondary().len(),
|
||||||
)),
|
)),
|
||||||
},
|
},
|
||||||
)]);
|
)]);
|
||||||
@@ -5601,7 +5609,15 @@ impl Service {
|
|||||||
for parent_id in parent_ids {
|
for parent_id in parent_ids {
|
||||||
let child_ids = parent_id.split(new_shard_count);
|
let child_ids = parent_id.split(new_shard_count);
|
||||||
|
|
||||||
let (pageserver, generation, policy, parent_ident, config, preferred_az) = {
|
let (
|
||||||
|
pageserver,
|
||||||
|
generation,
|
||||||
|
policy,
|
||||||
|
parent_ident,
|
||||||
|
config,
|
||||||
|
preferred_az,
|
||||||
|
secondary_count,
|
||||||
|
) = {
|
||||||
let mut old_state = tenants
|
let mut old_state = tenants
|
||||||
.remove(&parent_id)
|
.remove(&parent_id)
|
||||||
.expect("It was present, we just split it");
|
.expect("It was present, we just split it");
|
||||||
@@ -5621,6 +5637,7 @@ impl Service {
|
|||||||
old_state.shard,
|
old_state.shard,
|
||||||
old_state.config.clone(),
|
old_state.config.clone(),
|
||||||
old_state.preferred_az().cloned(),
|
old_state.preferred_az().cloned(),
|
||||||
|
old_state.intent.get_secondary().len(),
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -5642,6 +5659,7 @@ impl Service {
|
|||||||
&child_shard,
|
&child_shard,
|
||||||
&config,
|
&config,
|
||||||
&policy,
|
&policy,
|
||||||
|
secondary_count,
|
||||||
)),
|
)),
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
@@ -8369,6 +8387,11 @@ impl Service {
|
|||||||
/// we have this helper to move things along faster.
|
/// we have this helper to move things along faster.
|
||||||
#[cfg(feature = "testing")]
|
#[cfg(feature = "testing")]
|
||||||
async fn kick_secondary_download(&self, tenant_shard_id: TenantShardId) {
|
async fn kick_secondary_download(&self, tenant_shard_id: TenantShardId) {
|
||||||
|
if !self.config.kick_secondary_downloads {
|
||||||
|
// No-op if kick_secondary_downloads functionality is not configured
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
let (attached_node, secondaries) = {
|
let (attached_node, secondaries) = {
|
||||||
let locked = self.inner.read().unwrap();
|
let locked = self.inner.read().unwrap();
|
||||||
let Some(shard) = locked.tenants.get(&tenant_shard_id) else {
|
let Some(shard) = locked.tenants.get(&tenant_shard_id) else {
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
use std::cmp::max;
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
@@ -608,7 +609,8 @@ impl Service {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Choose safekeepers for the new timeline: 3 in different azs.
|
/// Choose safekeepers for the new timeline in different azs.
|
||||||
|
/// 3 are chosen by default, but may be configured via config (for testing).
|
||||||
pub(crate) async fn safekeepers_for_new_timeline(
|
pub(crate) async fn safekeepers_for_new_timeline(
|
||||||
&self,
|
&self,
|
||||||
) -> Result<Vec<SafekeeperInfo>, ApiError> {
|
) -> Result<Vec<SafekeeperInfo>, ApiError> {
|
||||||
@@ -651,18 +653,14 @@ impl Service {
|
|||||||
)
|
)
|
||||||
});
|
});
|
||||||
// Number of safekeepers in different AZs we are looking for
|
// Number of safekeepers in different AZs we are looking for
|
||||||
let wanted_count = match all_safekeepers.len() {
|
let mut wanted_count = self.config.timeline_safekeeper_count as usize;
|
||||||
0 => {
|
// TODO(diko): remove this when `timeline_safekeeper_count` option is in the release
|
||||||
return Err(ApiError::InternalServerError(anyhow::anyhow!(
|
// branch and is specified in tests/neon_local config.
|
||||||
"couldn't find any active safekeeper for new timeline",
|
if cfg!(feature = "testing") && all_safekeepers.len() < wanted_count {
|
||||||
)));
|
// In testing mode, we can have fewer safekeepers than the config says
|
||||||
}
|
wanted_count = max(all_safekeepers.len(), 1);
|
||||||
// Have laxer requirements on testing mode as we don't want to
|
}
|
||||||
// spin up three safekeepers for every single test
|
|
||||||
#[cfg(feature = "testing")]
|
|
||||||
1 | 2 => all_safekeepers.len(),
|
|
||||||
_ => 3,
|
|
||||||
};
|
|
||||||
let mut sks = Vec::new();
|
let mut sks = Vec::new();
|
||||||
let mut azs = HashSet::new();
|
let mut azs = HashSet::new();
|
||||||
for (_sk_util, sk_info, az_id) in all_safekeepers.iter() {
|
for (_sk_util, sk_info, az_id) in all_safekeepers.iter() {
|
||||||
|
|||||||
@@ -1381,8 +1381,13 @@ impl TenantShard {
|
|||||||
.generation
|
.generation
|
||||||
.expect("Attempted to enter attached state without a generation");
|
.expect("Attempted to enter attached state without a generation");
|
||||||
|
|
||||||
let wanted_conf =
|
let wanted_conf = attached_location_conf(
|
||||||
attached_location_conf(generation, &self.shard, &self.config, &self.policy);
|
generation,
|
||||||
|
&self.shard,
|
||||||
|
&self.config,
|
||||||
|
&self.policy,
|
||||||
|
self.intent.get_secondary().len(),
|
||||||
|
);
|
||||||
match self.observed.locations.get(&node_id) {
|
match self.observed.locations.get(&node_id) {
|
||||||
Some(conf) if conf.conf.as_ref() == Some(&wanted_conf) => {}
|
Some(conf) if conf.conf.as_ref() == Some(&wanted_conf) => {}
|
||||||
Some(_) | None => {
|
Some(_) | None => {
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ The value to place in the `aud` claim.
|
|||||||
|
|
||||||
@final
|
@final
|
||||||
class ComputeClaimsScope(StrEnum):
|
class ComputeClaimsScope(StrEnum):
|
||||||
ADMIN = "admin"
|
ADMIN = "compute_ctl:admin"
|
||||||
|
|
||||||
|
|
||||||
@final
|
@final
|
||||||
|
|||||||
@@ -453,6 +453,7 @@ class NeonEnvBuilder:
|
|||||||
pageserver_get_vectored_concurrent_io: str | None = None,
|
pageserver_get_vectored_concurrent_io: str | None = None,
|
||||||
pageserver_tracing_config: PageserverTracingConfig | None = None,
|
pageserver_tracing_config: PageserverTracingConfig | None = None,
|
||||||
pageserver_import_config: PageserverImportConfig | None = None,
|
pageserver_import_config: PageserverImportConfig | None = None,
|
||||||
|
storcon_kick_secondary_downloads: bool | None = None,
|
||||||
):
|
):
|
||||||
self.repo_dir = repo_dir
|
self.repo_dir = repo_dir
|
||||||
self.rust_log_override = rust_log_override
|
self.rust_log_override = rust_log_override
|
||||||
@@ -514,6 +515,8 @@ class NeonEnvBuilder:
|
|||||||
self.pageserver_tracing_config = pageserver_tracing_config
|
self.pageserver_tracing_config = pageserver_tracing_config
|
||||||
self.pageserver_import_config = pageserver_import_config
|
self.pageserver_import_config = pageserver_import_config
|
||||||
|
|
||||||
|
self.storcon_kick_secondary_downloads = storcon_kick_secondary_downloads
|
||||||
|
|
||||||
self.pageserver_default_tenant_config_compaction_algorithm: dict[str, Any] | None = (
|
self.pageserver_default_tenant_config_compaction_algorithm: dict[str, Any] | None = (
|
||||||
pageserver_default_tenant_config_compaction_algorithm
|
pageserver_default_tenant_config_compaction_algorithm
|
||||||
)
|
)
|
||||||
@@ -1221,6 +1224,14 @@ class NeonEnv:
|
|||||||
else:
|
else:
|
||||||
cfg["storage_controller"] = {"use_local_compute_notifications": False}
|
cfg["storage_controller"] = {"use_local_compute_notifications": False}
|
||||||
|
|
||||||
|
if config.storcon_kick_secondary_downloads is not None:
|
||||||
|
# Configure whether storage controller should actively kick off secondary downloads
|
||||||
|
if "storage_controller" not in cfg:
|
||||||
|
cfg["storage_controller"] = {}
|
||||||
|
cfg["storage_controller"]["kick_secondary_downloads"] = (
|
||||||
|
config.storcon_kick_secondary_downloads
|
||||||
|
)
|
||||||
|
|
||||||
# Create config for pageserver
|
# Create config for pageserver
|
||||||
http_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
|
http_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
|
||||||
pg_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
|
pg_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
|
||||||
|
|||||||
@@ -1219,3 +1219,31 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
|
|||||||
)
|
)
|
||||||
self.verbose_error(res)
|
self.verbose_error(res)
|
||||||
return res.json()
|
return res.json()
|
||||||
|
|
||||||
|
def force_override_feature_flag(self, flag: str, value: str | None = None):
|
||||||
|
if value is None:
|
||||||
|
res = self.delete(
|
||||||
|
f"http://localhost:{self.port}/v1/feature_flag/{flag}",
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
res = self.put(
|
||||||
|
f"http://localhost:{self.port}/v1/feature_flag/{flag}",
|
||||||
|
params={"value": value},
|
||||||
|
)
|
||||||
|
self.verbose_error(res)
|
||||||
|
|
||||||
|
def evaluate_feature_flag_boolean(self, tenant_id: TenantId, flag: str) -> Any:
|
||||||
|
res = self.get(
|
||||||
|
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/feature_flag/{flag}",
|
||||||
|
params={"as": "boolean"},
|
||||||
|
)
|
||||||
|
self.verbose_error(res)
|
||||||
|
return res.json()
|
||||||
|
|
||||||
|
def evaluate_feature_flag_multivariate(self, tenant_id: TenantId, flag: str) -> Any:
|
||||||
|
res = self.get(
|
||||||
|
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/feature_flag/{flag}",
|
||||||
|
params={"as": "multivariate"},
|
||||||
|
)
|
||||||
|
self.verbose_error(res)
|
||||||
|
return res.json()
|
||||||
|
|||||||
@@ -146,8 +146,6 @@ def run_benchmark(env: NeonEnv, pg_bin: PgBin, record, duration_secs: int):
|
|||||||
ps_http.base_url,
|
ps_http.base_url,
|
||||||
"--page-service-connstring",
|
"--page-service-connstring",
|
||||||
env.pageserver.connstr(password=None),
|
env.pageserver.connstr(password=None),
|
||||||
"--gzip-probability",
|
|
||||||
"1",
|
|
||||||
"--runtime",
|
"--runtime",
|
||||||
f"{duration_secs}s",
|
f"{duration_secs}s",
|
||||||
# don't specify the targets explicitly, let pagebench auto-discover them
|
# don't specify the targets explicitly, let pagebench auto-discover them
|
||||||
|
|||||||
@@ -184,7 +184,7 @@ def test_fully_custom_config(positive_env: NeonEnv):
|
|||||||
"timeline_offloading": False,
|
"timeline_offloading": False,
|
||||||
"rel_size_v2_enabled": True,
|
"rel_size_v2_enabled": True,
|
||||||
"relsize_snapshot_cache_capacity": 10000,
|
"relsize_snapshot_cache_capacity": 10000,
|
||||||
"gc_compaction_enabled": True,
|
"gc_compaction_enabled": False,
|
||||||
"gc_compaction_verification": False,
|
"gc_compaction_verification": False,
|
||||||
"gc_compaction_initial_threshold_kb": 1024000,
|
"gc_compaction_initial_threshold_kb": 1024000,
|
||||||
"gc_compaction_ratio_percent": 200,
|
"gc_compaction_ratio_percent": 200,
|
||||||
|
|||||||
51
test_runner/regress/test_feature_flag.py
Normal file
51
test_runner/regress/test_feature_flag.py
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from fixtures.utils import run_only_on_default_postgres
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||||
|
|
||||||
|
|
||||||
|
@run_only_on_default_postgres("Pageserver-only test only needs to run on one version")
|
||||||
|
def test_feature_flag(neon_env_builder: NeonEnvBuilder):
|
||||||
|
env = neon_env_builder.init_start()
|
||||||
|
env.pageserver.http_client().force_override_feature_flag("test-feature-flag", "true")
|
||||||
|
assert env.pageserver.http_client().evaluate_feature_flag_boolean(
|
||||||
|
env.initial_tenant, "test-feature-flag"
|
||||||
|
)["result"]["Ok"]
|
||||||
|
assert (
|
||||||
|
env.pageserver.http_client().evaluate_feature_flag_multivariate(
|
||||||
|
env.initial_tenant, "test-feature-flag"
|
||||||
|
)["result"]["Ok"]
|
||||||
|
== "true"
|
||||||
|
)
|
||||||
|
|
||||||
|
env.pageserver.http_client().force_override_feature_flag("test-feature-flag", "false")
|
||||||
|
assert (
|
||||||
|
env.pageserver.http_client().evaluate_feature_flag_boolean(
|
||||||
|
env.initial_tenant, "test-feature-flag"
|
||||||
|
)["result"]["Err"]
|
||||||
|
== "No condition group is matched"
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
env.pageserver.http_client().evaluate_feature_flag_multivariate(
|
||||||
|
env.initial_tenant, "test-feature-flag"
|
||||||
|
)["result"]["Ok"]
|
||||||
|
== "false"
|
||||||
|
)
|
||||||
|
|
||||||
|
env.pageserver.http_client().force_override_feature_flag("test-feature-flag", None)
|
||||||
|
assert (
|
||||||
|
"Err"
|
||||||
|
in env.pageserver.http_client().evaluate_feature_flag_boolean(
|
||||||
|
env.initial_tenant, "test-feature-flag"
|
||||||
|
)["result"]
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
"Err"
|
||||||
|
in env.pageserver.http_client().evaluate_feature_flag_multivariate(
|
||||||
|
env.initial_tenant, "test-feature-flag"
|
||||||
|
)["result"]
|
||||||
|
)
|
||||||
@@ -173,7 +173,11 @@ def test_pg_regress(
|
|||||||
(runpath / "testtablespace").mkdir(parents=True)
|
(runpath / "testtablespace").mkdir(parents=True)
|
||||||
|
|
||||||
# Compute all the file locations that pg_regress will need.
|
# Compute all the file locations that pg_regress will need.
|
||||||
build_path = pg_distrib_dir / f"build/{env.pg_version.v_prefixed}/src/test/regress"
|
#
|
||||||
|
# XXX: We assume that the `build` directory is a sibling of the
|
||||||
|
# pg_distrib_dir. That is the default when you check out the
|
||||||
|
# repository; `build` and `pg_install` are created side by side.
|
||||||
|
build_path = pg_distrib_dir / f"../build/{env.pg_version.v_prefixed}/src/test/regress"
|
||||||
src_path = base_dir / f"vendor/postgres-{env.pg_version.v_prefixed}/src/test/regress"
|
src_path = base_dir / f"vendor/postgres-{env.pg_version.v_prefixed}/src/test/regress"
|
||||||
bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
|
bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
|
||||||
schedule = src_path / "parallel_schedule"
|
schedule = src_path / "parallel_schedule"
|
||||||
@@ -250,7 +254,11 @@ def test_isolation(
|
|||||||
(runpath / "testtablespace").mkdir(parents=True)
|
(runpath / "testtablespace").mkdir(parents=True)
|
||||||
|
|
||||||
# Compute all the file locations that pg_isolation_regress will need.
|
# Compute all the file locations that pg_isolation_regress will need.
|
||||||
build_path = pg_distrib_dir / f"build/{env.pg_version.v_prefixed}/src/test/isolation"
|
#
|
||||||
|
# XXX: We assume that the `build` directory is a sibling of the
|
||||||
|
# pg_distrib_dir. That is the default when you check out the
|
||||||
|
# repository; `build` and `pg_install` are created side by side.
|
||||||
|
build_path = pg_distrib_dir / f"../build/{env.pg_version.v_prefixed}/src/test/isolation"
|
||||||
src_path = base_dir / f"vendor/postgres-{env.pg_version.v_prefixed}/src/test/isolation"
|
src_path = base_dir / f"vendor/postgres-{env.pg_version.v_prefixed}/src/test/isolation"
|
||||||
bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
|
bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
|
||||||
schedule = src_path / "isolation_schedule"
|
schedule = src_path / "isolation_schedule"
|
||||||
@@ -314,8 +322,11 @@ def test_sql_regress(
|
|||||||
(runpath / "testtablespace").mkdir(parents=True)
|
(runpath / "testtablespace").mkdir(parents=True)
|
||||||
|
|
||||||
# Compute all the file locations that pg_regress will need.
|
# Compute all the file locations that pg_regress will need.
|
||||||
# This test runs neon specific tests
|
#
|
||||||
build_path = pg_distrib_dir / f"build/v{env.pg_version}/src/test/regress"
|
# XXX: We assume that the `build` directory is a sibling of the
|
||||||
|
# pg_distrib_dir. That is the default when you check out the
|
||||||
|
# repository; `build` and `pg_install` are created side by side.
|
||||||
|
build_path = pg_distrib_dir / f"../build/{env.pg_version.v_prefixed}/src/test/regress"
|
||||||
src_path = base_dir / "test_runner/sql_regress"
|
src_path = base_dir / "test_runner/sql_regress"
|
||||||
bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
|
bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
|
||||||
schedule = src_path / "parallel_schedule"
|
schedule = src_path / "parallel_schedule"
|
||||||
|
|||||||
@@ -74,7 +74,7 @@ def test_tenant_s3_restore(
|
|||||||
last_flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
|
last_flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
|
||||||
last_flush_lsns.append(last_flush_lsn)
|
last_flush_lsns.append(last_flush_lsn)
|
||||||
ps_http.timeline_checkpoint(tenant_id, timeline_id)
|
ps_http.timeline_checkpoint(tenant_id, timeline_id)
|
||||||
wait_for_upload(ps_http, tenant_id, timeline_id, last_flush_lsn)
|
wait_for_upload(ps_http, tenant_id, timeline_id, last_flush_lsn, timeout=60)
|
||||||
log.info(f"{timeline} timeline {timeline_id} {last_flush_lsn=}")
|
log.info(f"{timeline} timeline {timeline_id} {last_flush_lsn=}")
|
||||||
parent = timeline
|
parent = timeline
|
||||||
|
|
||||||
|
|||||||
@@ -3642,7 +3642,9 @@ def test_timeline_delete_mid_live_migration(neon_env_builder: NeonEnvBuilder, mi
|
|||||||
env.start()
|
env.start()
|
||||||
|
|
||||||
for ps in env.pageservers:
|
for ps in env.pageservers:
|
||||||
ps.allowed_errors.append(".*Timeline.* has been deleted.*")
|
ps.allowed_errors.extend(
|
||||||
|
[".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"]
|
||||||
|
)
|
||||||
|
|
||||||
tenant_id = TenantId.generate()
|
tenant_id = TenantId.generate()
|
||||||
timeline_id = TimelineId.generate()
|
timeline_id = TimelineId.generate()
|
||||||
@@ -4434,6 +4436,53 @@ def test_storage_controller_graceful_migration(neon_env_builder: NeonEnvBuilder,
|
|||||||
assert initial_ps.http_client().tenant_list_locations()["tenant_shards"] == []
|
assert initial_ps.http_client().tenant_list_locations()["tenant_shards"] == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_attached_0_graceful_migration(neon_env_builder: NeonEnvBuilder):
|
||||||
|
neon_env_builder.num_pageservers = 4
|
||||||
|
neon_env_builder.num_azs = 2
|
||||||
|
|
||||||
|
neon_env_builder.storcon_kick_secondary_downloads = False
|
||||||
|
|
||||||
|
env = neon_env_builder.init_start()
|
||||||
|
|
||||||
|
# It is default, but we want to ensure that there are no secondary locations requested
|
||||||
|
env.storage_controller.tenant_policy_update(env.initial_tenant, {"placement": {"Attached": 0}})
|
||||||
|
env.storage_controller.reconcile_until_idle()
|
||||||
|
|
||||||
|
desc = env.storage_controller.tenant_describe(env.initial_tenant)["shards"][0]
|
||||||
|
src_ps_id = desc["node_attached"]
|
||||||
|
src_ps = env.get_pageserver(src_ps_id)
|
||||||
|
src_az = desc["preferred_az_id"]
|
||||||
|
|
||||||
|
# There must be no secondary locations with Attached(0) placement policy
|
||||||
|
assert len(desc["node_secondary"]) == 0
|
||||||
|
|
||||||
|
# Migrate tenant shard to the same AZ node
|
||||||
|
dst_ps = [ps for ps in env.pageservers if ps.id != src_ps_id and ps.az_id == src_az][0]
|
||||||
|
|
||||||
|
env.storage_controller.tenant_shard_migrate(
|
||||||
|
TenantShardId(env.initial_tenant, 0, 0),
|
||||||
|
dst_ps.id,
|
||||||
|
config=StorageControllerMigrationConfig(prewarm=True),
|
||||||
|
)
|
||||||
|
|
||||||
|
def tenant_shard_migrated():
|
||||||
|
src_locations = src_ps.http_client().tenant_list_locations()["tenant_shards"]
|
||||||
|
assert len(src_locations) == 0
|
||||||
|
log.info(f"Tenant shard migrated from {src_ps.id}")
|
||||||
|
dst_locations = dst_ps.http_client().tenant_list_locations()["tenant_shards"]
|
||||||
|
assert len(dst_locations) == 1
|
||||||
|
assert dst_locations[0][1]["mode"] == "AttachedSingle"
|
||||||
|
log.info(f"Tenant shard migrated to {dst_ps.id}")
|
||||||
|
|
||||||
|
# After all we expect that tenant shard exists only on dst node.
|
||||||
|
# We wait so long because [`DEFAULT_HEATMAP_PERIOD`] and [`DEFAULT_DOWNLOAD_INTERVAL`]
|
||||||
|
# are set to 60 seconds by default.
|
||||||
|
#
|
||||||
|
# TODO: we should consider making these configurable, so the test can run faster.
|
||||||
|
wait_until(tenant_shard_migrated, timeout=180, interval=5, status_interval=10)
|
||||||
|
log.info("Tenant shard migrated successfully")
|
||||||
|
|
||||||
|
|
||||||
@run_only_on_default_postgres("this is like a 'unit test' against storcon db")
|
@run_only_on_default_postgres("this is like a 'unit test' against storcon db")
|
||||||
def test_storage_controller_migrate_with_pageserver_restart(
|
def test_storage_controller_migrate_with_pageserver_restart(
|
||||||
neon_env_builder: NeonEnvBuilder, make_httpserver
|
neon_env_builder: NeonEnvBuilder, make_httpserver
|
||||||
|
|||||||
@@ -1099,7 +1099,9 @@ def test_timeline_detach_ancestor_interrupted_by_deletion(
|
|||||||
|
|
||||||
for ps in env.pageservers:
|
for ps in env.pageservers:
|
||||||
ps.allowed_errors.extend(SHUTDOWN_ALLOWED_ERRORS)
|
ps.allowed_errors.extend(SHUTDOWN_ALLOWED_ERRORS)
|
||||||
ps.allowed_errors.append(".*Timeline.* has been deleted.*")
|
ps.allowed_errors.extend(
|
||||||
|
[".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"]
|
||||||
|
)
|
||||||
|
|
||||||
pageservers = dict((int(p.id), p) for p in env.pageservers)
|
pageservers = dict((int(p.id), p) for p in env.pageservers)
|
||||||
|
|
||||||
@@ -1221,7 +1223,9 @@ def test_sharded_tad_interleaved_after_partial_success(neon_env_builder: NeonEnv
|
|||||||
|
|
||||||
for ps in env.pageservers:
|
for ps in env.pageservers:
|
||||||
ps.allowed_errors.extend(SHUTDOWN_ALLOWED_ERRORS)
|
ps.allowed_errors.extend(SHUTDOWN_ALLOWED_ERRORS)
|
||||||
ps.allowed_errors.append(".*Timeline.* has been deleted.*")
|
ps.allowed_errors.extend(
|
||||||
|
[".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"]
|
||||||
|
)
|
||||||
|
|
||||||
pageservers = dict((int(p.id), p) for p in env.pageservers)
|
pageservers = dict((int(p.id), p) for p in env.pageservers)
|
||||||
|
|
||||||
|
|||||||
@@ -25,7 +25,9 @@ def test_gc_blocking_by_timeline(neon_env_builder: NeonEnvBuilder, sharded: bool
|
|||||||
initial_tenant_shard_count=2 if sharded else None,
|
initial_tenant_shard_count=2 if sharded else None,
|
||||||
)
|
)
|
||||||
for ps in env.pageservers:
|
for ps in env.pageservers:
|
||||||
ps.allowed_errors.append(".*Timeline.* has been deleted.*")
|
ps.allowed_errors.extend(
|
||||||
|
[".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"]
|
||||||
|
)
|
||||||
|
|
||||||
if sharded:
|
if sharded:
|
||||||
http = env.storage_controller.pageserver_api()
|
http = env.storage_controller.pageserver_api()
|
||||||
|
|||||||
2
vendor/postgres-v14
vendored
2
vendor/postgres-v14
vendored
Submodule vendor/postgres-v14 updated: 6770bc2513...9085654ee8
2
vendor/revisions.json
vendored
2
vendor/revisions.json
vendored
@@ -13,6 +13,6 @@
|
|||||||
],
|
],
|
||||||
"v14": [
|
"v14": [
|
||||||
"14.18",
|
"14.18",
|
||||||
"6770bc251301ef40c66f7ecb731741dc435b5051"
|
"9085654ee8022d5cc4ca719380a1dc53e5e3246f"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user