Mirror of https://github.com/neondatabase/neon.git, synced 2026-02-05 11:40:37 +00:00.

Compare commits: release-pr ... split-prox (233 commits)
Commit SHA1s:

18303e4d68 3df6d368e3 b62e7c0138 a2968c6cf8 bae1288671 1254d8f56e 073508493c 7cb2349296
87151f9efd 96fe084c57 20fdf3e19f c6b36d8171 0e8a848937 db4085fe22 0d895ba002 103f34e954
262378e561 9f38ab39c6 fa92328423 f7a3380aec 507f1a5bdd 401dcd3551 4a53cd0fc3 f5cef7bf7f
e6770d79fd 201f56baf7 a155914c1c 7e08fbd1b9 2ca5ff26d7 8acce00953 d28a6f2576 4431688dc6
953b7d4f7e 8561b2c628 21638ee96c cbe8c77997 cf3eac785b 542385e364 05dd1ae9e0 8468d51a14
a81fab4826 b3eea45277 fc78774f39 ad0988f278 4d7c0dac93 00c981576a c3f2240fbd ed5724d79d
ca5390a89d 3727c6fbbe 42229aacf6 b7beaa0fd7 16c91ff5d3 078f941dc8 68bcbf8227 a31c95cb40
dc7eb5ae5a 44fedfd6c3 138f008bab 6a6f30e378 8f3bc5ae35 e6e578821b c32807ac19 50daff9655
bd845c7587 f63c8e5a8c 200fa56b04 0f3dac265b 1dc496a2c9 6814bdd30b 0a667bc8ef f3acfb2d80
8c828c586e 2334fed762 c53799044d e7477855b7 f4a668a27d 970f2923b2 1678dea20f 163f2eaf79
980d506bda d6c79b77df 3350daeb9a 939d50a41c 2f9ada13c4 ff51b565d3 5e0409de95 4e3b70e308
61a65f61f3 d21246c8bd 4825b0fec3 a4df3c8488 d95b46f3f3 85bef9f05d e374d6778e 9ceaf9a986
f72fe68626 9fabdda2dc 1c7b06c988 52b02d95c8 4be58522fb d09dad0ea2 5775662276 bdfc9ca7e9
1d8cf5b3a9 859f019185 da6bdff893 2416da337e 6cad0455b0 b5e95f68b5 dd40b19db4 68241f5a3e
8154e88732 240ba7e10c 7a796a9963 eddfd62333 cdaa2816e7 3cecbfc04d 65868258d2 bb2a3f9b02
6711087ddf 8182bfdf01 8e02db1ab9 857a1823fe 9bfa180f2e bea0468f1f 3977e0a7a3 775c0c8892
24ea9f9f60 9c5ad21341 f76a4e0ad2 a1256b2a67 d57412aaab 6fc2726568 99b1a1dfb6 5f4e14d27d
2723a8156a 6f22de5fc9 6ca41d3438 2ef8e57f86 842c3d8c10 c698b7b010 f5db655447 925c5ad1e8
b037ce07ec 2c0d311a54 18cf5cfefd 39a35671df 9e23410074 d47c94b336 563d73d923 1a4c1eba92
129f348aae 80c8ceacbc 35854928d9 3cd888f173 d6753e9ee4 a868e342d4 f17fe75169 6237322a2e
e8523014d4 631a9c372f 595c450036 204bb8faa3 8d948f2e07 98af1e365b ebda667ef8 fd8a7a7223
7996bce6d6 4e547e6274 3d582b212a 3fbb84d741 a4fa250c92 39aeb10cfc 44781518d0 16071e57c6
392d3524f9 c96e8012ce 5a772761ee 841b76ea7c a4434cf1c0 d263b1804e b461755326 9ded2556df
7672e49ab5 a2d170b6d0 1303d47778 e250b9e063 0c236fa465 da84a250c6 975f8ac658 839a5724a4
f2b8e390e7 f7131834eb 4a90423292 f4f0869dc8 0950866fa8 7cf59ae5b4 b197cc20fc ba17025a57
b5ab055526 a40b402957 d2ee760eb2 66337097de e6dadcd2f3 83e07c1a5b ee263e6a62 7eb37fea26
730db859c7 04448ac323 324e4e008f b49b450dc4 8a8b83df27 4bdfb96078 8da3b547f8 b329b1c610
4184685721 411a130675 0645ae318e 86d6ef305a 2e37aa3fe8 30bbfde50d 82b9a44ab4 4a87bac036
38b4ed297e cd29156927 814c8e8f68 0159ae9536 d9a82468e2 e26ef640c1 c11b9cb43d 69b6675da0
6bbd34a216
.gitattributes (vendored, new file): 2 lines

@@ -0,0 +1,2 @@
+# allows for nicer hunk headers with git show
+*.rs diff=rust
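For context, `diff=rust` selects git's built-in Rust hunk-header rules, so `git show` and `git diff` label hunks with the enclosing item instead of an arbitrary nearby line. A minimal sketch of checking that the attribute takes effect in a checkout (the path is illustrative):

```bash
# Confirm the attribute applies to Rust sources (path is hypothetical).
git check-attr diff -- pageserver/src/lib.rs
# Expected: "pageserver/src/lib.rs: diff: rust"

# Hunk headers in subsequent diffs should now show the enclosing fn/impl item.
git log -p -1 -- '*.rs' | grep '^@@' | head -n 5
```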
.github/actionlint.yml (vendored): 3 changed lines

@@ -8,6 +8,9 @@ self-hosted-runner:
   - small-arm64
   - us-east-2
 config-variables:
+  - BENCHMARK_PROJECT_ID_PUB
+  - BENCHMARK_PROJECT_ID_SUB
   - REMOTE_STORAGE_AZURE_CONTAINER
   - REMOTE_STORAGE_AZURE_REGION
   - SLACK_UPCOMING_RELEASE_CHANNEL_ID
+  - DEV_AWS_OIDC_ROLE_ARN
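The `config-variables` list is what actionlint uses to validate `vars.*` references in the workflows, so the two benchmark project IDs and the OIDC role ARN have to be declared here before the workflows below can reference them. A quick local check, assuming actionlint is installed (it picks up `.github/actionlint.yml` automatically):

```bash
# Lint all workflows; vars.* names not listed in config-variables are flagged.
actionlint

# Or restrict the run to the workflows touched in this compare.
actionlint .github/workflows/benchmarking.yml .github/workflows/build_and_test.yml
```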
.github/actions/neon-project-create/action.yml (vendored): 16 changed lines

@@ -9,16 +9,13 @@ inputs:
     description: 'Region ID, if not set the project will be created in the default region'
     default: aws-us-east-2
   postgres_version:
-    description: 'Postgres version; default is 15'
-    default: '15'
+    description: 'Postgres version; default is 16'
+    default: '16'
   api_host:
     description: 'Neon API host'
     default: console-stage.neon.build
-  provisioner:
-    description: 'k8s-pod or k8s-neonvm'
-    default: 'k8s-pod'
   compute_units:
-    description: '[Min, Max] compute units; Min and Max are used for k8s-neonvm with autoscaling, for k8s-pod values Min and Max should be equal'
+    description: '[Min, Max] compute units'
     default: '[1, 1]'

 outputs:

@@ -37,10 +34,6 @@ runs:
       # A shell without `set -x` to not to expose password/dsn in logs
       shell: bash -euo pipefail {0}
       run: |
-        if [ "${PROVISIONER}" == "k8s-pod" ] && [ "${MIN_CU}" != "${MAX_CU}" ]; then
-          echo >&2 "For k8s-pod provisioner MIN_CU should be equal to MAX_CU"
-        fi
-
         project=$(curl \
           "https://${API_HOST}/api/v2/projects" \
           --fail \

@@ -52,7 +45,7 @@ runs:
           \"name\": \"Created by actions/neon-project-create; GITHUB_RUN_ID=${GITHUB_RUN_ID}\",
           \"pg_version\": ${POSTGRES_VERSION},
           \"region_id\": \"${REGION_ID}\",
-          \"provisioner\": \"${PROVISIONER}\",
+          \"provisioner\": \"k8s-neonvm\",
           \"autoscaling_limit_min_cu\": ${MIN_CU},
           \"autoscaling_limit_max_cu\": ${MAX_CU},
           \"settings\": { }

@@ -75,6 +68,5 @@ runs:
       API_KEY: ${{ inputs.api_key }}
       REGION_ID: ${{ inputs.region_id }}
       POSTGRES_VERSION: ${{ inputs.postgres_version }}
-      PROVISIONER: ${{ inputs.provisioner }}
       MIN_CU: ${{ fromJSON(inputs.compute_units)[0] }}
       MAX_CU: ${{ fromJSON(inputs.compute_units)[1] }}
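The net effect is that the action now always requests a `k8s-neonvm` compute: the `provisioner` input and the min/max sanity check are gone, and the Postgres default moves to 16. A rough sketch of the equivalent manual API call, built only from the fields visible in the hunks above; the JSON envelope, the auth header, and all concrete values are assumptions for illustration:

```bash
API_HOST=console-stage.neon.build   # default api_host from the action
API_KEY="<staging API key>"         # placeholder
POSTGRES_VERSION=16 REGION_ID=aws-us-east-2 MIN_CU=1 MAX_CU=1

# Envelope and header are assumed; only the inner fields appear in the diff.
curl "https://${API_HOST}/api/v2/projects" \
  --fail \
  --header "Authorization: Bearer ${API_KEY}" \
  --header "Content-Type: application/json" \
  --data "{
    \"project\": {
      \"name\": \"manual neon-project-create test\",
      \"pg_version\": ${POSTGRES_VERSION},
      \"region_id\": \"${REGION_ID}\",
      \"provisioner\": \"k8s-neonvm\",
      \"autoscaling_limit_min_cu\": ${MIN_CU},
      \"autoscaling_limit_max_cu\": ${MAX_CU},
      \"settings\": { }
    }
  }"
```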
A further hunk follows whose file header was not captured in this mirror (it belongs to another composite action in `.github/actions/`):

@@ -131,8 +131,8 @@ runs:
           exit 1
         fi
         if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then
-          # -n16 uses sixteen processes to run tests via pytest-xdist
-          EXTRA_PARAMS="-n16 $EXTRA_PARAMS"
+          # -n sets the number of parallel processes that pytest-xdist will run
+          EXTRA_PARAMS="-n12 $EXTRA_PARAMS"

           # --dist=loadgroup points tests marked with @pytest.mark.xdist_group
           # to the same worker to make @pytest.mark.order work with xdist
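For reference, the resulting invocation spreads tests across twelve xdist workers while keeping explicitly grouped tests on one worker; a hedged approximation of the final command (the test path and any other flags are illustrative):

```bash
# -n12: twelve pytest-xdist worker processes
# --dist=loadgroup: tests marked @pytest.mark.xdist_group(...) stay on one worker,
#                   which keeps @pytest.mark.order effective within the group
pytest -n12 --dist=loadgroup test_runner/regress
```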
.github/workflows/_build-and-test-locally.yml (vendored, new file): 292 lines

@@ -0,0 +1,292 @@
name: Build and Test Locally

on:
  workflow_call:
    inputs:
      arch:
        description: 'x64 or arm64'
        required: true
        type: string
      build-tag:
        description: 'build tag'
        required: true
        type: string
      build-tools-image:
        description: 'build-tools image'
        required: true
        type: string
      build-type:
        description: 'debug or release'
        required: true
        type: string
      pg-versions:
        description: 'a json array of postgres versions to run regression tests on'
        required: true
        type: string

defaults:
  run:
    shell: bash -euxo pipefail {0}

env:
  RUST_BACKTRACE: 1
  COPT: '-Werror'
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}

jobs:
  build-neon:
    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
    container:
      image: ${{ inputs.build-tools-image }}
      credentials:
        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
      # Raise locked memory limit for tokio-epoll-uring.
      # On 5.10 LTS kernels < 5.10.162 (and generally mainline kernels < 5.12),
      # io_uring will account the memory of the CQ and SQ as locked.
      # More details: https://github.com/neondatabase/neon/issues/6373#issuecomment-1905814391
      options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
    env:
      BUILD_TYPE: ${{ inputs.build-type }}
      GIT_VERSION: ${{ github.event.pull_request.head.sha || github.sha }}
      BUILD_TAG: ${{ inputs.build-tag }}

    steps:
      - name: Fix git ownership
        run: |
          # Workaround for `fatal: detected dubious ownership in repository at ...`
          #
          # Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
          # Ref https://github.com/actions/checkout/issues/785
          #
          git config --global --add safe.directory ${{ github.workspace }}
          git config --global --add safe.directory ${GITHUB_WORKSPACE}
          for r in 14 15 16; do
            git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
            git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
          done

      - uses: actions/checkout@v4
        with:
          submodules: true
          fetch-depth: 1

      - name: Set pg 14 revision for caching
        id: pg_v14_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT

      - name: Set pg 15 revision for caching
        id: pg_v15_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT

      - name: Set pg 16 revision for caching
        id: pg_v16_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT

      # Set some environment variables used by all the steps.
      #
      # CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
      # It also includes --features, if any
      #
      # CARGO_FEATURES is passed to "cargo metadata". It is separate from CARGO_FLAGS,
      # because "cargo metadata" doesn't accept --release or --debug options
      #
      # We run tests with addtional features, that are turned off by default (e.g. in release builds), see
      # corresponding Cargo.toml files for their descriptions.
      - name: Set env variables
        run: |
          CARGO_FEATURES="--features testing"
          if [[ $BUILD_TYPE == "debug" ]]; then
            cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
            CARGO_FLAGS="--locked"
          elif [[ $BUILD_TYPE == "release" ]]; then
            cov_prefix=""
            CARGO_FLAGS="--locked --release"
          fi
          {
            echo "cov_prefix=${cov_prefix}"
            echo "CARGO_FEATURES=${CARGO_FEATURES}"
            echo "CARGO_FLAGS=${CARGO_FLAGS}"
            echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo"
          } >> $GITHUB_ENV

      - name: Cache postgres v14 build
        id: cache_pg_14
        uses: actions/cache@v4
        with:
          path: pg_install/v14
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}

      - name: Cache postgres v15 build
        id: cache_pg_15
        uses: actions/cache@v4
        with:
          path: pg_install/v15
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}

      - name: Cache postgres v16 build
        id: cache_pg_16
        uses: actions/cache@v4
        with:
          path: pg_install/v16
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}

      - name: Build postgres v14
        if: steps.cache_pg_14.outputs.cache-hit != 'true'
        run: mold -run make postgres-v14 -j$(nproc)

      - name: Build postgres v15
        if: steps.cache_pg_15.outputs.cache-hit != 'true'
        run: mold -run make postgres-v15 -j$(nproc)

      - name: Build postgres v16
        if: steps.cache_pg_16.outputs.cache-hit != 'true'
        run: mold -run make postgres-v16 -j$(nproc)

      - name: Build neon extensions
        run: mold -run make neon-pg-ext -j$(nproc)

      - name: Build walproposer-lib
        run: mold -run make walproposer-lib -j$(nproc)

      - name: Run cargo build
        run: |
          PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
          export PQ_LIB_DIR
          ${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins --tests

      # Do install *before* running rust tests because they might recompile the
      # binaries with different features/flags.
      - name: Install rust binaries
        run: |
          # Install target binaries
          mkdir -p /tmp/neon/bin/
          binaries=$(
            ${cov_prefix} cargo metadata $CARGO_FEATURES --format-version=1 --no-deps |
            jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
          )
          for bin in $binaries; do
            SRC=target/$BUILD_TYPE/$bin
            DST=/tmp/neon/bin/$bin
            cp "$SRC" "$DST"
          done

          # Install test executables and write list of all binaries (for code coverage)
          if [[ $BUILD_TYPE == "debug" ]]; then
            # Keep bloated coverage data files away from the rest of the artifact
            mkdir -p /tmp/coverage/

            mkdir -p /tmp/neon/test_bin/

            test_exe_paths=$(
              ${cov_prefix} cargo test $CARGO_FLAGS $CARGO_FEATURES --message-format=json --no-run |
              jq -r '.executable | select(. != null)'
            )
            for bin in $test_exe_paths; do
              SRC=$bin
              DST=/tmp/neon/test_bin/$(basename $bin)

              # We don't need debug symbols for code coverage, so strip them out to make
              # the artifact smaller.
              strip "$SRC" -o "$DST"
              echo "$DST" >> /tmp/coverage/binaries.list
            done

            for bin in $binaries; do
              echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
            done
          fi

      - name: Run rust tests
        env:
          NEXTEST_RETRIES: 3
        run: |
          PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
          export PQ_LIB_DIR
          LD_LIBRARY_PATH=$(pwd)/pg_install/v16/lib
          export LD_LIBRARY_PATH

          #nextest does not yet support running doctests
          cargo test --doc $CARGO_FLAGS $CARGO_FEATURES

          for io_engine in std-fs tokio-epoll-uring ; do
            NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES
          done

          # Run separate tests for real S3
          export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
          export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
          export REMOTE_STORAGE_S3_REGION=eu-central-1
          ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_s3)'

          # Run separate tests for real Azure Blob Storage
          # XXX: replace region with `eu-central-1`-like region
          export ENABLE_REAL_AZURE_REMOTE_STORAGE=y
          export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}"
          export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
          export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
          export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
          ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)'

      - name: Install postgres binaries
        run: cp -a pg_install /tmp/neon/pg_install

      - name: Upload Neon artifact
        uses: ./.github/actions/upload
        with:
          name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-artifact
          path: /tmp/neon

      # XXX: keep this after the binaries.list is formed, so the coverage can properly work later
      - name: Merge and upload coverage data
        if: inputs.build-type == 'debug'
        uses: ./.github/actions/save-coverage-data

  regress-tests:
    # Run test on x64 only
    if: inputs.arch == 'x64'
    needs: [ build-neon ]
    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
    container:
      image: ${{ inputs.build-tools-image }}
      credentials:
        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
      # for changed limits, see comments on `options:` earlier in this file
      options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
    strategy:
      fail-fast: false
      matrix:
        pg_version: ${{ fromJson(inputs.pg-versions) }}
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
          fetch-depth: 1

      - name: Pytest regression tests
        uses: ./.github/actions/run-python-test-set
        timeout-minutes: 60
        with:
          build_type: ${{ inputs.build-type }}
          test_selection: regress
          needs_postgres_source: true
          run_with_real_s3: true
          real_s3_bucket: neon-github-ci-tests
          real_s3_region: eu-central-1
          rerun_flaky: true
          pg_version: ${{ matrix.pg_version }}
        env:
          TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
          CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
          BUILD_TAG: ${{ inputs.build-tag }}
          PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring

      # Temporary disable this step until we figure out why it's so flaky
      # Ref https://github.com/neondatabase/neon/issues/4540
      - name: Merge and upload coverage data
        if: |
          false &&
          inputs.build-type == 'debug' && matrix.pg_version == 'v16'
        uses: ./.github/actions/save-coverage-data
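Outside CI, the same build-and-test sequence can be approximated locally; a rough sketch following the steps above for a release build against Postgres v16 only (assumes mold, cargo-nextest, and the Postgres build prerequisites are installed):

```bash
# Build the vendored Postgres, the Neon extensions, and walproposer-lib.
mold -run make postgres-v16 neon-pg-ext walproposer-lib -j"$(nproc)"

# Build the Rust binaries against the just-built libpq.
export PQ_LIB_DIR="$(pwd)/pg_install/v16/lib"
export LD_LIBRARY_PATH="$PQ_LIB_DIR"
mold -run cargo build --locked --release --features testing --bins --tests

# Unit tests under both virtual-file IO engines, as the workflow does.
cargo test --doc --locked --release --features testing
for io_engine in std-fs tokio-epoll-uring; do
  NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine \
    cargo nextest run --locked --release --features testing
done
```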
.github/workflows/benchmarking.yml (vendored): 210 changed lines

@@ -56,24 +56,49 @@ concurrency:
 jobs:
   bench:
     if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
+    permissions:
+      contents: write
+      statuses: write
+      id-token: write # Required for OIDC authentication in azure runners
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - DEFAULT_PG_VERSION: 16
+            PLATFORM: "neon-staging"
+            region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
+            RUNNER: [ self-hosted, us-east-2, x64 ]
+            IMAGE: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
+          - DEFAULT_PG_VERSION: 16
+            PLATFORM: "azure-staging"
+            region_id: 'azure-eastus2'
+            RUNNER: [ self-hosted, eastus2, x64 ]
+            IMAGE: neondatabase/build-tools:pinned
     env:
       TEST_PG_BENCH_DURATIONS_MATRIX: "300"
       TEST_PG_BENCH_SCALES_MATRIX: "10,100"
       POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
-      DEFAULT_PG_VERSION: 14
+      DEFAULT_PG_VERSION: ${{ matrix.DEFAULT_PG_VERSION }}
       TEST_OUTPUT: /tmp/test_output
       BUILD_TYPE: remote
       SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
-      PLATFORM: "neon-staging"
+      PLATFORM: ${{ matrix.PLATFORM }}

-    runs-on: [ self-hosted, us-east-2, x64 ]
+    runs-on: ${{ matrix.RUNNER }}
     container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
+      image: ${{ matrix.IMAGE }}
       options: --init

     steps:
     - uses: actions/checkout@v4

+    - name: Configure AWS credentials # necessary on Azure runners
+      uses: aws-actions/configure-aws-credentials@v4
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        role-duration-seconds: 18000 # 5 hours
+
     - name: Download Neon artifact
       uses: ./.github/actions/download
       with:

@@ -85,7 +110,7 @@ jobs:
       id: create-neon-project
       uses: ./.github/actions/neon-project-create
       with:
-        region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
+        region_id: ${{ matrix.region_id }}
         postgres_version: ${{ env.DEFAULT_PG_VERSION }}
         api_key: ${{ secrets.NEON_STAGING_API_KEY }}

@@ -96,13 +121,14 @@ jobs:
         test_selection: performance
         run_in_parallel: false
         save_perf_report: ${{ env.SAVE_PERF_REPORT }}
+        pg_version: ${{ env.DEFAULT_PG_VERSION }}
         # Set --sparse-ordering option of pytest-order plugin
         # to ensure tests are running in order of appears in the file.
         # It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
         extra_params:
           -m remote_cluster
           --sparse-ordering
-          --timeout 5400
+          --timeout 14400
           --ignore test_runner/performance/test_perf_olap.py
           --ignore test_runner/performance/test_perf_pgvector_queries.py
           --ignore test_runner/performance/test_logical_replication.py

@@ -133,9 +159,10 @@ jobs:
         SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

   replication-tests:
+    if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
     env:
       POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
-      DEFAULT_PG_VERSION: 14
+      DEFAULT_PG_VERSION: 16
       TEST_OUTPUT: /tmp/test_output
       BUILD_TYPE: remote
       SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}

@@ -149,6 +176,7 @@ jobs:
     steps:
     - uses: actions/checkout@v4
+

     - name: Download Neon artifact
       uses: ./.github/actions/download
       with:

@@ -156,7 +184,7 @@ jobs:
         path: /tmp/neon/
         prefix: latest

-    - name: Run benchmark
+    - name: Run Logical Replication benchmarks
       uses: ./.github/actions/run-python-test-set
       with:
         build_type: ${{ env.BUILD_TYPE }}

@@ -164,12 +192,15 @@ jobs:
         run_in_parallel: false
         save_perf_report: ${{ env.SAVE_PERF_REPORT }}
         extra_params: -m remote_cluster --timeout 5400
+        pg_version: ${{ env.DEFAULT_PG_VERSION }}
       env:
         VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
         PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
         NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
+        BENCHMARK_PROJECT_ID_PUB: ${{ vars.BENCHMARK_PROJECT_ID_PUB }}
+        BENCHMARK_PROJECT_ID_SUB: ${{ vars.BENCHMARK_PROJECT_ID_SUB }}

-    - name: Run benchmark
+    - name: Run Physical Replication benchmarks
       uses: ./.github/actions/run-python-test-set
       with:
         build_type: ${{ env.BUILD_TYPE }}

@@ -177,6 +208,7 @@ jobs:
         run_in_parallel: false
         save_perf_report: ${{ env.SAVE_PERF_REPORT }}
         extra_params: -m remote_cluster --timeout 5400
+        pg_version: ${{ env.DEFAULT_PG_VERSION }}
       env:
         VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
         PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"

@@ -200,13 +232,16 @@ jobs:
     # Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday)
     #
     # Available platforms:
-    # - neon-captest-new: Freshly created project (1 CU)
-    # - neon-captest-freetier: Use freetier-sized compute (0.25 CU)
-    # - neon-captest-reuse: Reusing existing project
+    # - neonvm-captest-new: Freshly created project (1 CU)
+    # - neonvm-captest-freetier: Use freetier-sized compute (0.25 CU)
+    # - neonvm-captest-azure-new: Freshly created project (1 CU) in azure region
+    # - neonvm-captest-azure-freetier: Use freetier-sized compute (0.25 CU) in azure region
+    # - neonvm-captest-reuse: Reusing existing project
     # - rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
     # - rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
     env:
       RUN_AWS_RDS_AND_AURORA: ${{ github.event.inputs.run_AWS_RDS_AND_AURORA || 'false' }}
+      DEFAULT_REGION_ID: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
     runs-on: ubuntu-22.04
     outputs:
       pgbench-compare-matrix: ${{ steps.pgbench-compare-matrix.outputs.matrix }}

@@ -217,23 +252,36 @@ jobs:
     - name: Generate matrix for pgbench benchmark
       id: pgbench-compare-matrix
       run: |
+        region_id_default=${{ env.DEFAULT_REGION_ID }}
+        runner_default='["self-hosted", "us-east-2", "x64"]'
+        runner_azure='["self-hosted", "eastus2", "x64"]'
+        image_default="369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned"
         matrix='{
+          "pg_version" : [
+            16
+          ],
+          "region_id" : [
+            "'"$region_id_default"'"
+          ],
           "platform": [
-            "neon-captest-new",
-            "neon-captest-reuse",
+            "neonvm-captest-new",
+            "neonvm-captest-reuse",
             "neonvm-captest-new"
           ],
           "db_size": [ "10gb" ],
-          "include": [{ "platform": "neon-captest-freetier", "db_size": "3gb" },
-                      { "platform": "neon-captest-new", "db_size": "50gb" },
-                      { "platform": "neonvm-captest-freetier", "db_size": "3gb" },
-                      { "platform": "neonvm-captest-new", "db_size": "50gb" },
-                      { "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb" }]
+          "runner": ['"$runner_default"'],
+          "image": [ "'"$image_default"'" ],
+          "include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" },
+                      { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
+                      { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
+                      { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" },
+                      { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" },
+                      { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" },
+                      { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]
         }'

         if [ "$(date +%A)" = "Saturday" ]; then
-          matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres", "db_size": "10gb"},
-                                                     { "platform": "rds-aurora", "db_size": "50gb"}]')
+          matrix=$(echo "$matrix" | jq '.include += [{ "pg_version": 14, "region_id": "'"$region_id_default"'", "platform": "rds-postgres", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]')
         fi

         echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT

@@ -243,7 +291,7 @@ jobs:
       run: |
         matrix='{
           "platform": [
-            "neon-captest-reuse"
+            "neonvm-captest-reuse"
           ]
         }'

@@ -259,7 +307,7 @@ jobs:
       run: |
         matrix='{
           "platform": [
-            "neon-captest-reuse"
+            "neonvm-captest-reuse"
           ],
           "scale": [
             "10"

@@ -276,6 +324,10 @@ jobs:
   pgbench-compare:
     if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
     needs: [ generate-matrices ]
+    permissions:
+      contents: write
+      statuses: write
+      id-token: write # Required for OIDC authentication in azure runners

     strategy:
       fail-fast: false

@@ -285,15 +337,15 @@ jobs:
       TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
       TEST_PG_BENCH_SCALES_MATRIX: ${{ matrix.db_size }}
       POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
-      DEFAULT_PG_VERSION: 14
+      DEFAULT_PG_VERSION: ${{ matrix.pg_version }}
       TEST_OUTPUT: /tmp/test_output
       BUILD_TYPE: remote
       SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
       PLATFORM: ${{ matrix.platform }}

-    runs-on: [ self-hosted, us-east-2, x64 ]
+    runs-on: ${{ matrix.runner }}
     container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
+      image: ${{ matrix.image }}
       options: --init

     # Increase timeout to 8h, default timeout is 6h

@@ -302,6 +354,13 @@ jobs:
     steps:
     - uses: actions/checkout@v4

+    - name: Configure AWS credentials # necessary on Azure runners
+      uses: aws-actions/configure-aws-credentials@v4
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        role-duration-seconds: 18000 # 5 hours
+
     - name: Download Neon artifact
       uses: ./.github/actions/download
       with:

@@ -310,27 +369,26 @@ jobs:
         prefix: latest

     - name: Create Neon Project
-      if: contains(fromJson('["neon-captest-new", "neon-captest-freetier", "neonvm-captest-new", "neonvm-captest-freetier"]'), matrix.platform)
+      if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform)
       id: create-neon-project
       uses: ./.github/actions/neon-project-create
       with:
-        region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
+        region_id: ${{ matrix.region_id }}
         postgres_version: ${{ env.DEFAULT_PG_VERSION }}
         api_key: ${{ secrets.NEON_STAGING_API_KEY }}
-        compute_units: ${{ (matrix.platform == 'neon-captest-freetier' && '[0.25, 0.25]') || '[1, 1]' }}
-        provisioner: ${{ (contains(matrix.platform, 'neonvm-') && 'k8s-neonvm') || 'k8s-pod' }}
+        compute_units: ${{ (contains(matrix.platform, 'captest-freetier') && '[0.25, 0.25]') || '[1, 1]' }}

     - name: Set up Connection String
       id: set-up-connstr
       run: |
         case "${PLATFORM}" in
-          neon-captest-reuse)
+          neonvm-captest-reuse)
             CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
             ;;
           neonvm-captest-sharding-reuse)
             CONNSTR=${{ secrets.BENCHMARK_CAPTEST_SHARDING_CONNSTR }}
             ;;
-          neon-captest-new | neon-captest-freetier | neonvm-captest-new | neonvm-captest-freetier)
+          neonvm-captest-new | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier)
             CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
             ;;
           rds-aurora)

@@ -355,6 +413,7 @@ jobs:
         run_in_parallel: false
         save_perf_report: ${{ env.SAVE_PERF_REPORT }}
         extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
+        pg_version: ${{ env.DEFAULT_PG_VERSION }}
       env:
         BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
         VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"

@@ -368,6 +427,7 @@ jobs:
         run_in_parallel: false
         save_perf_report: ${{ env.SAVE_PERF_REPORT }}
         extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
+        pg_version: ${{ env.DEFAULT_PG_VERSION }}
       env:
         BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
         VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"

@@ -381,6 +441,7 @@ jobs:
         run_in_parallel: false
         save_perf_report: ${{ env.SAVE_PERF_REPORT }}
         extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
+        pg_version: ${{ env.DEFAULT_PG_VERSION }}
       env:
         BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
         VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"

@@ -407,6 +468,21 @@ jobs:
         SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

   pgbench-pgvector:
+    permissions:
+      contents: write
+      statuses: write
+      id-token: write # Required for OIDC authentication in azure runners
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - PLATFORM: "neonvm-captest-pgvector"
+            RUNNER: [ self-hosted, us-east-2, x64 ]
+            IMAGE: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
+          - PLATFORM: "azure-captest-pgvector"
+            RUNNER: [ self-hosted, eastus2, x64 ]
+            IMAGE: neondatabase/build-tools:pinned
+
     env:
       TEST_PG_BENCH_DURATIONS_MATRIX: "15m"
       TEST_PG_BENCH_SCALES_MATRIX: "1"

@@ -414,30 +490,60 @@ jobs:
       DEFAULT_PG_VERSION: 16
       TEST_OUTPUT: /tmp/test_output
       BUILD_TYPE: remote
+      LD_LIBRARY_PATH: /home/nonroot/pg/usr/lib/x86_64-linux-gnu
       SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
-      PLATFORM: "neon-captest-pgvector"
+      PLATFORM: ${{ matrix.PLATFORM }}

-    runs-on: [ self-hosted, us-east-2, x64 ]
+    runs-on: ${{ matrix.RUNNER }}
     container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
+      image: ${{ matrix.IMAGE }}
       options: --init

     steps:
     - uses: actions/checkout@v4

-    - name: Download Neon artifact
-      uses: ./.github/actions/download
-      with:
-        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
-        path: /tmp/neon/
-        prefix: latest
+    # until https://github.com/neondatabase/neon/issues/8275 is fixed we temporarily install postgresql-16
+    # instead of using Neon artifacts containing pgbench
+    - name: Install postgresql-16 where pytest expects it
+      run: |
+        cd /home/nonroot
+        wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/libpq5_16.4-1.pgdg110%2B1_amd64.deb
+        wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.4-1.pgdg110%2B1_amd64.deb
+        wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.4-1.pgdg110%2B1_amd64.deb
+        dpkg -x libpq5_16.4-1.pgdg110+1_amd64.deb pg
+        dpkg -x postgresql-client-16_16.4-1.pgdg110+1_amd64.deb pg
+        dpkg -x postgresql-16_16.4-1.pgdg110+1_amd64.deb pg
+        mkdir -p /tmp/neon/pg_install/v16/bin
+        ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench
+        ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/psql /tmp/neon/pg_install/v16/bin/psql
+        ln -s /home/nonroot/pg/usr/lib/x86_64-linux-gnu /tmp/neon/pg_install/v16/lib
+        /tmp/neon/pg_install/v16/bin/pgbench --version
+        /tmp/neon/pg_install/v16/bin/psql --version

     - name: Set up Connection String
       id: set-up-connstr
       run: |
-        CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR }}
+        case "${PLATFORM}" in
+          neonvm-captest-pgvector)
+            CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR }}
+            ;;
+          azure-captest-pgvector)
+            CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR_AZURE }}
+            ;;
+          *)
+            echo >&2 "Unknown PLATFORM=${PLATFORM}"
+            exit 1
+            ;;
+        esac

         echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT

+    - name: Configure AWS credentials # necessary on Azure runners to read/write from/to S3
+      uses: aws-actions/configure-aws-credentials@v4
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        role-duration-seconds: 18000 # 5 hours
+
     - name: Benchmark pgvector hnsw indexing
       uses: ./.github/actions/run-python-test-set

@@ -447,6 +553,7 @@ jobs:
         run_in_parallel: false
         save_perf_report: ${{ env.SAVE_PERF_REPORT }}
         extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing
+        pg_version: ${{ env.DEFAULT_PG_VERSION }}
       env:
         VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
         PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"

@@ -460,11 +567,12 @@ jobs:
         run_in_parallel: false
         save_perf_report: ${{ env.SAVE_PERF_REPORT }}
         extra_params: -m remote_cluster --timeout 21600
+        pg_version: ${{ env.DEFAULT_PG_VERSION }}
       env:
         BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
         VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
         PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"

     - name: Create Allure report
       if: ${{ !cancelled() }}
       uses: ./.github/actions/allure-report-generate

@@ -474,11 +582,10 @@ jobs:
       uses: slackapi/slack-github-action@v1
       with:
         channel-id: "C033QLM5P7D" # dev-staging-stream
-        slack-message: "Periodic perf testing neon-captest-pgvector: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+        slack-message: "Periodic perf testing ${PLATFORM}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
       env:
         SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

-
   clickbench-compare:
     # ClichBench DB for rds-aurora and rds-Postgres deployed to the same clusters
     # we use for performance testing in pgbench-compare.

@@ -523,7 +630,7 @@ jobs:
       id: set-up-connstr
       run: |
         case "${PLATFORM}" in
-          neon-captest-reuse)
+          neonvm-captest-reuse)
             CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR }}
             ;;
           rds-aurora)

@@ -533,7 +640,7 @@ jobs:
             CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CLICKBENCH_10M_CONNSTR }}
             ;;
           *)
-            echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
+            echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'"
             exit 1
             ;;
         esac

@@ -610,7 +717,7 @@ jobs:
     - name: Get Connstring Secret Name
       run: |
         case "${PLATFORM}" in
-          neon-captest-reuse)
+          neonvm-captest-reuse)
             ENV_PLATFORM=CAPTEST_TPCH
             ;;
           rds-aurora)

@@ -620,7 +727,7 @@ jobs:
             ENV_PLATFORM=RDS_AURORA_TPCH
             ;;
           *)
-            echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
+            echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'"
             exit 1
             ;;
         esac

@@ -697,7 +804,7 @@ jobs:
       id: set-up-connstr
       run: |
         case "${PLATFORM}" in
-          neon-captest-reuse)
+          neonvm-captest-reuse)
             CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_CAPTEST_CONNSTR }}
             ;;
           rds-aurora)

@@ -707,7 +814,7 @@ jobs:
             CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_POSTGRES_CONNSTR }}
             ;;
           *)
-            echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
+            echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'"
             exit 1
             ;;
         esac

@@ -722,6 +829,7 @@ jobs:
         run_in_parallel: false
         save_perf_report: ${{ env.SAVE_PERF_REPORT }}
         extra_params: -m remote_cluster --timeout 21600 -k test_user_examples
+        pg_version: ${{ env.DEFAULT_PG_VERSION }}
       env:
         VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
         PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
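The matrix generation above is plain shell plus jq, so the Saturday branch can be exercised locally to see the JSON the workflow would emit. A sketch using the same variable names as the script (the starting matrix here is deliberately trimmed):

```bash
region_id_default='aws-us-east-2'
runner_default='["self-hosted", "us-east-2", "x64"]'
image_default='369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned'

matrix='{ "pg_version": [ 16 ], "platform": [ "neonvm-captest-new" ], "db_size": [ "10gb" ] }'

# Same jq append the workflow performs on Saturdays for the RDS comparison run.
matrix=$(echo "$matrix" | jq '.include += [{ "pg_version": 14, "region_id": "'"$region_id_default"'", "platform": "rds-postgres", "db_size": "10gb", "runner": '"$runner_default"', "image": "'"$image_default"'" }]')

echo "$matrix" | jq --compact-output '.'
```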
.github/workflows/build-build-tools-image.yml (vendored): 10 changed lines

@@ -72,6 +72,12 @@ jobs:
           username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
           password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

+      - uses: docker/login-action@v3
+        with:
+          registry: cache.neon.build
+          username: ${{ secrets.NEON_CI_DOCKERCACHE_USERNAME }}
+          password: ${{ secrets.NEON_CI_DOCKERCACHE_PASSWORD }}
+
       - uses: docker/build-push-action@v6
         with:
           context: .

@@ -79,8 +85,8 @@ jobs:
           push: true
           pull: true
           file: Dockerfile.build-tools
-          cache-from: type=registry,ref=neondatabase/build-tools:cache-${{ matrix.arch }}
-          cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=neondatabase/build-tools:cache-{0},mode=max', matrix.arch) || '' }}
+          cache-from: type=registry,ref=cache.neon.build/build-tools:cache-${{ matrix.arch }}
+          cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/build-tools:cache-{0},mode=max', matrix.arch) || '' }}
           tags: neondatabase/build-tools:${{ inputs.image-tag }}-${{ matrix.arch }}

       - name: Remove custom docker config directory
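The switch above points BuildKit's registry cache at an in-house registry (cache.neon.build) instead of Docker Hub. The equivalent manual invocation looks roughly like this; the local tag is illustrative and credentials for the cache registry are assumed to be available:

```bash
docker login cache.neon.build   # cache-registry credentials (assumed)

docker buildx build \
  --file Dockerfile.build-tools \
  --cache-from type=registry,ref=cache.neon.build/build-tools:cache-x64 \
  --cache-to   type=registry,ref=cache.neon.build/build-tools:cache-x64,mode=max \
  --tag neondatabase/build-tools:local-x64 \
  --push \
  .
```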
.github/workflows/build_and_test.yml (vendored): 374 changed lines

@@ -125,7 +125,11 @@ jobs:

   check-codestyle-rust:
     needs: [ check-permissions, build-build-tools-image ]
-    runs-on: [ self-hosted, gen3, small ]
+    strategy:
+      matrix:
+        arch: [ x64, arm64 ]
+    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}
+
     container:
       image: ${{ needs.build-build-tools-image.outputs.image }}
       credentials:

@@ -193,291 +197,30 @@ jobs:
       if: ${{ !cancelled() }}
       run: cargo deny check --hide-inclusion-graph

-  build-neon:
-    needs: [ check-permissions, tag, build-build-tools-image ]
-    runs-on: [ self-hosted, gen3, large ]
-    container:
-      image: ${{ needs.build-build-tools-image.outputs.image }}
-      credentials:
-        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
-        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
-      # Raise locked memory limit for tokio-epoll-uring.
-      # On 5.10 LTS kernels < 5.10.162 (and generally mainline kernels < 5.12),
-      # io_uring will account the memory of the CQ and SQ as locked.
-      # More details: https://github.com/neondatabase/neon/issues/6373#issuecomment-1905814391
-      options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
-    strategy:
-      fail-fast: false
-      matrix:
-        build_type: [ debug, release ]
-    env:
-      BUILD_TYPE: ${{ matrix.build_type }}
-      GIT_VERSION: ${{ github.event.pull_request.head.sha || github.sha }}
-      BUILD_TAG: ${{ needs.tag.outputs.build-tag }}
-
-    steps:
-    - name: Fix git ownership
-      run: |
-        # Workaround for `fatal: detected dubious ownership in repository at ...`
-        #
-        # Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
-        # Ref https://github.com/actions/checkout/issues/785
-        #
-        git config --global --add safe.directory ${{ github.workspace }}
-        git config --global --add safe.directory ${GITHUB_WORKSPACE}
-        for r in 14 15 16; do
-          git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
-          git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
-        done
-
-    - name: Checkout
-      uses: actions/checkout@v4
-      with:
-        submodules: true
-        fetch-depth: 1
-
-    - name: Set pg 14 revision for caching
-      id: pg_v14_rev
-      run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
-
-    - name: Set pg 15 revision for caching
-      id: pg_v15_rev
-      run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
-
-    - name: Set pg 16 revision for caching
-      id: pg_v16_rev
-      run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
-
-    # Set some environment variables used by all the steps.
-    #
-    # CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
-    # It also includes --features, if any
-    #
-    # CARGO_FEATURES is passed to "cargo metadata". It is separate from CARGO_FLAGS,
-    # because "cargo metadata" doesn't accept --release or --debug options
-    #
-    # We run tests with addtional features, that are turned off by default (e.g. in release builds), see
-    # corresponding Cargo.toml files for their descriptions.
-    - name: Set env variables
-      run: |
-        CARGO_FEATURES="--features testing"
-        if [[ $BUILD_TYPE == "debug" ]]; then
-          cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
-          CARGO_FLAGS="--locked"
-        elif [[ $BUILD_TYPE == "release" ]]; then
-          cov_prefix=""
-          CARGO_FLAGS="--locked --release"
+  build-and-test-locally:
+    needs: [ tag, build-build-tools-image ]
+    strategy:
+      fail-fast: false
+      matrix:
+        arch: [ x64 ]
+        # Do not build or run tests in debug for release branches
+        build-type: ${{ fromJson((startsWith(github.ref_name, 'release' && github.event_name == 'push')) && '["release"]' || '["debug", "release"]') }}
+        include:
+          - build-type: release
+            arch: arm64
+    uses: ./.github/workflows/_build-and-test-locally.yml
+    with:
+      arch: ${{ matrix.arch }}
+      build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}
+      build-tag: ${{ needs.tag.outputs.build-tag }}
+      build-type: ${{ matrix.build-type }}
+      # Run tests on all Postgres versions in release builds and only on the latest version in debug builds
+      pg-versions: ${{ matrix.build-type == 'release' && '["v14", "v15", "v16"]' || '["v16"]' }}
+    secrets: inherit

[remainder of this hunk not captured in the mirror]
|
|
||||||
fi
|
|
||||||
{
|
|
||||||
echo "cov_prefix=${cov_prefix}"
|
|
||||||
echo "CARGO_FEATURES=${CARGO_FEATURES}"
|
|
||||||
echo "CARGO_FLAGS=${CARGO_FLAGS}"
|
|
||||||
echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo"
|
|
||||||
} >> $GITHUB_ENV
|
|
||||||
|
|
||||||
# Disabled for now
|
|
||||||
# Don't include the ~/.cargo/registry/src directory. It contains just
|
|
||||||
# uncompressed versions of the crates in ~/.cargo/registry/cache
|
|
||||||
# directory, and it's faster to let 'cargo' to rebuild it from the
|
|
||||||
# compressed crates.
|
|
||||||
# - name: Cache cargo deps
|
|
||||||
# id: cache_cargo
|
|
||||||
# uses: actions/cache@v4
|
|
||||||
# with:
|
|
||||||
# path: |
|
|
||||||
# ~/.cargo/registry/
|
|
||||||
# !~/.cargo/registry/src
|
|
||||||
# ~/.cargo/git/
|
|
||||||
# target/
|
|
||||||
# # Fall back to older versions of the key, if no cache for current Cargo.lock was found
|
|
||||||
# key: |
|
|
||||||
# v1-${{ runner.os }}-${{ runner.arch }}-${{ matrix.build_type }}-cargo-${{ hashFiles('rust-toolchain.toml') }}-${{ hashFiles('Cargo.lock') }}
|
|
||||||
# v1-${{ runner.os }}-${{ runner.arch }}-${{ matrix.build_type }}-cargo-${{ hashFiles('rust-toolchain.toml') }}-
|
|
||||||
|
|
||||||
- name: Cache postgres v14 build
|
|
||||||
id: cache_pg_14
|
|
||||||
uses: actions/cache@v4
|
|
||||||
with:
|
|
||||||
path: pg_install/v14
|
|
||||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ matrix.build_type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
|
|
||||||
|
|
||||||
- name: Cache postgres v15 build
|
|
||||||
id: cache_pg_15
|
|
||||||
uses: actions/cache@v4
|
|
||||||
with:
|
|
||||||
path: pg_install/v15
|
|
||||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ matrix.build_type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
|
|
||||||
|
|
||||||
- name: Cache postgres v16 build
|
|
||||||
id: cache_pg_16
|
|
||||||
uses: actions/cache@v4
|
|
||||||
with:
|
|
||||||
path: pg_install/v16
|
|
||||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ matrix.build_type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
|
|
||||||
|
|
||||||
- name: Build postgres v14
|
|
||||||
if: steps.cache_pg_14.outputs.cache-hit != 'true'
|
|
||||||
run: mold -run make postgres-v14 -j$(nproc)
|
|
||||||
|
|
||||||
- name: Build postgres v15
|
|
||||||
if: steps.cache_pg_15.outputs.cache-hit != 'true'
|
|
||||||
run: mold -run make postgres-v15 -j$(nproc)
|
|
||||||
|
|
||||||
- name: Build postgres v16
|
|
||||||
if: steps.cache_pg_16.outputs.cache-hit != 'true'
|
|
||||||
run: mold -run make postgres-v16 -j$(nproc)
|
|
||||||
|
|
||||||
- name: Build neon extensions
|
|
||||||
run: mold -run make neon-pg-ext -j$(nproc)
|
|
||||||
|
|
||||||
- name: Build walproposer-lib
|
|
||||||
run: mold -run make walproposer-lib -j$(nproc)
|
|
||||||
|
|
||||||
- name: Run cargo build
|
|
||||||
run: |
|
|
||||||
PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
|
|
||||||
export PQ_LIB_DIR
|
|
||||||
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins --tests
|
|
||||||
|
|
||||||
# Do install *before* running rust tests because they might recompile the
|
|
||||||
# binaries with different features/flags.
|
|
||||||
- name: Install rust binaries
|
|
||||||
run: |
|
|
||||||
# Install target binaries
|
|
||||||
mkdir -p /tmp/neon/bin/
|
|
||||||
binaries=$(
|
|
||||||
${cov_prefix} cargo metadata $CARGO_FEATURES --format-version=1 --no-deps |
|
|
||||||
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
|
|
||||||
)
|
|
||||||
for bin in $binaries; do
|
|
||||||
SRC=target/$BUILD_TYPE/$bin
|
|
||||||
DST=/tmp/neon/bin/$bin
|
|
||||||
cp "$SRC" "$DST"
|
|
||||||
done
|
|
||||||
|
|
||||||
# Install test executables and write list of all binaries (for code coverage)
|
|
||||||
if [[ $BUILD_TYPE == "debug" ]]; then
|
|
||||||
# Keep bloated coverage data files away from the rest of the artifact
|
|
||||||
mkdir -p /tmp/coverage/
|
|
||||||
|
|
||||||
mkdir -p /tmp/neon/test_bin/
|
|
||||||
|
|
||||||
test_exe_paths=$(
|
|
||||||
${cov_prefix} cargo test $CARGO_FLAGS $CARGO_FEATURES --message-format=json --no-run |
|
|
||||||
jq -r '.executable | select(. != null)'
|
|
||||||
)
|
|
||||||
for bin in $test_exe_paths; do
|
|
||||||
SRC=$bin
|
|
||||||
DST=/tmp/neon/test_bin/$(basename $bin)
|
|
||||||
|
|
||||||
# We don't need debug symbols for code coverage, so strip them out to make
|
|
||||||
# the artifact smaller.
|
|
||||||
strip "$SRC" -o "$DST"
|
|
||||||
echo "$DST" >> /tmp/coverage/binaries.list
|
|
||||||
done
|
|
||||||
|
|
||||||
for bin in $binaries; do
|
|
||||||
echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
|
|
||||||
done
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Run rust tests
|
|
||||||
env:
|
|
||||||
NEXTEST_RETRIES: 3
|
|
||||||
run: |
|
|
||||||
PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
|
|
||||||
export PQ_LIB_DIR
|
|
||||||
LD_LIBRARY_PATH=$(pwd)/pg_install/v16/lib
|
|
||||||
export LD_LIBRARY_PATH
|
|
||||||
|
|
||||||
#nextest does not yet support running doctests
|
|
||||||
cargo test --doc $CARGO_FLAGS $CARGO_FEATURES
|
|
||||||
|
|
||||||
for io_engine in std-fs tokio-epoll-uring ; do
|
|
||||||
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES
|
|
||||||
done
|
|
||||||
|
|
||||||
# Run separate tests for real S3
|
|
||||||
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
|
|
||||||
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
|
|
||||||
export REMOTE_STORAGE_S3_REGION=eu-central-1
|
|
||||||
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_s3)'
|
|
||||||
|
|
||||||
# Run separate tests for real Azure Blob Storage
|
|
||||||
# XXX: replace region with `eu-central-1`-like region
|
|
||||||
export ENABLE_REAL_AZURE_REMOTE_STORAGE=y
|
|
||||||
export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}"
|
|
||||||
export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
|
|
||||||
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
|
|
||||||
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
|
|
||||||
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)'
|
|
||||||
|
|
||||||
- name: Install postgres binaries
|
|
||||||
run: cp -a pg_install /tmp/neon/pg_install
|
|
||||||
|
|
||||||
- name: Upload Neon artifact
|
|
||||||
uses: ./.github/actions/upload
|
|
||||||
with:
|
|
||||||
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ matrix.build_type }}-artifact
|
|
||||||
path: /tmp/neon
|
|
||||||
|
|
||||||
# XXX: keep this after the binaries.list is formed, so the coverage can properly work later
|
|
||||||
- name: Merge and upload coverage data
|
|
||||||
if: matrix.build_type == 'debug'
|
|
||||||
uses: ./.github/actions/save-coverage-data
|
|
||||||
|
|
||||||
regress-tests:
|
|
||||||
needs: [ check-permissions, build-neon, build-build-tools-image, tag ]
|
|
||||||
runs-on: [ self-hosted, gen3, large ]
|
|
||||||
container:
|
|
||||||
image: ${{ needs.build-build-tools-image.outputs.image }}
|
|
||||||
credentials:
|
|
||||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
|
||||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
|
||||||
# for changed limits, see comments on `options:` earlier in this file
|
|
||||||
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
|
|
||||||
strategy:
|
|
||||||
fail-fast: false
|
|
||||||
matrix:
|
|
||||||
build_type: [ debug, release ]
|
|
||||||
pg_version: [ v14, v15, v16 ]
|
|
||||||
steps:
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
fetch-depth: 1
|
|
||||||
|
|
||||||
- name: Pytest regression tests
|
|
||||||
uses: ./.github/actions/run-python-test-set
|
|
||||||
timeout-minutes: 60
|
|
||||||
with:
|
|
||||||
build_type: ${{ matrix.build_type }}
|
|
||||||
test_selection: regress
|
|
||||||
needs_postgres_source: true
|
|
||||||
run_with_real_s3: true
|
|
||||||
real_s3_bucket: neon-github-ci-tests
|
|
||||||
real_s3_region: eu-central-1
|
|
||||||
rerun_flaky: true
|
|
||||||
pg_version: ${{ matrix.pg_version }}
|
|
||||||
env:
|
|
||||||
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
|
||||||
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
|
|
||||||
BUILD_TAG: ${{ needs.tag.outputs.build-tag }}
|
|
||||||
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
|
|
||||||
PAGESERVER_GET_VECTORED_IMPL: vectored
|
|
||||||
PAGESERVER_GET_IMPL: vectored
|
|
||||||
PAGESERVER_VALIDATE_VEC_GET: true
|
|
||||||
|
|
||||||
# Temporary disable this step until we figure out why it's so flaky
|
|
||||||
# Ref https://github.com/neondatabase/neon/issues/4540
|
|
||||||
- name: Merge and upload coverage data
|
|
||||||
if: |
|
|
||||||
false &&
|
|
||||||
matrix.build_type == 'debug' && matrix.pg_version == 'v14'
|
|
||||||
uses: ./.github/actions/save-coverage-data
|
|
||||||
|
|
||||||
|
# Keep `benchmarks` job outside of `build-and-test-locally` workflow to make job failures non-blocking
|
||||||
get-benchmarks-durations:
|
get-benchmarks-durations:
|
||||||
|
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
|
||||||
outputs:
|
outputs:
|
||||||
json: ${{ steps.get-benchmark-durations.outputs.json }}
|
json: ${{ steps.get-benchmark-durations.outputs.json }}
|
||||||
needs: [ check-permissions, build-build-tools-image ]
|
needs: [ check-permissions, build-build-tools-image ]
|
||||||
@@ -488,7 +231,6 @@ jobs:
|
|||||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||||
options: --init
|
options: --init
|
||||||
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
@@ -513,7 +255,8 @@ jobs:
|
|||||||
echo "json=$(jq --compact-output '.' /tmp/benchmark_durations.json)" >> $GITHUB_OUTPUT
|
echo "json=$(jq --compact-output '.' /tmp/benchmark_durations.json)" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
benchmarks:
|
benchmarks:
|
||||||
needs: [ check-permissions, build-neon, build-build-tools-image, get-benchmarks-durations ]
|
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
|
||||||
|
needs: [ check-permissions, build-and-test-locally, build-build-tools-image, get-benchmarks-durations ]
|
||||||
runs-on: [ self-hosted, gen3, small ]
|
runs-on: [ self-hosted, gen3, small ]
|
||||||
container:
|
container:
|
||||||
image: ${{ needs.build-build-tools-image.outputs.image }}
|
image: ${{ needs.build-build-tools-image.outputs.image }}
|
||||||
@@ -522,7 +265,6 @@ jobs:
|
|||||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||||
# for changed limits, see comments on `options:` earlier in this file
|
# for changed limits, see comments on `options:` earlier in this file
|
||||||
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
|
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
|
||||||
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
|
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
@@ -547,9 +289,6 @@ jobs:
|
|||||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||||
TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
|
TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
|
||||||
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
|
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
|
||||||
PAGESERVER_GET_VECTORED_IMPL: vectored
|
|
||||||
PAGESERVER_GET_IMPL: vectored
|
|
||||||
PAGESERVER_VALIDATE_VEC_GET: false
|
|
||||||
# XXX: no coverage data handling here, since benchmarks are run on release builds,
|
# XXX: no coverage data handling here, since benchmarks are run on release builds,
|
||||||
# while coverage is currently collected for the debug ones
|
# while coverage is currently collected for the debug ones
|
||||||
|
|
||||||
@@ -570,7 +309,7 @@ jobs:
|
|||||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||||
|
|
||||||
create-test-report:
|
create-test-report:
|
||||||
needs: [ check-permissions, regress-tests, coverage-report, benchmarks, build-build-tools-image ]
|
needs: [ check-permissions, build-and-test-locally, coverage-report, build-build-tools-image, benchmarks ]
|
||||||
if: ${{ !cancelled() && contains(fromJSON('["skipped", "success"]'), needs.check-permissions.result) }}
|
if: ${{ !cancelled() && contains(fromJSON('["skipped", "success"]'), needs.check-permissions.result) }}
|
||||||
outputs:
|
outputs:
|
||||||
report-url: ${{ steps.create-allure-report.outputs.report-url }}
|
report-url: ${{ steps.create-allure-report.outputs.report-url }}
|
||||||
@@ -621,7 +360,7 @@ jobs:
|
|||||||
})
|
})
|
||||||
|
|
||||||
coverage-report:
|
coverage-report:
|
||||||
needs: [ check-permissions, regress-tests, build-build-tools-image ]
|
needs: [ check-permissions, build-build-tools-image, build-and-test-locally ]
|
||||||
runs-on: [ self-hosted, gen3, small ]
|
runs-on: [ self-hosted, gen3, small ]
|
||||||
container:
|
container:
|
||||||
image: ${{ needs.build-build-tools-image.outputs.image }}
|
image: ${{ needs.build-build-tools-image.outputs.image }}
|
||||||
@@ -760,6 +499,12 @@ jobs:
|
|||||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||||
|
|
||||||
|
- uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
registry: cache.neon.build
|
||||||
|
username: ${{ secrets.NEON_CI_DOCKERCACHE_USERNAME }}
|
||||||
|
password: ${{ secrets.NEON_CI_DOCKERCACHE_PASSWORD }}
|
||||||
|
|
||||||
- uses: docker/build-push-action@v6
|
- uses: docker/build-push-action@v6
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
@@ -771,8 +516,8 @@ jobs:
|
|||||||
push: true
|
push: true
|
||||||
pull: true
|
pull: true
|
||||||
file: Dockerfile
|
file: Dockerfile
|
||||||
cache-from: type=registry,ref=neondatabase/neon:cache-${{ matrix.arch }}
|
cache-from: type=registry,ref=cache.neon.build/neon:cache-${{ matrix.arch }}
|
||||||
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=neondatabase/neon:cache-{0},mode=max', matrix.arch) || '' }}
|
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon:cache-{0},mode=max', matrix.arch) || '' }}
|
||||||
tags: |
|
tags: |
|
||||||
neondatabase/neon:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}
|
neondatabase/neon:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}
|
||||||
|
|
||||||
@@ -851,6 +596,12 @@ jobs:
|
|||||||
username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||||
password: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
password: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||||
|
|
||||||
|
- uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
registry: cache.neon.build
|
||||||
|
username: ${{ secrets.NEON_CI_DOCKERCACHE_USERNAME }}
|
||||||
|
password: ${{ secrets.NEON_CI_DOCKERCACHE_PASSWORD }}
|
||||||
|
|
||||||
- name: Build compute-node image
|
- name: Build compute-node image
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@v6
|
||||||
with:
|
with:
|
||||||
@@ -864,8 +615,8 @@ jobs:
|
|||||||
push: true
|
push: true
|
||||||
pull: true
|
pull: true
|
||||||
file: Dockerfile.compute-node
|
file: Dockerfile.compute-node
|
||||||
cache-from: type=registry,ref=neondatabase/compute-node-${{ matrix.version }}:cache-${{ matrix.arch }}
|
cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version }}:cache-${{ matrix.arch }}
|
||||||
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=neondatabase/compute-node-{0}:cache-{1},mode=max', matrix.version, matrix.arch) || '' }}
|
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1},mode=max', matrix.version, matrix.arch) || '' }}
|
||||||
tags: |
|
tags: |
|
||||||
neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}
|
neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}
|
||||||
|
|
||||||
@@ -884,8 +635,8 @@ jobs:
|
|||||||
pull: true
|
pull: true
|
||||||
file: Dockerfile.compute-node
|
file: Dockerfile.compute-node
|
||||||
target: neon-pg-ext-test
|
target: neon-pg-ext-test
|
||||||
cache-from: type=registry,ref=neondatabase/neon-test-extensions-${{ matrix.version }}:cache-${{ matrix.arch }}
|
cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version }}:cache-${{ matrix.arch }}
|
||||||
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=neondatabase/neon-test-extensions-{0}:cache-{1},mode=max', matrix.version, matrix.arch) || '' }}
|
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon-test-extensions-{0}:cache-{1},mode=max', matrix.version, matrix.arch) || '' }}
|
||||||
tags: |
|
tags: |
|
||||||
neondatabase/neon-test-extensions-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}-${{ matrix.arch }}
|
neondatabase/neon-test-extensions-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}-${{ matrix.arch }}
|
||||||
|
|
||||||
@@ -1085,6 +836,9 @@ jobs:
|
|||||||
rm -rf .docker-custom
|
rm -rf .docker-custom
|
||||||
|
|
||||||
promote-images:
|
promote-images:
|
||||||
|
permissions:
|
||||||
|
contents: read # This is required for actions/checkout
|
||||||
|
id-token: write # This is required for Azure Login to work.
|
||||||
needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
|
needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
@@ -1111,6 +865,28 @@ jobs:
|
|||||||
neondatabase/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }}
|
neondatabase/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }}
|
||||||
done
|
done
|
||||||
|
|
||||||
|
- name: Azure login
|
||||||
|
if: github.ref_name == 'main'
|
||||||
|
uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # @v2.1.1
|
||||||
|
with:
|
||||||
|
client-id: ${{ secrets.AZURE_DEV_CLIENT_ID }}
|
||||||
|
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
|
||||||
|
subscription-id: ${{ secrets.AZURE_DEV_SUBSCRIPTION_ID }}
|
||||||
|
|
||||||
|
- name: Login to ACR
|
||||||
|
if: github.ref_name == 'main'
|
||||||
|
run: |
|
||||||
|
az acr login --name=neoneastus2
|
||||||
|
|
||||||
|
- name: Copy docker images to ACR-dev
|
||||||
|
if: github.ref_name == 'main'
|
||||||
|
run: |
|
||||||
|
for image in neon compute-tools {vm-,}compute-node-{v14,v15,v16}; do
|
||||||
|
docker buildx imagetools create \
|
||||||
|
-t neoneastus2.azurecr.io/neondatabase/${image}:${{ needs.tag.outputs.build-tag }} \
|
||||||
|
neondatabase/${image}:${{ needs.tag.outputs.build-tag }}
|
||||||
|
done
|
||||||
|
|
||||||
- name: Add latest tag to images
|
- name: Add latest tag to images
|
||||||
if: github.ref_name == 'main'
|
if: github.ref_name == 'main'
|
||||||
run: |
|
run: |
|
||||||
@@ -1223,7 +999,7 @@ jobs:
|
|||||||
exit 1
|
exit 1
|
||||||
|
|
||||||
deploy:
|
deploy:
|
||||||
needs: [ check-permissions, promote-images, tag, regress-tests, trigger-custom-extensions-build-and-wait ]
|
needs: [ check-permissions, promote-images, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait ]
|
||||||
if: github.ref_name == 'main' || github.ref_name == 'release'|| github.ref_name == 'release-proxy'
|
if: github.ref_name == 'main' || github.ref_name == 'release'|| github.ref_name == 'release-proxy'
|
||||||
|
|
||||||
runs-on: [ self-hosted, gen3, small ]
|
runs-on: [ self-hosted, gen3, small ]
|
||||||
@@ -1324,7 +1100,7 @@ jobs:
|
|||||||
})
|
})
|
||||||
|
|
||||||
promote-compatibility-data:
|
promote-compatibility-data:
|
||||||
needs: [ check-permissions, promote-images, tag, regress-tests ]
|
needs: [ check-permissions, promote-images, tag, build-and-test-locally ]
|
||||||
if: github.ref_name == 'release'
|
if: github.ref_name == 'release'
|
||||||
|
|
||||||
runs-on: [ self-hosted, gen3, small ]
|
runs-on: [ self-hosted, gen3, small ]
|
||||||
@@ -1363,7 +1139,7 @@ jobs:
|
|||||||
done
|
done
|
||||||
|
|
||||||
pin-build-tools-image:
|
pin-build-tools-image:
|
||||||
needs: [ build-build-tools-image, promote-images, regress-tests ]
|
needs: [ build-build-tools-image, promote-images, build-and-test-locally ]
|
||||||
if: github.ref_name == 'main'
|
if: github.ref_name == 'main'
|
||||||
uses: ./.github/workflows/pin-build-tools-image.yml
|
uses: ./.github/workflows/pin-build-tools-image.yml
|
||||||
with:
|
with:
|
||||||
@@ -1385,7 +1161,7 @@ jobs:
|
|||||||
needs:
|
needs:
|
||||||
- check-codestyle-python
|
- check-codestyle-python
|
||||||
- check-codestyle-rust
|
- check-codestyle-rust
|
||||||
- regress-tests
|
- build-and-test-locally
|
||||||
- test-images
|
- test-images
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-22.04
|
||||||
steps:
|
steps:
|
||||||
|
|||||||
.github/workflows/neon_extra_builds.yml (217 lines changed, vendored)

@@ -133,221 +133,6 @@ jobs:
  - name: Check that no warnings are produced
  run: ./run_clippy.sh

- check-linux-arm-build:
- needs: [ check-permissions, build-build-tools-image ]
- timeout-minutes: 90
- runs-on: [ self-hosted, small-arm64 ]

- env:
- # Use release build only, to have less debug info around
- # Hence keeping target/ (and general cache size) smaller
- BUILD_TYPE: release
- CARGO_FEATURES: --features testing
- CARGO_FLAGS: --release
- AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
- AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}

- container:
- image: ${{ needs.build-build-tools-image.outputs.image }}
- credentials:
- username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
- password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- options: --init

- steps:
- - name: Fix git ownership
- run: |
- # Workaround for `fatal: detected dubious ownership in repository at ...`
- #
- # Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
- # Ref https://github.com/actions/checkout/issues/785
- #
- git config --global --add safe.directory ${{ github.workspace }}
- git config --global --add safe.directory ${GITHUB_WORKSPACE}
- for r in 14 15 16; do
- git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
- git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
- done

- - name: Checkout
- uses: actions/checkout@v4
- with:
- submodules: true
- fetch-depth: 1

- - name: Set pg 14 revision for caching
- id: pg_v14_rev
- run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT

- - name: Set pg 15 revision for caching
- id: pg_v15_rev
- run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT

- - name: Set pg 16 revision for caching
- id: pg_v16_rev
- run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT

- - name: Set env variables
- run: |
- echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo" >> $GITHUB_ENV

- - name: Cache postgres v14 build
- id: cache_pg_14
- uses: actions/cache@v4
- with:
- path: pg_install/v14
- key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

- - name: Cache postgres v15 build
- id: cache_pg_15
- uses: actions/cache@v4
- with:
- path: pg_install/v15
- key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

- - name: Cache postgres v16 build
- id: cache_pg_16
- uses: actions/cache@v4
- with:
- path: pg_install/v16
- key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

- - name: Build postgres v14
- if: steps.cache_pg_14.outputs.cache-hit != 'true'
- run: mold -run make postgres-v14 -j$(nproc)

- - name: Build postgres v15
- if: steps.cache_pg_15.outputs.cache-hit != 'true'
- run: mold -run make postgres-v15 -j$(nproc)

- - name: Build postgres v16
- if: steps.cache_pg_16.outputs.cache-hit != 'true'
- run: mold -run make postgres-v16 -j$(nproc)

- - name: Build neon extensions
- run: mold -run make neon-pg-ext -j$(nproc)

- - name: Build walproposer-lib
- run: mold -run make walproposer-lib -j$(nproc)

- - name: Run cargo build
- run: |
- PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
- export PQ_LIB_DIR
- mold -run cargo build --locked $CARGO_FLAGS $CARGO_FEATURES --bins --tests -j$(nproc)

- - name: Run cargo test
- env:
- NEXTEST_RETRIES: 3
- run: |
- PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
- export PQ_LIB_DIR
- LD_LIBRARY_PATH=$(pwd)/pg_install/v16/lib
- export LD_LIBRARY_PATH

- cargo nextest run $CARGO_FEATURES -j$(nproc)

- # Run separate tests for real S3
- export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
- export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
- export REMOTE_STORAGE_S3_REGION=eu-central-1
- # Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
- cargo nextest run --package remote_storage --test test_real_s3 -j$(nproc)

- # Run separate tests for real Azure Blob Storage
- # XXX: replace region with `eu-central-1`-like region
- export ENABLE_REAL_AZURE_REMOTE_STORAGE=y
- export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}"
- export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
- export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
- export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
- # Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
- cargo nextest run --package remote_storage --test test_real_azure -j$(nproc)

- check-codestyle-rust-arm:
- needs: [ check-permissions, build-build-tools-image ]
- timeout-minutes: 90
- runs-on: [ self-hosted, small-arm64 ]

- container:
- image: ${{ needs.build-build-tools-image.outputs.image }}
- credentials:
- username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
- password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- options: --init

- strategy:
- fail-fast: false
- matrix:
- build_type: [ debug, release ]

- steps:
- - name: Fix git ownership
- run: |
- # Workaround for `fatal: detected dubious ownership in repository at ...`
- #
- # Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
- # Ref https://github.com/actions/checkout/issues/785
- #
- git config --global --add safe.directory ${{ github.workspace }}
- git config --global --add safe.directory ${GITHUB_WORKSPACE}
- for r in 14 15 16; do
- git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
- git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
- done

- - name: Checkout
- uses: actions/checkout@v4
- with:
- submodules: true
- fetch-depth: 1

- # Some of our rust modules use FFI and need those to be checked
- - name: Get postgres headers
- run: make postgres-headers -j$(nproc)

- # cargo hack runs the given cargo subcommand (clippy in this case) for all feature combinations.
- # This will catch compiler & clippy warnings in all feature combinations.
- # TODO: use cargo hack for build and test as well, but, that's quite expensive.
- # NB: keep clippy args in sync with ./run_clippy.sh
- - run: |
- CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")"
- if [ "$CLIPPY_COMMON_ARGS" = "" ]; then
- echo "No clippy args found in .neon_clippy_args"
- exit 1
- fi
- echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV

- - name: Run cargo clippy (debug)
- if: matrix.build_type == 'debug'
- run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS
- - name: Run cargo clippy (release)
- if: matrix.build_type == 'release'
- run: cargo hack --feature-powerset clippy --release $CLIPPY_COMMON_ARGS

- - name: Check documentation generation
- if: matrix.build_type == 'release'
- run: cargo doc --workspace --no-deps --document-private-items -j$(nproc)
- env:
- RUSTDOCFLAGS: "-Dwarnings -Arustdoc::private_intra_doc_links"

- # Use `${{ !cancelled() }}` to run quck tests after the longer clippy run
- - name: Check formatting
- if: ${{ !cancelled() && matrix.build_type == 'release' }}
- run: cargo fmt --all -- --check

- # https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
- - name: Check rust dependencies
- if: ${{ !cancelled() && matrix.build_type == 'release' }}
- run: |
- cargo hakari generate --diff # workspace-hack Cargo.toml is up-to-date
- cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack

- # https://github.com/EmbarkStudios/cargo-deny
- - name: Check rust licenses/bans/advisories/sources
- if: ${{ !cancelled() && matrix.build_type == 'release' }}
- run: cargo deny check

  gather-rust-build-stats:
  needs: [ check-permissions, build-build-tools-image ]
  if: |
@@ -364,8 +149,6 @@ jobs:

  env:
  BUILD_TYPE: release
- # remove the cachepot wrapper and build without crate caches
- RUSTC_WRAPPER: ""
  # build with incremental compilation produce partial results
  # so do not attempt to cache this build, also disable the incremental compilation
  CARGO_INCREMENTAL: 0
.github/workflows/pg-clients.yml (96 lines changed, vendored)

@@ -13,6 +13,7 @@ on:
  paths:
  - '.github/workflows/pg-clients.yml'
  - 'test_runner/pg_clients/**'
+ - 'test_runner/logical_repl/**'
  - 'poetry.lock'
  workflow_dispatch:

@@ -49,6 +50,101 @@ jobs:
  image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }}
  secrets: inherit

+ test-logical-replication:
+ needs: [ build-build-tools-image ]
+ runs-on: ubuntu-22.04
+
+ container:
+ image: ${{ needs.build-build-tools-image.outputs.image }}
+ credentials:
+ username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+ password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
+ options: --init --user root
+ services:
+ clickhouse:
+ image: clickhouse/clickhouse-server:24.6.3.64
+ ports:
+ - 9000:9000
+ - 8123:8123
+ zookeeper:
+ image: quay.io/debezium/zookeeper:2.7
+ ports:
+ - 2181:2181
+ kafka:
+ image: quay.io/debezium/kafka:2.7
+ env:
+ ZOOKEEPER_CONNECT: "zookeeper:2181"
+ KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092
+ KAFKA_BROKER_ID: 1
+ KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
+ KAFKA_JMX_PORT: 9991
+ ports:
+ - 9092:9092
+ debezium:
+ image: quay.io/debezium/connect:2.7
+ env:
+ BOOTSTRAP_SERVERS: kafka:9092
+ GROUP_ID: 1
+ CONFIG_STORAGE_TOPIC: debezium-config
+ OFFSET_STORAGE_TOPIC: debezium-offset
+ STATUS_STORAGE_TOPIC: debezium-status
+ DEBEZIUM_CONFIG_CONNECTOR_CLASS: io.debezium.connector.postgresql.PostgresConnector
+ ports:
+ - 8083:8083
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Download Neon artifact
+ uses: ./.github/actions/download
+ with:
+ name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
+ path: /tmp/neon/
+ prefix: latest
+
+ - name: Create Neon Project
+ id: create-neon-project
+ uses: ./.github/actions/neon-project-create
+ with:
+ api_key: ${{ secrets.NEON_STAGING_API_KEY }}
+ postgres_version: ${{ env.DEFAULT_PG_VERSION }}
+
+ - name: Run tests
+ uses: ./.github/actions/run-python-test-set
+ with:
+ build_type: remote
+ test_selection: logical_repl
+ run_in_parallel: false
+ extra_params: -m remote_cluster
+ pg_version: ${{ env.DEFAULT_PG_VERSION }}
+ env:
+ BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
+
+ - name: Delete Neon Project
+ if: always()
+ uses: ./.github/actions/neon-project-delete
+ with:
+ project_id: ${{ steps.create-neon-project.outputs.project_id }}
+ api_key: ${{ secrets.NEON_STAGING_API_KEY }}
+
+ - name: Create Allure report
+ if: ${{ !cancelled() }}
+ id: create-allure-report
+ uses: ./.github/actions/allure-report-generate
+ with:
+ store-test-results-into-db: true
+ env:
+ REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
+
+ - name: Post to a Slack channel
+ if: github.event.schedule && failure()
+ uses: slackapi/slack-github-action@v1
+ with:
+ channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
+ slack-message: |
+ Testing the logical replication: <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|${{ job.status }}> (<${{ steps.create-allure-report.outputs.report-url }}|test report>)
+ env:
+ SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
+
  test-postgres-client-libs:
  needs: [ build-build-tools-image ]
  runs-on: ubuntu-22.04
.github/workflows/pin-build-tools-image.yml (60 lines changed, vendored)

@@ -7,12 +7,20 @@ on:
  description: 'Source tag'
  required: true
  type: string
+ force:
+ description: 'Force the image to be pinned'
+ default: false
+ type: boolean
  workflow_call:
  inputs:
  from-tag:
  description: 'Source tag'
  required: true
  type: string
+ force:
+ description: 'Force the image to be pinned'
+ default: false
+ type: boolean

  defaults:
  run:
@@ -22,15 +30,18 @@ concurrency:
  group: pin-build-tools-image-${{ inputs.from-tag }}
  cancel-in-progress: false

+ # No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
  permissions: {}

- jobs:
+ env:
- tag-image:
+ FROM_TAG: ${{ inputs.from-tag }}
- runs-on: ubuntu-22.04
+ TO_TAG: pinned

- env:
+ jobs:
- FROM_TAG: ${{ inputs.from-tag }}
+ check-manifests:
- TO_TAG: pinned
+ runs-on: ubuntu-22.04
+ outputs:
+ skip: ${{ steps.check-manifests.outputs.skip }}

  steps:
  - name: Check if we really need to pin the image
@@ -47,27 +58,44 @@ jobs:

  echo "skip=${skip}" | tee -a $GITHUB_OUTPUT

+ tag-image:
+ needs: check-manifests

+ # use format(..) to catch both inputs.force = true AND inputs.force = 'true'
+ if: needs.check-manifests.outputs.skip == 'false' || format('{0}', inputs.force) == 'true'

+ runs-on: ubuntu-22.04

+ permissions:
+ id-token: write # for `azure/login`

+ steps:
  - uses: docker/login-action@v3
- if: steps.check-manifests.outputs.skip == 'false'
  with:
  username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
  password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

- - name: Tag build-tools with `${{ env.TO_TAG }}` in Docker Hub
- if: steps.check-manifests.outputs.skip == 'false'
- run: |
- docker buildx imagetools create -t neondatabase/build-tools:${TO_TAG} \
- neondatabase/build-tools:${FROM_TAG}

  - uses: docker/login-action@v3
- if: steps.check-manifests.outputs.skip == 'false'
  with:
  registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
  username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
  password: ${{ secrets.AWS_SECRET_KEY_DEV }}

- - name: Tag build-tools with `${{ env.TO_TAG }}` in ECR
+ - name: Azure login
- if: steps.check-manifests.outputs.skip == 'false'
+ uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # @v2.1.1
+ with:
+ client-id: ${{ secrets.AZURE_DEV_CLIENT_ID }}
+ tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+ subscription-id: ${{ secrets.AZURE_DEV_SUBSCRIPTION_ID }}
+
+ - name: Login to ACR
+ run: |
+ az acr login --name=neoneastus2
+
+ - name: Tag build-tools with `${{ env.TO_TAG }}` in Docker Hub, ECR, and ACR
  run: |
  docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${TO_TAG} \
+ -t neoneastus2.azurecr.io/neondatabase/build-tools:${TO_TAG} \
+ -t neondatabase/build-tools:${TO_TAG} \
  neondatabase/build-tools:${FROM_TAG}
.github/workflows/trigger-e2e-tests.yml (38 lines changed, vendored)

@@ -13,8 +13,6 @@ defaults:
  env:
  # A concurrency group that we use for e2e-tests runs, matches `concurrency.group` above with `github.repository` as a prefix
  E2E_CONCURRENCY_GROUP: ${{ github.repository }}-e2e-tests-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
- AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
- AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}

  jobs:
  cancel-previous-e2e-tests:
@@ -64,19 +62,35 @@ jobs:
  needs: [ tag ]
  runs-on: ubuntu-22.04
  env:
+ EVENT_ACTION: ${{ github.event.action }}
+ GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
  TAG: ${{ needs.tag.outputs.build-tag }}
  steps:
- - name: check if ecr image are present
+ - name: Wait for `promote-images` job to finish
- env:
+ # It's important to have a timeout here, the script in the step can run infinitely
- AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
+ timeout-minutes: 60
- AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
  run: |
- for REPO in neon compute-tools compute-node-v14 vm-compute-node-v14 compute-node-v15 vm-compute-node-v15 compute-node-v16 vm-compute-node-v16; do
+ if [ "${GITHUB_EVENT_NAME}" != "pull_request" ] || [ "${EVENT_ACTION}" != "ready_for_review" ]; then
- OUTPUT=$(aws ecr describe-images --repository-name ${REPO} --region eu-central-1 --query "imageDetails[?imageTags[?contains(@, '${TAG}')]]" --output text)
+ exit 0
- if [ "$OUTPUT" == "" ]; then
+ fi
- echo "$REPO with image tag $TAG not found" >> $GITHUB_OUTPUT
- exit 1
+ # For PRs we use the run id as the tag
- fi
+ BUILD_AND_TEST_RUN_ID=${TAG}
+ while true; do
+ conclusion=$(gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '.jobs[] | select(.name == "promote-images") | .conclusion')
+ case "$conclusion" in
+ success)
+ break
+ ;;
+ failure | cancelled | skipped)
+ echo "The 'promote-images' job didn't succeed: '${conclusion}'. Exiting..."
+ exit 1
+ ;;
+ *)
+ echo "The 'promote-images' hasn't succeed yet. Waiting..."
+ sleep 60
+ ;;
+ esac
  done

  - name: Set e2e-platforms
@@ -1,13 +1,13 @@
  /compute_tools/ @neondatabase/control-plane @neondatabase/compute
  /storage_controller @neondatabase/storage
  /libs/pageserver_api/ @neondatabase/storage
- /libs/postgres_ffi/ @neondatabase/compute @neondatabase/safekeepers
+ /libs/postgres_ffi/ @neondatabase/compute @neondatabase/storage
  /libs/remote_storage/ @neondatabase/storage
- /libs/safekeeper_api/ @neondatabase/safekeepers
+ /libs/safekeeper_api/ @neondatabase/storage
  /libs/vm_monitor/ @neondatabase/autoscaling
  /pageserver/ @neondatabase/storage
  /pgxn/ @neondatabase/compute
- /pgxn/neon/ @neondatabase/compute @neondatabase/safekeepers
+ /pgxn/neon/ @neondatabase/compute @neondatabase/storage
  /proxy/ @neondatabase/proxy
- /safekeeper/ @neondatabase/safekeepers
+ /safekeeper/ @neondatabase/storage
  /vendor/ @neondatabase/compute
Cargo.lock (730 lines changed, generated) — diff suppressed because it is too large.
Cargo.toml (19 lines changed)

@@ -9,13 +9,16 @@ members = [
  "pageserver/ctl",
  "pageserver/client",
  "pageserver/pagebench",
- "proxy",
+ "proxy/core",
+ "proxy/sasl",
+ "proxy/proxy",
+ "proxy/pg_sni_router",
  "safekeeper",
  "storage_broker",
  "storage_controller",
+ "storage_controller/client",
  "storage_scrubber",
  "workspace_hack",
- "trace",
  "libs/compute_api",
  "libs/pageserver_api",
  "libs/postgres_ffi",
@@ -84,7 +87,6 @@ enumset = "1.0.12"
  fail = "0.5.0"
  fallible-iterator = "0.2"
  framed-websockets = { version = "0.1.0", git = "https://github.com/neondatabase/framed-websockets" }
- fs2 = "0.4.3"
  futures = "0.3"
  futures-core = "0.3"
  futures-util = "0.3"
@@ -127,7 +129,7 @@ parquet = { version = "51.0.0", default-features = false, features = ["zstd"] }
  parquet_derive = "51.0.0"
  pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
  pin-project-lite = "0.2"
- procfs = "0.14"
+ procfs = "0.16"
  prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
  prost = "0.11"
  rand = "0.8"
@@ -184,14 +186,17 @@ tower-service = "0.3.2"
  tracing = "0.1"
  tracing-error = "0.2.0"
  tracing-opentelemetry = "0.21.0"
- tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json", "ansi"] }
+ tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
+ try-lock = "0.2.5"
  twox-hash = { version = "1.6.3", default-features = false }
+ typed-json = "0.1"
  url = "2.2"
  urlencoding = "2.1"
  uuid = { version = "1.6.1", features = ["v4", "v7", "serde"] }
  walkdir = "2.3.2"
  rustls-native-certs = "0.7"
  x509-parser = "0.15"
+ whoami = "1.5.1"

  ## TODO replace this with tracing
  env_logger = "0.10"
@@ -203,9 +208,6 @@ postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git",
  postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
  tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }

- ## Other git libraries
- heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending

  ## Local libraries
  compute_api = { version = "0.1", path = "./libs/compute_api/" }
  consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
@@ -221,6 +223,7 @@ remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
  safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
  desim = { version = "0.1", path = "./libs/desim" }
  storage_broker = { version = "0.1", path = "./storage_broker/" } # Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy.
+ storage_controller_client = { path = "./storage_controller/client" }
  tenant_size_model = { version = "0.1", path = "./libs/tenant_size_model/" }
  tracing-utils = { version = "0.1", path = "./libs/tracing-utils/" }
  utils = { version = "0.1", path = "./libs/utils/" }
Dockerfile (37 changes)

@@ -17,7 +17,7 @@ COPY --chown=nonroot pgxn pgxn
 COPY --chown=nonroot Makefile Makefile
 COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
 
-ENV BUILD_TYPE release
+ENV BUILD_TYPE=release
 RUN set -e \
 && mold -run make -j $(nproc) -s neon-pg-ext \
 && rm -rf pg_install/build \
@@ -29,24 +29,12 @@ WORKDIR /home/nonroot
 ARG GIT_VERSION=local
 ARG BUILD_TAG
 
-# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
-# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
-# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build
-ARG RUSTC_WRAPPER=cachepot
-ENV AWS_REGION=eu-central-1
-ENV CACHEPOT_S3_KEY_PREFIX=cachepot
-ARG CACHEPOT_BUCKET=neon-github-dev
-#ARG AWS_ACCESS_KEY_ID
-#ARG AWS_SECRET_ACCESS_KEY
-
 COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
 COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
 COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server
 COPY --from=pg-build /home/nonroot/pg_install/v16/lib pg_install/v16/lib
 COPY --chown=nonroot . .
 
-# Show build caching stats to check if it was used in the end.
-# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
 RUN set -e \
 && PQ_LIB_DIR=$(pwd)/pg_install/v16/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment" cargo build \
 --bin pg_sni_router \
@@ -58,8 +46,7 @@ RUN set -e \
 --bin proxy \
 --bin neon_local \
 --bin storage_scrubber \
---locked --release \
-&& cachepot -s
+--locked --release
 
 # Build final image
 #
@@ -93,20 +80,24 @@ COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
 
 # By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.
 # Now, when `docker run ... pageserver` is run, it can start without errors, yet will have some default dummy values.
-RUN mkdir -p /data/.neon/ && chown -R neon:neon /data/.neon/ \
-&& /usr/local/bin/pageserver -D /data/.neon/ --init \
--c "id=1234" \
--c "broker_endpoint='http://storage_broker:50051'" \
--c "pg_distrib_dir='/usr/local/'" \
--c "listen_pg_addr='0.0.0.0:6400'" \
--c "listen_http_addr='0.0.0.0:9898'"
+RUN mkdir -p /data/.neon/ && \
+echo "id=1234" > "/data/.neon/identity.toml" && \
+echo "broker_endpoint='http://storage_broker:50051'\n" \
+"pg_distrib_dir='/usr/local/'\n" \
+"listen_pg_addr='0.0.0.0:6400'\n" \
+"listen_http_addr='0.0.0.0:9898'\n" \
+> /data/.neon/pageserver.toml && \
+chown -R neon:neon /data/.neon
 
 # When running a binary that links with libpq, default to using our most recent postgres version. Binaries
 # that want a particular postgres version will select it explicitly: this is just a default.
-ENV LD_LIBRARY_PATH /usr/local/v16/lib
+ENV LD_LIBRARY_PATH=/usr/local/v16/lib
 
 VOLUME ["/data"]
 USER neon
 EXPOSE 6400
 EXPOSE 9898
 
+CMD ["/usr/local/bin/pageserver", "-D", "/data/.neon"]
 
@@ -58,7 +58,7 @@ RUN set -e \
 && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 
 # protobuf-compiler (protoc)
-ENV PROTOC_VERSION 25.1
+ENV PROTOC_VERSION=25.1
 RUN curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-$(uname -m | sed 's/aarch64/aarch_64/g').zip" -o "protoc.zip" \
 && unzip -q protoc.zip -d protoc \
 && mv protoc/bin/protoc /usr/local/bin/protoc \
@@ -99,7 +99,7 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "aws
 && rm awscliv2.zip
 
 # Mold: A Modern Linker
-ENV MOLD_VERSION v2.31.0
+ENV MOLD_VERSION=v2.33.0
 RUN set -e \
 && git clone https://github.com/rui314/mold.git \
 && mkdir mold/build \
@@ -168,7 +168,7 @@ USER nonroot:nonroot
 WORKDIR /home/nonroot
 
 # Python
-ENV PYTHON_VERSION=3.9.18 \
+ENV PYTHON_VERSION=3.9.19 \
 PYENV_ROOT=/home/nonroot/.pyenv \
 PATH=/home/nonroot/.pyenv/shims:/home/nonroot/.pyenv/bin:/home/nonroot/.poetry/bin:$PATH
 RUN set -e \
@@ -192,9 +192,14 @@ WORKDIR /home/nonroot
 
 # Rust
 # Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
-ENV RUSTC_VERSION=1.79.0
+ENV RUSTC_VERSION=1.80.1
 ENV RUSTUP_HOME="/home/nonroot/.rustup"
 ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
+ARG RUSTFILT_VERSION=0.2.1
+ARG CARGO_HAKARI_VERSION=0.9.30
+ARG CARGO_DENY_VERSION=0.16.1
+ARG CARGO_HACK_VERSION=0.6.31
+ARG CARGO_NEXTEST_VERSION=0.9.72
 RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
 chmod +x rustup-init && \
 ./rustup-init -y --default-toolchain ${RUSTC_VERSION} && \
@@ -203,15 +208,13 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
 . "$HOME/.cargo/env" && \
 cargo --version && rustup --version && \
 rustup component add llvm-tools-preview rustfmt clippy && \
-cargo install --git https://github.com/paritytech/cachepot && \
-cargo install rustfilt && \
-cargo install cargo-hakari && \
-cargo install cargo-deny --locked && \
-cargo install cargo-hack && \
-cargo install cargo-nextest && \
+cargo install rustfilt --version ${RUSTFILT_VERSION} && \
+cargo install cargo-hakari --version ${CARGO_HAKARI_VERSION} && \
+cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
+cargo install cargo-hack --version ${CARGO_HACK_VERSION} && \
+cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} && \
 rm -rf /home/nonroot/.cargo/registry && \
 rm -rf /home/nonroot/.cargo/git
-ENV RUSTC_WRAPPER=cachepot
 
 # Show versions
 RUN whoami \
@@ -94,7 +94,7 @@ RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar
 DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
 make clean && cp -R /sfcgal/* /
 
-ENV PATH "/usr/local/pgsql/bin:$PATH"
+ENV PATH="/usr/local/pgsql/bin:$PATH"
 
 RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \
 echo "74eb356e3f85f14233791013360881b6748f78081cc688ff9d6f0f673a762d13 postgis.tar.gz" | sha256sum --check && \
@@ -311,9 +311,12 @@ RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz
 FROM build-deps AS rum-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 
+COPY patches/rum.patch /rum.patch
+
 RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
 echo "6ab370532c965568df6210bd844ac6ba649f53055e48243525b0b7e5c4d69a7d rum.tar.gz" | sha256sum --check && \
 mkdir rum-src && cd rum-src && tar xzf ../rum.tar.gz --strip-components=1 -C . && \
+patch -p1 < /rum.patch && \
 make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
 make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
 echo 'trusted = true' >> /usr/local/pgsql/share/extension/rum.control
@@ -408,7 +411,7 @@ FROM build-deps AS timescaledb-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 
 ARG PG_VERSION
-ENV PATH "/usr/local/pgsql/bin:$PATH"
+ENV PATH="/usr/local/pgsql/bin:$PATH"
 
 RUN case "${PG_VERSION}" in \
 "v14" | "v15") \
@@ -441,7 +444,7 @@ FROM build-deps AS pg-hint-plan-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 
 ARG PG_VERSION
-ENV PATH "/usr/local/pgsql/bin:$PATH"
+ENV PATH="/usr/local/pgsql/bin:$PATH"
 
 RUN case "${PG_VERSION}" in \
 "v14") \
@@ -477,7 +480,7 @@ RUN case "${PG_VERSION}" in \
 FROM build-deps AS pg-cron-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 
-ENV PATH "/usr/local/pgsql/bin/:$PATH"
+ENV PATH="/usr/local/pgsql/bin/:$PATH"
 RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
 echo "383a627867d730222c272bfd25cd5e151c578d73f696d32910c7db8c665cc7db pg_cron.tar.gz" | sha256sum --check && \
 mkdir pg_cron-src && cd pg_cron-src && tar xzf ../pg_cron.tar.gz --strip-components=1 -C . && \
@@ -503,7 +506,7 @@ RUN apt-get update && \
 libboost-system1.74-dev \
 libeigen3-dev
 
-ENV PATH "/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
+ENV PATH="/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
 RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
 echo "bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d rdkit.tar.gz" | sha256sum --check && \
 mkdir rdkit-src && cd rdkit-src && tar xzf ../rdkit.tar.gz --strip-components=1 -C . && \
@@ -543,7 +546,7 @@ RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.
 FROM build-deps AS pg-uuidv7-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 
-ENV PATH "/usr/local/pgsql/bin/:$PATH"
+ENV PATH="/usr/local/pgsql/bin/:$PATH"
 RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
 echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \
 mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
@@ -560,7 +563,7 @@ RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz
 FROM build-deps AS pg-roaringbitmap-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 
-ENV PATH "/usr/local/pgsql/bin/:$PATH"
+ENV PATH="/usr/local/pgsql/bin/:$PATH"
 RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
 echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \
 mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
@@ -577,7 +580,7 @@ RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4
 FROM build-deps AS pg-semver-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 
-ENV PATH "/usr/local/pgsql/bin/:$PATH"
+ENV PATH="/usr/local/pgsql/bin/:$PATH"
 RUN wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \
 echo "fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 pg_semver.tar.gz" | sha256sum --check && \
 mkdir pg_semver-src && cd pg_semver-src && tar xzf ../pg_semver.tar.gz --strip-components=1 -C . && \
@@ -595,7 +598,7 @@ FROM build-deps AS pg-embedding-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 
 ARG PG_VERSION
-ENV PATH "/usr/local/pgsql/bin/:$PATH"
+ENV PATH="/usr/local/pgsql/bin/:$PATH"
 RUN case "${PG_VERSION}" in \
 "v14" | "v15") \
 export PG_EMBEDDING_VERSION=0.3.5 \
@@ -619,7 +622,7 @@ RUN case "${PG_VERSION}" in \
 FROM build-deps AS pg-anon-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 
-ENV PATH "/usr/local/pgsql/bin/:$PATH"
+ENV PATH="/usr/local/pgsql/bin/:$PATH"
 RUN wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
 echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9 pg_anon.tar.gz" | sha256sum --check && \
 mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \
@@ -654,7 +657,7 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
 chmod +x rustup-init && \
 ./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
 rm rustup-init && \
-cargo install --locked --version 0.10.2 cargo-pgrx && \
+cargo install --locked --version 0.11.3 cargo-pgrx && \
 /bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
 
 USER root
@@ -669,10 +672,15 @@ USER root
 FROM rust-extensions-build AS pg-jsonschema-pg-build
 ARG PG_VERSION
 
-RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.2.0.tar.gz -O pg_jsonschema.tar.gz && \
-echo "9118fc508a6e231e7a39acaa6f066fcd79af17a5db757b47d2eefbe14f7794f0 pg_jsonschema.tar.gz" | sha256sum --check && \
+RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.1.tar.gz -O pg_jsonschema.tar.gz && \
+echo "61df3db1ed83cf24f6aa39c826f8818bfa4f0bd33b587fd6b2b1747985642297 pg_jsonschema.tar.gz" | sha256sum --check && \
 mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
-sed -i 's/pgrx = "0.10.2"/pgrx = { version = "0.10.2", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
+# see commit 252b3685a27a0f4c31a0f91e983c6314838e89e8
+# `unsafe-postgres` feature allows to build pgx extensions
+# against postgres forks that decided to change their ABI name (like us).
+# With that we can build extensions without forking them and using stock
+# pgx. As this feature is new few manual version bumps were required.
+sed -i 's/pgrx = "0.11.3"/pgrx = { version = "0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
 cargo pgrx install --release && \
 echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_jsonschema.control
 
@@ -686,10 +694,10 @@ RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.2.0.tar.
 FROM rust-extensions-build AS pg-graphql-pg-build
 ARG PG_VERSION
 
-RUN wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.4.0.tar.gz -O pg_graphql.tar.gz && \
-echo "bd8dc7230282b3efa9ae5baf053a54151ed0e66881c7c53750e2d0c765776edc pg_graphql.tar.gz" | sha256sum --check && \
+RUN wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.5.7.tar.gz -O pg_graphql.tar.gz && \
+echo "2b3e567a5b31019cb97ae0e33263c1bcc28580be5a444ac4c8ece5c4be2aea41 pg_graphql.tar.gz" | sha256sum --check && \
 mkdir pg_graphql-src && cd pg_graphql-src && tar xzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
-sed -i 's/pgrx = "=0.10.2"/pgrx = { version = "0.10.2", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
+sed -i 's/pgrx = "=0.11.3"/pgrx = { version = "0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
 cargo pgrx install --release && \
 # it's needed to enable extension because it uses untrusted C language
 sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_graphql.control && \
@@ -709,6 +717,9 @@ ARG PG_VERSION
 RUN wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6878884c41a262318.tar.gz -O pg_tiktoken.tar.gz && \
 echo "e64e55aaa38c259512d3e27c572da22c4637418cf124caba904cd50944e5004e pg_tiktoken.tar.gz" | sha256sum --check && \
 mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
+# TODO update pgrx version in the pg_tiktoken repo and remove this line
+sed -i 's/pgrx = { version = "=0.10.2",/pgrx = { version = "0.11.3",/g' Cargo.toml && \
+sed -i 's/pgrx-tests = "=0.10.2"/pgrx-tests = "0.11.3"/g' Cargo.toml && \
 cargo pgrx install --release && \
 echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_tiktoken.control
 
@@ -722,14 +733,10 @@ RUN wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6
 FROM rust-extensions-build AS pg-pgx-ulid-build
 ARG PG_VERSION
 
-RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.3.tar.gz -O pgx_ulid.tar.gz && \
-echo "ee5db82945d2d9f2d15597a80cf32de9dca67b897f605beb830561705f12683c pgx_ulid.tar.gz" | sha256sum --check && \
+RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -O pgx_ulid.tar.gz && \
+echo "9d1659a2da65af0133d5451c454de31b37364e3502087dadf579f790bc8bef17 pgx_ulid.tar.gz" | sha256sum --check && \
 mkdir pgx_ulid-src && cd pgx_ulid-src && tar xzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
-echo "******************* Apply a patch for Postgres 16 support; delete in the next release ******************" && \
-wget https://github.com/pksunkara/pgx_ulid/commit/f84954cf63fc8c80d964ac970d9eceed3c791196.patch && \
-patch -p1 < f84954cf63fc8c80d964ac970d9eceed3c791196.patch && \
-echo "********************************************************************************************************" && \
-sed -i 's/pgrx = "=0.10.2"/pgrx = { version = "=0.10.2", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
+sed -i 's/pgrx = "^0.11.2"/pgrx = { version = "=0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
 cargo pgrx install --release && \
 echo "trusted = true" >> /usr/local/pgsql/share/extension/ulid.control
 
@@ -743,7 +750,7 @@ RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.3.tar.gz -
 FROM build-deps AS wal2json-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 
-ENV PATH "/usr/local/pgsql/bin/:$PATH"
+ENV PATH="/usr/local/pgsql/bin/:$PATH"
 RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
 echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \
 mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
@@ -759,7 +766,7 @@ RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.
 FROM build-deps AS pg-ivm-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 
-ENV PATH "/usr/local/pgsql/bin/:$PATH"
+ENV PATH="/usr/local/pgsql/bin/:$PATH"
 RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
 echo "ebfde04f99203c7be4b0e873f91104090e2e83e5429c32ac242d00f334224d5e pg_ivm.tar.gz" | sha256sum --check && \
 mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
@@ -776,7 +783,7 @@ RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_iv
 FROM build-deps AS pg-partman-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 
-ENV PATH "/usr/local/pgsql/bin/:$PATH"
+ENV PATH="/usr/local/pgsql/bin/:$PATH"
 RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \
 echo "75b541733a9659a6c90dbd40fccb904a630a32880a6e3044d0c4c5f4c8a65525 pg_partman.tar.gz" | sha256sum --check && \
 mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C . && \
@@ -926,7 +933,8 @@ COPY --from=pgjwt-pg-build /pgjwt.tar.gz /ext-src
 #COPY --from=pg-tiktoken-pg-build /home/nonroot/pg_tiktoken.tar.gz /ext-src
 COPY --from=hypopg-pg-build /hypopg.tar.gz /ext-src
 COPY --from=pg-hashids-pg-build /pg_hashids.tar.gz /ext-src
-#COPY --from=rum-pg-build /rum.tar.gz /ext-src
+COPY --from=rum-pg-build /rum.tar.gz /ext-src
+COPY patches/rum.patch /ext-src
 #COPY --from=pgtap-pg-build /pgtap.tar.gz /ext-src
 COPY --from=ip4r-pg-build /ip4r.tar.gz /ext-src
 COPY --from=prefix-pg-build /prefix.tar.gz /ext-src
@@ -938,7 +946,7 @@ COPY patches/pg_hintplan.patch /ext-src
 COPY --from=pg-cron-pg-build /pg_cron.tar.gz /ext-src
 COPY patches/pg_cron.patch /ext-src
 #COPY --from=pg-pgx-ulid-build /home/nonroot/pgx_ulid.tar.gz /ext-src
-COPY --from=rdkit-pg-build /rdkit.tar.gz /ext-src
+#COPY --from=rdkit-pg-build /rdkit.tar.gz /ext-src
 COPY --from=pg-uuidv7-pg-build /pg_uuidv7.tar.gz /ext-src
 COPY --from=pg-roaringbitmap-pg-build /pg_roaringbitmap.tar.gz /ext-src
 COPY --from=pg-semver-pg-build /pg_semver.tar.gz /ext-src
@@ -953,6 +961,7 @@ RUN cd /ext-src/ && for f in *.tar.gz; \
 rm -rf $dname; mkdir $dname; tar xzf $f --strip-components=1 -C $dname \
 || exit 1; rm -f $f; done
 RUN cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
+RUN cd /ext-src/rum-src && patch -p1 <../rum.patch
 # cmake is required for the h3 test
 RUN apt-get update && apt-get install -y cmake
 RUN patch -p1 < /ext-src/pg_hintplan.patch
@@ -1025,6 +1034,6 @@ RUN apt update && \
 rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
 localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
 
-ENV LANG en_US.utf8
+ENV LANG=en_US.utf8
 USER postgres
 ENTRYPOINT ["/usr/local/bin/compute_ctl"]
Makefile (13 changes)

@@ -69,6 +69,8 @@ CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
 # Set PQ_LIB_DIR to make sure `storage_controller` get linked with bundled libpq (through diesel)
 CARGO_CMD_PREFIX += PQ_LIB_DIR=$(POSTGRES_INSTALL_DIR)/v16/lib
 
+CACHEDIR_TAG_CONTENTS := "Signature: 8a477f597d28d172789f06886806bc55"
+
 #
 # Top level Makefile to build Neon and PostgreSQL
 #
@@ -79,15 +81,24 @@ all: neon postgres neon-pg-ext
 #
 # The 'postgres_ffi' depends on the Postgres headers.
 .PHONY: neon
-neon: postgres-headers walproposer-lib
+neon: postgres-headers walproposer-lib cargo-target-dir
 +@echo "Compiling Neon"
 $(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
 
+.PHONY: cargo-target-dir
+cargo-target-dir:
+# https://github.com/rust-lang/cargo/issues/14281
+mkdir -p target
+test -e target/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > target/CACHEDIR.TAG
+
 ### PostgreSQL parts
 # Some rules are duplicated for Postgres v14 and 15. We may want to refactor
 # to avoid the duplication in the future, but it's tolerable for now.
 #
 $(POSTGRES_INSTALL_DIR)/build/%/config.status:
+mkdir -p $(POSTGRES_INSTALL_DIR)
+test -e $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG
+
 +@echo "Configuring Postgres $* build"
 @test -s $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure || { \
 echo "\nPostgres submodule not found in $(ROOT_PROJECT_DIR)/vendor/postgres-$*/, execute "; \
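The new cargo-target-dir rule above pre-creates Cargo's target directory and drops a CACHEDIR.TAG file into it so backup and caching tools skip it. Below is a minimal Rust sketch of the same idea as a standalone program; the helper name and the "target" path are illustrative, while the signature string is the one defined as CACHEDIR_TAG_CONTENTS in the Makefile.

use std::fs;
use std::path::Path;

const CACHEDIR_TAG_CONTENTS: &str = "Signature: 8a477f597d28d172789f06886806bc55";

fn ensure_cachedir_tag(dir: &Path) -> std::io::Result<()> {
    // Create the directory first, then write the tag only if it is missing,
    // mirroring the `test -e ... || echo ...` in the Makefile rule.
    fs::create_dir_all(dir)?;
    let tag = dir.join("CACHEDIR.TAG");
    if !tag.exists() {
        fs::write(&tag, CACHEDIR_TAG_CONTENTS)?;
    }
    Ok(())
}

fn main() -> std::io::Result<()> {
    ensure_cachedir_tag(Path::new("target"))
}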
@@ -4,6 +4,11 @@ version = "0.1.0"
 edition.workspace = true
 license.workspace = true
 
+[features]
+default = []
+# Enables test specific features.
+testing = []
+
 [dependencies]
 anyhow.workspace = true
 async-compression.workspace = true
@@ -44,3 +49,4 @@ vm_monitor = { version = "0.1", path = "../libs/vm_monitor/" }
 zstd = "0.13"
 bytes = "1.0"
 rust-ini = "0.20.0"
+rlimit = "0.10.1"
@@ -6,7 +6,7 @@
 //! - Every start is a fresh start, so the data directory is removed and
 //! initialized again on each run.
 //! - If remote_extension_config is provided, it will be used to fetch extensions list
 //! and download `shared_preload_libraries` from the remote storage.
 //! - Next it will put configuration files into the `PGDATA` directory.
 //! - Sync safekeepers and get commit LSN.
 //! - Get `basebackup` from pageserver using the returned on the previous step LSN.
@@ -33,7 +33,6 @@
 //! -b /usr/local/bin/postgres \
 //! -r http://pg-ext-s3-gateway \
 //! ```
-//!
 use std::collections::HashMap;
 use std::fs::File;
 use std::path::Path;
@@ -64,6 +63,7 @@ use compute_tools::monitor::launch_monitor;
 use compute_tools::params::*;
 use compute_tools::spec::*;
 use compute_tools::swap::resize_swap;
+use rlimit::{setrlimit, Resource};
 
 // this is an arbitrary build tag. Fine as a default / for testing purposes
 // in-case of not-set environment var
@@ -72,6 +72,9 @@ const BUILD_TAG_DEFAULT: &str = "latest";
 fn main() -> Result<()> {
 let (build_tag, clap_args) = init()?;
 
+// enable core dumping for all child processes
+setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;
+
 let (pg_handle, start_pg_result) = {
 // Enter startup tracing context
 let _startup_context_guard = startup_context_from_env();
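The main.rs change above raises RLIMIT_CORE before Postgres is started so that child processes can produce core dumps. A minimal sketch of that call in isolation, using the rlimit crate added to Cargo.toml ("0.10.1"); the standalone main() and the printout are only for illustration.

use rlimit::{getrlimit, setrlimit, Resource};

fn main() -> std::io::Result<()> {
    // Raise the core-dump size limit to unlimited; children inherit it.
    setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;
    let (soft, hard) = getrlimit(Resource::CORE)?;
    println!("RLIMIT_CORE: soft={soft} hard={hard}");
    Ok(())
}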
@@ -56,6 +56,7 @@ pub struct ComputeNode {
 /// - we push new spec and it does reconfiguration
 /// - but then something happens and compute pod / VM is destroyed,
 /// so k8s controller starts it again with the **old** spec
+///
 /// and the same for empty computes:
 /// - we started compute without any spec
 /// - we push spec and it does configuration
@@ -399,7 +400,15 @@ impl ComputeNode {
 pub fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
 let mut retry_period_ms = 500.0;
 let mut attempts = 0;
-let max_attempts = 10;
+const DEFAULT_ATTEMPTS: u16 = 10;
+#[cfg(feature = "testing")]
+let max_attempts = if let Ok(v) = env::var("NEON_COMPUTE_TESTING_BASEBACKUP_RETRIES") {
+u16::from_str(&v).unwrap()
+} else {
+DEFAULT_ATTEMPTS
+};
+#[cfg(not(feature = "testing"))]
+let max_attempts = DEFAULT_ATTEMPTS;
 loop {
 let result = self.try_get_basebackup(compute_state, lsn);
 match result {
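The hunk above makes the basebackup retry count overridable in test builds only. A condensed Rust sketch of the same pattern, assuming the `testing` cargo feature and environment variable name from the diff; release builds always compile down to the constant.

const DEFAULT_ATTEMPTS: u16 = 10;

fn max_attempts() -> u16 {
    // Only builds with the `testing` feature may override the retry count.
    #[cfg(feature = "testing")]
    if let Ok(v) = std::env::var("NEON_COMPUTE_TESTING_BASEBACKUP_RETRIES") {
        return v.parse().expect("retry count must fit in a u16");
    }
    DEFAULT_ATTEMPTS
}

fn main() {
    println!("basebackup attempts: {}", max_attempts());
}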
@@ -1116,7 +1125,7 @@ impl ComputeNode {
 // EKS worker nodes have following core dump settings:
 // /proc/sys/kernel/core_pattern -> core
 // /proc/sys/kernel/core_uses_pid -> 1
-// ulimint -c -> unlimited
+// ulimit -c -> unlimited
 // which results in core dumps being written to postgres data directory as core.<pid>.
 //
 // Use that as a default location and pattern, except macos where core dumps are written
@@ -9,6 +9,9 @@ pub(crate) struct MigrationRunner<'m> {
 
 impl<'m> MigrationRunner<'m> {
 pub fn new(client: &'m mut Client, migrations: &'m [&'m str]) -> Self {
+// The neon_migration.migration_id::id column is a bigint, which is equivalent to an i64
+assert!(migrations.len() + 1 < i64::MAX as usize);
+
 Self { client, migrations }
 }
 
@@ -22,11 +25,8 @@ impl<'m> MigrationRunner<'m> {
 Ok(row.get::<&str, i64>("id"))
 }
 
-fn update_migration_id(&mut self) -> Result<()> {
-let setval = format!(
-"UPDATE neon_migration.migration_id SET id={}",
-self.migrations.len()
-);
+fn update_migration_id(&mut self, migration_id: i64) -> Result<()> {
+let setval = format!("UPDATE neon_migration.migration_id SET id={}", migration_id);
 
 self.client
 .simple_query(&setval)
@@ -57,44 +57,49 @@ impl<'m> MigrationRunner<'m> {
 pub fn run_migrations(mut self) -> Result<()> {
 self.prepare_migrations()?;
 
-let mut current_migration: usize = self.get_migration_id()? as usize;
-let starting_migration_id = current_migration;
-
-let query = "BEGIN";
-self.client
-.simple_query(query)
-.context("run_migrations begin")?;
-
+let mut current_migration = self.get_migration_id()? as usize;
 while current_migration < self.migrations.len() {
+macro_rules! migration_id {
+($cm:expr) => {
+($cm + 1) as i64
+};
+}
+
 let migration = self.migrations[current_migration];
 
 if migration.starts_with("-- SKIP") {
-info!("Skipping migration id={}", current_migration);
+info!("Skipping migration id={}", migration_id!(current_migration));
 } else {
 info!(
 "Running migration id={}:\n{}\n",
-current_migration, migration
+migration_id!(current_migration),
+migration
 );
+
+self.client
+.simple_query("BEGIN")
+.context("begin migration")?;
+
 self.client.simple_query(migration).with_context(|| {
-format!("run_migration current_migration={}", current_migration)
+format!(
+"run_migrations migration id={}",
+migration_id!(current_migration)
+)
 })?;
+
+// Migration IDs start at 1
+self.update_migration_id(migration_id!(current_migration))?;
+
+self.client
+.simple_query("COMMIT")
+.context("commit migration")?;
+
+info!("Finished migration id={}", migration_id!(current_migration));
 }
 
 current_migration += 1;
 }
 
-self.update_migration_id()?;
-
-let query = "COMMIT";
-self.client
-.simple_query(query)
-.context("run_migrations commit")?;
-
-info!(
-"Ran {} migrations",
-(self.migrations.len() - starting_migration_id)
-);
-
 Ok(())
 }
 }
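The rewritten runner above numbers migrations from 1 and wraps each migration, together with its bookkeeping UPDATE, in its own BEGIN/COMMIT instead of one long transaction. A condensed sketch of that control flow with the database client replaced by a closure; the closure, the sample migrations, and main() are illustrative only.

fn run_migrations(
    migrations: &[&str],
    mut current: usize, // index of the first migration not yet applied
    mut apply: impl FnMut(&str) -> Result<(), String>,
) -> Result<(), String> {
    while current < migrations.len() {
        let id = (current + 1) as i64; // migration ids start at 1
        let sql = migrations[current];
        if sql.starts_with("-- SKIP") {
            println!("Skipping migration id={id}");
        } else {
            // The migration and its id update commit together, so a crash
            // can never record a migration that did not run.
            apply("BEGIN")?;
            apply(sql)?;
            apply(&format!("UPDATE neon_migration.migration_id SET id={id}"))?;
            apply("COMMIT")?;
            println!("Finished migration id={id}");
        }
        current += 1;
    }
    Ok(())
}

fn main() {
    let migrations = ["-- SKIP: superseded", "SELECT 1"];
    run_migrations(&migrations, 0, |sql| {
        println!("executing: {sql}");
        Ok(())
    })
    .unwrap();
}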
@@ -0,0 +1,7 @@
+DO $$
+BEGIN
+IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
+EXECUTE 'GRANT EXECUTE ON FUNCTION pg_export_snapshot TO neon_superuser';
+EXECUTE 'GRANT EXECUTE ON FUNCTION pg_log_standby_snapshot TO neon_superuser';
+END IF;
+END $$;
@@ -777,19 +777,22 @@ pub fn handle_migrations(client: &mut Client) -> Result<()> {
 
 // Add new migrations in numerical order.
 let migrations = [
-include_str!("./migrations/0000-neon_superuser_bypass_rls.sql"),
-include_str!("./migrations/0001-alter_roles.sql"),
-include_str!("./migrations/0002-grant_pg_create_subscription_to_neon_superuser.sql"),
-include_str!("./migrations/0003-grant_pg_monitor_to_neon_superuser.sql"),
-include_str!("./migrations/0004-grant_all_on_tables_to_neon_superuser.sql"),
-include_str!("./migrations/0005-grant_all_on_sequences_to_neon_superuser.sql"),
+include_str!("./migrations/0001-neon_superuser_bypass_rls.sql"),
+include_str!("./migrations/0002-alter_roles.sql"),
+include_str!("./migrations/0003-grant_pg_create_subscription_to_neon_superuser.sql"),
+include_str!("./migrations/0004-grant_pg_monitor_to_neon_superuser.sql"),
+include_str!("./migrations/0005-grant_all_on_tables_to_neon_superuser.sql"),
+include_str!("./migrations/0006-grant_all_on_sequences_to_neon_superuser.sql"),
 include_str!(
-"./migrations/0006-grant_all_on_tables_to_neon_superuser_with_grant_option.sql"
+"./migrations/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql"
 ),
 include_str!(
-"./migrations/0007-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql"
+"./migrations/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql"
+),
+include_str!("./migrations/0009-revoke_replication_for_previously_allowed_roles.sql"),
+include_str!(
+"./migrations/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql"
 ),
-include_str!("./migrations/0008-revoke_replication_for_previously_allowed_roles.sql"),
 ];
 
 MigrationRunner::new(client, &migrations).run_migrations()?;
@@ -40,6 +40,7 @@ safekeeper_api.workspace = true
 postgres_connection.workspace = true
 storage_broker.workspace = true
 utils.workspace = true
+whoami.workspace = true
 
 compute_api.workspace = true
 workspace_hack.workspace = true
@@ -289,7 +289,7 @@ fn fill_remote_storage_secrets_vars(mut cmd: &mut Command) -> &mut Command {
 
 fn fill_env_vars_prefixed_neon(mut cmd: &mut Command) -> &mut Command {
 for (var, val) in std::env::vars() {
-if var.starts_with("NEON_PAGESERVER_") {
+if var.starts_with("NEON_") {
 cmd = cmd.env(var, val);
 }
 }
@@ -21,7 +21,9 @@ use pageserver_api::config::{
 DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
 DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
 };
-use pageserver_api::controller_api::{PlacementPolicy, TenantCreateRequest};
+use pageserver_api::controller_api::{
+NodeAvailabilityWrapper, PlacementPolicy, TenantCreateRequest,
+};
 use pageserver_api::models::{ShardParameters, TimelineCreateRequest, TimelineInfo};
 use pageserver_api::shard::{ShardCount, ShardStripeSize, TenantShardId};
 use postgres_backend::AuthType;
@@ -1250,9 +1252,70 @@ async fn handle_start_all(
 exit(1);
 }
 }
 
+neon_start_status_check(env, retry_timeout).await?;
+
 Ok(())
 }
 
+async fn neon_start_status_check(
+env: &local_env::LocalEnv,
+retry_timeout: &Duration,
+) -> anyhow::Result<()> {
+const RETRY_INTERVAL: Duration = Duration::from_millis(100);
+const NOTICE_AFTER_RETRIES: Duration = Duration::from_secs(5);
+
+if env.control_plane_api.is_none() {
+return Ok(());
+}
+
+let storcon = StorageController::from_env(env);
+
+let retries = retry_timeout.as_millis() / RETRY_INTERVAL.as_millis();
+let notice_after_retries = retry_timeout.as_millis() / NOTICE_AFTER_RETRIES.as_millis();
+
+println!("\nRunning neon status check");
+
+for retry in 0..retries {
+if retry == notice_after_retries {
+println!("\nNeon status check has not passed yet, continuing to wait")
+}
+
+let mut passed = true;
+let mut nodes = storcon.node_list().await?;
+let mut pageservers = env.pageservers.clone();
+
+if nodes.len() != pageservers.len() {
+continue;
+}
+
+nodes.sort_by_key(|ps| ps.id);
+pageservers.sort_by_key(|ps| ps.id);
+
+for (idx, pageserver) in pageservers.iter().enumerate() {
+let node = &nodes[idx];
+if node.id != pageserver.id {
+passed = false;
+break;
+}
+
+if !matches!(node.availability, NodeAvailabilityWrapper::Active) {
+passed = false;
+break;
+}
+}
+
+if passed {
+println!("\nNeon started and passed status check");
+return Ok(());
+}
+
+tokio::time::sleep(RETRY_INTERVAL).await;
+}
+
+anyhow::bail!("\nNeon passed status check")
+}
+
 async fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
 let immediate =
 sub_match.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
@@ -1,9 +1,9 @@
 //! Code to manage the storage broker
 //!
-//! In the local test environment, the data for each safekeeper is stored in
+//! In the local test environment, the storage broker stores its data directly in
 //!
 //! ```text
-//! .neon/safekeepers/<safekeeper id>
+//! .neon
 //! ```
 use std::time::Duration;
 
@@ -151,23 +151,31 @@ pub struct NeonBroker {
 pub struct NeonStorageControllerConf {
 /// Heartbeat timeout before marking a node offline
 #[serde(with = "humantime_serde")]
-pub max_unavailable: Duration,
+pub max_offline: Duration,
+
+#[serde(with = "humantime_serde")]
+pub max_warming_up: Duration,
 
 /// Threshold for auto-splitting a tenant into shards
 pub split_threshold: Option<u64>,
+
+pub max_secondary_lag_bytes: Option<u64>,
 }
 
 impl NeonStorageControllerConf {
 // Use a shorter pageserver unavailability interval than the default to speed up tests.
-const DEFAULT_MAX_UNAVAILABLE_INTERVAL: std::time::Duration =
-std::time::Duration::from_secs(10);
+const DEFAULT_MAX_OFFLINE_INTERVAL: std::time::Duration = std::time::Duration::from_secs(10);
+const DEFAULT_MAX_WARMING_UP_INTERVAL: std::time::Duration = std::time::Duration::from_secs(30);
 }
 
 impl Default for NeonStorageControllerConf {
 fn default() -> Self {
 Self {
-max_unavailable: Self::DEFAULT_MAX_UNAVAILABLE_INTERVAL,
+max_offline: Self::DEFAULT_MAX_OFFLINE_INTERVAL,
+max_warming_up: Self::DEFAULT_MAX_WARMING_UP_INTERVAL,
 split_threshold: None,
+max_secondary_lag_bytes: None,
 }
 }
 }
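The struct above renames max_unavailable to max_offline and adds two new knobs. A sketch of how such a config section could be deserialized on its own, mirroring the field names and humantime_serde handling from the diff; the standalone struct, the crate wiring (serde, humantime_serde, toml), and the TOML snippet are assumptions for illustration.

use std::time::Duration;

use serde::Deserialize;

#[derive(Debug, Deserialize)]
#[serde(default)]
struct StorageControllerConf {
    #[serde(with = "humantime_serde")]
    max_offline: Duration,
    #[serde(with = "humantime_serde")]
    max_warming_up: Duration,
    split_threshold: Option<u64>,
    max_secondary_lag_bytes: Option<u64>,
}

impl Default for StorageControllerConf {
    fn default() -> Self {
        Self {
            // Shorter-than-production defaults, as in the diff.
            max_offline: Duration::from_secs(10),
            max_warming_up: Duration::from_secs(30),
            split_threshold: None,
            max_secondary_lag_bytes: None,
        }
    }
}

fn main() {
    // Missing fields fall back to the defaults above.
    let conf: StorageControllerConf =
        toml::from_str(r#"max_offline = "30s""#).expect("valid config");
    println!("{conf:?}");
}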
@@ -509,7 +517,6 @@ impl LocalEnv {
 #[derive(serde::Serialize, serde::Deserialize)]
 // (allow unknown fields, unlike PageServerConf)
 struct PageserverConfigTomlSubset {
-id: NodeId,
 listen_pg_addr: String,
 listen_http_addr: String,
 pg_auth_type: AuthType,
@@ -521,18 +528,30 @@ impl LocalEnv {
 .with_context(|| format!("read {:?}", config_toml_path))?,
 )
 .context("parse pageserver.toml")?;
+let identity_toml_path = dentry.path().join("identity.toml");
+#[derive(serde::Serialize, serde::Deserialize)]
+struct IdentityTomlSubset {
+id: NodeId,
+}
+let identity_toml: IdentityTomlSubset = toml_edit::de::from_str(
+&std::fs::read_to_string(&identity_toml_path)
+.with_context(|| format!("read {:?}", identity_toml_path))?,
+)
+.context("parse identity.toml")?;
 let PageserverConfigTomlSubset {
-id: config_toml_id,
 listen_pg_addr,
 listen_http_addr,
 pg_auth_type,
 http_auth_type,
 } = config_toml;
+let IdentityTomlSubset {
+id: identity_toml_id,
+} = identity_toml;
 let conf = PageServerConf {
 id: {
 anyhow::ensure!(
-config_toml_id == id,
-"id mismatch: config_toml.id={config_toml_id} id={id}",
+identity_toml_id == id,
+"id mismatch: identity.toml:id={identity_toml_id} pageserver_(.*) id={id}",
 );
 id
 },
@@ -1,8 +1,10 @@
 //! Code to manage pageservers
 //!
-//! In the local test environment, the pageserver stores its data directly in
+//! In the local test environment, the data for each pageserver is stored in
 //!
-//! .neon/
+//! ```text
+//! .neon/pageserver_<pageserver_id>
+//! ```
 //!
 use std::collections::HashMap;
 
@@ -23,6 +25,7 @@ use pageserver_client::mgmt_api;
 use postgres_backend::AuthType;
 use postgres_connection::{parse_host_port, PgConnectionConfig};
 use utils::auth::{Claims, Scope};
+use utils::id::NodeId;
 use utils::{
 id::{TenantId, TimelineId},
 lsn::Lsn,
@@ -72,6 +75,10 @@ impl PageServerNode {
 }
 }
 
+fn pageserver_make_identity_toml(&self, node_id: NodeId) -> toml_edit::Document {
+toml_edit::Document::from_str(&format!("id={node_id}")).unwrap()
+}
+
 fn pageserver_init_make_toml(
 &self,
 conf: NeonLocalInitPageserverConf,
@@ -120,10 +127,13 @@ impl PageServerNode {
 }
 
 // Apply the user-provided overrides
-overrides.push(
-toml_edit::ser::to_string_pretty(&conf)
-.expect("we deserialized this from toml earlier"),
-);
+overrides.push({
+let mut doc =
+toml_edit::ser::to_document(&conf).expect("we deserialized this from toml earlier");
+// `id` is written out to `identity.toml` instead of `pageserver.toml`
+doc.remove("id").expect("it's part of the struct");
+doc.to_string()
+});
 
 // Turn `overrides` into a toml document.
 // TODO: above code is legacy code, it should be refactored to use toml_edit directly.
@@ -184,6 +194,19 @@ impl PageServerNode {
 .write_all(config.to_string().as_bytes())
 .context("write pageserver toml")?;
 drop(config_file);
+
+let identity_file_path = datadir.join("identity.toml");
+let mut identity_file = std::fs::OpenOptions::new()
+.create_new(true)
+.write(true)
+.open(identity_file_path)
+.with_context(|| format!("open identity toml for write: {config_file_path:?}"))?;
+let identity_toml = self.pageserver_make_identity_toml(node_id);
+identity_file
+.write_all(identity_toml.to_string().as_bytes())
+.context("write identity toml")?;
+drop(identity_toml);
+
 // TODO: invoke a TBD config-check command to validate that pageserver will start with the written config
 
 // Write metadata file, used by pageserver on startup to register itself with
@@ -349,11 +372,6 @@ impl PageServerNode {
|
|||||||
.map(|x| x.parse::<NonZeroU64>())
|
.map(|x| x.parse::<NonZeroU64>())
|
||||||
.transpose()
|
.transpose()
|
||||||
.context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
|
.context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
|
||||||
trace_read_requests: settings
|
|
||||||
.remove("trace_read_requests")
|
|
||||||
.map(|x| x.parse::<bool>())
|
|
||||||
.transpose()
|
|
||||||
.context("Failed to parse 'trace_read_requests' as bool")?,
|
|
||||||
eviction_policy: settings
|
eviction_policy: settings
|
||||||
.remove("eviction_policy")
|
.remove("eviction_policy")
|
||||||
.map(serde_json::from_str)
|
.map(serde_json::from_str)
|
||||||
@@ -454,11 +472,6 @@ impl PageServerNode {
|
|||||||
.map(|x| x.parse::<NonZeroU64>())
|
.map(|x| x.parse::<NonZeroU64>())
|
||||||
.transpose()
|
.transpose()
|
||||||
.context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
|
.context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
|
||||||
trace_read_requests: settings
|
|
||||||
.remove("trace_read_requests")
|
|
||||||
.map(|x| x.parse::<bool>())
|
|
||||||
.transpose()
|
|
||||||
.context("Failed to parse 'trace_read_requests' as bool")?,
|
|
||||||
eviction_policy: settings
|
eviction_policy: settings
|
||||||
.remove("eviction_policy")
|
.remove("eviction_policy")
|
||||||
.map(serde_json::from_str)
|
.map(serde_json::from_str)
|
||||||
|
|||||||
@@ -5,8 +5,9 @@ use crate::{
|
|||||||
use camino::{Utf8Path, Utf8PathBuf};
|
use camino::{Utf8Path, Utf8PathBuf};
|
||||||
use pageserver_api::{
|
use pageserver_api::{
|
||||||
controller_api::{
|
controller_api::{
|
||||||
NodeConfigureRequest, NodeRegisterRequest, TenantCreateRequest, TenantCreateResponse,
|
NodeConfigureRequest, NodeDescribeResponse, NodeRegisterRequest, TenantCreateRequest,
|
||||||
TenantLocateResponse, TenantShardMigrateRequest, TenantShardMigrateResponse,
|
TenantCreateResponse, TenantLocateResponse, TenantShardMigrateRequest,
|
||||||
|
TenantShardMigrateResponse,
|
||||||
},
|
},
|
||||||
models::{
|
models::{
|
||||||
TenantShardSplitRequest, TenantShardSplitResponse, TimelineCreateRequest, TimelineInfo,
|
TenantShardSplitRequest, TenantShardSplitResponse, TimelineCreateRequest, TimelineInfo,
|
||||||
@@ -29,7 +30,6 @@ use utils::{
|
|||||||
pub struct StorageController {
|
pub struct StorageController {
|
||||||
env: LocalEnv,
|
env: LocalEnv,
|
||||||
listen: String,
|
listen: String,
|
||||||
path: Utf8PathBuf,
|
|
||||||
private_key: Option<Vec<u8>>,
|
private_key: Option<Vec<u8>>,
|
||||||
public_key: Option<String>,
|
public_key: Option<String>,
|
||||||
postgres_port: u16,
|
postgres_port: u16,
|
||||||
@@ -41,6 +41,8 @@ const COMMAND: &str = "storage_controller";
|
|||||||
|
|
||||||
const STORAGE_CONTROLLER_POSTGRES_VERSION: u32 = 16;
|
const STORAGE_CONTROLLER_POSTGRES_VERSION: u32 = 16;
|
||||||
|
|
||||||
|
const DB_NAME: &str = "storage_controller";
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
#[derive(Serialize, Deserialize)]
|
||||||
pub struct AttachHookRequest {
|
pub struct AttachHookRequest {
|
||||||
pub tenant_shard_id: TenantShardId,
|
pub tenant_shard_id: TenantShardId,
|
||||||
@@ -65,10 +67,6 @@ pub struct InspectResponse {
|
|||||||
|
|
||||||
impl StorageController {
|
impl StorageController {
|
||||||
pub fn from_env(env: &LocalEnv) -> Self {
|
pub fn from_env(env: &LocalEnv) -> Self {
|
||||||
let path = Utf8PathBuf::from_path_buf(env.base_data_dir.clone())
|
|
||||||
.unwrap()
|
|
||||||
.join("attachments.json");
|
|
||||||
|
|
||||||
// Makes no sense to construct this if pageservers aren't going to use it: assume
|
// Makes no sense to construct this if pageservers aren't going to use it: assume
|
||||||
// pageservers have control plane API set
|
// pageservers have control plane API set
|
||||||
let listen_url = env.control_plane_api.clone().unwrap();
|
let listen_url = env.control_plane_api.clone().unwrap();
|
||||||
@@ -128,7 +126,6 @@ impl StorageController {
|
|||||||
|
|
||||||
Self {
|
Self {
|
||||||
env: env.clone(),
|
env: env.clone(),
|
||||||
path,
|
|
||||||
listen,
|
listen,
|
||||||
private_key,
|
private_key,
|
||||||
public_key,
|
public_key,
|
||||||
@@ -203,7 +200,6 @@ impl StorageController {
|
|||||||
///
|
///
|
||||||
/// Returns the database url
|
/// Returns the database url
|
||||||
pub async fn setup_database(&self) -> anyhow::Result<String> {
|
pub async fn setup_database(&self) -> anyhow::Result<String> {
|
||||||
const DB_NAME: &str = "storage_controller";
|
|
||||||
let database_url = format!("postgresql://localhost:{}/{DB_NAME}", self.postgres_port);
|
let database_url = format!("postgresql://localhost:{}/{DB_NAME}", self.postgres_port);
|
||||||
|
|
||||||
let pg_bin_dir = self.get_pg_bin_dir().await?;
|
let pg_bin_dir = self.get_pg_bin_dir().await?;
|
||||||
@@ -232,6 +228,30 @@ impl StorageController {
|
|||||||
Ok(database_url)
|
Ok(database_url)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn connect_to_database(
|
||||||
|
&self,
|
||||||
|
) -> anyhow::Result<(
|
||||||
|
tokio_postgres::Client,
|
||||||
|
tokio_postgres::Connection<tokio_postgres::Socket, tokio_postgres::tls::NoTlsStream>,
|
||||||
|
)> {
|
||||||
|
tokio_postgres::Config::new()
|
||||||
|
.host("localhost")
|
||||||
|
.port(self.postgres_port)
|
||||||
|
// The user is the ambient operating system user name.
|
||||||
|
// That is an impurity which we want to fix in => TODO https://github.com/neondatabase/neon/issues/8400
|
||||||
|
//
|
||||||
|
// Until we get there, use the ambient operating system user name.
|
||||||
|
// Recent tokio-postgres versions default to this if the user isn't specified.
|
||||||
|
// But tokio-postgres fork doesn't have this upstream commit:
|
||||||
|
// https://github.com/sfackler/rust-postgres/commit/cb609be758f3fb5af537f04b584a2ee0cebd5e79
|
||||||
|
// => we should rebase our fork => TODO https://github.com/neondatabase/neon/issues/8399
|
||||||
|
.user(&whoami::username())
|
||||||
|
.dbname(DB_NAME)
|
||||||
|
.connect(tokio_postgres::NoTls)
|
||||||
|
.await
|
||||||
|
.map_err(anyhow::Error::new)
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn start(&self, retry_timeout: &Duration) -> anyhow::Result<()> {
|
pub async fn start(&self, retry_timeout: &Duration) -> anyhow::Result<()> {
|
||||||
// Start a vanilla Postgres process used by the storage controller for persistence.
|
// Start a vanilla Postgres process used by the storage controller for persistence.
|
||||||
let pg_data_path = Utf8PathBuf::from_path_buf(self.env.base_data_dir.clone())
|
let pg_data_path = Utf8PathBuf::from_path_buf(self.env.base_data_dir.clone())
|
||||||
@@ -256,18 +276,21 @@ impl StorageController {
|
|||||||
if !status.success() {
|
if !status.success() {
|
||||||
anyhow::bail!("initdb failed with status {status}");
|
anyhow::bail!("initdb failed with status {status}");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write a minimal config file:
|
|
||||||
// - Specify the port, since this is chosen dynamically
|
|
||||||
// - Switch off fsync, since we're running on lightweight test environments and when e.g. scale testing
|
|
||||||
// the storage controller we don't want a slow local disk to interfere with that.
|
|
||||||
tokio::fs::write(
|
|
||||||
&pg_data_path.join("postgresql.conf"),
|
|
||||||
format!("port = {}\nfsync=off\n", self.postgres_port),
|
|
||||||
)
|
|
||||||
.await?;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Write a minimal config file:
|
||||||
|
// - Specify the port, since this is chosen dynamically
|
||||||
|
// - Switch off fsync, since we're running on lightweight test environments and when e.g. scale testing
|
||||||
|
// the storage controller we don't want a slow local disk to interfere with that.
|
||||||
|
//
|
||||||
|
// NB: it's important that we rewrite this file on each start command so we propagate changes
|
||||||
|
// from `LocalEnv`'s config file (`.neon/config`).
|
||||||
|
tokio::fs::write(
|
||||||
|
&pg_data_path.join("postgresql.conf"),
|
||||||
|
format!("port = {}\nfsync=off\n", self.postgres_port),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
println!("Starting storage controller database...");
|
println!("Starting storage controller database...");
|
||||||
let db_start_args = [
|
let db_start_args = [
|
||||||
"-w",
|
"-w",
|
||||||
@@ -296,16 +319,45 @@ impl StorageController {
|
|||||||
// Run migrations on every startup, in case something changed.
|
// Run migrations on every startup, in case something changed.
|
||||||
let database_url = self.setup_database().await?;
|
let database_url = self.setup_database().await?;
|
||||||
|
|
||||||
|
// We support running a startup SQL script to fiddle with the database before we launch storcon.
|
||||||
|
// This is used by the test suite.
|
||||||
|
let startup_script_path = self
|
||||||
|
.env
|
||||||
|
.base_data_dir
|
||||||
|
.join("storage_controller_db.startup.sql");
|
||||||
|
let startup_script = match tokio::fs::read_to_string(&startup_script_path).await {
|
||||||
|
Ok(script) => {
|
||||||
|
tokio::fs::remove_file(startup_script_path).await?;
|
||||||
|
script
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
if e.kind() == std::io::ErrorKind::NotFound {
|
||||||
|
// always run some startup script so that this code path doesn't bit rot
|
||||||
|
"BEGIN; COMMIT;".to_string()
|
||||||
|
} else {
|
||||||
|
anyhow::bail!("Failed to read startup script: {e}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let (mut client, conn) = self.connect_to_database().await?;
|
||||||
|
let conn = tokio::spawn(conn);
|
||||||
|
let tx = client.build_transaction();
|
||||||
|
let tx = tx.start().await?;
|
||||||
|
tx.batch_execute(&startup_script).await?;
|
||||||
|
tx.commit().await?;
|
||||||
|
drop(client);
|
||||||
|
conn.await??;
|
||||||
|
|
||||||
let mut args = vec![
|
let mut args = vec![
|
||||||
"-l",
|
"-l",
|
||||||
&self.listen,
|
&self.listen,
|
||||||
"-p",
|
|
||||||
self.path.as_ref(),
|
|
||||||
"--dev",
|
"--dev",
|
||||||
"--database-url",
|
"--database-url",
|
||||||
&database_url,
|
&database_url,
|
||||||
"--max-unavailable-interval",
|
"--max-offline-interval",
|
||||||
&humantime::Duration::from(self.config.max_unavailable).to_string(),
|
&humantime::Duration::from(self.config.max_offline).to_string(),
|
||||||
|
"--max-warming-up-interval",
|
||||||
|
&humantime::Duration::from(self.config.max_warming_up).to_string(),
|
||||||
]
|
]
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|s| s.to_string())
|
.map(|s| s.to_string())
|
||||||
@@ -331,6 +383,10 @@ impl StorageController {
|
|||||||
args.push(format!("--split-threshold={split_threshold}"))
|
args.push(format!("--split-threshold={split_threshold}"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(lag) = self.config.max_secondary_lag_bytes.as_ref() {
|
||||||
|
args.push(format!("--max-secondary-lag-bytes={lag}"))
|
||||||
|
}
|
||||||
|
|
||||||
args.push(format!(
|
args.push(format!(
|
||||||
"--neon-local-repo-dir={}",
|
"--neon-local-repo-dir={}",
|
||||||
self.env.base_data_dir.display()
|
self.env.base_data_dir.display()
|
||||||
@@ -576,6 +632,15 @@ impl StorageController {
|
|||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn node_list(&self) -> anyhow::Result<Vec<NodeDescribeResponse>> {
|
||||||
|
self.dispatch::<(), Vec<NodeDescribeResponse>>(
|
||||||
|
Method::GET,
|
||||||
|
"control/v1/node".to_string(),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
#[instrument(skip(self))]
|
#[instrument(skip(self))]
|
||||||
pub async fn ready(&self) -> anyhow::Result<()> {
|
pub async fn ready(&self) -> anyhow::Result<()> {
|
||||||
self.dispatch::<(), ()>(Method::GET, "ready".to_string(), None)
|
self.dispatch::<(), ()>(Method::GET, "ready".to_string(), None)
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ pageserver_client.workspace = true
|
|||||||
reqwest.workspace = true
|
reqwest.workspace = true
|
||||||
serde.workspace = true
|
serde.workspace = true
|
||||||
serde_json = { workspace = true, features = ["raw_value"] }
|
serde_json = { workspace = true, features = ["raw_value"] }
|
||||||
|
storage_controller_client.workspace = true
|
||||||
thiserror.workspace = true
|
thiserror.workspace = true
|
||||||
tokio.workspace = true
|
tokio.workspace = true
|
||||||
tracing.workspace = true
|
tracing.workspace = true
|
||||||
|
|||||||
@@ -14,15 +14,15 @@ use pageserver_api::{
|
|||||||
},
|
},
|
||||||
shard::{ShardStripeSize, TenantShardId},
|
shard::{ShardStripeSize, TenantShardId},
|
||||||
};
|
};
|
||||||
use pageserver_client::mgmt_api::{self, ResponseErrorMessageExt};
|
use pageserver_client::mgmt_api::{self};
|
||||||
use reqwest::{Method, StatusCode, Url};
|
use reqwest::{Method, StatusCode, Url};
|
||||||
use serde::{de::DeserializeOwned, Serialize};
|
|
||||||
use utils::id::{NodeId, TenantId};
|
use utils::id::{NodeId, TenantId};
|
||||||
|
|
||||||
use pageserver_api::controller_api::{
|
use pageserver_api::controller_api::{
|
||||||
NodeConfigureRequest, NodeRegisterRequest, NodeSchedulingPolicy, PlacementPolicy,
|
NodeConfigureRequest, NodeRegisterRequest, NodeSchedulingPolicy, PlacementPolicy,
|
||||||
TenantShardMigrateRequest, TenantShardMigrateResponse,
|
TenantShardMigrateRequest, TenantShardMigrateResponse,
|
||||||
};
|
};
|
||||||
|
use storage_controller_client::control_api::Client;
|
||||||
|
|
||||||
#[derive(Subcommand, Debug)]
|
#[derive(Subcommand, Debug)]
|
||||||
enum Command {
|
enum Command {
|
||||||
@@ -56,6 +56,10 @@ enum Command {
|
|||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
scheduling: Option<NodeSchedulingPolicy>,
|
scheduling: Option<NodeSchedulingPolicy>,
|
||||||
},
|
},
|
||||||
|
NodeDelete {
|
||||||
|
#[arg(long)]
|
||||||
|
node_id: NodeId,
|
||||||
|
},
|
||||||
/// Modify a tenant's policies in the storage controller
|
/// Modify a tenant's policies in the storage controller
|
||||||
TenantPolicy {
|
TenantPolicy {
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
@@ -245,64 +249,6 @@ impl FromStr for NodeAvailabilityArg {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Client {
|
|
||||||
base_url: Url,
|
|
||||||
jwt_token: Option<String>,
|
|
||||||
client: reqwest::Client,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Client {
|
|
||||||
fn new(base_url: Url, jwt_token: Option<String>) -> Self {
|
|
||||||
Self {
|
|
||||||
base_url,
|
|
||||||
jwt_token,
|
|
||||||
client: reqwest::ClientBuilder::new()
|
|
||||||
.build()
|
|
||||||
.expect("Failed to construct http client"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Simple HTTP request wrapper for calling into storage controller
|
|
||||||
async fn dispatch<RQ, RS>(
|
|
||||||
&self,
|
|
||||||
method: Method,
|
|
||||||
path: String,
|
|
||||||
body: Option<RQ>,
|
|
||||||
) -> mgmt_api::Result<RS>
|
|
||||||
where
|
|
||||||
RQ: Serialize + Sized,
|
|
||||||
RS: DeserializeOwned + Sized,
|
|
||||||
{
|
|
||||||
// The configured URL has the /upcall path prefix for pageservers to use: we will strip that out
|
|
||||||
// for general purpose API access.
|
|
||||||
let url = Url::from_str(&format!(
|
|
||||||
"http://{}:{}/{path}",
|
|
||||||
self.base_url.host_str().unwrap(),
|
|
||||||
self.base_url.port().unwrap()
|
|
||||||
))
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let mut builder = self.client.request(method, url);
|
|
||||||
if let Some(body) = body {
|
|
||||||
builder = builder.json(&body)
|
|
||||||
}
|
|
||||||
if let Some(jwt_token) = &self.jwt_token {
|
|
||||||
builder = builder.header(
|
|
||||||
reqwest::header::AUTHORIZATION,
|
|
||||||
format!("Bearer {jwt_token}"),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
let response = builder.send().await.map_err(mgmt_api::Error::ReceiveBody)?;
|
|
||||||
let response = response.error_from_body().await?;
|
|
||||||
|
|
||||||
response
|
|
||||||
.json()
|
|
||||||
.await
|
|
||||||
.map_err(pageserver_client::mgmt_api::Error::ReceiveBody)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> anyhow::Result<()> {
|
async fn main() -> anyhow::Result<()> {
|
||||||
let cli = Cli::parse();
|
let cli = Cli::parse();
|
||||||
@@ -337,7 +283,7 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
}
|
}
|
||||||
Command::TenantCreate { tenant_id } => {
|
Command::TenantCreate { tenant_id } => {
|
||||||
storcon_client
|
storcon_client
|
||||||
.dispatch(
|
.dispatch::<_, ()>(
|
||||||
Method::POST,
|
Method::POST,
|
||||||
"v1/tenant".to_string(),
|
"v1/tenant".to_string(),
|
||||||
Some(TenantCreateRequest {
|
Some(TenantCreateRequest {
|
||||||
@@ -357,13 +303,16 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
tracing::info!("Delete status: {}", status);
|
tracing::info!("Delete status: {}", status);
|
||||||
}
|
}
|
||||||
Command::Nodes {} => {
|
Command::Nodes {} => {
|
||||||
let resp = storcon_client
|
let mut resp = storcon_client
|
||||||
.dispatch::<(), Vec<NodeDescribeResponse>>(
|
.dispatch::<(), Vec<NodeDescribeResponse>>(
|
||||||
Method::GET,
|
Method::GET,
|
||||||
"control/v1/node".to_string(),
|
"control/v1/node".to_string(),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
|
resp.sort_by(|a, b| a.listen_http_addr.cmp(&b.listen_http_addr));
|
||||||
|
|
||||||
let mut table = comfy_table::Table::new();
|
let mut table = comfy_table::Table::new();
|
||||||
table.set_header(["Id", "Hostname", "Scheduling", "Availability"]);
|
table.set_header(["Id", "Hostname", "Scheduling", "Availability"]);
|
||||||
for node in resp {
|
for node in resp {
|
||||||
@@ -395,13 +344,16 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
.await?;
|
.await?;
|
||||||
}
|
}
|
||||||
Command::Tenants {} => {
|
Command::Tenants {} => {
|
||||||
let resp = storcon_client
|
let mut resp = storcon_client
|
||||||
.dispatch::<(), Vec<TenantDescribeResponse>>(
|
.dispatch::<(), Vec<TenantDescribeResponse>>(
|
||||||
Method::GET,
|
Method::GET,
|
||||||
"control/v1/tenant".to_string(),
|
"control/v1/tenant".to_string(),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
|
resp.sort_by(|a, b| a.tenant_id.cmp(&b.tenant_id));
|
||||||
|
|
||||||
let mut table = comfy_table::Table::new();
|
let mut table = comfy_table::Table::new();
|
||||||
table.set_header([
|
table.set_header([
|
||||||
"TenantId",
|
"TenantId",
|
||||||
@@ -650,6 +602,11 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
.dispatch::<(), ()>(Method::POST, format!("debug/v1/node/{node_id}/drop"), None)
|
.dispatch::<(), ()>(Method::POST, format!("debug/v1/node/{node_id}/drop"), None)
|
||||||
.await?;
|
.await?;
|
||||||
}
|
}
|
||||||
|
Command::NodeDelete { node_id } => {
|
||||||
|
storcon_client
|
||||||
|
.dispatch::<(), ()>(Method::DELETE, format!("control/v1/node/{node_id}"), None)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
Command::TenantSetTimeBasedEviction {
|
Command::TenantSetTimeBasedEviction {
|
||||||
tenant_id,
|
tenant_id,
|
||||||
period,
|
period,
|
||||||
|
|||||||
15
deny.toml
15
deny.toml
@@ -4,6 +4,7 @@
|
|||||||
# to your expectations and requirements.
|
# to your expectations and requirements.
|
||||||
|
|
||||||
# Root options
|
# Root options
|
||||||
|
[graph]
|
||||||
targets = [
|
targets = [
|
||||||
{ triple = "x86_64-unknown-linux-gnu" },
|
{ triple = "x86_64-unknown-linux-gnu" },
|
||||||
{ triple = "aarch64-unknown-linux-gnu" },
|
{ triple = "aarch64-unknown-linux-gnu" },
|
||||||
@@ -12,6 +13,7 @@ targets = [
|
|||||||
]
|
]
|
||||||
all-features = false
|
all-features = false
|
||||||
no-default-features = false
|
no-default-features = false
|
||||||
|
[output]
|
||||||
feature-depth = 1
|
feature-depth = 1
|
||||||
|
|
||||||
# This section is considered when running `cargo deny check advisories`
|
# This section is considered when running `cargo deny check advisories`
|
||||||
@@ -19,17 +21,16 @@ feature-depth = 1
|
|||||||
# https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html
|
# https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html
|
||||||
[advisories]
|
[advisories]
|
||||||
db-urls = ["https://github.com/rustsec/advisory-db"]
|
db-urls = ["https://github.com/rustsec/advisory-db"]
|
||||||
vulnerability = "deny"
|
|
||||||
unmaintained = "warn"
|
|
||||||
yanked = "warn"
|
yanked = "warn"
|
||||||
notice = "warn"
|
|
||||||
ignore = []
|
[[advisories.ignore]]
|
||||||
|
id = "RUSTSEC-2023-0071"
|
||||||
|
reason = "the marvin attack only affects private key decryption, not public key signature verification"
|
||||||
|
|
||||||
# This section is considered when running `cargo deny check licenses`
|
# This section is considered when running `cargo deny check licenses`
|
||||||
# More documentation for the licenses section can be found here:
|
# More documentation for the licenses section can be found here:
|
||||||
# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html
|
# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html
|
||||||
[licenses]
|
[licenses]
|
||||||
unlicensed = "deny"
|
|
||||||
allow = [
|
allow = [
|
||||||
"Apache-2.0",
|
"Apache-2.0",
|
||||||
"Artistic-2.0",
|
"Artistic-2.0",
|
||||||
@@ -42,10 +43,6 @@ allow = [
|
|||||||
"OpenSSL",
|
"OpenSSL",
|
||||||
"Unicode-DFS-2016",
|
"Unicode-DFS-2016",
|
||||||
]
|
]
|
||||||
deny = []
|
|
||||||
copyleft = "warn"
|
|
||||||
allow-osi-fsf-free = "neither"
|
|
||||||
default = "deny"
|
|
||||||
confidence-threshold = 0.8
|
confidence-threshold = 0.8
|
||||||
exceptions = [
|
exceptions = [
|
||||||
# Zlib license has some restrictions if we decide to change sth
|
# Zlib license has some restrictions if we decide to change sth
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ echo $result | jq .
|
|||||||
|
|
||||||
generate_id timeline_id
|
generate_id timeline_id
|
||||||
PARAMS=(
|
PARAMS=(
|
||||||
-sb
|
-sbf
|
||||||
-X POST
|
-X POST
|
||||||
-H "Content-Type: application/json"
|
-H "Content-Type: application/json"
|
||||||
-d "{\"new_timeline_id\": \"${timeline_id}\", \"pg_version\": ${PG_VERSION}}"
|
-d "{\"new_timeline_id\": \"${timeline_id}\", \"pg_version\": ${PG_VERSION}}"
|
||||||
|
|||||||
@@ -31,25 +31,14 @@ services:
|
|||||||
restart: always
|
restart: always
|
||||||
image: ${REPOSITORY:-neondatabase}/neon:${TAG:-latest}
|
image: ${REPOSITORY:-neondatabase}/neon:${TAG:-latest}
|
||||||
environment:
|
environment:
|
||||||
- BROKER_ENDPOINT='http://storage_broker:50051'
|
|
||||||
- AWS_ACCESS_KEY_ID=minio
|
- AWS_ACCESS_KEY_ID=minio
|
||||||
- AWS_SECRET_ACCESS_KEY=password
|
- AWS_SECRET_ACCESS_KEY=password
|
||||||
#- RUST_BACKTRACE=1
|
#- RUST_BACKTRACE=1
|
||||||
ports:
|
ports:
|
||||||
#- 6400:6400 # pg protocol handler
|
#- 6400:6400 # pg protocol handler
|
||||||
- 9898:9898 # http endpoints
|
- 9898:9898 # http endpoints
|
||||||
entrypoint:
|
volumes:
|
||||||
- "/bin/sh"
|
- ./pageserver_config:/data/.neon/
|
||||||
- "-c"
|
|
||||||
command:
|
|
||||||
- "/usr/local/bin/pageserver -D /data/.neon/
|
|
||||||
-c \"broker_endpoint=$$BROKER_ENDPOINT\"
|
|
||||||
-c \"listen_pg_addr='0.0.0.0:6400'\"
|
|
||||||
-c \"listen_http_addr='0.0.0.0:9898'\"
|
|
||||||
-c \"remote_storage={endpoint='http://minio:9000',
|
|
||||||
bucket_name='neon',
|
|
||||||
bucket_region='eu-north-1',
|
|
||||||
prefix_in_bucket='/pageserver/'}\""
|
|
||||||
depends_on:
|
depends_on:
|
||||||
- storage_broker
|
- storage_broker
|
||||||
- minio_create_buckets
|
- minio_create_buckets
|
||||||
|
|||||||
@@ -78,7 +78,7 @@ for pg_version in 14 15 16; do
|
|||||||
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/ext-src/pg_hint_plan-src/
|
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/ext-src/pg_hint_plan-src/
|
||||||
rm -rf $TMPDIR
|
rm -rf $TMPDIR
|
||||||
# We are running tests now
|
# We are running tests now
|
||||||
if docker exec -e SKIP=rum-src,timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pgtap-src,pg_tiktoken-src,pg_jsonschema-src,pg_graphql-src,kq_imcx-src,wal2json_2_5-src \
|
if docker exec -e SKIP=timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pgtap-src,pg_tiktoken-src,pg_jsonschema-src,pg_graphql-src,kq_imcx-src,wal2json_2_5-src \
|
||||||
$TEST_CONTAINER_NAME /run-tests.sh | tee testout.txt
|
$TEST_CONTAINER_NAME /run-tests.sh | tee testout.txt
|
||||||
then
|
then
|
||||||
cleanup
|
cleanup
|
||||||
|
|||||||
1
docker-compose/pageserver_config/identity.toml
Normal file
1
docker-compose/pageserver_config/identity.toml
Normal file
@@ -0,0 +1 @@
|
|||||||
|
id=1234
|
||||||
5
docker-compose/pageserver_config/pageserver.toml
Normal file
5
docker-compose/pageserver_config/pageserver.toml
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
broker_endpoint='http://storage_broker:50051'
|
||||||
|
pg_distrib_dir='/usr/local/'
|
||||||
|
listen_pg_addr='0.0.0.0:6400'
|
||||||
|
listen_http_addr='0.0.0.0:9898'
|
||||||
|
remote_storage={ endpoint='http://minio:9000', bucket_name='neon', bucket_region='eu-north-1', prefix_in_bucket='/pageserver' }
|
||||||
@@ -1,15 +1,15 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
set -x
|
set -x
|
||||||
|
|
||||||
cd /ext-src
|
cd /ext-src || exit 2
|
||||||
FAILED=
|
FAILED=
|
||||||
LIST=$((echo ${SKIP} | sed 's/,/\n/g'; ls -d *-src) | sort | uniq -u)
|
LIST=$( (echo "${SKIP//","/"\n"}"; ls -d -- *-src) | sort | uniq -u)
|
||||||
for d in ${LIST}
|
for d in ${LIST}
|
||||||
do
|
do
|
||||||
[ -d ${d} ] || continue
|
[ -d "${d}" ] || continue
|
||||||
psql -c "select 1" >/dev/null || break
|
psql -c "select 1" >/dev/null || break
|
||||||
make -C ${d} installcheck || FAILED="${d} ${FAILED}"
|
USE_PGXS=1 make -C "${d}" installcheck || FAILED="${d} ${FAILED}"
|
||||||
done
|
done
|
||||||
[ -z "${FAILED}" ] && exit 0
|
[ -z "${FAILED}" ] && exit 0
|
||||||
echo ${FAILED}
|
echo "${FAILED}"
|
||||||
exit 1
|
exit 1
|
||||||
@@ -1,13 +1,18 @@
|
|||||||
# Summary
|
# Summary
|
||||||
|
|
||||||
|
# Looking for `neon.tech` docs?
|
||||||
|
|
||||||
|
This page linkes to a selection of technical content about the open source code in this repository.
|
||||||
|
|
||||||
|
Please visit https://neon.tech/docs for documentation about using the Neon service, which is based on the code
|
||||||
|
in this repository.
|
||||||
|
|
||||||
|
# Architecture
|
||||||
|
|
||||||
[Introduction]()
|
[Introduction]()
|
||||||
- [Separation of Compute and Storage](./separation-compute-storage.md)
|
- [Separation of Compute and Storage](./separation-compute-storage.md)
|
||||||
|
|
||||||
# Architecture
|
|
||||||
|
|
||||||
- [Compute]()
|
- [Compute]()
|
||||||
- [WAL proposer]()
|
|
||||||
- [WAL Backpressure]()
|
|
||||||
- [Postgres changes](./core_changes.md)
|
- [Postgres changes](./core_changes.md)
|
||||||
|
|
||||||
- [Pageserver](./pageserver.md)
|
- [Pageserver](./pageserver.md)
|
||||||
@@ -16,33 +21,15 @@
|
|||||||
- [WAL Redo](./pageserver-walredo.md)
|
- [WAL Redo](./pageserver-walredo.md)
|
||||||
- [Page cache](./pageserver-pagecache.md)
|
- [Page cache](./pageserver-pagecache.md)
|
||||||
- [Storage](./pageserver-storage.md)
|
- [Storage](./pageserver-storage.md)
|
||||||
- [Datadir mapping]()
|
|
||||||
- [Layer files]()
|
|
||||||
- [Branching]()
|
|
||||||
- [Garbage collection]()
|
|
||||||
- [Cloud Storage]()
|
|
||||||
- [Processing a GetPage request](./pageserver-processing-getpage.md)
|
- [Processing a GetPage request](./pageserver-processing-getpage.md)
|
||||||
- [Processing WAL](./pageserver-processing-wal.md)
|
- [Processing WAL](./pageserver-processing-wal.md)
|
||||||
- [Management API]()
|
|
||||||
- [Tenant Rebalancing]()
|
|
||||||
|
|
||||||
- [WAL Service](walservice.md)
|
- [WAL Service](walservice.md)
|
||||||
- [Consensus protocol](safekeeper-protocol.md)
|
- [Consensus protocol](safekeeper-protocol.md)
|
||||||
- [Management API]()
|
|
||||||
- [Rebalancing]()
|
|
||||||
|
|
||||||
- [Control Plane]()
|
|
||||||
|
|
||||||
- [Proxy]()
|
|
||||||
|
|
||||||
- [Source view](./sourcetree.md)
|
- [Source view](./sourcetree.md)
|
||||||
- [docker.md](./docker.md) — Docker images and building pipeline.
|
- [docker.md](./docker.md) — Docker images and building pipeline.
|
||||||
- [Error handling and logging](./error-handling.md)
|
- [Error handling and logging](./error-handling.md)
|
||||||
- [Testing]()
|
|
||||||
- [Unit testing]()
|
|
||||||
- [Integration testing]()
|
|
||||||
- [Benchmarks]()
|
|
||||||
|
|
||||||
|
|
||||||
- [Glossary](./glossary.md)
|
- [Glossary](./glossary.md)
|
||||||
|
|
||||||
@@ -58,28 +45,6 @@
|
|||||||
|
|
||||||
# RFCs
|
# RFCs
|
||||||
|
|
||||||
- [RFCs](./rfcs/README.md)
|
Major changes are documented in RFCS:
|
||||||
|
- See [RFCs](./rfcs/README.md) for more information
|
||||||
- [002-storage](rfcs/002-storage.md)
|
- view the RFCs at https://github.com/neondatabase/neon/tree/main/docs/rfcs
|
||||||
- [003-laptop-cli](rfcs/003-laptop-cli.md)
|
|
||||||
- [004-durability](rfcs/004-durability.md)
|
|
||||||
- [005-zenith_local](rfcs/005-zenith_local.md)
|
|
||||||
- [006-laptop-cli-v2-CLI](rfcs/006-laptop-cli-v2-CLI.md)
|
|
||||||
- [006-laptop-cli-v2-repository-structure](rfcs/006-laptop-cli-v2-repository-structure.md)
|
|
||||||
- [007-serverless-on-laptop](rfcs/007-serverless-on-laptop.md)
|
|
||||||
- [008-push-pull](rfcs/008-push-pull.md)
|
|
||||||
- [009-snapshot-first-storage-cli](rfcs/009-snapshot-first-storage-cli.md)
|
|
||||||
- [009-snapshot-first-storage](rfcs/009-snapshot-first-storage.md)
|
|
||||||
- [009-snapshot-first-storage-pitr](rfcs/009-snapshot-first-storage-pitr.md)
|
|
||||||
- [010-storage_details](rfcs/010-storage_details.md)
|
|
||||||
- [011-retention-policy](rfcs/011-retention-policy.md)
|
|
||||||
- [012-background-tasks](rfcs/012-background-tasks.md)
|
|
||||||
- [013-term-history](rfcs/013-term-history.md)
|
|
||||||
- [014-safekeepers-gossip](rfcs/014-safekeepers-gossip.md)
|
|
||||||
- [014-storage-lsm](rfcs/014-storage-lsm.md)
|
|
||||||
- [015-storage-messaging](rfcs/015-storage-messaging.md)
|
|
||||||
- [016-connection-routing](rfcs/016-connection-routing.md)
|
|
||||||
- [017-timeline-data-management](rfcs/017-timeline-data-management.md)
|
|
||||||
- [018-storage-messaging-2](rfcs/018-storage-messaging-2.md)
|
|
||||||
- [019-tenant-timeline-lifecycles](rfcs/019-tenant-timeline-lifecycles.md)
|
|
||||||
- [cluster-size-limits](rfcs/cluster-size-limits.md)
|
|
||||||
|
|||||||
252
docs/rfcs/034-ancestor-deletion.md
Normal file
252
docs/rfcs/034-ancestor-deletion.md
Normal file
@@ -0,0 +1,252 @@
|
|||||||
|
# Ancestor Timeline Deletion
|
||||||
|
|
||||||
|
Created on: 2024-02-23
|
||||||
|
|
||||||
|
Author: John Spray
|
||||||
|
|
||||||
|
# Summary
|
||||||
|
|
||||||
|
When a tenant creates a new timeline that they will treat as their 'main' history,
|
||||||
|
it is awkward to permanently retain an 'old main' timeline as its ancestor. Currently
|
||||||
|
this is necessary because it is forbidden to delete a timeline which has descendents.
|
||||||
|
|
||||||
|
A new pageserver API is proposed to 'adopt' data from a parent timeline into
|
||||||
|
one of its children, such that the link between ancestor and child can be severed,
|
||||||
|
leaving the parent in a state where it may then be deleted.
|
||||||
|
|
||||||
|
# Motivation
|
||||||
|
|
||||||
|
Retaining parent timelines currently has two costs:
|
||||||
|
|
||||||
|
- Cognitive load on users, who have to remember which is the "real" main timeline.
|
||||||
|
- Storage capacity cost, as the parent timeline will retain layers up to the
|
||||||
|
child's timeline point, even if the child fully covers its keyspace with image
|
||||||
|
layers and will never actually read from the parent.
|
||||||
|
|
||||||
|
# Solution
|
||||||
|
|
||||||
|
A new pageserver API `PUT /v1/tenant/:tenant_id/timeline/:timeline_id/detach_ancestor`
|
||||||
|
will be added. The `timeline_id` in this URL is that of the _child_ timeline that we
|
||||||
|
wish to detach from its parent.
|
||||||
|
|
||||||
|
On success, this API will leave the following state:
|
||||||
|
|
||||||
|
- The detached child timeline will no longer have an ancestor, and will contain all
|
||||||
|
the data needed to service reads without recursing into an ancestor.
|
||||||
|
- Any other children of the parent whose timeline points were at a lower LSN than
|
||||||
|
the detached child timeline will be modified to have the child timeline as their
|
||||||
|
new parent.
|
||||||
|
- The parent timeline will still exist, but the child will no longer have it as an
|
||||||
|
ancestor. If this was the last timeline that depended on the parent, then the
|
||||||
|
parent will become deletable.
|
||||||
|
|
||||||
|
This API's implementation will consist of a series of retryable steps, such that
|
||||||
|
on failures/timeout it can safely be called again to reach the target state.
|
||||||
|
|
||||||
|
## Example
|
||||||
|
|
||||||
|
### Before
|
||||||
|
|
||||||
|
The user has "rolled back" their project to LSN X, resulting in a "new main"
|
||||||
|
timeline. The parent "old main" timeline still exists, and they would like
|
||||||
|
to clean it up.
|
||||||
|
|
||||||
|
They have two other timelines A and B. A is from before the rollback point,
|
||||||
|
and B is from after the rollback point.
|
||||||
|
|
||||||
|
```
|
||||||
|
----"old main" timeline-------X-------------------------------------------->
|
||||||
|
| | |
|
||||||
|
|-> child A | |
|
||||||
|
|-> "new main" timeline |
|
||||||
|
-> child B
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
### After calling detach ancestor API
|
||||||
|
|
||||||
|
The "new main" timeline is no longer dependent on old main, and neither
|
||||||
|
is child A, because it had a branch point before X.
|
||||||
|
|
||||||
|
The user may now choose to delete child B and "old main" to get to
|
||||||
|
a pristine state. Child B is likely to be unwanted since the user
|
||||||
|
chose to roll back to X, and it branches from after X. However, we
|
||||||
|
don't assume this in the API; it is up to the user to delete it.
|
||||||
|
|
||||||
|
```
|
||||||
|
|----"old main" timeline---------------------------------------------------->
|
||||||
|
|
|
||||||
|
|
|
||||||
|
|
|
||||||
|
-> child B
|
||||||
|
|
||||||
|
|----"new main" timeline--------->
|
||||||
|
|
|
||||||
|
|-> child A
|
||||||
|
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
### After removing timelines
|
||||||
|
|
||||||
|
We end up with a totally clean state that leaves no trace that a rollback
|
||||||
|
ever happened: there is only one root timeline.
|
||||||
|
|
||||||
|
```
|
||||||
|
| ----"new main" timeline----------->
|
||||||
|
|
|
||||||
|
|-> child A
|
||||||
|
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
## Caveats
|
||||||
|
|
||||||
|
Important things for API users to bear in mind:
|
||||||
|
|
||||||
|
- this API does not delete the parent timeline: you must still do that explicitly.
|
||||||
|
- if there are other child timelines ahead of the branch point of the detached
|
||||||
|
child, the parent won't be deletable: you must either delete or detach those
|
||||||
|
children.
|
||||||
|
- do _not_ simply loop over all children and detach them all: this can have an
|
||||||
|
extremely high storage cost. The detach ancestor API is intended for use on a single
|
||||||
|
timeline to make it the new "main".
|
||||||
|
- The detach ancestor API should also not be
|
||||||
|
exposed directly to the user as button/API, because they might decide
|
||||||
|
to click it for all the children and thereby generate many copies of the
|
||||||
|
parent's data -- the detach ancestor API should be used as part
|
||||||
|
of a high level "clean up after rollback" feature.
|
||||||
|
|
||||||
|
## `detach_ancestor` API implementation
|
||||||
|
|
||||||
|
Terms used in the following sections:
|
||||||
|
|
||||||
|
- "the child": the timeline whose ID is specified in the detach ancestor API URL, also
|
||||||
|
called "new main" in the example.
|
||||||
|
- "the parent": the parent of "the child". Also called "old main" in the example.
|
||||||
|
- "the branch point" the ancestor_lsn of "the child"
|
||||||
|
|
||||||
|
### Phase 1: write out adopted layers to S3
|
||||||
|
|
||||||
|
The child will "adopt" layers from the parent, such that its end state contains
|
||||||
|
all the parent's history as well as its own.
|
||||||
|
|
||||||
|
For all layers in the parent's layer map whose high LSN is below the branch
|
||||||
|
point, issue S3 CopyObject requests to duplicate them into the child timeline's
|
||||||
|
prefix. Do not add them to the child's layer map yet.
|
||||||
|
|
||||||
|
For delta layers in the parent's layer map which straddle the branch point, read them
|
||||||
|
and write out only content up to the branch point into new layer objects.
|
||||||
|
|
||||||
|
This is a long running operation if the parent has many layers: it should be
|
||||||
|
implemented in a way that resumes rather than restarting from scratch, if the API
|
||||||
|
times out and is called again.
|
||||||
|
|
||||||
|
As an optimization, if there are no other timelines that will be adopted into
|
||||||
|
the child, _and_ the child's image layers already full cover the branch LSN,
|
||||||
|
then we may skip adopting layers.
|
||||||
|
|
||||||
|
### Phase 2: update the child's index
|
||||||
|
|
||||||
|
Having written out all needed layers in phase 1, atomically link them all
|
||||||
|
into the child's IndexPart and upload to S3. This may be done while the
|
||||||
|
child Timeline is still running.
|
||||||
|
|
||||||
|
### Phase 3: modify timelines ancestry
|
||||||
|
|
||||||
|
Modify the child's ancestor to None, and upload its IndexPart to persist the change.
|
||||||
|
|
||||||
|
For all timelines which have the same parent as the child, and have a branch
|
||||||
|
point lower than our branch point, switch their ancestor_timeline to the child,
|
||||||
|
and upload their IndexPart to persist the change.
|
||||||
|
|
||||||
|
## Alternatives considered
|
||||||
|
|
||||||
|
### Generate full image layer on child, rather than adopting parent deltas
|
||||||
|
|
||||||
|
This would work for the case of a single child, but would prevent re-targeting
|
||||||
|
other timelines that depended on the parent. If we detached many children this
|
||||||
|
way, the storage cost would become prohibitive (consider a 1TB database with
|
||||||
|
100 child timelines: it would cost 100TiB if they all generated their own image layers).
|
||||||
|
|
||||||
|
### Don't rewrite anything: just fake it in the API
|
||||||
|
|
||||||
|
We could add a layer of indirection that let a child "pretend" that it had no
|
||||||
|
ancestor, when in reality it still had the parent. The pageserver API could
|
||||||
|
accept deletion of ancestor timelines, and just update child metadata to make
|
||||||
|
them look like they have no ancestor.
|
||||||
|
|
||||||
|
This would not achieve the desired reduction in storage cost, and may well be more
|
||||||
|
complex to maintain than simply implementing the API described in this RFC.
|
||||||
|
|
||||||
|
### Avoid copying objects: enable child index to use parent layers directly
|
||||||
|
|
||||||
|
We could teach IndexPart to store a TimelineId for each layer, such that a child
|
||||||
|
timeline could reference a parent's layers directly, rather than copying them
|
||||||
|
into the child's prefix.
|
||||||
|
|
||||||
|
This would impose a cost for the normal case of indices that only target the
|
||||||
|
timeline's own layers, add complexity, and break the useful simplifying
|
||||||
|
invariant that timelines "own" their own path. If child timelines were
|
||||||
|
referencing layers from the parent, we would have to ensure that the parent
|
||||||
|
never runs GC/compaction again, which would make the API less flexible (the
|
||||||
|
proposal in this RFC enables deletion of the parent but doesn't require it.)
|
||||||
|
|
||||||
|
## Performance
|
||||||
|
|
||||||
|
### Adopting layers
|
||||||
|
|
||||||
|
- CopyObject is a relatively cheap operation, but we may need to issue tens of thousands
|
||||||
|
of such requests: this can take up to tens of seconds and will compete for RemoteStorage
|
||||||
|
semaphore units with other activity on the pageserver.
|
||||||
|
- If we are running on storage backend that doesn't implement CopyObject, then
|
||||||
|
this part will be much more expensive as we would stream all layer content
|
||||||
|
through the pageserver. This is no different to issuing a lot
|
||||||
|
of reads to a timeline that does not have a warm local cache: it will move
|
||||||
|
a lot of gigabytes, but that shouldn't break anything.
|
||||||
|
- Generating truncated layers for delta that straddle the branch point will
|
||||||
|
require streaming read/write of all the layers in question.
|
||||||
|
|
||||||
|
### Updating timeline ancestry
|
||||||
|
|
||||||
|
The simplest way to update timeline ancestry will probably be to stop and start
|
||||||
|
all the Timeline objects: this is preferable to the complexity of making their
|
||||||
|
ancestry mutable at runtime.
|
||||||
|
|
||||||
|
There will be a corresponding "stutter" in the availability of the timelines,
|
||||||
|
of the order 10-100ms, which is the time taken to upload their IndexPart, and
|
||||||
|
restart the Timeline.
|
||||||
|
|
||||||
|
# Interaction with other features
|
||||||
|
|
||||||
|
## Concurrent timeline creation
|
||||||
|
|
||||||
|
If new historic timelines are created using the parent as an ancestor while the
|
||||||
|
detach ancestor API is running, they will not be re-parented to the child. This
|
||||||
|
doesn't break anything, but it leaves the parent in a state where it might not
|
||||||
|
be possible to delete it.
|
||||||
|
|
||||||
|
Since timeline creations are an explicit user action, this is not something we need to
|
||||||
|
worry about as the storage layer: a user who wants to delete their parent timeline will not create
|
||||||
|
new children, and if they do, they can choose to delete those children to
|
||||||
|
enable deleting the parent.
|
||||||
|
|
||||||
|
For the least surprise to the user, before starting the detach ancestor branch
|
||||||
|
operation, the control plane should wait until all branches are created and not
|
||||||
|
allow any branches to be created before the branch point on the ancestor branch
|
||||||
|
while the operation is ongoing.
|
||||||
|
|
||||||
|
## WAL based disaster recovery
|
||||||
|
|
||||||
|
WAL based disaster recovery currently supports only restoring of the main
|
||||||
|
branch. Enabling WAL based disaster recovery in the future requires that we
|
||||||
|
keep a record which timeline generated the WAL and at which LSN was a parent
|
||||||
|
detached. Keep a list of timeline ids and the LSN in which they were detached in
|
||||||
|
the `index_part.json`. Limit the size of the list to 100 first entries, after
|
||||||
|
which the WAL disaster recovery will not be possible.
|
||||||
|
|
||||||
|
## Sharded tenants
|
||||||
|
|
||||||
|
For sharded tenants, calls to the detach ancestor API will pass through the storage
|
||||||
|
controller, which will handle them the same as timeline creations: invoke first
|
||||||
|
on shard zero, and then on all the other shards.
|
||||||
507
docs/rfcs/035-timeline-archive.md
Normal file
507
docs/rfcs/035-timeline-archive.md
Normal file
@@ -0,0 +1,507 @@
|
|||||||
|
# Timeline Archival
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
This RFC describes a mechanism for pageservers to eliminate local storage + compute work
|
||||||
|
for timelines which are not in use, in response to external API calls to "archive" a timeline.
|
||||||
|
|
||||||
|
The archived state roughly corresponds to fully offloading a timeline to object storage, such
|
||||||
|
that its cost is purely the cost of that object storage.
|
||||||
|
|
||||||
|
## Motivation
|
||||||
|
|
||||||
|
Archived timelines serve multiple purposes:
|
||||||
|
- Act as a 'snapshot' for workloads that would like to retain restorable copies of their
|
||||||
|
database from longer ago than their PITR window.
|
||||||
|
- Enable users to create huge numbers of branches (e.g. one per github PR) without having
|
||||||
|
to diligently clean them up later to avoid overloading the pageserver (currently we support
|
||||||
|
up to ~500 branches per tenant).
|
||||||
|
|
||||||
|
### Prior art
|
||||||
|
|
||||||
|
Most storage and database systems have some form of snapshot, which can be implemented several ways:
|
||||||
|
1. full copies of data (e.g. an EBS snapshot to S3)
|
||||||
|
2. shallow snapshots which are CoW relative to the original version of the data, e.g. on a typical NFS appliance, or a filesystem like CephFS.
|
||||||
|
3. a series of snapshots which are CoW or de-duplicated relative to one another.
|
||||||
|
|
||||||
|
Today's Neon branches are approximately like `2.`, although due to implementation details branches
|
||||||
|
often end up storing much more data than they really need, as parent branches assume that all data
|
||||||
|
at the branch point is needed. The layers pinned in the parent branch may have a much larger size
|
||||||
|
than the physical size of a compressed image layer representing the data at the branch point.
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- Enter & exit the archived state in response to external admin API calls
|
||||||
|
- API calls to modify the archived state are atomic and durable
|
||||||
|
- An archived timeline should eventually (once out of PITR window) use an efficient compressed
|
||||||
|
representation, and avoid retaining arbitrarily large data in its parent branch.
|
||||||
|
- Remote object GETs during tenant start may be O(N) with the number of _active_ branches,
|
||||||
|
but must not scale with the number of _archived_ branches.
|
||||||
|
- Background I/O for archived branches should only be done a limited number of times to evolve them
|
||||||
|
to a long-term-efficient state (e.g. rewriting to image layers). There should be no ongoing "housekeeping"
|
||||||
|
overhead for archived branches, including operations related to calculating sizes for billing.
|
||||||
|
- The pageserver should put no load on the safekeeper for archived branches.
|
||||||
|
- Performance of un-archiving a branch must make good use of S3/disk bandwidth to restore the branch
|
||||||
|
to a performant state in a short time (linear with the branch's logical size)
|
||||||
|
|
||||||
|
## Non Goals
|
||||||
|
|
||||||
|
- Archived branches are not a literal `fullbackup` postgres snapshot: they are still stored
|
||||||
|
in Neon's internal format.
|
||||||
|
- Compute cold starts after activating an archived branch will not have comparable performance to
|
||||||
|
cold starts on an active branch.
|
||||||
|
- Archived branches will not use any new/additional compression or de-duplication beyond what
|
||||||
|
is already implemented for image layers (zstd per page).
|
||||||
|
- The pageserver will not "auto start" archived branches in response to page_service API requests: they
|
||||||
|
are only activated explicitly via the HTTP API.
|
||||||
|
- We will not implement a total offload of archived timelines from safekeepers: their control file (small) will
|
||||||
|
remain on local disk, although existing eviction mechanisms will remove any segments from local disk.
|
||||||
|
- We will not expose any prometheus metrics for archived timelines, or make them visible in any
|
||||||
|
detailed HTTP APIs other than the specific API for listing archived timelines.
|
||||||
|
- A parent branch may not be archived unless all its children are.
|
||||||
|
|
||||||
|
## Impacted Components
|
||||||
|
|
||||||
|
pageserver, storage controller
|
||||||
|
|
||||||
|
## Terminology
|
||||||
|
|
||||||
|
**Archived**: a branch is _archived_ when an HTTP API request to archive it has succeeded: the caller
|
||||||
|
may assume that this branch is now very cheap to store, although this may not be physically so until the
|
||||||
|
branch proceeds to the offloaded state.
|
||||||
|
|
||||||
|
**Active** branches are branches which are available for use by page_service clients, and have a relatively
|
||||||
|
high cost due to consuming local storage.
|
||||||
|
|
||||||
|
**Offloaded** branches are a subset of _archived_ branches, which have had their local state removed such
|
||||||
|
that they now consume minimal runtime resources and have a cost similar to the cost of object storage.
|
||||||
|
|
||||||
|
**Activate** (verb): transition from Archived to Active
|
||||||
|
|
||||||
|
**Archive** (verb): transition from Active to Archived
|
||||||
|
|
||||||
|
**Offload** (verb): transition from Archived to Offloaded
|
||||||
|
|
||||||
|
**Offload manifest**: an object stored in S3 that describes timelines which pageservers do not load.
|
||||||
|
|
||||||
|
**Warm up** (verb): operation done on an active branch, by downloading its active layers. Once a branch is
|
||||||
|
warmed up, good performance will be available to page_service clients.
|
||||||
|
|
||||||
|
## Implementation
|
||||||
|
|
||||||
|
### High level flow
|
||||||
|
|
||||||
|
We may think of a timeline which is archived and then activated as proceeding through a series of states:
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
stateDiagram
|
||||||
|
[*] --> Active(warm)
|
||||||
|
Active(warm) --> Archived
|
||||||
|
Archived --> Offloaded
|
||||||
|
Archived --> Active(warm)
|
||||||
|
Offloaded --> Active(cold)
|
||||||
|
Active(cold) --> Active(warm)
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that the transition from Archived to Active(warm) is expected to be fairly rare: the most common lifecycles
|
||||||
|
of branches will be:
|
||||||
|
- Very frequent: Short lived branches: Active -> Deleted
|
||||||
|
- Frequent: Long-lived branches: Active -> Archived -> Offloaded -> Deleted
|
||||||
|
- Rare: Branches used to restore old state: Active ->Archived -> Offloaded -> Active
|
||||||
|
|
||||||
|
These states are _not_ all stored as a single physical state on the timeline, but rather represent the combination
|
||||||
|
of:
|
||||||
|
- the timeline's lifecycle state: active or archived, stored in the timeline's index
|
||||||
|
- its offload state: whether pageserver has chosen to drop local storage of the timeline and write it into the
|
||||||
|
manifest of offloaded timelines.
|
||||||
|
- cache state (whether it's warm or cold).
|
||||||
|
|
||||||
|
### Storage format changes
|
||||||
|
|
||||||
|
There are two storage format changes:
|
||||||
|
1. `index_part.json` gets a new attribute `state` that describes whether the timeline is to
|
||||||
|
be considered active or archived.
|
||||||
|
2. A new tenant-level _manifest_ object `tenant_manifest-v1.json` describes which timelines a tenant does not need to load
|
||||||
|
at startup (and is available for storing other small, rarely changing tenant-wide attributes in future)
|
||||||
|
|
||||||
|
The manifest object will have a format like this:
|
||||||
|
```
|
||||||
|
{
|
||||||
|
"offload_timelines": [
|
||||||
|
{
|
||||||
|
"timeline_id": ...
|
||||||
|
"last_record_lsn": ...
|
||||||
|
"last_record_lsn_time": ...
|
||||||
|
"pitr_interval": ...
|
||||||
|
"last_gc_lsn": ... # equal to last_record_lsn if this branch has no history (i.e. a snapshot)
|
||||||
|
"logical_size": ... # The size at last_record_lsn
|
||||||
|
"physical_size" ...
|
||||||
|
"parent": Option<{
|
||||||
|
"timeline_id"...
|
||||||
|
"lsn"... # Branch point LSN on the parent
|
||||||
|
"requires_data": bool # True if this branch depends on layers in its parent, identify it here
|
||||||
|
|
||||||
|
}>
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The information about a timeline in its offload state is intentionally minimal: just enough to decide:
|
||||||
|
- Whether it requires [archive optimization](#archive-branch-optimization) by rewriting as a set of image layers: we may infer this
|
||||||
|
by checking if now > last_record_lsn_time - pitr_interval, and pitr_lsn < last_record_lsn.
|
||||||
|
- Whether a parent branch should include this offloaded branch in its GC inputs to avoid removing
|
||||||
|
layers that the archived branch depends on
|
||||||
|
- Whether requests to delete this `timeline_id` should be executed (i.e. if a deletion request
|
||||||
|
is received for a timeline_id that isn't in the site of live `Timelines` or in the manifest, then
|
||||||
|
we don't need to go to S3 for the deletion.
|
||||||
|
- How much archived space to report in consumption metrics
|
||||||
|
|
||||||
|
The contents of the manifest's offload list will also be stored as an attribute of `Tenant`, such that the total
set of timelines may be found by the union of `Tenant::timelines` (non-offloaded timelines) and `Tenant::offloaded`
(offloaded timelines).

For split-brain protection, the manifest object will be written with a generation suffix, in the same way as
index_part objects are (see [generation numbers RFC](025-generation-numbers.md)). This will add some complexity, but
give us total safety against two pageservers with the same tenant attached fighting over the object. Existing code
for finding the latest generation and for cleaning up old generations (in the scrubber) will be generalized to cover
the manifest file.

### API & Timeline state

Timelines will store a lifecycle state (an enum of Active or Archived) in their IndexPart. This will
be controlled by a new per-timeline `configure` endpoint. This is intentionally generic naming, which
may be used to control other per-timeline attributes in future (e.g. we may later make the PITR interval
a per-timeline configuration).

`PUT /v1/tenants/{tenant_id}/timelines/{timeline_id}/configure`

```
{
  'state': 'active|archive'
}
```

When archiving a timeline, this API will complete as soon as the timeline's state has been set in index_part, and that index has been uploaded.

When activating a timeline, this API will complete as soon as the timeline's state has been set in index_part,
**and** the `Timeline` object has been instantiated and activated. This will require reading the timeline's
index, but not any data: it should be about as fast as a couple of small S3 requests.

The API will be available at an identical path via the storage controller: calling it on a sharded tenant
will simply map the API call to all the shards.

Archived timelines may never have descendant timelines which are active. This will be enforced at the API level,
such that activating a timeline requires that all its ancestors are active, and archiving a timeline requires
that all its descendants are archived. It is the caller's responsibility to walk the hierarchy of timelines
in the proper order if they would like to archive whole trees of branches.

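A minimal sketch of how this ancestor/descendant rule might be enforced at the API level, assuming a
hypothetical in-memory view of the branch tree (the type and function names here are illustrative, not
the actual pageserver code):

```rust
// Hypothetical sketch of the ancestor/descendant rule for the `configure` endpoint.
// `TimelineTree` and friends are stand-ins for the real in-memory tenant state.
use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq)]
enum LifecycleState {
    Active,
    Archived,
}

struct TimelineNode {
    state: LifecycleState,
    parent: Option<String>,
    children: Vec<String>,
}

struct TimelineTree {
    nodes: HashMap<String, TimelineNode>,
}

impl TimelineTree {
    /// Activating a timeline requires all of its ancestors to be active.
    fn can_activate(&self, timeline_id: &str) -> bool {
        let mut cursor = self.nodes[timeline_id].parent.clone();
        while let Some(parent_id) = cursor {
            let parent = &self.nodes[&parent_id];
            if parent.state != LifecycleState::Active {
                return false; // would leave an active timeline under an archived ancestor
            }
            cursor = parent.parent.clone();
        }
        true
    }

    /// Archiving a timeline requires all of its descendants to already be archived.
    fn can_archive(&self, timeline_id: &str) -> bool {
        let mut stack = self.nodes[timeline_id].children.clone();
        while let Some(child_id) = stack.pop() {
            let child = &self.nodes[&child_id];
            if child.state != LifecycleState::Archived {
                return false; // an active descendant blocks archival
            }
            stack.extend(child.children.iter().cloned());
        }
        true
    }
}
```
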
Because archived timelines will be excluded from the usual timeline listing APIs, a new API specifically
for archived timelines will be added: this is for use in support/debug:

```
GET /v1/tenants/{tenant_id}/archived_timelines

{
  ...same per-timeline content as the tenant manifest...
}
```

### Tenant attach changes

Currently, during Tenant::spawn we list all the timelines in the S3 bucket, and then for each timeline
we load its index_part.json. To avoid the number of GETs scaling linearly with the number of archived
timelines, we must have a single object that tells us which timelines do not need to be loaded. The
number of ListObjects requests while listing timelines will still scale O(N), but this is less problematic
because each request covers 1000 timelines.

This is **not** literally the same as the set of timelines that have state=archived. Rather, it is
the set of timelines which have been offloaded in the background after their state was set to archived.

We may simply skip loading these timelines: there will be no special state of `Timeline`; they just won't
exist from the perspective of an active `Tenant`, except during deletion: timeline deletion will need
to check for offloaded timelines as well as active timelines, to avoid wrongly returning 404 on trying
to delete an offloaded timeline.

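As a rough sketch, attach-time loading might filter the listed timeline IDs against the manifest before
issuing any per-timeline index GETs, and deletion must consult both sets. The function names and use of
plain `String` IDs below are assumptions for illustration, not the real pageserver code.

```rust
use std::collections::HashSet;

// Hypothetical sketch of attach-time filtering and the deletion lookup. The
// sets stand in for `Tenant::timelines` and `Tenant::offloaded`.
fn timelines_to_load(
    listed_in_s3: &[String],
    offloaded_in_manifest: &HashSet<String>,
) -> Vec<String> {
    listed_in_s3
        .iter()
        // Offloaded timelines are skipped entirely: no Timeline object is built
        // and no index_part.json GET is issued for them at startup.
        .filter(|id| !offloaded_in_manifest.contains(*id))
        .cloned()
        .collect()
}

/// Deletion must consider both live and offloaded timelines before returning 404.
fn timeline_known(
    live: &HashSet<String>,
    offloaded: &HashSet<String>,
    timeline_id: &str,
) -> bool {
    live.contains(timeline_id) || offloaded.contains(timeline_id)
}
```
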
### Warm-up API

`PUT /v1/tenants/{tenant_id}/timelines/{timeline_id}/download?wait_ms=1234`

This API will be similar to the existing `download_remote_layers` API, but smarter:

- It will not download _all_ remote layers, just the visible set (i.e. layers needed for a read)
- It will download layers in the visible set until reaching `wait_ms`, then return a struct describing progress
  of downloads, so that the caller can poll.

The _visible set_ mentioned above will be calculated by the pageserver in the background, by taking the set
of readable LSNs (i.e. branch points and heads of branches), and walking the layer map to work out which layers
can possibly be read from these LSNs. This concept of layer visibility is more generally useful for cache
eviction and heatmaps, as well as in this specific case of warming up a timeline.

The caller does not have to wait for the warm-up API, or call it at all. But it is strongly advised
to call it, because otherwise populating local contents for a timeline can take a long time when waiting
for SQL queries to coincidentally hit all the layers, and during that time query latency remains quite
volatile.

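From a caller's perspective, warming up is expected to be a simple poll loop against this endpoint. The
sketch below is a minimal client under stated assumptions: the progress response shape (`layers_total`,
`layers_downloaded`) is invented for illustration and is not a finalized API contract.

```rust
use std::time::Duration;

// Hypothetical client-side poll loop for the warm-up API.
async fn warm_up_timeline(
    client: &reqwest::Client,
    base_url: &str,
    tenant_id: &str,
    timeline_id: &str,
) -> anyhow::Result<()> {
    let url = format!("{base_url}/v1/tenants/{tenant_id}/timelines/{timeline_id}/download");
    loop {
        // Each call downloads more of the visible set for up to wait_ms, then
        // returns a progress struct so we can decide whether to keep polling.
        let progress: serde_json::Value = client
            .put(&url)
            .query(&[("wait_ms", "5000")])
            .send()
            .await?
            .error_for_status()?
            .json()
            .await?;
        let total = progress["layers_total"].as_u64().unwrap_or(0);
        let done = progress["layers_downloaded"].as_u64().unwrap_or(0);
        if done >= total {
            return Ok(()); // visible set fully resident: reads should now be warm
        }
        tokio::time::sleep(Duration::from_secs(1)).await;
    }
}
```
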
### Background work

Archived branches are not subject to normal compaction. Instead, when the compaction loop encounters
an archived branch, it will consider rewriting the branch to just image layers if the branch has no history
([archive branch optimization](#archive-branch-optimization)), or offloading the timeline from local disk
if its state permits that.

Additionally, the tenant compaction task will walk the state of already offloaded timelines to consider
optimizing their storage, e.g. if a timeline had some history when offloaded, but since then its PITR
has elapsed and it can now be rewritten to image layers.

#### Archive branch offload

Recall that when we archive a timeline via the HTTP API, this only sets a state: it doesn't do
any actual work.

This work is done in the background compaction loop. It makes sense to tag this work on to the compaction
loop, because it is spiritually aligned: offloading data for archived branches improves storage efficiency.

The condition for offload is simple:

- a `Timeline` object exists with state `Archived`
- the timeline does not have any non-offloaded children.

Regarding the condition that children must be offloaded, this will always eventually be true, because
we enforce at the API level that children of archived timelines must themselves be archived, and all
archived timelines will eventually be offloaded.

Offloading a timeline is simple:

- Read the timeline's attributes that we will store in its offloaded state (especially its logical size)
- Call `shutdown()` on the timeline and remove it from the `Tenant` (as if we were about to delete it)
- Erase all the timeline's content from local storage (`remove_dir_all` on its path)
- Write the tenant manifest to S3 to prevent this timeline being loaded on next start.

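A hedged sketch of what this offload step might look like when run from the compaction loop. The
`TenantStub`/`TimelineStub` types and the manifest-writing helper are placeholders invented for
illustration; the real code also has to shut down the timeline's background tasks before removing it.

```rust
use std::collections::HashMap;
use std::path::PathBuf;

// Minimal stand-ins for the real pageserver types; everything here is a
// placeholder for illustration, not actual pageserver code.
struct OffloadedEntry {
    logical_size: u64,
}

struct TimelineStub {
    archived: bool,
    local_path: PathBuf,
    logical_size: u64,
    children: Vec<String>,
}

struct TenantStub {
    timelines: HashMap<String, TimelineStub>,
    offloaded: HashMap<String, OffloadedEntry>,
}

impl TenantStub {
    /// Offload one archived timeline: record its attributes, drop it from the
    /// live set, erase local data, then persist the tenant manifest.
    fn maybe_offload(&mut self, timeline_id: &str) -> std::io::Result<()> {
        let eligible = match self.timelines.get(timeline_id) {
            Some(t) => {
                t.archived
                    && t.children
                        .iter()
                        .all(|child| !self.timelines.contains_key(child))
            }
            None => false,
        };
        if !eligible {
            return Ok(()); // still active, or has a non-offloaded child
        }
        // (The real code would also shut down the Timeline's tasks here.)
        let timeline = self.timelines.remove(timeline_id).expect("checked above");
        self.offloaded.insert(
            timeline_id.to_owned(),
            OffloadedEntry {
                logical_size: timeline.logical_size,
            },
        );
        // Erase the timeline's local storage.
        std::fs::remove_dir_all(&timeline.local_path)?;
        // Persist the manifest; in the real system this is an S3 PUT with a
        // generation-suffixed key.
        self.write_tenant_manifest()
    }

    fn write_tenant_manifest(&self) -> std::io::Result<()> {
        // Placeholder: serialize `self.offloaded` and upload it.
        Ok(())
    }
}
```
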
#### Archive branch optimization (flattening)

When we offloaded a branch, it might have had some history that prevented rewriting it to a single
point-in-time set of image layers. For example, a branch might have several days of writes and a
7-day PITR: when we archive it, it still has those days of history.

Once the PITR has expired, we have an opportunity to reduce the physical footprint of the branch by:

- Writing compressed image layers within the archived branch, as these are a more efficient way of storing
  a point in time compared with delta layers
- Updating the branch's offload metadata to indicate that this branch no longer depends on its ancestor
  for data, i.e. the ancestor is free to GC layer files at+below the branch point

Fully compacting an archived branch into image layers at a single LSN may be thought of as *flattening* the
branch, such that it is now a one-dimensional keyspace rather than a two-dimensional key/lsn space. It becomes
a true snapshot at that LSN.

It is not always more efficient to flatten a branch than to keep some extra history on the parent: this
is described in more detail in [optimizations](#delaying-storage-optimization-if-retaining-parent-layers-is-cheaper).

Archive branch optimization should be done _before_ background offloads during compaction, because there may
be timelines which are ready to be offloaded but would also benefit from the optimization step before
being offloaded. For example, a branch which has already fallen out of the PITR window and has no history
of its own may be immediately re-written as a series of image layers before being offloaded.

### Consumption metrics

Archived timelines and offloaded timelines will be excluded from the synthetic size calculation, in anticipation
that billing structures based on consumption metrics are highly likely to apply different $/GB rates to archived
vs. ordinary content.

Archived and offloaded timelines' logical size will be reported under the existing `timeline_logical_size`
variant of `MetricsKey`: receivers are then free to bill on this metric as they please.

### Secondary locations

Archived timelines (including offloaded timelines) will be excluded from heatmaps; as a result, once
a timeline is archived, its contents will be dropped from secondary locations after the next cycle of
heatmap upload & secondary download.

### Sharding

Archiving or activating a timeline will be done symmetrically across all shards in a tenant, in
the same way that timeline creation and deletion are done. There are no special rules about ordering:
the storage controller may dispatch concurrent calls to all shards when archiving or activating a timeline.

Since consumption metrics are only transmitted from shard zero, the state of archival on this shard
will be authoritative for consumption metrics.

## Error cases

### Errors in sharded tenants

If one shard in a tenant fails an operation but others succeed, the tenant may end up in a mixed
state, where a timeline is archived on some shards but not on others.

We will not bother implementing a rollback mechanism for this: errors in archiving/activating a timeline
are either transient (e.g. S3 unavailable, shutting down), or the fault of the caller (NotFound, BadRequest).
In the transient case callers are expected to retry until success; in the caller-fault case, to make appropriate
API calls to clear up their mistake. We rely on this good behavior of callers to eventually get timelines into a
consistent state across all shards. If callers do leave a timeline in an inconsistent state across shards, this
doesn't break anything, it's just "weird".

This is similar to the status quo for timeline creation and deletion: callers are expected to retry
these operations until they succeed.

### Archiving/activating

Archiving/activating a timeline can fail in a limited number of ways:

1. I/O error storing/reading the timeline's updated index
   - These errors are always retryable: a fundamental design assumption of the pageserver is that remote
     storage errors are always transient.
2. NotFound if the timeline doesn't exist
   - Callers of the API are expected to avoid calling deletion and archival APIs concurrently.
   - The storage controller has runtime locking to prevent races such as deleting a timeline while
     archiving it.
3. BadRequest if the rules around ancestors/descendants of archived timelines would be violated
   - Callers are expected to do their own checks to avoid hitting this case. If they make
     a mistake and encounter this error, they should give up.

### Offloading

Offloading can only fail if remote storage is unavailable, which would prevent us from writing the
tenant manifest. In such error cases, we give up in the expectation that offloading will be tried
again at the next iteration of the compaction loop.

### Archive branch optimization

Optimization is a special form of compaction, so it can encounter all the same errors as regular
compaction: it should return `Result<(), CompactionError>`, and as with compaction it will be retried
on the next iteration of the compaction loop.

## Optimizations

### Delaying storage optimization if retaining parent layers is cheaper

Optimizing archived branches to image layers and thereby enabling parent branch GC to progress
is a safe default: archived branches cannot over-fill a pageserver's local disk, and once they
are offloaded to S3 they're totally safe, inert things.

However, in some cases it can be advantageous to retain extra history on the parent branch rather
than flattening the archived branch. For example, if a 1TB parent branch is rather slow-changing (1GB
of data per day), and archived branches are being created nightly, then writing out full 1TB image layers
for each nightly branch is inefficient compared with just keeping more history on the main branch.

Getting this right requires consideration of:

- Compaction: if keeping more history on the main branch is going to prompt the main branch's compaction to
  write out extra image layers, then it might make more sense to just write out the image layers on
  the archived branch.
- Metadata bloat: keeping extra history on a parent branch doesn't just cost GB of storage, it makes
  the layer map (and index_part) bigger. There are practical limits beyond which writing an indefinitely
  large layer map can cause problems elsewhere.

This optimization can probably be implemented quite cheaply with some basic heuristics like:

- don't bother doing optimization on an archived branch if the LSN distance between
  its branch point and the end of the PITR window is <5% of the logical size of the archived branch.
- ...but don't keep more history on the main branch than double the PITR window.

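A rough sketch of how such a heuristic could be expressed. The 5% ratio and the 2x PITR cap come
straight from the bullets above; the input struct and its field names are assumptions made purely for
illustration.

```rust
use std::time::Duration;

// Illustrative inputs for the flattening heuristic; not an actual pageserver type.
struct ArchivedBranchStats {
    /// LSN distance (bytes of WAL) between the branch point and the end of the
    /// parent's PITR window.
    lsn_distance_to_pitr_end: u64,
    /// Logical size of the archived branch.
    logical_size: u64,
    /// How much extra history the parent is currently retaining for this branch.
    parent_extra_history: Duration,
    /// The parent's configured PITR interval.
    parent_pitr_interval: Duration,
}

/// Decide whether it is worth flattening the archived branch now, or cheaper to
/// keep relying on the parent's history a little longer.
fn should_flatten(stats: &ArchivedBranchStats) -> bool {
    // Never let the parent retain more than double its PITR interval on our behalf.
    if stats.parent_extra_history > stats.parent_pitr_interval * 2 {
        return true;
    }
    // If the history we depend on is tiny relative to our logical size (<5%),
    // writing out full image layers for this branch is not worth it yet.
    let threshold = stats.logical_size / 20; // 5% of logical size
    stats.lsn_distance_to_pitr_end >= threshold
}
```
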
### Creating a timeline in archived state (a snapshot)

Sometimes, one might want to create a branch with no history, which will not be written to
before it is archived. This is a snapshot, although we do not require a special snapshot API,
since a snapshot can be represented as a timeline with no history.

This can be accomplished by simply creating a timeline and then immediately archiving it, but
that is somewhat wasteful: the timeline will spin up various tasks and open a connection to the storage
broker to try and ingest WAL, before being shut down in the subsequent archival call. To explicitly
support this common special case, we may add a parameter to the timeline creation API which
creates a timeline directly in the archived state.

Such a timeline creation will do exactly two I/Os:

- write the index_part object to record the timeline's existence
- when the timeline is offloaded in the next iteration of the compaction loop (~20s later),
  write the tenant manifest.

Later, when the timeline falls off the end of the PITR interval, the usual offload logic will wake
up the 'snapshot' branch and write out image layers.

## Future Work

### Enabling `fullbackup` dumps from archive branches

It would be useful to be able to export an archive branch to another system, or for use in a local
postgres database.

This could be implemented as a general capability for all branches, in which case it would "just work"
for archive branches by activating them. However, downloading all the layers in a branch just to generate
a fullbackup is a bit inefficient: we could implement a special case for flattened archived branches
which streams image layers from S3 and outputs the fullbackup stream without writing the layers out to disk.

Implementing `fullbackup` is a bit more complicated than this because of sharding, but solving that problem
is unrelated to the topic of archived branches (it probably involves having each shard write out a fullbackup
stream to S3 in an intermediate format, and then having one node stitch them together).

### Tagging layers from archived branches

When we know a layer is an image layer written for an archived branch that has fallen off the PITR window,
we may add tags to the S3 objects to enable writing lifecycle policies that transition such layers to even
cheaper storage.

This could be done for all archived layers, or it could be driven by the archival API, to give the pageserver
external hints on which branches are likely to be reactivated, and which branches are good candidates for
tagging for low-performance storage.

Tagging+lifecycles is just one mechanism: one might also directly use S3 storage classes. Other clouds' object
stores have similar mechanisms.

### Storing sequences of archive branches as deltas

When archived branches are used as scheduled snapshots, we could store them even more efficiently
by encoding them as deltas relative to each other (i.e. for nightly snapshots, when we do the
storage optimization for Tuesday's snapshot, we would read Monday's snapshot and store only the modified
pages). This is the kind of encoding that many backup storage systems use.

The utility of this depends a lot on the churn rate of the data, and the cost of doing the delta encoding
vs. just writing out a simple stream of the entire database. For smaller databases, writing out a full
copy is pretty trivial (e.g. writing a compressed copy of a 10GiB database to S3 can take under 10 seconds,
so the complexity tradeoff of diff-encoding it is dubious).

One does not necessarily have to read back the previous snapshot in order to encode the next one: if the
pageserver knows about the schedule, it can intentionally retain extra history on the main branch so that
we can say: "A branch exists from Monday night. I have Monday night's data still active in the main branch,
so now I can read at the Monday LSN and the Tuesday LSN, calculate the delta, and store it as Tuesday's
delta snapshot".

Clearly this all requires careful housekeeping to retain the relationship between branches that depend on
each other: perhaps this would be done by making the archive branches have child/parent relationships with
each other, or perhaps we would permit them to remain children of their original parent, but additionally
have a relationship with the snapshot they're encoded relative to.

Activating a branch that is diff-encoded may require activating several earlier branches too, so figuring
out how frequently to write a full copy is important. This is essentially a zoomed-out version of what
we do with delta layers and image layers within a timeline, except each "layer" is a whole timeline.

## FAQ/Alternatives

### Store all timelines in the tenant manifest

Rather than special-casing offloaded timelines in the offload manifest, we could store a total
manifest of all timelines, eliminating the need for the pageserver to list timelines in S3 on
startup.

That would be a more invasive change (it would require hooking into timeline creation), and would
generate much more I/O to this manifest for tenants that had many branches _and_ frequent
create/delete cycles for short-lived branches. Restricting the manifest to offloaded timelines
means that we only have to cope with the rate at which long-lived timelines are archived, rather
than the rate at which short-lived timelines are created & destroyed.

### Automatically archiving/activating timelines without external API calls

We could implement TTL-driven offload of timelines, waking them up when a page request
arrives.

This has downsides:

- Opacity: if we do TTL-driven offload inside the pageserver, then the end user doesn't
  know which of their branches are in this state, and might get a surprise when they try
  to use such a branch.
- Price fluctuation: if the archival of a branch is used in end-user pricing, then users
  prefer clarity & consistency. Ideally a branch's storage should cost the same from the moment it
  is created, rather than having a usage-dependent storage price.
- Complexity: enabling the page service to call up into the Tenant to activate a timeline
  would be awkward, compared with an external entry point.

### Make offloaded a state of Timeline

To reduce the operator-facing complexity of having some timeline APIs that only return
non-offloaded timelines, we could build the offloaded state into the Timeline type.

`timeline.rs` is already one of the most egregiously long source files in the tree, so
this is rejected on the basis that we need to avoid making that complexity worse.

@@ -44,7 +44,7 @@ If you need to modify the database schema, here’s how to create a migration:
|
|||||||
- Use `diesel migration generate <name>` to create a new migration
|
- Use `diesel migration generate <name>` to create a new migration
|
||||||
- Populate the SQL files in the `migrations/` subdirectory
|
- Populate the SQL files in the `migrations/` subdirectory
|
||||||
- Use `DATABASE_URL=... diesel migration run` to apply the migration you just wrote: this will update the `[schema.rs](http://schema.rs)` file automatically.
|
- Use `DATABASE_URL=... diesel migration run` to apply the migration you just wrote: this will update the `[schema.rs](http://schema.rs)` file automatically.
|
||||||
- This requires a running database: the easiest way to do that is to just run `cargo neon init ; cargo neon start`, which will leave a database available at `postgresql://localhost:1235/attachment_service`
|
- This requires a running database: the easiest way to do that is to just run `cargo neon init ; cargo neon start`, which will leave a database available at `postgresql://localhost:1235/storage_controller`
|
||||||
- Commit the migration files and the changes to schema.rs
|
- Commit the migration files and the changes to schema.rs
|
||||||
- If you need to iterate, you can rewind migrations with `diesel migration revert -a` and then `diesel migration run` again.
|
- If you need to iterate, you can rewind migrations with `diesel migration revert -a` and then `diesel migration run` again.
|
||||||
- The migrations are build into the storage controller binary, and automatically run at startup after it is deployed, so once you’ve committed a migration no further steps are needed.
|
- The migrations are build into the storage controller binary, and automatically run at startup after it is deployed, so once you’ve committed a migration no further steps are needed.
|
||||||
|
|||||||
@@ -21,9 +21,9 @@ implementation where we keep more data than we would need to, do not
|
|||||||
change the synthetic size or incur any costs to the user.
|
change the synthetic size or incur any costs to the user.
|
||||||
|
|
||||||
The synthetic size is calculated for the whole project. It is not
|
The synthetic size is calculated for the whole project. It is not
|
||||||
straightforward to attribute size to individual branches. See "What is
|
straightforward to attribute size to individual branches. See [What is
|
||||||
the size of an individual branch?" for discussion on those
|
the size of an individual branch?](#what-is-the-size-of-an-individual-branch)
|
||||||
difficulties.
|
for a discussion of those difficulties.
|
||||||
|
|
||||||
The synthetic size is designed to:
|
The synthetic size is designed to:
|
||||||
|
|
||||||
@@ -40,8 +40,9 @@ The synthetic size is designed to:
|
|||||||
- logical size is the size of a branch *at a given point in
|
- logical size is the size of a branch *at a given point in
|
||||||
time*. It's the total size of all tables in all databases, as you
|
time*. It's the total size of all tables in all databases, as you
|
||||||
see with "\l+" in psql for example, plus the Postgres SLRUs and some
|
see with "\l+" in psql for example, plus the Postgres SLRUs and some
|
||||||
small amount of metadata. NOTE that currently, Neon does not include
|
small amount of metadata. Note that currently, Neon does not include
|
||||||
the SLRUs and metadata in the logical size. See comment to `get_current_logical_size_non_incremental()`.
|
the SLRUs and metadata in the logical size. Refer to the comment in
|
||||||
|
[`get_current_logical_size_non_incremental()`](/pageserver/src/pgdatadir_mapping.rs#L813-L814).
|
||||||
|
|
||||||
- a "point in time" is defined as an LSN value. You can convert a
|
- a "point in time" is defined as an LSN value. You can convert a
|
||||||
timestamp to an LSN, but the storage internally works with LSNs.
|
timestamp to an LSN, but the storage internally works with LSNs.
|
||||||
|
|||||||
@@ -1,4 +1,6 @@
|
|||||||
|
use std::collections::HashSet;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
/// Request/response types for the storage controller
|
/// Request/response types for the storage controller
|
||||||
/// API (`/control/v1` prefix). Implemented by the server
|
/// API (`/control/v1` prefix). Implemented by the server
|
||||||
@@ -87,7 +89,7 @@ pub struct TenantLocateResponse {
|
|||||||
pub shard_params: ShardParameters,
|
pub shard_params: ShardParameters,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
pub struct TenantDescribeResponse {
|
pub struct TenantDescribeResponse {
|
||||||
pub tenant_id: TenantId,
|
pub tenant_id: TenantId,
|
||||||
pub shards: Vec<TenantDescribeResponseShard>,
|
pub shards: Vec<TenantDescribeResponseShard>,
|
||||||
@@ -110,7 +112,7 @@ pub struct NodeDescribeResponse {
|
|||||||
pub listen_pg_port: u16,
|
pub listen_pg_port: u16,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
pub struct TenantDescribeResponseShard {
|
pub struct TenantDescribeResponseShard {
|
||||||
pub tenant_shard_id: TenantShardId,
|
pub tenant_shard_id: TenantShardId,
|
||||||
|
|
||||||
@@ -150,11 +152,16 @@ impl UtilizationScore {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Clone, Copy, Debug)]
|
#[derive(Serialize, Clone, Copy, Debug)]
|
||||||
#[serde(into = "NodeAvailabilityWrapper")]
|
#[serde(into = "NodeAvailabilityWrapper")]
|
||||||
pub enum NodeAvailability {
|
pub enum NodeAvailability {
|
||||||
// Normal, happy state
|
// Normal, happy state
|
||||||
Active(UtilizationScore),
|
Active(UtilizationScore),
|
||||||
|
// Node is warming up, but we expect it to become available soon. Covers
|
||||||
|
// the time span between the re-attach response being composed on the storage controller
|
||||||
|
// and the first successful heartbeat after the processing of the re-attach response
|
||||||
|
// finishes on the pageserver.
|
||||||
|
WarmingUp(Instant),
|
||||||
// Offline: Tenants shouldn't try to attach here, but they may assume that their
|
// Offline: Tenants shouldn't try to attach here, but they may assume that their
|
||||||
// secondary locations on this node still exist. Newly added nodes are in this
|
// secondary locations on this node still exist. Newly added nodes are in this
|
||||||
// state until we successfully contact them.
|
// state until we successfully contact them.
|
||||||
@@ -164,7 +171,10 @@ pub enum NodeAvailability {
|
|||||||
impl PartialEq for NodeAvailability {
|
impl PartialEq for NodeAvailability {
|
||||||
fn eq(&self, other: &Self) -> bool {
|
fn eq(&self, other: &Self) -> bool {
|
||||||
use NodeAvailability::*;
|
use NodeAvailability::*;
|
||||||
matches!((self, other), (Active(_), Active(_)) | (Offline, Offline))
|
matches!(
|
||||||
|
(self, other),
|
||||||
|
(Active(_), Active(_)) | (Offline, Offline) | (WarmingUp(_), WarmingUp(_))
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -176,6 +186,7 @@ impl Eq for NodeAvailability {}
|
|||||||
#[derive(Serialize, Deserialize, Clone, Copy, Debug)]
|
#[derive(Serialize, Deserialize, Clone, Copy, Debug)]
|
||||||
pub enum NodeAvailabilityWrapper {
|
pub enum NodeAvailabilityWrapper {
|
||||||
Active,
|
Active,
|
||||||
|
WarmingUp,
|
||||||
Offline,
|
Offline,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -185,6 +196,7 @@ impl From<NodeAvailabilityWrapper> for NodeAvailability {
|
|||||||
// Assume the worst utilisation score to begin with. It will later be updated by
|
// Assume the worst utilisation score to begin with. It will later be updated by
|
||||||
// the heartbeats.
|
// the heartbeats.
|
||||||
NodeAvailabilityWrapper::Active => NodeAvailability::Active(UtilizationScore::worst()),
|
NodeAvailabilityWrapper::Active => NodeAvailability::Active(UtilizationScore::worst()),
|
||||||
|
NodeAvailabilityWrapper::WarmingUp => NodeAvailability::WarmingUp(Instant::now()),
|
||||||
NodeAvailabilityWrapper::Offline => NodeAvailability::Offline,
|
NodeAvailabilityWrapper::Offline => NodeAvailability::Offline,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -194,6 +206,7 @@ impl From<NodeAvailability> for NodeAvailabilityWrapper {
|
|||||||
fn from(val: NodeAvailability) -> Self {
|
fn from(val: NodeAvailability) -> Self {
|
||||||
match val {
|
match val {
|
||||||
NodeAvailability::Active(_) => NodeAvailabilityWrapper::Active,
|
NodeAvailability::Active(_) => NodeAvailabilityWrapper::Active,
|
||||||
|
NodeAvailability::WarmingUp(_) => NodeAvailabilityWrapper::WarmingUp,
|
||||||
NodeAvailability::Offline => NodeAvailabilityWrapper::Offline,
|
NodeAvailability::Offline => NodeAvailabilityWrapper::Offline,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -282,6 +295,42 @@ pub enum PlacementPolicy {
|
|||||||
#[derive(Serialize, Deserialize, Debug)]
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
pub struct TenantShardMigrateResponse {}
|
pub struct TenantShardMigrateResponse {}
|
||||||
|
|
||||||
|
/// Metadata health record posted from scrubber.
|
||||||
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
|
pub struct MetadataHealthRecord {
|
||||||
|
pub tenant_shard_id: TenantShardId,
|
||||||
|
pub healthy: bool,
|
||||||
|
pub last_scrubbed_at: chrono::DateTime<chrono::Utc>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
|
pub struct MetadataHealthUpdateRequest {
|
||||||
|
pub healthy_tenant_shards: HashSet<TenantShardId>,
|
||||||
|
pub unhealthy_tenant_shards: HashSet<TenantShardId>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
|
pub struct MetadataHealthUpdateResponse {}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
|
|
||||||
|
pub struct MetadataHealthListUnhealthyResponse {
|
||||||
|
pub unhealthy_tenant_shards: Vec<TenantShardId>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
|
|
||||||
|
pub struct MetadataHealthListOutdatedRequest {
|
||||||
|
#[serde(with = "humantime_serde")]
|
||||||
|
pub not_scrubbed_for: Duration,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
|
|
||||||
|
pub struct MetadataHealthListOutdatedResponse {
|
||||||
|
pub health_records: Vec<MetadataHealthRecord>,
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|||||||
@@ -107,7 +107,10 @@ impl Key {
|
|||||||
/// As long as Neon does not support tablespace (because of lack of access to local file system),
|
/// As long as Neon does not support tablespace (because of lack of access to local file system),
|
||||||
/// we can assume that only some predefined namespace OIDs are used which can fit in u16
|
/// we can assume that only some predefined namespace OIDs are used which can fit in u16
|
||||||
pub fn to_i128(&self) -> i128 {
|
pub fn to_i128(&self) -> i128 {
|
||||||
assert!(self.field2 <= 0xFFFF || self.field2 == 0xFFFFFFFF || self.field2 == 0x22222222);
|
assert!(
|
||||||
|
self.field2 <= 0xFFFF || self.field2 == 0xFFFFFFFF || self.field2 == 0x22222222,
|
||||||
|
"invalid key: {self}",
|
||||||
|
);
|
||||||
(((self.field1 & 0x7F) as i128) << 120)
|
(((self.field1 & 0x7F) as i128) << 120)
|
||||||
| (((self.field2 & 0xFFFF) as i128) << 104)
|
| (((self.field2 & 0xFFFF) as i128) << 104)
|
||||||
| ((self.field3 as i128) << 72)
|
| ((self.field3 as i128) << 72)
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ pub mod utilization;
|
|||||||
pub use utilization::PageserverUtilization;
|
pub use utilization::PageserverUtilization;
|
||||||
|
|
||||||
use std::{
|
use std::{
|
||||||
borrow::Cow,
|
|
||||||
collections::HashMap,
|
collections::HashMap,
|
||||||
io::{BufRead, Read},
|
io::{BufRead, Read},
|
||||||
num::{NonZeroU64, NonZeroUsize},
|
num::{NonZeroU64, NonZeroUsize},
|
||||||
@@ -20,7 +19,6 @@ use serde::{Deserialize, Serialize};
|
|||||||
use serde_with::serde_as;
|
use serde_with::serde_as;
|
||||||
use utils::{
|
use utils::{
|
||||||
completion,
|
completion,
|
||||||
history_buffer::HistoryBufferWithDropCounter,
|
|
||||||
id::{NodeId, TenantId, TimelineId},
|
id::{NodeId, TenantId, TimelineId},
|
||||||
lsn::Lsn,
|
lsn::Lsn,
|
||||||
serde_system_time,
|
serde_system_time,
|
||||||
@@ -294,7 +292,6 @@ pub struct TenantConfig {
|
|||||||
pub walreceiver_connect_timeout: Option<String>,
|
pub walreceiver_connect_timeout: Option<String>,
|
||||||
pub lagging_wal_timeout: Option<String>,
|
pub lagging_wal_timeout: Option<String>,
|
||||||
pub max_lsn_wal_lag: Option<NonZeroU64>,
|
pub max_lsn_wal_lag: Option<NonZeroU64>,
|
||||||
pub trace_read_requests: Option<bool>,
|
|
||||||
pub eviction_policy: Option<EvictionPolicy>,
|
pub eviction_policy: Option<EvictionPolicy>,
|
||||||
pub min_resident_size_override: Option<u64>,
|
pub min_resident_size_override: Option<u64>,
|
||||||
pub evictions_low_residence_duration_metric_threshold: Option<String>,
|
pub evictions_low_residence_duration_metric_threshold: Option<String>,
|
||||||
@@ -640,6 +637,13 @@ pub struct TenantInfo {
|
|||||||
pub current_physical_size: Option<u64>, // physical size is only included in `tenant_status` endpoint
|
pub current_physical_size: Option<u64>, // physical size is only included in `tenant_status` endpoint
|
||||||
pub attachment_status: TenantAttachmentStatus,
|
pub attachment_status: TenantAttachmentStatus,
|
||||||
pub generation: u32,
|
pub generation: u32,
|
||||||
|
|
||||||
|
/// Opaque explanation if gc is being blocked.
|
||||||
|
///
|
||||||
|
/// Only looked up for the individual tenant detail, not the listing. This is purely for
|
||||||
|
/// debugging, not included in openapi.
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub gc_blocking: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Clone)]
|
#[derive(Serialize, Deserialize, Clone)]
|
||||||
@@ -652,6 +656,17 @@ pub struct TenantDetails {
|
|||||||
pub timelines: Vec<TimelineId>,
|
pub timelines: Vec<TimelineId>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Copy, Debug)]
|
||||||
|
pub enum TimelineArchivalState {
|
||||||
|
Archived,
|
||||||
|
Unarchived,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone)]
|
||||||
|
pub struct TimelineArchivalConfigRequest {
|
||||||
|
pub state: TimelineArchivalState,
|
||||||
|
}
|
||||||
|
|
||||||
/// This represents the output of the "timeline_detail" and "timeline_list" API calls.
|
/// This represents the output of the "timeline_detail" and "timeline_list" API calls.
|
||||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||||
pub struct TimelineInfo {
|
pub struct TimelineInfo {
|
||||||
@@ -716,58 +731,7 @@ pub struct LayerMapInfo {
|
|||||||
pub historic_layers: Vec<HistoricLayerInfo>,
|
pub historic_layers: Vec<HistoricLayerInfo>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy, Serialize, Deserialize, enum_map::Enum)]
|
/// The residence status of a layer
|
||||||
#[repr(usize)]
|
|
||||||
pub enum LayerAccessKind {
|
|
||||||
GetValueReconstructData,
|
|
||||||
Iter,
|
|
||||||
KeyIter,
|
|
||||||
Dump,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
||||||
pub struct LayerAccessStatFullDetails {
|
|
||||||
pub when_millis_since_epoch: u64,
|
|
||||||
pub task_kind: Cow<'static, str>,
|
|
||||||
pub access_kind: LayerAccessKind,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// An event that impacts the layer's residence status.
|
|
||||||
#[serde_as]
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
||||||
pub struct LayerResidenceEvent {
|
|
||||||
/// The time when the event occurred.
|
|
||||||
/// NB: this timestamp is captured while the residence status changes.
|
|
||||||
/// So, it might be behind/ahead of the actual residence change by a short amount of time.
|
|
||||||
///
|
|
||||||
#[serde(rename = "timestamp_millis_since_epoch")]
|
|
||||||
#[serde_as(as = "serde_with::TimestampMilliSeconds")]
|
|
||||||
pub timestamp: SystemTime,
|
|
||||||
/// The new residence status of the layer.
|
|
||||||
pub status: LayerResidenceStatus,
|
|
||||||
/// The reason why we had to record this event.
|
|
||||||
pub reason: LayerResidenceEventReason,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The reason for recording a given [`LayerResidenceEvent`].
|
|
||||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
|
||||||
pub enum LayerResidenceEventReason {
|
|
||||||
/// The layer map is being populated, e.g. during timeline load or attach.
|
|
||||||
/// This includes [`RemoteLayer`] objects created in [`reconcile_with_remote`].
|
|
||||||
/// We need to record such events because there is no persistent storage for the events.
|
|
||||||
///
|
|
||||||
// https://github.com/rust-lang/rust/issues/74481
|
|
||||||
/// [`RemoteLayer`]: ../../tenant/storage_layer/struct.RemoteLayer.html
|
|
||||||
/// [`reconcile_with_remote`]: ../../tenant/struct.Timeline.html#method.reconcile_with_remote
|
|
||||||
LayerLoad,
|
|
||||||
/// We just created the layer (e.g., freeze_and_flush or compaction).
|
|
||||||
/// Such layers are always [`LayerResidenceStatus::Resident`].
|
|
||||||
LayerCreate,
|
|
||||||
/// We on-demand downloaded or evicted the given layer.
|
|
||||||
ResidenceChange,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The residence status of the layer, after the given [`LayerResidenceEvent`].
|
|
||||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
||||||
pub enum LayerResidenceStatus {
|
pub enum LayerResidenceStatus {
|
||||||
/// Residence status for a layer file that exists locally.
|
/// Residence status for a layer file that exists locally.
|
||||||
@@ -777,23 +741,16 @@ pub enum LayerResidenceStatus {
|
|||||||
Evicted,
|
Evicted,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl LayerResidenceEvent {
|
#[serde_as]
|
||||||
pub fn new(status: LayerResidenceStatus, reason: LayerResidenceEventReason) -> Self {
|
|
||||||
Self {
|
|
||||||
status,
|
|
||||||
reason,
|
|
||||||
timestamp: SystemTime::now(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct LayerAccessStats {
|
pub struct LayerAccessStats {
|
||||||
pub access_count_by_access_kind: HashMap<LayerAccessKind, u64>,
|
#[serde_as(as = "serde_with::TimestampMilliSeconds")]
|
||||||
pub task_kind_access_flag: Vec<Cow<'static, str>>,
|
pub access_time: SystemTime,
|
||||||
pub first: Option<LayerAccessStatFullDetails>,
|
|
||||||
pub accesses_history: HistoryBufferWithDropCounter<LayerAccessStatFullDetails, 16>,
|
#[serde_as(as = "serde_with::TimestampMilliSeconds")]
|
||||||
pub residence_events_history: HistoryBufferWithDropCounter<LayerResidenceEvent, 16>,
|
pub residence_time: SystemTime,
|
||||||
|
|
||||||
|
pub visible: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
@@ -990,6 +947,8 @@ pub struct TopTenantShardsResponse {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub mod virtual_file {
|
pub mod virtual_file {
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
#[derive(
|
#[derive(
|
||||||
Copy,
|
Copy,
|
||||||
Clone,
|
Clone,
|
||||||
@@ -1008,6 +967,53 @@ pub mod virtual_file {
|
|||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
TokioEpollUring,
|
TokioEpollUring,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Direct IO modes for a pageserver.
|
||||||
|
#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)]
|
||||||
|
#[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)]
|
||||||
|
pub enum DirectIoMode {
|
||||||
|
/// Direct IO disabled (uses usual buffered IO).
|
||||||
|
#[default]
|
||||||
|
Disabled,
|
||||||
|
/// Direct IO disabled (performs checks and perf simulations).
|
||||||
|
Evaluate {
|
||||||
|
/// Alignment check level
|
||||||
|
alignment_check: DirectIoAlignmentCheckLevel,
|
||||||
|
/// Latency padded for performance simulation.
|
||||||
|
latency_padding: DirectIoLatencyPadding,
|
||||||
|
},
|
||||||
|
/// Direct IO enabled.
|
||||||
|
Enabled {
|
||||||
|
/// Actions to perform on alignment error.
|
||||||
|
on_alignment_error: DirectIoOnAlignmentErrorAction,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)]
|
||||||
|
#[serde(rename_all = "kebab-case")]
|
||||||
|
pub enum DirectIoAlignmentCheckLevel {
|
||||||
|
#[default]
|
||||||
|
Error,
|
||||||
|
Log,
|
||||||
|
None,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)]
|
||||||
|
#[serde(rename_all = "kebab-case")]
|
||||||
|
pub enum DirectIoOnAlignmentErrorAction {
|
||||||
|
Error,
|
||||||
|
#[default]
|
||||||
|
FallbackToBuffered,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)]
|
||||||
|
#[serde(tag = "type", rename_all = "kebab-case")]
|
||||||
|
pub enum DirectIoLatencyPadding {
|
||||||
|
/// Pad virtual file operations with IO to a fake file.
|
||||||
|
FakeFileRW { path: PathBuf },
|
||||||
|
#[default]
|
||||||
|
None,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wrapped in libpq CopyData
|
// Wrapped in libpq CopyData
|
||||||
@@ -1477,6 +1483,7 @@ mod tests {
|
|||||||
current_physical_size: Some(42),
|
current_physical_size: Some(42),
|
||||||
attachment_status: TenantAttachmentStatus::Attached,
|
attachment_status: TenantAttachmentStatus::Attached,
|
||||||
generation: 1,
|
generation: 1,
|
||||||
|
gc_blocking: None,
|
||||||
};
|
};
|
||||||
let expected_active = json!({
|
let expected_active = json!({
|
||||||
"id": original_active.id.to_string(),
|
"id": original_active.id.to_string(),
|
||||||
@@ -1499,6 +1506,7 @@ mod tests {
|
|||||||
current_physical_size: Some(42),
|
current_physical_size: Some(42),
|
||||||
attachment_status: TenantAttachmentStatus::Attached,
|
attachment_status: TenantAttachmentStatus::Attached,
|
||||||
generation: 1,
|
generation: 1,
|
||||||
|
gc_blocking: None,
|
||||||
};
|
};
|
||||||
let expected_broken = json!({
|
let expected_broken = json!({
|
||||||
"id": original_broken.id.to_string(),
|
"id": original_broken.id.to_string(),
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
use utils::id::TimelineId;
|
use utils::id::TimelineId;
|
||||||
|
|
||||||
#[derive(Default, serde::Serialize)]
|
#[derive(Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
|
||||||
pub struct AncestorDetached {
|
pub struct AncestorDetached {
|
||||||
pub reparented_timelines: Vec<TimelineId>,
|
pub reparented_timelines: HashSet<TimelineId>,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -144,7 +144,20 @@ impl PgConnectionConfig {
|
|||||||
// implement and this function is hardly a bottleneck. The function is only called around
|
// implement and this function is hardly a bottleneck. The function is only called around
|
||||||
// establishing a new connection.
|
// establishing a new connection.
|
||||||
#[allow(unstable_name_collisions)]
|
#[allow(unstable_name_collisions)]
|
||||||
config.options(&encode_options(&self.options));
|
config.options(
|
||||||
|
&self
|
||||||
|
.options
|
||||||
|
.iter()
|
||||||
|
.map(|s| {
|
||||||
|
if s.contains(['\\', ' ']) {
|
||||||
|
Cow::Owned(s.replace('\\', "\\\\").replace(' ', "\\ "))
|
||||||
|
} else {
|
||||||
|
Cow::Borrowed(s.as_str())
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.intersperse(Cow::Borrowed(" ")) // TODO: use impl from std once it's stabilized
|
||||||
|
.collect::<String>(),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
config
|
config
|
||||||
}
|
}
|
||||||
@@ -165,21 +178,6 @@ impl PgConnectionConfig {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(unstable_name_collisions)]
|
|
||||||
fn encode_options(options: &[String]) -> String {
|
|
||||||
options
|
|
||||||
.iter()
|
|
||||||
.map(|s| {
|
|
||||||
if s.contains(['\\', ' ']) {
|
|
||||||
Cow::Owned(s.replace('\\', "\\\\").replace(' ', "\\ "))
|
|
||||||
} else {
|
|
||||||
Cow::Borrowed(s.as_str())
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.intersperse(Cow::Borrowed(" ")) // TODO: use impl from std once it's stabilized
|
|
||||||
.collect::<String>()
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for PgConnectionConfig {
|
impl fmt::Display for PgConnectionConfig {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
// The password is intentionally hidden and not part of this display string.
|
// The password is intentionally hidden and not part of this display string.
|
||||||
@@ -208,7 +206,7 @@ impl fmt::Debug for PgConnectionConfig {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests_pg_connection_config {
|
mod tests_pg_connection_config {
|
||||||
use crate::{encode_options, PgConnectionConfig};
|
use crate::PgConnectionConfig;
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use url::Host;
|
use url::Host;
|
||||||
|
|
||||||
@@ -257,12 +255,18 @@ mod tests_pg_connection_config {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_with_options() {
|
fn test_with_options() {
|
||||||
let options = encode_options(&[
|
let cfg = PgConnectionConfig::new_host_port(STUB_HOST.clone(), 123).extend_options([
|
||||||
"hello".to_owned(),
|
"hello",
|
||||||
"world".to_owned(),
|
"world",
|
||||||
"with space".to_owned(),
|
"with space",
|
||||||
"and \\ backslashes".to_owned(),
|
"and \\ backslashes",
|
||||||
]);
|
]);
|
||||||
assert_eq!(options, "hello world with\\ space and\\ \\\\\\ backslashes");
|
assert_eq!(cfg.host(), &*STUB_HOST);
|
||||||
|
assert_eq!(cfg.port(), 123);
|
||||||
|
assert_eq!(cfg.raw_address(), "stub.host.example:123");
|
||||||
|
assert_eq!(
|
||||||
|
cfg.to_tokio_postgres_config().get_options(),
|
||||||
|
Some("hello world with\\ space and\\ \\\\\\ backslashes")
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ use anyhow::{bail, Result};
|
|||||||
use bytes::{Bytes, BytesMut};
|
use bytes::{Bytes, BytesMut};
|
||||||
|
|
||||||
/// Equivalent to sizeof(ControlFileData) in C
|
/// Equivalent to sizeof(ControlFileData) in C
|
||||||
const SIZEOF_CONTROLDATA: usize = std::mem::size_of::<ControlFileData>();
|
const SIZEOF_CONTROLDATA: usize = size_of::<ControlFileData>();
|
||||||
|
|
||||||
impl ControlFileData {
|
impl ControlFileData {
|
||||||
/// Compute the offset of the `crc` field within the `ControlFileData` struct.
|
/// Compute the offset of the `crc` field within the `ControlFileData` struct.
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ pub const SMGR_TRUNCATE_FSM: u32 = 0x0004;
|
|||||||
//
|
//
|
||||||
|
|
||||||
// Assumes 8 byte alignment
|
// Assumes 8 byte alignment
|
||||||
const SIZEOF_PAGE_HEADER_DATA: usize = std::mem::size_of::<PageHeaderData>();
|
const SIZEOF_PAGE_HEADER_DATA: usize = size_of::<PageHeaderData>();
|
||||||
pub const MAXALIGN_SIZE_OF_PAGE_HEADER_DATA: usize = (SIZEOF_PAGE_HEADER_DATA + 7) & !7;
|
pub const MAXALIGN_SIZE_OF_PAGE_HEADER_DATA: usize = (SIZEOF_PAGE_HEADER_DATA + 7) & !7;
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -191,7 +191,7 @@ pub const XLR_RMGR_INFO_MASK: u8 = 0xF0;
|
|||||||
pub const XLOG_TBLSPC_CREATE: u8 = 0x00;
|
pub const XLOG_TBLSPC_CREATE: u8 = 0x00;
|
||||||
pub const XLOG_TBLSPC_DROP: u8 = 0x10;
|
pub const XLOG_TBLSPC_DROP: u8 = 0x10;
|
||||||
|
|
||||||
pub const SIZEOF_XLOGRECORD: u32 = std::mem::size_of::<XLogRecord>() as u32;
|
pub const SIZEOF_XLOGRECORD: u32 = size_of::<XLogRecord>() as u32;
|
||||||
|
|
||||||
//
|
//
|
||||||
// from xlogrecord.h
|
// from xlogrecord.h
|
||||||
|
|||||||
@@ -42,9 +42,9 @@ pub const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001;
|
|||||||
pub const XLP_REM_LEN_OFFS: usize = 2 + 2 + 4 + 8;
|
pub const XLP_REM_LEN_OFFS: usize = 2 + 2 + 4 + 8;
|
||||||
pub const XLOG_RECORD_CRC_OFFS: usize = 4 + 4 + 8 + 1 + 1 + 2;
|
pub const XLOG_RECORD_CRC_OFFS: usize = 4 + 4 + 8 + 1 + 1 + 2;
|
||||||
|
|
||||||
pub const XLOG_SIZE_OF_XLOG_SHORT_PHD: usize = std::mem::size_of::<XLogPageHeaderData>();
|
pub const XLOG_SIZE_OF_XLOG_SHORT_PHD: usize = size_of::<XLogPageHeaderData>();
|
||||||
pub const XLOG_SIZE_OF_XLOG_LONG_PHD: usize = std::mem::size_of::<XLogLongPageHeaderData>();
|
pub const XLOG_SIZE_OF_XLOG_LONG_PHD: usize = size_of::<XLogLongPageHeaderData>();
|
||||||
pub const XLOG_SIZE_OF_XLOG_RECORD: usize = std::mem::size_of::<XLogRecord>();
|
pub const XLOG_SIZE_OF_XLOG_RECORD: usize = size_of::<XLogRecord>();
|
||||||
#[allow(clippy::identity_op)]
|
#[allow(clippy::identity_op)]
|
||||||
pub const SIZE_OF_XLOG_RECORD_DATA_HEADER_SHORT: usize = 1 * 2;
|
pub const SIZE_OF_XLOG_RECORD_DATA_HEADER_SHORT: usize = 1 * 2;
|
||||||
|
|
||||||
@@ -311,7 +311,7 @@ impl XLogLongPageHeaderData {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub const SIZEOF_CHECKPOINT: usize = std::mem::size_of::<CheckPoint>();
|
pub const SIZEOF_CHECKPOINT: usize = size_of::<CheckPoint>();
|
||||||
|
|
||||||
impl CheckPoint {
|
impl CheckPoint {
|
||||||
pub fn encode(&self) -> Result<Bytes, SerializeError> {
|
pub fn encode(&self) -> Result<Bytes, SerializeError> {
|
||||||
|
|||||||
@@ -178,7 +178,7 @@ pub fn test_find_end_of_wal_last_crossing_segment() {
|
|||||||
/// currently 1024.
|
/// currently 1024.
|
||||||
#[test]
|
#[test]
|
||||||
pub fn test_update_next_xid() {
|
pub fn test_update_next_xid() {
|
||||||
let checkpoint_buf = [0u8; std::mem::size_of::<CheckPoint>()];
|
let checkpoint_buf = [0u8; size_of::<CheckPoint>()];
|
||||||
let mut checkpoint = CheckPoint::decode(&checkpoint_buf).unwrap();
|
let mut checkpoint = CheckPoint::decode(&checkpoint_buf).unwrap();
|
||||||
|
|
||||||
checkpoint.nextXid = FullTransactionId { value: 10 };
|
checkpoint.nextXid = FullTransactionId { value: 10 };
|
||||||
@@ -204,7 +204,7 @@ pub fn test_update_next_xid() {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
pub fn test_update_next_multixid() {
|
pub fn test_update_next_multixid() {
|
||||||
let checkpoint_buf = [0u8; std::mem::size_of::<CheckPoint>()];
|
let checkpoint_buf = [0u8; size_of::<CheckPoint>()];
|
||||||
let mut checkpoint = CheckPoint::decode(&checkpoint_buf).unwrap();
|
let mut checkpoint = CheckPoint::decode(&checkpoint_buf).unwrap();
|
||||||
|
|
||||||
// simple case
|
// simple case
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ license.workspace = true
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow.workspace = true
|
anyhow.workspace = true
|
||||||
async-trait.workspace = true
|
async-trait.workspace = true
|
||||||
|
async-stream.workspace = true
|
||||||
once_cell.workspace = true
|
once_cell.workspace = true
|
||||||
aws-smithy-async.workspace = true
|
aws-smithy-async.workspace = true
|
||||||
aws-smithy-types.workspace = true
|
aws-smithy-types.workspace = true
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ use std::time::SystemTime;
 use super::REMOTE_STORAGE_PREFIX_SEPARATOR;
 use anyhow::Result;
 use azure_core::request_options::{MaxResults, Metadata, Range};
-use azure_core::RetryOptions;
+use azure_core::{Continuable, RetryOptions};
 use azure_identity::DefaultAzureCredential;
 use azure_storage::StorageCredentials;
 use azure_storage_blobs::blob::CopyStatus;
@@ -33,6 +33,7 @@ use tracing::debug;
 use utils::backoff;

 use crate::metrics::{start_measuring_requests, AttemptOutcome, RequestKind};
+use crate::ListingObject;
 use crate::{
     config::AzureConfig, error::Cancelled, ConcurrencyLimiter, Download, DownloadError, Listing,
     ListingMode, RemotePath, RemoteStorage, StorageMetadata, TimeTravelError, TimeoutOrCancel,
@@ -40,6 +41,7 @@ use crate::{

 pub struct AzureBlobStorage {
     client: ContainerClient,
+    container_name: String,
     prefix_in_container: Option<String>,
     max_keys_per_list_response: Option<NonZeroU32>,
     concurrency_limiter: ConcurrencyLimiter,
@@ -85,6 +87,7 @@ impl AzureBlobStorage {

         Ok(AzureBlobStorage {
             client,
+            container_name: azure_config.container_name.to_owned(),
             prefix_in_container: azure_config.prefix_in_container.to_owned(),
             max_keys_per_list_response,
             concurrency_limiter: ConcurrencyLimiter::new(azure_config.concurrency_limit.get()),
@@ -238,6 +241,10 @@ impl AzureBlobStorage {
             _ = cancel.cancelled() => Err(Cancelled),
         }
     }
+
+    pub fn container_name(&self) -> &str {
+        &self.container_name
+    }
 }

 fn to_azure_metadata(metadata: StorageMetadata) -> Metadata {
@@ -261,30 +268,30 @@ fn to_download_error(error: azure_core::Error) -> DownloadError {
 }

 impl RemoteStorage for AzureBlobStorage {
-    async fn list(
+    fn list_streaming(
         &self,
         prefix: Option<&RemotePath>,
         mode: ListingMode,
         max_keys: Option<NonZeroU32>,
         cancel: &CancellationToken,
-    ) -> anyhow::Result<Listing, DownloadError> {
-        let _permit = self.permit(RequestKind::List, cancel).await?;
+    ) -> impl Stream<Item = Result<Listing, DownloadError>> {
+        // get the passed prefix or if it is not set use prefix_in_bucket value
+        let list_prefix = prefix
+            .map(|p| self.relative_path_to_name(p))
+            .or_else(|| self.prefix_in_container.clone())
+            .map(|mut p| {
+                // required to end with a separator
+                // otherwise request will return only the entry of a prefix
+                if matches!(mode, ListingMode::WithDelimiter)
+                    && !p.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR)
+                {
+                    p.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
+                }
+                p
+            });

-        let op = async {
-            // get the passed prefix or if it is not set use prefix_in_bucket value
-            let list_prefix = prefix
-                .map(|p| self.relative_path_to_name(p))
-                .or_else(|| self.prefix_in_container.clone())
-                .map(|mut p| {
-                    // required to end with a separator
-                    // otherwise request will return only the entry of a prefix
-                    if matches!(mode, ListingMode::WithDelimiter)
-                        && !p.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR)
-                    {
-                        p.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
-                    }
-                    p
-                });
+        async_stream::stream! {
+            let _permit = self.permit(RequestKind::List, cancel).await?;

             let mut builder = self.client.list_blobs();

@@ -300,21 +307,43 @@ impl RemoteStorage for AzureBlobStorage {
                 builder = builder.max_results(MaxResults::new(limit));
             }

-            let response = builder.into_stream();
-            let response = response.into_stream().map_err(to_download_error);
-            let response = tokio_stream::StreamExt::timeout(response, self.timeout);
-            let response = response.map(|res| match res {
-                Ok(res) => res,
-                Err(_elapsed) => Err(DownloadError::Timeout),
-            });
+            let mut next_marker = None;

-            let mut response = std::pin::pin!(response);
+            'outer: loop {
+                let mut builder = builder.clone();
+                if let Some(marker) = next_marker.clone() {
+                    builder = builder.marker(marker);
+                }
+                let response = builder.into_stream();
+                let response = response.into_stream().map_err(to_download_error);
+                let response = tokio_stream::StreamExt::timeout(response, self.timeout);
+                let response = response.map(|res| match res {
+                    Ok(res) => res,
+                    Err(_elapsed) => Err(DownloadError::Timeout),
+                });

-            let mut res = Listing::default();
+                let mut response = std::pin::pin!(response);

-            let mut max_keys = max_keys.map(|mk| mk.get());
-            while let Some(entry) = response.next().await {
-                let entry = entry?;
+                let mut max_keys = max_keys.map(|mk| mk.get());
+                let next_item = tokio::select! {
+                    op = response.next() => Ok(op),
+                    _ = cancel.cancelled() => Err(DownloadError::Cancelled),
+                }?;
+                let Some(entry) = next_item else {
+                    // The list is complete, so yield it.
+                    break;
+                };
+
+                let mut res = Listing::default();
+                let entry = match entry {
+                    Ok(entry) => entry,
+                    Err(e) => {
+                        // The error is potentially retryable, so we must rewind the loop after yielding.
+                        yield Err(e);
+                        continue;
+                    }
+                };
+                next_marker = entry.continuation();
                 let prefix_iter = entry
                     .blobs
                     .prefixes()
@@ -324,7 +353,12 @@ impl RemoteStorage for AzureBlobStorage {
                 let blob_iter = entry
                     .blobs
                     .blobs()
-                    .map(|k| self.name_to_relative_path(&k.name));
+                    .map(|k| ListingObject{
+                        key: self.name_to_relative_path(&k.name),
+                        last_modified: k.properties.last_modified.into(),
+                        size: k.properties.content_length,
+                    }
+                );

                 for key in blob_iter {
                     res.keys.push(key);
@@ -333,19 +367,19 @@ impl RemoteStorage for AzureBlobStorage {
                         assert!(mk > 0);
                         mk -= 1;
                         if mk == 0 {
-                            return Ok(res); // limit reached
+                            yield Ok(res); // limit reached
+                            break 'outer;
                         }
                         max_keys = Some(mk);
                     }
                 }
-            }
-
-            Ok(res)
-        };
-
-        tokio::select! {
-            res = op => res,
-            _ = cancel.cancelled() => Err(DownloadError::Cancelled),
+                yield Ok(res);
+
+                // We are done here
+                if next_marker.is_none() {
+                    break;
+                }
+            }
         }
     }

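Note (illustrative, not part of the diff): the Azure implementation above drives pagination with a continuation marker inside an `async_stream::stream!` block, yielding one `Listing` per page and retrying transient errors by yielding them. A minimal, self-contained sketch of that shape, with `Page`, `fetch_page`, and the error type as hypothetical stand-ins rather than the Azure SDK API, looks like this:

```rust
use async_stream::stream;
use futures::Stream;

/// One page of results plus an optional continuation marker (hypothetical stand-in).
struct Page {
    items: Vec<String>,
    next_marker: Option<String>,
}

/// Stand-in for one remote "list" request; the real code builds an Azure
/// `list_blobs()` request and applies `.marker(...)` when a marker is known.
async fn fetch_page(_marker: Option<String>) -> Result<Page, std::io::Error> {
    Ok(Page { items: Vec::new(), next_marker: None })
}

/// Yields one `Ok(items)` per page. On a transient error the error is yielded and the
/// same marker is retried on the next poll, mirroring the "rewind the loop after
/// yielding" comment in the diff above.
fn list_pages() -> impl Stream<Item = Result<Vec<String>, std::io::Error>> {
    stream! {
        let mut next_marker: Option<String> = None;
        loop {
            match fetch_page(next_marker.clone()).await {
                Ok(Page { items, next_marker: marker }) => {
                    next_marker = marker;
                    yield Ok(items);
                    if next_marker.is_none() {
                        break; // no more pages
                    }
                }
                Err(e) => {
                    // Potentially retryable: keep the marker and yield the error.
                    yield Err(e);
                }
            }
        }
    }
}
```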
@@ -26,7 +26,7 @@ use anyhow::Context;
 use camino::{Utf8Path, Utf8PathBuf};

 use bytes::Bytes;
-use futures::stream::Stream;
+use futures::{stream::Stream, StreamExt};
 use serde::{Deserialize, Serialize};
 use tokio::sync::Semaphore;
 use tokio_util::sync::CancellationToken;
@@ -144,15 +144,23 @@ impl RemotePath {
 ///
 /// The WithDelimiter mode will populate `prefixes` and `keys` in the result. The
 /// NoDelimiter mode will only populate `keys`.
+#[derive(Copy, Clone)]
 pub enum ListingMode {
     WithDelimiter,
     NoDelimiter,
 }

+#[derive(PartialEq, Eq, Debug)]
+pub struct ListingObject {
+    pub key: RemotePath,
+    pub last_modified: SystemTime,
+    pub size: u64,
+}
+
 #[derive(Default)]
 pub struct Listing {
     pub prefixes: Vec<RemotePath>,
-    pub keys: Vec<RemotePath>,
+    pub keys: Vec<ListingObject>,
 }

 /// Storage (potentially remote) API to manage its state.
@@ -160,13 +168,18 @@ pub struct Listing {
 /// providing basic CRUD operations for storage files.
 #[allow(async_fn_in_trait)]
 pub trait RemoteStorage: Send + Sync + 'static {
-    /// List objects in remote storage, with semantics matching AWS S3's ListObjectsV2.
-    /// (see `<https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html>`)
+    /// List objects in remote storage, with semantics matching AWS S3's [`ListObjectsV2`].
+    ///
+    /// The stream is guaranteed to return at least one element, even in the case of errors
+    /// (in that case it's an `Err()`), or an empty `Listing`.
+    ///
+    /// The stream is not ending if it returns an error, as long as [`is_permanent`] returns false on the error.
+    /// The `next` function can be retried, and maybe in a future retry, there will be success.
     ///
     /// Note that the prefix is relative to any `prefix_in_bucket` configured for the client, not
     /// from the absolute root of the bucket.
     ///
-    /// `mode` configures whether to use a delimiter. Without a delimiter all keys
+    /// `mode` configures whether to use a delimiter. Without a delimiter, all keys
     /// within the prefix are listed in the `keys` of the result. With a delimiter, any "directories" at the top level of
     /// the prefix are returned in the `prefixes` of the result, and keys in the top level of the prefix are
    /// returned in `keys` ().
@@ -175,13 +188,32 @@ pub trait RemoteStorage: Send + Sync + 'static {
     /// will iteratively call listobjects until it runs out of keys. Note that this is not safe to use on
     /// unlimted size buckets, as the full list of objects is allocated into a monolithic data structure.
     ///
+    /// [`ListObjectsV2`]: <https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html>
+    /// [`is_permanent`]: DownloadError::is_permanent
+    fn list_streaming(
+        &self,
+        prefix: Option<&RemotePath>,
+        mode: ListingMode,
+        max_keys: Option<NonZeroU32>,
+        cancel: &CancellationToken,
+    ) -> impl Stream<Item = Result<Listing, DownloadError>> + Send;
+
     async fn list(
         &self,
         prefix: Option<&RemotePath>,
-        _mode: ListingMode,
+        mode: ListingMode,
         max_keys: Option<NonZeroU32>,
         cancel: &CancellationToken,
-    ) -> Result<Listing, DownloadError>;
+    ) -> Result<Listing, DownloadError> {
+        let mut stream = std::pin::pin!(self.list_streaming(prefix, mode, max_keys, cancel));
+        let mut combined = stream.next().await.expect("At least one item required")?;
+        while let Some(list) = stream.next().await {
+            let list = list?;
+            combined.keys.extend(list.keys.into_iter());
+            combined.prefixes.extend_from_slice(&list.prefixes);
+        }
+        Ok(combined)
+    }

     /// Streams the local file contents into remote into the remote storage entry.
     ///
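Note (illustrative, not part of the diff): the default `list` above simply drains the `list_streaming` pages and merges them. A minimal generic version of that collapse, written against a plain `Stream` of page vectors rather than the crate's `Listing` type (so the item type `T` and error `E` are placeholders), could look like:

```rust
use futures::{Stream, StreamExt};

/// Drain a stream of per-page results into one combined vector, stopping at the
/// first error. Mirrors the shape of the trait's default `list` implementation.
async fn collect_pages<T, E>(
    pages: impl Stream<Item = Result<Vec<T>, E>>,
) -> Result<Vec<T>, E> {
    let mut pages = std::pin::pin!(pages);
    let mut combined = Vec::new();
    while let Some(page) = pages.next().await {
        combined.extend(page?);
    }
    Ok(combined)
}
```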
@@ -288,8 +320,8 @@ impl Debug for Download {

 /// Every storage, currently supported.
 /// Serves as a simple way to pass around the [`RemoteStorage`] without dealing with generics.
-#[derive(Clone)]
 // Require Clone for `Other` due to https://github.com/rust-lang/rust/issues/26925
+#[derive(Clone)]
 pub enum GenericRemoteStorage<Other: Clone = Arc<UnreliableWrapper>> {
     LocalFs(LocalFs),
     AwsS3(Arc<S3Bucket>),
@@ -298,13 +330,14 @@ pub enum GenericRemoteStorage<Other: Clone = Arc<UnreliableWrapper>> {
 }

 impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
+    // See [`RemoteStorage::list`].
     pub async fn list(
         &self,
         prefix: Option<&RemotePath>,
         mode: ListingMode,
         max_keys: Option<NonZeroU32>,
         cancel: &CancellationToken,
-    ) -> anyhow::Result<Listing, DownloadError> {
+    ) -> Result<Listing, DownloadError> {
         match self {
             Self::LocalFs(s) => s.list(prefix, mode, max_keys, cancel).await,
             Self::AwsS3(s) => s.list(prefix, mode, max_keys, cancel).await,
@@ -313,6 +346,23 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
         }
     }

+    // See [`RemoteStorage::list_streaming`].
+    pub fn list_streaming<'a>(
+        &'a self,
+        prefix: Option<&'a RemotePath>,
+        mode: ListingMode,
+        max_keys: Option<NonZeroU32>,
+        cancel: &'a CancellationToken,
+    ) -> impl Stream<Item = Result<Listing, DownloadError>> + 'a + Send {
+        match self {
+            Self::LocalFs(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel))
+                as Pin<Box<dyn Stream<Item = Result<Listing, DownloadError>> + Send>>,
+            Self::AwsS3(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel)),
+            Self::AzureBlob(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel)),
+            Self::Unreliable(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel)),
+        }
+    }
+
     /// See [`RemoteStorage::upload`]
     pub async fn upload(
         &self,
@@ -443,7 +493,7 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
 }

 impl GenericRemoteStorage {
-    pub fn from_config(storage_config: &RemoteStorageConfig) -> anyhow::Result<Self> {
+    pub async fn from_config(storage_config: &RemoteStorageConfig) -> anyhow::Result<Self> {
         let timeout = storage_config.timeout;
         Ok(match &storage_config.storage {
             RemoteStorageKind::LocalFs { local_path: path } => {
@@ -458,7 +508,7 @@ impl GenericRemoteStorage {
                     std::env::var("AWS_ACCESS_KEY_ID").unwrap_or_else(|_| "<none>".into());
                 info!("Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}', profile: {profile}, access_key_id: {access_key_id}",
                       s3_config.bucket_name, s3_config.bucket_region, s3_config.prefix_in_bucket, s3_config.endpoint);
-                Self::AwsS3(Arc::new(S3Bucket::new(s3_config, timeout)?))
+                Self::AwsS3(Arc::new(S3Bucket::new(s3_config, timeout).await?))
             }
             RemoteStorageKind::AzureContainer(azure_config) => {
                 let storage_account = azure_config
@@ -504,6 +554,16 @@ impl GenericRemoteStorage {
             None => self.download(from, cancel).await,
         }
     }
+
+    /// The name of the bucket/container/etc.
+    pub fn bucket_name(&self) -> Option<&str> {
+        match self {
+            Self::LocalFs(_s) => None,
+            Self::AwsS3(s) => Some(s.bucket_name()),
+            Self::AzureBlob(s) => Some(s.container_name()),
+            Self::Unreliable(_s) => None,
+        }
+    }
 }

 /// Extra set of key-value pairs that contain arbitrary metadata about the storage entry.
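Note (illustrative, not part of the diff): with `from_config` now async and the new `bucket_name()` accessor, a call site would change roughly as follows. The `init_storage` wrapper and the `tracing` log line are hypothetical; the API calls themselves match the changes above.

```rust
use anyhow::Context;
use remote_storage::{GenericRemoteStorage, RemoteStorageConfig};

async fn init_storage(config: &RemoteStorageConfig) -> anyhow::Result<GenericRemoteStorage> {
    // `from_config` must now be awaited (the S3 credentials chain is built asynchronously).
    let storage = GenericRemoteStorage::from_config(config)
        .await
        .context("remote storage init")?;
    // Backends without a bucket/container (LocalFs, Unreliable) return None here.
    if let Some(bucket) = storage.bucket_name() {
        tracing::info!("using remote storage bucket/container '{bucket}'");
    }
    Ok(storage)
}
```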
@@ -23,8 +23,8 @@ use tokio_util::{io::ReaderStream, sync::CancellationToken};
 use utils::crashsafe::path_with_suffix_extension;

 use crate::{
-    Download, DownloadError, Listing, ListingMode, RemotePath, TimeTravelError, TimeoutOrCancel,
-    REMOTE_STORAGE_PREFIX_SEPARATOR,
+    Download, DownloadError, Listing, ListingMode, ListingObject, RemotePath, TimeTravelError,
+    TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR,
 };

 use super::{RemoteStorage, StorageMetadata};
@@ -331,6 +331,17 @@ impl LocalFs {
 }

 impl RemoteStorage for LocalFs {
+    fn list_streaming(
+        &self,
+        prefix: Option<&RemotePath>,
+        mode: ListingMode,
+        max_keys: Option<NonZeroU32>,
+        cancel: &CancellationToken,
+    ) -> impl Stream<Item = Result<Listing, DownloadError>> {
+        let listing = self.list(prefix, mode, max_keys, cancel);
+        futures::stream::once(listing)
+    }
+
     async fn list(
         &self,
         prefix: Option<&RemotePath>,
@@ -346,19 +357,29 @@ impl RemoteStorage for LocalFs {
             .list_recursive(prefix)
             .await
             .map_err(DownloadError::Other)?;
-        let keys = keys
+        let objects = keys
             .into_iter()
-            .filter(|k| {
+            .filter_map(|k| {
                 let path = k.with_base(&self.storage_root);
-                !path.is_dir()
+                if path.is_dir() {
+                    None
+                } else {
+                    Some(ListingObject {
+                        key: k.clone(),
+                        // LocalFs is just for testing, so just specify a dummy time
+                        last_modified: SystemTime::now(),
+                        size: 0,
+                    })
+                }
             })
             .collect();

         if let ListingMode::NoDelimiter = mode {
-            result.keys = keys;
+            result.keys = objects;
         } else {
             let mut prefixes = HashSet::new();
-            for key in keys {
+            for object in objects {
+                let key = object.key;
                 // If the part after the prefix includes a "/", take only the first part and put it in `prefixes`.
                 let relative_key = if let Some(prefix) = prefix {
                     let mut prefix = prefix.clone();
@@ -387,9 +408,12 @@ impl RemoteStorage for LocalFs {
                         .to_owned();
                     prefixes.insert(first_part);
                 } else {
-                    result
-                        .keys
-                        .push(RemotePath::from_string(&relative_key).unwrap());
+                    result.keys.push(ListingObject {
+                        key: RemotePath::from_string(&relative_key).unwrap(),
+                        // LocalFs is just for testing
+                        last_modified: SystemTime::now(),
+                        size: 0,
+                    });
                 }
             }
             result.prefixes = prefixes
@@ -939,7 +963,11 @@ mod fs_tests {
         .await?;
         assert!(listing.prefixes.is_empty());
         assert_eq!(
-            listing.keys.into_iter().collect::<HashSet<_>>(),
+            listing
+                .keys
+                .into_iter()
+                .map(|o| o.key)
+                .collect::<HashSet<_>>(),
             HashSet::from([uncle.clone(), child.clone(), child_sibling.clone()])
         );

@@ -964,7 +992,7 @@ mod fs_tests {
         )
         .await?;
         assert_eq!(
-            listing.keys,
+            listing.keys.into_iter().map(|o| o.key).collect::<Vec<_>>(),
             [RemotePath::from_string("uncle").unwrap()].to_vec()
         );
         assert_eq!(
@@ -981,7 +1009,7 @@ mod fs_tests {
             &cancel,
         )
         .await?;
-        assert_eq!(listing.keys, [].to_vec());
+        assert_eq!(listing.keys, vec![]);
         assert_eq!(
             listing.prefixes,
             [RemotePath::from_string("grandparent").unwrap()].to_vec()
@@ -996,7 +1024,7 @@ mod fs_tests {
             &cancel,
         )
         .await?;
-        assert_eq!(listing.keys, [].to_vec());
+        assert_eq!(listing.keys, vec![]);
         assert_eq!(
             listing.prefixes,
             [RemotePath::from_string("grandparent").unwrap()].to_vec()
@@ -1029,7 +1057,7 @@ mod fs_tests {
             &cancel,
         )
         .await?;
-        assert_eq!(listing.keys, [].to_vec());
+        assert_eq!(listing.keys, vec![]);

         let mut found_prefixes = listing.prefixes.clone();
         found_prefixes.sort();
@@ -16,16 +16,10 @@ use std::{

 use anyhow::{anyhow, Context as _};
 use aws_config::{
-    environment::credentials::EnvironmentVariableCredentialsProvider,
-    imds::credentials::ImdsCredentialsProvider,
-    meta::credentials::CredentialsProviderChain,
-    profile::ProfileFileCredentialsProvider,
-    provider_config::ProviderConfig,
+    default_provider::credentials::DefaultCredentialsChain,
     retry::{RetryConfigBuilder, RetryMode},
-    web_identity_token::WebIdentityTokenCredentialsProvider,
     BehaviorVersion,
 };
-use aws_credential_types::provider::SharedCredentialsProvider;
 use aws_sdk_s3::{
     config::{AsyncSleep, IdentityCache, Region, SharedAsyncSleep},
     error::SdkError,
@@ -50,8 +44,9 @@ use crate::{
     error::Cancelled,
     metrics::{start_counting_cancelled_wait, start_measuring_requests},
     support::PermitCarrying,
-    ConcurrencyLimiter, Download, DownloadError, Listing, ListingMode, RemotePath, RemoteStorage,
-    TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE, REMOTE_STORAGE_PREFIX_SEPARATOR,
+    ConcurrencyLimiter, Download, DownloadError, Listing, ListingMode, ListingObject, RemotePath,
+    RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE,
+    REMOTE_STORAGE_PREFIX_SEPARATOR,
 };

 use crate::metrics::AttemptOutcome;
@@ -76,40 +71,27 @@ struct GetObjectRequest {
 }
 impl S3Bucket {
     /// Creates the S3 storage, errors if incorrect AWS S3 configuration provided.
-    pub fn new(remote_storage_config: &S3Config, timeout: Duration) -> anyhow::Result<Self> {
+    pub async fn new(remote_storage_config: &S3Config, timeout: Duration) -> anyhow::Result<Self> {
         tracing::debug!(
             "Creating s3 remote storage for S3 bucket {}",
             remote_storage_config.bucket_name
         );

-        let region = Some(Region::new(remote_storage_config.bucket_region.clone()));
-
-        let provider_conf = ProviderConfig::without_region().with_region(region.clone());
-
-        let credentials_provider = {
-            // uses "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"
-            CredentialsProviderChain::first_try(
-                "env",
-                EnvironmentVariableCredentialsProvider::new(),
-            )
-            // uses "AWS_PROFILE" / `aws sso login --profile <profile>`
-            .or_else(
-                "profile-sso",
-                ProfileFileCredentialsProvider::builder()
-                    .configure(&provider_conf)
-                    .build(),
-            )
-            // uses "AWS_WEB_IDENTITY_TOKEN_FILE", "AWS_ROLE_ARN", "AWS_ROLE_SESSION_NAME"
-            // needed to access remote extensions bucket
-            .or_else(
-                "token",
-                WebIdentityTokenCredentialsProvider::builder()
-                    .configure(&provider_conf)
-                    .build(),
-            )
-            // uses imds v2
-            .or_else("imds", ImdsCredentialsProvider::builder().build())
-        };
+        let region = Region::new(remote_storage_config.bucket_region.clone());
+        let region_opt = Some(region.clone());
+
+        // https://docs.aws.amazon.com/sdkref/latest/guide/standardized-credentials.html
+        // https://docs.rs/aws-config/latest/aws_config/default_provider/credentials/struct.DefaultCredentialsChain.html
+        // Incomplete list of auth methods used by this:
+        // * "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"
+        // * "AWS_PROFILE" / `aws sso login --profile <profile>`
+        // * "AWS_WEB_IDENTITY_TOKEN_FILE", "AWS_ROLE_ARN", "AWS_ROLE_SESSION_NAME"
+        // * http (ECS/EKS) container credentials
+        // * imds v2
+        let credentials_provider = DefaultCredentialsChain::builder()
+            .region(region)
+            .build()
+            .await;

         // AWS SDK requires us to specify how the RetryConfig should sleep when it wants to back off
         let sleep_impl: Arc<dyn AsyncSleep> = Arc::new(TokioSleep::new());
@@ -118,9 +100,9 @@ impl S3Bucket {
             #[allow(deprecated)] /* TODO: https://github.com/neondatabase/neon/issues/7665 */
             BehaviorVersion::v2023_11_09(),
         )
-        .region(region)
+        .region(region_opt)
         .identity_cache(IdentityCache::lazy().build())
-        .credentials_provider(SharedCredentialsProvider::new(credentials_provider))
+        .credentials_provider(credentials_provider)
        .sleep_impl(SharedAsyncSleep::from(sleep_impl));

         let sdk_config: aws_config::SdkConfig = std::thread::scope(|s| {
@@ -405,6 +387,10 @@ impl S3Bucket {
         }
         Ok(())
     }
+
+    pub fn bucket_name(&self) -> &str {
+        &self.bucket_name
+    }
 }

 pin_project_lite::pin_project! {
@@ -482,17 +468,16 @@ impl<S: Stream<Item = std::io::Result<Bytes>>> Stream for TimedDownload<S> {
 }

 impl RemoteStorage for S3Bucket {
-    async fn list(
+    fn list_streaming(
         &self,
         prefix: Option<&RemotePath>,
         mode: ListingMode,
         max_keys: Option<NonZeroU32>,
         cancel: &CancellationToken,
-    ) -> Result<Listing, DownloadError> {
+    ) -> impl Stream<Item = Result<Listing, DownloadError>> {
         let kind = RequestKind::List;
         // s3 sdk wants i32
         let mut max_keys = max_keys.map(|mk| mk.get() as i32);
-        let mut result = Listing::default();

         // get the passed prefix or if it is not set use prefix_in_bucket value
         let list_prefix = prefix
@@ -504,89 +489,119 @@ impl RemoteStorage for S3Bucket {
                 })
             });

-        let _permit = self.permit(kind, cancel).await?;
-
-        let mut continuation_token = None;
-
-        loop {
-            let started_at = start_measuring_requests(kind);
-
-            // min of two Options, returning Some if one is value and another is
-            // None (None is smaller than anything, so plain min doesn't work).
-            let request_max_keys = self
-                .max_keys_per_list_response
-                .into_iter()
-                .chain(max_keys.into_iter())
-                .min();
-            let mut request = self
-                .client
-                .list_objects_v2()
-                .bucket(self.bucket_name.clone())
-                .set_prefix(list_prefix.clone())
-                .set_continuation_token(continuation_token)
-                .set_max_keys(request_max_keys);
-
-            if let ListingMode::WithDelimiter = mode {
-                request = request.delimiter(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string());
-            }
-
-            let request = request.send();
-
-            let response = tokio::select! {
-                res = request => res,
-                _ = tokio::time::sleep(self.timeout) => return Err(DownloadError::Timeout),
-                _ = cancel.cancelled() => return Err(DownloadError::Cancelled),
-            };
-
-            let response = response
-                .context("Failed to list S3 prefixes")
-                .map_err(DownloadError::Other);
-
-            let started_at = ScopeGuard::into_inner(started_at);
-
-            crate::metrics::BUCKET_METRICS
-                .req_seconds
-                .observe_elapsed(kind, &response, started_at);
-
-            let response = response?;
-
-            let keys = response.contents();
-            let empty = Vec::new();
-            let prefixes = response.common_prefixes.as_ref().unwrap_or(&empty);
-
-            tracing::debug!("list: {} prefixes, {} keys", prefixes.len(), keys.len());
-
-            for object in keys {
-                let object_path = object.key().expect("response does not contain a key");
-                let remote_path = self.s3_object_to_relative_path(object_path);
-                result.keys.push(remote_path);
-                if let Some(mut mk) = max_keys {
-                    assert!(mk > 0);
-                    mk -= 1;
-                    if mk == 0 {
-                        return Ok(result); // limit reached
-                    }
-                    max_keys = Some(mk);
-                }
-            }
-
-            // S3 gives us prefixes like "foo/", we return them like "foo"
-            result.prefixes.extend(prefixes.iter().filter_map(|o| {
-                Some(
-                    self.s3_object_to_relative_path(
-                        o.prefix()?
-                            .trim_end_matches(REMOTE_STORAGE_PREFIX_SEPARATOR),
-                    ),
-                )
-            }));
-
-            continuation_token = match response.next_continuation_token {
-                Some(new_token) => Some(new_token),
-                None => break,
-            };
-        }
-
-        Ok(result)
+        async_stream::stream! {
+            let _permit = self.permit(kind, cancel).await?;
+
+            let mut continuation_token = None;
+            'outer: loop {
+                let started_at = start_measuring_requests(kind);
+
+                // min of two Options, returning Some if one is value and another is
+                // None (None is smaller than anything, so plain min doesn't work).
+                let request_max_keys = self
+                    .max_keys_per_list_response
+                    .into_iter()
+                    .chain(max_keys.into_iter())
+                    .min();
+                let mut request = self
+                    .client
+                    .list_objects_v2()
+                    .bucket(self.bucket_name.clone())
+                    .set_prefix(list_prefix.clone())
+                    .set_continuation_token(continuation_token.clone())
+                    .set_max_keys(request_max_keys);
+
+                if let ListingMode::WithDelimiter = mode {
+                    request = request.delimiter(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string());
+                }
+
+                let request = request.send();
+
+                let response = tokio::select! {
+                    res = request => Ok(res),
+                    _ = tokio::time::sleep(self.timeout) => Err(DownloadError::Timeout),
+                    _ = cancel.cancelled() => Err(DownloadError::Cancelled),
+                }?;
+
+                let response = response
+                    .context("Failed to list S3 prefixes")
+                    .map_err(DownloadError::Other);
+
+                let started_at = ScopeGuard::into_inner(started_at);
+
+                crate::metrics::BUCKET_METRICS
+                    .req_seconds
+                    .observe_elapsed(kind, &response, started_at);
+
+                let response = match response {
+                    Ok(response) => response,
+                    Err(e) => {
+                        // The error is potentially retryable, so we must rewind the loop after yielding.
+                        yield Err(e);
+                        continue 'outer;
+                    },
+                };
+
+                let keys = response.contents();
+                let prefixes = response.common_prefixes.as_deref().unwrap_or_default();
+
+                tracing::debug!("list: {} prefixes, {} keys", prefixes.len(), keys.len());
+                let mut result = Listing::default();
+
+                for object in keys {
+                    let key = object.key().expect("response does not contain a key");
+                    let key = self.s3_object_to_relative_path(key);
+
+                    let last_modified = match object.last_modified.map(SystemTime::try_from) {
+                        Some(Ok(t)) => t,
+                        Some(Err(_)) => {
+                            tracing::warn!("Remote storage last_modified {:?} for {} is out of bounds",
+                                object.last_modified, key
+                            );
+                            SystemTime::now()
+                        },
+                        None => {
+                            SystemTime::now()
+                        }
+                    };
+
+                    let size = object.size.unwrap_or(0) as u64;
+
+                    result.keys.push(ListingObject{
+                        key,
+                        last_modified,
+                        size,
+                    });
+                    if let Some(mut mk) = max_keys {
+                        assert!(mk > 0);
+                        mk -= 1;
+                        if mk == 0 {
+                            // limit reached
+                            yield Ok(result);
+                            break 'outer;
+                        }
+                        max_keys = Some(mk);
+                    }
+                }
+
+                // S3 gives us prefixes like "foo/", we return them like "foo"
+                result.prefixes.extend(prefixes.iter().filter_map(|o| {
+                    Some(
+                        self.s3_object_to_relative_path(
+                            o.prefix()?
+                                .trim_end_matches(REMOTE_STORAGE_PREFIX_SEPARATOR),
+                        ),
+                    )
+                }));
+
+                yield Ok(result);
+
+                continuation_token = match response.next_continuation_token {
+                    Some(new_token) => Some(new_token),
+                    None => break,
+                };
+            }
+        }
     }

     async fn upload(
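Note (illustrative, not part of the diff): both the Azure and S3 listings race each request against the configured timeout and the cancellation token with `tokio::select!`. Isolated from the crate's types (the `Error` enum below is a stand-in for `DownloadError`, and tokio's `time` feature is assumed), that pattern is:

```rust
use std::time::Duration;

use tokio_util::sync::CancellationToken;

#[derive(Debug)]
enum Error {
    Timeout,
    Cancelled,
}

/// Race a future against a timeout and a cancellation token, mapping each
/// losing branch to a distinct error, as in the list implementations above.
async fn with_timeout_and_cancel<T>(
    fut: impl std::future::Future<Output = T>,
    timeout: Duration,
    cancel: &CancellationToken,
) -> Result<T, Error> {
    tokio::select! {
        res = fut => Ok(res),
        _ = tokio::time::sleep(timeout) => Err(Error::Timeout),
        _ = cancel.cancelled() => Err(Error::Cancelled),
    }
}
```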
@@ -1041,8 +1056,8 @@ mod tests {

     use crate::{RemotePath, S3Bucket, S3Config};

-    #[test]
-    fn relative_path() {
+    #[tokio::test]
+    async fn relative_path() {
         let all_paths = ["", "some/path", "some/path/"];
         let all_paths: Vec<RemotePath> = all_paths
             .iter()
@@ -1085,8 +1100,9 @@ mod tests {
             max_keys_per_list_response: Some(5),
             upload_storage_class: None,
         };
-        let storage =
-            S3Bucket::new(&config, std::time::Duration::ZERO).expect("remote storage init");
+        let storage = S3Bucket::new(&config, std::time::Duration::ZERO)
+            .await
+            .expect("remote storage init");
         for (test_path_idx, test_path) in all_paths.iter().enumerate() {
             let result = storage.relative_path_to_s3_object(test_path);
             let expected = expected_outputs[prefix_idx][test_path_idx];
@@ -3,6 +3,7 @@
 //! testing purposes.
 use bytes::Bytes;
 use futures::stream::Stream;
+use futures::StreamExt;
 use std::collections::HashMap;
 use std::num::NonZeroU32;
 use std::sync::Mutex;
@@ -107,6 +108,23 @@ impl UnreliableWrapper {
 type VoidStorage = crate::LocalFs;

 impl RemoteStorage for UnreliableWrapper {
+    fn list_streaming(
+        &self,
+        prefix: Option<&RemotePath>,
+        mode: ListingMode,
+        max_keys: Option<NonZeroU32>,
+        cancel: &CancellationToken,
+    ) -> impl Stream<Item = Result<Listing, DownloadError>> + Send {
+        async_stream::stream! {
+            self.attempt(RemoteOp::ListPrefixes(prefix.cloned()))
+                .map_err(DownloadError::Other)?;
+            let mut stream = self.inner
+                .list_streaming(prefix, mode, max_keys, cancel);
+            while let Some(item) = stream.next().await {
+                yield item;
+            }
+        }
+    }
     async fn list(
         &self,
         prefix: Option<&RemotePath>,
@@ -152,7 +152,7 @@ pub(crate) async fn upload_remote_data(
     let mut upload_tasks = JoinSet::new();
     let cancel = CancellationToken::new();

-    for i in 1..upload_tasks_count + 1 {
+    for i in 1..=upload_tasks_count {
         let task_client = Arc::clone(client);
         let cancel = cancel.clone();

@@ -1,5 +1,6 @@
 use anyhow::Context;
 use camino::Utf8Path;
+use futures::StreamExt;
 use remote_storage::ListingMode;
 use remote_storage::RemotePath;
 use std::sync::Arc;
@@ -29,10 +30,10 @@ use super::{
/// * with no prefix, it lists everything after its `${random_prefix_part}/` — that should be `${base_prefix_str}` value only
/// * with `${base_prefix_str}/` prefix, it lists every `sub_prefix_${i}`
///
-/// With the real S3 enabled and `#[cfg(test)]` Rust configuration used, the S3 client test adds a `max-keys` param to limit the response keys.
-/// This way, we are able to test the pagination implicitly, by ensuring all results are returned from the remote storage and avoid uploading too many blobs to S3,
-/// since current default AWS S3 pagination limit is 1000.
-/// (see https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html#API_ListObjectsV2_RequestSyntax)
+/// In the `MaybeEnabledStorageWithTestBlobs::setup`, we set the `max_keys_in_list_response` param to limit the keys in a single response.
+/// This way, we are able to test the pagination, by ensuring all results are returned from the remote storage and avoid uploading too many blobs to S3,
+/// as the current default AWS S3 pagination limit is 1000.
+/// (see <https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html#API_ListObjectsV2_RequestSyntax>).
///
/// Lastly, the test attempts to clean up and remove all uploaded S3 files.
/// If any errors appear during the clean up, they get logged, but the test is not failed or stopped until clean up is finished.
@@ -87,6 +88,41 @@ async fn pagination_should_work(ctx: &mut MaybeEnabledStorageWithTestBlobs) -> a
         "remote storage nested prefixes list mismatches with the uploads. Remote only prefixes: {remote_only_prefixes:?}, missing uploaded prefixes: {missing_uploaded_prefixes:?}",
     );

+    // list_streaming
+
+    let prefix_with_slash = base_prefix.add_trailing_slash();
+    let mut nested_remote_prefixes_st = test_client.list_streaming(
+        Some(&prefix_with_slash),
+        ListingMode::WithDelimiter,
+        None,
+        &cancel,
+    );
+    let mut nested_remote_prefixes_combined = HashSet::new();
+    let mut segments = 0;
+    let mut segment_max_size = 0;
+    while let Some(st) = nested_remote_prefixes_st.next().await {
+        let st = st?;
+        segment_max_size = segment_max_size.max(st.prefixes.len());
+        nested_remote_prefixes_combined.extend(st.prefixes.into_iter());
+        segments += 1;
+    }
+    assert!(segments > 1, "less than 2 segments: {segments}");
+    assert!(
+        segment_max_size * 2 <= nested_remote_prefixes_combined.len(),
+        "double of segment_max_size={segment_max_size} larger number of remote prefixes of {}",
+        nested_remote_prefixes_combined.len()
+    );
+    let remote_only_prefixes = nested_remote_prefixes_combined
+        .difference(&expected_remote_prefixes)
+        .collect::<HashSet<_>>();
+    let missing_uploaded_prefixes = expected_remote_prefixes
+        .difference(&nested_remote_prefixes_combined)
+        .collect::<HashSet<_>>();
+    assert_eq!(
+        remote_only_prefixes.len() + missing_uploaded_prefixes.len(), 0,
+        "remote storage nested prefixes list mismatches with the uploads. Remote only prefixes: {remote_only_prefixes:?}, missing uploaded prefixes: {missing_uploaded_prefixes:?}",
+    );
+
     Ok(())
 }

@@ -120,6 +156,7 @@ async fn list_no_delimiter_works(
         .context("client list root files failure")?
         .keys
         .into_iter()
+        .map(|o| o.key)
         .collect::<HashSet<_>>();
     assert_eq!(
         root_files,
@@ -146,6 +183,7 @@ async fn list_no_delimiter_works(
         .context("client list nested files failure")?
         .keys
         .into_iter()
+        .map(|o| o.key)
         .collect::<HashSet<_>>();
     let trim_remote_blobs: HashSet<_> = ctx
         .remote_blobs
@@ -31,6 +31,7 @@ struct EnabledAzure {
 impl EnabledAzure {
     async fn setup(max_keys_in_list_response: Option<i32>) -> Self {
         let client = create_azure_client(max_keys_in_list_response)
+            .await
             .context("Azure client creation")
             .expect("Azure client creation failed");

@@ -187,7 +188,7 @@ impl AsyncTestContext for MaybeEnabledStorageWithSimpleTestBlobs {
     }
 }

-fn create_azure_client(
+async fn create_azure_client(
     max_keys_per_list_response: Option<i32>,
 ) -> anyhow::Result<Arc<GenericRemoteStorage>> {
     use rand::Rng;
@@ -221,6 +222,8 @@ fn create_azure_client(
         timeout: Duration::from_secs(120),
     };
     Ok(Arc::new(
-        GenericRemoteStorage::from_config(&remote_storage_config).context("remote storage init")?,
+        GenericRemoteStorage::from_config(&remote_storage_config)
+            .await
+            .context("remote storage init")?,
     ))
 }
@@ -81,6 +81,7 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
             .context("list root files failure")?
             .keys
             .into_iter()
+            .map(|o| o.key)
             .collect::<HashSet<_>>(),
     )
 }
@@ -197,6 +198,7 @@ struct EnabledS3 {
 impl EnabledS3 {
     async fn setup(max_keys_in_list_response: Option<i32>) -> Self {
         let client = create_s3_client(max_keys_in_list_response)
+            .await
             .context("S3 client creation")
             .expect("S3 client creation failed");

@@ -352,7 +354,7 @@ impl AsyncTestContext for MaybeEnabledStorageWithSimpleTestBlobs {
     }
 }

-fn create_s3_client(
+async fn create_s3_client(
     max_keys_per_list_response: Option<i32>,
 ) -> anyhow::Result<Arc<GenericRemoteStorage>> {
     use rand::Rng;
@@ -385,7 +387,9 @@ fn create_s3_client(
         timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,
     };
     Ok(Arc::new(
-        GenericRemoteStorage::from_config(&remote_storage_config).context("remote storage init")?,
+        GenericRemoteStorage::from_config(&remote_storage_config)
+            .await
+            .context("remote storage init")?,
     ))
 }

@@ -20,7 +20,6 @@ bincode.workspace = true
 bytes.workspace = true
 camino.workspace = true
 chrono.workspace = true
-heapless.workspace = true
 hex = { workspace = true, features = ["serde"] }
 humantime.workspace = true
 hyper = { workspace = true, features = ["full"] }
@@ -18,21 +18,25 @@ const STORAGE_TOKEN_ALGORITHM: Algorithm = Algorithm::EdDSA;
 #[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum Scope {
-    // Provides access to all data for a specific tenant (specified in `struct Claims` below)
+    /// Provides access to all data for a specific tenant (specified in `struct Claims` below)
     // TODO: join these two?
     Tenant,
-    // Provides blanket access to all tenants on the pageserver plus pageserver-wide APIs.
-    // Should only be used e.g. for status check/tenant creation/list.
+    /// Provides blanket access to all tenants on the pageserver plus pageserver-wide APIs.
+    /// Should only be used e.g. for status check/tenant creation/list.
     PageServerApi,
-    // Provides blanket access to all data on the safekeeper plus safekeeper-wide APIs.
-    // Should only be used e.g. for status check.
-    // Currently also used for connection from any pageserver to any safekeeper.
+    /// Provides blanket access to all data on the safekeeper plus safekeeper-wide APIs.
+    /// Should only be used e.g. for status check.
+    /// Currently also used for connection from any pageserver to any safekeeper.
     SafekeeperData,
-    // The scope used by pageservers in upcalls to storage controller and cloud control plane
+    /// The scope used by pageservers in upcalls to storage controller and cloud control plane
     #[serde(rename = "generations_api")]
     GenerationsApi,
-    // Allows access to control plane managment API and some storage controller endpoints.
+    /// Allows access to control plane managment API and some storage controller endpoints.
     Admin,
+
+    /// Allows access to storage controller APIs used by the scrubber, to interrogate the state
+    /// of a tenant & post scrub results.
+    Scrubber,
 }

 /// JWT payload. See docs/authentication.md for the format
libs/utils/src/circuit_breaker.rs (new file, 114 lines)
@@ -0,0 +1,114 @@
+use std::{
+    fmt::Display,
+    time::{Duration, Instant},
+};
+
+use metrics::IntCounter;
+
+/// Circuit breakers are for operations that are expensive and fallible: if they fail repeatedly,
+/// we will stop attempting them for some period of time, to avoid denial-of-service from retries, and
+/// to mitigate the log spam from repeated failures.
+pub struct CircuitBreaker {
+    /// An identifier that enables us to log useful errors when a circuit is broken
+    name: String,
+
+    /// Consecutive failures since last success
+    fail_count: usize,
+
+    /// How many consecutive failures before we break the circuit
+    fail_threshold: usize,
+
+    /// If circuit is broken, when was it broken?
+    broken_at: Option<Instant>,
+
+    /// If set, we will auto-reset the circuit this long after it was broken. If None, broken
+    /// circuits stay broken forever, or until success() is called.
+    reset_period: Option<Duration>,
+
+    /// If this is true, no actual circuit-breaking happens. This is for overriding a circuit breaker
+    /// to permit something to keep running even if it would otherwise have tripped it.
+    short_circuit: bool,
+}
+
+impl CircuitBreaker {
+    pub fn new(name: String, fail_threshold: usize, reset_period: Option<Duration>) -> Self {
+        Self {
+            name,
+            fail_count: 0,
+            fail_threshold,
+            broken_at: None,
+            reset_period,
+            short_circuit: false,
+        }
+    }
+
+    /// Construct an unbreakable circuit breaker, for use in unit tests etc.
+    pub fn short_circuit() -> Self {
+        Self {
+            name: String::new(),
+            fail_threshold: 0,
+            fail_count: 0,
+            broken_at: None,
+            reset_period: None,
+            short_circuit: true,
+        }
+    }
+
+    pub fn fail<E>(&mut self, metric: &IntCounter, error: E)
+    where
+        E: Display,
+    {
+        if self.short_circuit {
+            return;
+        }
+
+        self.fail_count += 1;
+        if self.broken_at.is_none() && self.fail_count >= self.fail_threshold {
+            self.break_circuit(metric, error);
+        }
+    }
+
+    /// Call this after successfully executing an operation
+    pub fn success(&mut self, metric: &IntCounter) {
+        self.fail_count = 0;
+        if let Some(broken_at) = &self.broken_at {
+            tracing::info!(breaker=%self.name, "Circuit breaker failure ended (was broken for {})",
+                humantime::format_duration(broken_at.elapsed()));
+            self.broken_at = None;
+            metric.inc();
+        }
+    }
+
+    /// Call this before attempting an operation, and skip the operation if we are currently broken.
+    pub fn is_broken(&mut self) -> bool {
+        if self.short_circuit {
+            return false;
+        }
+
+        if let Some(broken_at) = self.broken_at {
+            match self.reset_period {
+                Some(reset_period) if broken_at.elapsed() > reset_period => {
+                    self.reset_circuit();
+                    false
+                }
+                _ => true,
+            }
+        } else {
+            false
+        }
+    }
+
+    fn break_circuit<E>(&mut self, metric: &IntCounter, error: E)
+    where
+        E: Display,
+    {
+        self.broken_at = Some(Instant::now());
+        tracing::error!(breaker=%self.name, "Circuit breaker broken! Last error: {error}");
+        metric.inc();
+    }
+
+    fn reset_circuit(&mut self) {
+        self.broken_at = None;
+        self.fail_count = 0;
+    }
+}
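Note (illustrative, not part of the diff): a possible call site for the new `CircuitBreaker`. The module path, metric names, and `expensive_op` are hypothetical; the breaker API used is exactly the one defined in the new file above.

```rust
use std::time::Duration;

use metrics::IntCounter;
use utils::circuit_breaker::CircuitBreaker;

/// Guard a fallible, expensive operation with a breaker: skip the work entirely
/// while the circuit is open, and report outcomes so the breaker can trip or reset.
fn run_guarded(
    breaker: &mut CircuitBreaker,
    broken_metric: &IntCounter,
    unbroken_metric: &IntCounter,
) {
    if breaker.is_broken() {
        return;
    }
    match expensive_op() {
        Ok(()) => breaker.success(unbroken_metric),
        Err(e) => breaker.fail(broken_metric, e),
    }
}

// Stand-in for the real operation being protected.
fn expensive_op() -> Result<(), anyhow::Error> {
    Ok(())
}

fn example() {
    let broken = IntCounter::new("circuit_breaker_broken_total", "breaker trips").unwrap();
    let unbroken = IntCounter::new("circuit_breaker_unbroken_total", "breaker recoveries").unwrap();
    // Break after 5 consecutive failures; auto-reset after 10 minutes.
    let mut breaker = CircuitBreaker::new("compaction".to_string(), 5, Some(Duration::from_secs(600)));
    run_guarded(&mut breaker, &broken, &unbroken);
}
```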
@@ -1,196 +0,0 @@
//! A heapless buffer for events of sorts.

use std::ops;

use heapless::HistoryBuffer;

#[derive(Debug, Clone)]
pub struct HistoryBufferWithDropCounter<T, const L: usize> {
    buffer: HistoryBuffer<T, L>,
    drop_count: u64,
}

impl<T, const L: usize> HistoryBufferWithDropCounter<T, L> {
    pub fn write(&mut self, data: T) {
        let len_before = self.buffer.len();
        self.buffer.write(data);
        let len_after = self.buffer.len();
        self.drop_count += u64::from(len_before == len_after);
    }
    pub fn drop_count(&self) -> u64 {
        self.drop_count
    }
    pub fn map<U, F: Fn(&T) -> U>(&self, f: F) -> HistoryBufferWithDropCounter<U, L> {
        let mut buffer = HistoryBuffer::new();
        buffer.extend(self.buffer.oldest_ordered().map(f));
        HistoryBufferWithDropCounter::<U, L> {
            buffer,
            drop_count: self.drop_count,
        }
    }
}

impl<T, const L: usize> Default for HistoryBufferWithDropCounter<T, L> {
    fn default() -> Self {
        Self {
            buffer: HistoryBuffer::default(),
            drop_count: 0,
        }
    }
}

impl<T, const L: usize> ops::Deref for HistoryBufferWithDropCounter<T, L> {
    type Target = HistoryBuffer<T, L>;

    fn deref(&self) -> &Self::Target {
        &self.buffer
    }
}

#[derive(serde::Serialize, serde::Deserialize)]
struct SerdeRepr<T> {
    buffer: Vec<T>,
    buffer_size: usize,
    drop_count: u64,
}

impl<'a, T, const L: usize> From<&'a HistoryBufferWithDropCounter<T, L>> for SerdeRepr<T>
where
    T: Clone + serde::Serialize,
{
    fn from(value: &'a HistoryBufferWithDropCounter<T, L>) -> Self {
        let HistoryBufferWithDropCounter { buffer, drop_count } = value;
        SerdeRepr {
            buffer: buffer.iter().cloned().collect(),
            buffer_size: L,
            drop_count: *drop_count,
        }
    }
}

impl<T, const L: usize> serde::Serialize for HistoryBufferWithDropCounter<T, L>
where
    T: Clone + serde::Serialize,
{
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        SerdeRepr::from(self).serialize(serializer)
    }
}

impl<'de, T, const L: usize> serde::de::Deserialize<'de> for HistoryBufferWithDropCounter<T, L>
where
    T: Clone + serde::Deserialize<'de>,
{
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let SerdeRepr {
            buffer: des_buffer,
            drop_count,
            buffer_size,
        } = SerdeRepr::<T>::deserialize(deserializer)?;
        if buffer_size != L {
            use serde::de::Error;
            return Err(D::Error::custom(format!(
                "invalid buffer_size, expecting {L} got {buffer_size}"
            )));
        }
        let mut buffer = HistoryBuffer::new();
        buffer.extend(des_buffer);
        Ok(HistoryBufferWithDropCounter { buffer, drop_count })
    }
}

#[cfg(test)]
mod test {
    use super::HistoryBufferWithDropCounter;

    #[test]
    fn test_basics() {
        let mut b = HistoryBufferWithDropCounter::<usize, 2>::default();
        b.write(1);
        b.write(2);
        b.write(3);
        assert!(b.iter().any(|e| *e == 2));
        assert!(b.iter().any(|e| *e == 3));
        assert!(!b.iter().any(|e| *e == 1));

        // round-trip serde
        let round_tripped: HistoryBufferWithDropCounter<usize, 2> =
            serde_json::from_str(&serde_json::to_string(&b).unwrap()).unwrap();
        assert_eq!(
            round_tripped.iter().cloned().collect::<Vec<_>>(),
            b.iter().cloned().collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_drop_count_works() {
        let mut b = HistoryBufferWithDropCounter::<_, 2>::default();
        b.write(1);
        assert_eq!(b.drop_count(), 0);
        b.write(2);
        assert_eq!(b.drop_count(), 0);
        b.write(3);
        assert_eq!(b.drop_count(), 1);
        b.write(4);
        assert_eq!(b.drop_count(), 2);
    }

    #[test]
    fn test_clone_works() {
        let mut b = HistoryBufferWithDropCounter::<_, 2>::default();
        b.write(1);
        b.write(2);
        b.write(3);
        assert_eq!(b.drop_count(), 1);
        let mut c = b.clone();
        assert_eq!(c.drop_count(), 1);
        assert!(c.iter().any(|e| *e == 2));
        assert!(c.iter().any(|e| *e == 3));
        assert!(!c.iter().any(|e| *e == 1));

        c.write(4);
        assert!(c.iter().any(|e| *e == 4));
        assert!(!b.iter().any(|e| *e == 4));
    }

    #[test]
    fn test_map() {
        let mut b = HistoryBufferWithDropCounter::<_, 2>::default();

        b.write(1);
        assert_eq!(b.drop_count(), 0);
        {
            let c = b.map(|i| i + 10);
            assert_eq!(c.oldest_ordered().cloned().collect::<Vec<_>>(), vec![11]);
            assert_eq!(c.drop_count(), 0);
        }

        b.write(2);
        assert_eq!(b.drop_count(), 0);
        {
            let c = b.map(|i| i + 10);
            assert_eq!(
                c.oldest_ordered().cloned().collect::<Vec<_>>(),
                vec![11, 12]
            );
            assert_eq!(c.drop_count(), 0);
        }

        b.write(3);
        assert_eq!(b.drop_count(), 1);
        {
            let c = b.map(|i| i + 10);
            assert_eq!(
                c.oldest_ordered().cloned().collect::<Vec<_>>(),
                vec![12, 13]
            );
            assert_eq!(c.drop_count(), 1);
        }
    }
}
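Because serialization goes through `SerdeRepr`, the on-the-wire shape of the (now removed) buffer is easy to picture. A hedged illustration of the JSON a 2-slot buffer would round-trip through, inferred from the field names above rather than quoted from any fixture:

    {"buffer":[2,3],"buffer_size":2,"drop_count":1}

On deserialize, `buffer_size` is compared against the const generic `L`, and a mismatch is rejected with the custom error shown in the impl above.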
@@ -52,17 +52,17 @@ struct RequestId(String);
/// There could be other ways to implement similar functionality:
///
/// * procmacros placed on top of all handler methods
///   With all the drawbacks of procmacros, brings no difference implementation-wise,
///   and little code reduction compared to the existing approach.
///
/// * Another `TraitExt` with e.g. the `get_with_span`, `post_with_span` methods to do similar logic,
///   implemented for [`RouterBuilder`].
///   Could be simpler, but we don't want to depend on [`routerify`] more, targeting to use other library later.
///
/// * In theory, a span guard could've been created in a pre-request middleware and placed into a global collection, to be dropped
///   later, in a post-response middleware.
///   Due to the suspendable nature of the futures, this would give contradictory results, which is exactly the opposite of what `tracing-futures`
///   tries to achieve with its `.instrument` used in the current approach.
///
/// If needed, a declarative macro to substitute the |r| ... closure boilerplate could be introduced.
pub async fn request_span<R, H>(request: Request<Body>, handler: H) -> R::Output
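The "|r| ... closure boilerplate" the comment refers to is the per-route wrapping of handlers at registration time. A hedged sketch, with a made-up route path and handler name purely for illustration:

    // Hypothetical registration site: wrap the handler so the whole request future
    // runs inside a per-request tracing span.
    let router = router_builder
        .get("/v1/status", |r| request_span(r, status_handler))
        .build()?;

The real route table and handler names live elsewhere in the codebase; the point is only that every handler is funneled through `request_span`.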
@@ -302,17 +302,6 @@ pub struct TenantId(Id);
 id_newtype!(TenantId);

-/// Neon Connection Id identifies long-lived connections (for example a pagestream
-/// connection with the page_service). Is used for better logging and tracing
-///
-/// NOTE: It (de)serializes as an array of hex bytes, so the string representation would look
-/// like `[173,80,132,115,129,226,72,254,170,201,135,108,199,26,228,24]`.
-/// See [`Id`] for alternative ways to serialize it.
-#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, PartialOrd, Ord)]
-pub struct ConnectionId(Id);
-
-id_newtype!(ConnectionId);
-
 // A pair uniquely identifying Neon instance.
 #[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash, Serialize, Deserialize)]
 pub struct TenantTimelineId {
@@ -59,8 +59,6 @@ pub mod signals;
 pub mod fs_ext;
-pub mod history_buffer;
 pub mod measured_stream;
 pub mod serde_percent;
@@ -98,6 +96,8 @@ pub mod poison;
 pub mod toml_edit_ext;

+pub mod circuit_breaker;
+
 /// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
 ///
 /// we have several cases:
@@ -128,7 +128,7 @@ pub mod toml_edit_ext;
 ///
 /// #############################################################################################
 /// TODO this macro is not the way the library is intended to be used, see <https://github.com/neondatabase/neon/issues/1565> for details.
-/// We use `cachepot` to reduce our current CI build times: <https://github.com/neondatabase/cloud/pull/1033#issuecomment-1100935036>
+/// We used `cachepot` to reduce our current CI build times: <https://github.com/neondatabase/cloud/pull/1033#issuecomment-1100935036>
 /// Yet, it seems to ignore the GIT_VERSION env variable, passed to Docker build, even with build.rs that contains
 /// `println!("cargo:rerun-if-env-changed=GIT_VERSION");` code for cachepot cache invalidation.
 /// The problem needs further investigation and regular `const` declaration instead of a macro.
@@ -49,6 +49,7 @@ pub struct TenantShardId {
 impl ShardCount {
     pub const MAX: Self = Self(u8::MAX);
+    pub const MIN: Self = Self(0);

     /// The internal value of a ShardCount may be zero, which means "1 shard, but use
     /// legacy format for TenantShardId that excludes the shard suffix", also known
@@ -78,8 +78,9 @@ impl Drop for GateGuard {
     }
 }

-#[derive(Debug)]
+#[derive(Debug, thiserror::Error)]
 pub enum GateError {
+    #[error("gate is closed")]
     GateClosed,
 }
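A brief note on what the `thiserror::Error` derive buys here: the `#[error("gate is closed")]` attribute generates the `Display` and `std::error::Error` impls, so callers can format or wrap the error without hand-written boilerplate. An illustrative sketch:

    // With the derive above, this holds:
    assert_eq!(GateError::GateClosed.to_string(), "gate is closed");
    // ...and GateError can now be boxed or wrapped by anyhow and friends directly.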
@@ -49,6 +49,7 @@ postgres_backend.workspace = true
 postgres-protocol.workspace = true
 postgres-types.workspace = true
 rand.workspace = true
+range-set-blaze = { version = "0.1.16", features = ["alloc"] }
 regex.workspace = true
 scopeguard.workspace = true
 serde.workspace = true
@@ -107,3 +108,7 @@ harness = false
 [[bench]]
 name = "bench_walredo"
 harness = false
+
+[[bench]]
+name = "bench_ingest"
+harness = false
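With the new `[[bench]]` target registered, the ingest benchmark can be run on its own using the standard cargo invocation (nothing here is specific to this PR):

    cargo bench --package pageserver --bench bench_ingest

Criterion-based benches also accept a name filter after `--`, e.g. append `-- ingest-small-values` to run only that group.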
pageserver/benches/bench_ingest.rs (new file, 239 lines)
@@ -0,0 +1,239 @@
use std::{env, num::NonZeroUsize};

use bytes::Bytes;
use camino::Utf8PathBuf;
use criterion::{criterion_group, criterion_main, Criterion};
use pageserver::{
    config::PageServerConf,
    context::{DownloadBehavior, RequestContext},
    l0_flush::{L0FlushConfig, L0FlushGlobalState},
    page_cache,
    repository::Value,
    task_mgr::TaskKind,
    tenant::storage_layer::InMemoryLayer,
    virtual_file,
};
use pageserver_api::{key::Key, shard::TenantShardId};
use utils::{
    bin_ser::BeSer,
    id::{TenantId, TimelineId},
};

// A very cheap hash for generating non-sequential keys.
fn murmurhash32(mut h: u32) -> u32 {
    h ^= h >> 16;
    h = h.wrapping_mul(0x85ebca6b);
    h ^= h >> 13;
    h = h.wrapping_mul(0xc2b2ae35);
    h ^= h >> 16;
    h
}

enum KeyLayout {
    /// Sequential unique keys
    Sequential,
    /// Random unique keys
    Random,
    /// Random keys, but only use the bits from the mask of them
    RandomReuse(u32),
}

enum WriteDelta {
    Yes,
    No,
}

async fn ingest(
    conf: &'static PageServerConf,
    put_size: usize,
    put_count: usize,
    key_layout: KeyLayout,
    write_delta: WriteDelta,
) -> anyhow::Result<()> {
    let mut lsn = utils::lsn::Lsn(1000);
    let mut key = Key::from_i128(0x0);

    let timeline_id = TimelineId::generate();
    let tenant_id = TenantId::generate();
    let tenant_shard_id = TenantShardId::unsharded(tenant_id);

    tokio::fs::create_dir_all(conf.timeline_path(&tenant_shard_id, &timeline_id)).await?;

    let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);

    let gate = utils::sync::gate::Gate::default();
    let entered = gate.enter().unwrap();

    let layer =
        InMemoryLayer::create(conf, timeline_id, tenant_shard_id, lsn, entered, &ctx).await?;

    let data = Value::Image(Bytes::from(vec![0u8; put_size])).ser()?;
    let ctx = RequestContext::new(
        pageserver::task_mgr::TaskKind::WalReceiverConnectionHandler,
        pageserver::context::DownloadBehavior::Download,
    );

    for i in 0..put_count {
        lsn += put_size as u64;

        // Generate lots of keys within a single relation, which simulates the typical bulk ingest case: people
        // usually care the most about write performance when they're blasting a huge batch of data into a huge table.
        match key_layout {
            KeyLayout::Sequential => {
                // Use sequential order to illustrate the experience a user is likely to have
                // when ingesting bulk data.
                key.field6 = i as u32;
            }
            KeyLayout::Random => {
                // Use random-order keys to avoid giving a false advantage to data structures that are
                // faster when inserting on the end.
                key.field6 = murmurhash32(i as u32);
            }
            KeyLayout::RandomReuse(mask) => {
                // Use low bits only, to limit cardinality
                key.field6 = murmurhash32(i as u32) & mask;
            }
        }

        layer.put_value(key, lsn, &data, &ctx).await?;
    }
    layer.freeze(lsn + 1).await;

    if matches!(write_delta, WriteDelta::Yes) {
        let l0_flush_state = L0FlushGlobalState::new(L0FlushConfig::Direct {
            max_concurrency: NonZeroUsize::new(1).unwrap(),
        });
        let (_desc, path) = layer
            .write_to_disk(&ctx, None, l0_flush_state.inner())
            .await?
            .unwrap();
        tokio::fs::remove_file(path).await?;
    }

    Ok(())
}

/// Wrapper to instantiate a tokio runtime
fn ingest_main(
    conf: &'static PageServerConf,
    put_size: usize,
    put_count: usize,
    key_layout: KeyLayout,
    write_delta: WriteDelta,
) {
    let runtime = tokio::runtime::Builder::new_current_thread()
        .enable_all()
        .build()
        .unwrap();

    runtime.block_on(async move {
        let r = ingest(conf, put_size, put_count, key_layout, write_delta).await;
        if let Err(e) = r {
            panic!("{e:?}");
        }
    });
}

/// Declare a series of benchmarks for the Pageserver's ingest write path.
///
/// This benchmark does not include WAL decode: it starts at InMemoryLayer::put_value, and ends either
/// at freezing the ephemeral layer, or writing the ephemeral layer out to an L0 (depending on whether WriteDelta is set).
///
/// Genuine disk I/O is used, so expect results to differ depending on storage. However, when running on
/// a fast disk, CPU is the bottleneck at time of writing.
fn criterion_benchmark(c: &mut Criterion) {
    let temp_dir_parent: Utf8PathBuf = env::current_dir().unwrap().try_into().unwrap();
    let temp_dir = camino_tempfile::tempdir_in(temp_dir_parent).unwrap();
    eprintln!("Data directory: {}", temp_dir.path());

    let conf: &'static PageServerConf = Box::leak(Box::new(
        pageserver::config::PageServerConf::dummy_conf(temp_dir.path().to_path_buf()),
    ));
    virtual_file::init(16384, virtual_file::io_engine_for_bench());
    page_cache::init(conf.page_cache_size);

    {
        let mut group = c.benchmark_group("ingest-small-values");
        let put_size = 100usize;
        let put_count = 128 * 1024 * 1024 / put_size;
        group.throughput(criterion::Throughput::Bytes((put_size * put_count) as u64));
        group.sample_size(10);
        group.bench_function("ingest 128MB/100b seq", |b| {
            b.iter(|| {
                ingest_main(
                    conf,
                    put_size,
                    put_count,
                    KeyLayout::Sequential,
                    WriteDelta::Yes,
                )
            })
        });
        group.bench_function("ingest 128MB/100b rand", |b| {
            b.iter(|| {
                ingest_main(
                    conf,
                    put_size,
                    put_count,
                    KeyLayout::Random,
                    WriteDelta::Yes,
                )
            })
        });
        group.bench_function("ingest 128MB/100b rand-1024keys", |b| {
            b.iter(|| {
                ingest_main(
                    conf,
                    put_size,
                    put_count,
                    KeyLayout::RandomReuse(0x3ff),
                    WriteDelta::Yes,
                )
            })
        });
        group.bench_function("ingest 128MB/100b seq, no delta", |b| {
            b.iter(|| {
                ingest_main(
                    conf,
                    put_size,
                    put_count,
                    KeyLayout::Sequential,
                    WriteDelta::No,
                )
            })
        });
    }

    {
        let mut group = c.benchmark_group("ingest-big-values");
        let put_size = 8192usize;
        let put_count = 128 * 1024 * 1024 / put_size;
        group.throughput(criterion::Throughput::Bytes((put_size * put_count) as u64));
        group.sample_size(10);
        group.bench_function("ingest 128MB/8k seq", |b| {
            b.iter(|| {
                ingest_main(
                    conf,
                    put_size,
                    put_count,
                    KeyLayout::Sequential,
                    WriteDelta::Yes,
                )
            })
        });
        group.bench_function("ingest 128MB/8k seq, no delta", |b| {
            b.iter(|| {
                ingest_main(
                    conf,
                    put_size,
                    put_count,
                    KeyLayout::Sequential,
                    WriteDelta::No,
                )
            })
        });
    }
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
@@ -1,3 +1,4 @@
+use criterion::measurement::WallTime;
 use pageserver::keyspace::{KeyPartitioning, KeySpace};
 use pageserver::repository::Key;
 use pageserver::tenant::layer_map::LayerMap;
@@ -15,7 +16,11 @@ use utils::id::{TenantId, TimelineId};
 use utils::lsn::Lsn;

-use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkGroup, Criterion};
+
+fn fixture_path(relative: &str) -> PathBuf {
+    PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(relative)
+}

 fn build_layer_map(filename_dump: PathBuf) -> LayerMap {
     let mut layer_map = LayerMap::default();
@@ -109,7 +114,7 @@ fn uniform_key_partitioning(layer_map: &LayerMap, _lsn: Lsn) -> KeyPartitioning
 // between each test run.
 fn bench_from_captest_env(c: &mut Criterion) {
     // TODO consider compressing this file
-    let layer_map = build_layer_map(PathBuf::from("benches/odd-brook-layernames.txt"));
+    let layer_map = build_layer_map(fixture_path("benches/odd-brook-layernames.txt"));
     let queries: Vec<(Key, Lsn)> = uniform_query_pattern(&layer_map);

     // Test with uniform query pattern
@@ -139,7 +144,7 @@ fn bench_from_captest_env(c: &mut Criterion) {
 fn bench_from_real_project(c: &mut Criterion) {
     // Init layer map
     let now = Instant::now();
-    let layer_map = build_layer_map(PathBuf::from("benches/odd-brook-layernames.txt"));
+    let layer_map = build_layer_map(fixture_path("benches/odd-brook-layernames.txt"));
     println!("Finished layer map init in {:?}", now.elapsed());

     // Choose uniformly distributed queries
@@ -242,7 +247,72 @@ fn bench_sequential(c: &mut Criterion) {
     group.finish();
 }

+fn bench_visibility_with_map(
+    group: &mut BenchmarkGroup<WallTime>,
+    layer_map: LayerMap,
+    read_points: Vec<Lsn>,
+    bench_name: &str,
+) {
+    group.bench_function(bench_name, |b| {
+        b.iter(|| black_box(layer_map.get_visibility(read_points.clone())));
+    });
+}
+
+// Benchmark using synthetic data. Arrange image layers on stacked diagonal lines.
+fn bench_visibility(c: &mut Criterion) {
+    let mut group = c.benchmark_group("visibility");
+    {
+        // Init layer map. Create 100_000 layers arranged in 1000 diagonal lines.
+        let now = Instant::now();
+        let mut layer_map = LayerMap::default();
+        let mut updates = layer_map.batch_update();
+        for i in 0..100_000 {
+            let i32 = (i as u32) % 100;
+            let zero = Key::from_hex("000000000000000000000000000000000000").unwrap();
+            let layer = PersistentLayerDesc::new_img(
+                TenantShardId::unsharded(TenantId::generate()),
+                TimelineId::generate(),
+                zero.add(10 * i32)..zero.add(10 * i32 + 1),
+                Lsn(i),
+                0,
+            );
+            updates.insert_historic(layer);
+        }
+        updates.flush();
+        println!("Finished layer map init in {:?}", now.elapsed());
+
+        let mut read_points = Vec::new();
+        for i in (0..100_000).step_by(1000) {
+            read_points.push(Lsn(i));
+        }
+
+        bench_visibility_with_map(&mut group, layer_map, read_points, "sequential");
+    }
+
+    {
+        let layer_map = build_layer_map(fixture_path("benches/odd-brook-layernames.txt"));
+        let read_points = vec![Lsn(0x1C760FA190)];
+        bench_visibility_with_map(&mut group, layer_map, read_points, "real_map");
+
+        let layer_map = build_layer_map(fixture_path("benches/odd-brook-layernames.txt"));
+        let read_points = vec![
+            Lsn(0x1C760FA190),
+            Lsn(0x000000931BEAD539),
+            Lsn(0x000000931BF63011),
+            Lsn(0x000000931B33AE68),
+            Lsn(0x00000038E67ABFA0),
+            Lsn(0x000000931B33AE68),
+            Lsn(0x000000914E3F38F0),
+            Lsn(0x000000931B33AE68),
+        ];
+        bench_visibility_with_map(&mut group, layer_map, read_points, "real_map_many_branches");
+    }
+
+    group.finish();
+}
+
 criterion_group!(group_1, bench_from_captest_env);
 criterion_group!(group_2, bench_from_real_project);
 criterion_group!(group_3, bench_sequential);
-criterion_main!(group_1, group_2, group_3);
+criterion_group!(group_4, bench_visibility);
+criterion_main!(group_1, group_2, group_3, group_4);
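The new `visibility` group plugs into the existing criterion harness, so it can be exercised in isolation with the usual filter syntax (a standard cargo/criterion invocation, not anything introduced by this change):

    cargo bench --package pageserver --bench bench_layer_map -- visibility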
@@ -1,6 +1,7 @@
 use std::collections::HashMap;

 use bytes::Bytes;
+use detach_ancestor::AncestorDetached;
 use pageserver_api::{models::*, shard::TenantShardId};
 use reqwest::{IntoUrl, Method, StatusCode};
 use utils::{
@@ -418,6 +419,23 @@ impl Client {
         }
     }

+    pub async fn timeline_detach_ancestor(
+        &self,
+        tenant_shard_id: TenantShardId,
+        timeline_id: TimelineId,
+    ) -> Result<AncestorDetached> {
+        let uri = format!(
+            "{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/detach_ancestor",
+            self.mgmt_api_endpoint
+        );
+
+        self.request(Method::PUT, &uri, ())
+            .await?
+            .json()
+            .await
+            .map_err(Error::ReceiveBody)
+    }
+
     pub async fn tenant_reset(&self, tenant_shard_id: TenantShardId) -> Result<()> {
         let uri = format!(
             "{}/v1/tenant/{}/reset",
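A hedged usage sketch of the new client call above; the client construction and the two ids are placeholders, only `timeline_detach_ancestor` itself comes from this diff:

    // Assuming an already-constructed mgmt API `client` plus known ids:
    let detached: AncestorDetached = client
        .timeline_detach_ancestor(tenant_shard_id, timeline_id)
        .await?;
    // `AncestorDetached` carries the server's description of the detach result.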
@@ -131,7 +131,7 @@ impl CompactionKey for Key {
 pub type CompactionKeySpace<K> = Vec<Range<K>>;

 /// Functions needed from all layers.
-pub trait CompactionLayer<K: CompactionKey + ?Sized> {
+pub trait CompactionLayer<K: CompactionKey> {
     fn key_range(&self) -> &Range<K>;
     fn lsn_range(&self) -> &Range<Lsn>;
@@ -179,7 +179,7 @@ async fn main() -> anyhow::Result<()> {
         .get("remote_storage")
         .expect("need remote_storage");
     let config = RemoteStorageConfig::from_toml(toml_item)?;
-    let storage = remote_storage::GenericRemoteStorage::from_config(&config);
+    let storage = remote_storage::GenericRemoteStorage::from_config(&config).await;
     let cancel = CancellationToken::new();
     storage
         .unwrap()
@@ -14,12 +14,14 @@ pub fn check_permission(claims: &Claims, tenant_id: Option<TenantId>) -> Result<
         }
         (Scope::PageServerApi, None) => Ok(()), // access to management api for PageServerApi scope
         (Scope::PageServerApi, Some(_)) => Ok(()), // access to tenant api using PageServerApi scope
-        (Scope::Admin | Scope::SafekeeperData | Scope::GenerationsApi, _) => Err(AuthError(
-            format!(
-                "JWT scope '{:?}' is ineligible for Pageserver auth",
-                claims.scope
-            )
-            .into(),
-        )),
+        (Scope::Admin | Scope::SafekeeperData | Scope::GenerationsApi | Scope::Scrubber, _) => {
+            Err(AuthError(
+                format!(
+                    "JWT scope '{:?}' is ineligible for Pageserver auth",
+                    claims.scope
+                )
+                .into(),
+            ))
+        }
     }
 }
@@ -2,35 +2,36 @@
 //! Main entry point for the Page Server executable.

+use std::env;
 use std::env::{var, VarError};
 use std::io::Read;
 use std::sync::Arc;
 use std::time::Duration;
-use std::{env, ops::ControlFlow, str::FromStr};

 use anyhow::{anyhow, Context};
 use camino::Utf8Path;
 use clap::{Arg, ArgAction, Command};

 use metrics::launch_timestamp::{set_launch_timestamp_metric, LaunchTimestamp};
+use pageserver::config::PageserverIdentity;
 use pageserver::control_plane_client::ControlPlaneClient;
 use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
 use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING};
-use pageserver::task_mgr::WALRECEIVER_RUNTIME;
+use pageserver::task_mgr::{COMPUTE_REQUEST_RUNTIME, WALRECEIVER_RUNTIME};
 use pageserver::tenant::{secondary, TenantSharedResources};
+use pageserver::{CancellableTask, ConsumptionMetricsTasks, HttpEndpointListener};
 use remote_storage::GenericRemoteStorage;
 use tokio::signal::unix::SignalKind;
 use tokio::time::Instant;
+use tokio_util::sync::CancellationToken;
 use tracing::*;

 use metrics::set_build_info_metric;
 use pageserver::{
-    config::{defaults::*, PageServerConf},
-    context::{DownloadBehavior, RequestContext},
+    config::PageServerConf,
     deletion_queue::DeletionQueue,
     http, page_cache, page_service, task_mgr,
-    task_mgr::TaskKind,
-    task_mgr::{BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME},
+    task_mgr::{BACKGROUND_RUNTIME, MGMT_REQUEST_RUNTIME},
     tenant::mgr,
     virtual_file,
 };
@@ -84,18 +85,13 @@ fn main() -> anyhow::Result<()> {
         .with_context(|| format!("Error opening workdir '{workdir}'"))?;

     let cfg_file_path = workdir.join("pageserver.toml");
+    let identity_file_path = workdir.join("identity.toml");

     // Set CWD to workdir for non-daemon modes
     env::set_current_dir(&workdir)
         .with_context(|| format!("Failed to set application's current dir to '{workdir}'"))?;

-    let conf = match initialize_config(&cfg_file_path, arg_matches, &workdir)? {
-        ControlFlow::Continue(conf) => conf,
-        ControlFlow::Break(()) => {
-            info!("Pageserver config init successful");
-            return Ok(());
-        }
-    };
+    let conf = initialize_config(&identity_file_path, &cfg_file_path, &workdir)?;

     // Initialize logging.
     //
@@ -127,8 +123,10 @@ fn main() -> anyhow::Result<()> {

     // after setting up logging, log the effective IO engine choice and read path implementations
     info!(?conf.virtual_file_io_engine, "starting with virtual_file IO engine");
+    info!(?conf.virtual_file_direct_io, "starting with virtual_file Direct IO settings");
     info!(?conf.get_impl, "starting with get page implementation");
     info!(?conf.get_vectored_impl, "starting with vectored get page implementation");
+    info!(?conf.compact_level0_phase1_value_access, "starting with setting for compact_level0_phase1_value_access");

     let tenants_path = conf.tenants_path();
     if !tenants_path.exists() {
@@ -150,70 +148,55 @@ fn main() -> anyhow::Result<()> {
 }

 fn initialize_config(
+    identity_file_path: &Utf8Path,
     cfg_file_path: &Utf8Path,
-    arg_matches: clap::ArgMatches,
     workdir: &Utf8Path,
-) -> anyhow::Result<ControlFlow<(), &'static PageServerConf>> {
-    let init = arg_matches.get_flag("init");
-
-    let file_contents: Option<toml_edit::Document> = match std::fs::File::open(cfg_file_path) {
+) -> anyhow::Result<&'static PageServerConf> {
+    // The deployment orchestrator writes out an identity file containing the node id
+    // for all pageservers. This file is the source of truth for the node id. In order
+    // to allow for rolling back pageserver releases, the node id is also included in
+    // the pageserver config that the deployment orchestrator writes to disk for the pageserver.
+    // A rolled back version of the pageserver will get the node id from the pageserver.toml
+    // config file.
+    let identity = match std::fs::File::open(identity_file_path) {
         Ok(mut f) => {
-            if init {
-                anyhow::bail!("config file already exists: {cfg_file_path}");
+            let md = f.metadata().context("stat config file")?;
+            if !md.is_file() {
+                anyhow::bail!("Pageserver found identity file but it is a dir entry: {identity_file_path}. Aborting start up ...");
             }
+
+            let mut s = String::new();
+            f.read_to_string(&mut s).context("read identity file")?;
+            toml_edit::de::from_str::<PageserverIdentity>(&s)?
+        }
+        Err(e) => {
+            anyhow::bail!("Pageserver could not read identity file: {identity_file_path}: {e}. Aborting start up ...");
+        }
+    };
+
+    let config: toml_edit::Document = match std::fs::File::open(cfg_file_path) {
+        Ok(mut f) => {
             let md = f.metadata().context("stat config file")?;
             if md.is_file() {
                 let mut s = String::new();
                 f.read_to_string(&mut s).context("read config file")?;
-                Some(s.parse().context("parse config file toml")?)
+                s.parse().context("parse config file toml")?
             } else {
                 anyhow::bail!("directory entry exists but is not a file: {cfg_file_path}");
            }
         }
-        Err(e) if e.kind() == std::io::ErrorKind::NotFound => None,
         Err(e) => {
             anyhow::bail!("open pageserver config: {e}: {cfg_file_path}");
         }
     };

-    let mut effective_config = file_contents.unwrap_or_else(|| {
-        DEFAULT_CONFIG_FILE
-            .parse()
-            .expect("unit tests ensure this works")
-    });
-
-    // Patch with overrides from the command line
-    if let Some(values) = arg_matches.get_many::<String>("config-override") {
-        for option_line in values {
-            let doc = toml_edit::Document::from_str(option_line).with_context(|| {
-                format!("Option '{option_line}' could not be parsed as a toml document")
-            })?;
-
-            for (key, item) in doc.iter() {
-                effective_config.insert(key, item.clone());
-            }
-        }
-    }
-
-    debug!("Resulting toml: {effective_config}");
+    debug!("Using pageserver toml: {config}");

     // Construct the runtime representation
-    let conf = PageServerConf::parse_and_validate(&effective_config, workdir)
+    let conf = PageServerConf::parse_and_validate(identity.id, &config, workdir)
         .context("Failed to parse pageserver configuration")?;

-    if init {
-        info!("Writing pageserver config to '{cfg_file_path}'");
-
-        std::fs::write(cfg_file_path, effective_config.to_string())
-            .with_context(|| format!("Failed to write pageserver config to '{cfg_file_path}'"))?;
-        info!("Config successfully written to '{cfg_file_path}'")
-    }
-
-    Ok(if init {
-        ControlFlow::Break(())
-    } else {
-        ControlFlow::Continue(Box::leak(Box::new(conf)))
-    })
+    Ok(Box::leak(Box::new(conf)))
 }

 struct WaitForPhaseResult<F: std::future::Future + Unpin> {
@@ -305,6 +288,7 @@ fn start_pageserver(
     // Create and lock PID file. This ensures that there cannot be more than one
     // pageserver process running at the same time.
     let lock_file_path = conf.workdir.join(PID_FILE_NAME);
+    info!("Claiming pid file at {lock_file_path:?}...");
     let lock_file =
         utils::pid_file::claim_for_current_process(&lock_file_path).context("claim pid file")?;
     info!("Claimed pid file at {lock_file_path:?}");
@@ -385,7 +369,7 @@ fn start_pageserver(
     let shutdown_pageserver = tokio_util::sync::CancellationToken::new();

     // Set up remote storage client
-    let remote_storage = create_remote_storage_client(conf)?;
+    let remote_storage = BACKGROUND_RUNTIME.block_on(create_remote_storage_client(conf))?;

     // Set up deletion queue
     let (deletion_queue, deletion_workers) = DeletionQueue::new(
@@ -430,8 +414,10 @@ fn start_pageserver(

     // Scan the local 'tenants/' directory and start loading the tenants
     let deletion_queue_client = deletion_queue.new_client();
+    let background_purges = mgr::BackgroundPurges::default();
     let tenant_manager = BACKGROUND_RUNTIME.block_on(mgr::init_tenant_mgr(
         conf,
+        background_purges.clone(),
         TenantSharedResources {
             broker_client: broker_client.clone(),
             remote_storage: remote_storage.clone(),
@@ -523,7 +509,7 @@ fn start_pageserver(
         }
     });

-    let secondary_controller = secondary::spawn_tasks(
+    let (secondary_controller, secondary_controller_tasks) = secondary::spawn_tasks(
         tenant_manager.clone(),
         remote_storage.clone(),
         background_jobs_barrier.clone(),
@@ -536,18 +522,19 @@ fn start_pageserver(
     // been configured.
     let disk_usage_eviction_state: Arc<disk_usage_eviction_task::State> = Arc::default();

-    launch_disk_usage_global_eviction_task(
+    let disk_usage_eviction_task = launch_disk_usage_global_eviction_task(
         conf,
         remote_storage.clone(),
         disk_usage_eviction_state.clone(),
         tenant_manager.clone(),
         background_jobs_barrier.clone(),
-    )?;
+    );

     // Start up the service to handle HTTP mgmt API request. We created the
     // listener earlier already.
-    {
-        let _rt_guard = MGMT_REQUEST_RUNTIME.enter();
+    let http_endpoint_listener = {
+        let _rt_guard = MGMT_REQUEST_RUNTIME.enter(); // for hyper
+        let cancel = CancellationToken::new();

         let router_state = Arc::new(
             http::routes::State::new(
@@ -568,109 +555,49 @@ fn start_pageserver(
         let service = utils::http::RouterService::new(router).unwrap();
         let server = hyper::Server::from_tcp(http_listener)?
             .serve(service)
-            .with_graceful_shutdown(task_mgr::shutdown_watcher());
-
-        task_mgr::spawn(
-            MGMT_REQUEST_RUNTIME.handle(),
-            TaskKind::HttpEndpointListener,
-            None,
-            None,
-            "http endpoint listener",
-            true,
-            async {
-                server.await?;
-                Ok(())
-            },
-        );
-    }
+            .with_graceful_shutdown({
+                let cancel = cancel.clone();
+                async move { cancel.clone().cancelled().await }
+            });
+
+        let task = MGMT_REQUEST_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(
+            "http endpoint listener",
+            server,
+        ));
+        HttpEndpointListener(CancellableTask { task, cancel })
+    };

-    if let Some(metric_collection_endpoint) = &conf.metric_collection_endpoint {
-        let metrics_ctx = RequestContext::todo_child(
-            TaskKind::MetricsCollection,
-            // This task itself shouldn't download anything.
-            // The actual size calculation does need downloads, and
-            // creates a child context with the right DownloadBehavior.
-            DownloadBehavior::Error,
-        );
-
-        let local_disk_storage = conf.workdir.join("last_consumption_metrics.json");
-        task_mgr::spawn(
-            crate::BACKGROUND_RUNTIME.handle(),
-            TaskKind::MetricsCollection,
-            None,
-            None,
-            "consumption metrics collection",
-            true,
-            {
-                let tenant_manager = tenant_manager.clone();
-                async move {
-                    // first wait until background jobs are cleared to launch.
-                    //
-                    // this is because we only process active tenants and timelines, and the
-                    // Timeline::get_current_logical_size will spawn the logical size calculation,
-                    // which will not be rate-limited.
-                    let cancel = task_mgr::shutdown_token();
-
-                    tokio::select! {
-                        _ = cancel.cancelled() => { return Ok(()); },
-                        _ = background_jobs_barrier.wait() => {}
-                    };
-
-                    pageserver::consumption_metrics::collect_metrics(
-                        tenant_manager,
-                        metric_collection_endpoint,
-                        &conf.metric_collection_bucket,
-                        conf.metric_collection_interval,
-                        conf.cached_metric_collection_interval,
-                        conf.synthetic_size_calculation_interval,
-                        conf.id,
-                        local_disk_storage,
-                        cancel,
-                        metrics_ctx,
-                    )
-                    .instrument(info_span!("metrics_collection"))
-                    .await?;
-                    Ok(())
-                }
-            },
-        );
-    }
+    let consumption_metrics_tasks = {
+        let cancel = shutdown_pageserver.child_token();
+        let task = crate::BACKGROUND_RUNTIME.spawn({
+            let tenant_manager = tenant_manager.clone();
+            let cancel = cancel.clone();
+            async move {
+                // first wait until background jobs are cleared to launch.
+                //
+                // this is because we only process active tenants and timelines, and the
+                // Timeline::get_current_logical_size will spawn the logical size calculation,
+                // which will not be rate-limited.
+                tokio::select! {
+                    _ = cancel.cancelled() => { return; },
+                    _ = background_jobs_barrier.wait() => {}
+                };
+
+                pageserver::consumption_metrics::run(conf, tenant_manager, cancel).await;
+            }
+        });
+        ConsumptionMetricsTasks(CancellableTask { task, cancel })
+    };

     // Spawn a task to listen for libpq connections. It will spawn further tasks
     // for each connection. We created the listener earlier already.
-    {
-        let libpq_ctx = RequestContext::todo_child(
-            TaskKind::LibpqEndpointListener,
-            // listener task shouldn't need to download anything. (We will
-            // create a separate sub-contexts for each connection, with their
-            // own download behavior. This context is used only to listen and
-            // accept connections.)
-            DownloadBehavior::Error,
-        );
-        task_mgr::spawn(
-            COMPUTE_REQUEST_RUNTIME.handle(),
-            TaskKind::LibpqEndpointListener,
-            None,
-            None,
-            "libpq endpoint listener",
-            true,
-            {
-                let tenant_manager = tenant_manager.clone();
-                async move {
-                    page_service::libpq_listener_main(
-                        tenant_manager,
-                        pg_auth,
-                        pageserver_listener,
-                        conf.pg_auth_type,
-                        libpq_ctx,
-                        task_mgr::shutdown_token(),
-                    )
-                    .await
-                }
-            },
-        );
-    }
+    let page_service = page_service::spawn(conf, tenant_manager.clone(), pg_auth, {
+        let _entered = COMPUTE_REQUEST_RUNTIME.enter(); // TcpListener::from_std requires it
+        pageserver_listener
+            .set_nonblocking(true)
+            .context("set listener to nonblocking")?;
+        tokio::net::TcpListener::from_std(pageserver_listener).context("create tokio listener")?
+    });

     let mut shutdown_pageserver = Some(shutdown_pageserver.drop_guard());

@@ -696,13 +623,24 @@ fn start_pageserver(
     // Right now that tree doesn't reach very far, and `task_mgr` is used instead.
     // The plan is to change that over time.
     shutdown_pageserver.take();
-    pageserver::shutdown_pageserver(&tenant_manager, deletion_queue.clone(), 0).await;
+    pageserver::shutdown_pageserver(
+        http_endpoint_listener,
+        page_service,
+        consumption_metrics_tasks,
+        disk_usage_eviction_task,
+        &tenant_manager,
+        background_purges,
+        deletion_queue.clone(),
+        secondary_controller_tasks,
+        0,
+    )
+    .await;
     unreachable!()
     })
 }
 }

-fn create_remote_storage_client(
+async fn create_remote_storage_client(
     conf: &'static PageServerConf,
 ) -> anyhow::Result<GenericRemoteStorage> {
     let config = if let Some(config) = &conf.remote_storage_config {
@@ -712,7 +650,7 @@ fn create_remote_storage_client(
     };

     // Create the client
-    let mut remote_storage = GenericRemoteStorage::from_config(config)?;
+    let mut remote_storage = GenericRemoteStorage::from_config(config).await?;

     // If `test_remote_failures` is non-zero, wrap the client with a
     // wrapper that simulates failures.
@@ -735,28 +673,12 @@ fn cli() -> Command {
     Command::new("Neon page server")
         .about("Materializes WAL stream to pages and serves them to the postgres")
         .version(version())
-        .arg(
-            Arg::new("init")
-                .long("init")
-                .action(ArgAction::SetTrue)
-                .help("Initialize pageserver with all given config overrides"),
-        )
         .arg(
             Arg::new("workdir")
                 .short('D')
                 .long("workdir")
                 .help("Working directory for the pageserver"),
         )
-        // See `settings.md` for more details on the extra configuration patameters pageserver can process
-        .arg(
-            Arg::new("config-override")
-                .long("config-override")
-                .short('c')
-                .num_args(1)
-                .action(ArgAction::Append)
-                .help("Additional configuration overrides of the ones from the toml config file (or new ones to add there). \
-                    Any option has to be a valid toml document, example: `-c=\"foo='hey'\"` `-c=\"foo={value=1}\"`"),
-        )
         .arg(
             Arg::new("enabled-features")
                 .long("enabled-features")
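Since the hunks above make the pageserver read its node id from an identity file, a minimal, hypothetical `identity.toml` would carry just that id. The exact schema is whatever `PageserverIdentity` deserializes; this is an illustrative guess, not quoted from the diff:

    # identity.toml, written next to pageserver.toml by the deployment orchestrator
    id = 1234

`toml_edit::de::from_str::<PageserverIdentity>` turns this into the struct whose `id` field is then passed to `PageServerConf::parse_and_validate`.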
@@ -7,12 +7,11 @@
 use anyhow::{anyhow, bail, ensure, Context, Result};
 use pageserver_api::{models::ImageCompressionAlgorithm, shard::TenantShardId};
 use remote_storage::{RemotePath, RemoteStorageConfig};
-use serde;
 use serde::de::IntoDeserializer;
+use serde::{self, Deserialize};
 use std::env;
 use storage_broker::Uri;
 use utils::crashsafe::path_with_suffix_extension;
-use utils::id::ConnectionId;
 use utils::logging::SecretString;

 use once_cell::sync::OnceCell;
@@ -30,6 +29,7 @@ use utils::{
     logging::LogFormat,
 };

+use crate::tenant::timeline::compaction::CompactL0Phase1ValueAccess;
 use crate::tenant::vectored_blob_io::MaxVectoredReadBytes;
 use crate::tenant::{config::TenantConfOpt, timeline::GetImpl};
 use crate::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
@@ -53,7 +53,7 @@ pub mod defaults {
     use pageserver_api::models::ImageCompressionAlgorithm;
     pub use storage_broker::DEFAULT_ENDPOINT as BROKER_DEFAULT_ENDPOINT;

-    pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "60 s";
+    pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "300 s";
     pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";

     pub const DEFAULT_SUPERUSER: &str = "cloud_admin";
@@ -69,7 +69,6 @@ pub mod defaults {
         super::ConfigurableSemaphore::DEFAULT_INITIAL.get();

     pub const DEFAULT_METRIC_COLLECTION_INTERVAL: &str = "10 min";
-    pub const DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL: &str = "0s";
     pub const DEFAULT_METRIC_COLLECTION_ENDPOINT: Option<reqwest::Url> = None;
     pub const DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL: &str = "10 min";
     pub const DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY: &str = "10s";
@@ -85,16 +84,16 @@ pub mod defaults {
     #[cfg(not(target_os = "linux"))]
     pub const DEFAULT_VIRTUAL_FILE_IO_ENGINE: &str = "std-fs";

-    pub const DEFAULT_GET_VECTORED_IMPL: &str = "sequential";
+    pub const DEFAULT_GET_VECTORED_IMPL: &str = "vectored";

-    pub const DEFAULT_GET_IMPL: &str = "legacy";
+    pub const DEFAULT_GET_IMPL: &str = "vectored";

     pub const DEFAULT_MAX_VECTORED_READ_BYTES: usize = 128 * 1024; // 128 KiB

     pub const DEFAULT_IMAGE_COMPRESSION: ImageCompressionAlgorithm =
         ImageCompressionAlgorithm::Disabled;

-    pub const DEFAULT_VALIDATE_VECTORED_GET: bool = true;
+    pub const DEFAULT_VALIDATE_VECTORED_GET: bool = false;

     pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;
@@ -124,7 +123,6 @@ pub mod defaults {
 #concurrent_tenant_warmup = '{DEFAULT_CONCURRENT_TENANT_WARMUP}'

 #metric_collection_interval = '{DEFAULT_METRIC_COLLECTION_INTERVAL}'
-#cached_metric_collection_interval = '{DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL}'
 #synthetic_size_calculation_interval = '{DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL}'

 #disk_usage_based_eviction = {{ max_usage_pct = .., min_avail_bytes = .., period = "10s"}}
@@ -239,7 +237,6 @@ pub struct PageServerConf {
     // How often to collect metrics and send them to the metrics endpoint.
     pub metric_collection_interval: Duration,
     // How often to send unchanged cached metrics to the metrics endpoint.
-    pub cached_metric_collection_interval: Duration,
     pub metric_collection_endpoint: Option<Url>,
     pub metric_collection_bucket: Option<RemoteStorageConfig>,
     pub synthetic_size_calculation_interval: Duration,
|
||||||
@@ -299,6 +296,13 @@ pub struct PageServerConf {
|
|||||||
pub ephemeral_bytes_per_memory_kb: usize,
|
pub ephemeral_bytes_per_memory_kb: usize,
|
||||||
|
|
||||||
pub l0_flush: L0FlushConfig,
|
pub l0_flush: L0FlushConfig,
|
||||||
|
|
||||||
|
/// This flag is temporary and will be removed after gradual rollout.
|
||||||
|
/// See <https://github.com/neondatabase/neon/issues/8184>.
|
||||||
|
pub compact_level0_phase1_value_access: CompactL0Phase1ValueAccess,
|
||||||
|
|
||||||
|
/// Direct IO settings
|
||||||
|
pub virtual_file_direct_io: virtual_file::DirectIoMode,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// We do not want to store this in a PageServerConf because the latter may be logged
|
/// We do not want to store this in a PageServerConf because the latter may be logged
|
||||||
@@ -360,8 +364,6 @@ struct PageServerConfigBuilder {
|
|||||||
auth_validation_public_key_path: BuilderValue<Option<Utf8PathBuf>>,
|
auth_validation_public_key_path: BuilderValue<Option<Utf8PathBuf>>,
|
||||||
remote_storage_config: BuilderValue<Option<RemoteStorageConfig>>,
|
remote_storage_config: BuilderValue<Option<RemoteStorageConfig>>,
|
||||||
|
|
||||||
id: BuilderValue<NodeId>,
|
|
||||||
|
|
||||||
broker_endpoint: BuilderValue<Uri>,
|
broker_endpoint: BuilderValue<Uri>,
|
||||||
broker_keepalive_interval: BuilderValue<Duration>,
|
broker_keepalive_interval: BuilderValue<Duration>,
|
||||||
|
|
||||||
@@ -371,7 +373,6 @@ struct PageServerConfigBuilder {
|
|||||||
concurrent_tenant_size_logical_size_queries: BuilderValue<NonZeroUsize>,
|
concurrent_tenant_size_logical_size_queries: BuilderValue<NonZeroUsize>,
|
||||||
|
|
||||||
metric_collection_interval: BuilderValue<Duration>,
|
metric_collection_interval: BuilderValue<Duration>,
|
||||||
cached_metric_collection_interval: BuilderValue<Duration>,
|
|
||||||
metric_collection_endpoint: BuilderValue<Option<Url>>,
|
metric_collection_endpoint: BuilderValue<Option<Url>>,
|
||||||
synthetic_size_calculation_interval: BuilderValue<Duration>,
|
synthetic_size_calculation_interval: BuilderValue<Duration>,
|
||||||
metric_collection_bucket: BuilderValue<Option<RemoteStorageConfig>>,
|
metric_collection_bucket: BuilderValue<Option<RemoteStorageConfig>>,
|
||||||
@@ -408,9 +409,17 @@ struct PageServerConfigBuilder {
|
|||||||
ephemeral_bytes_per_memory_kb: BuilderValue<usize>,
|
ephemeral_bytes_per_memory_kb: BuilderValue<usize>,
|
||||||
|
|
||||||
l0_flush: BuilderValue<L0FlushConfig>,
|
l0_flush: BuilderValue<L0FlushConfig>,
|
||||||
|
|
||||||
|
compact_level0_phase1_value_access: BuilderValue<CompactL0Phase1ValueAccess>,
|
||||||
|
|
||||||
|
virtual_file_direct_io: BuilderValue<virtual_file::DirectIoMode>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PageServerConfigBuilder {
|
impl PageServerConfigBuilder {
|
||||||
|
fn new() -> Self {
|
||||||
|
Self::default()
|
||||||
|
}
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn default_values() -> Self {
|
fn default_values() -> Self {
|
||||||
use self::BuilderValue::*;
|
use self::BuilderValue::*;
|
||||||
@@ -436,7 +445,6 @@ impl PageServerConfigBuilder {
|
|||||||
pg_auth_type: Set(AuthType::Trust),
|
pg_auth_type: Set(AuthType::Trust),
|
||||||
auth_validation_public_key_path: Set(None),
|
auth_validation_public_key_path: Set(None),
|
||||||
remote_storage_config: Set(None),
|
remote_storage_config: Set(None),
|
||||||
id: NotSet,
|
|
||||||
broker_endpoint: Set(storage_broker::DEFAULT_ENDPOINT
|
broker_endpoint: Set(storage_broker::DEFAULT_ENDPOINT
|
||||||
.parse()
|
.parse()
|
||||||
.expect("failed to parse default broker endpoint")),
|
.expect("failed to parse default broker endpoint")),
|
||||||
@@ -455,10 +463,6 @@ impl PageServerConfigBuilder {
|
|||||||
DEFAULT_METRIC_COLLECTION_INTERVAL,
|
DEFAULT_METRIC_COLLECTION_INTERVAL,
|
||||||
)
|
)
|
||||||
.expect("cannot parse default metric collection interval")),
|
.expect("cannot parse default metric collection interval")),
|
||||||
cached_metric_collection_interval: Set(humantime::parse_duration(
|
|
||||||
DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL,
|
|
||||||
)
|
|
||||||
.expect("cannot parse default cached_metric_collection_interval")),
|
|
||||||
synthetic_size_calculation_interval: Set(humantime::parse_duration(
|
synthetic_size_calculation_interval: Set(humantime::parse_duration(
|
||||||
DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL,
|
DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL,
|
||||||
)
|
)
|
||||||
@@ -498,6 +502,8 @@ impl PageServerConfigBuilder {
|
|||||||
validate_vectored_get: Set(DEFAULT_VALIDATE_VECTORED_GET),
|
validate_vectored_get: Set(DEFAULT_VALIDATE_VECTORED_GET),
|
||||||
ephemeral_bytes_per_memory_kb: Set(DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
|
ephemeral_bytes_per_memory_kb: Set(DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
|
||||||
l0_flush: Set(L0FlushConfig::default()),
|
l0_flush: Set(L0FlushConfig::default()),
|
||||||
|
compact_level0_phase1_value_access: Set(CompactL0Phase1ValueAccess::default()),
|
||||||
|
virtual_file_direct_io: Set(virtual_file::DirectIoMode::default()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -570,10 +576,6 @@ impl PageServerConfigBuilder {
|
|||||||
self.broker_keepalive_interval = BuilderValue::Set(broker_keepalive_interval)
|
self.broker_keepalive_interval = BuilderValue::Set(broker_keepalive_interval)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn id(&mut self, node_id: NodeId) {
|
|
||||||
self.id = BuilderValue::Set(node_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn log_format(&mut self, log_format: LogFormat) {
|
pub fn log_format(&mut self, log_format: LogFormat) {
|
||||||
self.log_format = BuilderValue::Set(log_format)
|
self.log_format = BuilderValue::Set(log_format)
|
||||||
}
|
}
|
||||||
@@ -590,14 +592,6 @@ impl PageServerConfigBuilder {
|
|||||||
self.metric_collection_interval = BuilderValue::Set(metric_collection_interval)
|
self.metric_collection_interval = BuilderValue::Set(metric_collection_interval)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn cached_metric_collection_interval(
|
|
||||||
&mut self,
|
|
||||||
cached_metric_collection_interval: Duration,
|
|
||||||
) {
|
|
||||||
self.cached_metric_collection_interval =
|
|
||||||
BuilderValue::Set(cached_metric_collection_interval)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn metric_collection_endpoint(&mut self, metric_collection_endpoint: Option<Url>) {
|
pub fn metric_collection_endpoint(&mut self, metric_collection_endpoint: Option<Url>) {
|
||||||
self.metric_collection_endpoint = BuilderValue::Set(metric_collection_endpoint)
|
self.metric_collection_endpoint = BuilderValue::Set(metric_collection_endpoint)
|
||||||
}
|
}
|
||||||
@@ -693,7 +687,15 @@ impl PageServerConfigBuilder {
|
|||||||
self.l0_flush = BuilderValue::Set(value);
|
self.l0_flush = BuilderValue::Set(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn build(self) -> anyhow::Result<PageServerConf> {
|
pub fn compact_level0_phase1_value_access(&mut self, value: CompactL0Phase1ValueAccess) {
|
||||||
|
self.compact_level0_phase1_value_access = BuilderValue::Set(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn virtual_file_direct_io(&mut self, value: virtual_file::DirectIoMode) {
|
||||||
|
self.virtual_file_direct_io = BuilderValue::Set(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn build(self, id: NodeId) -> anyhow::Result<PageServerConf> {
|
||||||
let default = Self::default_values();
|
let default = Self::default_values();
|
||||||
|
|
||||||
macro_rules! conf {
|
macro_rules! conf {
|
||||||
@@ -726,12 +728,10 @@ impl PageServerConfigBuilder {
|
|||||||
pg_auth_type,
|
pg_auth_type,
|
||||||
auth_validation_public_key_path,
|
auth_validation_public_key_path,
|
||||||
remote_storage_config,
|
remote_storage_config,
|
||||||
id,
|
|
||||||
broker_endpoint,
|
broker_endpoint,
|
||||||
broker_keepalive_interval,
|
broker_keepalive_interval,
|
||||||
log_format,
|
log_format,
|
||||||
metric_collection_interval,
|
metric_collection_interval,
|
||||||
cached_metric_collection_interval,
|
|
||||||
metric_collection_endpoint,
|
metric_collection_endpoint,
|
||||||
metric_collection_bucket,
|
metric_collection_bucket,
|
||||||
synthetic_size_calculation_interval,
|
synthetic_size_calculation_interval,
|
||||||
@@ -752,9 +752,12 @@ impl PageServerConfigBuilder {
|
|||||||
image_compression,
|
image_compression,
|
||||||
ephemeral_bytes_per_memory_kb,
|
ephemeral_bytes_per_memory_kb,
|
||||||
l0_flush,
|
l0_flush,
|
||||||
|
compact_level0_phase1_value_access,
|
||||||
|
virtual_file_direct_io,
|
||||||
}
|
}
|
||||||
CUSTOM LOGIC
|
CUSTOM LOGIC
|
||||||
{
|
{
|
||||||
|
id: id,
|
||||||
// TenantConf is handled separately
|
// TenantConf is handled separately
|
||||||
default_tenant_conf: TenantConf::default(),
|
default_tenant_conf: TenantConf::default(),
|
||||||
concurrent_tenant_warmup: ConfigurableSemaphore::new({
|
concurrent_tenant_warmup: ConfigurableSemaphore::new({
|
||||||
@@ -870,22 +873,6 @@ impl PageServerConf {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn traces_path(&self) -> Utf8PathBuf {
|
|
||||||
self.workdir.join("traces")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn trace_path(
|
|
||||||
&self,
|
|
||||||
tenant_shard_id: &TenantShardId,
|
|
||||||
timeline_id: &TimelineId,
|
|
||||||
connection_id: &ConnectionId,
|
|
||||||
) -> Utf8PathBuf {
|
|
||||||
self.traces_path()
|
|
||||||
.join(tenant_shard_id.to_string())
|
|
||||||
.join(timeline_id.to_string())
|
|
||||||
.join(connection_id.to_string())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Turns storage remote path of a file into its local path.
|
/// Turns storage remote path of a file into its local path.
|
||||||
pub fn local_path(&self, remote_path: &RemotePath) -> Utf8PathBuf {
|
pub fn local_path(&self, remote_path: &RemotePath) -> Utf8PathBuf {
|
||||||
remote_path.with_base(&self.workdir)
|
remote_path.with_base(&self.workdir)
|
||||||
@@ -915,8 +902,12 @@ impl PageServerConf {
|
|||||||
/// validating the input and failing on errors.
|
/// validating the input and failing on errors.
|
||||||
///
|
///
|
||||||
/// This leaves any options not present in the file in the built-in defaults.
|
/// This leaves any options not present in the file in the built-in defaults.
|
||||||
pub fn parse_and_validate(toml: &Document, workdir: &Utf8Path) -> anyhow::Result<Self> {
|
pub fn parse_and_validate(
|
||||||
let mut builder = PageServerConfigBuilder::default();
|
node_id: NodeId,
|
||||||
|
toml: &Document,
|
||||||
|
workdir: &Utf8Path,
|
||||||
|
) -> anyhow::Result<Self> {
|
||||||
|
let mut builder = PageServerConfigBuilder::new();
|
||||||
builder.workdir(workdir.to_owned());
|
builder.workdir(workdir.to_owned());
|
||||||
|
|
||||||
let mut t_conf = TenantConfOpt::default();
|
let mut t_conf = TenantConfOpt::default();
|
||||||
@@ -947,7 +938,6 @@ impl PageServerConf {
|
|||||||
"tenant_config" => {
|
"tenant_config" => {
|
||||||
t_conf = TenantConfOpt::try_from(item.to_owned()).context(format!("failed to parse: '{key}'"))?;
|
t_conf = TenantConfOpt::try_from(item.to_owned()).context(format!("failed to parse: '{key}'"))?;
|
||||||
}
|
}
|
||||||
"id" => builder.id(NodeId(parse_toml_u64(key, item)?)),
|
|
||||||
"broker_endpoint" => builder.broker_endpoint(parse_toml_string(key, item)?.parse().context("failed to parse broker endpoint")?),
|
"broker_endpoint" => builder.broker_endpoint(parse_toml_string(key, item)?.parse().context("failed to parse broker endpoint")?),
|
||||||
"broker_keepalive_interval" => builder.broker_keepalive_interval(parse_toml_duration(key, item)?),
|
"broker_keepalive_interval" => builder.broker_keepalive_interval(parse_toml_duration(key, item)?),
|
||||||
"log_format" => builder.log_format(
|
"log_format" => builder.log_format(
|
||||||
@@ -964,7 +954,6 @@ impl PageServerConf {
|
|||||||
NonZeroUsize::new(permits).context("initial semaphore permits out of range: 0, use other configuration to disable a feature")?
|
NonZeroUsize::new(permits).context("initial semaphore permits out of range: 0, use other configuration to disable a feature")?
|
||||||
}),
|
}),
|
||||||
"metric_collection_interval" => builder.metric_collection_interval(parse_toml_duration(key, item)?),
|
"metric_collection_interval" => builder.metric_collection_interval(parse_toml_duration(key, item)?),
|
||||||
"cached_metric_collection_interval" => builder.cached_metric_collection_interval(parse_toml_duration(key, item)?),
|
|
||||||
"metric_collection_endpoint" => {
|
"metric_collection_endpoint" => {
|
||||||
let endpoint = parse_toml_string(key, item)?.parse().context("failed to parse metric_collection_endpoint")?;
|
let endpoint = parse_toml_string(key, item)?.parse().context("failed to parse metric_collection_endpoint")?;
|
||||||
builder.metric_collection_endpoint(Some(endpoint));
|
builder.metric_collection_endpoint(Some(endpoint));
|
||||||
@@ -1037,11 +1026,17 @@ impl PageServerConf {
|
|||||||
"l0_flush" => {
|
"l0_flush" => {
|
||||||
builder.l0_flush(utils::toml_edit_ext::deserialize_item(item).context("l0_flush")?)
|
builder.l0_flush(utils::toml_edit_ext::deserialize_item(item).context("l0_flush")?)
|
||||||
}
|
}
|
||||||
|
"compact_level0_phase1_value_access" => {
|
||||||
|
builder.compact_level0_phase1_value_access(utils::toml_edit_ext::deserialize_item(item).context("compact_level0_phase1_value_access")?)
|
||||||
|
}
|
||||||
|
"virtual_file_direct_io" => {
|
||||||
|
builder.virtual_file_direct_io(utils::toml_edit_ext::deserialize_item(item).context("virtual_file_direct_io")?)
|
||||||
|
}
|
||||||
_ => bail!("unrecognized pageserver option '{key}'"),
|
_ => bail!("unrecognized pageserver option '{key}'"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut conf = builder.build().context("invalid config")?;
|
let mut conf = builder.build(node_id).context("invalid config")?;
|
||||||
|
|
||||||
if conf.http_auth_type == AuthType::NeonJWT || conf.pg_auth_type == AuthType::NeonJWT {
|
if conf.http_auth_type == AuthType::NeonJWT || conf.pg_auth_type == AuthType::NeonJWT {
|
||||||
let auth_validation_public_key_path = conf
|
let auth_validation_public_key_path = conf
|
||||||
@@ -1097,7 +1092,6 @@ impl PageServerConf {
|
|||||||
eviction_task_immitated_concurrent_logical_size_queries: ConfigurableSemaphore::default(
|
eviction_task_immitated_concurrent_logical_size_queries: ConfigurableSemaphore::default(
|
||||||
),
|
),
|
||||||
metric_collection_interval: Duration::from_secs(60),
|
metric_collection_interval: Duration::from_secs(60),
|
||||||
cached_metric_collection_interval: Duration::from_secs(60 * 60),
|
|
||||||
metric_collection_endpoint: defaults::DEFAULT_METRIC_COLLECTION_ENDPOINT,
|
metric_collection_endpoint: defaults::DEFAULT_METRIC_COLLECTION_ENDPOINT,
|
||||||
metric_collection_bucket: None,
|
metric_collection_bucket: None,
|
||||||
synthetic_size_calculation_interval: Duration::from_secs(60),
|
synthetic_size_calculation_interval: Duration::from_secs(60),
|
||||||
@@ -1122,10 +1116,18 @@ impl PageServerConf {
|
|||||||
validate_vectored_get: defaults::DEFAULT_VALIDATE_VECTORED_GET,
|
validate_vectored_get: defaults::DEFAULT_VALIDATE_VECTORED_GET,
|
||||||
ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB,
|
ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB,
|
||||||
l0_flush: L0FlushConfig::default(),
|
l0_flush: L0FlushConfig::default(),
|
||||||
|
compact_level0_phase1_value_access: CompactL0Phase1ValueAccess::default(),
|
||||||
|
virtual_file_direct_io: virtual_file::DirectIoMode::default(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
#[serde(deny_unknown_fields)]
|
||||||
|
pub struct PageserverIdentity {
|
||||||
|
pub id: NodeId,
|
||||||
|
}
|
||||||
|
|
||||||
// Helper functions to parse a toml Item
|
// Helper functions to parse a toml Item
|
||||||
|
|
||||||
fn parse_toml_string(name: &str, item: &Item) -> Result<String> {
|
fn parse_toml_string(name: &str, item: &Item) -> Result<String> {
|
||||||
@@ -1273,10 +1275,8 @@ max_file_descriptors = 333
|
|||||||
|
|
||||||
# initial superuser role name to use when creating a new tenant
|
# initial superuser role name to use when creating a new tenant
|
||||||
initial_superuser_name = 'zzzz'
|
initial_superuser_name = 'zzzz'
|
||||||
id = 10
|
|
||||||
|
|
||||||
metric_collection_interval = '222 s'
|
metric_collection_interval = '222 s'
|
||||||
cached_metric_collection_interval = '22200 s'
|
|
||||||
metric_collection_endpoint = 'http://localhost:80/metrics'
|
metric_collection_endpoint = 'http://localhost:80/metrics'
|
||||||
synthetic_size_calculation_interval = '333 s'
|
synthetic_size_calculation_interval = '333 s'
|
||||||
|
|
||||||
@@ -1291,12 +1291,11 @@ background_task_maximum_delay = '334 s'
|
|||||||
let (workdir, pg_distrib_dir) = prepare_fs(&tempdir)?;
|
let (workdir, pg_distrib_dir) = prepare_fs(&tempdir)?;
|
||||||
let broker_endpoint = storage_broker::DEFAULT_ENDPOINT;
|
let broker_endpoint = storage_broker::DEFAULT_ENDPOINT;
|
||||||
// we have to create dummy values to overcome the validation errors
|
// we have to create dummy values to overcome the validation errors
|
||||||
let config_string = format!(
|
let config_string =
|
||||||
"pg_distrib_dir='{pg_distrib_dir}'\nid=10\nbroker_endpoint = '{broker_endpoint}'",
|
format!("pg_distrib_dir='{pg_distrib_dir}'\nbroker_endpoint = '{broker_endpoint}'",);
|
||||||
);
|
|
||||||
let toml = config_string.parse()?;
|
let toml = config_string.parse()?;
|
||||||
|
|
||||||
let parsed_config = PageServerConf::parse_and_validate(&toml, &workdir)
|
let parsed_config = PageServerConf::parse_and_validate(NodeId(10), &toml, &workdir)
|
||||||
.unwrap_or_else(|e| panic!("Failed to parse config '{config_string}', reason: {e:?}"));
|
.unwrap_or_else(|e| panic!("Failed to parse config '{config_string}', reason: {e:?}"));
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@@ -1332,9 +1331,6 @@ background_task_maximum_delay = '334 s'
|
|||||||
metric_collection_interval: humantime::parse_duration(
|
metric_collection_interval: humantime::parse_duration(
|
||||||
defaults::DEFAULT_METRIC_COLLECTION_INTERVAL
|
defaults::DEFAULT_METRIC_COLLECTION_INTERVAL
|
||||||
)?,
|
)?,
|
||||||
cached_metric_collection_interval: humantime::parse_duration(
|
|
||||||
defaults::DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL
|
|
||||||
)?,
|
|
||||||
metric_collection_endpoint: defaults::DEFAULT_METRIC_COLLECTION_ENDPOINT,
|
metric_collection_endpoint: defaults::DEFAULT_METRIC_COLLECTION_ENDPOINT,
|
||||||
metric_collection_bucket: None,
|
metric_collection_bucket: None,
|
||||||
synthetic_size_calculation_interval: humantime::parse_duration(
|
synthetic_size_calculation_interval: humantime::parse_duration(
|
||||||
@@ -1363,6 +1359,8 @@ background_task_maximum_delay = '334 s'
|
|||||||
image_compression: defaults::DEFAULT_IMAGE_COMPRESSION,
|
image_compression: defaults::DEFAULT_IMAGE_COMPRESSION,
|
||||||
ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB,
|
ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB,
|
||||||
l0_flush: L0FlushConfig::default(),
|
l0_flush: L0FlushConfig::default(),
|
||||||
|
compact_level0_phase1_value_access: CompactL0Phase1ValueAccess::default(),
|
||||||
|
virtual_file_direct_io: virtual_file::DirectIoMode::default(),
|
||||||
},
|
},
|
||||||
"Correct defaults should be used when no config values are provided"
|
"Correct defaults should be used when no config values are provided"
|
||||||
);
|
);
|
||||||
@@ -1381,7 +1379,7 @@ background_task_maximum_delay = '334 s'
|
|||||||
);
|
);
|
||||||
let toml = config_string.parse()?;
|
let toml = config_string.parse()?;
|
||||||
|
|
||||||
let parsed_config = PageServerConf::parse_and_validate(&toml, &workdir)
|
let parsed_config = PageServerConf::parse_and_validate(NodeId(10), &toml, &workdir)
|
||||||
.unwrap_or_else(|e| panic!("Failed to parse config '{config_string}', reason: {e:?}"));
|
.unwrap_or_else(|e| panic!("Failed to parse config '{config_string}', reason: {e:?}"));
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@@ -1413,7 +1411,6 @@ background_task_maximum_delay = '334 s'
|
|||||||
eviction_task_immitated_concurrent_logical_size_queries:
|
eviction_task_immitated_concurrent_logical_size_queries:
|
||||||
ConfigurableSemaphore::default(),
|
ConfigurableSemaphore::default(),
|
||||||
metric_collection_interval: Duration::from_secs(222),
|
metric_collection_interval: Duration::from_secs(222),
|
||||||
cached_metric_collection_interval: Duration::from_secs(22200),
|
|
||||||
metric_collection_endpoint: Some(Url::parse("http://localhost:80/metrics")?),
|
metric_collection_endpoint: Some(Url::parse("http://localhost:80/metrics")?),
|
||||||
metric_collection_bucket: None,
|
metric_collection_bucket: None,
|
||||||
synthetic_size_calculation_interval: Duration::from_secs(333),
|
synthetic_size_calculation_interval: Duration::from_secs(333),
|
||||||
@@ -1438,6 +1435,8 @@ background_task_maximum_delay = '334 s'
|
|||||||
image_compression: defaults::DEFAULT_IMAGE_COMPRESSION,
|
image_compression: defaults::DEFAULT_IMAGE_COMPRESSION,
|
||||||
ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB,
|
ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB,
|
||||||
l0_flush: L0FlushConfig::default(),
|
l0_flush: L0FlushConfig::default(),
|
||||||
|
compact_level0_phase1_value_access: CompactL0Phase1ValueAccess::default(),
|
||||||
|
virtual_file_direct_io: virtual_file::DirectIoMode::default(),
|
||||||
},
|
},
|
||||||
"Should be able to parse all basic config values correctly"
|
"Should be able to parse all basic config values correctly"
|
||||||
);
|
);
|
||||||
@@ -1472,12 +1471,13 @@ broker_endpoint = '{broker_endpoint}'
|
|||||||
|
|
||||||
let toml = config_string.parse()?;
|
let toml = config_string.parse()?;
|
||||||
|
|
||||||
let parsed_remote_storage_config = PageServerConf::parse_and_validate(&toml, &workdir)
|
let parsed_remote_storage_config =
|
||||||
.unwrap_or_else(|e| {
|
PageServerConf::parse_and_validate(NodeId(10), &toml, &workdir)
|
||||||
panic!("Failed to parse config '{config_string}', reason: {e:?}")
|
.unwrap_or_else(|e| {
|
||||||
})
|
panic!("Failed to parse config '{config_string}', reason: {e:?}")
|
||||||
.remote_storage_config
|
})
|
||||||
.expect("Should have remote storage config for the local FS");
|
.remote_storage_config
|
||||||
|
.expect("Should have remote storage config for the local FS");
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
parsed_remote_storage_config,
|
parsed_remote_storage_config,
|
||||||
@@ -1533,12 +1533,13 @@ broker_endpoint = '{broker_endpoint}'
|
|||||||
|
|
||||||
let toml = config_string.parse()?;
|
let toml = config_string.parse()?;
|
||||||
|
|
||||||
let parsed_remote_storage_config = PageServerConf::parse_and_validate(&toml, &workdir)
|
let parsed_remote_storage_config =
|
||||||
.unwrap_or_else(|e| {
|
PageServerConf::parse_and_validate(NodeId(10), &toml, &workdir)
|
||||||
panic!("Failed to parse config '{config_string}', reason: {e:?}")
|
.unwrap_or_else(|e| {
|
||||||
})
|
panic!("Failed to parse config '{config_string}', reason: {e:?}")
|
||||||
.remote_storage_config
|
})
|
||||||
.expect("Should have remote storage config for S3");
|
.remote_storage_config
|
||||||
|
.expect("Should have remote storage config for S3");
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
parsed_remote_storage_config,
|
parsed_remote_storage_config,
|
||||||
@@ -1560,34 +1561,6 @@ broker_endpoint = '{broker_endpoint}'
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn parse_tenant_config() -> anyhow::Result<()> {
|
|
||||||
let tempdir = tempdir()?;
|
|
||||||
let (workdir, pg_distrib_dir) = prepare_fs(&tempdir)?;
|
|
||||||
|
|
||||||
let broker_endpoint = "http://127.0.0.1:7777";
|
|
||||||
let trace_read_requests = true;
|
|
||||||
|
|
||||||
let config_string = format!(
|
|
||||||
r#"{ALL_BASE_VALUES_TOML}
|
|
||||||
pg_distrib_dir='{pg_distrib_dir}'
|
|
||||||
broker_endpoint = '{broker_endpoint}'
|
|
||||||
|
|
||||||
[tenant_config]
|
|
||||||
trace_read_requests = {trace_read_requests}"#,
|
|
||||||
);
|
|
||||||
|
|
||||||
let toml = config_string.parse()?;
|
|
||||||
|
|
||||||
let conf = PageServerConf::parse_and_validate(&toml, &workdir)?;
|
|
||||||
assert_eq!(
|
|
||||||
conf.default_tenant_conf.trace_read_requests, trace_read_requests,
|
|
||||||
"Tenant config from pageserver config file should be parsed and udpated values used as defaults for all tenants",
|
|
||||||
);
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_incorrect_tenant_config() -> anyhow::Result<()> {
|
fn parse_incorrect_tenant_config() -> anyhow::Result<()> {
|
||||||
let config_string = r#"
|
let config_string = r#"
|
||||||
@@ -1628,7 +1601,6 @@ trace_read_requests = {trace_read_requests}"#,
|
|||||||
r#"pg_distrib_dir = "{pg_distrib_dir}"
|
r#"pg_distrib_dir = "{pg_distrib_dir}"
|
||||||
metric_collection_endpoint = "http://sample.url"
|
metric_collection_endpoint = "http://sample.url"
|
||||||
metric_collection_interval = "10min"
|
metric_collection_interval = "10min"
|
||||||
id = 222
|
|
||||||
|
|
||||||
[disk_usage_based_eviction]
|
[disk_usage_based_eviction]
|
||||||
max_usage_pct = 80
|
max_usage_pct = 80
|
||||||
@@ -1645,7 +1617,7 @@ threshold = "20m"
|
|||||||
"#,
|
"#,
|
||||||
);
|
);
|
||||||
let toml: Document = pageserver_conf_toml.parse()?;
|
let toml: Document = pageserver_conf_toml.parse()?;
|
||||||
let conf = PageServerConf::parse_and_validate(&toml, &workdir)?;
|
let conf = PageServerConf::parse_and_validate(NodeId(333), &toml, &workdir)?;
|
||||||
|
|
||||||
assert_eq!(conf.pg_distrib_dir, pg_distrib_dir);
|
assert_eq!(conf.pg_distrib_dir, pg_distrib_dir);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@@ -1661,7 +1633,11 @@ threshold = "20m"
|
|||||||
.evictions_low_residence_duration_metric_threshold,
|
.evictions_low_residence_duration_metric_threshold,
|
||||||
Duration::from_secs(20 * 60)
|
Duration::from_secs(20 * 60)
|
||||||
);
|
);
|
||||||
assert_eq!(conf.id, NodeId(222));
|
|
||||||
|
// Assert that the node id provided by the indentity file (threaded
|
||||||
|
// through the call to [`PageServerConf::parse_and_validate`] is
|
||||||
|
// used.
|
||||||
|
assert_eq!(conf.id, NodeId(333));
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
conf.disk_usage_based_eviction,
|
conf.disk_usage_based_eviction,
|
||||||
Some(DiskUsageEvictionTaskConfig {
|
Some(DiskUsageEvictionTaskConfig {
|
||||||
@@ -1670,7 +1646,7 @@ threshold = "20m"
|
|||||||
period: Duration::from_secs(10),
|
period: Duration::from_secs(10),
|
||||||
#[cfg(feature = "testing")]
|
#[cfg(feature = "testing")]
|
||||||
mock_statvfs: None,
|
mock_statvfs: None,
|
||||||
eviction_order: crate::disk_usage_eviction_task::EvictionOrder::AbsoluteAccessed,
|
eviction_order: Default::default(),
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -1694,7 +1670,6 @@ threshold = "20m"
|
|||||||
r#"pg_distrib_dir = "{pg_distrib_dir}"
|
r#"pg_distrib_dir = "{pg_distrib_dir}"
|
||||||
metric_collection_endpoint = "http://sample.url"
|
metric_collection_endpoint = "http://sample.url"
|
||||||
metric_collection_interval = "10min"
|
metric_collection_interval = "10min"
|
||||||
id = 222
|
|
||||||
|
|
||||||
[tenant_config]
|
[tenant_config]
|
||||||
evictions_low_residence_duration_metric_threshold = "20m"
|
evictions_low_residence_duration_metric_threshold = "20m"
|
||||||
@@ -1706,7 +1681,7 @@ threshold = "20m"
|
|||||||
"#,
|
"#,
|
||||||
);
|
);
|
||||||
let toml: Document = pageserver_conf_toml.parse().unwrap();
|
let toml: Document = pageserver_conf_toml.parse().unwrap();
|
||||||
let conf = PageServerConf::parse_and_validate(&toml, &workdir).unwrap();
|
let conf = PageServerConf::parse_and_validate(NodeId(222), &toml, &workdir).unwrap();
|
||||||
|
|
||||||
match &conf.default_tenant_conf.eviction_policy {
|
match &conf.default_tenant_conf.eviction_policy {
|
||||||
EvictionPolicy::OnlyImitiate(t) => {
|
EvictionPolicy::OnlyImitiate(t) => {
|
||||||
@@ -1725,7 +1700,7 @@ threshold = "20m"
|
|||||||
remote_storage = {}
|
remote_storage = {}
|
||||||
"#;
|
"#;
|
||||||
let doc = toml_edit::Document::from_str(input).unwrap();
|
let doc = toml_edit::Document::from_str(input).unwrap();
|
||||||
let err = PageServerConf::parse_and_validate(&doc, &workdir)
|
let err = PageServerConf::parse_and_validate(NodeId(222), &doc, &workdir)
|
||||||
.expect_err("empty remote_storage field should fail, don't specify it if you want no remote_storage");
|
.expect_err("empty remote_storage field should fail, don't specify it if you want no remote_storage");
|
||||||
assert!(format!("{err}").contains("remote_storage"), "{err}");
|
assert!(format!("{err}").contains("remote_storage"), "{err}");
|
||||||
}
|
}
|
||||||
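
The config.rs hunks above drop the `id` key from `pageserver.toml`: `PageServerConf::parse_and_validate` now takes the `NodeId` as an explicit first argument, and the new `PageserverIdentity` struct exists to deserialize it from a separate identity file. A minimal sketch of how a caller might thread the two together; the `identity.toml` / `pageserver.toml` file names and the use of the `toml` crate are illustrative assumptions, not taken from this diff:

    use anyhow::Context;
    use camino::Utf8Path;

    // Hypothetical wiring: read the node id from an identity file, then parse the main config.
    fn load_conf(workdir: &Utf8Path) -> anyhow::Result<PageServerConf> {
        // Assumed file name; the identity file only needs to carry `id = <node id>`.
        let identity_text = std::fs::read_to_string(workdir.join("identity.toml"))?;
        let identity: PageserverIdentity =
            toml::from_str(&identity_text).context("deserialize identity file")?;

        // pageserver.toml is parsed as before, minus the now-removed `id` key.
        let config_text = std::fs::read_to_string(workdir.join("pageserver.toml"))?;
        let doc: toml_edit::Document = config_text.parse().context("parse pageserver.toml")?;

        // The node id is threaded in up front instead of being read out of the config document.
        PageServerConf::parse_and_validate(identity.id, &doc, workdir)
    }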

@@ -1,5 +1,6 @@
 //! Periodically collect consumption metrics for all active tenants
 //! and push them to a HTTP endpoint.
+use crate::config::PageServerConf;
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME};
 use crate::tenant::size::CalculateSyntheticSizeError;
@@ -39,56 +40,74 @@ type RawMetric = (MetricsKey, (EventType, u64));
 /// for deduplication, but that is no longer needed.
 type Cache = HashMap<MetricsKey, (EventType, u64)>;

+pub async fn run(
+conf: &'static PageServerConf,
+tenant_manager: Arc<TenantManager>,
+cancel: CancellationToken,
+) {
+let Some(metric_collection_endpoint) = conf.metric_collection_endpoint.as_ref() else {
+return;
+};
+
+let local_disk_storage = conf.workdir.join("last_consumption_metrics.json");
+
+let metrics_ctx = RequestContext::todo_child(
+TaskKind::MetricsCollection,
+// This task itself shouldn't download anything.
+// The actual size calculation does need downloads, and
+// creates a child context with the right DownloadBehavior.
+DownloadBehavior::Error,
+);
+let collect_metrics = BACKGROUND_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(
+"consumption metrics collection",
+collect_metrics(
+tenant_manager.clone(),
+metric_collection_endpoint,
+&conf.metric_collection_bucket,
+conf.metric_collection_interval,
+conf.id,
+local_disk_storage,
+cancel.clone(),
+metrics_ctx,
+)
+.instrument(info_span!("metrics_collection")),
+));
+
+let worker_ctx =
+RequestContext::todo_child(TaskKind::CalculateSyntheticSize, DownloadBehavior::Download);
+let synthetic_size_worker = BACKGROUND_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(
+"synthetic size calculation",
+calculate_synthetic_size_worker(
+tenant_manager.clone(),
+conf.synthetic_size_calculation_interval,
+cancel.clone(),
+worker_ctx,
+)
+.instrument(info_span!("synthetic_size_worker")),
+));
+
+let (collect_metrics, synthetic_size_worker) =
+futures::future::join(collect_metrics, synthetic_size_worker).await;
+collect_metrics
+.expect("unreachable: exit_on_panic_or_error would catch the panic and exit the process");
+synthetic_size_worker
+.expect("unreachable: exit_on_panic_or_error would catch the panic and exit the process");
+}
+
 /// Main thread that serves metrics collection
 #[allow(clippy::too_many_arguments)]
-pub async fn collect_metrics(
+async fn collect_metrics(
 tenant_manager: Arc<TenantManager>,
 metric_collection_endpoint: &Url,
 metric_collection_bucket: &Option<RemoteStorageConfig>,
 metric_collection_interval: Duration,
-_cached_metric_collection_interval: Duration,
-synthetic_size_calculation_interval: Duration,
 node_id: NodeId,
 local_disk_storage: Utf8PathBuf,
 cancel: CancellationToken,
 ctx: RequestContext,
 ) -> anyhow::Result<()> {
-if _cached_metric_collection_interval != Duration::ZERO {
-tracing::warn!(
-"cached_metric_collection_interval is no longer used, please set it to zero."
-)
-}

-// spin up background worker that caclulates tenant sizes
-let worker_ctx =
-ctx.detached_child(TaskKind::CalculateSyntheticSize, DownloadBehavior::Download);
-task_mgr::spawn(
-BACKGROUND_RUNTIME.handle(),
-TaskKind::CalculateSyntheticSize,
-None,
-None,
-"synthetic size calculation",
-false,
-{
-let tenant_manager = tenant_manager.clone();
-async move {
-calculate_synthetic_size_worker(
-tenant_manager,
-synthetic_size_calculation_interval,
-&cancel,
-&worker_ctx,
-)
-.instrument(info_span!("synthetic_size_worker"))
-.await?;
-Ok(())
-}
-},
-);

 let path: Arc<Utf8PathBuf> = Arc::new(local_disk_storage);

-let cancel = task_mgr::shutdown_token();

 let restore_and_reschedule = restore_and_reschedule(&path, metric_collection_interval);

 let mut cached_metrics = tokio::select! {
@@ -103,7 +122,7 @@ pub async fn collect_metrics(
 .expect("Failed to create http client with timeout");

 let bucket_client = if let Some(bucket_config) = metric_collection_bucket {
-match GenericRemoteStorage::from_config(bucket_config) {
+match GenericRemoteStorage::from_config(bucket_config).await {
 Ok(client) => Some(client),
 Err(e) => {
 // Non-fatal error: if we were given an invalid config, we will proceed
@@ -175,11 +194,9 @@ pub async fn collect_metrics(
 BackgroundLoopKind::ConsumptionMetricsCollectMetrics,
 );

-let res = tokio::time::timeout_at(
-started_at + metric_collection_interval,
-task_mgr::shutdown_token().cancelled(),
-)
-.await;
+let res =
+tokio::time::timeout_at(started_at + metric_collection_interval, cancel.cancelled())
+.await;
 if res.is_ok() {
 return Ok(());
 }
@@ -279,8 +296,8 @@ async fn reschedule(
 async fn calculate_synthetic_size_worker(
 tenant_manager: Arc<TenantManager>,
 synthetic_size_calculation_interval: Duration,
-cancel: &CancellationToken,
-ctx: &RequestContext,
+cancel: CancellationToken,
+ctx: RequestContext,
 ) -> anyhow::Result<()> {
 info!("starting calculate_synthetic_size_worker");
 scopeguard::defer! {
@@ -320,7 +337,7 @@ async fn calculate_synthetic_size_worker(
 // there is never any reason to exit calculate_synthetic_size_worker following any
 // return value -- we don't need to care about shutdown because no tenant is found when
 // pageserver is shut down.
-calculate_and_log(&tenant, cancel, ctx).await;
+calculate_and_log(&tenant, &cancel, &ctx).await;
 }

 crate::tenant::tasks::warn_when_period_overrun(
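
The new `consumption_metrics::run` entry point above owns both loops (metrics collection and synthetic size calculation) and only returns once both inner tasks have finished. A rough sketch of a call site, assuming a tokio runtime and a `CancellationToken` driven by the pageserver's shutdown path; the function and binding names here are illustrative, not the binary's actual wiring:

    use std::sync::Arc;
    use tokio_util::sync::CancellationToken;

    async fn spawn_consumption_metrics(
        conf: &'static PageServerConf,
        tenant_manager: Arc<TenantManager>,
    ) -> anyhow::Result<()> {
        let shutdown = CancellationToken::new();
        // run() drains both inner tasks before returning, so one JoinHandle covers them all.
        let task = tokio::spawn(consumption_metrics::run(conf, tenant_manager, shutdown.clone()));

        // ... later, on pageserver shutdown: cancel first, then wait for run() to finish.
        shutdown.cancel();
        task.await?;
        Ok(())
    }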

@@ -59,6 +59,7 @@
 //! 1. It should be easy to forward the context to callees.
 //! 2. To propagate more data from high-level to low-level code, the functions in
 //! the middle should not need to be modified.
+//!
 //! The solution is to have a container structure ([`RequestContext`]) that
 //! carries the information. Functions that don't care about what's in it
 //! pass it along to callees.

@@ -171,14 +171,14 @@ impl ControlPlaneGenerationsApi for ControlPlaneClient {
 register,
 };

-fail::fail_point!("control-plane-client-re-attach");

 let response: ReAttachResponse = self.retry_http_forever(&re_attach_path, request).await?;
 tracing::info!(
 "Received re-attach response with {} tenants",
 response.tenants.len()
 );

+failpoint_support::sleep_millis_async!("control-plane-client-re-attach");
+
 Ok(response
 .tenants
 .into_iter()

@@ -828,9 +828,9 @@ mod test {
 }
 }

-fn setup(test_name: &str) -> anyhow::Result<TestSetup> {
+async fn setup(test_name: &str) -> anyhow::Result<TestSetup> {
 let test_name = Box::leak(Box::new(format!("deletion_queue__{test_name}")));
-let harness = TenantHarness::create(test_name)?;
+let harness = TenantHarness::create(test_name).await?;

 // We do not load() the harness: we only need its config and remote_storage

@@ -844,7 +844,9 @@ mod test {
 },
 timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,
 };
-let storage = GenericRemoteStorage::from_config(&storage_config).unwrap();
+let storage = GenericRemoteStorage::from_config(&storage_config)
+.await
+.unwrap();

 let mock_control_plane = MockControlPlane::new();

@@ -922,7 +924,9 @@ mod test {
 #[tokio::test]
 async fn deletion_queue_smoke() -> anyhow::Result<()> {
 // Basic test that the deletion queue processes the deletions we pass into it
-let ctx = setup("deletion_queue_smoke").expect("Failed test setup");
+let ctx = setup("deletion_queue_smoke")
+.await
+.expect("Failed test setup");
 let client = ctx.deletion_queue.new_client();
 client.recover(HashMap::new())?;

@@ -992,7 +996,9 @@ mod test {

 #[tokio::test]
 async fn deletion_queue_validation() -> anyhow::Result<()> {
-let ctx = setup("deletion_queue_validation").expect("Failed test setup");
+let ctx = setup("deletion_queue_validation")
+.await
+.expect("Failed test setup");
 let client = ctx.deletion_queue.new_client();
 client.recover(HashMap::new())?;

@@ -1051,7 +1057,9 @@ mod test {
 #[tokio::test]
 async fn deletion_queue_recovery() -> anyhow::Result<()> {
 // Basic test that the deletion queue processes the deletions we pass into it
-let mut ctx = setup("deletion_queue_recovery").expect("Failed test setup");
+let mut ctx = setup("deletion_queue_recovery")
+.await
+.expect("Failed test setup");
 let client = ctx.deletion_queue.new_client();
 client.recover(HashMap::new())?;

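A recurring mechanical change in these hunks is that `GenericRemoteStorage::from_config` (like `TenantHarness::create` and the test `setup`) is now async, so every call site gains an `.await`. A minimal sketch of the new construction pattern, mirroring the `.unwrap()` style of the tests above and assuming a `RemoteStorageConfig` is already in hand:

    use remote_storage::{GenericRemoteStorage, RemoteStorageConfig};

    // Sketch only: call sites that used to construct storage synchronously must now await it.
    async fn make_storage(config: &RemoteStorageConfig) -> GenericRemoteStorage {
        GenericRemoteStorage::from_config(config)
            .await
            .unwrap()
    }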

@@ -59,13 +59,14 @@ use utils::{completion, id::TimelineId};
 use crate::{
 config::PageServerConf,
 metrics::disk_usage_based_eviction::METRICS,
-task_mgr::{self, TaskKind, BACKGROUND_RUNTIME},
+task_mgr::{self, BACKGROUND_RUNTIME},
 tenant::{
 mgr::TenantManager,
 remote_timeline_client::LayerFileMetadata,
 secondary::SecondaryTenant,
 storage_layer::{AsLayerDesc, EvictionError, Layer, LayerName},
 },
+CancellableTask, DiskUsageEvictionTask,
 };

 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
@@ -83,17 +84,9 @@ pub struct DiskUsageEvictionTaskConfig {

 /// Selects the sort order for eviction candidates *after* per tenant `min_resident_size`
 /// partitioning.
-#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
 #[serde(tag = "type", content = "args")]
 pub enum EvictionOrder {
-/// Order the layers to be evicted by how recently they have been accessed in absolute
-/// time.
-///
-/// This strategy is unfair when some tenants grow faster than others towards the slower
-/// growing.
-#[default]
-AbsoluteAccessed,

 /// Order the layers to be evicted by how recently they have been accessed relatively within
 /// the set of resident layers of a tenant.
 RelativeAccessed {
@@ -108,6 +101,14 @@ pub enum EvictionOrder {
 },
 }

+impl Default for EvictionOrder {
+fn default() -> Self {
+Self::RelativeAccessed {
+highest_layer_count_loses_first: true,
+}
+}
+}
+
 fn default_highest_layer_count_loses_first() -> bool {
 true
 }
@@ -117,11 +118,6 @@ impl EvictionOrder {
 use EvictionOrder::*;

 match self {
-AbsoluteAccessed => {
-candidates.sort_unstable_by_key(|(partition, candidate)| {
-(*partition, candidate.last_activity_ts)
-});
-}
 RelativeAccessed { .. } => candidates.sort_unstable_by_key(|(partition, candidate)| {
 (*partition, candidate.relative_last_activity)
 }),
@@ -134,7 +130,6 @@ impl EvictionOrder {
 use EvictionOrder::*;

 match self {
-AbsoluteAccessed => finite_f32::FiniteF32::ZERO,
 RelativeAccessed {
 highest_layer_count_loses_first,
 } => {
@@ -192,36 +187,34 @@ pub fn launch_disk_usage_global_eviction_task(
 state: Arc<State>,
 tenant_manager: Arc<TenantManager>,
 background_jobs_barrier: completion::Barrier,
-) -> anyhow::Result<()> {
+) -> Option<DiskUsageEvictionTask> {
 let Some(task_config) = &conf.disk_usage_based_eviction else {
 info!("disk usage based eviction task not configured");
-return Ok(());
+return None;
 };

 info!("launching disk usage based eviction task");

-task_mgr::spawn(
-BACKGROUND_RUNTIME.handle(),
-TaskKind::DiskUsageEviction,
-None,
-None,
+let cancel = CancellationToken::new();
+let task = BACKGROUND_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(
 "disk usage based eviction",
-false,
-async move {
-let cancel = task_mgr::shutdown_token();
-
-// wait until initial load is complete, because we cannot evict from loading tenants.
-tokio::select! {
-_ = cancel.cancelled() => { return Ok(()); },
-_ = background_jobs_barrier.wait() => { }
-};
-
-disk_usage_eviction_task(&state, task_config, &storage, tenant_manager, cancel).await;
-Ok(())
+{
+let cancel = cancel.clone();
+async move {
+// wait until initial load is complete, because we cannot evict from loading tenants.
+tokio::select! {
+_ = cancel.cancelled() => { return anyhow::Ok(()); },
+_ = background_jobs_barrier.wait() => { }
+};
+
+disk_usage_eviction_task(&state, task_config, &storage, tenant_manager, cancel)
+.await;
+anyhow::Ok(())
+}
 },
-);
+));

-Ok(())
+Some(DiskUsageEvictionTask(CancellableTask { cancel, task }))
 }

 #[instrument(skip_all)]
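
With `AbsoluteAccessed` gone, `EvictionOrder` keeps only the relative strategy and its `Default` is now spelled out by hand. A small sketch of a test that pins the new default down, assuming it would live next to the enum in this module; it is not part of the diff:

    #[cfg(test)]
    mod eviction_order_default_tests {
        use super::EvictionOrder;

        #[test]
        fn default_is_relative_accessed() {
            // Matches the hand-written Default impl added above.
            assert!(matches!(
                EvictionOrder::default(),
                EvictionOrder::RelativeAccessed {
                    highest_layer_count_loses_first: true,
                }
            ));
        }
    }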

@@ -308,6 +308,45 @@ paths:
 application/json:
 schema:
 type: string

+/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/block_gc:
+parameters:
+- name: tenant_shard_id
+in: path
+required: true
+schema:
+type: string
+- name: timeline_id
+in: path
+required: true
+schema:
+type: string
+format: hex
+post:
+description: Persistently add a gc blocking at the tenant level because of this timeline
+responses:
+"200":
+description: OK
+
+/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/unblock_gc:
+parameters:
+- name: tenant_shard_id
+in: path
+required: true
+schema:
+type: string
+- name: timeline_id
+in: path
+required: true
+schema:
+type: string
+format: hex
+post:
+description: Persistently remove a tenant level gc blocking for this timeline
+responses:
+"200":
+description: OK
+
 /v1/tenant/{tenant_shard_id}/location_config:
 parameters:
 - name: tenant_shard_id
@@ -377,7 +416,7 @@ paths:
 schema:
 $ref: "#/components/schemas/ConflictError"

-/v1/tenant/{tenant_id}/{timeline_id}/preserve_initdb_archive:
+/v1/tenant/{tenant_id}/timeline/{timeline_id}/preserve_initdb_archive:
 parameters:
 - name: tenant_id
 in: path
@@ -397,6 +436,51 @@ paths:
 "202":
 description: Tenant scheduled to load successfully

+/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/archival_config:
+parameters:
+- name: tenant_shard_id
+in: path
+required: true
+schema:
+type: string
+- name: timeline_id
+in: path
+required: true
+schema:
+type: string
+put:
+description: |
+Either archives or unarchives the given timeline.
+An archived timeline may not have any non-archived children.
+requestBody:
+required: true
+content:
+application/json:
+schema:
+$ref: "#/components/schemas/ArchivalConfigRequest"
+responses:
+"200":
+description: Timeline (un)archived successfully
+"409":
+description: |
+The tenant/timeline is already being modified, perhaps by a concurrent call to this API
+content:
+application/json:
+schema:
+$ref: "#/components/schemas/ConflictError"
+"500":
+description: Generic operation error
+content:
+application/json:
+schema:
+$ref: "#/components/schemas/Error"
+"503":
+description: Temporarily unavailable, please retry.
+content:
+application/json:
+schema:
+$ref: "#/components/schemas/ServiceUnavailableError"
+
 /v1/tenant/{tenant_id}/synthetic_size:
 parameters:
 - name: tenant_id
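
The archival_config endpoint added above takes a PUT with an `ArchivalConfigRequest` body whose `state` is either `"Archived"` or `"Unarchived"`. A client-side sketch of the request shape using reqwest and serde_json, both assumed available; the base URL and ids are placeholders and this is not code from the repository:

    async fn archive_timeline(
        base_url: &str,
        tenant_shard_id: &str,
        timeline_id: &str,
    ) -> anyhow::Result<()> {
        let url = format!(
            "{base_url}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/archival_config"
        );
        reqwest::Client::new()
            .put(url)
            .json(&serde_json::json!({ "state": "Archived" }))
            .send()
            .await?
            // 409 (concurrent modification), 500 and 503 surface as HTTP errors here.
            .error_for_status()?;
        Ok(())
    }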
@@ -429,7 +513,9 @@ paths:
|
|||||||
schema:
|
schema:
|
||||||
$ref: "#/components/schemas/SyntheticSizeResponse"
|
$ref: "#/components/schemas/SyntheticSizeResponse"
|
||||||
text/html:
|
text/html:
|
||||||
description: SVG representation of the tenant and it's timelines.
|
schema:
|
||||||
|
type: string
|
||||||
|
description: SVG representation of the tenant and its timelines.
|
||||||
"401":
|
"401":
|
||||||
description: Unauthorized Error
|
description: Unauthorized Error
|
||||||
content:
|
content:
|
||||||
@@ -568,7 +654,7 @@ paths:
|
|||||||
type: string
|
type: string
|
||||||
- name: timeline_id
|
- name: timeline_id
|
||||||
in: path
|
in: path
|
||||||
ŕequired: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
|
|
||||||
@@ -774,15 +860,13 @@ components:
|
|||||||
TenantCreateRequest:
|
TenantCreateRequest:
|
||||||
allOf:
|
allOf:
|
||||||
- $ref: '#/components/schemas/TenantConfig'
|
- $ref: '#/components/schemas/TenantConfig'
|
||||||
|
- $ref: '#/components/schemas/TenantLoadRequest'
|
||||||
- type: object
|
- type: object
|
||||||
required:
|
required:
|
||||||
- new_tenant_id
|
- new_tenant_id
|
||||||
properties:
|
properties:
|
||||||
new_tenant_id:
|
new_tenant_id:
|
||||||
type: string
|
type: string
|
||||||
generation:
|
|
||||||
type: integer
|
|
||||||
description: Attachment generation number.
|
|
||||||
TenantLoadRequest:
|
TenantLoadRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
@@ -846,6 +930,15 @@ components:
|
|||||||
warm:
|
warm:
|
||||||
type: boolean
|
type: boolean
|
||||||
description: Whether to poll remote storage for layers to download. If false, secondary locations don't download anything.
|
description: Whether to poll remote storage for layers to download. If false, secondary locations don't download anything.
|
||||||
|
ArchivalConfigRequest:
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- state
|
||||||
|
properties:
|
||||||
|
state:
|
||||||
|
description: The archival state of a timeline
|
||||||
|
type: string
|
||||||
|
enum: ["Archived", "Unarchived"]
|
||||||
     TenantConfig:
       type: object
       properties:
@@ -873,8 +966,6 @@ components:
           type: string
         max_lsn_wal_lag:
           type: integer
-        trace_read_requests:
-          type: boolean
         heatmap_period:
           type: string
     TenantConfigResponse:
@@ -1108,7 +1199,7 @@ components:
         reparented_timelines:
           type: array
           description: Set of reparented timeline ids
-          properties:
+          items:
             type: string
             format: hex
             description: TimelineId

@@ -18,14 +18,17 @@ use hyper::StatusCode;
 use hyper::{Body, Request, Response, Uri};
 use metrics::launch_timestamp::LaunchTimestamp;
 use pageserver_api::models::AuxFilePolicy;
+use pageserver_api::models::DownloadRemoteLayersTaskSpawnRequest;
 use pageserver_api::models::IngestAuxFilesRequest;
 use pageserver_api::models::ListAuxFilesRequest;
 use pageserver_api::models::LocationConfig;
 use pageserver_api::models::LocationConfigListResponse;
+use pageserver_api::models::LocationConfigMode;
 use pageserver_api::models::LsnLease;
 use pageserver_api::models::LsnLeaseRequest;
 use pageserver_api::models::ShardParameters;
 use pageserver_api::models::TenantDetails;
+use pageserver_api::models::TenantLocationConfigRequest;
 use pageserver_api::models::TenantLocationConfigResponse;
 use pageserver_api::models::TenantScanRemoteStorageResponse;
 use pageserver_api::models::TenantScanRemoteStorageShard;
@@ -33,12 +36,10 @@ use pageserver_api::models::TenantShardLocation;
 use pageserver_api::models::TenantShardSplitRequest;
 use pageserver_api::models::TenantShardSplitResponse;
 use pageserver_api::models::TenantSorting;
+use pageserver_api::models::TimelineArchivalConfigRequest;
 use pageserver_api::models::TopTenantShardItem;
 use pageserver_api::models::TopTenantShardsRequest;
 use pageserver_api::models::TopTenantShardsResponse;
-use pageserver_api::models::{
-    DownloadRemoteLayersTaskSpawnRequest, LocationConfigMode, TenantLocationConfigRequest,
-};
 use pageserver_api::shard::ShardCount;
 use pageserver_api::shard::TenantShardId;
 use remote_storage::DownloadError;
@@ -295,6 +296,11 @@ impl From<GetActiveTenantError> for ApiError {
             GetActiveTenantError::WaitForActiveTimeout { .. } => {
                 ApiError::ResourceUnavailable(format!("{}", e).into())
             }
+            GetActiveTenantError::SwitchedTenant => {
+                // in our HTTP handlers, this error doesn't happen
+                // TODO: separate error types
+                ApiError::ResourceUnavailable("switched tenant".into())
+            }
         }
     }
 }
@@ -664,6 +670,39 @@ async fn timeline_preserve_initdb_handler(
     json_response(StatusCode::OK, ())
 }
 
+async fn timeline_archival_config_handler(
+    mut request: Request<Body>,
+    _cancel: CancellationToken,
+) -> Result<Response<Body>, ApiError> {
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
+
+    let request_data: TimelineArchivalConfigRequest = json_request(&mut request).await?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
+    let state = get_state(&request);
+
+    async {
+        let tenant = state
+            .tenant_manager
+            .get_attached_tenant_shard(tenant_shard_id)?;
+
+        tenant
+            .apply_timeline_archival_config(timeline_id, request_data.state)
+            .await
+            .context("applying archival config")
+            .map_err(ApiError::InternalServerError)?;
+        Ok::<_, ApiError>(())
+    }
+    .instrument(info_span!("timeline_archival_config",
+        tenant_id = %tenant_shard_id.tenant_id,
+        shard_id = %tenant_shard_id.shard_slug(),
+        state = ?request_data.state,
+        %timeline_id))
+    .await?;
+
+    json_response(StatusCode::OK, ())
+}
+
 async fn timeline_detail_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
@@ -896,6 +935,7 @@ async fn tenant_list_handler(
             generation: (*gen)
                 .into()
                 .expect("Tenants are always attached with a generation"),
+            gc_blocking: None,
         })
         .collect::<Vec<TenantInfo>>();
 
@@ -947,6 +987,7 @@ async fn tenant_status(
                 .generation()
                 .into()
                 .expect("Tenants are always attached with a generation"),
+            gc_blocking: tenant.gc_block.summary().map(|x| format!("{x:?}")),
         },
         walredo: tenant.wal_redo_manager_status(),
         timelines: tenant.list_timeline_ids(),
@@ -1121,7 +1162,10 @@ async fn layer_map_info_handler(
     let timeline =
         active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
             .await?;
-    let layer_map_info = timeline.layer_map_info(reset).await;
+    let layer_map_info = timeline
+        .layer_map_info(reset)
+        .await
+        .map_err(|_shutdown| ApiError::ShuttingDown)?;
 
     json_response(StatusCode::OK, layer_map_info)
 }
@@ -1187,6 +1231,72 @@ async fn evict_timeline_layer_handler(
     }
 }
 
+async fn timeline_gc_blocking_handler(
+    request: Request<Body>,
+    _cancel: CancellationToken,
+) -> Result<Response<Body>, ApiError> {
+    block_or_unblock_gc(request, true).await
+}
+
+async fn timeline_gc_unblocking_handler(
+    request: Request<Body>,
+    _cancel: CancellationToken,
+) -> Result<Response<Body>, ApiError> {
+    block_or_unblock_gc(request, false).await
+}
+
+/// Adding a block is `POST ../block_gc`, removing a block is `POST ../unblock_gc`.
+///
+/// Both are technically unsafe because they might fire off index uploads, thus they are POST.
+async fn block_or_unblock_gc(
+    request: Request<Body>,
+    block: bool,
+) -> Result<Response<Body>, ApiError> {
+    use crate::tenant::{
+        remote_timeline_client::WaitCompletionError, upload_queue::NotInitialized,
+    };
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
+    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
+    let state = get_state(&request);
+
+    let tenant = state
+        .tenant_manager
+        .get_attached_tenant_shard(tenant_shard_id)?;
+
+    tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
+
+    let timeline = tenant.get_timeline(timeline_id, true)?;
+
+    let fut = async {
+        if block {
+            timeline.block_gc(&tenant).await.map(|_| ())
+        } else {
+            timeline.unblock_gc(&tenant).await
+        }
+    };
+
+    let span = tracing::info_span!(
+        "block_or_unblock_gc",
+        tenant_id = %tenant_shard_id.tenant_id,
+        shard_id = %tenant_shard_id.shard_slug(),
+        timeline_id = %timeline_id,
+        block = block,
+    );
+
+    let res = fut.instrument(span).await;
+
+    res.map_err(|e| {
+        if e.is::<NotInitialized>() || e.is::<WaitCompletionError>() {
+            ApiError::ShuttingDown
+        } else {
+            ApiError::InternalServerError(e)
+        }
+    })?;
+
+    json_response(StatusCode::OK, ())
+}
+
 /// Get tenant_size SVG graph along with the JSON data.
 fn synthetic_size_html_response(
     inputs: ModelInputs,
@@ -1616,7 +1726,9 @@ async fn timeline_compact_handler(
         .await
         .map_err(|e| ApiError::InternalServerError(e.into()))?;
     if wait_until_uploaded {
-        timeline.remote_client.wait_completion().await.map_err(ApiError::InternalServerError)?;
+        timeline.remote_client.wait_completion().await
+            // XXX map to correct ApiError for the cases where it's due to shutdown
+            .context("wait completion").map_err(ApiError::InternalServerError)?;
     }
     json_response(StatusCode::OK, ())
 }
@@ -1642,6 +1754,10 @@ async fn timeline_checkpoint_handler(
     if Some(true) == parse_query_param::<_, bool>(&request, "force_image_layer_creation")? {
         flags |= CompactFlags::ForceImageLayerCreation;
     }
+
+    // By default, checkpoints come with a compaction, but this may be optionally disabled by tests that just want to flush + upload.
+    let compact = parse_query_param::<_, bool>(&request, "compact")?.unwrap_or(true);
+
     let wait_until_uploaded =
         parse_query_param::<_, bool>(&request, "wait_until_uploaded")?.unwrap_or(false);
 
@@ -1658,18 +1774,22 @@ async fn timeline_checkpoint_handler(
 
         }
     })?;
-    timeline
-        .compact(&cancel, flags, &ctx)
-        .await
-        .map_err(|e|
-            match e {
-                CompactionError::ShuttingDown => ApiError::ShuttingDown,
-                CompactionError::Other(e) => ApiError::InternalServerError(e)
-            }
-        )?;
+    if compact {
+        timeline
+            .compact(&cancel, flags, &ctx)
+            .await
+            .map_err(|e|
+                match e {
+                    CompactionError::ShuttingDown => ApiError::ShuttingDown,
+                    CompactionError::Other(e) => ApiError::InternalServerError(e)
+                }
+            )?;
+    }
+
     if wait_until_uploaded {
-        timeline.remote_client.wait_completion().await.map_err(ApiError::InternalServerError)?;
+        timeline.remote_client.wait_completion().await
+            // XXX map to correct ApiError for the cases where it's due to shutdown
+            .context("wait completion").map_err(ApiError::InternalServerError)?;
     }
 
     json_response(StatusCode::OK, ())
@@ -1721,7 +1841,9 @@ async fn timeline_detach_ancestor_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    use crate::tenant::timeline::detach_ancestor::Options;
+    use crate::tenant::timeline::detach_ancestor;
+    use pageserver_api::models::detach_ancestor::AncestorDetached;
+
     let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     check_permission(&request, Some(tenant_shard_id.tenant_id))?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
@@ -1729,7 +1851,7 @@ async fn timeline_detach_ancestor_handler(
     let span = tracing::info_span!("detach_ancestor", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id);
 
     async move {
-        let mut options = Options::default();
+        let mut options = detach_ancestor::Options::default();
 
         let rewrite_concurrency =
             parse_query_param::<_, std::num::NonZeroUsize>(&request, "rewrite_concurrency")?;
@@ -1757,27 +1879,36 @@ async fn timeline_detach_ancestor_handler(
 
         let timeline = tenant.get_timeline(timeline_id, true)?;
 
-        let (_guard, prepared) = timeline
+        let progress = timeline
             .prepare_to_detach_from_ancestor(&tenant, options, ctx)
             .await?;
 
-        let res = state
-            .tenant_manager
-            .complete_detaching_timeline_ancestor(tenant_shard_id, timeline_id, prepared, ctx)
-            .await;
+        // uncomment to allow early as possible Tenant::drop
+        // drop(tenant);
 
-        match res {
-            Ok(reparented_timelines) => {
-                let resp = pageserver_api::models::detach_ancestor::AncestorDetached {
-                    reparented_timelines,
-                };
-
-                json_response(StatusCode::OK, resp)
-            }
-            Err(e) => Err(ApiError::InternalServerError(
-                e.context("timeline detach completion"),
-            )),
-        }
+        let resp = match progress {
+            detach_ancestor::Progress::Prepared(_guard, prepared) => {
+                // it would be great to tag the guard on to the tenant activation future
+                let reparented_timelines = state
+                    .tenant_manager
+                    .complete_detaching_timeline_ancestor(
+                        tenant_shard_id,
+                        timeline_id,
+                        prepared,
+                        ctx,
+                    )
+                    .await
+                    .context("timeline detach ancestor completion")
+                    .map_err(ApiError::InternalServerError)?;
+
+                AncestorDetached {
+                    reparented_timelines,
+                }
+            }
+            detach_ancestor::Progress::Done(resp) => resp,
+        };
+
+        json_response(StatusCode::OK, resp)
     }
     .instrument(span)
     .await
@@ -2074,14 +2205,24 @@ async fn secondary_download_handler(
 
     let timeout = wait.unwrap_or(Duration::MAX);
 
-    let status = match tokio::time::timeout(
+    let result = tokio::time::timeout(
         timeout,
         state.secondary_controller.download_tenant(tenant_shard_id),
     )
-    .await
-    {
-        // Download job ran to completion.
-        Ok(Ok(())) => StatusCode::OK,
+    .await;
+
+    let progress = secondary_tenant.progress.lock().unwrap().clone();
+
+    let status = match result {
+        Ok(Ok(())) => {
+            if progress.layers_downloaded >= progress.layers_total {
+                // Download job ran to completion
+                StatusCode::OK
+            } else {
+                // Download dropped out without errors because it ran out of time budget
+                StatusCode::ACCEPTED
+            }
+        }
         // Edge case: downloads aren't usually fallible: things like a missing heatmap are considered
         // okay. We could get an error here in the unlikely edge case that the tenant
         // was detached between our check above and executing the download job.
@@ -2091,8 +2232,6 @@ async fn secondary_download_handler(
         Err(_) => StatusCode::ACCEPTED,
     };
 
-    let progress = secondary_tenant.progress.lock().unwrap().clone();
-
     json_response(status, progress)
 }
 
@@ -2778,6 +2917,10 @@ pub fn make_router(
             "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/preserve_initdb_archive",
             |r| api_handler(r, timeline_preserve_initdb_handler),
         )
+        .post(
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/archival_config",
+            |r| api_handler(r, timeline_archival_config_handler),
+        )
         .get("/v1/tenant/:tenant_shard_id/timeline/:timeline_id", |r| {
             api_handler(r, timeline_detail_handler)
         })
@@ -2832,6 +2975,14 @@ pub fn make_router(
             "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name",
             |r| api_handler(r, evict_timeline_layer_handler),
         )
+        .post(
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/block_gc",
+            |r| api_handler(r, timeline_gc_blocking_handler),
+        )
+        .post(
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/unblock_gc",
+            |r| api_handler(r, timeline_gc_unblocking_handler),
+        )
         .post("/v1/tenant/:tenant_shard_id/heatmap_upload", |r| {
             api_handler(r, secondary_upload_handler)
         })

@@ -2,19 +2,29 @@ use std::{num::NonZeroUsize, sync::Arc};
 
 use crate::tenant::ephemeral_file;
 
-#[derive(Default, Debug, PartialEq, Eq, Clone, serde::Deserialize)]
+#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize)]
 #[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)]
 pub enum L0FlushConfig {
-    #[default]
     PageCached,
     #[serde(rename_all = "snake_case")]
-    Direct { max_concurrency: NonZeroUsize },
+    Direct {
+        max_concurrency: NonZeroUsize,
+    },
+}
+
+impl Default for L0FlushConfig {
+    fn default() -> Self {
+        Self::Direct {
+            // TODO: using num_cpus results in different peak memory usage on different instance types.
+            max_concurrency: NonZeroUsize::new(usize::max(1, num_cpus::get())).unwrap(),
+        }
+    }
 }
 
 #[derive(Clone)]
 pub struct L0FlushGlobalState(Arc<Inner>);
 
-pub(crate) enum Inner {
+pub enum Inner {
     PageCached,
     Direct { semaphore: tokio::sync::Semaphore },
 }
@@ -30,7 +40,7 @@ impl L0FlushGlobalState {
         }
     }
 
-    pub(crate) fn inner(&self) -> &Arc<Inner> {
+    pub fn inner(&self) -> &Arc<Inner> {
         &self.0
     }
 }
Some files were not shown because too many files have changed in this diff.