mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-18 10:52:55 +00:00
Compare commits
305 Commits
local-prox
...
jcsp/hack
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b38714fcf8 | ||
|
|
95b9b2eb8a | ||
|
|
563e0dc7fc | ||
|
|
e8cc41c858 | ||
|
|
8780f03a47 | ||
|
|
fadce9723d | ||
|
|
695e5b9fce | ||
|
|
aba81434ce | ||
|
|
3282944125 | ||
|
|
64032a77e9 | ||
|
|
658d15450a | ||
|
|
cf4bc1e43d | ||
|
|
cc937dc2f2 | ||
|
|
e27527f7c1 | ||
|
|
e2f111124b | ||
|
|
7d75e32ed9 | ||
|
|
c213373a59 | ||
|
|
e0addc100d | ||
|
|
0519138b04 | ||
|
|
5da39b469c | ||
|
|
82027e22dd | ||
|
|
c431e2f1c5 | ||
|
|
4e5724d9c3 | ||
|
|
0d3e499059 | ||
|
|
7b860b837c | ||
|
|
41fc96e20f | ||
|
|
fb2b1ce57b | ||
|
|
464717451b | ||
|
|
c6ed86d3d0 | ||
|
|
f0a9017008 | ||
|
|
bb7949ba00 | ||
|
|
1df0f69664 | ||
|
|
970066a914 | ||
|
|
1ebd3897c0 | ||
|
|
6460beffcd | ||
|
|
6f7f8958db | ||
|
|
936a00e077 | ||
|
|
96a4e8de66 | ||
|
|
01180666b0 | ||
|
|
6c94269c32 | ||
|
|
edc691647d | ||
|
|
855d7b4781 | ||
|
|
c49c9707ce | ||
|
|
2227540a0d | ||
|
|
f1347f2417 | ||
|
|
30b295b017 | ||
|
|
1cef395266 | ||
|
|
78d160f76d | ||
|
|
b9238059d6 | ||
|
|
d0cb4b88c8 | ||
|
|
1ec3e39d4e | ||
|
|
a1a74eef2c | ||
|
|
90e689adda | ||
|
|
f0b2d4b053 | ||
|
|
299d9474c9 | ||
|
|
7234208b36 | ||
|
|
93450f11f5 | ||
|
|
2f0f9edf33 | ||
|
|
d424f2b7c8 | ||
|
|
21315e80bc | ||
|
|
483b66d383 | ||
|
|
aa72a22661 | ||
|
|
5c0264b591 | ||
|
|
9f13277729 | ||
|
|
54aa319805 | ||
|
|
4a227484bf | ||
|
|
2f83f85291 | ||
|
|
d6cfcb0d93 | ||
|
|
392843ad2a | ||
|
|
bd4dae8f4a | ||
|
|
b05fe53cfd | ||
|
|
c13a2f0df1 | ||
|
|
39be366fc5 | ||
|
|
6eda0a3158 | ||
|
|
306c7a1813 | ||
|
|
80be423a58 | ||
|
|
5dcfef82f2 | ||
|
|
e67b8f69c0 | ||
|
|
e546872ab4 | ||
|
|
322ea1cf7c | ||
|
|
3633742de9 | ||
|
|
079d3a37ba | ||
|
|
a46e77b476 | ||
|
|
a92702b01e | ||
|
|
8ff3253f20 | ||
|
|
04b82c92a7 | ||
|
|
e5bf423e68 | ||
|
|
60af392e45 | ||
|
|
661fc41e71 | ||
|
|
702c488f32 | ||
|
|
45c5122754 | ||
|
|
558394f710 | ||
|
|
73b0898608 | ||
|
|
e65be4c2dc | ||
|
|
40087b8164 | ||
|
|
c762b59483 | ||
|
|
5d71601ca9 | ||
|
|
a113c3e433 | ||
|
|
e81fc598f4 | ||
|
|
48b845fa76 | ||
|
|
27096858dc | ||
|
|
4430d0ae7d | ||
|
|
6e183aa0de | ||
|
|
fd6d0b7635 | ||
|
|
3710c32aae | ||
|
|
be83bee49d | ||
|
|
cf28e5922a | ||
|
|
7d384d6953 | ||
|
|
4b3b37b912 | ||
|
|
1d8d200f4d | ||
|
|
0d80d6ce18 | ||
|
|
f653ee039f | ||
|
|
e614a95853 | ||
|
|
850db4cc13 | ||
|
|
8a316b1277 | ||
|
|
4d13bae449 | ||
|
|
49377abd98 | ||
|
|
a6b2f4e54e | ||
|
|
face60d50b | ||
|
|
9768aa27f2 | ||
|
|
96b2e575e1 | ||
|
|
7222777784 | ||
|
|
5469fdede0 | ||
|
|
72aa6b9fdd | ||
|
|
ae0634b7be | ||
|
|
70711f32fa | ||
|
|
52a88af0aa | ||
|
|
b7a43bf817 | ||
|
|
dce91b33a4 | ||
|
|
23ee4f3050 | ||
|
|
46857e8282 | ||
|
|
368ab0ce54 | ||
|
|
a5987eebfd | ||
|
|
6686ede30f | ||
|
|
373c7057cc | ||
|
|
7d6ec16166 | ||
|
|
0e6fdc8a58 | ||
|
|
521438a5c6 | ||
|
|
07d7874bc8 | ||
|
|
1804111a02 | ||
|
|
cd0178efed | ||
|
|
333574be57 | ||
|
|
79a799a143 | ||
|
|
9da06af6c9 | ||
|
|
ce1753d036 | ||
|
|
67db8432b4 | ||
|
|
4e2e44e524 | ||
|
|
ed786104f3 | ||
|
|
84b74f2bd1 | ||
|
|
fec2ad6283 | ||
|
|
98eebd4682 | ||
|
|
2f74287c9b | ||
|
|
aee1bf95e3 | ||
|
|
b9de9d75ff | ||
|
|
7943b709e6 | ||
|
|
d7d066d493 | ||
|
|
e78ac22107 | ||
|
|
76a8f2bb44 | ||
|
|
8d59a8581f | ||
|
|
b1ddd01289 | ||
|
|
6eae4fc9aa | ||
|
|
765455bca2 | ||
|
|
4204960942 | ||
|
|
67345d66ea | ||
|
|
2266ee5971 | ||
|
|
b58445d855 | ||
|
|
36050e7f3d | ||
|
|
33360ed96d | ||
|
|
39a28d1108 | ||
|
|
efa6aa134f | ||
|
|
2c724e56e2 | ||
|
|
feff887c6f | ||
|
|
353d915fcf | ||
|
|
2e38098cbc | ||
|
|
a6fe5ea1ac | ||
|
|
05b0aed0c1 | ||
|
|
cd1705357d | ||
|
|
6bc7561290 | ||
|
|
fbd3ac14b5 | ||
|
|
e437787c8f | ||
|
|
3460dbf90b | ||
|
|
6b89d99677 | ||
|
|
6cc8ea86e4 | ||
|
|
e62a492d6f | ||
|
|
a475cdf642 | ||
|
|
7002c79a47 | ||
|
|
ee6cf357b4 | ||
|
|
e5c2086b5f | ||
|
|
5f1208296a | ||
|
|
88e8e473cd | ||
|
|
b0a77844f6 | ||
|
|
1baf464307 | ||
|
|
e9b8e81cea | ||
|
|
85d6194aa4 | ||
|
|
333a7a68ef | ||
|
|
6aa4e41bee | ||
|
|
840183e51f | ||
|
|
cbccc94b03 | ||
|
|
fce227df22 | ||
|
|
bd787e800f | ||
|
|
4a7704b4a3 | ||
|
|
ff1119da66 | ||
|
|
4c3ba1627b | ||
|
|
1407174fb2 | ||
|
|
ec9dcb1889 | ||
|
|
d11d781afc | ||
|
|
4e44565b71 | ||
|
|
4ed51ad33b | ||
|
|
1c1ebe5537 | ||
|
|
c19cb7f386 | ||
|
|
4b97d31b16 | ||
|
|
923ade3dd7 | ||
|
|
b04e711975 | ||
|
|
afd0a6b39a | ||
|
|
99752286d8 | ||
|
|
15df93363c | ||
|
|
bc0ab741af | ||
|
|
51d9dfeaa3 | ||
|
|
f63cb18155 | ||
|
|
0de603d88e | ||
|
|
240913912a | ||
|
|
91a4ea0de2 | ||
|
|
8608704f49 | ||
|
|
efef68ce99 | ||
|
|
8daefd24da | ||
|
|
46cc8b7982 | ||
|
|
38cd90dd0c | ||
|
|
a51b269f15 | ||
|
|
43bf6d0a0f | ||
|
|
15273a9b66 | ||
|
|
78aca668d0 | ||
|
|
acbf4148ea | ||
|
|
6508540561 | ||
|
|
a41b5244a8 | ||
|
|
2b3189be95 | ||
|
|
248563c595 | ||
|
|
14cd6ca933 | ||
|
|
eb36403e71 | ||
|
|
3c6f779698 | ||
|
|
f67f0c1c11 | ||
|
|
edb02d3299 | ||
|
|
664a69e65b | ||
|
|
478322ebf9 | ||
|
|
802f174072 | ||
|
|
47f9890bae | ||
|
|
262265daad | ||
|
|
300da5b872 | ||
|
|
7b22b5c433 | ||
|
|
ffca97bc1e | ||
|
|
cb356f3259 | ||
|
|
c85374295f | ||
|
|
4992160677 | ||
|
|
bd535b3371 | ||
|
|
d90c5a03af | ||
|
|
2d02cc9079 | ||
|
|
49ad94b99f | ||
|
|
948a217398 | ||
|
|
125381eae7 | ||
|
|
cd01bbc715 | ||
|
|
d8b5e3b88d | ||
|
|
06d25f2186 | ||
|
|
f759b561f3 | ||
|
|
ece0555600 | ||
|
|
73ea0a0b01 | ||
|
|
d8f6d6fd6f | ||
|
|
d24de169a7 | ||
|
|
0816168296 | ||
|
|
277b44d57a | ||
|
|
68c2c3880e | ||
|
|
49da498f65 | ||
|
|
2c76ba3dd7 | ||
|
|
dbe3dc69ad | ||
|
|
8e5bb3ed49 | ||
|
|
ab0be7b8da | ||
|
|
b4c55f5d24 | ||
|
|
ede70d833c | ||
|
|
70c3d18bb0 | ||
|
|
7a491f52c4 | ||
|
|
323c4ecb4f | ||
|
|
3d2466607e | ||
|
|
ed478b39f4 | ||
|
|
91585a558d | ||
|
|
93467eae1f | ||
|
|
f3aac81d19 | ||
|
|
979ad60c19 | ||
|
|
9316cb1b1f | ||
|
|
e7939a527a | ||
|
|
36d26665e1 | ||
|
|
873347f977 | ||
|
|
e814ac16f9 | ||
|
|
ad3055d386 | ||
|
|
94e03eb452 | ||
|
|
380f26ef79 | ||
|
|
3c5b7f59d7 | ||
|
|
fee89f80b5 | ||
|
|
41cce8eaf1 | ||
|
|
f88fe0218d | ||
|
|
cc856eca85 | ||
|
|
cf350c6002 | ||
|
|
0ce6b6a0a3 | ||
|
|
73f247d537 | ||
|
|
960be82183 | ||
|
|
806e5a6c19 | ||
|
|
8d5df07cce | ||
|
|
df7a9d1407 |
@@ -23,30 +23,10 @@ platforms = [
|
||||
]
|
||||
|
||||
[final-excludes]
|
||||
workspace-members = [
|
||||
# vm_monitor benefits from the same Cargo.lock as the rest of our artifacts, but
|
||||
# it is built primarly in separate repo neondatabase/autoscaling and thus is excluded
|
||||
# from depending on workspace-hack because most of the dependencies are not used.
|
||||
"vm_monitor",
|
||||
# All of these exist in libs and are not usually built independently.
|
||||
# Putting workspace hack there adds a bottleneck for cargo builds.
|
||||
"compute_api",
|
||||
"consumption_metrics",
|
||||
"desim",
|
||||
"metrics",
|
||||
"pageserver_api",
|
||||
"postgres_backend",
|
||||
"postgres_connection",
|
||||
"postgres_ffi",
|
||||
"pq_proto",
|
||||
"remote_storage",
|
||||
"safekeeper_api",
|
||||
"tenant_size_model",
|
||||
"tracing-utils",
|
||||
"utils",
|
||||
"wal_craft",
|
||||
"walproposer",
|
||||
]
|
||||
# vm_monitor benefits from the same Cargo.lock as the rest of our artifacts, but
|
||||
# it is built primarly in separate repo neondatabase/autoscaling and thus is excluded
|
||||
# from depending on workspace-hack because most of the dependencies are not used.
|
||||
workspace-members = ["vm_monitor"]
|
||||
|
||||
# Write out exact versions rather than a semver range. (Defaults to false.)
|
||||
# exact-versions = true
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
[profile.default]
|
||||
slow-timeout = { period = "60s", terminate-after = 3 }
|
||||
slow-timeout = { period = "20s", terminate-after = 3 }
|
||||
|
||||
@@ -8,21 +8,18 @@
|
||||
!scripts/combine_control_files.py
|
||||
!scripts/ninstall.sh
|
||||
!vm-cgconfig.conf
|
||||
!docker-compose/run-tests.sh
|
||||
|
||||
# Directories
|
||||
!.cargo/
|
||||
!.config/
|
||||
!compute/
|
||||
!compute_tools/
|
||||
!control_plane/
|
||||
!libs/
|
||||
!neon_local/
|
||||
!pageserver/
|
||||
!patches/
|
||||
!pgxn/
|
||||
!proxy/
|
||||
!storage_scrubber/
|
||||
!s3_scrubber/
|
||||
!safekeeper/
|
||||
!storage_broker/
|
||||
!storage_controller/
|
||||
|
||||
2
.gitattributes
vendored
2
.gitattributes
vendored
@@ -1,2 +0,0 @@
|
||||
# allows for nicer hunk headers with git show
|
||||
*.rs diff=rust
|
||||
6
.github/ISSUE_TEMPLATE/config.yml
vendored
6
.github/ISSUE_TEMPLATE/config.yml
vendored
@@ -1,6 +0,0 @@
|
||||
|
||||
blank_issues_enabled: true
|
||||
contact_links:
|
||||
- name: Feature request
|
||||
url: https://console.neon.tech/app/projects?modal=feedback
|
||||
about: For feature requests in the Neon product, please submit via the feedback form on `https://console.neon.tech`
|
||||
16
.github/actionlint.yml
vendored
16
.github/actionlint.yml
vendored
@@ -1,22 +1,14 @@
|
||||
self-hosted-runner:
|
||||
labels:
|
||||
- arm64
|
||||
- dev
|
||||
- gen3
|
||||
- large
|
||||
- large-arm64
|
||||
# Remove `macos-14` from the list after https://github.com/rhysd/actionlint/pull/392 is merged.
|
||||
- macos-14
|
||||
- small
|
||||
- small-arm64
|
||||
- us-east-2
|
||||
config-variables:
|
||||
- AZURE_DEV_CLIENT_ID
|
||||
- AZURE_DEV_REGISTRY_NAME
|
||||
- AZURE_DEV_SUBSCRIPTION_ID
|
||||
- AZURE_PROD_CLIENT_ID
|
||||
- AZURE_PROD_REGISTRY_NAME
|
||||
- AZURE_PROD_SUBSCRIPTION_ID
|
||||
- AZURE_TENANT_ID
|
||||
- BENCHMARK_PROJECT_ID_PUB
|
||||
- BENCHMARK_PROJECT_ID_SUB
|
||||
- REMOTE_STORAGE_AZURE_CONTAINER
|
||||
- REMOTE_STORAGE_AZURE_REGION
|
||||
- SLACK_UPCOMING_RELEASE_CHANNEL_ID
|
||||
- DEV_AWS_OIDC_ROLE_ARN
|
||||
|
||||
@@ -183,7 +183,7 @@ runs:
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/pypoetry/virtualenvs
|
||||
key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-${{ hashFiles('poetry.lock') }}
|
||||
key: v2-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
|
||||
|
||||
- name: Store Allure test stat in the DB (new)
|
||||
if: ${{ !cancelled() && inputs.store-test-results-into-db == 'true' }}
|
||||
|
||||
2
.github/actions/download/action.yml
vendored
2
.github/actions/download/action.yml
vendored
@@ -26,7 +26,7 @@ runs:
|
||||
TARGET: ${{ inputs.path }}
|
||||
ARCHIVE: /tmp/downloads/${{ inputs.name }}.tar.zst
|
||||
SKIP_IF_DOES_NOT_EXIST: ${{ inputs.skip-if-does-not-exist }}
|
||||
PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}/{2}', github.event.pull_request.head.sha || github.sha, github.run_id, github.run_attempt) }}
|
||||
PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}', github.run_id, github.run_attempt) }}
|
||||
run: |
|
||||
BUCKET=neon-github-public-dev
|
||||
FILENAME=$(basename $ARCHIVE)
|
||||
|
||||
@@ -3,13 +3,13 @@ description: 'Create Branch using API'
|
||||
|
||||
inputs:
|
||||
api_key:
|
||||
description: 'Neon API key'
|
||||
desctiption: 'Neon API key'
|
||||
required: true
|
||||
project_id:
|
||||
description: 'ID of the Project to create Branch in'
|
||||
desctiption: 'ID of the Project to create Branch in'
|
||||
required: true
|
||||
api_host:
|
||||
description: 'Neon API host'
|
||||
desctiption: 'Neon API host'
|
||||
default: console-stage.neon.build
|
||||
outputs:
|
||||
dsn:
|
||||
|
||||
@@ -3,16 +3,16 @@ description: 'Delete Branch using API'
|
||||
|
||||
inputs:
|
||||
api_key:
|
||||
description: 'Neon API key'
|
||||
desctiption: 'Neon API key'
|
||||
required: true
|
||||
project_id:
|
||||
description: 'ID of the Project which should be deleted'
|
||||
desctiption: 'ID of the Project which should be deleted'
|
||||
required: true
|
||||
branch_id:
|
||||
description: 'ID of the branch to delete'
|
||||
desctiption: 'ID of the branch to delete'
|
||||
required: true
|
||||
api_host:
|
||||
description: 'Neon API host'
|
||||
desctiption: 'Neon API host'
|
||||
default: console-stage.neon.build
|
||||
|
||||
runs:
|
||||
|
||||
22
.github/actions/neon-project-create/action.yml
vendored
22
.github/actions/neon-project-create/action.yml
vendored
@@ -3,19 +3,22 @@ description: 'Create Neon Project using API'
|
||||
|
||||
inputs:
|
||||
api_key:
|
||||
description: 'Neon API key'
|
||||
desctiption: 'Neon API key'
|
||||
required: true
|
||||
region_id:
|
||||
description: 'Region ID, if not set the project will be created in the default region'
|
||||
desctiption: 'Region ID, if not set the project will be created in the default region'
|
||||
default: aws-us-east-2
|
||||
postgres_version:
|
||||
description: 'Postgres version; default is 16'
|
||||
default: '16'
|
||||
desctiption: 'Postgres version; default is 15'
|
||||
default: 15
|
||||
api_host:
|
||||
description: 'Neon API host'
|
||||
desctiption: 'Neon API host'
|
||||
default: console-stage.neon.build
|
||||
provisioner:
|
||||
desctiption: 'k8s-pod or k8s-neonvm'
|
||||
default: 'k8s-pod'
|
||||
compute_units:
|
||||
description: '[Min, Max] compute units'
|
||||
desctiption: '[Min, Max] compute units; Min and Max are used for k8s-neonvm with autoscaling, for k8s-pod values Min and Max should be equal'
|
||||
default: '[1, 1]'
|
||||
|
||||
outputs:
|
||||
@@ -34,6 +37,10 @@ runs:
|
||||
# A shell without `set -x` to not to expose password/dsn in logs
|
||||
shell: bash -euo pipefail {0}
|
||||
run: |
|
||||
if [ "${PROVISIONER}" == "k8s-pod" ] && [ "${MIN_CU}" != "${MAX_CU}" ]; then
|
||||
echo >&2 "For k8s-pod provisioner MIN_CU should be equal to MAX_CU"
|
||||
fi
|
||||
|
||||
project=$(curl \
|
||||
"https://${API_HOST}/api/v2/projects" \
|
||||
--fail \
|
||||
@@ -45,7 +52,7 @@ runs:
|
||||
\"name\": \"Created by actions/neon-project-create; GITHUB_RUN_ID=${GITHUB_RUN_ID}\",
|
||||
\"pg_version\": ${POSTGRES_VERSION},
|
||||
\"region_id\": \"${REGION_ID}\",
|
||||
\"provisioner\": \"k8s-neonvm\",
|
||||
\"provisioner\": \"${PROVISIONER}\",
|
||||
\"autoscaling_limit_min_cu\": ${MIN_CU},
|
||||
\"autoscaling_limit_max_cu\": ${MAX_CU},
|
||||
\"settings\": { }
|
||||
@@ -68,5 +75,6 @@ runs:
|
||||
API_KEY: ${{ inputs.api_key }}
|
||||
REGION_ID: ${{ inputs.region_id }}
|
||||
POSTGRES_VERSION: ${{ inputs.postgres_version }}
|
||||
PROVISIONER: ${{ inputs.provisioner }}
|
||||
MIN_CU: ${{ fromJSON(inputs.compute_units)[0] }}
|
||||
MAX_CU: ${{ fromJSON(inputs.compute_units)[1] }}
|
||||
|
||||
@@ -3,13 +3,13 @@ description: 'Delete Neon Project using API'
|
||||
|
||||
inputs:
|
||||
api_key:
|
||||
description: 'Neon API key'
|
||||
desctiption: 'Neon API key'
|
||||
required: true
|
||||
project_id:
|
||||
description: 'ID of the Project to delete'
|
||||
desctiption: 'ID of the Project to delete'
|
||||
required: true
|
||||
api_host:
|
||||
description: 'Neon API host'
|
||||
desctiption: 'Neon API host'
|
||||
default: console-stage.neon.build
|
||||
|
||||
runs:
|
||||
|
||||
41
.github/actions/run-python-test-set/action.yml
vendored
41
.github/actions/run-python-test-set/action.yml
vendored
@@ -43,7 +43,7 @@ inputs:
|
||||
pg_version:
|
||||
description: 'Postgres version to use for tests'
|
||||
required: false
|
||||
default: 'v16'
|
||||
default: 'v14'
|
||||
benchmark_durations:
|
||||
description: 'benchmark durations JSON'
|
||||
required: false
|
||||
@@ -56,14 +56,14 @@ runs:
|
||||
if: inputs.build_type != 'remote'
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact
|
||||
name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact
|
||||
path: /tmp/neon
|
||||
|
||||
- name: Download Neon binaries for the previous release
|
||||
if: inputs.build_type != 'remote'
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact
|
||||
name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact
|
||||
path: /tmp/neon-previous
|
||||
prefix: latest
|
||||
|
||||
@@ -71,7 +71,7 @@ runs:
|
||||
if: inputs.build_type != 'remote'
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: compatibility-snapshot-${{ runner.arch }}-${{ inputs.build_type }}-pg${{ inputs.pg_version }}
|
||||
name: compatibility-snapshot-${{ inputs.build_type }}-pg${{ inputs.pg_version }}
|
||||
path: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
|
||||
prefix: latest
|
||||
# The lack of compatibility snapshot (for example, for the new Postgres version)
|
||||
@@ -83,12 +83,13 @@ runs:
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Cache poetry deps
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/pypoetry/virtualenvs
|
||||
key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-${{ hashFiles('poetry.lock') }}
|
||||
key: v2-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
|
||||
|
||||
- name: Install Python deps
|
||||
shell: bash -euxo pipefail {0}
|
||||
@@ -113,8 +114,6 @@ runs:
|
||||
export PLATFORM=${PLATFORM:-github-actions-selfhosted}
|
||||
export POSTGRES_DISTRIB_DIR=${POSTGRES_DISTRIB_DIR:-/tmp/neon/pg_install}
|
||||
export DEFAULT_PG_VERSION=${PG_VERSION#v}
|
||||
export LD_LIBRARY_PATH=${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/lib
|
||||
export BENCHMARK_CONNSTR=${BENCHMARK_CONNSTR:-}
|
||||
|
||||
if [ "${BUILD_TYPE}" = "remote" ]; then
|
||||
export REMOTE_ENV=1
|
||||
@@ -130,8 +129,8 @@ runs:
|
||||
exit 1
|
||||
fi
|
||||
if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then
|
||||
# -n sets the number of parallel processes that pytest-xdist will run
|
||||
EXTRA_PARAMS="-n12 $EXTRA_PARAMS"
|
||||
# -n16 uses sixteen processes to run tests via pytest-xdist
|
||||
EXTRA_PARAMS="-n16 $EXTRA_PARAMS"
|
||||
|
||||
# --dist=loadgroup points tests marked with @pytest.mark.xdist_group
|
||||
# to the same worker to make @pytest.mark.order work with xdist
|
||||
@@ -169,28 +168,23 @@ runs:
|
||||
EXTRA_PARAMS="--durations-path $TEST_OUTPUT/benchmark_durations.json $EXTRA_PARAMS"
|
||||
fi
|
||||
|
||||
if [[ $BUILD_TYPE == "debug" && $RUNNER_ARCH == 'X64' ]]; then
|
||||
if [[ "${{ inputs.build_type }}" == "debug" ]]; then
|
||||
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
|
||||
elif [[ "${{ inputs.build_type }}" == "release" ]]; then
|
||||
cov_prefix=()
|
||||
else
|
||||
cov_prefix=()
|
||||
fi
|
||||
|
||||
# Wake up the cluster if we use remote neon instance
|
||||
if [ "${{ inputs.build_type }}" = "remote" ] && [ -n "${BENCHMARK_CONNSTR}" ]; then
|
||||
QUERIES=("SELECT version()")
|
||||
if [[ "${PLATFORM}" = "neon"* ]]; then
|
||||
QUERIES+=("SHOW neon.tenant_id")
|
||||
QUERIES+=("SHOW neon.timeline_id")
|
||||
fi
|
||||
|
||||
for q in "${QUERIES[@]}"; do
|
||||
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/psql ${BENCHMARK_CONNSTR} -c "${q}"
|
||||
done
|
||||
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/psql ${BENCHMARK_CONNSTR} -c "SELECT version();"
|
||||
fi
|
||||
|
||||
# Run the tests.
|
||||
#
|
||||
# --alluredir saves test results in Allure format (in a specified directory)
|
||||
# The junit.xml file allows CI tools to display more fine-grained test information
|
||||
# in its "Tests" tab in the results page.
|
||||
# --verbose prints name of each test (helpful when there are
|
||||
# multiple tests in one file)
|
||||
# -rA prints summary in the end
|
||||
@@ -199,6 +193,7 @@ runs:
|
||||
#
|
||||
mkdir -p $TEST_OUTPUT/allure/results
|
||||
"${cov_prefix[@]}" ./scripts/pytest \
|
||||
--junitxml=$TEST_OUTPUT/junit.xml \
|
||||
--alluredir=$TEST_OUTPUT/allure/results \
|
||||
--tb=short \
|
||||
--verbose \
|
||||
@@ -211,13 +206,13 @@ runs:
|
||||
fi
|
||||
|
||||
- name: Upload compatibility snapshot
|
||||
# Note, that we use `github.base_ref` which is a target branch for a PR
|
||||
if: github.event_name == 'pull_request' && github.base_ref == 'release'
|
||||
if: github.ref_name == 'release'
|
||||
uses: ./.github/actions/upload
|
||||
with:
|
||||
name: compatibility-snapshot-${{ runner.arch }}-${{ inputs.build_type }}-pg${{ inputs.pg_version }}
|
||||
name: compatibility-snapshot-${{ inputs.build_type }}-pg${{ inputs.pg_version }}-${{ github.run_id }}
|
||||
# Directory is created by test_compatibility.py::test_create_snapshot, keep the path in sync with the test
|
||||
path: /tmp/test_output/compatibility_snapshot_pg${{ inputs.pg_version }}/
|
||||
prefix: latest
|
||||
|
||||
- name: Upload test results
|
||||
if: ${{ !cancelled() }}
|
||||
|
||||
36
.github/actions/set-docker-config-dir/action.yml
vendored
36
.github/actions/set-docker-config-dir/action.yml
vendored
@@ -1,36 +0,0 @@
|
||||
name: "Set custom docker config directory"
|
||||
description: "Create a directory for docker config and set DOCKER_CONFIG"
|
||||
|
||||
# Use custom DOCKER_CONFIG directory to avoid conflicts with default settings
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Show warning on GitHub-hosted runners
|
||||
if: runner.environment == 'github-hosted'
|
||||
shell: bash -euo pipefail {0}
|
||||
run: |
|
||||
# Using the following environment variables to find a path to the workflow file
|
||||
# ${GITHUB_WORKFLOW_REF} - octocat/hello-world/.github/workflows/my-workflow.yml@refs/heads/my_branch
|
||||
# ${GITHUB_REPOSITORY} - octocat/hello-world
|
||||
# ${GITHUB_REF} - refs/heads/my_branch
|
||||
# From https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/variables
|
||||
|
||||
filename_with_ref=${GITHUB_WORKFLOW_REF#"$GITHUB_REPOSITORY/"}
|
||||
filename=${filename_with_ref%"@$GITHUB_REF"}
|
||||
|
||||
# https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#setting-a-warning-message
|
||||
title='Unnecessary usage of `.github/actions/set-docker-config-dir`'
|
||||
message='No need to use `.github/actions/set-docker-config-dir` action on GitHub-hosted runners'
|
||||
echo "::warning file=${filename},title=${title}::${message}"
|
||||
|
||||
- uses: pyTooling/Actions/with-post-step@74afc5a42a17a046c90c68cb5cfa627e5c6c5b6b # v1.0.7
|
||||
env:
|
||||
DOCKER_CONFIG: .docker-custom-${{ github.run_id }}-${{ github.run_attempt }}
|
||||
with:
|
||||
main: |
|
||||
mkdir -p "${DOCKER_CONFIG}"
|
||||
echo DOCKER_CONFIG=${DOCKER_CONFIG} | tee -a $GITHUB_ENV
|
||||
post: |
|
||||
if [ -d "${DOCKER_CONFIG}" ]; then
|
||||
rm -r "${DOCKER_CONFIG}"
|
||||
fi
|
||||
4
.github/actions/upload/action.yml
vendored
4
.github/actions/upload/action.yml
vendored
@@ -8,7 +8,7 @@ inputs:
|
||||
description: "A directory or file to upload"
|
||||
required: true
|
||||
prefix:
|
||||
description: "S3 prefix. Default is '${GITHUB_SHA}/${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
|
||||
description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
|
||||
required: false
|
||||
|
||||
runs:
|
||||
@@ -45,7 +45,7 @@ runs:
|
||||
env:
|
||||
SOURCE: ${{ inputs.path }}
|
||||
ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst
|
||||
PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}/{2}', github.event.pull_request.head.sha || github.sha, github.run_id , github.run_attempt) }}
|
||||
PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}', github.run_id, github.run_attempt) }}
|
||||
run: |
|
||||
BUCKET=neon-github-public-dev
|
||||
FILENAME=$(basename $ARCHIVE)
|
||||
|
||||
168
.github/workflows/_benchmarking_preparation.yml
vendored
168
.github/workflows/_benchmarking_preparation.yml
vendored
@@ -1,168 +0,0 @@
|
||||
name: Prepare benchmarking databases by restoring dumps
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
# no inputs needed
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
jobs:
|
||||
setup-databases:
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
platform: [ aws-rds-postgres, aws-aurora-serverless-v2-postgres, neon ]
|
||||
database: [ clickbench, tpch, userexample ]
|
||||
|
||||
env:
|
||||
LD_LIBRARY_PATH: /tmp/neon/pg_install/v16/lib
|
||||
PLATFORM: ${{ matrix.platform }}
|
||||
PG_BINARIES: /tmp/neon/pg_install/v16/bin
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- name: Set up Connection String
|
||||
id: set-up-prep-connstr
|
||||
run: |
|
||||
case "${PLATFORM}" in
|
||||
neon)
|
||||
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
|
||||
;;
|
||||
aws-rds-postgres)
|
||||
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
|
||||
;;
|
||||
aws-aurora-serverless-v2-postgres)
|
||||
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CONNSTR }}
|
||||
;;
|
||||
*)
|
||||
echo >&2 "Unknown PLATFORM=${PLATFORM}"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
|
||||
# we create a table that has one row for each database that we want to restore with the status whether the restore is done
|
||||
- name: Create benchmark_restore_status table if it does not exist
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
|
||||
DATABASE_NAME: ${{ matrix.database }}
|
||||
# to avoid a race condition of multiple jobs trying to create the table at the same time,
|
||||
# we use an advisory lock
|
||||
run: |
|
||||
${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -c "
|
||||
SELECT pg_advisory_lock(4711);
|
||||
CREATE TABLE IF NOT EXISTS benchmark_restore_status (
|
||||
databasename text primary key,
|
||||
restore_done boolean
|
||||
);
|
||||
SELECT pg_advisory_unlock(4711);
|
||||
"
|
||||
|
||||
- name: Check if restore is already done
|
||||
id: check-restore-done
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
|
||||
DATABASE_NAME: ${{ matrix.database }}
|
||||
run: |
|
||||
skip=false
|
||||
if ${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -tAc "SELECT 1 FROM benchmark_restore_status WHERE databasename='${{ env.DATABASE_NAME }}' AND restore_done=true;" | grep -q 1; then
|
||||
echo "Restore already done for database ${{ env.DATABASE_NAME }} on platform ${{ env.PLATFORM }}. Skipping this database."
|
||||
skip=true
|
||||
fi
|
||||
echo "skip=${skip}" | tee -a $GITHUB_OUTPUT
|
||||
|
||||
- name: Check and create database if it does not exist
|
||||
if: steps.check-restore-done.outputs.skip != 'true'
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
|
||||
DATABASE_NAME: ${{ matrix.database }}
|
||||
run: |
|
||||
DB_EXISTS=$(${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -tAc "SELECT 1 FROM pg_database WHERE datname='${{ env.DATABASE_NAME }}'")
|
||||
if [ "$DB_EXISTS" != "1" ]; then
|
||||
echo "Database ${{ env.DATABASE_NAME }} does not exist. Creating it..."
|
||||
${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -c "CREATE DATABASE \"${{ env.DATABASE_NAME }}\";"
|
||||
else
|
||||
echo "Database ${{ env.DATABASE_NAME }} already exists."
|
||||
fi
|
||||
|
||||
- name: Download dump from S3 to /tmp/dumps
|
||||
if: steps.check-restore-done.outputs.skip != 'true'
|
||||
env:
|
||||
DATABASE_NAME: ${{ matrix.database }}
|
||||
run: |
|
||||
mkdir -p /tmp/dumps
|
||||
aws s3 cp s3://neon-github-dev/performance/pgdumps/$DATABASE_NAME/$DATABASE_NAME.pg_dump /tmp/dumps/
|
||||
|
||||
- name: Replace database name in connection string
|
||||
if: steps.check-restore-done.outputs.skip != 'true'
|
||||
id: replace-dbname
|
||||
env:
|
||||
DATABASE_NAME: ${{ matrix.database }}
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
|
||||
run: |
|
||||
# Extract the part before the database name
|
||||
base_connstr="${BENCHMARK_CONNSTR%/*}"
|
||||
# Extract the query parameters (if any) after the database name
|
||||
query_params="${BENCHMARK_CONNSTR#*\?}"
|
||||
# Reconstruct the new connection string
|
||||
if [ "$query_params" != "$BENCHMARK_CONNSTR" ]; then
|
||||
new_connstr="${base_connstr}/${DATABASE_NAME}?${query_params}"
|
||||
else
|
||||
new_connstr="${base_connstr}/${DATABASE_NAME}"
|
||||
fi
|
||||
echo "database_connstr=${new_connstr}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Restore dump
|
||||
if: steps.check-restore-done.outputs.skip != 'true'
|
||||
env:
|
||||
DATABASE_NAME: ${{ matrix.database }}
|
||||
DATABASE_CONNSTR: ${{ steps.replace-dbname.outputs.database_connstr }}
|
||||
# the following works only with larger computes:
|
||||
# PGOPTIONS: "-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7"
|
||||
# we add the || true because:
|
||||
# the dumps were created with Neon and contain neon extensions that are not
|
||||
# available in RDS, so we will always report an error, but we can ignore it
|
||||
run: |
|
||||
${PG_BINARIES}/pg_restore --clean --if-exists --no-owner --jobs=4 \
|
||||
-d "${DATABASE_CONNSTR}" /tmp/dumps/${DATABASE_NAME}.pg_dump || true
|
||||
|
||||
- name: Update benchmark_restore_status table
|
||||
if: steps.check-restore-done.outputs.skip != 'true'
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
|
||||
DATABASE_NAME: ${{ matrix.database }}
|
||||
run: |
|
||||
${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -c "
|
||||
INSERT INTO benchmark_restore_status (databasename, restore_done) VALUES ('${{ env.DATABASE_NAME }}', true)
|
||||
ON CONFLICT (databasename) DO UPDATE SET restore_done = true;
|
||||
"
|
||||
324
.github/workflows/_build-and-test-locally.yml
vendored
324
.github/workflows/_build-and-test-locally.yml
vendored
@@ -1,324 +0,0 @@
|
||||
name: Build and Test Locally
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
arch:
|
||||
description: 'x64 or arm64'
|
||||
required: true
|
||||
type: string
|
||||
build-tag:
|
||||
description: 'build tag'
|
||||
required: true
|
||||
type: string
|
||||
build-tools-image:
|
||||
description: 'build-tools image'
|
||||
required: true
|
||||
type: string
|
||||
build-type:
|
||||
description: 'debug or release'
|
||||
required: true
|
||||
type: string
|
||||
pg-versions:
|
||||
description: 'a json array of postgres versions to run regression tests on'
|
||||
required: true
|
||||
type: string
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
env:
|
||||
RUST_BACKTRACE: 1
|
||||
COPT: '-Werror'
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
|
||||
jobs:
|
||||
build-neon:
|
||||
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||
container:
|
||||
image: ${{ inputs.build-tools-image }}
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
# Raise locked memory limit for tokio-epoll-uring.
|
||||
# On 5.10 LTS kernels < 5.10.162 (and generally mainline kernels < 5.12),
|
||||
# io_uring will account the memory of the CQ and SQ as locked.
|
||||
# More details: https://github.com/neondatabase/neon/issues/6373#issuecomment-1905814391
|
||||
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
|
||||
env:
|
||||
BUILD_TYPE: ${{ inputs.build-type }}
|
||||
GIT_VERSION: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
BUILD_TAG: ${{ inputs.build-tag }}
|
||||
|
||||
steps:
|
||||
- name: Fix git ownership
|
||||
run: |
|
||||
# Workaround for `fatal: detected dubious ownership in repository at ...`
|
||||
#
|
||||
# Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
|
||||
# Ref https://github.com/actions/checkout/issues/785
|
||||
#
|
||||
git config --global --add safe.directory ${{ github.workspace }}
|
||||
git config --global --add safe.directory ${GITHUB_WORKSPACE}
|
||||
for r in 14 15 16 17; do
|
||||
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
|
||||
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
|
||||
done
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Set pg 14 revision for caching
|
||||
id: pg_v14_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set pg 15 revision for caching
|
||||
id: pg_v15_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set pg 16 revision for caching
|
||||
id: pg_v16_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set pg 17 revision for caching
|
||||
id: pg_v17_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT
|
||||
|
||||
# Set some environment variables used by all the steps.
|
||||
#
|
||||
# CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
|
||||
# It also includes --features, if any
|
||||
#
|
||||
# CARGO_FEATURES is passed to "cargo metadata". It is separate from CARGO_FLAGS,
|
||||
# because "cargo metadata" doesn't accept --release or --debug options
|
||||
#
|
||||
# We run tests with addtional features, that are turned off by default (e.g. in release builds), see
|
||||
# corresponding Cargo.toml files for their descriptions.
|
||||
- name: Set env variables
|
||||
env:
|
||||
ARCH: ${{ inputs.arch }}
|
||||
run: |
|
||||
CARGO_FEATURES="--features testing"
|
||||
if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then
|
||||
cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
|
||||
CARGO_FLAGS="--locked"
|
||||
elif [[ $BUILD_TYPE == "debug" ]]; then
|
||||
cov_prefix=""
|
||||
CARGO_FLAGS="--locked"
|
||||
elif [[ $BUILD_TYPE == "release" ]]; then
|
||||
cov_prefix=""
|
||||
CARGO_FLAGS="--locked --release"
|
||||
fi
|
||||
{
|
||||
echo "cov_prefix=${cov_prefix}"
|
||||
echo "CARGO_FEATURES=${CARGO_FEATURES}"
|
||||
echo "CARGO_FLAGS=${CARGO_FLAGS}"
|
||||
echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo"
|
||||
} >> $GITHUB_ENV
|
||||
|
||||
- name: Cache postgres v14 build
|
||||
id: cache_pg_14
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v14
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
|
||||
|
||||
- name: Cache postgres v15 build
|
||||
id: cache_pg_15
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v15
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
|
||||
|
||||
- name: Cache postgres v16 build
|
||||
id: cache_pg_16
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v16
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
|
||||
|
||||
- name: Cache postgres v17 build
|
||||
id: cache_pg_17
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v17
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
|
||||
|
||||
- name: Build postgres v14
|
||||
if: steps.cache_pg_14.outputs.cache-hit != 'true'
|
||||
run: mold -run make postgres-v14 -j$(nproc)
|
||||
|
||||
- name: Build postgres v15
|
||||
if: steps.cache_pg_15.outputs.cache-hit != 'true'
|
||||
run: mold -run make postgres-v15 -j$(nproc)
|
||||
|
||||
- name: Build postgres v16
|
||||
if: steps.cache_pg_16.outputs.cache-hit != 'true'
|
||||
run: mold -run make postgres-v16 -j$(nproc)
|
||||
|
||||
- name: Build postgres v17
|
||||
if: steps.cache_pg_17.outputs.cache-hit != 'true'
|
||||
run: mold -run make postgres-v17 -j$(nproc)
|
||||
|
||||
- name: Build neon extensions
|
||||
run: mold -run make neon-pg-ext -j$(nproc)
|
||||
|
||||
- name: Build walproposer-lib
|
||||
run: mold -run make walproposer-lib -j$(nproc)
|
||||
|
||||
- name: Run cargo build
|
||||
run: |
|
||||
PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
|
||||
export PQ_LIB_DIR
|
||||
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins --tests
|
||||
|
||||
# Do install *before* running rust tests because they might recompile the
|
||||
# binaries with different features/flags.
|
||||
- name: Install rust binaries
|
||||
env:
|
||||
ARCH: ${{ inputs.arch }}
|
||||
run: |
|
||||
# Install target binaries
|
||||
mkdir -p /tmp/neon/bin/
|
||||
binaries=$(
|
||||
${cov_prefix} cargo metadata $CARGO_FEATURES --format-version=1 --no-deps |
|
||||
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
|
||||
)
|
||||
for bin in $binaries; do
|
||||
SRC=target/$BUILD_TYPE/$bin
|
||||
DST=/tmp/neon/bin/$bin
|
||||
cp "$SRC" "$DST"
|
||||
done
|
||||
|
||||
# Install test executables and write list of all binaries (for code coverage)
|
||||
if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then
|
||||
# Keep bloated coverage data files away from the rest of the artifact
|
||||
mkdir -p /tmp/coverage/
|
||||
|
||||
mkdir -p /tmp/neon/test_bin/
|
||||
|
||||
test_exe_paths=$(
|
||||
${cov_prefix} cargo test $CARGO_FLAGS $CARGO_FEATURES --message-format=json --no-run |
|
||||
jq -r '.executable | select(. != null)'
|
||||
)
|
||||
for bin in $test_exe_paths; do
|
||||
SRC=$bin
|
||||
DST=/tmp/neon/test_bin/$(basename $bin)
|
||||
|
||||
# We don't need debug symbols for code coverage, so strip them out to make
|
||||
# the artifact smaller.
|
||||
strip "$SRC" -o "$DST"
|
||||
echo "$DST" >> /tmp/coverage/binaries.list
|
||||
done
|
||||
|
||||
for bin in $binaries; do
|
||||
echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
|
||||
done
|
||||
fi
|
||||
|
||||
- name: Run rust tests
|
||||
env:
|
||||
NEXTEST_RETRIES: 3
|
||||
run: |
|
||||
PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
|
||||
export PQ_LIB_DIR
|
||||
LD_LIBRARY_PATH=$(pwd)/pg_install/v17/lib
|
||||
export LD_LIBRARY_PATH
|
||||
|
||||
#nextest does not yet support running doctests
|
||||
${cov_prefix} cargo test --doc $CARGO_FLAGS $CARGO_FEATURES
|
||||
|
||||
# run all non-pageserver tests
|
||||
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E '!package(pageserver)'
|
||||
|
||||
# run pageserver tests with different settings
|
||||
for io_engine in std-fs tokio-epoll-uring ; do
|
||||
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
|
||||
done
|
||||
|
||||
# Run separate tests for real S3
|
||||
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
|
||||
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
|
||||
export REMOTE_STORAGE_S3_REGION=eu-central-1
|
||||
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_s3)'
|
||||
|
||||
# Run separate tests for real Azure Blob Storage
|
||||
# XXX: replace region with `eu-central-1`-like region
|
||||
export ENABLE_REAL_AZURE_REMOTE_STORAGE=y
|
||||
export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}"
|
||||
export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
|
||||
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
|
||||
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
|
||||
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)'
|
||||
|
||||
- name: Install postgres binaries
|
||||
run: |
|
||||
# Use tar to copy files matching the pattern, preserving the paths in the destionation
|
||||
tar c \
|
||||
pg_install/v* \
|
||||
pg_install/build/*/src/test/regress/*.so \
|
||||
pg_install/build/*/src/test/regress/pg_regress \
|
||||
pg_install/build/*/src/test/isolation/isolationtester \
|
||||
pg_install/build/*/src/test/isolation/pg_isolation_regress \
|
||||
| tar x -C /tmp/neon
|
||||
|
||||
- name: Upload Neon artifact
|
||||
uses: ./.github/actions/upload
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-artifact
|
||||
path: /tmp/neon
|
||||
|
||||
# XXX: keep this after the binaries.list is formed, so the coverage can properly work later
|
||||
- name: Merge and upload coverage data
|
||||
if: inputs.build-type == 'debug'
|
||||
uses: ./.github/actions/save-coverage-data
|
||||
|
||||
regress-tests:
|
||||
# Don't run regression tests on debug arm64 builds
|
||||
if: inputs.build-type != 'debug' || inputs.arch != 'arm64'
|
||||
needs: [ build-neon ]
|
||||
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||
container:
|
||||
image: ${{ inputs.build-tools-image }}
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
# for changed limits, see comments on `options:` earlier in this file
|
||||
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
pg_version: ${{ fromJson(inputs.pg-versions) }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Pytest regression tests
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
timeout-minutes: 60
|
||||
with:
|
||||
build_type: ${{ inputs.build-type }}
|
||||
test_selection: regress
|
||||
needs_postgres_source: true
|
||||
run_with_real_s3: true
|
||||
real_s3_bucket: neon-github-ci-tests
|
||||
real_s3_region: eu-central-1
|
||||
rerun_flaky: true
|
||||
pg_version: ${{ matrix.pg_version }}
|
||||
env:
|
||||
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
|
||||
BUILD_TAG: ${{ inputs.build-tag }}
|
||||
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
|
||||
|
||||
# Temporary disable this step until we figure out why it's so flaky
|
||||
# Ref https://github.com/neondatabase/neon/issues/4540
|
||||
- name: Merge and upload coverage data
|
||||
if: |
|
||||
false &&
|
||||
inputs.build-type == 'debug' && matrix.pg_version == 'v16'
|
||||
uses: ./.github/actions/save-coverage-data
|
||||
56
.github/workflows/_push-to-acr.yml
vendored
56
.github/workflows/_push-to-acr.yml
vendored
@@ -1,56 +0,0 @@
|
||||
name: Push images to ACR
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
client_id:
|
||||
description: Client ID of Azure managed identity or Entra app
|
||||
required: true
|
||||
type: string
|
||||
image_tag:
|
||||
description: Tag for the container image
|
||||
required: true
|
||||
type: string
|
||||
images:
|
||||
description: Images to push
|
||||
required: true
|
||||
type: string
|
||||
registry_name:
|
||||
description: Name of the container registry
|
||||
required: true
|
||||
type: string
|
||||
subscription_id:
|
||||
description: Azure subscription ID
|
||||
required: true
|
||||
type: string
|
||||
tenant_id:
|
||||
description: Azure tenant ID
|
||||
required: true
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
push-to-acr:
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
contents: read # This is required for actions/checkout
|
||||
id-token: write # This is required for Azure Login to work.
|
||||
|
||||
steps:
|
||||
- name: Azure login
|
||||
uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # @v2.1.1
|
||||
with:
|
||||
client-id: ${{ inputs.client_id }}
|
||||
subscription-id: ${{ inputs.subscription_id }}
|
||||
tenant-id: ${{ inputs.tenant_id }}
|
||||
|
||||
- name: Login to ACR
|
||||
run: |
|
||||
az acr login --name=${{ inputs.registry_name }}
|
||||
|
||||
- name: Copy docker images to ACR ${{ inputs.registry_name }}
|
||||
run: |
|
||||
images='${{ inputs.images }}'
|
||||
for image in ${images}; do
|
||||
docker buildx imagetools create \
|
||||
-t ${{ inputs.registry_name }}.azurecr.io/neondatabase/${image}:${{ inputs.image_tag }} \
|
||||
neondatabase/${image}:${{ inputs.image_tag }}
|
||||
done
|
||||
15
.github/workflows/actionlint.yml
vendored
15
.github/workflows/actionlint.yml
vendored
@@ -24,7 +24,7 @@ jobs:
|
||||
|
||||
actionlint:
|
||||
needs: [ check-permissions ]
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: reviewdog/action-actionlint@v1
|
||||
@@ -36,16 +36,3 @@ jobs:
|
||||
fail_on_error: true
|
||||
filter_mode: nofilter
|
||||
level: error
|
||||
|
||||
- name: Disallow 'ubuntu-latest' runners
|
||||
run: |
|
||||
PAT='^\s*runs-on:.*-latest'
|
||||
if grep -ERq $PAT .github/workflows; then
|
||||
grep -ERl $PAT .github/workflows |\
|
||||
while read -r f
|
||||
do
|
||||
l=$(grep -nE $PAT $f | awk -F: '{print $1}' | head -1)
|
||||
echo "::error file=$f,line=$l::Please use 'ubuntu-22.04' instead of 'ubuntu-latest'"
|
||||
done
|
||||
exit 1
|
||||
fi
|
||||
|
||||
59
.github/workflows/approved-for-ci-run.yml
vendored
59
.github/workflows/approved-for-ci-run.yml
vendored
@@ -44,7 +44,7 @@ jobs:
|
||||
contains(fromJSON('["opened", "synchronize", "reopened", "closed"]'), github.event.action) &&
|
||||
contains(github.event.pull_request.labels.*.name, 'approved-for-ci-run')
|
||||
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- run: gh pr --repo "${GITHUB_REPOSITORY}" edit "${PR_NUMBER}" --remove-label "approved-for-ci-run"
|
||||
@@ -60,7 +60,7 @@ jobs:
|
||||
github.event.action == 'labeled' &&
|
||||
contains(github.event.pull_request.labels.*.name, 'approved-for-ci-run')
|
||||
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- run: gh pr --repo "${GITHUB_REPOSITORY}" edit "${PR_NUMBER}" --remove-label "approved-for-ci-run"
|
||||
@@ -69,41 +69,15 @@ jobs:
|
||||
with:
|
||||
ref: main
|
||||
token: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
|
||||
- name: Look for existing PR
|
||||
id: get-pr
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
run: |
|
||||
ALREADY_CREATED="$(gh pr --repo ${GITHUB_REPOSITORY} list --head ${BRANCH} --base main --json number --jq '.[].number')"
|
||||
echo "ALREADY_CREATED=${ALREADY_CREATED}" >> ${GITHUB_OUTPUT}
|
||||
|
||||
- name: Get changed labels
|
||||
id: get-labels
|
||||
if: steps.get-pr.outputs.ALREADY_CREATED != ''
|
||||
env:
|
||||
ALREADY_CREATED: ${{ steps.get-pr.outputs.ALREADY_CREATED }}
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
run: |
|
||||
LABELS_TO_REMOVE=$(comm -23 <(gh pr --repo ${GITHUB_REPOSITORY} view ${ALREADY_CREATED} --json labels --jq '.labels.[].name'| ( grep -E '^run' || true ) | sort) \
|
||||
<(gh pr --repo ${GITHUB_REPOSITORY} view ${PR_NUMBER} --json labels --jq '.labels.[].name' | ( grep -E '^run' || true ) | sort ) |\
|
||||
( grep -v run-e2e-tests-in-draft || true ) | paste -sd , -)
|
||||
LABELS_TO_ADD=$(comm -13 <(gh pr --repo ${GITHUB_REPOSITORY} view ${ALREADY_CREATED} --json labels --jq '.labels.[].name'| ( grep -E '^run' || true ) |sort) \
|
||||
<(gh pr --repo ${GITHUB_REPOSITORY} view ${PR_NUMBER} --json labels --jq '.labels.[].name' | ( grep -E '^run' || true ) | sort ) |\
|
||||
paste -sd , -)
|
||||
echo "LABELS_TO_ADD=${LABELS_TO_ADD}" >> ${GITHUB_OUTPUT}
|
||||
echo "LABELS_TO_REMOVE=${LABELS_TO_REMOVE}" >> ${GITHUB_OUTPUT}
|
||||
|
||||
- run: gh pr checkout "${PR_NUMBER}"
|
||||
|
||||
- run: git checkout -b "${BRANCH}"
|
||||
|
||||
- run: git push --force origin "${BRANCH}"
|
||||
if: steps.get-pr.outputs.ALREADY_CREATED == ''
|
||||
|
||||
- name: Create a Pull Request for CI run (if required)
|
||||
if: steps.get-pr.outputs.ALREADY_CREATED == ''
|
||||
env:
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
run: |
|
||||
cat << EOF > body.md
|
||||
@@ -114,33 +88,16 @@ jobs:
|
||||
Feel free to review/comment/discuss the original PR #${PR_NUMBER}.
|
||||
EOF
|
||||
|
||||
LABELS=$( (gh pr --repo "${GITHUB_REPOSITORY}" view ${PR_NUMBER} --json labels --jq '.labels.[].name'; echo run-e2e-tests-in-draft )| \
|
||||
grep -E '^run' | paste -sd , -)
|
||||
gh pr --repo "${GITHUB_REPOSITORY}" create --title "CI run for PR #${PR_NUMBER}" \
|
||||
ALREADY_CREATED="$(gh pr --repo ${GITHUB_REPOSITORY} list --head ${BRANCH} --base main --json number --jq '.[].number')"
|
||||
if [ -z "${ALREADY_CREATED}" ]; then
|
||||
gh pr --repo "${GITHUB_REPOSITORY}" create --title "CI run for PR #${PR_NUMBER}" \
|
||||
--body-file "body.md" \
|
||||
--head "${BRANCH}" \
|
||||
--base "main" \
|
||||
--label ${LABELS} \
|
||||
--label "run-e2e-tests-in-draft" \
|
||||
--draft
|
||||
- name: Modify the existing pull request (if required)
|
||||
if: steps.get-pr.outputs.ALREADY_CREATED != ''
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
LABELS_TO_ADD: ${{ steps.get-labels.outputs.LABELS_TO_ADD }}
|
||||
LABELS_TO_REMOVE: ${{ steps.get-labels.outputs.LABELS_TO_REMOVE }}
|
||||
ALREADY_CREATED: ${{ steps.get-pr.outputs.ALREADY_CREATED }}
|
||||
run: |
|
||||
ADD_CMD=
|
||||
REMOVE_CMD=
|
||||
[ -z "${LABELS_TO_ADD}" ] || ADD_CMD="--add-label ${LABELS_TO_ADD}"
|
||||
[ -z "${LABELS_TO_REMOVE}" ] || REMOVE_CMD="--remove-label ${LABELS_TO_REMOVE}"
|
||||
if [ -n "${ADD_CMD}" ] || [ -n "${REMOVE_CMD}" ]; then
|
||||
gh pr --repo "${GITHUB_REPOSITORY}" edit ${ALREADY_CREATED} ${ADD_CMD} ${REMOVE_CMD}
|
||||
fi
|
||||
|
||||
- run: git push --force origin "${BRANCH}"
|
||||
if: steps.get-pr.outputs.ALREADY_CREATED != ''
|
||||
|
||||
cleanup:
|
||||
# Close PRs and delete branchs if the original PR is closed.
|
||||
|
||||
@@ -152,7 +109,7 @@ jobs:
|
||||
github.event.action == 'closed' &&
|
||||
github.event.pull_request.head.repo.full_name != github.repository
|
||||
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Close PR and delete `ci-run/pr-${{ env.PR_NUMBER }}` branch
|
||||
|
||||
548
.github/workflows/benchmarking.yml
vendored
548
.github/workflows/benchmarking.yml
vendored
@@ -12,6 +12,7 @@ on:
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
- cron: '0 3 * * *' # run once a day, timezone is utc
|
||||
|
||||
workflow_dispatch: # adds ability to run this manually
|
||||
inputs:
|
||||
region_id:
|
||||
@@ -37,11 +38,6 @@ on:
|
||||
description: 'AWS-RDS and AWS-AURORA normally only run on Saturday. Set this to true to run them on every workflow_dispatch'
|
||||
required: false
|
||||
default: false
|
||||
run_only_pgvector_tests:
|
||||
type: boolean
|
||||
description: 'Run pgvector tests but no other tests. If not set, all tests including pgvector tests will be run'
|
||||
required: false
|
||||
default: false
|
||||
|
||||
defaults:
|
||||
run:
|
||||
@@ -54,55 +50,28 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
bench:
|
||||
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- DEFAULT_PG_VERSION: 16
|
||||
PLATFORM: "neon-staging"
|
||||
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
|
||||
RUNNER: [ self-hosted, us-east-2, x64 ]
|
||||
- DEFAULT_PG_VERSION: 16
|
||||
PLATFORM: "azure-staging"
|
||||
region_id: 'azure-eastus2'
|
||||
RUNNER: [ self-hosted, eastus2, x64 ]
|
||||
env:
|
||||
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
|
||||
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
DEFAULT_PG_VERSION: ${{ matrix.DEFAULT_PG_VERSION }}
|
||||
DEFAULT_PG_VERSION: 14
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: remote
|
||||
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
|
||||
PLATFORM: ${{ matrix.PLATFORM }}
|
||||
PLATFORM: "neon-staging"
|
||||
|
||||
runs-on: ${{ matrix.RUNNER }}
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials # necessary on Azure runners
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
name: neon-${{ runner.os }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
|
||||
@@ -110,7 +79,7 @@ jobs:
|
||||
id: create-neon-project
|
||||
uses: ./.github/actions/neon-project-create
|
||||
with:
|
||||
region_id: ${{ matrix.region_id }}
|
||||
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
|
||||
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
@@ -121,18 +90,10 @@ jobs:
|
||||
test_selection: performance
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
# Set --sparse-ordering option of pytest-order plugin
|
||||
# to ensure tests are running in order of appears in the file.
|
||||
# It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
|
||||
extra_params:
|
||||
-m remote_cluster
|
||||
--sparse-ordering
|
||||
--timeout 14400
|
||||
--ignore test_runner/performance/test_perf_olap.py
|
||||
--ignore test_runner/performance/test_perf_pgvector_queries.py
|
||||
--ignore test_runner/performance/test_logical_replication.py
|
||||
--ignore test_runner/performance/test_physical_replication.py
|
||||
extra_params: -m remote_cluster --sparse-ordering --timeout 5400 --ignore test_runner/performance/test_perf_olap.py
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
@@ -146,7 +107,6 @@ jobs:
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
- name: Create Allure report
|
||||
id: create-allure-report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
|
||||
@@ -155,119 +115,22 @@ jobs:
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C033QLM5P7D" # dev-staging-stream
|
||||
slack-message: |
|
||||
Periodic perf testing: ${{ job.status }}
|
||||
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
|
||||
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
replication-tests:
|
||||
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
env:
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
DEFAULT_PG_VERSION: 16
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: remote
|
||||
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
|
||||
PLATFORM: "neon-staging"
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
|
||||
- name: Run Logical Replication benchmarks
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: performance/test_logical_replication.py
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 5400
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
BENCHMARK_PROJECT_ID_PUB: ${{ vars.BENCHMARK_PROJECT_ID_PUB }}
|
||||
BENCHMARK_PROJECT_ID_SUB: ${{ vars.BENCHMARK_PROJECT_ID_SUB }}
|
||||
|
||||
- name: Run Physical Replication benchmarks
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: performance/test_physical_replication.py
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 5400
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
- name: Create Allure report
|
||||
id: create-allure-report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
store-test-results-into-db: true
|
||||
env:
|
||||
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C06T9AMNDQQ" # on-call-compute-staging-stream
|
||||
slack-message: |
|
||||
Periodic replication testing: ${{ job.status }}
|
||||
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
|
||||
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
|
||||
slack-message: "Periodic perf testing: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
generate-matrices:
|
||||
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
|
||||
# Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday)
|
||||
#
|
||||
# Available platforms:
|
||||
# - neonvm-captest-new: Freshly created project (1 CU)
|
||||
# - neonvm-captest-freetier: Use freetier-sized compute (0.25 CU)
|
||||
# - neonvm-captest-azure-new: Freshly created project (1 CU) in azure region
|
||||
# - neonvm-captest-azure-freetier: Use freetier-sized compute (0.25 CU) in azure region
|
||||
# - neonvm-captest-reuse: Reusing existing project
|
||||
# - neon-captest-new: Freshly created project (1 CU)
|
||||
# - neon-captest-freetier: Use freetier-sized compute (0.25 CU)
|
||||
# - neon-captest-reuse: Reusing existing project
|
||||
# - rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
|
||||
# - rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
|
||||
env:
|
||||
RUN_AWS_RDS_AND_AURORA: ${{ github.event.inputs.run_AWS_RDS_AND_AURORA || 'false' }}
|
||||
DEFAULT_REGION_ID: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
pgbench-compare-matrix: ${{ steps.pgbench-compare-matrix.outputs.matrix }}
|
||||
olap-compare-matrix: ${{ steps.olap-compare-matrix.outputs.matrix }}
|
||||
@@ -277,37 +140,23 @@ jobs:
|
||||
- name: Generate matrix for pgbench benchmark
|
||||
id: pgbench-compare-matrix
|
||||
run: |
|
||||
region_id_default=${{ env.DEFAULT_REGION_ID }}
|
||||
runner_default='["self-hosted", "us-east-2", "x64"]'
|
||||
runner_azure='["self-hosted", "eastus2", "x64"]'
|
||||
image_default="neondatabase/build-tools:pinned"
|
||||
matrix='{
|
||||
"pg_version" : [
|
||||
16
|
||||
],
|
||||
"region_id" : [
|
||||
"'"$region_id_default"'"
|
||||
],
|
||||
"platform": [
|
||||
"neonvm-captest-new",
|
||||
"neonvm-captest-reuse",
|
||||
"neon-captest-new",
|
||||
"neon-captest-reuse",
|
||||
"neonvm-captest-new"
|
||||
],
|
||||
"db_size": [ "10gb" ],
|
||||
"runner": ['"$runner_default"'],
|
||||
"image": [ "'"$image_default"'" ],
|
||||
"include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" },
|
||||
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" },
|
||||
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]
|
||||
"include": [{ "platform": "neon-captest-freetier", "db_size": "3gb" },
|
||||
{ "platform": "neon-captest-new", "db_size": "50gb" },
|
||||
{ "platform": "neonvm-captest-freetier", "db_size": "3gb" },
|
||||
{ "platform": "neonvm-captest-new", "db_size": "50gb" },
|
||||
{ "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb" }]
|
||||
}'
|
||||
|
||||
if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
|
||||
matrix=$(echo "$matrix" | jq '.include += [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "rds-postgres", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "rds-aurora", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]')
|
||||
if [ "$(date +%A)" = "Saturday" ]; then
|
||||
matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres", "db_size": "10gb"},
|
||||
{ "platform": "rds-aurora", "db_size": "50gb"}]')
|
||||
fi
|
||||
|
||||
echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
|
||||
@@ -317,7 +166,7 @@ jobs:
|
||||
run: |
|
||||
matrix='{
|
||||
"platform": [
|
||||
"neonvm-captest-reuse"
|
||||
"neon-captest-reuse"
|
||||
]
|
||||
}'
|
||||
|
||||
@@ -333,7 +182,7 @@ jobs:
|
||||
run: |
|
||||
matrix='{
|
||||
"platform": [
|
||||
"neonvm-captest-reuse"
|
||||
"neon-captest-reuse"
|
||||
],
|
||||
"scale": [
|
||||
"10"
|
||||
@@ -347,17 +196,8 @@ jobs:
|
||||
|
||||
echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
|
||||
|
||||
prepare_AWS_RDS_databases:
|
||||
uses: ./.github/workflows/_benchmarking_preparation.yml
|
||||
secrets: inherit
|
||||
|
||||
pgbench-compare:
|
||||
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
|
||||
needs: [ generate-matrices, prepare_AWS_RDS_databases ]
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
needs: [ generate-matrices ]
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -367,15 +207,15 @@ jobs:
|
||||
TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
|
||||
TEST_PG_BENCH_SCALES_MATRIX: ${{ matrix.db_size }}
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
DEFAULT_PG_VERSION: ${{ matrix.pg_version }}
|
||||
DEFAULT_PG_VERSION: 14
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: remote
|
||||
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
|
||||
PLATFORM: ${{ matrix.platform }}
|
||||
|
||||
runs-on: ${{ matrix.runner }}
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: ${{ matrix.image }}
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
|
||||
options: --init
|
||||
|
||||
# Increase timeout to 8h, default timeout is 6h
|
||||
@@ -384,41 +224,40 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
name: neon-${{ runner.os }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
|
||||
- name: Add Postgres binaries to PATH
|
||||
run: |
|
||||
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
|
||||
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
|
||||
|
||||
- name: Create Neon Project
|
||||
if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform)
|
||||
if: contains(fromJson('["neon-captest-new", "neon-captest-freetier", "neonvm-captest-new", "neonvm-captest-freetier"]'), matrix.platform)
|
||||
id: create-neon-project
|
||||
uses: ./.github/actions/neon-project-create
|
||||
with:
|
||||
region_id: ${{ matrix.region_id }}
|
||||
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
|
||||
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
compute_units: ${{ (contains(matrix.platform, 'captest-freetier') && '[0.25, 0.25]') || '[1, 1]' }}
|
||||
compute_units: ${{ (matrix.platform == 'neon-captest-freetier' && '[0.25, 0.25]') || '[1, 1]' }}
|
||||
provisioner: ${{ (contains(matrix.platform, 'neonvm-') && 'k8s-neonvm') || 'k8s-pod' }}
|
||||
|
||||
- name: Set up Connection String
|
||||
id: set-up-connstr
|
||||
run: |
|
||||
case "${PLATFORM}" in
|
||||
neonvm-captest-reuse)
|
||||
neon-captest-reuse)
|
||||
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
|
||||
;;
|
||||
neonvm-captest-sharding-reuse)
|
||||
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_SHARDING_CONNSTR }}
|
||||
;;
|
||||
neonvm-captest-new | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier)
|
||||
neon-captest-new | neon-captest-freetier | neonvm-captest-new | neonvm-captest-freetier)
|
||||
CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
|
||||
;;
|
||||
rds-aurora)
|
||||
@@ -435,6 +274,16 @@ jobs:
|
||||
|
||||
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
||||
|
||||
QUERIES=("SELECT version()")
|
||||
if [[ "${PLATFORM}" = "neon"* ]]; then
|
||||
QUERIES+=("SHOW neon.tenant_id")
|
||||
QUERIES+=("SHOW neon.timeline_id")
|
||||
fi
|
||||
|
||||
for q in "${QUERIES[@]}"; do
|
||||
psql ${CONNSTR} -c "${q}"
|
||||
done
|
||||
|
||||
- name: Benchmark init
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
@@ -443,7 +292,6 @@ jobs:
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
@@ -457,7 +305,6 @@ jobs:
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
@@ -471,7 +318,6 @@ jobs:
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
@@ -485,7 +331,6 @@ jobs:
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
- name: Create Allure report
|
||||
id: create-allure-report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
|
||||
@@ -494,143 +339,7 @@ jobs:
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C033QLM5P7D" # dev-staging-stream
|
||||
slack-message: |
|
||||
Periodic perf testing on ${{ matrix.platform }}: ${{ job.status }}
|
||||
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
|
||||
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
pgbench-pgvector:
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- PLATFORM: "neonvm-captest-pgvector"
|
||||
RUNNER: [ self-hosted, us-east-2, x64 ]
|
||||
- PLATFORM: "azure-captest-pgvector"
|
||||
RUNNER: [ self-hosted, eastus2, x64 ]
|
||||
|
||||
env:
|
||||
TEST_PG_BENCH_DURATIONS_MATRIX: "15m"
|
||||
TEST_PG_BENCH_SCALES_MATRIX: "1"
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
DEFAULT_PG_VERSION: 16
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: remote
|
||||
|
||||
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
|
||||
PLATFORM: ${{ matrix.PLATFORM }}
|
||||
|
||||
runs-on: ${{ matrix.RUNNER }}
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
# until https://github.com/neondatabase/neon/issues/8275 is fixed we temporarily install postgresql-16
|
||||
# instead of using Neon artifacts containing pgbench
|
||||
- name: Install postgresql-16 where pytest expects it
|
||||
run: |
|
||||
# Just to make it easier to test things locally on macOS (with arm64)
|
||||
arch=$(uname -m | sed 's/x86_64/amd64/g' | sed 's/aarch64/arm64/g')
|
||||
|
||||
cd /home/nonroot
|
||||
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-17/libpq5_17.0-1.pgdg110+1_${arch}.deb"
|
||||
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.4-1.pgdg110+2_${arch}.deb"
|
||||
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.4-1.pgdg110+2_${arch}.deb"
|
||||
dpkg -x libpq5_17.0-1.pgdg110+1_${arch}.deb pg
|
||||
dpkg -x postgresql-16_16.4-1.pgdg110+2_${arch}.deb pg
|
||||
dpkg -x postgresql-client-16_16.4-1.pgdg110+2_${arch}.deb pg
|
||||
|
||||
mkdir -p /tmp/neon/pg_install/v16/bin
|
||||
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench
|
||||
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/psql /tmp/neon/pg_install/v16/bin/psql
|
||||
ln -s /home/nonroot/pg/usr/lib/$(uname -m)-linux-gnu /tmp/neon/pg_install/v16/lib
|
||||
|
||||
LD_LIBRARY_PATH="/home/nonroot/pg/usr/lib/$(uname -m)-linux-gnu:${LD_LIBRARY_PATH:-}"
|
||||
export LD_LIBRARY_PATH
|
||||
echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" >> ${GITHUB_ENV}
|
||||
|
||||
/tmp/neon/pg_install/v16/bin/pgbench --version
|
||||
/tmp/neon/pg_install/v16/bin/psql --version
|
||||
|
||||
- name: Set up Connection String
|
||||
id: set-up-connstr
|
||||
run: |
|
||||
case "${PLATFORM}" in
|
||||
neonvm-captest-pgvector)
|
||||
CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR }}
|
||||
;;
|
||||
azure-captest-pgvector)
|
||||
CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR_AZURE }}
|
||||
;;
|
||||
*)
|
||||
echo >&2 "Unknown PLATFORM=${PLATFORM}"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours
|
||||
|
||||
- name: Benchmark pgvector hnsw indexing
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: performance/test_perf_olap.py
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
|
||||
- name: Benchmark pgvector queries
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: performance/test_perf_pgvector_queries.py
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
|
||||
- name: Create Allure report
|
||||
id: create-allure-report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C033QLM5P7D" # dev-staging-stream
|
||||
slack-message: |
|
||||
Periodic perf testing on ${{ env.PLATFORM }}: ${{ job.status }}
|
||||
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
|
||||
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
|
||||
slack-message: "Periodic perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
@@ -642,12 +351,8 @@ jobs:
|
||||
#
|
||||
# *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows
|
||||
# *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB
|
||||
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
needs: [ generate-matrices, pgbench-compare, prepare_AWS_RDS_databases ]
|
||||
if: ${{ !cancelled() }}
|
||||
needs: [ generate-matrices, pgbench-compare ]
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -655,7 +360,7 @@ jobs:
|
||||
|
||||
env:
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
DEFAULT_PG_VERSION: 16
|
||||
DEFAULT_PG_VERSION: 14
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
TEST_OLAP_COLLECT_EXPLAIN: ${{ github.event.inputs.collect_olap_explain }}
|
||||
TEST_OLAP_COLLECT_PG_STAT_STATEMENTS: ${{ github.event.inputs.collect_pg_stat_statements }}
|
||||
@@ -665,34 +370,29 @@ jobs:
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
name: neon-${{ runner.os }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
|
||||
- name: Add Postgres binaries to PATH
|
||||
run: |
|
||||
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
|
||||
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
|
||||
|
||||
- name: Set up Connection String
|
||||
id: set-up-connstr
|
||||
run: |
|
||||
case "${PLATFORM}" in
|
||||
neonvm-captest-reuse)
|
||||
neon-captest-reuse)
|
||||
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR }}
|
||||
;;
|
||||
rds-aurora)
|
||||
@@ -702,13 +402,23 @@ jobs:
|
||||
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CLICKBENCH_10M_CONNSTR }}
|
||||
;;
|
||||
*)
|
||||
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'"
|
||||
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
||||
|
||||
QUERIES=("SELECT version()")
|
||||
if [[ "${PLATFORM}" = "neon"* ]]; then
|
||||
QUERIES+=("SHOW neon.tenant_id")
|
||||
QUERIES+=("SHOW neon.timeline_id")
|
||||
fi
|
||||
|
||||
for q in "${QUERIES[@]}"; do
|
||||
psql ${CONNSTR} -c "${q}"
|
||||
done
|
||||
|
||||
- name: ClickBench benchmark
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
@@ -717,7 +427,6 @@ jobs:
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_clickbench
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
@@ -727,7 +436,6 @@ jobs:
|
||||
TEST_OLAP_SCALE: 10
|
||||
|
||||
- name: Create Allure report
|
||||
id: create-allure-report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
|
||||
@@ -736,10 +444,7 @@ jobs:
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C033QLM5P7D" # dev-staging-stream
|
||||
slack-message: |
|
||||
Periodic OLAP perf testing on ${{ matrix.platform }}: ${{ job.status }}
|
||||
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
|
||||
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
|
||||
slack-message: "Periodic OLAP perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
@@ -750,12 +455,8 @@ jobs:
|
||||
# We might change it after https://github.com/neondatabase/neon/issues/2900.
|
||||
#
|
||||
# *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
|
||||
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
needs: [ generate-matrices, clickbench-compare, prepare_AWS_RDS_databases ]
|
||||
if: ${{ !cancelled() }}
|
||||
needs: [ generate-matrices, clickbench-compare ]
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -763,7 +464,7 @@ jobs:
|
||||
|
||||
env:
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
DEFAULT_PG_VERSION: 16
|
||||
DEFAULT_PG_VERSION: 14
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: remote
|
||||
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
|
||||
@@ -772,43 +473,38 @@ jobs:
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
name: neon-${{ runner.os }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
|
||||
- name: Add Postgres binaries to PATH
|
||||
run: |
|
||||
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
|
||||
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
|
||||
|
||||
- name: Get Connstring Secret Name
|
||||
run: |
|
||||
case "${PLATFORM}" in
|
||||
neonvm-captest-reuse)
|
||||
neon-captest-reuse)
|
||||
ENV_PLATFORM=CAPTEST_TPCH
|
||||
;;
|
||||
rds-aurora)
|
||||
ENV_PLATFORM=RDS_AURORA_TPCH
|
||||
;;
|
||||
rds-postgres)
|
||||
ENV_PLATFORM=RDS_POSTGRES_TPCH
|
||||
ENV_PLATFORM=RDS_AURORA_TPCH
|
||||
;;
|
||||
*)
|
||||
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'"
|
||||
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
@@ -823,6 +519,16 @@ jobs:
|
||||
|
||||
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
||||
|
||||
QUERIES=("SELECT version()")
|
||||
if [[ "${PLATFORM}" = "neon"* ]]; then
|
||||
QUERIES+=("SHOW neon.tenant_id")
|
||||
QUERIES+=("SHOW neon.timeline_id")
|
||||
fi
|
||||
|
||||
for q in "${QUERIES[@]}"; do
|
||||
psql ${CONNSTR} -c "${q}"
|
||||
done
|
||||
|
||||
- name: Run TPC-H benchmark
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
@@ -831,7 +537,6 @@ jobs:
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_tpch
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
@@ -839,7 +544,6 @@ jobs:
|
||||
TEST_OLAP_SCALE: ${{ matrix.scale }}
|
||||
|
||||
- name: Create Allure report
|
||||
id: create-allure-report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
|
||||
@@ -848,20 +552,13 @@ jobs:
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C033QLM5P7D" # dev-staging-stream
|
||||
slack-message: |
|
||||
Periodic TPC-H perf testing on ${{ matrix.platform }}: ${{ job.status }}
|
||||
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
|
||||
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
|
||||
slack-message: "Periodic TPC-H perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
user-examples-compare:
|
||||
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
needs: [ generate-matrices, tpch-compare, prepare_AWS_RDS_databases ]
|
||||
if: ${{ !cancelled() }}
|
||||
needs: [ generate-matrices, tpch-compare ]
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -869,7 +566,7 @@ jobs:
|
||||
|
||||
env:
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
DEFAULT_PG_VERSION: 16
|
||||
DEFAULT_PG_VERSION: 14
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: remote
|
||||
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
|
||||
@@ -877,34 +574,29 @@ jobs:
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
name: neon-${{ runner.os }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
|
||||
- name: Add Postgres binaries to PATH
|
||||
run: |
|
||||
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
|
||||
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
|
||||
|
||||
- name: Set up Connection String
|
||||
id: set-up-connstr
|
||||
run: |
|
||||
case "${PLATFORM}" in
|
||||
neonvm-captest-reuse)
|
||||
neon-captest-reuse)
|
||||
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_CAPTEST_CONNSTR }}
|
||||
;;
|
||||
rds-aurora)
|
||||
@@ -914,13 +606,23 @@ jobs:
|
||||
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_POSTGRES_CONNSTR }}
|
||||
;;
|
||||
*)
|
||||
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'"
|
||||
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
||||
|
||||
QUERIES=("SELECT version()")
|
||||
if [[ "${PLATFORM}" = "neon"* ]]; then
|
||||
QUERIES+=("SHOW neon.tenant_id")
|
||||
QUERIES+=("SHOW neon.timeline_id")
|
||||
fi
|
||||
|
||||
for q in "${QUERIES[@]}"; do
|
||||
psql ${CONNSTR} -c "${q}"
|
||||
done
|
||||
|
||||
- name: Run user examples
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
@@ -929,14 +631,12 @@ jobs:
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_user_examples
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
|
||||
- name: Create Allure report
|
||||
id: create-allure-report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
|
||||
@@ -945,10 +645,6 @@ jobs:
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C033QLM5P7D" # dev-staging-stream
|
||||
slack-message: |
|
||||
Periodic TPC-H perf testing on ${{ matrix.platform }}: ${{ job.status }}
|
||||
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
|
||||
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
|
||||
|
||||
slack-message: "Periodic User example perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
37
.github/workflows/build-build-tools-image.yml
vendored
37
.github/workflows/build-build-tools-image.yml
vendored
@@ -30,6 +30,7 @@ jobs:
|
||||
check-image:
|
||||
uses: ./.github/workflows/check-build-tools-image.yml
|
||||
|
||||
# This job uses older version of GitHub Actions because it's run on gen2 runners, which don't support node 20 (for newer versions)
|
||||
build-image:
|
||||
needs: [ check-image ]
|
||||
if: needs.check-image.outputs.found == 'false'
|
||||
@@ -38,7 +39,7 @@ jobs:
|
||||
matrix:
|
||||
arch: [ x64, arm64 ]
|
||||
|
||||
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||
runs-on: ${{ fromJson(format('["self-hosted", "dev", "{0}"]', matrix.arch)) }}
|
||||
|
||||
env:
|
||||
IMAGE_TAG: ${{ inputs.image-tag }}
|
||||
@@ -54,38 +55,40 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- uses: ./.github/actions/set-docker-config-dir
|
||||
- uses: docker/setup-buildx-action@v3
|
||||
with:
|
||||
cache-binary: false
|
||||
# Use custom DOCKER_CONFIG directory to avoid conflicts with default settings
|
||||
# The default value is ~/.docker
|
||||
- name: Set custom docker config directory
|
||||
run: |
|
||||
mkdir -p /tmp/.docker-custom
|
||||
echo DOCKER_CONFIG=/tmp/.docker-custom >> $GITHUB_ENV
|
||||
|
||||
- uses: docker/login-action@v3
|
||||
- uses: docker/setup-buildx-action@v2
|
||||
|
||||
- uses: docker/login-action@v2
|
||||
with:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
registry: cache.neon.build
|
||||
username: ${{ secrets.NEON_CI_DOCKERCACHE_USERNAME }}
|
||||
password: ${{ secrets.NEON_CI_DOCKERCACHE_PASSWORD }}
|
||||
|
||||
- uses: docker/build-push-action@v6
|
||||
- uses: docker/build-push-action@v4
|
||||
with:
|
||||
context: .
|
||||
provenance: false
|
||||
push: true
|
||||
pull: true
|
||||
file: Dockerfile.build-tools
|
||||
cache-from: type=registry,ref=cache.neon.build/build-tools:cache-${{ matrix.arch }}
|
||||
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/build-tools:cache-{0},mode=max', matrix.arch) || '' }}
|
||||
cache-from: type=registry,ref=neondatabase/build-tools:cache-${{ matrix.arch }}
|
||||
cache-to: type=registry,ref=neondatabase/build-tools:cache-${{ matrix.arch }},mode=max
|
||||
tags: neondatabase/build-tools:${{ inputs.image-tag }}-${{ matrix.arch }}
|
||||
|
||||
- name: Remove custom docker config directory
|
||||
run: |
|
||||
rm -rf /tmp/.docker-custom
|
||||
|
||||
merge-images:
|
||||
needs: [ build-image ]
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
env:
|
||||
IMAGE_TAG: ${{ inputs.image-tag }}
|
||||
|
||||
1034
.github/workflows/build_and_test.yml
vendored
1034
.github/workflows/build_and_test.yml
vendored
File diff suppressed because it is too large
Load Diff
25
.github/workflows/check-build-tools-image.yml
vendored
25
.github/workflows/check-build-tools-image.yml
vendored
@@ -19,23 +19,32 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
check-image:
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
tag: ${{ steps.get-build-tools-tag.outputs.image-tag }}
|
||||
found: ${{ steps.check-image.outputs.found }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Get build-tools image tag for the current commit
|
||||
id: get-build-tools-tag
|
||||
env:
|
||||
IMAGE_TAG: |
|
||||
${{ hashFiles('Dockerfile.build-tools',
|
||||
'.github/workflows/check-build-tools-image.yml',
|
||||
'.github/workflows/build-build-tools-image.yml') }}
|
||||
# Usually, for COMMIT_SHA, we use `github.event.pull_request.head.sha || github.sha`, but here, even for PRs,
|
||||
# we want to use `github.sha` i.e. point to a phantom merge commit to determine the image tag correctly.
|
||||
COMMIT_SHA: ${{ github.sha }}
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
echo "image-tag=${IMAGE_TAG}" | tee -a $GITHUB_OUTPUT
|
||||
LAST_BUILD_TOOLS_SHA=$(
|
||||
gh api \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
--method GET \
|
||||
--field path=Dockerfile.build-tools \
|
||||
--field sha=${COMMIT_SHA} \
|
||||
--field per_page=1 \
|
||||
--jq ".[0].sha" \
|
||||
"/repos/${GITHUB_REPOSITORY}/commits"
|
||||
)
|
||||
echo "image-tag=${LAST_BUILD_TOOLS_SHA}" | tee -a $GITHUB_OUTPUT
|
||||
|
||||
- name: Check if such tag found in the registry
|
||||
id: check-image
|
||||
|
||||
2
.github/workflows/check-permissions.yml
vendored
2
.github/workflows/check-permissions.yml
vendored
@@ -16,7 +16,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
check-permissions:
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Disallow CI runs on PRs from forks
|
||||
if: |
|
||||
|
||||
@@ -9,7 +9,7 @@ on:
|
||||
|
||||
jobs:
|
||||
cleanup:
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Cleanup
|
||||
run: |
|
||||
|
||||
102
.github/workflows/cloud-regress.yml
vendored
102
.github/workflows/cloud-regress.yml
vendored
@@ -1,102 +0,0 @@
|
||||
name: Cloud Regression Test
|
||||
on:
|
||||
schedule:
|
||||
# * is a special character in YAML so you have to quote this string
|
||||
# ┌───────────── minute (0 - 59)
|
||||
# │ ┌───────────── hour (0 - 23)
|
||||
# │ │ ┌───────────── day of the month (1 - 31)
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
- cron: '45 1 * * *' # run once a day, timezone is utc
|
||||
workflow_dispatch: # adds ability to run this manually
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
concurrency:
|
||||
# Allow only one workflow
|
||||
group: ${{ github.workflow }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
regress:
|
||||
env:
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
DEFAULT_PG_VERSION: 16
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: remote
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
|
||||
runs-on: us-east-2
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Patch the test
|
||||
run: |
|
||||
cd "vendor/postgres-v${DEFAULT_PG_VERSION}"
|
||||
patch -p1 < "../../compute/patches/cloud_regress_pg${DEFAULT_PG_VERSION}.patch"
|
||||
|
||||
- name: Generate a random password
|
||||
id: pwgen
|
||||
run: |
|
||||
set +x
|
||||
DBPASS=$(dd if=/dev/random bs=48 count=1 2>/dev/null | base64)
|
||||
echo "::add-mask::${DBPASS//\//}"
|
||||
echo DBPASS="${DBPASS//\//}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Change tests according to the generated password
|
||||
env:
|
||||
DBPASS: ${{ steps.pwgen.outputs.DBPASS }}
|
||||
run: |
|
||||
cd vendor/postgres-v"${DEFAULT_PG_VERSION}"/src/test/regress
|
||||
for fname in sql/*.sql expected/*.out; do
|
||||
sed -i.bak s/NEON_PASSWORD_PLACEHOLDER/"'${DBPASS}'"/ "${fname}"
|
||||
done
|
||||
for ph in $(grep NEON_MD5_PLACEHOLDER expected/password.out | awk '{print $3;}' | sort | uniq); do
|
||||
USER=$(echo "${ph}" | cut -c 22-)
|
||||
MD5=md5$(echo -n "${DBPASS}${USER}" | md5sum | awk '{print $1;}')
|
||||
sed -i.bak "s/${ph}/${MD5}/" expected/password.out
|
||||
done
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
|
||||
- name: Run the regression tests
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: cloud_regress
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
extra_params: -m remote_cluster
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ secrets.PG_REGRESS_CONNSTR }}
|
||||
|
||||
- name: Create Allure report
|
||||
id: create-allure-report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C033QLM5P7D" # on-call-staging-stream
|
||||
slack-message: |
|
||||
Periodic pg_regress on staging: ${{ job.status }}
|
||||
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
|
||||
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
78
.github/workflows/label-for-external-users.yml
vendored
78
.github/workflows/label-for-external-users.yml
vendored
@@ -1,78 +0,0 @@
|
||||
name: Add `external` label to issues and PRs created by external users
|
||||
|
||||
on:
|
||||
issues:
|
||||
types:
|
||||
- opened
|
||||
pull_request_target:
|
||||
types:
|
||||
- opened
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
github-actor:
|
||||
description: 'GitHub username. If empty, the username of the current user will be used'
|
||||
required: false
|
||||
|
||||
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
|
||||
permissions: {}
|
||||
|
||||
env:
|
||||
LABEL: external
|
||||
|
||||
jobs:
|
||||
check-user:
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
outputs:
|
||||
is-member: ${{ steps.check-user.outputs.is-member }}
|
||||
|
||||
steps:
|
||||
- name: Check whether `${{ github.actor }}` is a member of `${{ github.repository_owner }}`
|
||||
id: check-user
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
ACTOR: ${{ inputs.github-actor || github.actor }}
|
||||
run: |
|
||||
expected_error="User does not exist or is not a member of the organization"
|
||||
output_file=output.txt
|
||||
|
||||
for i in $(seq 1 10); do
|
||||
if gh api "/orgs/${GITHUB_REPOSITORY_OWNER}/members/${ACTOR}" \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" > ${output_file}; then
|
||||
|
||||
is_member=true
|
||||
break
|
||||
elif grep -q "${expected_error}" ${output_file}; then
|
||||
is_member=false
|
||||
break
|
||||
elif [ $i -eq 10 ]; then
|
||||
title="Failed to get memmbership status for ${ACTOR}"
|
||||
message="The latest GitHub API error message: '$(cat ${output_file})'"
|
||||
echo "::error file=.github/workflows/label-for-external-users.yml,title=${title}::${message}"
|
||||
|
||||
exit 1
|
||||
fi
|
||||
|
||||
sleep 1
|
||||
done
|
||||
|
||||
echo "is-member=${is_member}" | tee -a ${GITHUB_OUTPUT}
|
||||
|
||||
add-label:
|
||||
if: needs.check-user.outputs.is-member == 'false'
|
||||
needs: [ check-user ]
|
||||
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
pull-requests: write # for `gh pr edit`
|
||||
issues: write # for `gh issue edit`
|
||||
|
||||
steps:
|
||||
- name: Add `${{ env.LABEL }}` label
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
ITEM_NUMBER: ${{ github.event[github.event_name == 'pull_request_target' && 'pull_request' || 'issue'].number }}
|
||||
GH_CLI_COMMAND: ${{ github.event_name == 'pull_request_target' && 'pr' || 'issue' }}
|
||||
run: |
|
||||
gh ${GH_CLI_COMMAND} --repo ${GITHUB_REPOSITORY} edit --add-label=${LABEL} ${ITEM_NUMBER}
|
||||
222
.github/workflows/neon_extra_builds.yml
vendored
222
.github/workflows/neon_extra_builds.yml
vendored
@@ -56,6 +56,7 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Install macOS postgres dependencies
|
||||
run: brew install flex bison openssl protobuf icu4c pkg-config
|
||||
@@ -72,10 +73,6 @@ jobs:
|
||||
id: pg_v16_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set pg 17 revision for caching
|
||||
id: pg_v17_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Cache postgres v14 build
|
||||
id: cache_pg_14
|
||||
uses: actions/cache@v4
|
||||
@@ -97,13 +94,6 @@ jobs:
|
||||
path: pg_install/v16
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Cache postgres v17 build
|
||||
id: cache_pg_17
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v17
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Set extra env for macOS
|
||||
run: |
|
||||
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
|
||||
@@ -131,10 +121,6 @@ jobs:
|
||||
if: steps.cache_pg_16.outputs.cache-hit != 'true'
|
||||
run: make postgres-v16 -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Build postgres v17
|
||||
if: steps.cache_pg_17.outputs.cache-hit != 'true'
|
||||
run: make postgres-v17 -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Build neon extensions
|
||||
run: make neon-pg-ext -j$(sysctl -n hw.ncpu)
|
||||
|
||||
@@ -147,13 +133,212 @@ jobs:
|
||||
- name: Check that no warnings are produced
|
||||
run: ./run_clippy.sh
|
||||
|
||||
check-linux-arm-build:
|
||||
needs: [ check-permissions, build-build-tools-image ]
|
||||
timeout-minutes: 90
|
||||
runs-on: [ self-hosted, dev, arm64 ]
|
||||
|
||||
env:
|
||||
# Use release build only, to have less debug info around
|
||||
# Hence keeping target/ (and general cache size) smaller
|
||||
BUILD_TYPE: release
|
||||
CARGO_FEATURES: --features testing
|
||||
CARGO_FLAGS: --release
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- name: Fix git ownership
|
||||
run: |
|
||||
# Workaround for `fatal: detected dubious ownership in repository at ...`
|
||||
#
|
||||
# Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
|
||||
# Ref https://github.com/actions/checkout/issues/785
|
||||
#
|
||||
git config --global --add safe.directory ${{ github.workspace }}
|
||||
git config --global --add safe.directory ${GITHUB_WORKSPACE}
|
||||
for r in 14 15 16; do
|
||||
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
|
||||
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
|
||||
done
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Set pg 14 revision for caching
|
||||
id: pg_v14_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set pg 15 revision for caching
|
||||
id: pg_v15_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set pg 16 revision for caching
|
||||
id: pg_v16_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set env variables
|
||||
run: |
|
||||
echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo" >> $GITHUB_ENV
|
||||
|
||||
- name: Cache postgres v14 build
|
||||
id: cache_pg_14
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v14
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Cache postgres v15 build
|
||||
id: cache_pg_15
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v15
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Cache postgres v16 build
|
||||
id: cache_pg_16
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v16
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Build postgres v14
|
||||
if: steps.cache_pg_14.outputs.cache-hit != 'true'
|
||||
run: mold -run make postgres-v14 -j$(nproc)
|
||||
|
||||
- name: Build postgres v15
|
||||
if: steps.cache_pg_15.outputs.cache-hit != 'true'
|
||||
run: mold -run make postgres-v15 -j$(nproc)
|
||||
|
||||
- name: Build postgres v16
|
||||
if: steps.cache_pg_16.outputs.cache-hit != 'true'
|
||||
run: mold -run make postgres-v16 -j$(nproc)
|
||||
|
||||
- name: Build neon extensions
|
||||
run: mold -run make neon-pg-ext -j$(nproc)
|
||||
|
||||
- name: Build walproposer-lib
|
||||
run: mold -run make walproposer-lib -j$(nproc)
|
||||
|
||||
- name: Run cargo build
|
||||
run: |
|
||||
mold -run cargo build --locked $CARGO_FLAGS $CARGO_FEATURES --bins --tests
|
||||
|
||||
- name: Run cargo test
|
||||
env:
|
||||
NEXTEST_RETRIES: 3
|
||||
run: |
|
||||
cargo nextest run $CARGO_FEATURES
|
||||
|
||||
# Run separate tests for real S3
|
||||
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
|
||||
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
|
||||
export REMOTE_STORAGE_S3_REGION=eu-central-1
|
||||
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
|
||||
cargo nextest run --package remote_storage --test test_real_s3
|
||||
|
||||
# Run separate tests for real Azure Blob Storage
|
||||
# XXX: replace region with `eu-central-1`-like region
|
||||
export ENABLE_REAL_AZURE_REMOTE_STORAGE=y
|
||||
export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}"
|
||||
export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
|
||||
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
|
||||
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
|
||||
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
|
||||
cargo nextest run --package remote_storage --test test_real_azure
|
||||
|
||||
check-codestyle-rust-arm:
|
||||
needs: [ check-permissions, build-build-tools-image ]
|
||||
timeout-minutes: 90
|
||||
runs-on: [ self-hosted, dev, arm64 ]
|
||||
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- name: Fix git ownership
|
||||
run: |
|
||||
# Workaround for `fatal: detected dubious ownership in repository at ...`
|
||||
#
|
||||
# Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
|
||||
# Ref https://github.com/actions/checkout/issues/785
|
||||
#
|
||||
git config --global --add safe.directory ${{ github.workspace }}
|
||||
git config --global --add safe.directory ${GITHUB_WORKSPACE}
|
||||
for r in 14 15 16; do
|
||||
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
|
||||
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
|
||||
done
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 1
|
||||
|
||||
# Some of our rust modules use FFI and need those to be checked
|
||||
- name: Get postgres headers
|
||||
run: make postgres-headers -j$(nproc)
|
||||
|
||||
# cargo hack runs the given cargo subcommand (clippy in this case) for all feature combinations.
|
||||
# This will catch compiler & clippy warnings in all feature combinations.
|
||||
# TODO: use cargo hack for build and test as well, but, that's quite expensive.
|
||||
# NB: keep clippy args in sync with ./run_clippy.sh
|
||||
- run: |
|
||||
CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")"
|
||||
if [ "$CLIPPY_COMMON_ARGS" = "" ]; then
|
||||
echo "No clippy args found in .neon_clippy_args"
|
||||
exit 1
|
||||
fi
|
||||
echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV
|
||||
- name: Run cargo clippy (debug)
|
||||
run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS
|
||||
- name: Run cargo clippy (release)
|
||||
run: cargo hack --feature-powerset clippy --release $CLIPPY_COMMON_ARGS
|
||||
|
||||
- name: Check documentation generation
|
||||
run: cargo doc --workspace --no-deps --document-private-items
|
||||
env:
|
||||
RUSTDOCFLAGS: "-Dwarnings -Arustdoc::private_intra_doc_links"
|
||||
|
||||
# Use `${{ !cancelled() }}` to run quck tests after the longer clippy run
|
||||
- name: Check formatting
|
||||
if: ${{ !cancelled() }}
|
||||
run: cargo fmt --all -- --check
|
||||
|
||||
# https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
|
||||
- name: Check rust dependencies
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
cargo hakari generate --diff # workspace-hack Cargo.toml is up-to-date
|
||||
cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack
|
||||
|
||||
# https://github.com/EmbarkStudios/cargo-deny
|
||||
- name: Check rust licenses/bans/advisories/sources
|
||||
if: ${{ !cancelled() }}
|
||||
run: cargo deny check
|
||||
|
||||
gather-rust-build-stats:
|
||||
needs: [ check-permissions, build-build-tools-image ]
|
||||
if: |
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
runs-on: [ self-hosted, large ]
|
||||
runs-on: [ self-hosted, gen3, large ]
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}
|
||||
credentials:
|
||||
@@ -163,6 +348,8 @@ jobs:
|
||||
|
||||
env:
|
||||
BUILD_TYPE: release
|
||||
# remove the cachepot wrapper and build without crate caches
|
||||
RUSTC_WRAPPER: ""
|
||||
# build with incremental compilation produce partial results
|
||||
# so do not attempt to cache this build, also disable the incremental compilation
|
||||
CARGO_INCREMENTAL: 0
|
||||
@@ -172,6 +359,7 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 1
|
||||
|
||||
# Some of our rust modules use FFI and need those to be checked
|
||||
- name: Get postgres headers
|
||||
@@ -181,7 +369,7 @@ jobs:
|
||||
run: make walproposer-lib -j$(nproc)
|
||||
|
||||
- name: Produce the build stats
|
||||
run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release --timings -j$(nproc)
|
||||
run: cargo build --all --release --timings
|
||||
|
||||
- name: Upload the build stats
|
||||
id: upload-stats
|
||||
|
||||
155
.github/workflows/periodic_pagebench.yml
vendored
155
.github/workflows/periodic_pagebench.yml
vendored
@@ -1,155 +0,0 @@
|
||||
name: Periodic pagebench performance test on dedicated EC2 machine in eu-central-1 region
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# * is a special character in YAML so you have to quote this string
|
||||
# ┌───────────── minute (0 - 59)
|
||||
# │ ┌───────────── hour (0 - 23)
|
||||
# │ │ ┌───────────── day of the month (1 - 31)
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
- cron: '0 18 * * *' # Runs at 6 PM UTC every day
|
||||
workflow_dispatch: # Allows manual triggering of the workflow
|
||||
inputs:
|
||||
commit_hash:
|
||||
type: string
|
||||
description: 'The long neon repo commit hash for the system under test (pageserver) to be tested.'
|
||||
required: false
|
||||
default: ''
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euo pipefail {0}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
trigger_bench_on_ec2_machine_in_eu_central_1:
|
||||
runs-on: [ self-hosted, small ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
options: --init
|
||||
timeout-minutes: 360 # Set the timeout to 6 hours
|
||||
env:
|
||||
API_KEY: ${{ secrets.PERIODIC_PAGEBENCH_EC2_RUNNER_API_KEY }}
|
||||
RUN_ID: ${{ github.run_id }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_EC2_US_TEST_RUNNER_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY : ${{ secrets.AWS_EC2_US_TEST_RUNNER_ACCESS_KEY_SECRET }}
|
||||
AWS_DEFAULT_REGION : "eu-central-1"
|
||||
AWS_INSTANCE_ID : "i-02a59a3bf86bc7e74"
|
||||
steps:
|
||||
# we don't need the neon source code because we run everything remotely
|
||||
# however we still need the local github actions to run the allure step below
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Show my own (github runner) external IP address - usefull for IP allowlisting
|
||||
run: curl https://ifconfig.me
|
||||
|
||||
- name: Start EC2 instance and wait for the instance to boot up
|
||||
run: |
|
||||
aws ec2 start-instances --instance-ids $AWS_INSTANCE_ID
|
||||
aws ec2 wait instance-running --instance-ids $AWS_INSTANCE_ID
|
||||
sleep 60 # sleep some time to allow cloudinit and our API server to start up
|
||||
|
||||
- name: Determine public IP of the EC2 instance and set env variable EC2_MACHINE_URL_US
|
||||
run: |
|
||||
public_ip=$(aws ec2 describe-instances --instance-ids $AWS_INSTANCE_ID --query 'Reservations[*].Instances[*].PublicIpAddress' --output text)
|
||||
echo "Public IP of the EC2 instance: $public_ip"
|
||||
echo "EC2_MACHINE_URL_US=https://${public_ip}:8443" >> $GITHUB_ENV
|
||||
|
||||
- name: Determine commit hash
|
||||
env:
|
||||
INPUT_COMMIT_HASH: ${{ github.event.inputs.commit_hash }}
|
||||
run: |
|
||||
if [ -z "$INPUT_COMMIT_HASH" ]; then
|
||||
echo "COMMIT_HASH=$(curl -s https://api.github.com/repos/neondatabase/neon/commits/main | jq -r '.sha')" >> $GITHUB_ENV
|
||||
else
|
||||
echo "COMMIT_HASH=$INPUT_COMMIT_HASH" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
- name: Start Bench with run_id
|
||||
run: |
|
||||
curl -k -X 'POST' \
|
||||
"${EC2_MACHINE_URL_US}/start_test/${GITHUB_RUN_ID}" \
|
||||
-H 'accept: application/json' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-H "Authorization: Bearer $API_KEY" \
|
||||
-d "{\"neonRepoCommitHash\": \"${COMMIT_HASH}\"}"
|
||||
|
||||
- name: Poll Test Status
|
||||
id: poll_step
|
||||
run: |
|
||||
status=""
|
||||
while [[ "$status" != "failure" && "$status" != "success" ]]; do
|
||||
response=$(curl -k -X 'GET' \
|
||||
"${EC2_MACHINE_URL_US}/test_status/${GITHUB_RUN_ID}" \
|
||||
-H 'accept: application/json' \
|
||||
-H "Authorization: Bearer $API_KEY")
|
||||
echo "Response: $response"
|
||||
set +x
|
||||
status=$(echo $response | jq -r '.status')
|
||||
echo "Test status: $status"
|
||||
if [[ "$status" == "failure" ]]; then
|
||||
echo "Test failed"
|
||||
exit 1 # Fail the job step if status is failure
|
||||
elif [[ "$status" == "success" || "$status" == "null" ]]; then
|
||||
break
|
||||
elif [[ "$status" == "too_many_runs" ]]; then
|
||||
echo "Too many runs already running"
|
||||
echo "too_many_runs=true" >> "$GITHUB_OUTPUT"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
sleep 60 # Poll every 60 seconds
|
||||
done
|
||||
|
||||
- name: Retrieve Test Logs
|
||||
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
|
||||
run: |
|
||||
curl -k -X 'GET' \
|
||||
"${EC2_MACHINE_URL_US}/test_log/${GITHUB_RUN_ID}" \
|
||||
-H 'accept: application/gzip' \
|
||||
-H "Authorization: Bearer $API_KEY" \
|
||||
--output "test_log_${GITHUB_RUN_ID}.gz"
|
||||
|
||||
- name: Unzip Test Log and Print it into this job's log
|
||||
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
|
||||
run: |
|
||||
gzip -d "test_log_${GITHUB_RUN_ID}.gz"
|
||||
cat "test_log_${GITHUB_RUN_ID}"
|
||||
|
||||
- name: Create Allure report
|
||||
env:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C033QLM5P7D" # dev-staging-stream
|
||||
slack-message: "Periodic pagebench testing on dedicated hardware: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
- name: Cleanup Test Resources
|
||||
if: always()
|
||||
run: |
|
||||
curl -k -X 'POST' \
|
||||
"${EC2_MACHINE_URL_US}/cleanup_test/${GITHUB_RUN_ID}" \
|
||||
-H 'accept: application/json' \
|
||||
-H "Authorization: Bearer $API_KEY" \
|
||||
-d ''
|
||||
|
||||
- name: Stop EC2 instance and wait for the instance to be stopped
|
||||
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
|
||||
run: |
|
||||
aws ec2 stop-instances --instance-ids $AWS_INSTANCE_ID
|
||||
aws ec2 wait instance-stopped --instance-ids $AWS_INSTANCE_ID
|
||||
211
.github/workflows/pg-clients.yml
vendored
211
.github/workflows/pg-clients.yml
vendored
@@ -1,211 +0,0 @@
|
||||
name: Test Postgres client libraries
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# * is a special character in YAML so you have to quote this string
|
||||
# ┌───────────── minute (0 - 59)
|
||||
# │ ┌───────────── hour (0 - 23)
|
||||
# │ │ ┌───────────── day of the month (1 - 31)
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
- cron: '23 02 * * *' # run once a day, timezone is utc
|
||||
pull_request:
|
||||
paths:
|
||||
- '.github/workflows/pg-clients.yml'
|
||||
- 'test_runner/pg_clients/**'
|
||||
- 'test_runner/logical_repl/**'
|
||||
- 'poetry.lock'
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref_name }}
|
||||
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
env:
|
||||
DEFAULT_PG_VERSION: 16
|
||||
PLATFORM: neon-captest-new
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
AWS_DEFAULT_REGION: eu-central-1
|
||||
|
||||
jobs:
|
||||
check-permissions:
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'run-no-ci') }}
|
||||
uses: ./.github/workflows/check-permissions.yml
|
||||
with:
|
||||
github-event-name: ${{ github.event_name }}
|
||||
|
||||
check-build-tools-image:
|
||||
needs: [ check-permissions ]
|
||||
uses: ./.github/workflows/check-build-tools-image.yml
|
||||
|
||||
build-build-tools-image:
|
||||
needs: [ check-build-tools-image ]
|
||||
uses: ./.github/workflows/build-build-tools-image.yml
|
||||
with:
|
||||
image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }}
|
||||
secrets: inherit
|
||||
|
||||
test-logical-replication:
|
||||
needs: [ build-build-tools-image ]
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
options: --init --user root
|
||||
services:
|
||||
clickhouse:
|
||||
image: clickhouse/clickhouse-server:24.6.3.64
|
||||
ports:
|
||||
- 9000:9000
|
||||
- 8123:8123
|
||||
zookeeper:
|
||||
image: quay.io/debezium/zookeeper:2.7
|
||||
ports:
|
||||
- 2181:2181
|
||||
kafka:
|
||||
image: quay.io/debezium/kafka:2.7
|
||||
env:
|
||||
ZOOKEEPER_CONNECT: "zookeeper:2181"
|
||||
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092
|
||||
KAFKA_BROKER_ID: 1
|
||||
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
|
||||
KAFKA_JMX_PORT: 9991
|
||||
ports:
|
||||
- 9092:9092
|
||||
debezium:
|
||||
image: quay.io/debezium/connect:2.7
|
||||
env:
|
||||
BOOTSTRAP_SERVERS: kafka:9092
|
||||
GROUP_ID: 1
|
||||
CONFIG_STORAGE_TOPIC: debezium-config
|
||||
OFFSET_STORAGE_TOPIC: debezium-offset
|
||||
STATUS_STORAGE_TOPIC: debezium-status
|
||||
DEBEZIUM_CONFIG_CONNECTOR_CLASS: io.debezium.connector.postgresql.PostgresConnector
|
||||
ports:
|
||||
- 8083:8083
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
|
||||
- name: Create Neon Project
|
||||
id: create-neon-project
|
||||
uses: ./.github/actions/neon-project-create
|
||||
with:
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
|
||||
- name: Run tests
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: remote
|
||||
test_selection: logical_repl
|
||||
run_in_parallel: false
|
||||
extra_params: -m remote_cluster
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
|
||||
|
||||
- name: Delete Neon Project
|
||||
if: always()
|
||||
uses: ./.github/actions/neon-project-delete
|
||||
with:
|
||||
project_id: ${{ steps.create-neon-project.outputs.project_id }}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
- name: Create Allure report
|
||||
if: ${{ !cancelled() }}
|
||||
id: create-allure-report
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
store-test-results-into-db: true
|
||||
env:
|
||||
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: github.event.schedule && failure()
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
|
||||
slack-message: |
|
||||
Testing the logical replication: <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|${{ job.status }}> (<${{ steps.create-allure-report.outputs.report-url }}|test report>)
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
test-postgres-client-libs:
|
||||
needs: [ build-build-tools-image ]
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
options: --init --user root
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
|
||||
- name: Create Neon Project
|
||||
id: create-neon-project
|
||||
uses: ./.github/actions/neon-project-create
|
||||
with:
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
|
||||
- name: Run tests
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: remote
|
||||
test_selection: pg_clients
|
||||
run_in_parallel: false
|
||||
extra_params: -m remote_cluster
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
|
||||
|
||||
- name: Delete Neon Project
|
||||
if: always()
|
||||
uses: ./.github/actions/neon-project-delete
|
||||
with:
|
||||
project_id: ${{ steps.create-neon-project.outputs.project_id }}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
- name: Create Allure report
|
||||
if: ${{ !cancelled() }}
|
||||
id: create-allure-report
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
store-test-results-into-db: true
|
||||
env:
|
||||
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: github.event.schedule && failure()
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
|
||||
slack-message: |
|
||||
Testing Postgres clients: <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|${{ job.status }}> (<${{ steps.create-allure-report.outputs.report-url }}|test report>)
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
98
.github/workflows/pg_clients.yml
vendored
Normal file
98
.github/workflows/pg_clients.yml
vendored
Normal file
@@ -0,0 +1,98 @@
|
||||
name: Test Postgres client libraries
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# * is a special character in YAML so you have to quote this string
|
||||
# ┌───────────── minute (0 - 59)
|
||||
# │ ┌───────────── hour (0 - 23)
|
||||
# │ │ ┌───────────── day of the month (1 - 31)
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
- cron: '23 02 * * *' # run once a day, timezone is utc
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
# Allow only one workflow per any non-`main` branch.
|
||||
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
test-postgres-client-libs:
|
||||
# TODO: switch to gen2 runner, requires docker
|
||||
runs-on: [ ubuntu-latest ]
|
||||
|
||||
env:
|
||||
DEFAULT_PG_VERSION: 14
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: 3.9
|
||||
|
||||
- name: Install Poetry
|
||||
uses: snok/install-poetry@v1
|
||||
|
||||
- name: Cache poetry deps
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/pypoetry/virtualenvs
|
||||
key: v2-${{ runner.os }}-python-deps-ubunutu-latest-${{ hashFiles('poetry.lock') }}
|
||||
|
||||
- name: Install Python deps
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: ./scripts/pysync
|
||||
|
||||
- name: Create Neon Project
|
||||
id: create-neon-project
|
||||
uses: ./.github/actions/neon-project-create
|
||||
with:
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
|
||||
- name: Run pytest
|
||||
env:
|
||||
REMOTE_ENV: 1
|
||||
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
# Test framework expects we have psql binary;
|
||||
# but since we don't really need it in this test, let's mock it
|
||||
mkdir -p "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin" && touch "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin/psql";
|
||||
./scripts/pytest \
|
||||
--junitxml=$TEST_OUTPUT/junit.xml \
|
||||
--tb=short \
|
||||
--verbose \
|
||||
-m "remote_cluster" \
|
||||
-rA "test_runner/pg_clients"
|
||||
|
||||
- name: Delete Neon Project
|
||||
if: ${{ always() }}
|
||||
uses: ./.github/actions/neon-project-delete
|
||||
with:
|
||||
project_id: ${{ steps.create-neon-project.outputs.project_id }}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
# We use GitHub's action upload-artifact because `ubuntu-latest` doesn't have configured AWS CLI.
|
||||
# It will be fixed after switching to gen2 runner
|
||||
- name: Upload python test logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
retention-days: 7
|
||||
name: python-test-pg_clients-${{ runner.os }}-stage-logs
|
||||
path: ${{ env.TEST_OUTPUT }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C033QLM5P7D" # dev-staging-stream
|
||||
slack-message: "Testing Postgres clients: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
60
.github/workflows/pin-build-tools-image.yml
vendored
60
.github/workflows/pin-build-tools-image.yml
vendored
@@ -7,20 +7,12 @@ on:
|
||||
description: 'Source tag'
|
||||
required: true
|
||||
type: string
|
||||
force:
|
||||
description: 'Force the image to be pinned'
|
||||
default: false
|
||||
type: boolean
|
||||
workflow_call:
|
||||
inputs:
|
||||
from-tag:
|
||||
description: 'Source tag'
|
||||
required: true
|
||||
type: string
|
||||
force:
|
||||
description: 'Force the image to be pinned'
|
||||
default: false
|
||||
type: boolean
|
||||
|
||||
defaults:
|
||||
run:
|
||||
@@ -30,18 +22,15 @@ concurrency:
|
||||
group: pin-build-tools-image-${{ inputs.from-tag }}
|
||||
cancel-in-progress: false
|
||||
|
||||
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
|
||||
permissions: {}
|
||||
|
||||
env:
|
||||
FROM_TAG: ${{ inputs.from-tag }}
|
||||
TO_TAG: pinned
|
||||
|
||||
jobs:
|
||||
check-manifests:
|
||||
runs-on: ubuntu-22.04
|
||||
outputs:
|
||||
skip: ${{ steps.check-manifests.outputs.skip }}
|
||||
tag-image:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
env:
|
||||
FROM_TAG: ${{ inputs.from-tag }}
|
||||
TO_TAG: pinned
|
||||
|
||||
steps:
|
||||
- name: Check if we really need to pin the image
|
||||
@@ -58,44 +47,27 @@ jobs:
|
||||
|
||||
echo "skip=${skip}" | tee -a $GITHUB_OUTPUT
|
||||
|
||||
tag-image:
|
||||
needs: check-manifests
|
||||
|
||||
# use format(..) to catch both inputs.force = true AND inputs.force = 'true'
|
||||
if: needs.check-manifests.outputs.skip == 'false' || format('{0}', inputs.force) == 'true'
|
||||
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
permissions:
|
||||
id-token: write # for `azure/login`
|
||||
|
||||
steps:
|
||||
- uses: docker/login-action@v3
|
||||
|
||||
if: steps.check-manifests.outputs.skip == 'false'
|
||||
with:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
- name: Tag build-tools with `${{ env.TO_TAG }}` in Docker Hub
|
||||
if: steps.check-manifests.outputs.skip == 'false'
|
||||
run: |
|
||||
docker buildx imagetools create -t neondatabase/build-tools:${TO_TAG} \
|
||||
neondatabase/build-tools:${FROM_TAG}
|
||||
|
||||
- uses: docker/login-action@v3
|
||||
if: steps.check-manifests.outputs.skip == 'false'
|
||||
with:
|
||||
registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
|
||||
username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
password: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
|
||||
- name: Azure login
|
||||
uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # @v2.1.1
|
||||
with:
|
||||
client-id: ${{ secrets.AZURE_DEV_CLIENT_ID }}
|
||||
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
|
||||
subscription-id: ${{ secrets.AZURE_DEV_SUBSCRIPTION_ID }}
|
||||
|
||||
- name: Login to ACR
|
||||
run: |
|
||||
az acr login --name=neoneastus2
|
||||
|
||||
- name: Tag build-tools with `${{ env.TO_TAG }}` in Docker Hub, ECR, and ACR
|
||||
- name: Tag build-tools with `${{ env.TO_TAG }}` in ECR
|
||||
if: steps.check-manifests.outputs.skip == 'false'
|
||||
run: |
|
||||
docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${TO_TAG} \
|
||||
-t neoneastus2.azurecr.io/neondatabase/build-tools:${TO_TAG} \
|
||||
-t neondatabase/build-tools:${TO_TAG} \
|
||||
neondatabase/build-tools:${FROM_TAG}
|
||||
|
||||
2
.github/workflows/release-notify.yml
vendored
2
.github/workflows/release-notify.yml
vendored
@@ -19,7 +19,7 @@ on:
|
||||
|
||||
jobs:
|
||||
notify:
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: [ ubuntu-latest ]
|
||||
|
||||
steps:
|
||||
- uses: neondatabase/dev-actions/release-pr-notify@main
|
||||
|
||||
16
.github/workflows/release.yml
vendored
16
.github/workflows/release.yml
vendored
@@ -26,7 +26,7 @@ defaults:
|
||||
jobs:
|
||||
create-storage-release-branch:
|
||||
if: ${{ github.event.schedule == '0 6 * * MON' || format('{0}', inputs.create-storage-release-branch) == 'true' }}
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
permissions:
|
||||
contents: write # for `git push`
|
||||
@@ -52,22 +52,20 @@ jobs:
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
run: |
|
||||
TITLE="Storage & Compute release ${RELEASE_DATE}"
|
||||
|
||||
cat << EOF > body.md
|
||||
## ${TITLE}
|
||||
## Release ${RELEASE_DATE}
|
||||
|
||||
**Please merge this Pull Request using 'Create a merge commit' button**
|
||||
EOF
|
||||
|
||||
gh pr create --title "${TITLE}" \
|
||||
gh pr create --title "Release ${RELEASE_DATE}" \
|
||||
--body-file "body.md" \
|
||||
--head "${RELEASE_BRANCH}" \
|
||||
--base "release"
|
||||
|
||||
create-proxy-release-branch:
|
||||
if: ${{ github.event.schedule == '0 6 * * THU' || format('{0}', inputs.create-proxy-release-branch) == 'true' }}
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
permissions:
|
||||
contents: write # for `git push`
|
||||
@@ -93,15 +91,13 @@ jobs:
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
run: |
|
||||
TITLE="Proxy release ${RELEASE_DATE}"
|
||||
|
||||
cat << EOF > body.md
|
||||
## ${TITLE}
|
||||
## Proxy release ${RELEASE_DATE}
|
||||
|
||||
**Please merge this Pull Request using 'Create a merge commit' button**
|
||||
EOF
|
||||
|
||||
gh pr create --title "${TITLE}" \
|
||||
gh pr create --title "Proxy release ${RELEASE_DATE}" \
|
||||
--body-file "body.md" \
|
||||
--head "${RELEASE_BRANCH}" \
|
||||
--base "release-proxy"
|
||||
|
||||
57
.github/workflows/trigger-e2e-tests.yml
vendored
57
.github/workflows/trigger-e2e-tests.yml
vendored
@@ -13,11 +13,13 @@ defaults:
|
||||
env:
|
||||
# A concurrency group that we use for e2e-tests runs, matches `concurrency.group` above with `github.repository` as a prefix
|
||||
E2E_CONCURRENCY_GROUP: ${{ github.repository }}-e2e-tests-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
|
||||
jobs:
|
||||
cancel-previous-e2e-tests:
|
||||
if: github.event_name == 'pull_request'
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Cancel previous e2e-tests runs for this PR
|
||||
@@ -29,13 +31,13 @@ jobs:
|
||||
--field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}"
|
||||
|
||||
tag:
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: [ ubuntu-latest ]
|
||||
outputs:
|
||||
build-tag: ${{ steps.build-tag.outputs.tag }}
|
||||
|
||||
steps:
|
||||
# Need `fetch-depth: 0` to count the number of commits in the branch
|
||||
- uses: actions/checkout@v4
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -60,37 +62,21 @@ jobs:
|
||||
|
||||
trigger-e2e-tests:
|
||||
needs: [ tag ]
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
EVENT_ACTION: ${{ github.event.action }}
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
TAG: ${{ needs.tag.outputs.build-tag }}
|
||||
steps:
|
||||
- name: Wait for `promote-images` job to finish
|
||||
# It's important to have a timeout here, the script in the step can run infinitely
|
||||
timeout-minutes: 60
|
||||
- name: check if ecr image are present
|
||||
env:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
run: |
|
||||
if [ "${GITHUB_EVENT_NAME}" != "pull_request" ] || [ "${EVENT_ACTION}" != "ready_for_review" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# For PRs we use the run id as the tag
|
||||
BUILD_AND_TEST_RUN_ID=${TAG}
|
||||
while true; do
|
||||
conclusion=$(gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '.jobs[] | select(.name == "promote-images") | .conclusion')
|
||||
case "$conclusion" in
|
||||
success)
|
||||
break
|
||||
;;
|
||||
failure | cancelled | skipped)
|
||||
echo "The 'promote-images' job didn't succeed: '${conclusion}'. Exiting..."
|
||||
exit 1
|
||||
;;
|
||||
*)
|
||||
echo "The 'promote-images' hasn't succeed yet. Waiting..."
|
||||
sleep 60
|
||||
;;
|
||||
esac
|
||||
for REPO in neon compute-tools compute-node-v14 vm-compute-node-v14 compute-node-v15 vm-compute-node-v15 compute-node-v16 vm-compute-node-v16; do
|
||||
OUTPUT=$(aws ecr describe-images --repository-name ${REPO} --region eu-central-1 --query "imageDetails[?imageTags[?contains(@, '${TAG}')]]" --output text)
|
||||
if [ "$OUTPUT" == "" ]; then
|
||||
echo "$REPO with image tag $TAG not found" >> $GITHUB_OUTPUT
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
- name: Set e2e-platforms
|
||||
@@ -102,17 +88,12 @@ jobs:
|
||||
# Default set of platforms to run e2e tests on
|
||||
platforms='["docker", "k8s"]'
|
||||
|
||||
# If a PR changes anything that affects computes, add k8s-neonvm to the list of platforms.
|
||||
# If the PR changes vendor/, pgxn/ or libs/vm_monitor/ directories, or Dockerfile.compute-node, add k8s-neonvm to the list of platforms.
|
||||
# If the workflow run is not a pull request, add k8s-neonvm to the list.
|
||||
if [ "$GITHUB_EVENT_NAME" == "pull_request" ]; then
|
||||
for f in $(gh api "/repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}/files" --paginate --jq '.[].filename'); do
|
||||
case "$f" in
|
||||
# List of directories that contain code which affect compute images.
|
||||
#
|
||||
# This isn't exhaustive, just the paths that are most directly compute-related.
|
||||
# For example, compute_ctl also depends on libs/utils, but we don't trigger
|
||||
# an e2e run on that.
|
||||
vendor/*|pgxn/*|compute_tools/*|libs/vm_monitor/*|compute/Dockerfile.compute-node)
|
||||
vendor/*|pgxn/*|libs/vm_monitor/*|Dockerfile.compute-node)
|
||||
platforms=$(echo "${platforms}" | jq --compact-output '. += ["k8s-neonvm"] | unique')
|
||||
;;
|
||||
*)
|
||||
|
||||
4
.gitmodules
vendored
4
.gitmodules
vendored
@@ -10,7 +10,3 @@
|
||||
path = vendor/postgres-v16
|
||||
url = https://github.com/neondatabase/postgres.git
|
||||
branch = REL_16_STABLE_neon
|
||||
[submodule "vendor/postgres-v17"]
|
||||
path = vendor/postgres-v17
|
||||
url = https://github.com/neondatabase/postgres.git
|
||||
branch = REL_17_STABLE_neon
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
# * `-A unknown_lints` – do not warn about unknown lint suppressions
|
||||
# that people with newer toolchains might use
|
||||
# * `-D warnings` - fail on any warnings (`cargo` returns non-zero exit status)
|
||||
# * `-D clippy::todo` - don't let `todo!()` slip into `main`
|
||||
export CLIPPY_COMMON_ARGS="--locked --workspace --all-targets -- -A unknown_lints -D warnings -D clippy::todo"
|
||||
export CLIPPY_COMMON_ARGS="--locked --workspace --all-targets -- -A unknown_lints -D warnings"
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
/compute_tools/ @neondatabase/control-plane @neondatabase/compute
|
||||
/storage_controller @neondatabase/storage
|
||||
/libs/pageserver_api/ @neondatabase/storage
|
||||
/libs/postgres_ffi/ @neondatabase/compute @neondatabase/storage
|
||||
/libs/postgres_ffi/ @neondatabase/compute @neondatabase/safekeepers
|
||||
/libs/remote_storage/ @neondatabase/storage
|
||||
/libs/safekeeper_api/ @neondatabase/storage
|
||||
/libs/safekeeper_api/ @neondatabase/safekeepers
|
||||
/libs/vm_monitor/ @neondatabase/autoscaling
|
||||
/pageserver/ @neondatabase/storage
|
||||
/pgxn/ @neondatabase/compute
|
||||
/pgxn/neon/ @neondatabase/compute @neondatabase/storage
|
||||
/pgxn/neon/ @neondatabase/compute @neondatabase/safekeepers
|
||||
/proxy/ @neondatabase/proxy
|
||||
/safekeeper/ @neondatabase/storage
|
||||
/safekeeper/ @neondatabase/safekeepers
|
||||
/vendor/ @neondatabase/compute
|
||||
|
||||
2384
Cargo.lock
generated
2384
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
146
Cargo.toml
146
Cargo.toml
@@ -13,9 +13,9 @@ members = [
|
||||
"safekeeper",
|
||||
"storage_broker",
|
||||
"storage_controller",
|
||||
"storage_controller/client",
|
||||
"storage_scrubber",
|
||||
"s3_scrubber",
|
||||
"workspace_hack",
|
||||
"trace",
|
||||
"libs/compute_api",
|
||||
"libs/pageserver_api",
|
||||
"libs/postgres_ffi",
|
||||
@@ -41,31 +41,29 @@ license = "Apache-2.0"
|
||||
|
||||
## All dependency versions, used in the project
|
||||
[workspace.dependencies]
|
||||
ahash = "0.8"
|
||||
anyhow = { version = "1.0", features = ["backtrace"] }
|
||||
arc-swap = "1.6"
|
||||
async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
|
||||
atomic-take = "1.1.0"
|
||||
azure_core = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls", "hmac_rust"] }
|
||||
azure_identity = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
|
||||
azure_storage = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
|
||||
azure_storage_blobs = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
|
||||
azure_core = "0.18"
|
||||
azure_identity = "0.18"
|
||||
azure_storage = "0.18"
|
||||
azure_storage_blobs = "0.18"
|
||||
flate2 = "1.0.26"
|
||||
async-stream = "0.3"
|
||||
async-trait = "0.1"
|
||||
aws-config = { version = "1.5", default-features = false, features=["rustls"] }
|
||||
aws-sdk-s3 = "1.52"
|
||||
aws-sdk-iam = "1.46.0"
|
||||
aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] }
|
||||
aws-smithy-types = "1.2"
|
||||
aws-credential-types = "1.2.0"
|
||||
aws-sigv4 = { version = "1.2", features = ["sign-http"] }
|
||||
aws-types = "1.3"
|
||||
axum = { version = "0.7.5", features = ["ws"] }
|
||||
aws-config = { version = "1.1.4", default-features = false, features=["rustls"] }
|
||||
aws-sdk-s3 = "1.14"
|
||||
aws-sdk-iam = "1.15.0"
|
||||
aws-smithy-async = { version = "1.1.4", default-features = false, features=["rt-tokio"] }
|
||||
aws-smithy-types = "1.1.4"
|
||||
aws-credential-types = "1.1.4"
|
||||
aws-sigv4 = { version = "1.2.0", features = ["sign-http"] }
|
||||
aws-types = "1.1.7"
|
||||
axum = { version = "0.6.20", features = ["ws"] }
|
||||
base64 = "0.13.0"
|
||||
bincode = "1.3"
|
||||
bindgen = "0.70"
|
||||
bit_field = "0.10.2"
|
||||
bindgen = "0.65"
|
||||
bstr = "1.0"
|
||||
byteorder = "1.4"
|
||||
bytes = "1.0"
|
||||
@@ -73,81 +71,79 @@ camino = "1.1.6"
|
||||
cfg-if = "1.0.0"
|
||||
chrono = { version = "0.4", default-features = false, features = ["clock"] }
|
||||
clap = { version = "4.0", features = ["derive"] }
|
||||
comfy-table = "7.1"
|
||||
comfy-table = "6.1"
|
||||
const_format = "0.2"
|
||||
crc32c = "0.6"
|
||||
crossbeam-utils = "0.8.5"
|
||||
dashmap = { version = "5.5.0", features = ["raw-api"] }
|
||||
either = "1.8"
|
||||
enum-map = "2.4.2"
|
||||
enumset = "1.0.12"
|
||||
fail = "0.5.0"
|
||||
fallible-iterator = "0.2"
|
||||
framed-websockets = { version = "0.1.0", git = "https://github.com/neondatabase/framed-websockets" }
|
||||
fs2 = "0.4.3"
|
||||
futures = "0.3"
|
||||
futures-core = "0.3"
|
||||
futures-util = "0.3"
|
||||
git-version = "0.3"
|
||||
hashbrown = "0.14"
|
||||
hashlink = "0.9.1"
|
||||
hashbrown = "0.13"
|
||||
hashlink = "0.8.4"
|
||||
hdrhistogram = "7.5.2"
|
||||
hex = "0.4"
|
||||
hex-literal = "0.4"
|
||||
hmac = "0.12.1"
|
||||
hostname = "0.4"
|
||||
hostname = "0.3.1"
|
||||
http = {version = "1.1.0", features = ["std"]}
|
||||
http-types = { version = "2", default-features = false }
|
||||
http-body-util = "0.1.2"
|
||||
humantime = "2.1"
|
||||
humantime-serde = "1.1.1"
|
||||
hyper0 = { package = "hyper", version = "0.14" }
|
||||
hyper = "1.4"
|
||||
hyper-util = "0.1"
|
||||
tokio-tungstenite = "0.21.0"
|
||||
indexmap = "2"
|
||||
indoc = "2"
|
||||
hyper = "0.14"
|
||||
hyper-tungstenite = "0.13.0"
|
||||
inotify = "0.10.2"
|
||||
ipnet = "2.9.0"
|
||||
itertools = "0.10"
|
||||
jsonwebtoken = "9"
|
||||
lasso = "0.7"
|
||||
leaky-bucket = "1.0.1"
|
||||
libc = "0.2"
|
||||
md5 = "0.7.0"
|
||||
measured = { version = "0.0.22", features=["lasso"] }
|
||||
measured-process = { version = "0.0.22" }
|
||||
memoffset = "0.9"
|
||||
nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] }
|
||||
measured = { version = "0.0.21", features=["lasso"] }
|
||||
measured-process = { version = "0.0.21" }
|
||||
memoffset = "0.8"
|
||||
native-tls = "0.2"
|
||||
nix = { version = "0.27", features = ["fs", "process", "socket", "signal", "poll"] }
|
||||
notify = "6.0.0"
|
||||
num_cpus = "1.15"
|
||||
num-traits = "0.2.15"
|
||||
once_cell = "1.13"
|
||||
opentelemetry = "0.24"
|
||||
opentelemetry_sdk = "0.24"
|
||||
opentelemetry-otlp = { version = "0.17", default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
|
||||
opentelemetry-semantic-conventions = "0.16"
|
||||
opentelemetry = "0.20.0"
|
||||
opentelemetry-otlp = { version = "0.13.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
|
||||
opentelemetry-semantic-conventions = "0.12.0"
|
||||
parking_lot = "0.12"
|
||||
parquet = { version = "53", default-features = false, features = ["zstd"] }
|
||||
parquet_derive = "53"
|
||||
parquet = { version = "49.0.0", default-features = false, features = ["zstd"] }
|
||||
parquet_derive = "49.0.0"
|
||||
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
|
||||
pin-project-lite = "0.2"
|
||||
procfs = "0.16"
|
||||
prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
|
||||
procfs = "0.14"
|
||||
prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
|
||||
prost = "0.11"
|
||||
rand = "0.8"
|
||||
redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] }
|
||||
regex = "1.10.2"
|
||||
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] }
|
||||
reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_24"] }
|
||||
reqwest-middleware = "0.3.0"
|
||||
reqwest-retry = "0.5"
|
||||
reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
|
||||
reqwest-tracing = { version = "0.4.7", features = ["opentelemetry_0_20"] }
|
||||
reqwest-middleware = "0.2.0"
|
||||
reqwest-retry = "0.2.2"
|
||||
routerify = "3"
|
||||
rpds = "0.13"
|
||||
rustc-hash = "1.1.0"
|
||||
rustls = "0.22"
|
||||
rustls-pemfile = "2"
|
||||
rustls-split = "0.3"
|
||||
scopeguard = "1.1"
|
||||
sysinfo = "0.29.2"
|
||||
sd-notify = "0.4.1"
|
||||
send-future = "0.1.0"
|
||||
sentry = { version = "0.32", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
|
||||
sentry = { version = "0.31", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
serde_path_to_error = "0.1"
|
||||
@@ -158,12 +154,13 @@ signal-hook = "0.3"
|
||||
smallvec = "1.11"
|
||||
smol_str = { version = "0.2.0", features = ["serde"] }
|
||||
socket2 = "0.5"
|
||||
strum = "0.26"
|
||||
strum_macros = "0.26"
|
||||
strum = "0.24"
|
||||
strum_macros = "0.24"
|
||||
"subtle" = "2.5.0"
|
||||
svg_fmt = "0.4.3"
|
||||
svg_fmt = "0.4.1"
|
||||
sync_wrapper = "0.1.2"
|
||||
tar = "0.4"
|
||||
task-local-extensions = "0.1.4"
|
||||
test-context = "0.3"
|
||||
thiserror = "1.0"
|
||||
tikv-jemallocator = "0.5"
|
||||
@@ -176,45 +173,34 @@ tokio-rustls = "0.25"
|
||||
tokio-stream = "0.1"
|
||||
tokio-tar = "0.3"
|
||||
tokio-util = { version = "0.7.10", features = ["io", "rt"] }
|
||||
toml = "0.8"
|
||||
toml_edit = "0.22"
|
||||
toml = "0.7"
|
||||
toml_edit = "0.19"
|
||||
tonic = {version = "0.9", features = ["tls", "tls-roots"]}
|
||||
tower-service = "0.3.2"
|
||||
tracing = "0.1"
|
||||
tracing-error = "0.2"
|
||||
tracing-opentelemetry = "0.25"
|
||||
tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
|
||||
try-lock = "0.2.5"
|
||||
tracing-error = "0.2.0"
|
||||
tracing-opentelemetry = "0.20.0"
|
||||
tracing-subscriber = { version = "0.3", default_features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
|
||||
twox-hash = { version = "1.6.3", default-features = false }
|
||||
typed-json = "0.1"
|
||||
url = "2.2"
|
||||
urlencoding = "2.1"
|
||||
uuid = { version = "1.6.1", features = ["v4", "v7", "serde"] }
|
||||
walkdir = "2.3.2"
|
||||
rustls-native-certs = "0.7"
|
||||
webpki-roots = "0.25"
|
||||
x509-parser = "0.15"
|
||||
whoami = "1.5.1"
|
||||
|
||||
## TODO replace this with tracing
|
||||
env_logger = "0.10"
|
||||
log = "0.4"
|
||||
|
||||
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
|
||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
|
||||
# We want to use the 'neon' branch for these, but there's currently one
|
||||
# incompatible change on the branch. See:
|
||||
#
|
||||
# - PR #8076 which contained changes that depended on the new changes in
|
||||
# the rust-postgres crate, and
|
||||
# - PR #8654 which reverted those changes and made the code in proxy incompatible
|
||||
# with the tip of the 'neon' branch again.
|
||||
#
|
||||
# When those proxy changes are re-applied (see PR #8747), we can switch using
|
||||
# the tip of the 'neon' branch again.
|
||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
|
||||
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
|
||||
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
|
||||
## Other git libraries
|
||||
heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending
|
||||
|
||||
## Local libraries
|
||||
compute_api = { version = "0.1", path = "./libs/compute_api/" }
|
||||
@@ -231,7 +217,6 @@ remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
|
||||
safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
|
||||
desim = { version = "0.1", path = "./libs/desim" }
|
||||
storage_broker = { version = "0.1", path = "./storage_broker/" } # Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy.
|
||||
storage_controller_client = { path = "./storage_controller/client" }
|
||||
tenant_size_model = { version = "0.1", path = "./libs/tenant_size_model/" }
|
||||
tracing-utils = { version = "0.1", path = "./libs/tracing-utils/" }
|
||||
utils = { version = "0.1", path = "./libs/utils/" }
|
||||
@@ -250,8 +235,13 @@ tonic-build = "0.9"
|
||||
|
||||
[patch.crates-io]
|
||||
|
||||
# Needed to get `tokio-postgres-rustls` to depend on our fork.
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
|
||||
# This is only needed for proxy's tests.
|
||||
# TODO: we should probably fork `tokio-postgres-rustls` instead.
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
|
||||
# bug fixes for UUID
|
||||
parquet = { git = "https://github.com/neondatabase/arrow-rs", branch = "neon-fix-bugs" }
|
||||
parquet_derive = { git = "https://github.com/neondatabase/arrow-rs", branch = "neon-fix-bugs" }
|
||||
|
||||
################# Binary contents sections
|
||||
|
||||
|
||||
54
Dockerfile
54
Dockerfile
@@ -5,8 +5,6 @@
|
||||
ARG REPOSITORY=neondatabase
|
||||
ARG IMAGE=build-tools
|
||||
ARG TAG=pinned
|
||||
ARG DEFAULT_PG_VERSION=17
|
||||
ARG STABLE_PG_VERSION=16
|
||||
|
||||
# Build Postgres
|
||||
FROM $REPOSITORY/$IMAGE:$TAG AS pg-build
|
||||
@@ -15,12 +13,11 @@ WORKDIR /home/nonroot
|
||||
COPY --chown=nonroot vendor/postgres-v14 vendor/postgres-v14
|
||||
COPY --chown=nonroot vendor/postgres-v15 vendor/postgres-v15
|
||||
COPY --chown=nonroot vendor/postgres-v16 vendor/postgres-v16
|
||||
COPY --chown=nonroot vendor/postgres-v17 vendor/postgres-v17
|
||||
COPY --chown=nonroot pgxn pgxn
|
||||
COPY --chown=nonroot Makefile Makefile
|
||||
COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
|
||||
|
||||
ENV BUILD_TYPE=release
|
||||
ENV BUILD_TYPE release
|
||||
RUN set -e \
|
||||
&& mold -run make -j $(nproc) -s neon-pg-ext \
|
||||
&& rm -rf pg_install/build \
|
||||
@@ -31,19 +28,26 @@ FROM $REPOSITORY/$IMAGE:$TAG AS build
|
||||
WORKDIR /home/nonroot
|
||||
ARG GIT_VERSION=local
|
||||
ARG BUILD_TAG
|
||||
ARG STABLE_PG_VERSION
|
||||
|
||||
# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
|
||||
# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
|
||||
# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build
|
||||
ARG RUSTC_WRAPPER=cachepot
|
||||
ENV AWS_REGION=eu-central-1
|
||||
ENV CACHEPOT_S3_KEY_PREFIX=cachepot
|
||||
ARG CACHEPOT_BUCKET=neon-github-dev
|
||||
#ARG AWS_ACCESS_KEY_ID
|
||||
#ARG AWS_SECRET_ACCESS_KEY
|
||||
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v17/include/postgresql/server pg_install/v17/include/postgresql/server
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v16/lib pg_install/v16/lib
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v17/lib pg_install/v17/lib
|
||||
COPY --chown=nonroot . .
|
||||
|
||||
ARG ADDITIONAL_RUSTFLAGS
|
||||
# Show build caching stats to check if it was used in the end.
|
||||
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
|
||||
RUN set -e \
|
||||
&& PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment ${ADDITIONAL_RUSTFLAGS}" cargo build \
|
||||
&& RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment" cargo build \
|
||||
--bin pg_sni_router \
|
||||
--bin pageserver \
|
||||
--bin pagectl \
|
||||
@@ -52,13 +56,12 @@ RUN set -e \
|
||||
--bin storage_controller \
|
||||
--bin proxy \
|
||||
--bin neon_local \
|
||||
--bin storage_scrubber \
|
||||
--locked --release
|
||||
--locked --release \
|
||||
&& cachepot -s
|
||||
|
||||
# Build final image
|
||||
#
|
||||
FROM debian:bullseye-slim
|
||||
ARG DEFAULT_PG_VERSION
|
||||
WORKDIR /data
|
||||
|
||||
RUN set -e \
|
||||
@@ -66,6 +69,8 @@ RUN set -e \
|
||||
&& apt install -y \
|
||||
libreadline-dev \
|
||||
libseccomp-dev \
|
||||
libicu67 \
|
||||
openssl \
|
||||
ca-certificates \
|
||||
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
|
||||
&& useradd -d /data neon \
|
||||
@@ -79,35 +84,28 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_broker
|
||||
COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_controller /usr/local/bin
|
||||
COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin
|
||||
COPY --from=build --chown=neon:neon /home/nonroot/target/release/neon_local /usr/local/bin
|
||||
COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_scrubber /usr/local/bin
|
||||
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v16 /usr/local/v16/
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v17 /usr/local/v17/
|
||||
COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
|
||||
|
||||
# By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.
|
||||
# Now, when `docker run ... pageserver` is run, it can start without errors, yet will have some default dummy values.
|
||||
RUN mkdir -p /data/.neon/ && \
|
||||
echo "id=1234" > "/data/.neon/identity.toml" && \
|
||||
echo "broker_endpoint='http://storage_broker:50051'\n" \
|
||||
"pg_distrib_dir='/usr/local/'\n" \
|
||||
"listen_pg_addr='0.0.0.0:6400'\n" \
|
||||
"listen_http_addr='0.0.0.0:9898'\n" \
|
||||
"availability_zone='local'\n" \
|
||||
> /data/.neon/pageserver.toml && \
|
||||
chown -R neon:neon /data/.neon
|
||||
RUN mkdir -p /data/.neon/ && chown -R neon:neon /data/.neon/ \
|
||||
&& /usr/local/bin/pageserver -D /data/.neon/ --init \
|
||||
-c "id=1234" \
|
||||
-c "broker_endpoint='http://storage_broker:50051'" \
|
||||
-c "pg_distrib_dir='/usr/local/'" \
|
||||
-c "listen_pg_addr='0.0.0.0:6400'" \
|
||||
-c "listen_http_addr='0.0.0.0:9898'"
|
||||
|
||||
# When running a binary that links with libpq, default to using our most recent postgres version. Binaries
|
||||
# that want a particular postgres version will select it explicitly: this is just a default.
|
||||
ENV LD_LIBRARY_PATH=/usr/local/v${DEFAULT_PG_VERSION}/lib
|
||||
ENV LD_LIBRARY_PATH /usr/local/v16/lib
|
||||
|
||||
|
||||
VOLUME ["/data"]
|
||||
USER neon
|
||||
EXPOSE 6400
|
||||
EXPOSE 9898
|
||||
|
||||
CMD ["/usr/local/bin/pageserver", "-D", "/data/.neon"]
|
||||
|
||||
|
||||
@@ -1,21 +1,10 @@
|
||||
FROM debian:bullseye-slim
|
||||
|
||||
# Use ARG as a build-time environment variable here to allow.
|
||||
# It's not supposed to be set outside.
|
||||
# Alternatively it can be obtained using the following command
|
||||
# ```
|
||||
# . /etc/os-release && echo "${VERSION_CODENAME}"
|
||||
# ```
|
||||
ARG DEBIAN_VERSION_CODENAME=bullseye
|
||||
|
||||
# Add nonroot user
|
||||
RUN useradd -ms /bin/bash nonroot -b /home
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
# System deps
|
||||
#
|
||||
# 'gdb' is included so that we get backtraces of core dumps produced in
|
||||
# regression tests
|
||||
RUN set -e \
|
||||
&& apt update \
|
||||
&& apt install -y \
|
||||
@@ -27,7 +16,6 @@ RUN set -e \
|
||||
cmake \
|
||||
curl \
|
||||
flex \
|
||||
gdb \
|
||||
git \
|
||||
gnupg \
|
||||
gzip \
|
||||
@@ -38,6 +26,7 @@ RUN set -e \
|
||||
liblzma-dev \
|
||||
libncurses5-dev \
|
||||
libncursesw5-dev \
|
||||
libpq-dev \
|
||||
libreadline-dev \
|
||||
libseccomp-dev \
|
||||
libsqlite3-dev \
|
||||
@@ -62,7 +51,7 @@ RUN set -e \
|
||||
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
|
||||
# protobuf-compiler (protoc)
|
||||
ENV PROTOC_VERSION=25.1
|
||||
ENV PROTOC_VERSION 25.1
|
||||
RUN curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-$(uname -m | sed 's/aarch64/aarch_64/g').zip" -o "protoc.zip" \
|
||||
&& unzip -q protoc.zip -d protoc \
|
||||
&& mv protoc/bin/protoc /usr/local/bin/protoc \
|
||||
@@ -76,26 +65,21 @@ RUN curl -sL "https://github.com/peak/s5cmd/releases/download/v${S5CMD_VERSION}/
|
||||
&& mv s5cmd /usr/local/bin/s5cmd
|
||||
|
||||
# LLVM
|
||||
ENV LLVM_VERSION=18
|
||||
ENV LLVM_VERSION=17
|
||||
RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
|
||||
&& echo "deb http://apt.llvm.org/${DEBIAN_VERSION_CODENAME}/ llvm-toolchain-${DEBIAN_VERSION_CODENAME}-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \
|
||||
&& echo "deb http://apt.llvm.org/bullseye/ llvm-toolchain-bullseye-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \
|
||||
&& apt update \
|
||||
&& apt install -y clang-${LLVM_VERSION} llvm-${LLVM_VERSION} \
|
||||
&& bash -c 'for f in /usr/bin/clang*-${LLVM_VERSION} /usr/bin/llvm*-${LLVM_VERSION}; do ln -s "${f}" "${f%-${LLVM_VERSION}}"; done' \
|
||||
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
|
||||
# Install docker
|
||||
RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg \
|
||||
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/debian ${DEBIAN_VERSION_CODENAME} stable" > /etc/apt/sources.list.d/docker.list \
|
||||
# PostgreSQL 14
|
||||
RUN curl -fsSL 'https://www.postgresql.org/media/keys/ACCC4CF8.asc' | apt-key add - \
|
||||
&& echo 'deb http://apt.postgresql.org/pub/repos/apt bullseye-pgdg main' > /etc/apt/sources.list.d/pgdg.list \
|
||||
&& apt update \
|
||||
&& apt install -y docker-ce docker-ce-cli \
|
||||
&& apt install -y postgresql-client-14 \
|
||||
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
|
||||
# Configure sudo & docker
|
||||
RUN usermod -aG sudo nonroot && \
|
||||
echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers && \
|
||||
usermod -aG docker nonroot
|
||||
|
||||
# AWS CLI
|
||||
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "awscliv2.zip" \
|
||||
&& unzip -q awscliv2.zip \
|
||||
@@ -103,7 +87,7 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "aws
|
||||
&& rm awscliv2.zip
|
||||
|
||||
# Mold: A Modern Linker
|
||||
ENV MOLD_VERSION=v2.33.0
|
||||
ENV MOLD_VERSION v2.4.0
|
||||
RUN set -e \
|
||||
&& git clone https://github.com/rui314/mold.git \
|
||||
&& mkdir mold/build \
|
||||
@@ -128,51 +112,12 @@ RUN for package in Capture::Tiny DateTime Devel::Cover Digest::MD5 File::Spec JS
|
||||
&& make install \
|
||||
&& rm -rf ../lcov.tar.gz
|
||||
|
||||
# Compile and install the static OpenSSL library
|
||||
ENV OPENSSL_VERSION=1.1.1w
|
||||
ENV OPENSSL_PREFIX=/usr/local/openssl
|
||||
RUN wget -O /tmp/openssl-${OPENSSL_VERSION}.tar.gz https://www.openssl.org/source/openssl-${OPENSSL_VERSION}.tar.gz && \
|
||||
echo "cf3098950cb4d853ad95c0841f1f9c6d3dc102dccfcacd521d93925208b76ac8 /tmp/openssl-${OPENSSL_VERSION}.tar.gz" | sha256sum --check && \
|
||||
cd /tmp && \
|
||||
tar xzvf /tmp/openssl-${OPENSSL_VERSION}.tar.gz && \
|
||||
rm /tmp/openssl-${OPENSSL_VERSION}.tar.gz && \
|
||||
cd /tmp/openssl-${OPENSSL_VERSION} && \
|
||||
./config --prefix=${OPENSSL_PREFIX} -static --static no-shared -fPIC && \
|
||||
make -j "$(nproc)" && \
|
||||
make install && \
|
||||
cd /tmp && \
|
||||
rm -rf /tmp/openssl-${OPENSSL_VERSION}
|
||||
|
||||
# Use the same version of libicu as the compute nodes so that
|
||||
# clusters created using inidb on pageserver can be used by computes.
|
||||
#
|
||||
# TODO: at this time, Dockerfile.compute-node uses the debian bullseye libicu
|
||||
# package, which is 67.1. We're duplicating that knowledge here, and also, technically,
|
||||
# Debian has a few patches on top of 67.1 that we're not adding here.
|
||||
ENV ICU_VERSION=67.1
|
||||
ENV ICU_PREFIX=/usr/local/icu
|
||||
|
||||
# Download and build static ICU
|
||||
RUN wget -O /tmp/libicu-${ICU_VERSION}.tgz https://github.com/unicode-org/icu/releases/download/release-${ICU_VERSION//./-}/icu4c-${ICU_VERSION//./_}-src.tgz && \
|
||||
echo "94a80cd6f251a53bd2a997f6f1b5ac6653fe791dfab66e1eb0227740fb86d5dc /tmp/libicu-${ICU_VERSION}.tgz" | sha256sum --check && \
|
||||
mkdir /tmp/icu && \
|
||||
pushd /tmp/icu && \
|
||||
tar -xzf /tmp/libicu-${ICU_VERSION}.tgz && \
|
||||
pushd icu/source && \
|
||||
./configure --prefix=${ICU_PREFIX} --enable-static --enable-shared=no CXXFLAGS="-fPIC" CFLAGS="-fPIC" && \
|
||||
make -j "$(nproc)" && \
|
||||
make install && \
|
||||
popd && \
|
||||
rm -rf icu && \
|
||||
rm -f /tmp/libicu-${ICU_VERSION}.tgz && \
|
||||
popd
|
||||
|
||||
# Switch to nonroot user
|
||||
USER nonroot:nonroot
|
||||
WORKDIR /home/nonroot
|
||||
|
||||
# Python
|
||||
ENV PYTHON_VERSION=3.9.19 \
|
||||
ENV PYTHON_VERSION=3.9.18 \
|
||||
PYENV_ROOT=/home/nonroot/.pyenv \
|
||||
PATH=/home/nonroot/.pyenv/shims:/home/nonroot/.pyenv/bin:/home/nonroot/.poetry/bin:$PATH
|
||||
RUN set -e \
|
||||
@@ -196,14 +141,9 @@ WORKDIR /home/nonroot
|
||||
|
||||
# Rust
|
||||
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
|
||||
ENV RUSTC_VERSION=1.81.0
|
||||
ENV RUSTC_VERSION=1.77.0
|
||||
ENV RUSTUP_HOME="/home/nonroot/.rustup"
|
||||
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
|
||||
ARG RUSTFILT_VERSION=0.2.1
|
||||
ARG CARGO_HAKARI_VERSION=0.9.30
|
||||
ARG CARGO_DENY_VERSION=0.16.1
|
||||
ARG CARGO_HACK_VERSION=0.6.31
|
||||
ARG CARGO_NEXTEST_VERSION=0.9.72
|
||||
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
|
||||
chmod +x rustup-init && \
|
||||
./rustup-init -y --default-toolchain ${RUSTC_VERSION} && \
|
||||
@@ -211,14 +151,16 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
|
||||
export PATH="$HOME/.cargo/bin:$PATH" && \
|
||||
. "$HOME/.cargo/env" && \
|
||||
cargo --version && rustup --version && \
|
||||
rustup component add llvm-tools rustfmt clippy && \
|
||||
cargo install rustfilt --version ${RUSTFILT_VERSION} && \
|
||||
cargo install cargo-hakari --version ${CARGO_HAKARI_VERSION} && \
|
||||
cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
|
||||
cargo install cargo-hack --version ${CARGO_HACK_VERSION} && \
|
||||
cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} && \
|
||||
rustup component add llvm-tools-preview rustfmt clippy && \
|
||||
cargo install --git https://github.com/paritytech/cachepot && \
|
||||
cargo install rustfilt && \
|
||||
cargo install cargo-hakari && \
|
||||
cargo install cargo-deny --locked && \
|
||||
cargo install cargo-hack && \
|
||||
cargo install cargo-nextest && \
|
||||
rm -rf /home/nonroot/.cargo/registry && \
|
||||
rm -rf /home/nonroot/.cargo/git
|
||||
ENV RUSTC_WRAPPER=cachepot
|
||||
|
||||
# Show versions
|
||||
RUN whoami \
|
||||
@@ -228,6 +170,3 @@ RUN whoami \
|
||||
&& rustup --version --verbose \
|
||||
&& rustc --version --verbose \
|
||||
&& clang --version
|
||||
|
||||
# Set following flag to check in Makefile if its running in Docker
|
||||
RUN touch /home/nonroot/.docker_build
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
113
Makefile
113
Makefile
@@ -3,9 +3,6 @@ ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
|
||||
# Where to install Postgres, default is ./pg_install, maybe useful for package managers
|
||||
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/
|
||||
|
||||
OPENSSL_PREFIX_DIR := /usr/local/openssl
|
||||
ICU_PREFIX_DIR := /usr/local/icu
|
||||
|
||||
#
|
||||
# We differentiate between release / debug build types using the BUILD_TYPE
|
||||
# environment variable.
|
||||
@@ -23,31 +20,19 @@ else
|
||||
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
|
||||
endif
|
||||
|
||||
ifeq ($(shell test -e /home/nonroot/.docker_build && echo -n yes),yes)
|
||||
# Exclude static build openssl, icu for local build (MacOS, Linux)
|
||||
# Only keep for build type release and debug
|
||||
PG_CFLAGS += -I$(OPENSSL_PREFIX_DIR)/include
|
||||
PG_CONFIGURE_OPTS += --with-icu
|
||||
PG_CONFIGURE_OPTS += ICU_CFLAGS='-I/$(ICU_PREFIX_DIR)/include -DU_STATIC_IMPLEMENTATION'
|
||||
PG_CONFIGURE_OPTS += ICU_LIBS='-L$(ICU_PREFIX_DIR)/lib -L$(ICU_PREFIX_DIR)/lib64 -licui18n -licuuc -licudata -lstdc++ -Wl,-Bdynamic -lm'
|
||||
PG_CONFIGURE_OPTS += LDFLAGS='-L$(OPENSSL_PREFIX_DIR)/lib -L$(OPENSSL_PREFIX_DIR)/lib64 -L$(ICU_PREFIX_DIR)/lib -L$(ICU_PREFIX_DIR)/lib64 -Wl,-Bstatic -lssl -lcrypto -Wl,-Bdynamic -lrt -lm -ldl -lpthread'
|
||||
endif
|
||||
|
||||
UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
# Seccomp BPF is only available for Linux
|
||||
PG_CONFIGURE_OPTS += --with-libseccomp
|
||||
else ifeq ($(UNAME_S),Darwin)
|
||||
ifndef DISABLE_HOMEBREW
|
||||
# macOS with brew-installed openssl requires explicit paths
|
||||
# It can be configured with OPENSSL_PREFIX variable
|
||||
OPENSSL_PREFIX := $(shell brew --prefix openssl@3)
|
||||
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
|
||||
PG_CONFIGURE_OPTS += PKG_CONFIG_PATH=$(shell brew --prefix icu4c)/lib/pkgconfig
|
||||
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
|
||||
# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
|
||||
EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
|
||||
endif
|
||||
# macOS with brew-installed openssl requires explicit paths
|
||||
# It can be configured with OPENSSL_PREFIX variable
|
||||
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
|
||||
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
|
||||
PG_CONFIGURE_OPTS += PKG_CONFIG_PATH=$(shell brew --prefix icu4c)/lib/pkgconfig
|
||||
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
|
||||
# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
|
||||
EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
|
||||
endif
|
||||
|
||||
# Use -C option so that when PostgreSQL "make install" installs the
|
||||
@@ -69,8 +54,6 @@ CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
|
||||
# Set PQ_LIB_DIR to make sure `storage_controller` get linked with bundled libpq (through diesel)
|
||||
CARGO_CMD_PREFIX += PQ_LIB_DIR=$(POSTGRES_INSTALL_DIR)/v16/lib
|
||||
|
||||
CACHEDIR_TAG_CONTENTS := "Signature: 8a477f597d28d172789f06886806bc55"
|
||||
|
||||
#
|
||||
# Top level Makefile to build Neon and PostgreSQL
|
||||
#
|
||||
@@ -81,46 +64,32 @@ all: neon postgres neon-pg-ext
|
||||
#
|
||||
# The 'postgres_ffi' depends on the Postgres headers.
|
||||
.PHONY: neon
|
||||
neon: postgres-headers walproposer-lib cargo-target-dir
|
||||
neon: postgres-headers walproposer-lib
|
||||
+@echo "Compiling Neon"
|
||||
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
|
||||
.PHONY: cargo-target-dir
|
||||
cargo-target-dir:
|
||||
# https://github.com/rust-lang/cargo/issues/14281
|
||||
mkdir -p target
|
||||
test -e target/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > target/CACHEDIR.TAG
|
||||
|
||||
### PostgreSQL parts
|
||||
# Some rules are duplicated for Postgres v14 and 15. We may want to refactor
|
||||
# to avoid the duplication in the future, but it's tolerable for now.
|
||||
#
|
||||
$(POSTGRES_INSTALL_DIR)/build/%/config.status:
|
||||
|
||||
mkdir -p $(POSTGRES_INSTALL_DIR)
|
||||
test -e $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG
|
||||
|
||||
+@echo "Configuring Postgres $* build"
|
||||
@test -s $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure || { \
|
||||
echo "\nPostgres submodule not found in $(ROOT_PROJECT_DIR)/vendor/postgres-$*/, execute "; \
|
||||
echo "'git submodule update --init --recursive --depth 2 --progress .' in project root.\n"; \
|
||||
exit 1; }
|
||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/$*
|
||||
|
||||
VERSION=$*; \
|
||||
EXTRA_VERSION=$$(cd $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION && git rev-parse HEAD); \
|
||||
(cd $(POSTGRES_INSTALL_DIR)/build/$$VERSION && \
|
||||
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION/configure \
|
||||
(cd $(POSTGRES_INSTALL_DIR)/build/$* && \
|
||||
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure \
|
||||
CFLAGS='$(PG_CFLAGS)' \
|
||||
$(PG_CONFIGURE_OPTS) --with-extra-version=" ($$EXTRA_VERSION)" \
|
||||
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$$VERSION > configure.log)
|
||||
$(PG_CONFIGURE_OPTS) \
|
||||
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$* > configure.log)
|
||||
|
||||
# nicer alias to run 'configure'
|
||||
# Note: I've been unable to use templates for this part of our configuration.
|
||||
# I'm not sure why it wouldn't work, but this is the only place (apart from
|
||||
# the "build-all-versions" entry points) where direct mention of PostgreSQL
|
||||
# versions is used.
|
||||
.PHONY: postgres-configure-v17
|
||||
postgres-configure-v17: $(POSTGRES_INSTALL_DIR)/build/v17/config.status
|
||||
.PHONY: postgres-configure-v16
|
||||
postgres-configure-v16: $(POSTGRES_INSTALL_DIR)/build/v16/config.status
|
||||
.PHONY: postgres-configure-v15
|
||||
@@ -150,8 +119,6 @@ postgres-%: postgres-configure-% \
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install
|
||||
+@echo "Compiling amcheck $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/amcheck install
|
||||
+@echo "Compiling test_decoding $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/test_decoding install
|
||||
|
||||
.PHONY: postgres-clean-%
|
||||
postgres-clean-%:
|
||||
@@ -217,31 +184,29 @@ neon-pg-clean-ext-%:
|
||||
# they depend on openssl and other libraries that are not included in our
|
||||
# Rust build.
|
||||
.PHONY: walproposer-lib
|
||||
walproposer-lib: neon-pg-ext-v17
|
||||
walproposer-lib: neon-pg-ext-v16
|
||||
+@echo "Compiling walproposer-lib"
|
||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib
|
||||
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
|
||||
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgcommon.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
|
||||
cp $(POSTGRES_INSTALL_DIR)/v16/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
|
||||
cp $(POSTGRES_INSTALL_DIR)/v16/lib/libpgcommon.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgport.a \
|
||||
pg_strong_random.o
|
||||
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
|
||||
checksum_helper.o \
|
||||
cryptohash_openssl.o \
|
||||
pg_crc32c.o \
|
||||
hmac_openssl.o \
|
||||
cryptohash_openssl.o \
|
||||
scram-common.o \
|
||||
md5_common.o \
|
||||
parse_manifest.o \
|
||||
scram-common.o
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
|
||||
pg_crc32c.o
|
||||
checksum_helper.o
|
||||
endif
|
||||
|
||||
.PHONY: walproposer-lib-clean
|
||||
walproposer-lib-clean:
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config \
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
|
||||
|
||||
@@ -249,44 +214,38 @@ walproposer-lib-clean:
|
||||
neon-pg-ext: \
|
||||
neon-pg-ext-v14 \
|
||||
neon-pg-ext-v15 \
|
||||
neon-pg-ext-v16 \
|
||||
neon-pg-ext-v17
|
||||
neon-pg-ext-v16
|
||||
|
||||
.PHONY: neon-pg-clean-ext
|
||||
neon-pg-clean-ext: \
|
||||
neon-pg-clean-ext-v14 \
|
||||
neon-pg-clean-ext-v15 \
|
||||
neon-pg-clean-ext-v16 \
|
||||
neon-pg-clean-ext-v17
|
||||
neon-pg-clean-ext-v16
|
||||
|
||||
# shorthand to build all Postgres versions
|
||||
.PHONY: postgres
|
||||
postgres: \
|
||||
postgres-v14 \
|
||||
postgres-v15 \
|
||||
postgres-v16 \
|
||||
postgres-v17
|
||||
postgres-v16
|
||||
|
||||
.PHONY: postgres-headers
|
||||
postgres-headers: \
|
||||
postgres-headers-v14 \
|
||||
postgres-headers-v15 \
|
||||
postgres-headers-v16 \
|
||||
postgres-headers-v17
|
||||
postgres-headers-v16
|
||||
|
||||
.PHONY: postgres-clean
|
||||
postgres-clean: \
|
||||
postgres-clean-v14 \
|
||||
postgres-clean-v15 \
|
||||
postgres-clean-v16 \
|
||||
postgres-clean-v17
|
||||
postgres-clean-v16
|
||||
|
||||
.PHONY: postgres-check
|
||||
postgres-check: \
|
||||
postgres-check-v14 \
|
||||
postgres-check-v15 \
|
||||
postgres-check-v16 \
|
||||
postgres-check-v17
|
||||
postgres-check-v16
|
||||
|
||||
# This doesn't remove the effects of 'configure'.
|
||||
.PHONY: clean
|
||||
@@ -331,13 +290,13 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
|
||||
rm -f pg*.BAK
|
||||
|
||||
# Indent pxgn/neon.
|
||||
.PHONY: neon-pgindent
|
||||
neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
|
||||
FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
|
||||
INDENT=$(POSTGRES_INSTALL_DIR)/build/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
|
||||
PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-v17 \
|
||||
.PHONY: pgindent
|
||||
neon-pgindent: postgres-v16-pg-bsd-indent neon-pg-ext-v16
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
|
||||
FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v16/src/tools/find_typedef \
|
||||
INDENT=$(POSTGRES_INSTALL_DIR)/build/v16/src/tools/pg_bsd_indent/pg_bsd_indent \
|
||||
PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v16/src/tools/pgindent/pgindent \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-v16 \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent
|
||||
|
||||
|
||||
|
||||
16
README.md
16
README.md
@@ -1,6 +1,4 @@
|
||||
[](https://neon.tech)
|
||||
|
||||
|
||||
[](https://neon.tech)
|
||||
|
||||
# Neon
|
||||
|
||||
@@ -58,18 +56,12 @@ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
||||
1. Install XCode and dependencies
|
||||
```
|
||||
xcode-select --install
|
||||
brew install protobuf openssl flex bison icu4c pkg-config m4
|
||||
brew install protobuf openssl flex bison icu4c pkg-config
|
||||
|
||||
# add openssl to PATH, required for ed25519 keys generation in neon_local
|
||||
echo 'export PATH="$(brew --prefix openssl)/bin:$PATH"' >> ~/.zshrc
|
||||
```
|
||||
|
||||
If you get errors about missing `m4` you may have to install it manually:
|
||||
```
|
||||
brew install m4
|
||||
brew link --force m4
|
||||
```
|
||||
|
||||
2. [Install Rust](https://www.rust-lang.org/tools/install)
|
||||
```
|
||||
# recommended approach from https://www.rust-lang.org/tools/install
|
||||
@@ -132,7 +124,7 @@ make -j`sysctl -n hw.logicalcpu` -s
|
||||
To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `pg_install/bin` and `pg_install/lib`, respectively.
|
||||
|
||||
To run the integration tests or Python scripts (not required to use the code), install
|
||||
Python (3.9 or higher), and install the python3 packages using `./scripts/pysync` (requires [poetry>=1.8](https://python-poetry.org/)) in the project directory.
|
||||
Python (3.9 or higher), and install the python3 packages using `./scripts/pysync` (requires [poetry>=1.3](https://python-poetry.org/)) in the project directory.
|
||||
|
||||
|
||||
#### Running neon database
|
||||
@@ -268,7 +260,7 @@ By default, this runs both debug and release modes, and all supported postgres v
|
||||
testing locally, it is convenient to run just one set of permutations, like this:
|
||||
|
||||
```sh
|
||||
DEFAULT_PG_VERSION=16 BUILD_TYPE=release ./scripts/pytest
|
||||
DEFAULT_PG_VERSION=15 BUILD_TYPE=release ./scripts/pytest
|
||||
```
|
||||
|
||||
## Flamegraphs
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
This directory contains files that are needed to build the compute
|
||||
images, or included in the compute images.
|
||||
|
||||
Dockerfile.compute-node
|
||||
To build the compute image
|
||||
|
||||
vm-image-spec.yaml
|
||||
Instructions for vm-builder, to turn the compute-node image into
|
||||
corresponding vm-compute-node image.
|
||||
|
||||
etc/
|
||||
Configuration files included in /etc in the compute image
|
||||
|
||||
patches/
|
||||
Some extensions need to be patched to work with Neon. This
|
||||
directory contains such patches. They are applied to the extension
|
||||
sources in Dockerfile.compute-node
|
||||
|
||||
In addition to these, postgres itself, the neon postgres extension,
|
||||
and compute_ctl are built and copied into the compute image by
|
||||
Dockerfile.compute-node.
|
||||
@@ -1,331 +0,0 @@
|
||||
collector_name: neon_collector
|
||||
metrics:
|
||||
- metric_name: lfc_misses
|
||||
type: gauge
|
||||
help: 'lfc_misses'
|
||||
key_labels:
|
||||
values: [lfc_misses]
|
||||
query: |
|
||||
select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses';
|
||||
|
||||
- metric_name: lfc_used
|
||||
type: gauge
|
||||
help: 'LFC chunks used (chunk = 1MB)'
|
||||
key_labels:
|
||||
values: [lfc_used]
|
||||
query: |
|
||||
select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used';
|
||||
|
||||
- metric_name: lfc_hits
|
||||
type: gauge
|
||||
help: 'lfc_hits'
|
||||
key_labels:
|
||||
values: [lfc_hits]
|
||||
query: |
|
||||
select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits';
|
||||
|
||||
- metric_name: lfc_writes
|
||||
type: gauge
|
||||
help: 'lfc_writes'
|
||||
key_labels:
|
||||
values: [lfc_writes]
|
||||
query: |
|
||||
select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes';
|
||||
|
||||
- metric_name: lfc_cache_size_limit
|
||||
type: gauge
|
||||
help: 'LFC cache size limit in bytes'
|
||||
key_labels:
|
||||
values: [lfc_cache_size_limit]
|
||||
query: |
|
||||
select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit;
|
||||
|
||||
- metric_name: connection_counts
|
||||
type: gauge
|
||||
help: 'Connection counts'
|
||||
key_labels:
|
||||
- datname
|
||||
- state
|
||||
values: [count]
|
||||
query: |
|
||||
select datname, state, count(*) as count from pg_stat_activity where state <> '' group by datname, state;
|
||||
|
||||
- metric_name: pg_stats_userdb
|
||||
type: gauge
|
||||
help: 'Stats for several oldest non-system dbs'
|
||||
key_labels:
|
||||
- datname
|
||||
value_label: kind
|
||||
values:
|
||||
- db_size
|
||||
- deadlocks
|
||||
# Rows
|
||||
- inserted
|
||||
- updated
|
||||
- deleted
|
||||
# We export stats for 10 non-system database. Without this limit
|
||||
# it is too easy to abuse the system by creating lots of databases.
|
||||
query: |
|
||||
select pg_database_size(datname) as db_size, deadlocks,
|
||||
tup_inserted as inserted, tup_updated as updated, tup_deleted as deleted,
|
||||
datname
|
||||
from pg_stat_database
|
||||
where datname IN (
|
||||
select datname
|
||||
from pg_database
|
||||
where datname <> 'postgres' and not datistemplate
|
||||
order by oid
|
||||
limit 10
|
||||
);
|
||||
|
||||
- metric_name: max_cluster_size
|
||||
type: gauge
|
||||
help: 'neon.max_cluster_size setting'
|
||||
key_labels:
|
||||
values: [max_cluster_size]
|
||||
query: |
|
||||
select setting::int as max_cluster_size from pg_settings where name = 'neon.max_cluster_size';
|
||||
|
||||
- metric_name: db_total_size
|
||||
type: gauge
|
||||
help: 'Size of all databases'
|
||||
key_labels:
|
||||
values: [total]
|
||||
query: |
|
||||
select sum(pg_database_size(datname)) as total from pg_database;
|
||||
|
||||
- metric_name: getpage_wait_seconds_count
|
||||
type: counter
|
||||
help: 'Number of getpage requests'
|
||||
values: [getpage_wait_seconds_count]
|
||||
query_ref: neon_perf_counters
|
||||
|
||||
- metric_name: getpage_wait_seconds_sum
|
||||
type: counter
|
||||
help: 'Time spent in getpage requests'
|
||||
values: [getpage_wait_seconds_sum]
|
||||
query_ref: neon_perf_counters
|
||||
|
||||
- metric_name: getpage_prefetch_requests_total
|
||||
type: counter
|
||||
help: 'Number of getpage issued for prefetching'
|
||||
values: [getpage_prefetch_requests_total]
|
||||
query_ref: neon_perf_counters
|
||||
|
||||
- metric_name: getpage_sync_requests_total
|
||||
type: counter
|
||||
help: 'Number of synchronous getpage issued'
|
||||
values: [getpage_sync_requests_total]
|
||||
query_ref: neon_perf_counters
|
||||
|
||||
- metric_name: getpage_prefetch_misses_total
|
||||
type: counter
|
||||
help: 'Total number of readahead misses; consisting of either prefetches that don''t satisfy the LSN bounds once the prefetch got read by the backend, or cases where somehow no readahead was issued for the read'
|
||||
values: [getpage_prefetch_misses_total]
|
||||
query_ref: neon_perf_counters
|
||||
|
||||
- metric_name: getpage_prefetch_discards_total
|
||||
type: counter
|
||||
help: 'Number of prefetch responses issued but not used'
|
||||
values: [getpage_prefetch_discards_total]
|
||||
query_ref: neon_perf_counters
|
||||
|
||||
- metric_name: pageserver_requests_sent_total
|
||||
type: counter
|
||||
help: 'Number of all requests sent to the pageserver (not just GetPage requests)'
|
||||
values: [pageserver_requests_sent_total]
|
||||
query_ref: neon_perf_counters
|
||||
|
||||
- metric_name: pageserver_disconnects_total
|
||||
type: counter
|
||||
help: 'Number of times that the connection to the pageserver was lost'
|
||||
values: [pageserver_disconnects_total]
|
||||
query_ref: neon_perf_counters
|
||||
|
||||
- metric_name: pageserver_send_flushes_total
|
||||
type: counter
|
||||
help: 'Number of flushes to the pageserver connection'
|
||||
values: [pageserver_send_flushes_total]
|
||||
query_ref: neon_perf_counters
|
||||
|
||||
- metric_name: getpage_wait_seconds_bucket
|
||||
type: counter
|
||||
help: 'Histogram buckets of getpage request latency'
|
||||
key_labels:
|
||||
- bucket_le
|
||||
values: [value]
|
||||
query_ref: getpage_wait_seconds_buckets
|
||||
|
||||
# DEPRECATED
|
||||
- metric_name: lfc_approximate_working_set_size
|
||||
type: gauge
|
||||
help: 'Approximate working set size in pages of 8192 bytes'
|
||||
key_labels:
|
||||
values: [approximate_working_set_size]
|
||||
query: |
|
||||
select neon.approximate_working_set_size(false) as approximate_working_set_size;
|
||||
|
||||
- metric_name: lfc_approximate_working_set_size_windows
|
||||
type: gauge
|
||||
help: 'Approximate working set size in pages of 8192 bytes'
|
||||
key_labels: [duration]
|
||||
values: [size]
|
||||
# NOTE: This is the "public" / "human-readable" version. Here, we supply a small selection
|
||||
# of durations in a pretty-printed form.
|
||||
query: |
|
||||
select
|
||||
x as duration,
|
||||
neon.approximate_working_set_size_seconds(extract('epoch' from x::interval)::int) as size
|
||||
from
|
||||
(values ('5m'),('15m'),('1h')) as t (x);
|
||||
|
||||
- metric_name: compute_current_lsn
|
||||
type: gauge
|
||||
help: 'Current LSN of the database'
|
||||
key_labels:
|
||||
values: [lsn]
|
||||
query: |
|
||||
select
|
||||
case
|
||||
when pg_catalog.pg_is_in_recovery()
|
||||
then (pg_last_wal_replay_lsn() - '0/0')::FLOAT8
|
||||
else (pg_current_wal_lsn() - '0/0')::FLOAT8
|
||||
end as lsn;
|
||||
|
||||
- metric_name: compute_receive_lsn
|
||||
type: gauge
|
||||
help: 'Returns the last write-ahead log location that has been received and synced to disk by streaming replication'
|
||||
key_labels:
|
||||
values: [lsn]
|
||||
query: |
|
||||
SELECT
|
||||
CASE
|
||||
WHEN pg_catalog.pg_is_in_recovery()
|
||||
THEN (pg_last_wal_receive_lsn() - '0/0')::FLOAT8
|
||||
ELSE 0
|
||||
END AS lsn;
|
||||
|
||||
- metric_name: replication_delay_bytes
|
||||
type: gauge
|
||||
help: 'Bytes between received and replayed LSN'
|
||||
key_labels:
|
||||
values: [replication_delay_bytes]
|
||||
# We use a GREATEST call here because this calculation can be negative.
|
||||
# The calculation is not atomic, meaning after we've gotten the receive
|
||||
# LSN, the replay LSN may have advanced past the receive LSN we
|
||||
# are using for the calculation.
|
||||
query: |
|
||||
SELECT GREATEST(0, pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())) AS replication_delay_bytes;
|
||||
|
||||
- metric_name: replication_delay_seconds
|
||||
type: gauge
|
||||
help: 'Time since last LSN was replayed'
|
||||
key_labels:
|
||||
values: [replication_delay_seconds]
|
||||
query: |
|
||||
SELECT
|
||||
CASE
|
||||
WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0
|
||||
ELSE GREATEST (0, EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp()))
|
||||
END AS replication_delay_seconds;
|
||||
|
||||
- metric_name: checkpoints_req
|
||||
type: gauge
|
||||
help: 'Number of requested checkpoints'
|
||||
key_labels:
|
||||
values: [checkpoints_req]
|
||||
query: |
|
||||
SELECT checkpoints_req FROM pg_stat_bgwriter;
|
||||
|
||||
- metric_name: checkpoints_timed
|
||||
type: gauge
|
||||
help: 'Number of scheduled checkpoints'
|
||||
key_labels:
|
||||
values: [checkpoints_timed]
|
||||
query: |
|
||||
SELECT checkpoints_timed FROM pg_stat_bgwriter;
|
||||
|
||||
- metric_name: compute_logical_snapshot_files
|
||||
type: gauge
|
||||
help: 'Number of snapshot files in pg_logical/snapshot'
|
||||
key_labels:
|
||||
- timeline_id
|
||||
values: [num_logical_snapshot_files]
|
||||
query: |
|
||||
SELECT
|
||||
(SELECT setting FROM pg_settings WHERE name = 'neon.timeline_id') AS timeline_id,
|
||||
-- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp. These
|
||||
-- temporary snapshot files are renamed to the actual snapshot files after they are
|
||||
-- completely built. We only WAL-log the completely built snapshot files.
|
||||
(SELECT COUNT(*) FROM pg_ls_dir('pg_logical/snapshots') AS name WHERE name LIKE '%.snap') AS num_logical_snapshot_files;
|
||||
|
||||
# In all the below metrics, we cast LSNs to floats because Prometheus only supports floats.
|
||||
# It's probably fine because float64 can store integers from -2^53 to +2^53 exactly.
|
||||
|
||||
# Number of slots is limited by max_replication_slots, so collecting position for all of them shouldn't be bad.
|
||||
- metric_name: logical_slot_restart_lsn
|
||||
type: gauge
|
||||
help: 'restart_lsn of logical slots'
|
||||
key_labels:
|
||||
- slot_name
|
||||
values: [restart_lsn]
|
||||
query: |
|
||||
select slot_name, (restart_lsn - '0/0')::FLOAT8 as restart_lsn
|
||||
from pg_replication_slots
|
||||
where slot_type = 'logical';
|
||||
|
||||
- metric_name: compute_subscriptions_count
|
||||
type: gauge
|
||||
help: 'Number of logical replication subscriptions grouped by enabled/disabled'
|
||||
key_labels:
|
||||
- enabled
|
||||
values: [subscriptions_count]
|
||||
query: |
|
||||
select subenabled::text as enabled, count(*) as subscriptions_count
|
||||
from pg_subscription
|
||||
group by subenabled;
|
||||
|
||||
- metric_name: retained_wal
|
||||
type: gauge
|
||||
help: 'Retained WAL in inactive replication slots'
|
||||
key_labels:
|
||||
- slot_name
|
||||
values: [retained_wal]
|
||||
query: |
|
||||
SELECT slot_name, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::FLOAT8 AS retained_wal
|
||||
FROM pg_replication_slots
|
||||
WHERE active = false;
|
||||
|
||||
- metric_name: wal_is_lost
|
||||
type: gauge
|
||||
help: 'Whether or not the replication slot wal_status is lost'
|
||||
key_labels:
|
||||
- slot_name
|
||||
values: [wal_is_lost]
|
||||
query: |
|
||||
SELECT slot_name,
|
||||
CASE WHEN wal_status = 'lost' THEN 1 ELSE 0 END AS wal_is_lost
|
||||
FROM pg_replication_slots;
|
||||
|
||||
queries:
|
||||
- query_name: neon_perf_counters
|
||||
query: |
|
||||
WITH c AS (
|
||||
SELECT pg_catalog.jsonb_object_agg(metric, value) jb FROM neon.neon_perf_counters
|
||||
)
|
||||
SELECT d.*
|
||||
FROM pg_catalog.jsonb_to_record((select jb from c)) as d(
|
||||
getpage_wait_seconds_count numeric,
|
||||
getpage_wait_seconds_sum numeric,
|
||||
getpage_prefetch_requests_total numeric,
|
||||
getpage_sync_requests_total numeric,
|
||||
getpage_prefetch_misses_total numeric,
|
||||
getpage_prefetch_discards_total numeric,
|
||||
pageserver_requests_sent_total numeric,
|
||||
pageserver_disconnects_total numeric,
|
||||
pageserver_send_flushes_total numeric
|
||||
);
|
||||
|
||||
- query_name: getpage_wait_seconds_buckets
|
||||
query: |
|
||||
SELECT bucket_le, value FROM neon.neon_perf_counters WHERE metric = 'getpage_wait_seconds_bucket';
|
||||
@@ -1,55 +0,0 @@
|
||||
collector_name: neon_collector_autoscaling
|
||||
metrics:
|
||||
- metric_name: lfc_misses
|
||||
type: gauge
|
||||
help: 'lfc_misses'
|
||||
key_labels:
|
||||
values: [lfc_misses]
|
||||
query: |
|
||||
select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses';
|
||||
|
||||
- metric_name: lfc_used
|
||||
type: gauge
|
||||
help: 'LFC chunks used (chunk = 1MB)'
|
||||
key_labels:
|
||||
values: [lfc_used]
|
||||
query: |
|
||||
select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used';
|
||||
|
||||
- metric_name: lfc_hits
|
||||
type: gauge
|
||||
help: 'lfc_hits'
|
||||
key_labels:
|
||||
values: [lfc_hits]
|
||||
query: |
|
||||
select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits';
|
||||
|
||||
- metric_name: lfc_writes
|
||||
type: gauge
|
||||
help: 'lfc_writes'
|
||||
key_labels:
|
||||
values: [lfc_writes]
|
||||
query: |
|
||||
select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes';
|
||||
|
||||
- metric_name: lfc_cache_size_limit
|
||||
type: gauge
|
||||
help: 'LFC cache size limit in bytes'
|
||||
key_labels:
|
||||
values: [lfc_cache_size_limit]
|
||||
query: |
|
||||
select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit;
|
||||
|
||||
- metric_name: lfc_approximate_working_set_size_windows
|
||||
type: gauge
|
||||
help: 'Approximate working set size in pages of 8192 bytes'
|
||||
key_labels: [duration_seconds]
|
||||
values: [size]
|
||||
# NOTE: This is the "internal" / "machine-readable" version. This outputs the working set
|
||||
# size looking back 1..60 minutes, labeled with the number of minutes.
|
||||
query: |
|
||||
select
|
||||
x::text as duration_seconds,
|
||||
neon.approximate_working_set_size_seconds(x) as size
|
||||
from
|
||||
(select generate_series * 60 as x from generate_series(1, 60)) as t (x);
|
||||
@@ -1,17 +0,0 @@
|
||||
[databases]
|
||||
*=host=localhost port=5432 auth_user=cloud_admin
|
||||
[pgbouncer]
|
||||
listen_port=6432
|
||||
listen_addr=0.0.0.0
|
||||
auth_type=scram-sha-256
|
||||
auth_user=cloud_admin
|
||||
auth_dbname=postgres
|
||||
client_tls_sslmode=disable
|
||||
server_tls_sslmode=disable
|
||||
pool_mode=transaction
|
||||
max_client_conn=10000
|
||||
default_pool_size=64
|
||||
max_prepared_statements=0
|
||||
admin_users=postgres
|
||||
unix_socket_dir=/tmp/
|
||||
unix_socket_mode=0777
|
||||
@@ -1,33 +0,0 @@
|
||||
# Configuration for sql_exporter
|
||||
# Global defaults.
|
||||
global:
|
||||
# If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
|
||||
scrape_timeout: 10s
|
||||
# Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
|
||||
scrape_timeout_offset: 500ms
|
||||
# Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
|
||||
min_interval: 0s
|
||||
# Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
|
||||
# as will concurrent scrapes.
|
||||
max_connections: 1
|
||||
# Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
|
||||
# always be the same as max_connections.
|
||||
max_idle_connections: 1
|
||||
# Maximum number of maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
|
||||
# If 0, connections are not closed due to a connection's age.
|
||||
max_connection_lifetime: 5m
|
||||
|
||||
# The target to monitor and the collectors to execute on it.
|
||||
target:
|
||||
# Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)
|
||||
# the schema gets dropped or replaced to match the driver expected DSN format.
|
||||
data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter'
|
||||
|
||||
# Collectors (referenced by name) to execute on the target.
|
||||
# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
|
||||
collectors: [neon_collector]
|
||||
|
||||
# Collector files specifies a list of globs. One collector definition is read from each matching file.
|
||||
# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
|
||||
collector_files:
|
||||
- "neon_collector.yml"
|
||||
@@ -1,33 +0,0 @@
|
||||
# Configuration for sql_exporter for autoscaling-agent
|
||||
# Global defaults.
|
||||
global:
|
||||
# If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
|
||||
scrape_timeout: 10s
|
||||
# Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
|
||||
scrape_timeout_offset: 500ms
|
||||
# Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
|
||||
min_interval: 0s
|
||||
# Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
|
||||
# as will concurrent scrapes.
|
||||
max_connections: 1
|
||||
# Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
|
||||
# always be the same as max_connections.
|
||||
max_idle_connections: 1
|
||||
# Maximum number of maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
|
||||
# If 0, connections are not closed due to a connection's age.
|
||||
max_connection_lifetime: 5m
|
||||
|
||||
# The target to monitor and the collectors to execute on it.
|
||||
target:
|
||||
# Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)
|
||||
# the schema gets dropped or replaced to match the driver expected DSN format.
|
||||
data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter_autoscaling'
|
||||
|
||||
# Collectors (referenced by name) to execute on the target.
|
||||
# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
|
||||
collectors: [neon_collector_autoscaling]
|
||||
|
||||
# Collector files specifies a list of globs. One collector definition is read from each matching file.
|
||||
# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
|
||||
collector_files:
|
||||
- "neon_collector_autoscaling.yml"
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,223 +0,0 @@
|
||||
commit 7dd414ee75f2875cffb1d6ba474df1f135a6fc6f
|
||||
Author: Alexey Masterov <alexeymasterov@neon.tech>
|
||||
Date: Fri May 31 06:34:26 2024 +0000
|
||||
|
||||
These alternative expected files were added to consider the neon features
|
||||
|
||||
diff --git a/ext-src/pg_anon-src/tests/expected/permissions_masked_role_1.out b/ext-src/pg_anon-src/tests/expected/permissions_masked_role_1.out
|
||||
new file mode 100644
|
||||
index 0000000..2539cfd
|
||||
--- /dev/null
|
||||
+++ b/ext-src/pg_anon-src/tests/expected/permissions_masked_role_1.out
|
||||
@@ -0,0 +1,101 @@
|
||||
+BEGIN;
|
||||
+CREATE EXTENSION anon CASCADE;
|
||||
+NOTICE: installing required extension "pgcrypto"
|
||||
+SELECT anon.init();
|
||||
+ init
|
||||
+------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+CREATE ROLE mallory_the_masked_user;
|
||||
+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS 'MASKED';
|
||||
+CREATE TABLE t1(i INT);
|
||||
+ALTER TABLE t1 ADD COLUMN t TEXT;
|
||||
+SECURITY LABEL FOR anon ON COLUMN t1.t
|
||||
+IS 'MASKED WITH VALUE NULL';
|
||||
+INSERT INTO t1 VALUES (1,'test');
|
||||
+--
|
||||
+-- We're checking the owner's permissions
|
||||
+--
|
||||
+-- see
|
||||
+-- https://postgresql-anonymizer.readthedocs.io/en/latest/SECURITY/#permissions
|
||||
+--
|
||||
+SET ROLE mallory_the_masked_user;
|
||||
+SELECT anon.pseudo_first_name(0) IS NOT NULL;
|
||||
+ ?column?
|
||||
+----------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+-- SHOULD FAIL
|
||||
+DO $$
|
||||
+BEGIN
|
||||
+ PERFORM anon.init();
|
||||
+ EXCEPTION WHEN insufficient_privilege
|
||||
+ THEN RAISE NOTICE 'insufficient_privilege';
|
||||
+END$$;
|
||||
+NOTICE: insufficient_privilege
|
||||
+-- SHOULD FAIL
|
||||
+DO $$
|
||||
+BEGIN
|
||||
+ PERFORM anon.anonymize_table('t1');
|
||||
+ EXCEPTION WHEN insufficient_privilege
|
||||
+ THEN RAISE NOTICE 'insufficient_privilege';
|
||||
+END$$;
|
||||
+NOTICE: insufficient_privilege
|
||||
+-- SHOULD FAIL
|
||||
+SAVEPOINT fail_start_engine;
|
||||
+SELECT anon.start_dynamic_masking();
|
||||
+ERROR: Only supersusers can start the dynamic masking engine.
|
||||
+CONTEXT: PL/pgSQL function anon.start_dynamic_masking(boolean) line 18 at RAISE
|
||||
+ROLLBACK TO fail_start_engine;
|
||||
+RESET ROLE;
|
||||
+SELECT anon.start_dynamic_masking();
|
||||
+ start_dynamic_masking
|
||||
+-----------------------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+SET ROLE mallory_the_masked_user;
|
||||
+SELECT * FROM mask.t1;
|
||||
+ i | t
|
||||
+---+---
|
||||
+ 1 |
|
||||
+(1 row)
|
||||
+
|
||||
+-- SHOULD FAIL
|
||||
+DO $$
|
||||
+BEGIN
|
||||
+ SELECT * FROM public.t1;
|
||||
+ EXCEPTION WHEN insufficient_privilege
|
||||
+ THEN RAISE NOTICE 'insufficient_privilege';
|
||||
+END$$;
|
||||
+NOTICE: insufficient_privilege
|
||||
+-- SHOULD FAIL
|
||||
+SAVEPOINT fail_stop_engine;
|
||||
+SELECT anon.stop_dynamic_masking();
|
||||
+ERROR: Only supersusers can stop the dynamic masking engine.
|
||||
+CONTEXT: PL/pgSQL function anon.stop_dynamic_masking() line 18 at RAISE
|
||||
+ROLLBACK TO fail_stop_engine;
|
||||
+RESET ROLE;
|
||||
+SELECT anon.stop_dynamic_masking();
|
||||
+NOTICE: The previous priviledges of 'mallory_the_masked_user' are not restored. You need to grant them manually.
|
||||
+ stop_dynamic_masking
|
||||
+----------------------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+SET ROLE mallory_the_masked_user;
|
||||
+SELECT COUNT(*)=1 FROM anon.pg_masking_rules;
|
||||
+ ?column?
|
||||
+----------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+-- SHOULD FAIL
|
||||
+SAVEPOINT fail_seclabel_on_role;
|
||||
+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS NULL;
|
||||
+ERROR: permission denied
|
||||
+DETAIL: The current user must have the CREATEROLE attribute.
|
||||
+ROLLBACK TO fail_seclabel_on_role;
|
||||
+ROLLBACK;
|
||||
diff --git a/ext-src/pg_anon-src/tests/expected/permissions_owner_1.out b/ext-src/pg_anon-src/tests/expected/permissions_owner_1.out
|
||||
new file mode 100644
|
||||
index 0000000..8b090fe
|
||||
--- /dev/null
|
||||
+++ b/ext-src/pg_anon-src/tests/expected/permissions_owner_1.out
|
||||
@@ -0,0 +1,104 @@
|
||||
+BEGIN;
|
||||
+CREATE EXTENSION anon CASCADE;
|
||||
+NOTICE: installing required extension "pgcrypto"
|
||||
+SELECT anon.init();
|
||||
+ init
|
||||
+------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+CREATE ROLE oscar_the_owner;
|
||||
+ALTER DATABASE :DBNAME OWNER TO oscar_the_owner;
|
||||
+CREATE ROLE mallory_the_masked_user;
|
||||
+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS 'MASKED';
|
||||
+--
|
||||
+-- We're checking the owner's permissions
|
||||
+--
|
||||
+-- see
|
||||
+-- https://postgresql-anonymizer.readthedocs.io/en/latest/SECURITY/#permissions
|
||||
+--
|
||||
+SET ROLE oscar_the_owner;
|
||||
+SELECT anon.pseudo_first_name(0) IS NOT NULL;
|
||||
+ ?column?
|
||||
+----------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+-- SHOULD FAIL
|
||||
+DO $$
|
||||
+BEGIN
|
||||
+ PERFORM anon.init();
|
||||
+ EXCEPTION WHEN insufficient_privilege
|
||||
+ THEN RAISE NOTICE 'insufficient_privilege';
|
||||
+END$$;
|
||||
+NOTICE: insufficient_privilege
|
||||
+CREATE TABLE t1(i INT);
|
||||
+ALTER TABLE t1 ADD COLUMN t TEXT;
|
||||
+SECURITY LABEL FOR anon ON COLUMN t1.t
|
||||
+IS 'MASKED WITH VALUE NULL';
|
||||
+INSERT INTO t1 VALUES (1,'test');
|
||||
+SELECT anon.anonymize_table('t1');
|
||||
+ anonymize_table
|
||||
+-----------------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+SELECT * FROM t1;
|
||||
+ i | t
|
||||
+---+---
|
||||
+ 1 |
|
||||
+(1 row)
|
||||
+
|
||||
+UPDATE t1 SET t='test' WHERE i=1;
|
||||
+-- SHOULD FAIL
|
||||
+SAVEPOINT fail_start_engine;
|
||||
+SELECT anon.start_dynamic_masking();
|
||||
+ start_dynamic_masking
|
||||
+-----------------------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+ROLLBACK TO fail_start_engine;
|
||||
+RESET ROLE;
|
||||
+SELECT anon.start_dynamic_masking();
|
||||
+ start_dynamic_masking
|
||||
+-----------------------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+SET ROLE oscar_the_owner;
|
||||
+SELECT * FROM t1;
|
||||
+ i | t
|
||||
+---+------
|
||||
+ 1 | test
|
||||
+(1 row)
|
||||
+
|
||||
+--SELECT * FROM mask.t1;
|
||||
+-- SHOULD FAIL
|
||||
+SAVEPOINT fail_stop_engine;
|
||||
+SELECT anon.stop_dynamic_masking();
|
||||
+ERROR: permission denied for schema mask
|
||||
+CONTEXT: SQL statement "DROP VIEW mask.t1;"
|
||||
+PL/pgSQL function anon.mask_drop_view(oid) line 3 at EXECUTE
|
||||
+SQL statement "SELECT anon.mask_drop_view(oid)
|
||||
+ FROM pg_catalog.pg_class
|
||||
+ WHERE relnamespace=quote_ident(pg_catalog.current_setting('anon.sourceschema'))::REGNAMESPACE
|
||||
+ AND relkind IN ('r','p','f')"
|
||||
+PL/pgSQL function anon.stop_dynamic_masking() line 22 at PERFORM
|
||||
+ROLLBACK TO fail_stop_engine;
|
||||
+RESET ROLE;
|
||||
+SELECT anon.stop_dynamic_masking();
|
||||
+NOTICE: The previous priviledges of 'mallory_the_masked_user' are not restored. You need to grant them manually.
|
||||
+ stop_dynamic_masking
|
||||
+----------------------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+SET ROLE oscar_the_owner;
|
||||
+-- SHOULD FAIL
|
||||
+SAVEPOINT fail_seclabel_on_role;
|
||||
+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS NULL;
|
||||
+ERROR: permission denied
|
||||
+DETAIL: The current user must have the CREATEROLE attribute.
|
||||
+ROLLBACK TO fail_seclabel_on_role;
|
||||
+ROLLBACK;
|
||||
@@ -1,19 +0,0 @@
|
||||
commit b3ea51ee158f113f2f82d0b97c12c54343c9a695 (HEAD -> master)
|
||||
Author: Alexey Masterov <alexeymasterov@neon.tech>
|
||||
Date: Fri Jun 7 19:23:42 2024 +0000
|
||||
|
||||
Disable REGRESS_OPTIONS causing initdb
|
||||
|
||||
diff --git a/ext-src/pg_cron-src/Makefile b/ext-src/pg_cron-src/Makefile
|
||||
index 053314c..fbd5fb5 100644
|
||||
--- a/ext-src/pg_cron-src/Makefile
|
||||
+++ b/ext-src/pg_cron-src/Makefile
|
||||
@@ -5,7 +5,7 @@ EXTENSION = pg_cron
|
||||
DATA_built = $(EXTENSION)--1.0.sql
|
||||
DATA = $(wildcard $(EXTENSION)--*--*.sql)
|
||||
|
||||
-REGRESS_OPTS =--temp-config=./pg_cron.conf --temp-instance=./tmp_check
|
||||
+#REGRESS_OPTS =--temp-config=./pg_cron.conf --temp-instance=./tmp_check
|
||||
REGRESS = pg_cron-test
|
||||
|
||||
# compilation configuration
|
||||
@@ -1,53 +0,0 @@
|
||||
diff --git a/expected/ut-A.out b/expected/ut-A.out
|
||||
index da723b8..5328114 100644
|
||||
--- a/expected/ut-A.out
|
||||
+++ b/expected/ut-A.out
|
||||
@@ -9,13 +9,16 @@ SET search_path TO public;
|
||||
----
|
||||
-- No.A-1-1-3
|
||||
CREATE EXTENSION pg_hint_plan;
|
||||
+LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/pg_hint_plan
|
||||
-- No.A-1-2-3
|
||||
DROP EXTENSION pg_hint_plan;
|
||||
-- No.A-1-1-4
|
||||
CREATE SCHEMA other_schema;
|
||||
CREATE EXTENSION pg_hint_plan SCHEMA other_schema;
|
||||
+LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/pg_hint_plan
|
||||
ERROR: extension "pg_hint_plan" must be installed in schema "hint_plan"
|
||||
CREATE EXTENSION pg_hint_plan;
|
||||
+LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/pg_hint_plan
|
||||
DROP SCHEMA other_schema;
|
||||
----
|
||||
---- No. A-5-1 comment pattern
|
||||
@@ -3175,6 +3178,7 @@ SELECT s.query, s.calls
|
||||
FROM public.pg_stat_statements s
|
||||
JOIN pg_catalog.pg_database d
|
||||
ON (s.dbid = d.oid)
|
||||
+ WHERE s.query LIKE 'SELECT * FROM s1.t1%' OR s.query LIKE '%pg_stat_statements_reset%'
|
||||
ORDER BY 1;
|
||||
query | calls
|
||||
--------------------------------------+-------
|
||||
diff --git a/expected/ut-fdw.out b/expected/ut-fdw.out
|
||||
index d372459..6282afe 100644
|
||||
--- a/expected/ut-fdw.out
|
||||
+++ b/expected/ut-fdw.out
|
||||
@@ -7,6 +7,7 @@ SET pg_hint_plan.debug_print TO on;
|
||||
SET client_min_messages TO LOG;
|
||||
SET pg_hint_plan.enable_hint TO on;
|
||||
CREATE EXTENSION file_fdw;
|
||||
+LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/file_fdw
|
||||
CREATE SERVER file_server FOREIGN DATA WRAPPER file_fdw;
|
||||
CREATE USER MAPPING FOR PUBLIC SERVER file_server;
|
||||
CREATE FOREIGN TABLE ft1 (id int, val int) SERVER file_server OPTIONS (format 'csv', filename :'filename');
|
||||
diff --git a/sql/ut-A.sql b/sql/ut-A.sql
|
||||
index 7c7d58a..4fd1a07 100644
|
||||
--- a/sql/ut-A.sql
|
||||
+++ b/sql/ut-A.sql
|
||||
@@ -963,6 +963,7 @@ SELECT s.query, s.calls
|
||||
FROM public.pg_stat_statements s
|
||||
JOIN pg_catalog.pg_database d
|
||||
ON (s.dbid = d.oid)
|
||||
+ WHERE s.query LIKE 'SELECT * FROM s1.t1%' OR s.query LIKE '%pg_stat_statements_reset%'
|
||||
ORDER BY 1;
|
||||
|
||||
----
|
||||
@@ -1,62 +0,0 @@
|
||||
diff --git a/src/hnswbuild.c b/src/hnswbuild.c
|
||||
index dcfb2bd..d5189ee 100644
|
||||
--- a/src/hnswbuild.c
|
||||
+++ b/src/hnswbuild.c
|
||||
@@ -860,9 +860,17 @@ HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc)
|
||||
|
||||
hnswarea = shm_toc_lookup(toc, PARALLEL_KEY_HNSW_AREA, false);
|
||||
|
||||
+#ifdef NEON_SMGR
|
||||
+ smgr_start_unlogged_build(RelationGetSmgr(indexRel));
|
||||
+#endif
|
||||
+
|
||||
/* Perform inserts */
|
||||
HnswParallelScanAndInsert(heapRel, indexRel, hnswshared, hnswarea, false);
|
||||
|
||||
+#ifdef NEON_SMGR
|
||||
+ smgr_finish_unlogged_build_phase_1(RelationGetSmgr(indexRel));
|
||||
+#endif
|
||||
+
|
||||
/* Close relations within worker */
|
||||
index_close(indexRel, indexLockmode);
|
||||
table_close(heapRel, heapLockmode);
|
||||
@@ -1117,12 +1125,38 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,
|
||||
SeedRandom(42);
|
||||
#endif
|
||||
|
||||
+#ifdef NEON_SMGR
|
||||
+ smgr_start_unlogged_build(RelationGetSmgr(index));
|
||||
+#endif
|
||||
+
|
||||
InitBuildState(buildstate, heap, index, indexInfo, forkNum);
|
||||
|
||||
BuildGraph(buildstate, forkNum);
|
||||
|
||||
- if (RelationNeedsWAL(index) || forkNum == INIT_FORKNUM)
|
||||
+#ifdef NEON_SMGR
|
||||
+ smgr_finish_unlogged_build_phase_1(RelationGetSmgr(index));
|
||||
+#endif
|
||||
+
|
||||
+ if (RelationNeedsWAL(index) || forkNum == INIT_FORKNUM) {
|
||||
log_newpage_range(index, forkNum, 0, RelationGetNumberOfBlocksInFork(index, forkNum), true);
|
||||
+#ifdef NEON_SMGR
|
||||
+ {
|
||||
+#if PG_VERSION_NUM >= 160000
|
||||
+ RelFileLocator rlocator = RelationGetSmgr(index)->smgr_rlocator.locator;
|
||||
+#else
|
||||
+ RelFileNode rlocator = RelationGetSmgr(index)->smgr_rnode.node;
|
||||
+#endif
|
||||
+
|
||||
+ SetLastWrittenLSNForBlockRange(XactLastRecEnd, rlocator,
|
||||
+ MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
|
||||
+ SetLastWrittenLSNForRelation(XactLastRecEnd, rlocator, MAIN_FORKNUM);
|
||||
+ }
|
||||
+#endif
|
||||
+ }
|
||||
+
|
||||
+#ifdef NEON_SMGR
|
||||
+ smgr_end_unlogged_build(RelationGetSmgr(index));
|
||||
+#endif
|
||||
|
||||
FreeBuildState(buildstate);
|
||||
}
|
||||
@@ -1,54 +0,0 @@
|
||||
commit 68f3b3b0d594f08aacc4a082ee210749ed5677eb
|
||||
Author: Anastasia Lubennikova <anastasia@neon.tech>
|
||||
Date: Mon Jul 15 12:31:56 2024 +0100
|
||||
|
||||
Neon: fix unlogged index build patch
|
||||
|
||||
diff --git a/src/ruminsert.c b/src/ruminsert.c
|
||||
index e8b209d..e89bf2a 100644
|
||||
--- a/src/ruminsert.c
|
||||
+++ b/src/ruminsert.c
|
||||
@@ -628,6 +628,10 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)
|
||||
elog(ERROR, "index \"%s\" already contains data",
|
||||
RelationGetRelationName(index));
|
||||
|
||||
+#ifdef NEON_SMGR
|
||||
+ smgr_start_unlogged_build(index->rd_smgr);
|
||||
+#endif
|
||||
+
|
||||
initRumState(&buildstate.rumstate, index);
|
||||
buildstate.rumstate.isBuild = true;
|
||||
buildstate.indtuples = 0;
|
||||
@@ -693,6 +697,10 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)
|
||||
buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
|
||||
rumUpdateStats(index, &buildstate.buildStats, buildstate.rumstate.isBuild);
|
||||
|
||||
+#ifdef NEON_SMGR
|
||||
+ smgr_finish_unlogged_build_phase_1(index->rd_smgr);
|
||||
+#endif
|
||||
+
|
||||
/*
|
||||
* Write index to xlog
|
||||
*/
|
||||
@@ -713,6 +721,21 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
|
||||
+#ifdef NEON_SMGR
|
||||
+ {
|
||||
+#if PG_VERSION_NUM >= 160000
|
||||
+ RelFileLocator rlocator = RelationGetSmgr(index)->smgr_rlocator.locator;
|
||||
+#else
|
||||
+ RelFileNode rlocator = RelationGetSmgr(index)->smgr_rnode.node;
|
||||
+#endif
|
||||
+
|
||||
+ SetLastWrittenLSNForBlockRange(XactLastRecEnd, rlocator, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
|
||||
+ SetLastWrittenLSNForRelation(XactLastRecEnd, rlocator, MAIN_FORKNUM);
|
||||
+
|
||||
+ smgr_end_unlogged_build(index->rd_smgr);
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
/*
|
||||
* Return statistics
|
||||
*/
|
||||
@@ -1,117 +0,0 @@
|
||||
# Supplemental file for neondatabase/autoscaling's vm-builder, for producing the VM compute image.
|
||||
---
|
||||
commands:
|
||||
- name: cgconfigparser
|
||||
user: root
|
||||
sysvInitAction: sysinit
|
||||
shell: 'cgconfigparser -l /etc/cgconfig.conf -s 1664'
|
||||
# restrict permissions on /neonvm/bin/resize-swap, because we grant access to compute_ctl for
|
||||
# running it as root.
|
||||
- name: chmod-resize-swap
|
||||
user: root
|
||||
sysvInitAction: sysinit
|
||||
shell: 'chmod 711 /neonvm/bin/resize-swap'
|
||||
- name: chmod-set-disk-quota
|
||||
user: root
|
||||
sysvInitAction: sysinit
|
||||
shell: 'chmod 711 /neonvm/bin/set-disk-quota'
|
||||
- name: pgbouncer
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini'
|
||||
- name: postgres-exporter
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter'
|
||||
- name: sql-exporter
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
shell: '/bin/sql_exporter -config.file=/etc/sql_exporter.yml -web.listen-address=:9399'
|
||||
- name: sql-exporter-autoscaling
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'
|
||||
shutdownHook: |
|
||||
su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10'
|
||||
files:
|
||||
- filename: compute_ctl-sudoers
|
||||
content: |
|
||||
# Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
|
||||
# and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),
|
||||
# regardless of hostname (ALL)
|
||||
postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota
|
||||
- filename: cgconfig.conf
|
||||
content: |
|
||||
# Configuration for cgroups in VM compute nodes
|
||||
group neon-postgres {
|
||||
perm {
|
||||
admin {
|
||||
uid = postgres;
|
||||
}
|
||||
task {
|
||||
gid = users;
|
||||
}
|
||||
}
|
||||
memory {}
|
||||
}
|
||||
build: |
|
||||
# Build cgroup-tools
|
||||
#
|
||||
# At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically
|
||||
# libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-monitor
|
||||
# requires cgroup v2, so we'll build cgroup-tools ourselves.
|
||||
FROM debian:bullseye-slim as libcgroup-builder
|
||||
ENV LIBCGROUP_VERSION=v2.0.3
|
||||
|
||||
RUN set -exu \
|
||||
&& apt update \
|
||||
&& apt install --no-install-recommends -y \
|
||||
git \
|
||||
ca-certificates \
|
||||
automake \
|
||||
cmake \
|
||||
make \
|
||||
gcc \
|
||||
byacc \
|
||||
flex \
|
||||
libtool \
|
||||
libpam0g-dev \
|
||||
&& git clone --depth 1 -b $LIBCGROUP_VERSION https://github.com/libcgroup/libcgroup \
|
||||
&& INSTALL_DIR="/libcgroup-install" \
|
||||
&& mkdir -p "$INSTALL_DIR/bin" "$INSTALL_DIR/include" \
|
||||
&& cd libcgroup \
|
||||
# extracted from bootstrap.sh, with modified flags:
|
||||
&& (test -d m4 || mkdir m4) \
|
||||
&& autoreconf -fi \
|
||||
&& rm -rf autom4te.cache \
|
||||
&& CFLAGS="-O3" ./configure --prefix="$INSTALL_DIR" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy="name=systemd" \
|
||||
# actually build the thing...
|
||||
&& make install
|
||||
merge: |
|
||||
# tweak nofile limits
|
||||
RUN set -e \
|
||||
&& echo 'fs.file-max = 1048576' >>/etc/sysctl.conf \
|
||||
&& test ! -e /etc/security || ( \
|
||||
echo '* - nofile 1048576' >>/etc/security/limits.conf \
|
||||
&& echo 'root - nofile 1048576' >>/etc/security/limits.conf \
|
||||
)
|
||||
|
||||
# Allow postgres user (compute_ctl) to run swap resizer.
|
||||
# Need to install sudo in order to allow this.
|
||||
#
|
||||
# Also, remove the 'read' permission from group/other on /neonvm/bin/resize-swap, just to be safe.
|
||||
RUN set -e \
|
||||
&& apt update \
|
||||
&& apt install --no-install-recommends -y \
|
||||
sudo \
|
||||
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
COPY compute_ctl-sudoers /etc/sudoers.d/compute_ctl-sudoers
|
||||
|
||||
COPY cgconfig.conf /etc/cgconfig.conf
|
||||
|
||||
RUN set -e \
|
||||
&& chmod 0644 /etc/cgconfig.conf
|
||||
|
||||
COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/
|
||||
COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/
|
||||
COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/
|
||||
@@ -4,27 +4,22 @@ version = "0.1.0"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[features]
|
||||
default = []
|
||||
# Enables test specific features.
|
||||
testing = []
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
# camino.workspace = true
|
||||
async-compression.workspace = true
|
||||
chrono.workspace = true
|
||||
cfg-if.workspace = true
|
||||
clap.workspace = true
|
||||
flate2.workspace = true
|
||||
futures.workspace = true
|
||||
hyper0 = { workspace = true, features = ["full"] }
|
||||
hyper = { workspace = true, features = ["full"] }
|
||||
nix.workspace = true
|
||||
notify.workspace = true
|
||||
num_cpus.workspace = true
|
||||
opentelemetry.workspace = true
|
||||
opentelemetry_sdk.workspace = true
|
||||
postgres.workspace = true
|
||||
regex.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
signal-hook.workspace = true
|
||||
tar.workspace = true
|
||||
@@ -32,20 +27,18 @@ reqwest = { workspace = true, features = ["json"] }
|
||||
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
|
||||
tokio-postgres.workspace = true
|
||||
tokio-util.workspace = true
|
||||
tokio-stream.workspace = true
|
||||
tracing.workspace = true
|
||||
tracing-opentelemetry.workspace = true
|
||||
tracing-subscriber.workspace = true
|
||||
tracing-utils.workspace = true
|
||||
thiserror.workspace = true
|
||||
url.workspace = true
|
||||
|
||||
compute_api.workspace = true
|
||||
utils.workspace = true
|
||||
workspace_hack.workspace = true
|
||||
toml_edit.workspace = true
|
||||
remote_storage = { version = "0.1", path = "../libs/remote_storage/" }
|
||||
vm_monitor = { version = "0.1", path = "../libs/vm_monitor/" }
|
||||
zstd = "0.13"
|
||||
bytes = "1.0"
|
||||
rust-ini = "0.20.0"
|
||||
rlimit = "0.10.1"
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
//! - Every start is a fresh start, so the data directory is removed and
|
||||
//! initialized again on each run.
|
||||
//! - If remote_extension_config is provided, it will be used to fetch extensions list
|
||||
//! and download `shared_preload_libraries` from the remote storage.
|
||||
//! and download `shared_preload_libraries` from the remote storage.
|
||||
//! - Next it will put configuration files into the `PGDATA` directory.
|
||||
//! - Sync safekeepers and get commit LSN.
|
||||
//! - Get `basebackup` from pageserver using the returned on the previous step LSN.
|
||||
@@ -33,6 +33,7 @@
|
||||
//! -b /usr/local/bin/postgres \
|
||||
//! -r http://pg-ext-s3-gateway \
|
||||
//! ```
|
||||
//!
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
@@ -44,15 +45,12 @@ use std::{thread, time::Duration};
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::Utc;
|
||||
use clap::Arg;
|
||||
use compute_tools::disk_quota::set_disk_quota;
|
||||
use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static;
|
||||
use signal_hook::consts::{SIGQUIT, SIGTERM};
|
||||
use signal_hook::{consts::SIGINT, iterator::Signals};
|
||||
use tracing::{error, info, warn};
|
||||
use tracing::{error, info};
|
||||
use url::Url;
|
||||
|
||||
use compute_api::responses::ComputeStatus;
|
||||
use compute_api::spec::ComputeSpec;
|
||||
|
||||
use compute_tools::compute::{
|
||||
forward_termination_signal, ComputeNode, ComputeState, ParsedSpec, PG_PID,
|
||||
@@ -64,45 +62,12 @@ use compute_tools::logger::*;
|
||||
use compute_tools::monitor::launch_monitor;
|
||||
use compute_tools::params::*;
|
||||
use compute_tools::spec::*;
|
||||
use compute_tools::swap::resize_swap;
|
||||
use rlimit::{setrlimit, Resource};
|
||||
|
||||
// this is an arbitrary build tag. Fine as a default / for testing purposes
|
||||
// in-case of not-set environment var
|
||||
const BUILD_TAG_DEFAULT: &str = "latest";
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let (build_tag, clap_args) = init()?;
|
||||
|
||||
// enable core dumping for all child processes
|
||||
setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;
|
||||
|
||||
let (pg_handle, start_pg_result) = {
|
||||
// Enter startup tracing context
|
||||
let _startup_context_guard = startup_context_from_env();
|
||||
|
||||
let cli_args = process_cli(&clap_args)?;
|
||||
|
||||
let cli_spec = try_spec_from_cli(&clap_args, &cli_args)?;
|
||||
|
||||
let wait_spec_result = wait_spec(build_tag, cli_args, cli_spec)?;
|
||||
|
||||
start_postgres(&clap_args, wait_spec_result)?
|
||||
|
||||
// Startup is finished, exit the startup tracing span
|
||||
};
|
||||
|
||||
// PostgreSQL is now running, if startup was successful. Wait until it exits.
|
||||
let wait_pg_result = wait_postgres(pg_handle)?;
|
||||
|
||||
let delay_exit = cleanup_after_postgres_exit(start_pg_result)?;
|
||||
|
||||
maybe_delay_exit(delay_exit);
|
||||
|
||||
deinit_and_exit(wait_pg_result);
|
||||
}
|
||||
|
||||
fn init() -> Result<(String, clap::ArgMatches)> {
|
||||
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
|
||||
|
||||
let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
|
||||
@@ -117,15 +82,9 @@ fn init() -> Result<(String, clap::ArgMatches)> {
|
||||
.to_string();
|
||||
info!("build_tag: {build_tag}");
|
||||
|
||||
Ok((build_tag, cli().get_matches()))
|
||||
}
|
||||
|
||||
fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
|
||||
let pgbin_default = "postgres";
|
||||
let pgbin = matches
|
||||
.get_one::<String>("pgbin")
|
||||
.map(|s| s.as_str())
|
||||
.unwrap_or(pgbin_default);
|
||||
let matches = cli().get_matches();
|
||||
let pgbin_default = String::from("postgres");
|
||||
let pgbin = matches.get_one::<String>("pgbin").unwrap_or(&pgbin_default);
|
||||
|
||||
let ext_remote_storage = matches
|
||||
.get_one::<String>("remote-ext-config")
|
||||
@@ -151,35 +110,7 @@ fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
|
||||
.expect("Postgres connection string is required");
|
||||
let spec_json = matches.get_one::<String>("spec");
|
||||
let spec_path = matches.get_one::<String>("spec-path");
|
||||
let resize_swap_on_bind = matches.get_flag("resize-swap-on-bind");
|
||||
let set_disk_quota_for_fs = matches.get_one::<String>("set-disk-quota-for-fs");
|
||||
|
||||
Ok(ProcessCliResult {
|
||||
connstr,
|
||||
pgdata,
|
||||
pgbin,
|
||||
ext_remote_storage,
|
||||
http_port,
|
||||
spec_json,
|
||||
spec_path,
|
||||
resize_swap_on_bind,
|
||||
set_disk_quota_for_fs,
|
||||
})
|
||||
}
|
||||
|
||||
struct ProcessCliResult<'clap> {
|
||||
connstr: &'clap str,
|
||||
pgdata: &'clap str,
|
||||
pgbin: &'clap str,
|
||||
ext_remote_storage: Option<&'clap str>,
|
||||
http_port: u16,
|
||||
spec_json: Option<&'clap String>,
|
||||
spec_path: Option<&'clap String>,
|
||||
resize_swap_on_bind: bool,
|
||||
set_disk_quota_for_fs: Option<&'clap String>,
|
||||
}
|
||||
|
||||
fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
|
||||
// Extract OpenTelemetry context for the startup actions from the
|
||||
// TRACEPARENT and TRACESTATE env variables, and attach it to the current
|
||||
// tracing context.
|
||||
@@ -216,9 +147,9 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
|
||||
if let Ok(val) = std::env::var("TRACESTATE") {
|
||||
startup_tracing_carrier.insert("tracestate".to_string(), val);
|
||||
}
|
||||
if !startup_tracing_carrier.is_empty() {
|
||||
let startup_context_guard = if !startup_tracing_carrier.is_empty() {
|
||||
use opentelemetry::propagation::TextMapPropagator;
|
||||
use opentelemetry_sdk::propagation::TraceContextPropagator;
|
||||
use opentelemetry::sdk::propagation::TraceContextPropagator;
|
||||
let guard = TraceContextPropagator::new()
|
||||
.extract(&startup_tracing_carrier)
|
||||
.attach();
|
||||
@@ -226,17 +157,8 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
|
||||
Some(guard)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
fn try_spec_from_cli(
|
||||
matches: &clap::ArgMatches,
|
||||
ProcessCliResult {
|
||||
spec_json,
|
||||
spec_path,
|
||||
..
|
||||
}: &ProcessCliResult,
|
||||
) -> Result<CliSpecParams> {
|
||||
let compute_id = matches.get_one::<String>("compute-id");
|
||||
let control_plane_uri = matches.get_one::<String>("control-plane-uri");
|
||||
|
||||
@@ -277,35 +199,6 @@ fn try_spec_from_cli(
|
||||
}
|
||||
};
|
||||
|
||||
Ok(CliSpecParams {
|
||||
spec,
|
||||
live_config_allowed,
|
||||
})
|
||||
}
|
||||
|
||||
struct CliSpecParams {
|
||||
/// If a spec was provided via CLI or file, the [`ComputeSpec`]
|
||||
spec: Option<ComputeSpec>,
|
||||
live_config_allowed: bool,
|
||||
}
|
||||
|
||||
fn wait_spec(
|
||||
build_tag: String,
|
||||
ProcessCliResult {
|
||||
connstr,
|
||||
pgdata,
|
||||
pgbin,
|
||||
ext_remote_storage,
|
||||
resize_swap_on_bind,
|
||||
set_disk_quota_for_fs,
|
||||
http_port,
|
||||
..
|
||||
}: ProcessCliResult,
|
||||
CliSpecParams {
|
||||
spec,
|
||||
live_config_allowed,
|
||||
}: CliSpecParams,
|
||||
) -> Result<WaitSpecResult> {
|
||||
let mut new_state = ComputeState::new();
|
||||
let spec_set;
|
||||
|
||||
@@ -333,17 +226,19 @@ fn wait_spec(
|
||||
|
||||
// If this is a pooled VM, prewarm before starting HTTP server and becoming
|
||||
// available for binding. Prewarming helps Postgres start quicker later,
|
||||
// because QEMU will already have its memory allocated from the host, and
|
||||
// because QEMU will already have it's memory allocated from the host, and
|
||||
// the necessary binaries will already be cached.
|
||||
if !spec_set {
|
||||
compute.prewarm_postgres()?;
|
||||
}
|
||||
|
||||
// Launch http service first, so that we can serve control-plane requests
|
||||
// while configuration is still in progress.
|
||||
// Launch http service first, so we were able to serve control-plane
|
||||
// requests, while configuration is still in progress.
|
||||
let _http_handle =
|
||||
launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread");
|
||||
|
||||
let extension_server_port: u16 = http_port;
|
||||
|
||||
if !spec_set {
|
||||
// No spec provided, hang waiting for it.
|
||||
info!("no compute spec provided, waiting");
|
||||
@@ -358,50 +253,21 @@ fn wait_spec(
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Record for how long we slept waiting for the spec.
|
||||
let now = Utc::now();
|
||||
state.metrics.wait_for_spec_ms = now
|
||||
.signed_duration_since(state.start_time)
|
||||
.to_std()
|
||||
.unwrap()
|
||||
.as_millis() as u64;
|
||||
|
||||
// Reset start time, so that the total startup time that is calculated later will
|
||||
// not include the time that we waited for the spec.
|
||||
state.start_time = now;
|
||||
}
|
||||
|
||||
launch_lsn_lease_bg_task_for_static(&compute);
|
||||
|
||||
Ok(WaitSpecResult {
|
||||
compute,
|
||||
http_port,
|
||||
resize_swap_on_bind,
|
||||
set_disk_quota_for_fs: set_disk_quota_for_fs.cloned(),
|
||||
})
|
||||
}
|
||||
|
||||
struct WaitSpecResult {
|
||||
compute: Arc<ComputeNode>,
|
||||
// passed through from ProcessCliResult
|
||||
http_port: u16,
|
||||
resize_swap_on_bind: bool,
|
||||
set_disk_quota_for_fs: Option<String>,
|
||||
}
|
||||
|
||||
fn start_postgres(
|
||||
// need to allow unused because `matches` is only used if target_os = "linux"
|
||||
#[allow(unused_variables)] matches: &clap::ArgMatches,
|
||||
WaitSpecResult {
|
||||
compute,
|
||||
http_port,
|
||||
resize_swap_on_bind,
|
||||
set_disk_quota_for_fs,
|
||||
}: WaitSpecResult,
|
||||
) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
|
||||
// We got all we need, update the state.
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
|
||||
// Record for how long we slept waiting for the spec.
|
||||
state.metrics.wait_for_spec_ms = Utc::now()
|
||||
.signed_duration_since(state.start_time)
|
||||
.to_std()
|
||||
.unwrap()
|
||||
.as_millis() as u64;
|
||||
// Reset start time to the actual start of the configuration, so that
|
||||
// total startup time was properly measured at the end.
|
||||
state.start_time = Utc::now();
|
||||
|
||||
state.status = ComputeStatus::Init;
|
||||
compute.state_changed.notify_all();
|
||||
|
||||
@@ -409,83 +275,33 @@ fn start_postgres(
|
||||
"running compute with features: {:?}",
|
||||
state.pspec.as_ref().unwrap().spec.features
|
||||
);
|
||||
// before we release the mutex, fetch the swap size (if any) for later.
|
||||
let swap_size_bytes = state.pspec.as_ref().unwrap().spec.swap_size_bytes;
|
||||
let disk_quota_bytes = state.pspec.as_ref().unwrap().spec.disk_quota_bytes;
|
||||
drop(state);
|
||||
|
||||
// Launch remaining service threads
|
||||
let _monitor_handle = launch_monitor(&compute);
|
||||
let _configurator_handle = launch_configurator(&compute);
|
||||
|
||||
let mut prestartup_failed = false;
|
||||
let mut delay_exit = false;
|
||||
|
||||
// Resize swap to the desired size if the compute spec says so
|
||||
if let (Some(size_bytes), true) = (swap_size_bytes, resize_swap_on_bind) {
|
||||
// To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion
|
||||
// *before* starting postgres.
|
||||
//
|
||||
// In theory, we could do this asynchronously if SkipSwapon was enabled for VMs, but this
|
||||
// carries a risk of introducing hard-to-debug issues - e.g. if postgres sometimes gets
|
||||
// OOM-killed during startup because swap wasn't available yet.
|
||||
match resize_swap(size_bytes) {
|
||||
Ok(()) => {
|
||||
let size_mib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
|
||||
info!(%size_bytes, %size_mib, "resized swap");
|
||||
}
|
||||
Err(err) => {
|
||||
let err = err.context("failed to resize swap");
|
||||
error!("{err:#}");
|
||||
|
||||
// Mark compute startup as failed; don't try to start postgres, and report this
|
||||
// error to the control plane when it next asks.
|
||||
prestartup_failed = true;
|
||||
compute.set_failed_status(err);
|
||||
delay_exit = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Set disk quota if the compute spec says so
|
||||
if let (Some(disk_quota_bytes), Some(disk_quota_fs_mountpoint)) =
|
||||
(disk_quota_bytes, set_disk_quota_for_fs)
|
||||
{
|
||||
match set_disk_quota(disk_quota_bytes, &disk_quota_fs_mountpoint) {
|
||||
Ok(()) => {
|
||||
let size_mib = disk_quota_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
|
||||
info!(%disk_quota_bytes, %size_mib, "set disk quota");
|
||||
}
|
||||
Err(err) => {
|
||||
let err = err.context("failed to set disk quota");
|
||||
error!("{err:#}");
|
||||
|
||||
// Mark compute startup as failed; don't try to start postgres, and report this
|
||||
// error to the control plane when it next asks.
|
||||
prestartup_failed = true;
|
||||
compute.set_failed_status(err);
|
||||
delay_exit = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let extension_server_port: u16 = http_port;
|
||||
|
||||
// Start Postgres
|
||||
let mut pg = None;
|
||||
if !prestartup_failed {
|
||||
pg = match compute.start_compute(extension_server_port) {
|
||||
Ok(pg) => Some(pg),
|
||||
Err(err) => {
|
||||
error!("could not start the compute node: {:#}", err);
|
||||
compute.set_failed_status(err);
|
||||
delay_exit = true;
|
||||
None
|
||||
}
|
||||
};
|
||||
} else {
|
||||
warn!("skipping postgres startup because pre-startup step failed");
|
||||
}
|
||||
let mut delay_exit = false;
|
||||
let mut exit_code = None;
|
||||
let pg = match compute.start_compute(extension_server_port) {
|
||||
Ok(pg) => Some(pg),
|
||||
Err(err) => {
|
||||
error!("could not start the compute node: {:#}", err);
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
state.error = Some(format!("{:?}", err));
|
||||
state.status = ComputeStatus::Failed;
|
||||
// Notify others that Postgres failed to start. In case of configuring the
|
||||
// empty compute, it's likely that API handler is still waiting for compute
|
||||
// state change. With this we will notify it that compute is in Failed state,
|
||||
// so control plane will know about it earlier and record proper error instead
|
||||
// of timeout.
|
||||
compute.state_changed.notify_all();
|
||||
drop(state); // unlock
|
||||
delay_exit = true;
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
// Start the vm-monitor if directed to. The vm-monitor only runs on linux
|
||||
// because it requires cgroups.
|
||||
@@ -518,7 +334,7 @@ fn start_postgres(
|
||||
// This token is used internally by the monitor to clean up all threads
|
||||
let token = CancellationToken::new();
|
||||
|
||||
let vm_monitor = rt.as_ref().map(|rt| {
|
||||
let vm_monitor = &rt.as_ref().map(|rt| {
|
||||
rt.spawn(vm_monitor::start(
|
||||
Box::leak(Box::new(vm_monitor::Args {
|
||||
cgroup: cgroup.cloned(),
|
||||
@@ -531,41 +347,12 @@ fn start_postgres(
|
||||
}
|
||||
}
|
||||
|
||||
Ok((
|
||||
pg,
|
||||
StartPostgresResult {
|
||||
delay_exit,
|
||||
compute,
|
||||
#[cfg(target_os = "linux")]
|
||||
rt,
|
||||
#[cfg(target_os = "linux")]
|
||||
token,
|
||||
#[cfg(target_os = "linux")]
|
||||
vm_monitor,
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
type PostgresHandle = (std::process::Child, std::thread::JoinHandle<()>);
|
||||
|
||||
struct StartPostgresResult {
|
||||
delay_exit: bool,
|
||||
// passed through from WaitSpecResult
|
||||
compute: Arc<ComputeNode>,
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
rt: Option<tokio::runtime::Runtime>,
|
||||
#[cfg(target_os = "linux")]
|
||||
token: tokio_util::sync::CancellationToken,
|
||||
#[cfg(target_os = "linux")]
|
||||
vm_monitor: Option<tokio::task::JoinHandle<Result<()>>>,
|
||||
}
|
||||
|
||||
fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
|
||||
// Wait for the child Postgres process forever. In this state Ctrl+C will
|
||||
// propagate to Postgres and it will be shut down as well.
|
||||
let mut exit_code = None;
|
||||
if let Some((mut pg, logs_handle)) = pg {
|
||||
// Startup is finished, exit the startup tracing span
|
||||
drop(startup_context_guard);
|
||||
|
||||
let ecode = pg
|
||||
.wait()
|
||||
.expect("failed to start waiting on Postgres process");
|
||||
@@ -580,25 +367,6 @@ fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
|
||||
exit_code = ecode.code()
|
||||
}
|
||||
|
||||
Ok(WaitPostgresResult { exit_code })
|
||||
}
|
||||
|
||||
struct WaitPostgresResult {
|
||||
exit_code: Option<i32>,
|
||||
}
|
||||
|
||||
fn cleanup_after_postgres_exit(
|
||||
StartPostgresResult {
|
||||
mut delay_exit,
|
||||
compute,
|
||||
#[cfg(target_os = "linux")]
|
||||
vm_monitor,
|
||||
#[cfg(target_os = "linux")]
|
||||
token,
|
||||
#[cfg(target_os = "linux")]
|
||||
rt,
|
||||
}: StartPostgresResult,
|
||||
) -> Result<bool> {
|
||||
// Terminate the vm_monitor so it releases the file watcher on
|
||||
// /sys/fs/cgroup/neon-postgres.
|
||||
// Note: the vm-monitor only runs on linux because it requires cgroups.
|
||||
@@ -640,19 +408,13 @@ fn cleanup_after_postgres_exit(
|
||||
error!("error while checking for core dumps: {err:?}");
|
||||
}
|
||||
|
||||
Ok(delay_exit)
|
||||
}
|
||||
|
||||
fn maybe_delay_exit(delay_exit: bool) {
|
||||
// If launch failed, keep serving HTTP requests for a while, so the cloud
|
||||
// control plane can get the actual error.
|
||||
if delay_exit {
|
||||
info!("giving control plane 30s to collect the error before shutdown");
|
||||
thread::sleep(Duration::from_secs(30));
|
||||
}
|
||||
}
|
||||
|
||||
fn deinit_and_exit(WaitPostgresResult { exit_code }: WaitPostgresResult) -> ! {
|
||||
// Shutdown trace pipeline gracefully, so that it has a chance to send any
|
||||
// pending traces before we exit. Shutting down OTEL tracing provider may
|
||||
// hang for quite some time, see, for example:
|
||||
@@ -760,20 +522,10 @@ fn cli() -> clap::Command {
|
||||
Arg::new("filecache-connstr")
|
||||
.long("filecache-connstr")
|
||||
.default_value(
|
||||
"host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor",
|
||||
"host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable",
|
||||
)
|
||||
.value_name("FILECACHE_CONNSTR"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("resize-swap-on-bind")
|
||||
.long("resize-swap-on-bind")
|
||||
.action(clap::ArgAction::SetTrue),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("set-disk-quota-for-fs")
|
||||
.long("set-disk-quota-for-fs")
|
||||
.value_name("SET_DISK_QUOTA_FOR_FS")
|
||||
)
|
||||
}
|
||||
|
||||
/// When compute_ctl is killed, send also termination signal to sync-safekeepers
|
||||
|
||||
@@ -1,116 +0,0 @@
|
||||
use compute_api::{
|
||||
responses::CatalogObjects,
|
||||
spec::{Database, Role},
|
||||
};
|
||||
use futures::Stream;
|
||||
use postgres::{Client, NoTls};
|
||||
use std::{path::Path, process::Stdio, result::Result, sync::Arc};
|
||||
use tokio::{
|
||||
io::{AsyncBufReadExt, BufReader},
|
||||
process::Command,
|
||||
task,
|
||||
};
|
||||
use tokio_stream::{self as stream, StreamExt};
|
||||
use tokio_util::codec::{BytesCodec, FramedRead};
|
||||
use tracing::warn;
|
||||
|
||||
use crate::{
|
||||
compute::ComputeNode,
|
||||
pg_helpers::{get_existing_dbs, get_existing_roles},
|
||||
};
|
||||
|
||||
pub async fn get_dbs_and_roles(compute: &Arc<ComputeNode>) -> anyhow::Result<CatalogObjects> {
|
||||
let connstr = compute.connstr.clone();
|
||||
task::spawn_blocking(move || {
|
||||
let mut client = Client::connect(connstr.as_str(), NoTls)?;
|
||||
let roles: Vec<Role>;
|
||||
{
|
||||
let mut xact = client.transaction()?;
|
||||
roles = get_existing_roles(&mut xact)?;
|
||||
}
|
||||
let databases: Vec<Database> = get_existing_dbs(&mut client)?.values().cloned().collect();
|
||||
|
||||
Ok(CatalogObjects { roles, databases })
|
||||
})
|
||||
.await?
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum SchemaDumpError {
|
||||
#[error("Database does not exist.")]
|
||||
DatabaseDoesNotExist,
|
||||
#[error("Failed to execute pg_dump.")]
|
||||
IO(#[from] std::io::Error),
|
||||
}
|
||||
|
||||
// It uses the pg_dump utility to dump the schema of the specified database.
|
||||
// The output is streamed back to the caller and supposed to be streamed via HTTP.
|
||||
//
|
||||
// Before return the result with the output, it checks that pg_dump produced any output.
|
||||
// If not, it tries to parse the stderr output to determine if the database does not exist
|
||||
// and special error is returned.
|
||||
//
|
||||
// To make sure that the process is killed when the caller drops the stream, we use tokio kill_on_drop feature.
|
||||
pub async fn get_database_schema(
|
||||
compute: &Arc<ComputeNode>,
|
||||
dbname: &str,
|
||||
) -> Result<impl Stream<Item = Result<bytes::Bytes, std::io::Error>>, SchemaDumpError> {
|
||||
let pgbin = &compute.pgbin;
|
||||
let basepath = Path::new(pgbin).parent().unwrap();
|
||||
let pgdump = basepath.join("pg_dump");
|
||||
let mut connstr = compute.connstr.clone();
|
||||
connstr.set_path(dbname);
|
||||
let mut cmd = Command::new(pgdump)
|
||||
.arg("--schema-only")
|
||||
.arg(connstr.as_str())
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped())
|
||||
.kill_on_drop(true)
|
||||
.spawn()?;
|
||||
|
||||
let stdout = cmd.stdout.take().ok_or_else(|| {
|
||||
std::io::Error::new(std::io::ErrorKind::Other, "Failed to capture stdout.")
|
||||
})?;
|
||||
|
||||
let stderr = cmd.stderr.take().ok_or_else(|| {
|
||||
std::io::Error::new(std::io::ErrorKind::Other, "Failed to capture stderr.")
|
||||
})?;
|
||||
|
||||
let mut stdout_reader = FramedRead::new(stdout, BytesCodec::new());
|
||||
let stderr_reader = BufReader::new(stderr);
|
||||
|
||||
let first_chunk = match stdout_reader.next().await {
|
||||
Some(Ok(bytes)) if !bytes.is_empty() => bytes,
|
||||
Some(Err(e)) => {
|
||||
return Err(SchemaDumpError::IO(e));
|
||||
}
|
||||
_ => {
|
||||
let mut lines = stderr_reader.lines();
|
||||
if let Some(line) = lines.next_line().await? {
|
||||
if line.contains(&format!("FATAL: database \"{}\" does not exist", dbname)) {
|
||||
return Err(SchemaDumpError::DatabaseDoesNotExist);
|
||||
}
|
||||
warn!("pg_dump stderr: {}", line)
|
||||
}
|
||||
tokio::spawn(async move {
|
||||
while let Ok(Some(line)) = lines.next_line().await {
|
||||
warn!("pg_dump stderr: {}", line)
|
||||
}
|
||||
});
|
||||
|
||||
return Err(SchemaDumpError::IO(std::io::Error::new(
|
||||
std::io::ErrorKind::Other,
|
||||
"failed to start pg_dump",
|
||||
)));
|
||||
}
|
||||
};
|
||||
let initial_stream = stream::once(Ok(first_chunk.freeze()));
|
||||
// Consume stderr and log warnings
|
||||
tokio::spawn(async move {
|
||||
let mut lines = stderr_reader.lines();
|
||||
while let Ok(Some(line)) = lines.next_line().await {
|
||||
warn!("pg_dump stderr: {}", line)
|
||||
}
|
||||
});
|
||||
Ok(initial_stream.chain(stdout_reader.map(|res| res.map(|b| b.freeze()))))
|
||||
}
|
||||
@@ -10,7 +10,6 @@ use std::sync::atomic::AtomicU32;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::sync::{Condvar, Mutex, RwLock};
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
@@ -57,7 +56,6 @@ pub struct ComputeNode {
|
||||
/// - we push new spec and it does reconfiguration
|
||||
/// - but then something happens and compute pod / VM is destroyed,
|
||||
/// so k8s controller starts it again with the **old** spec
|
||||
///
|
||||
/// and the same for empty computes:
|
||||
/// - we started compute without any spec
|
||||
/// - we push spec and it does configuration
|
||||
@@ -306,13 +304,6 @@ impl ComputeNode {
|
||||
self.state_changed.notify_all();
|
||||
}
|
||||
|
||||
pub fn set_failed_status(&self, err: anyhow::Error) {
|
||||
let mut state = self.state.lock().unwrap();
|
||||
state.error = Some(format!("{err:?}"));
|
||||
state.status = ComputeStatus::Failed;
|
||||
self.state_changed.notify_all();
|
||||
}
|
||||
|
||||
pub fn get_status(&self) -> ComputeStatus {
|
||||
self.state.lock().unwrap().status
|
||||
}
|
||||
@@ -408,15 +399,7 @@ impl ComputeNode {
|
||||
pub fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
|
||||
let mut retry_period_ms = 500.0;
|
||||
let mut attempts = 0;
|
||||
const DEFAULT_ATTEMPTS: u16 = 10;
|
||||
#[cfg(feature = "testing")]
|
||||
let max_attempts = if let Ok(v) = env::var("NEON_COMPUTE_TESTING_BASEBACKUP_RETRIES") {
|
||||
u16::from_str(&v).unwrap()
|
||||
} else {
|
||||
DEFAULT_ATTEMPTS
|
||||
};
|
||||
#[cfg(not(feature = "testing"))]
|
||||
let max_attempts = DEFAULT_ATTEMPTS;
|
||||
let max_attempts = 10;
|
||||
loop {
|
||||
let result = self.try_get_basebackup(compute_state, lsn);
|
||||
match result {
|
||||
@@ -718,7 +701,7 @@ impl ComputeNode {
|
||||
info!("running initdb");
|
||||
let initdb_bin = Path::new(&self.pgbin).parent().unwrap().join("initdb");
|
||||
Command::new(initdb_bin)
|
||||
.args(["--pgdata", pgdata])
|
||||
.args(["-D", pgdata])
|
||||
.output()
|
||||
.expect("cannot start initdb process");
|
||||
|
||||
@@ -815,11 +798,7 @@ impl ComputeNode {
|
||||
// In this case we need to connect with old `zenith_admin` name
|
||||
// and create new user. We cannot simply rename connected user,
|
||||
// but we can create a new one and grant it all privileges.
|
||||
let mut connstr = self.connstr.clone();
|
||||
connstr
|
||||
.query_pairs_mut()
|
||||
.append_pair("application_name", "apply_config");
|
||||
|
||||
let connstr = self.connstr.clone();
|
||||
let mut client = match Client::connect(connstr.as_str(), NoTls) {
|
||||
Err(e) => match e.code() {
|
||||
Some(&SqlState::INVALID_PASSWORD)
|
||||
@@ -888,19 +867,15 @@ impl ComputeNode {
|
||||
|
||||
// Run migrations separately to not hold up cold starts
|
||||
thread::spawn(move || {
|
||||
let mut connstr = connstr.clone();
|
||||
connstr
|
||||
.query_pairs_mut()
|
||||
.append_pair("application_name", "migrations");
|
||||
|
||||
let mut client = Client::connect(connstr.as_str(), NoTls)?;
|
||||
handle_migrations(&mut client).context("apply_config handle_migrations")
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Wrapped this around `pg_ctl reload`, but right now we don't use
|
||||
// `pg_ctl` for start / stop.
|
||||
// We could've wrapped this around `pg_ctl reload`, but right now we don't use
|
||||
// `pg_ctl` for start / stop, so this just seems much easier to do as we already
|
||||
// have opened connection to Postgres and superuser access.
|
||||
#[instrument(skip_all)]
|
||||
fn pg_reload_conf(&self) -> Result<()> {
|
||||
let pgctl_bin = Path::new(&self.pgbin).parent().unwrap().join("pg_ctl");
|
||||
@@ -943,39 +918,38 @@ impl ComputeNode {
|
||||
// temporarily reset max_cluster_size in config
|
||||
// to avoid the possibility of hitting the limit, while we are reconfiguring:
|
||||
// creating new extensions, roles, etc...
|
||||
config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || {
|
||||
self.pg_reload_conf()?;
|
||||
config::compute_ctl_temp_override_create(pgdata_path, "neon.max_cluster_size=-1")?;
|
||||
self.pg_reload_conf()?;
|
||||
|
||||
let mut client = Client::connect(self.connstr.as_str(), NoTls)?;
|
||||
let mut client = Client::connect(self.connstr.as_str(), NoTls)?;
|
||||
|
||||
// Proceed with post-startup configuration. Note, that order of operations is important.
|
||||
// Disable DDL forwarding because control plane already knows about these roles/databases.
|
||||
if spec.mode == ComputeMode::Primary {
|
||||
client.simple_query("SET neon.forward_ddl = false")?;
|
||||
cleanup_instance(&mut client)?;
|
||||
handle_roles(&spec, &mut client)?;
|
||||
handle_databases(&spec, &mut client)?;
|
||||
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
|
||||
handle_grants(
|
||||
&spec,
|
||||
&mut client,
|
||||
self.connstr.as_str(),
|
||||
self.has_feature(ComputeFeature::AnonExtension),
|
||||
)?;
|
||||
handle_extensions(&spec, &mut client)?;
|
||||
handle_extension_neon(&mut client)?;
|
||||
// We can skip handle_migrations here because a new migration can only appear
|
||||
// if we have a new version of the compute_ctl binary, which can only happen
|
||||
// if compute got restarted, in which case we'll end up inside of apply_config
|
||||
// instead of reconfigure.
|
||||
}
|
||||
// Proceed with post-startup configuration. Note, that order of operations is important.
|
||||
// Disable DDL forwarding because control plane already knows about these roles/databases.
|
||||
if spec.mode == ComputeMode::Primary {
|
||||
client.simple_query("SET neon.forward_ddl = false")?;
|
||||
cleanup_instance(&mut client)?;
|
||||
handle_roles(&spec, &mut client)?;
|
||||
handle_databases(&spec, &mut client)?;
|
||||
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
|
||||
handle_grants(
|
||||
&spec,
|
||||
&mut client,
|
||||
self.connstr.as_str(),
|
||||
self.has_feature(ComputeFeature::AnonExtension),
|
||||
)?;
|
||||
handle_extensions(&spec, &mut client)?;
|
||||
handle_extension_neon(&mut client)?;
|
||||
// We can skip handle_migrations here because a new migration can only appear
|
||||
// if we have a new version of the compute_ctl binary, which can only happen
|
||||
// if compute got restarted, in which case we'll end up inside of apply_config
|
||||
// instead of reconfigure.
|
||||
}
|
||||
|
||||
// 'Close' connection
|
||||
drop(client);
|
||||
|
||||
Ok(())
|
||||
})?;
|
||||
// 'Close' connection
|
||||
drop(client);
|
||||
|
||||
// reset max_cluster_size in config back to original value and reload config
|
||||
config::compute_ctl_temp_override_remove(pgdata_path)?;
|
||||
self.pg_reload_conf()?;
|
||||
|
||||
let unknown_op = "unknown".to_string();
|
||||
@@ -1066,17 +1040,12 @@ impl ComputeNode {
|
||||
// temporarily reset max_cluster_size in config
|
||||
// to avoid the possibility of hitting the limit, while we are applying config:
|
||||
// creating new extensions, roles, etc...
|
||||
config::with_compute_ctl_tmp_override(
|
||||
pgdata_path,
|
||||
"neon.max_cluster_size=-1",
|
||||
|| {
|
||||
self.pg_reload_conf()?;
|
||||
config::compute_ctl_temp_override_create(pgdata_path, "neon.max_cluster_size=-1")?;
|
||||
self.pg_reload_conf()?;
|
||||
|
||||
self.apply_config(&compute_state)?;
|
||||
self.apply_config(&compute_state)?;
|
||||
|
||||
Ok(())
|
||||
},
|
||||
)?;
|
||||
config::compute_ctl_temp_override_remove(pgdata_path)?;
|
||||
self.pg_reload_conf()?;
|
||||
}
|
||||
self.post_apply_config()?;
|
||||
@@ -1133,14 +1102,11 @@ impl ComputeNode {
|
||||
// EKS worker nodes have following core dump settings:
|
||||
// /proc/sys/kernel/core_pattern -> core
|
||||
// /proc/sys/kernel/core_uses_pid -> 1
|
||||
// ulimit -c -> unlimited
|
||||
// ulimint -c -> unlimited
|
||||
// which results in core dumps being written to postgres data directory as core.<pid>.
|
||||
//
|
||||
// Use that as a default location and pattern, except macos where core dumps are written
|
||||
// to /cores/ directory by default.
|
||||
//
|
||||
// With default Linux settings, the core dump file is called just "core", so check for
|
||||
// that too.
|
||||
pub fn check_for_core_dumps(&self) -> Result<()> {
|
||||
let core_dump_dir = match std::env::consts::OS {
|
||||
"macos" => Path::new("/cores/"),
|
||||
@@ -1152,17 +1118,8 @@ impl ComputeNode {
|
||||
let files = fs::read_dir(core_dump_dir)?;
|
||||
let cores = files.filter_map(|entry| {
|
||||
let entry = entry.ok()?;
|
||||
|
||||
let is_core_dump = match entry.file_name().to_str()? {
|
||||
n if n.starts_with("core.") => true,
|
||||
"core" => true,
|
||||
_ => false,
|
||||
};
|
||||
if is_core_dump {
|
||||
Some(entry.path())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
let _ = entry.file_name().to_str()?.strip_prefix("core.")?;
|
||||
Some(entry.path())
|
||||
});
|
||||
|
||||
// Print backtrace for each core dump
|
||||
@@ -1413,36 +1370,6 @@ LIMIT 100",
|
||||
}
|
||||
Ok(remote_ext_metrics)
|
||||
}
|
||||
|
||||
/// Waits until current thread receives a state changed notification and
|
||||
/// the pageserver connection strings has changed.
|
||||
///
|
||||
/// The operation will time out after a specified duration.
|
||||
pub fn wait_timeout_while_pageserver_connstr_unchanged(&self, duration: Duration) {
|
||||
let state = self.state.lock().unwrap();
|
||||
let old_pageserver_connstr = state
|
||||
.pspec
|
||||
.as_ref()
|
||||
.expect("spec must be set")
|
||||
.pageserver_connstr
|
||||
.clone();
|
||||
let mut unchanged = true;
|
||||
let _ = self
|
||||
.state_changed
|
||||
.wait_timeout_while(state, duration, |s| {
|
||||
let pageserver_connstr = &s
|
||||
.pspec
|
||||
.as_ref()
|
||||
.expect("spec must be set")
|
||||
.pageserver_connstr;
|
||||
unchanged = pageserver_connstr == &old_pageserver_connstr;
|
||||
unchanged
|
||||
})
|
||||
.unwrap();
|
||||
if !unchanged {
|
||||
info!("Pageserver config changed");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn forward_termination_signal() {
|
||||
@@ -1454,9 +1381,7 @@ pub fn forward_termination_signal() {
|
||||
let pg_pid = PG_PID.load(Ordering::SeqCst);
|
||||
if pg_pid != 0 {
|
||||
let pg_pid = nix::unistd::Pid::from_raw(pg_pid as i32);
|
||||
// Use 'fast' shutdown (SIGINT) because it also creates a shutdown checkpoint, which is important for
|
||||
// ROs to get a list of running xacts faster instead of going through the CLOG.
|
||||
// See https://www.postgresql.org/docs/current/server-shutdown.html for the list of modes and signals.
|
||||
kill(pg_pid, Signal::SIGINT).ok();
|
||||
// use 'immediate' shutdown (SIGQUIT): https://www.postgresql.org/docs/current/server-shutdown.html
|
||||
kill(pg_pid, Signal::SIGQUIT).ok();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,6 +83,12 @@ pub fn write_postgres_conf(
|
||||
ComputeMode::Replica => {
|
||||
// hot_standby is 'on' by default, but let's be explicit
|
||||
writeln!(file, "hot_standby=on")?;
|
||||
|
||||
// Inform the replica about the primary state
|
||||
// Default is 'false'
|
||||
if let Some(primary_is_running) = spec.primary_is_running {
|
||||
writeln!(file, "neon.primary_is_running={}", primary_is_running)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -125,17 +131,18 @@ pub fn write_postgres_conf(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn with_compute_ctl_tmp_override<F>(pgdata_path: &Path, options: &str, exec: F) -> Result<()>
|
||||
where
|
||||
F: FnOnce() -> Result<()>,
|
||||
{
|
||||
/// create file compute_ctl_temp_override.conf in pgdata_dir
|
||||
/// add provided options to this file
|
||||
pub fn compute_ctl_temp_override_create(pgdata_path: &Path, options: &str) -> Result<()> {
|
||||
let path = pgdata_path.join("compute_ctl_temp_override.conf");
|
||||
let mut file = File::create(path)?;
|
||||
write!(file, "{}", options)?;
|
||||
|
||||
let res = exec();
|
||||
|
||||
file.set_len(0)?;
|
||||
|
||||
res
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// remove file compute_ctl_temp_override.conf in pgdata_dir
|
||||
pub fn compute_ctl_temp_override_remove(pgdata_path: &Path) -> Result<()> {
|
||||
let path = pgdata_path.join("compute_ctl_temp_override.conf");
|
||||
std::fs::remove_file(path)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -11,17 +11,9 @@ use crate::compute::ComputeNode;
|
||||
fn configurator_main_loop(compute: &Arc<ComputeNode>) {
|
||||
info!("waiting for reconfiguration requests");
|
||||
loop {
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
let state = compute.state.lock().unwrap();
|
||||
let mut state = compute.state_changed.wait(state).unwrap();
|
||||
|
||||
// We have to re-check the status after re-acquiring the lock because it could be that
|
||||
// the status has changed while we were waiting for the lock, and we might not need to
|
||||
// wait on the condition variable. Otherwise, we might end up in some soft-/deadlock, i.e.
|
||||
// we are waiting for a condition variable that will never be signaled.
|
||||
if state.status != ComputeStatus::ConfigurationPending {
|
||||
state = compute.state_changed.wait(state).unwrap();
|
||||
}
|
||||
|
||||
// Re-check the status after waking up
|
||||
if state.status == ComputeStatus::ConfigurationPending {
|
||||
info!("got configuration request");
|
||||
state.status = ComputeStatus::Configuration;
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
use anyhow::Context;
|
||||
|
||||
pub const DISK_QUOTA_BIN: &str = "/neonvm/bin/set-disk-quota";
|
||||
|
||||
/// If size_bytes is 0, it disables the quota. Otherwise, it sets filesystem quota to size_bytes.
|
||||
/// `fs_mountpoint` should point to the mountpoint of the filesystem where the quota should be set.
|
||||
pub fn set_disk_quota(size_bytes: u64, fs_mountpoint: &str) -> anyhow::Result<()> {
|
||||
let size_kb = size_bytes / 1024;
|
||||
// run `/neonvm/bin/set-disk-quota {size_kb} {mountpoint}`
|
||||
let child_result = std::process::Command::new("/usr/bin/sudo")
|
||||
.arg(DISK_QUOTA_BIN)
|
||||
.arg(size_kb.to_string())
|
||||
.arg(fs_mountpoint)
|
||||
.spawn();
|
||||
|
||||
child_result
|
||||
.context("spawn() failed")
|
||||
.and_then(|mut child| child.wait().context("wait() failed"))
|
||||
.and_then(|status| match status.success() {
|
||||
true => Ok(()),
|
||||
false => Err(anyhow::anyhow!("process exited with {status}")),
|
||||
})
|
||||
// wrap any prior error with the overall context that we couldn't run the command
|
||||
.with_context(|| format!("could not run `/usr/bin/sudo {DISK_QUOTA_BIN}`"))
|
||||
}
|
||||
@@ -124,7 +124,6 @@ fn parse_pg_version(human_version: &str) -> &str {
|
||||
"14" => return "v14",
|
||||
"15" => return "v15",
|
||||
"16" => return "v16",
|
||||
"17" => return "v17",
|
||||
_ => {}
|
||||
},
|
||||
_ => {}
|
||||
|
||||
@@ -5,21 +5,17 @@ use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
|
||||
use crate::catalog::SchemaDumpError;
|
||||
use crate::catalog::{get_database_schema, get_dbs_and_roles};
|
||||
use crate::compute::forward_termination_signal;
|
||||
use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
|
||||
use compute_api::requests::ConfigurationRequest;
|
||||
use compute_api::responses::{ComputeStatus, ComputeStatusResponse, GenericAPIError};
|
||||
|
||||
use anyhow::Result;
|
||||
use hyper::header::CONTENT_TYPE;
|
||||
use hyper::service::{make_service_fn, service_fn};
|
||||
use hyper::{Body, Method, Request, Response, Server, StatusCode};
|
||||
use tokio::task;
|
||||
use tracing::{debug, error, info, warn};
|
||||
use tracing::{error, info, warn};
|
||||
use tracing_utils::http::OtelName;
|
||||
use utils::http::request::must_get_query_param;
|
||||
|
||||
fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse {
|
||||
ComputeStatusResponse {
|
||||
@@ -48,7 +44,7 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
|
||||
match (req.method(), req.uri().path()) {
|
||||
// Serialized compute state.
|
||||
(&Method::GET, "/status") => {
|
||||
debug!("serving /status GET request");
|
||||
info!("serving /status GET request");
|
||||
let state = compute.state.lock().unwrap();
|
||||
let status_response = status_response_from_state(&state);
|
||||
Response::new(Body::from(serde_json::to_string(&status_response).unwrap()))
|
||||
@@ -137,34 +133,6 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
|
||||
}
|
||||
}
|
||||
|
||||
(&Method::GET, "/dbs_and_roles") => {
|
||||
info!("serving /dbs_and_roles GET request",);
|
||||
match get_dbs_and_roles(compute).await {
|
||||
Ok(res) => render_json(Body::from(serde_json::to_string(&res).unwrap())),
|
||||
Err(_) => {
|
||||
render_json_error("can't get dbs and roles", StatusCode::INTERNAL_SERVER_ERROR)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(&Method::GET, "/database_schema") => {
|
||||
let database = match must_get_query_param(&req, "database") {
|
||||
Err(e) => return e.into_response(),
|
||||
Ok(database) => database,
|
||||
};
|
||||
info!("serving /database_schema GET request with database: {database}",);
|
||||
match get_database_schema(compute, &database).await {
|
||||
Ok(res) => render_plain(Body::wrap_stream(res)),
|
||||
Err(SchemaDumpError::DatabaseDoesNotExist) => {
|
||||
render_json_error("database does not exist", StatusCode::NOT_FOUND)
|
||||
}
|
||||
Err(e) => {
|
||||
error!("can't get schema dump: {}", e);
|
||||
render_json_error("can't get schema dump", StatusCode::INTERNAL_SERVER_ERROR)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// download extension files from remote extension storage on demand
|
||||
(&Method::POST, route) if route.starts_with("/extension_server/") => {
|
||||
info!("serving {:?} POST request", route);
|
||||
@@ -264,72 +232,68 @@ async fn handle_configure_request(
|
||||
|
||||
let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap();
|
||||
let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap();
|
||||
match serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
|
||||
Ok(request) => {
|
||||
let spec = request.spec;
|
||||
if let Ok(request) = serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
|
||||
let spec = request.spec;
|
||||
|
||||
let parsed_spec = match ParsedSpec::try_from(spec) {
|
||||
Ok(ps) => ps,
|
||||
Err(msg) => return Err((msg, StatusCode::BAD_REQUEST)),
|
||||
};
|
||||
let parsed_spec = match ParsedSpec::try_from(spec) {
|
||||
Ok(ps) => ps,
|
||||
Err(msg) => return Err((msg, StatusCode::BAD_REQUEST)),
|
||||
};
|
||||
|
||||
// XXX: wrap state update under lock in code blocks. Otherwise,
|
||||
// we will try to `Send` `mut state` into the spawned thread
|
||||
// bellow, which will cause error:
|
||||
// ```
|
||||
// error: future cannot be sent between threads safely
|
||||
// ```
|
||||
{
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
|
||||
let msg = format!(
|
||||
"invalid compute status for configuration request: {:?}",
|
||||
state.status.clone()
|
||||
);
|
||||
return Err((msg, StatusCode::PRECONDITION_FAILED));
|
||||
// XXX: wrap state update under lock in code blocks. Otherwise,
|
||||
// we will try to `Send` `mut state` into the spawned thread
|
||||
// bellow, which will cause error:
|
||||
// ```
|
||||
// error: future cannot be sent between threads safely
|
||||
// ```
|
||||
{
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
|
||||
let msg = format!(
|
||||
"invalid compute status for configuration request: {:?}",
|
||||
state.status.clone()
|
||||
);
|
||||
return Err((msg, StatusCode::PRECONDITION_FAILED));
|
||||
}
|
||||
state.pspec = Some(parsed_spec);
|
||||
state.status = ComputeStatus::ConfigurationPending;
|
||||
compute.state_changed.notify_all();
|
||||
drop(state);
|
||||
info!("set new spec and notified waiters");
|
||||
}
|
||||
|
||||
// Spawn a blocking thread to wait for compute to become Running.
|
||||
// This is needed to do not block the main pool of workers and
|
||||
// be able to serve other requests while some particular request
|
||||
// is waiting for compute to finish configuration.
|
||||
let c = compute.clone();
|
||||
task::spawn_blocking(move || {
|
||||
let mut state = c.state.lock().unwrap();
|
||||
while state.status != ComputeStatus::Running {
|
||||
state = c.state_changed.wait(state).unwrap();
|
||||
info!(
|
||||
"waiting for compute to become Running, current status: {:?}",
|
||||
state.status
|
||||
);
|
||||
|
||||
if state.status == ComputeStatus::Failed {
|
||||
let err = state.error.as_ref().map_or("unknown error", |x| x);
|
||||
let msg = format!("compute configuration failed: {:?}", err);
|
||||
return Err((msg, StatusCode::INTERNAL_SERVER_ERROR));
|
||||
}
|
||||
state.pspec = Some(parsed_spec);
|
||||
state.status = ComputeStatus::ConfigurationPending;
|
||||
compute.state_changed.notify_all();
|
||||
drop(state);
|
||||
info!("set new spec and notified waiters");
|
||||
}
|
||||
|
||||
// Spawn a blocking thread to wait for compute to become Running.
|
||||
// This is needed to do not block the main pool of workers and
|
||||
// be able to serve other requests while some particular request
|
||||
// is waiting for compute to finish configuration.
|
||||
let c = compute.clone();
|
||||
task::spawn_blocking(move || {
|
||||
let mut state = c.state.lock().unwrap();
|
||||
while state.status != ComputeStatus::Running {
|
||||
state = c.state_changed.wait(state).unwrap();
|
||||
info!(
|
||||
"waiting for compute to become Running, current status: {:?}",
|
||||
state.status
|
||||
);
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
.unwrap()?;
|
||||
|
||||
if state.status == ComputeStatus::Failed {
|
||||
let err = state.error.as_ref().map_or("unknown error", |x| x);
|
||||
let msg = format!("compute configuration failed: {:?}", err);
|
||||
return Err((msg, StatusCode::INTERNAL_SERVER_ERROR));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
.unwrap()?;
|
||||
|
||||
// Return current compute state if everything went well.
|
||||
let state = compute.state.lock().unwrap().clone();
|
||||
let status_response = status_response_from_state(&state);
|
||||
Ok(serde_json::to_string(&status_response).unwrap())
|
||||
}
|
||||
Err(err) => {
|
||||
error!("could not parse spec: {spec_raw}");
|
||||
Err((format!("invalid spec: {err:?}"), StatusCode::BAD_REQUEST))
|
||||
}
|
||||
// Return current compute state if everything went well.
|
||||
let state = compute.state.lock().unwrap().clone();
|
||||
let status_response = status_response_from_state(&state);
|
||||
Ok(serde_json::to_string(&status_response).unwrap())
|
||||
} else {
|
||||
Err(("invalid spec".to_string(), StatusCode::BAD_REQUEST))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -339,25 +303,10 @@ fn render_json_error(e: &str, status: StatusCode) -> Response<Body> {
|
||||
};
|
||||
Response::builder()
|
||||
.status(status)
|
||||
.header(CONTENT_TYPE, "application/json")
|
||||
.body(Body::from(serde_json::to_string(&error).unwrap()))
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn render_json(body: Body) -> Response<Body> {
|
||||
Response::builder()
|
||||
.header(CONTENT_TYPE, "application/json")
|
||||
.body(body)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn render_plain(body: Body) -> Response<Body> {
|
||||
Response::builder()
|
||||
.header(CONTENT_TYPE, "text/plain")
|
||||
.body(body)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (String, StatusCode)> {
|
||||
{
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
|
||||
@@ -68,51 +68,6 @@ paths:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Info"
|
||||
|
||||
/dbs_and_roles:
|
||||
get:
|
||||
tags:
|
||||
- Info
|
||||
summary: Get databases and roles in the catalog.
|
||||
description: ""
|
||||
operationId: getDbsAndRoles
|
||||
responses:
|
||||
200:
|
||||
description: Compute schema objects
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/DbsAndRoles"
|
||||
|
||||
/database_schema:
|
||||
get:
|
||||
tags:
|
||||
- Info
|
||||
summary: Get schema dump
|
||||
parameters:
|
||||
- name: database
|
||||
in: query
|
||||
description: Database name to dump.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
example: "postgres"
|
||||
description: Get schema dump in SQL format.
|
||||
operationId: getDatabaseSchema
|
||||
responses:
|
||||
200:
|
||||
description: Schema dump
|
||||
content:
|
||||
text/plain:
|
||||
schema:
|
||||
type: string
|
||||
description: Schema dump in SQL format.
|
||||
404:
|
||||
description: Non existing database.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/GenericError"
|
||||
|
||||
/check_writability:
|
||||
post:
|
||||
tags:
|
||||
@@ -274,73 +229,6 @@ components:
|
||||
num_cpus:
|
||||
type: integer
|
||||
|
||||
DbsAndRoles:
|
||||
type: object
|
||||
description: Databases and Roles
|
||||
required:
|
||||
- roles
|
||||
- databases
|
||||
properties:
|
||||
roles:
|
||||
type: array
|
||||
items:
|
||||
$ref: "#/components/schemas/Role"
|
||||
databases:
|
||||
type: array
|
||||
items:
|
||||
$ref: "#/components/schemas/Database"
|
||||
|
||||
Database:
|
||||
type: object
|
||||
description: Database
|
||||
required:
|
||||
- name
|
||||
- owner
|
||||
- restrict_conn
|
||||
- invalid
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
owner:
|
||||
type: string
|
||||
options:
|
||||
type: array
|
||||
items:
|
||||
$ref: "#/components/schemas/GenericOption"
|
||||
restrict_conn:
|
||||
type: boolean
|
||||
invalid:
|
||||
type: boolean
|
||||
|
||||
Role:
|
||||
type: object
|
||||
description: Role
|
||||
required:
|
||||
- name
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
encrypted_password:
|
||||
type: string
|
||||
options:
|
||||
type: array
|
||||
items:
|
||||
$ref: "#/components/schemas/GenericOption"
|
||||
|
||||
GenericOption:
|
||||
type: object
|
||||
description: Schema Generic option
|
||||
required:
|
||||
- name
|
||||
- vartype
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
value:
|
||||
type: string
|
||||
vartype:
|
||||
type: string
|
||||
|
||||
ComputeState:
|
||||
type: object
|
||||
required:
|
||||
|
||||
@@ -2,25 +2,16 @@
|
||||
//! configuration.
|
||||
#![deny(unsafe_code)]
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
|
||||
extern crate hyper0 as hyper;
|
||||
|
||||
pub mod checker;
|
||||
pub mod config;
|
||||
pub mod configurator;
|
||||
pub mod http;
|
||||
#[macro_use]
|
||||
pub mod logger;
|
||||
pub mod catalog;
|
||||
pub mod compute;
|
||||
pub mod disk_quota;
|
||||
pub mod extension_server;
|
||||
// pub mod local_proxy;
|
||||
pub mod lsn_lease;
|
||||
mod migration;
|
||||
pub mod monitor;
|
||||
pub mod params;
|
||||
pub mod pg_helpers;
|
||||
pub mod spec;
|
||||
pub mod swap;
|
||||
pub mod sync_sk;
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
//! Local Proxy is a feature of our BaaS Neon Authorize project.
|
||||
//!
|
||||
//! Local Proxy validates JWTs and manages the pg_session_jwt extension.
|
||||
//! It also maintains a connection pool to postgres.
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use camino::Utf8Path;
|
||||
use compute_api::spec::LocalProxySpec;
|
||||
use nix::sys::signal::Signal;
|
||||
use utils::pid_file::{self, PidFileRead};
|
||||
|
||||
pub fn configure(local_proxy: &LocalProxySpec) -> Result<()> {
|
||||
write_local_proxy_conf("/etc/local_proxy/config.json".as_ref(), local_proxy)?;
|
||||
notify_local_proxy("/etc/local_proxy/pid".as_ref())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Create or completely rewrite configuration file specified by `path`
|
||||
fn write_local_proxy_conf(path: &Utf8Path, local_proxy: &LocalProxySpec) -> Result<()> {
|
||||
let config =
|
||||
serde_json::to_string_pretty(local_proxy).context("serializing LocalProxySpec to json")?;
|
||||
std::fs::write(path, config).with_context(|| format!("writing {path}"))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Notify local proxy about a new config file.
|
||||
fn notify_local_proxy(path: &Utf8Path) -> Result<()> {
|
||||
match pid_file::read(path)? {
|
||||
// if the file doesn't exist, or isn't locked, local_proxy isn't running
|
||||
// and will naturally pick up our config later
|
||||
PidFileRead::NotExist | PidFileRead::NotHeldByAnyProcess(_) => {}
|
||||
PidFileRead::LockedByOtherProcess(pid) => {
|
||||
// From the pid_file docs:
|
||||
//
|
||||
// > 1. The other process might exit at any time, turning the given PID stale.
|
||||
// > 2. There is a small window in which `claim_for_current_process` has already
|
||||
// > locked the file but not yet updates its contents. [`read`] will return
|
||||
// > this variant here, but with the old file contents, i.e., a stale PID.
|
||||
// >
|
||||
// > The kernel is free to recycle PID once it has been `wait(2)`ed upon by
|
||||
// > its creator. Thus, acting upon a stale PID, e.g., by issuing a `kill`
|
||||
// > system call on it, bears the risk of killing an unrelated process.
|
||||
// > This is an inherent limitation of using pidfiles.
|
||||
// > The only race-free solution is to have a supervisor-process with a lifetime
|
||||
// > that exceeds that of all of its child-processes (e.g., `runit`, `supervisord`).
|
||||
//
|
||||
// This is an ok risk as we only send a SIGHUP which likely won't actually
|
||||
// kill the process, only reload config.
|
||||
nix::sys::signal::kill(pid, Signal::SIGHUP).context("sending signal to local_proxy")?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,3 +1,4 @@
|
||||
use tracing_opentelemetry::OpenTelemetryLayer;
|
||||
use tracing_subscriber::layer::SubscriberExt;
|
||||
use tracing_subscriber::prelude::*;
|
||||
|
||||
@@ -22,7 +23,8 @@ pub fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result<()> {
|
||||
.with_writer(std::io::stderr);
|
||||
|
||||
// Initialize OpenTelemetry
|
||||
let otlp_layer = tracing_utils::init_tracing_without_runtime("compute_ctl");
|
||||
let otlp_layer =
|
||||
tracing_utils::init_tracing_without_runtime("compute_ctl").map(OpenTelemetryLayer::new);
|
||||
|
||||
// Put it all together
|
||||
tracing_subscriber::registry()
|
||||
|
||||
@@ -1,185 +0,0 @@
|
||||
use anyhow::bail;
|
||||
use anyhow::Result;
|
||||
use postgres::{NoTls, SimpleQueryMessage};
|
||||
use std::time::SystemTime;
|
||||
use std::{str::FromStr, sync::Arc, thread, time::Duration};
|
||||
use utils::id::TenantId;
|
||||
use utils::id::TimelineId;
|
||||
|
||||
use compute_api::spec::ComputeMode;
|
||||
use tracing::{info, warn};
|
||||
use utils::{
|
||||
lsn::Lsn,
|
||||
shard::{ShardCount, ShardNumber, TenantShardId},
|
||||
};
|
||||
|
||||
use crate::compute::ComputeNode;
|
||||
|
||||
/// Spawns a background thread to periodically renew LSN leases for static compute.
|
||||
/// Do nothing if the compute is not in static mode.
|
||||
pub fn launch_lsn_lease_bg_task_for_static(compute: &Arc<ComputeNode>) {
|
||||
let (tenant_id, timeline_id, lsn) = {
|
||||
let state = compute.state.lock().unwrap();
|
||||
let spec = state.pspec.as_ref().expect("Spec must be set");
|
||||
match spec.spec.mode {
|
||||
ComputeMode::Static(lsn) => (spec.tenant_id, spec.timeline_id, lsn),
|
||||
_ => return,
|
||||
}
|
||||
};
|
||||
let compute = compute.clone();
|
||||
|
||||
let span = tracing::info_span!("lsn_lease_bg_task", %tenant_id, %timeline_id, %lsn);
|
||||
thread::spawn(move || {
|
||||
let _entered = span.entered();
|
||||
if let Err(e) = lsn_lease_bg_task(compute, tenant_id, timeline_id, lsn) {
|
||||
// TODO: might need stronger error feedback than logging an warning.
|
||||
warn!("Exited with error: {e}");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/// Renews lsn lease periodically so static compute are not affected by GC.
|
||||
fn lsn_lease_bg_task(
|
||||
compute: Arc<ComputeNode>,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
lsn: Lsn,
|
||||
) -> Result<()> {
|
||||
loop {
|
||||
let valid_until = acquire_lsn_lease_with_retry(&compute, tenant_id, timeline_id, lsn)?;
|
||||
let valid_duration = valid_until
|
||||
.duration_since(SystemTime::now())
|
||||
.unwrap_or(Duration::ZERO);
|
||||
|
||||
// Sleep for 60 seconds less than the valid duration but no more than half of the valid duration.
|
||||
let sleep_duration = valid_duration
|
||||
.saturating_sub(Duration::from_secs(60))
|
||||
.max(valid_duration / 2);
|
||||
|
||||
info!(
|
||||
"Request succeeded, sleeping for {} seconds",
|
||||
sleep_duration.as_secs()
|
||||
);
|
||||
compute.wait_timeout_while_pageserver_connstr_unchanged(sleep_duration);
|
||||
}
|
||||
}
|
||||
|
||||
/// Acquires lsn lease in a retry loop. Returns the expiration time if a lease is granted.
|
||||
/// Returns an error if a lease is explicitly not granted. Otherwise, we keep sending requests.
|
||||
fn acquire_lsn_lease_with_retry(
|
||||
compute: &Arc<ComputeNode>,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
lsn: Lsn,
|
||||
) -> Result<SystemTime> {
|
||||
let mut attempts = 0usize;
|
||||
let mut retry_period_ms: f64 = 500.0;
|
||||
const MAX_RETRY_PERIOD_MS: f64 = 60.0 * 1000.0;
|
||||
|
||||
loop {
|
||||
// Note: List of pageservers is dynamic, need to re-read configs before each attempt.
|
||||
let configs = {
|
||||
let state = compute.state.lock().unwrap();
|
||||
|
||||
let spec = state.pspec.as_ref().expect("spec must be set");
|
||||
|
||||
let conn_strings = spec.pageserver_connstr.split(',');
|
||||
|
||||
conn_strings
|
||||
.map(|connstr| {
|
||||
let mut config = postgres::Config::from_str(connstr).expect("Invalid connstr");
|
||||
if let Some(storage_auth_token) = &spec.storage_auth_token {
|
||||
config.password(storage_auth_token.clone());
|
||||
}
|
||||
config
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
|
||||
let result = try_acquire_lsn_lease(tenant_id, timeline_id, lsn, &configs);
|
||||
match result {
|
||||
Ok(Some(res)) => {
|
||||
return Ok(res);
|
||||
}
|
||||
Ok(None) => {
|
||||
bail!("Permanent error: lease could not be obtained, LSN is behind the GC cutoff");
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Failed to acquire lsn lease: {e} (attempt {attempts})");
|
||||
|
||||
compute.wait_timeout_while_pageserver_connstr_unchanged(Duration::from_millis(
|
||||
retry_period_ms as u64,
|
||||
));
|
||||
retry_period_ms *= 1.5;
|
||||
retry_period_ms = retry_period_ms.min(MAX_RETRY_PERIOD_MS);
|
||||
}
|
||||
}
|
||||
attempts += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Tries to acquire an LSN lease through PS page_service API.
|
||||
fn try_acquire_lsn_lease(
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
lsn: Lsn,
|
||||
configs: &[postgres::Config],
|
||||
) -> Result<Option<SystemTime>> {
|
||||
fn get_valid_until(
|
||||
config: &postgres::Config,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
lsn: Lsn,
|
||||
) -> Result<Option<SystemTime>> {
|
||||
let mut client = config.connect(NoTls)?;
|
||||
let cmd = format!("lease lsn {} {} {} ", tenant_shard_id, timeline_id, lsn);
|
||||
let res = client.simple_query(&cmd)?;
|
||||
let msg = match res.first() {
|
||||
Some(msg) => msg,
|
||||
None => bail!("empty response"),
|
||||
};
|
||||
let row = match msg {
|
||||
SimpleQueryMessage::Row(row) => row,
|
||||
_ => bail!("error parsing lsn lease response"),
|
||||
};
|
||||
|
||||
// Note: this will be None if a lease is explicitly not granted.
|
||||
let valid_until_str = row.get("valid_until");
|
||||
|
||||
let valid_until = valid_until_str.map(|s| {
|
||||
SystemTime::UNIX_EPOCH
|
||||
.checked_add(Duration::from_millis(u128::from_str(s).unwrap() as u64))
|
||||
.expect("Time larger than max SystemTime could handle")
|
||||
});
|
||||
Ok(valid_until)
|
||||
}
|
||||
|
||||
let shard_count = configs.len();
|
||||
|
||||
let valid_until = if shard_count > 1 {
|
||||
configs
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(shard_number, config)| {
|
||||
let tenant_shard_id = TenantShardId {
|
||||
tenant_id,
|
||||
shard_count: ShardCount::new(shard_count as u8),
|
||||
shard_number: ShardNumber(shard_number as u8),
|
||||
};
|
||||
get_valid_until(config, tenant_shard_id, timeline_id, lsn)
|
||||
})
|
||||
.collect::<Result<Vec<Option<SystemTime>>>>()?
|
||||
.into_iter()
|
||||
.min()
|
||||
.unwrap()
|
||||
} else {
|
||||
get_valid_until(
|
||||
&configs[0],
|
||||
TenantShardId::unsharded(tenant_id),
|
||||
timeline_id,
|
||||
lsn,
|
||||
)?
|
||||
};
|
||||
|
||||
Ok(valid_until)
|
||||
}
|
||||
@@ -1,105 +0,0 @@
|
||||
use anyhow::{Context, Result};
|
||||
use postgres::Client;
|
||||
use tracing::info;
|
||||
|
||||
pub(crate) struct MigrationRunner<'m> {
|
||||
client: &'m mut Client,
|
||||
migrations: &'m [&'m str],
|
||||
}
|
||||
|
||||
impl<'m> MigrationRunner<'m> {
|
||||
pub fn new(client: &'m mut Client, migrations: &'m [&'m str]) -> Self {
|
||||
// The neon_migration.migration_id::id column is a bigint, which is equivalent to an i64
|
||||
assert!(migrations.len() + 1 < i64::MAX as usize);
|
||||
|
||||
Self { client, migrations }
|
||||
}
|
||||
|
||||
fn get_migration_id(&mut self) -> Result<i64> {
|
||||
let query = "SELECT id FROM neon_migration.migration_id";
|
||||
let row = self
|
||||
.client
|
||||
.query_one(query, &[])
|
||||
.context("run_migrations get migration_id")?;
|
||||
|
||||
Ok(row.get::<&str, i64>("id"))
|
||||
}
|
||||
|
||||
fn update_migration_id(&mut self, migration_id: i64) -> Result<()> {
|
||||
let setval = format!("UPDATE neon_migration.migration_id SET id={}", migration_id);
|
||||
|
||||
self.client
|
||||
.simple_query(&setval)
|
||||
.context("run_migrations update id")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn prepare_migrations(&mut self) -> Result<()> {
|
||||
let query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
|
||||
self.client.simple_query(query)?;
|
||||
|
||||
let query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
|
||||
self.client.simple_query(query)?;
|
||||
|
||||
let query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
|
||||
self.client.simple_query(query)?;
|
||||
|
||||
let query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
|
||||
self.client.simple_query(query)?;
|
||||
|
||||
let query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
|
||||
self.client.simple_query(query)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn run_migrations(mut self) -> Result<()> {
|
||||
self.prepare_migrations()?;
|
||||
|
||||
let mut current_migration = self.get_migration_id()? as usize;
|
||||
while current_migration < self.migrations.len() {
|
||||
macro_rules! migration_id {
|
||||
($cm:expr) => {
|
||||
($cm + 1) as i64
|
||||
};
|
||||
}
|
||||
|
||||
let migration = self.migrations[current_migration];
|
||||
|
||||
if migration.starts_with("-- SKIP") {
|
||||
info!("Skipping migration id={}", migration_id!(current_migration));
|
||||
} else {
|
||||
info!(
|
||||
"Running migration id={}:\n{}\n",
|
||||
migration_id!(current_migration),
|
||||
migration
|
||||
);
|
||||
|
||||
self.client
|
||||
.simple_query("BEGIN")
|
||||
.context("begin migration")?;
|
||||
|
||||
self.client.simple_query(migration).with_context(|| {
|
||||
format!(
|
||||
"run_migrations migration id={}",
|
||||
migration_id!(current_migration)
|
||||
)
|
||||
})?;
|
||||
|
||||
// Migration IDs start at 1
|
||||
self.update_migration_id(migration_id!(current_migration))?;
|
||||
|
||||
self.client
|
||||
.simple_query("COMMIT")
|
||||
.context("commit migration")?;
|
||||
|
||||
info!("Finished migration id={}", migration_id!(current_migration));
|
||||
}
|
||||
|
||||
current_migration += 1;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -1 +0,0 @@
|
||||
ALTER ROLE neon_superuser BYPASSRLS;
|
||||
@@ -1,18 +0,0 @@
|
||||
DO $$
|
||||
DECLARE
|
||||
role_name text;
|
||||
BEGIN
|
||||
FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
|
||||
LOOP
|
||||
RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
|
||||
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
|
||||
END LOOP;
|
||||
|
||||
FOR role_name IN SELECT rolname FROM pg_roles
|
||||
WHERE
|
||||
NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
|
||||
LOOP
|
||||
RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
|
||||
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
|
||||
END LOOP;
|
||||
END $$;
|
||||
@@ -1,6 +0,0 @@
|
||||
DO $$
|
||||
BEGIN
|
||||
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
|
||||
EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
|
||||
END IF;
|
||||
END $$;
|
||||
@@ -1 +0,0 @@
|
||||
GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION;
|
||||
@@ -1,4 +0,0 @@
|
||||
-- SKIP: Deemed insufficient for allowing relations created by extensions to be
|
||||
-- interacted with by neon_superuser without permission issues.
|
||||
|
||||
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser;
|
||||
@@ -1,4 +0,0 @@
|
||||
-- SKIP: Deemed insufficient for allowing relations created by extensions to be
|
||||
-- interacted with by neon_superuser without permission issues.
|
||||
|
||||
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser;
|
||||
@@ -1,3 +0,0 @@
|
||||
-- SKIP: Moved inline to the handle_grants() functions.
|
||||
|
||||
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;
|
||||
@@ -1,3 +0,0 @@
|
||||
-- SKIP: Moved inline to the handle_grants() functions.
|
||||
|
||||
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;
|
||||
@@ -1,13 +0,0 @@
|
||||
-- SKIP: The original goal of this migration was to prevent creating
|
||||
-- subscriptions, but this migration was insufficient.
|
||||
|
||||
DO $$
|
||||
DECLARE
|
||||
role_name TEXT;
|
||||
BEGIN
|
||||
FOR role_name IN SELECT rolname FROM pg_roles WHERE rolreplication IS TRUE
|
||||
LOOP
|
||||
RAISE NOTICE 'EXECUTING ALTER ROLE % NOREPLICATION', quote_ident(role_name);
|
||||
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOREPLICATION';
|
||||
END LOOP;
|
||||
END $$;
|
||||
@@ -1,7 +0,0 @@
|
||||
DO $$
|
||||
BEGIN
|
||||
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
|
||||
EXECUTE 'GRANT EXECUTE ON FUNCTION pg_export_snapshot TO neon_superuser';
|
||||
EXECUTE 'GRANT EXECUTE ON FUNCTION pg_log_standby_snapshot TO neon_superuser';
|
||||
END IF;
|
||||
END $$;
|
||||
@@ -1 +0,0 @@
|
||||
GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO neon_superuser;
|
||||
@@ -17,11 +17,7 @@ const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500);
|
||||
// should be handled gracefully.
|
||||
fn watch_compute_activity(compute: &ComputeNode) {
|
||||
// Suppose that `connstr` doesn't change
|
||||
let mut connstr = compute.connstr.clone();
|
||||
connstr
|
||||
.query_pairs_mut()
|
||||
.append_pair("application_name", "compute_activity_monitor");
|
||||
let connstr = connstr.as_str();
|
||||
let connstr = compute.connstr.as_str();
|
||||
|
||||
// During startup and configuration we connect to every Postgres database,
|
||||
// but we don't want to count this as some user activity. So wait until
|
||||
|
||||
@@ -22,10 +22,9 @@ use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
|
||||
|
||||
const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds
|
||||
|
||||
/// Escape a string for including it in a SQL literal.
|
||||
///
|
||||
/// Wrapping the result with `E'{}'` or `'{}'` is not required,
|
||||
/// as it returns a ready-to-use SQL string literal, e.g. `'db'''` or `E'db\\'`.
|
||||
/// Escape a string for including it in a SQL literal. Wrapping the result
|
||||
/// with `E'{}'` or `'{}'` is not required, as it returns a ready-to-use
|
||||
/// SQL string literal, e.g. `'db'''` or `E'db\\'`.
|
||||
/// See <https://github.com/postgres/postgres/blob/da98d005cdbcd45af563d0c4ac86d0e9772cd15f/src/backend/utils/adt/quote.c#L47>
|
||||
/// for the original implementation.
|
||||
pub fn escape_literal(s: &str) -> String {
|
||||
@@ -490,7 +489,7 @@ pub fn handle_postgres_logs(stderr: std::process::ChildStderr) -> JoinHandle<()>
|
||||
/// Read Postgres logs from `stderr` until EOF. Buffer is flushed on one of the following conditions:
|
||||
/// - next line starts with timestamp
|
||||
/// - EOF
|
||||
/// - no new lines were written for the last 100 milliseconds
|
||||
/// - no new lines were written for the last second
|
||||
async fn handle_postgres_logs_async(stderr: tokio::process::ChildStderr) -> Result<()> {
|
||||
let mut lines = tokio::io::BufReader::new(stderr).lines();
|
||||
let timeout_duration = Duration::from_millis(100);
|
||||
|
||||
@@ -10,7 +10,6 @@ use tracing::{error, info, info_span, instrument, span_enabled, warn, Level};
|
||||
|
||||
use crate::config;
|
||||
use crate::logger::inlinify;
|
||||
use crate::migration::MigrationRunner;
|
||||
use crate::params::PG_HBA_ALL_MD5;
|
||||
use crate::pg_helpers::*;
|
||||
|
||||
@@ -491,7 +490,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
"rename_db" => {
|
||||
let new_name = op.new_name.as_ref().unwrap();
|
||||
|
||||
if existing_dbs.contains_key(&op.name) {
|
||||
if existing_dbs.get(&op.name).is_some() {
|
||||
let query: String = format!(
|
||||
"ALTER DATABASE {} RENAME TO {}",
|
||||
op.name.pg_quote(),
|
||||
@@ -775,30 +774,106 @@ pub fn handle_migrations(client: &mut Client) -> Result<()> {
|
||||
// !BE SURE TO ONLY ADD MIGRATIONS TO THE END OF THIS ARRAY. IF YOU DO NOT, VERY VERY BAD THINGS MAY HAPPEN!
|
||||
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
// Add new migrations in numerical order.
|
||||
let migrations = [
|
||||
include_str!("./migrations/0001-neon_superuser_bypass_rls.sql"),
|
||||
include_str!("./migrations/0002-alter_roles.sql"),
|
||||
include_str!("./migrations/0003-grant_pg_create_subscription_to_neon_superuser.sql"),
|
||||
include_str!("./migrations/0004-grant_pg_monitor_to_neon_superuser.sql"),
|
||||
include_str!("./migrations/0005-grant_all_on_tables_to_neon_superuser.sql"),
|
||||
include_str!("./migrations/0006-grant_all_on_sequences_to_neon_superuser.sql"),
|
||||
include_str!(
|
||||
"./migrations/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql"
|
||||
),
|
||||
include_str!(
|
||||
"./migrations/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql"
|
||||
),
|
||||
include_str!("./migrations/0009-revoke_replication_for_previously_allowed_roles.sql"),
|
||||
include_str!(
|
||||
"./migrations/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql"
|
||||
),
|
||||
include_str!(
|
||||
"./migrations/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql"
|
||||
),
|
||||
"ALTER ROLE neon_superuser BYPASSRLS",
|
||||
r#"
|
||||
DO $$
|
||||
DECLARE
|
||||
role_name text;
|
||||
BEGIN
|
||||
FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
|
||||
LOOP
|
||||
RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
|
||||
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
|
||||
END LOOP;
|
||||
|
||||
FOR role_name IN SELECT rolname FROM pg_roles
|
||||
WHERE
|
||||
NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
|
||||
LOOP
|
||||
RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
|
||||
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
|
||||
END LOOP;
|
||||
END $$;
|
||||
"#,
|
||||
r#"
|
||||
DO $$
|
||||
BEGIN
|
||||
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
|
||||
EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
|
||||
END IF;
|
||||
END
|
||||
$$;"#,
|
||||
"GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION",
|
||||
// Don't remove: these are some SQLs that we originally applied in migrations but turned out to execute somewhere else.
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
// Add new migrations below.
|
||||
];
|
||||
|
||||
MigrationRunner::new(client, &migrations).run_migrations()?;
|
||||
let mut func = || {
|
||||
let query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
|
||||
client.simple_query(query)?;
|
||||
|
||||
let query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
|
||||
client.simple_query(query)?;
|
||||
|
||||
let query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
|
||||
client.simple_query(query)?;
|
||||
|
||||
let query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
|
||||
client.simple_query(query)?;
|
||||
|
||||
let query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
|
||||
client.simple_query(query)?;
|
||||
Ok::<_, anyhow::Error>(())
|
||||
};
|
||||
func().context("handle_migrations prepare")?;
|
||||
|
||||
let query = "SELECT id FROM neon_migration.migration_id";
|
||||
let row = client
|
||||
.query_one(query, &[])
|
||||
.context("handle_migrations get migration_id")?;
|
||||
let mut current_migration: usize = row.get::<&str, i64>("id") as usize;
|
||||
let starting_migration_id = current_migration;
|
||||
|
||||
let query = "BEGIN";
|
||||
client
|
||||
.simple_query(query)
|
||||
.context("handle_migrations begin")?;
|
||||
|
||||
while current_migration < migrations.len() {
|
||||
let migration = &migrations[current_migration];
|
||||
if migration.is_empty() {
|
||||
info!("Skip migration id={}", current_migration);
|
||||
} else {
|
||||
info!("Running migration:\n{}\n", migration);
|
||||
client.simple_query(migration).with_context(|| {
|
||||
format!("handle_migrations current_migration={}", current_migration)
|
||||
})?;
|
||||
}
|
||||
current_migration += 1;
|
||||
}
|
||||
let setval = format!(
|
||||
"UPDATE neon_migration.migration_id SET id={}",
|
||||
migrations.len()
|
||||
);
|
||||
client
|
||||
.simple_query(&setval)
|
||||
.context("handle_migrations update id")?;
|
||||
|
||||
let query = "COMMIT";
|
||||
client
|
||||
.simple_query(query)
|
||||
.context("handle_migrations commit")?;
|
||||
|
||||
info!(
|
||||
"Ran {} migrations",
|
||||
(migrations.len() - starting_migration_id)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1,45 +0,0 @@
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::{anyhow, Context};
|
||||
use tracing::warn;
|
||||
|
||||
pub const RESIZE_SWAP_BIN: &str = "/neonvm/bin/resize-swap";
|
||||
|
||||
pub fn resize_swap(size_bytes: u64) -> anyhow::Result<()> {
|
||||
// run `/neonvm/bin/resize-swap --once {size_bytes}`
|
||||
//
|
||||
// Passing '--once' causes resize-swap to delete itself after successful completion, which
|
||||
// means that if compute_ctl restarts later, we won't end up calling 'swapoff' while
|
||||
// postgres is running.
|
||||
//
|
||||
// NOTE: resize-swap is not very clever. If present, --once MUST be the first arg.
|
||||
let child_result = std::process::Command::new("/usr/bin/sudo")
|
||||
.arg(RESIZE_SWAP_BIN)
|
||||
.arg("--once")
|
||||
.arg(size_bytes.to_string())
|
||||
.spawn();
|
||||
|
||||
child_result
|
||||
.context("spawn() failed")
|
||||
.and_then(|mut child| child.wait().context("wait() failed"))
|
||||
.and_then(|status| match status.success() {
|
||||
true => Ok(()),
|
||||
false => {
|
||||
// The command failed. Maybe it was because the resize-swap file doesn't exist?
|
||||
// The --once flag causes it to delete itself on success so we don't disable swap
|
||||
// while postgres is running; maybe this is fine.
|
||||
match Path::new(RESIZE_SWAP_BIN).try_exists() {
|
||||
Err(_) | Ok(true) => Err(anyhow!("process exited with {status}")),
|
||||
// The path doesn't exist; we're actually ok
|
||||
Ok(false) => {
|
||||
warn!("ignoring \"not found\" error from resize-swap to avoid swapoff while compute is running");
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
}
|
||||
})
|
||||
// wrap any prior error with the overall context that we couldn't run the command
|
||||
.with_context(|| {
|
||||
format!("could not run `/usr/bin/sudo {RESIZE_SWAP_BIN} --once {size_bytes}`")
|
||||
})
|
||||
}
|
||||
@@ -6,23 +6,27 @@ license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
async-trait.workspace = true
|
||||
camino.workspace = true
|
||||
clap.workspace = true
|
||||
comfy-table.workspace = true
|
||||
futures.workspace = true
|
||||
git-version.workspace = true
|
||||
humantime.workspace = true
|
||||
nix.workspace = true
|
||||
once_cell.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
hyper0.workspace = true
|
||||
postgres.workspace = true
|
||||
hex.workspace = true
|
||||
hyper.workspace = true
|
||||
regex.workspace = true
|
||||
reqwest = { workspace = true, features = ["blocking", "json"] }
|
||||
scopeguard.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
serde_with.workspace = true
|
||||
tar.workspace = true
|
||||
thiserror.workspace = true
|
||||
toml.workspace = true
|
||||
toml_edit.workspace = true
|
||||
tokio.workspace = true
|
||||
tokio-postgres.workspace = true
|
||||
tokio-util.workspace = true
|
||||
@@ -34,7 +38,6 @@ safekeeper_api.workspace = true
|
||||
postgres_connection.workspace = true
|
||||
storage_broker.workspace = true
|
||||
utils.workspace = true
|
||||
whoami.workspace = true
|
||||
|
||||
compute_api.workspace = true
|
||||
workspace_hack.workspace = true
|
||||
|
||||
@@ -36,11 +36,11 @@ use utils::pid_file::{self, PidFileRead};
|
||||
// it's waiting. If the process hasn't started/stopped after 5 seconds,
|
||||
// it prints a notice that it's taking long, but keeps waiting.
|
||||
//
|
||||
const STOP_RETRY_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
const STOP_RETRIES: u128 = STOP_RETRY_TIMEOUT.as_millis() / RETRY_INTERVAL.as_millis();
|
||||
const RETRY_INTERVAL: Duration = Duration::from_millis(100);
|
||||
const DOT_EVERY_RETRIES: u128 = 10;
|
||||
const NOTICE_AFTER_RETRIES: u128 = 50;
|
||||
const RETRY_UNTIL_SECS: u64 = 10;
|
||||
const RETRIES: u64 = (RETRY_UNTIL_SECS * 1000) / RETRY_INTERVAL_MILLIS;
|
||||
const RETRY_INTERVAL_MILLIS: u64 = 100;
|
||||
const DOT_EVERY_RETRIES: u64 = 10;
|
||||
const NOTICE_AFTER_RETRIES: u64 = 50;
|
||||
|
||||
/// Argument to `start_process`, to indicate whether it should create pidfile or if the process creates
|
||||
/// it itself.
|
||||
@@ -52,7 +52,6 @@ pub enum InitialPidFile {
|
||||
}
|
||||
|
||||
/// Start a background child process using the parameters given.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn start_process<F, Fut, AI, A, EI>(
|
||||
process_name: &str,
|
||||
datadir: &Path,
|
||||
@@ -60,7 +59,6 @@ pub async fn start_process<F, Fut, AI, A, EI>(
|
||||
args: AI,
|
||||
envs: EI,
|
||||
initial_pid_file: InitialPidFile,
|
||||
retry_timeout: &Duration,
|
||||
process_status_check: F,
|
||||
) -> anyhow::Result<()>
|
||||
where
|
||||
@@ -71,10 +69,6 @@ where
|
||||
// Not generic AsRef<OsStr>, otherwise empty `envs` prevents type inference
|
||||
EI: IntoIterator<Item = (String, String)>,
|
||||
{
|
||||
let retries: u128 = retry_timeout.as_millis() / RETRY_INTERVAL.as_millis();
|
||||
if !datadir.metadata().context("stat datadir")?.is_dir() {
|
||||
anyhow::bail!("`datadir` must be a directory when calling this function: {datadir:?}");
|
||||
}
|
||||
let log_path = datadir.join(format!("{process_name}.log"));
|
||||
let process_log_file = fs::OpenOptions::new()
|
||||
.create(true)
|
||||
@@ -91,13 +85,7 @@ where
|
||||
let background_command = command
|
||||
.stdout(process_log_file)
|
||||
.stderr(same_file_for_stderr)
|
||||
.args(args)
|
||||
// spawn all child processes in their datadir, useful for all kinds of things,
|
||||
// not least cleaning up child processes e.g. after an unclean exit from the test suite:
|
||||
// ```
|
||||
// lsof -d cwd -a +D Users/cs/src/neon/test_output
|
||||
// ```
|
||||
.current_dir(datadir);
|
||||
.args(args);
|
||||
|
||||
let filled_cmd = fill_env_vars_prefixed_neon(fill_remote_storage_secrets_vars(
|
||||
fill_rust_env_vars(background_command),
|
||||
@@ -133,7 +121,7 @@ where
|
||||
.unwrap();
|
||||
});
|
||||
|
||||
for retries in 0..retries {
|
||||
for retries in 0..RETRIES {
|
||||
match process_started(pid, pid_file_to_check, &process_status_check).await {
|
||||
Ok(true) => {
|
||||
println!("\n{process_name} started and passed status check, pid: {pid}");
|
||||
@@ -151,7 +139,7 @@ where
|
||||
print!(".");
|
||||
io::stdout().flush().unwrap();
|
||||
}
|
||||
tokio::time::sleep(RETRY_INTERVAL).await;
|
||||
thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
|
||||
}
|
||||
Err(e) => {
|
||||
println!("error starting process {process_name:?}: {e:#}");
|
||||
@@ -160,10 +148,9 @@ where
|
||||
}
|
||||
}
|
||||
println!();
|
||||
anyhow::bail!(format!(
|
||||
"{} did not start+pass status checks within {:?} seconds",
|
||||
process_name, retry_timeout
|
||||
));
|
||||
anyhow::bail!(
|
||||
"{process_name} did not start+pass status checks within {RETRY_UNTIL_SECS} seconds"
|
||||
);
|
||||
}
|
||||
|
||||
/// Stops the process, using the pid file given. Returns Ok also if the process is already not running.
|
||||
@@ -219,7 +206,7 @@ pub fn stop_process(
|
||||
}
|
||||
|
||||
pub fn wait_until_stopped(process_name: &str, pid: Pid) -> anyhow::Result<()> {
|
||||
for retries in 0..STOP_RETRIES {
|
||||
for retries in 0..RETRIES {
|
||||
match process_has_stopped(pid) {
|
||||
Ok(true) => {
|
||||
println!("\n{process_name} stopped");
|
||||
@@ -235,7 +222,7 @@ pub fn wait_until_stopped(process_name: &str, pid: Pid) -> anyhow::Result<()> {
|
||||
print!(".");
|
||||
io::stdout().flush().unwrap();
|
||||
}
|
||||
thread::sleep(RETRY_INTERVAL);
|
||||
thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
|
||||
}
|
||||
Err(e) => {
|
||||
println!("{process_name} with pid {pid} failed to stop: {e:#}");
|
||||
@@ -244,10 +231,7 @@ pub fn wait_until_stopped(process_name: &str, pid: Pid) -> anyhow::Result<()> {
|
||||
}
|
||||
}
|
||||
println!();
|
||||
anyhow::bail!(format!(
|
||||
"{} with pid {} did not stop in {:?} seconds",
|
||||
process_name, pid, STOP_RETRY_TIMEOUT
|
||||
));
|
||||
anyhow::bail!("{process_name} with pid {pid} did not stop in {RETRY_UNTIL_SECS} seconds");
|
||||
}
|
||||
|
||||
fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
|
||||
@@ -289,7 +273,7 @@ fn fill_remote_storage_secrets_vars(mut cmd: &mut Command) -> &mut Command {
|
||||
|
||||
fn fill_env_vars_prefixed_neon(mut cmd: &mut Command) -> &mut Command {
|
||||
for (var, val) in std::env::vars() {
|
||||
if var.starts_with("NEON_") {
|
||||
if var.starts_with("NEON_PAGESERVER_") {
|
||||
cmd = cmd.env(var, val);
|
||||
}
|
||||
}
|
||||
@@ -379,7 +363,7 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn process_has_stopped(pid: Pid) -> anyhow::Result<bool> {
|
||||
fn process_has_stopped(pid: Pid) -> anyhow::Result<bool> {
|
||||
match kill(pid, None) {
|
||||
// Process exists, keep waiting
|
||||
Ok(_) => Ok(false),
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,94 +0,0 @@
|
||||
//! Branch mappings for convenience
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use utils::id::{TenantId, TenantTimelineId, TimelineId};
|
||||
|
||||
/// Keep human-readable aliases in memory (and persist them to config XXX), to hide tenant/timeline hex strings from the user.
|
||||
#[derive(PartialEq, Eq, Clone, Debug, Default, Serialize, Deserialize)]
|
||||
#[serde(default, deny_unknown_fields)]
|
||||
pub struct BranchMappings {
|
||||
/// Default tenant ID to use with the 'neon_local' command line utility, when
|
||||
/// --tenant_id is not explicitly specified. This comes from the branches.
|
||||
pub default_tenant_id: Option<TenantId>,
|
||||
|
||||
// A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,
|
||||
// but deserialization into a generic toml object as `toml::Value::try_from` fails with an error.
|
||||
// https://toml.io/en/v1.0.0 does not contain a concept of "a table inside another table".
|
||||
pub mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
|
||||
}
|
||||
|
||||
impl BranchMappings {
|
||||
pub fn register_branch_mapping(
|
||||
&mut self,
|
||||
branch_name: String,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
) -> anyhow::Result<()> {
|
||||
let existing_values = self.mappings.entry(branch_name.clone()).or_default();
|
||||
|
||||
let existing_ids = existing_values
|
||||
.iter()
|
||||
.find(|(existing_tenant_id, _)| existing_tenant_id == &tenant_id);
|
||||
|
||||
if let Some((_, old_timeline_id)) = existing_ids {
|
||||
if old_timeline_id == &timeline_id {
|
||||
Ok(())
|
||||
} else {
|
||||
bail!("branch '{branch_name}' is already mapped to timeline {old_timeline_id}, cannot map to another timeline {timeline_id}");
|
||||
}
|
||||
} else {
|
||||
existing_values.push((tenant_id, timeline_id));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_branch_timeline_id(
|
||||
&self,
|
||||
branch_name: &str,
|
||||
tenant_id: TenantId,
|
||||
) -> Option<TimelineId> {
|
||||
// If it looks like a timeline ID, return it as it is
|
||||
if let Ok(timeline_id) = branch_name.parse::<TimelineId>() {
|
||||
return Some(timeline_id);
|
||||
}
|
||||
|
||||
self.mappings
|
||||
.get(branch_name)?
|
||||
.iter()
|
||||
.find(|(mapped_tenant_id, _)| mapped_tenant_id == &tenant_id)
|
||||
.map(|&(_, timeline_id)| timeline_id)
|
||||
.map(TimelineId::from)
|
||||
}
|
||||
|
||||
pub fn timeline_name_mappings(&self) -> HashMap<TenantTimelineId, String> {
|
||||
self.mappings
|
||||
.iter()
|
||||
.flat_map(|(name, tenant_timelines)| {
|
||||
tenant_timelines.iter().map(|&(tenant_id, timeline_id)| {
|
||||
(TenantTimelineId::new(tenant_id, timeline_id), name.clone())
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn persist(&self, path: &Path) -> anyhow::Result<()> {
|
||||
let content = &toml::to_string_pretty(self)?;
|
||||
fs::write(path, content).with_context(|| {
|
||||
format!(
|
||||
"Failed to write branch information into path '{}'",
|
||||
path.display()
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn load(path: &Path) -> anyhow::Result<BranchMappings> {
|
||||
let branches_file_contents = fs::read_to_string(path)?;
|
||||
Ok(toml::from_str(branches_file_contents.as_str())?)
|
||||
}
|
||||
}
|
||||
@@ -1,22 +1,17 @@
|
||||
//! Code to manage the storage broker
|
||||
//!
|
||||
//! In the local test environment, the storage broker stores its data directly in
|
||||
//! In the local test environment, the data for each safekeeper is stored in
|
||||
//!
|
||||
//! ```text
|
||||
//! .neon
|
||||
//! .neon/safekeepers/<safekeeper id>
|
||||
//! ```
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Context;
|
||||
|
||||
use camino::Utf8PathBuf;
|
||||
|
||||
use crate::{background_process, local_env};
|
||||
|
||||
pub async fn start_broker_process(
|
||||
env: &local_env::LocalEnv,
|
||||
retry_timeout: &Duration,
|
||||
) -> anyhow::Result<()> {
|
||||
pub async fn start_broker_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
|
||||
let broker = &env.broker;
|
||||
let listen_addr = &broker.listen_addr;
|
||||
|
||||
@@ -32,7 +27,6 @@ pub async fn start_broker_process(
|
||||
args,
|
||||
[],
|
||||
background_process::InitialPidFile::Create(storage_broker_pid_file_path(env)),
|
||||
retry_timeout,
|
||||
|| async {
|
||||
let url = broker.client_url();
|
||||
let status_url = url.join("status").with_context(|| {
|
||||
|
||||
@@ -499,23 +499,6 @@ impl Endpoint {
|
||||
.join(",")
|
||||
}
|
||||
|
||||
/// Map safekeepers ids to the actual connection strings.
|
||||
fn build_safekeepers_connstrs(&self, sk_ids: Vec<NodeId>) -> Result<Vec<String>> {
|
||||
let mut safekeeper_connstrings = Vec::new();
|
||||
if self.mode == ComputeMode::Primary {
|
||||
for sk_id in sk_ids {
|
||||
let sk = self
|
||||
.env
|
||||
.safekeepers
|
||||
.iter()
|
||||
.find(|node| node.id == sk_id)
|
||||
.ok_or_else(|| anyhow!("safekeeper {sk_id} does not exist"))?;
|
||||
safekeeper_connstrings.push(format!("127.0.0.1:{}", sk.get_compute_port()));
|
||||
}
|
||||
}
|
||||
Ok(safekeeper_connstrings)
|
||||
}
|
||||
|
||||
pub async fn start(
|
||||
&self,
|
||||
auth_token: &Option<String>,
|
||||
@@ -540,7 +523,18 @@ impl Endpoint {
|
||||
let pageserver_connstring = Self::build_pageserver_connstr(&pageservers);
|
||||
assert!(!pageserver_connstring.is_empty());
|
||||
|
||||
let safekeeper_connstrings = self.build_safekeepers_connstrs(safekeepers)?;
|
||||
let mut safekeeper_connstrings = Vec::new();
|
||||
if self.mode == ComputeMode::Primary {
|
||||
for sk_id in safekeepers {
|
||||
let sk = self
|
||||
.env
|
||||
.safekeepers
|
||||
.iter()
|
||||
.find(|node| node.id == sk_id)
|
||||
.ok_or_else(|| anyhow!("safekeeper {sk_id} does not exist"))?;
|
||||
safekeeper_connstrings.push(format!("127.0.0.1:{}", sk.get_compute_port()));
|
||||
}
|
||||
}
|
||||
|
||||
// check for file remote_extensions_spec.json
|
||||
// if it is present, read it and pass to compute_ctl
|
||||
@@ -560,8 +554,6 @@ impl Endpoint {
|
||||
format_version: 1.0,
|
||||
operation_uuid: None,
|
||||
features: self.features.clone(),
|
||||
swap_size_bytes: None,
|
||||
disk_quota_bytes: None,
|
||||
cluster: Cluster {
|
||||
cluster_id: None, // project ID: not used
|
||||
name: None, // project name: not used
|
||||
@@ -599,7 +591,7 @@ impl Endpoint {
|
||||
remote_extensions,
|
||||
pgbouncer_settings: None,
|
||||
shard_stripe_size: Some(shard_stripe_size),
|
||||
local_proxy_config: None,
|
||||
primary_is_running: None,
|
||||
};
|
||||
let spec_path = self.endpoint_path().join("spec.json");
|
||||
std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
|
||||
@@ -704,7 +696,7 @@ impl Endpoint {
|
||||
}
|
||||
}
|
||||
}
|
||||
tokio::time::sleep(ATTEMPT_INTERVAL).await;
|
||||
std::thread::sleep(ATTEMPT_INTERVAL);
|
||||
}
|
||||
|
||||
// disarm the scopeguard, let the child outlive this function (and neon_local invoction)
|
||||
@@ -748,7 +740,6 @@ impl Endpoint {
|
||||
&self,
|
||||
mut pageservers: Vec<(Host, u16)>,
|
||||
stripe_size: Option<ShardStripeSize>,
|
||||
safekeepers: Option<Vec<NodeId>>,
|
||||
) -> Result<()> {
|
||||
let mut spec: ComputeSpec = {
|
||||
let spec_path = self.endpoint_path().join("spec.json");
|
||||
@@ -783,12 +774,6 @@ impl Endpoint {
|
||||
spec.shard_stripe_size = stripe_size.map(|s| s.0 as usize);
|
||||
}
|
||||
|
||||
// If safekeepers are not specified, don't change them.
|
||||
if let Some(safekeepers) = safekeepers {
|
||||
let safekeeper_connstrings = self.build_safekeepers_connstrs(safekeepers)?;
|
||||
spec.safekeeper_connstrings = safekeeper_connstrings;
|
||||
}
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(30))
|
||||
.build()
|
||||
@@ -826,12 +811,11 @@ impl Endpoint {
|
||||
// cleanup work to do after postgres stops, like syncing safekeepers,
|
||||
// etc.
|
||||
//
|
||||
// If destroying or stop mode is immediate, send it SIGTERM before
|
||||
// waiting. Sometimes we do *not* want this cleanup: tests intentionally
|
||||
// do stop when majority of safekeepers is down, so sync-safekeepers
|
||||
// would hang otherwise. This could be a separate flag though.
|
||||
let send_sigterm = destroy || mode == "immediate";
|
||||
self.wait_for_compute_ctl_to_exit(send_sigterm)?;
|
||||
// If destroying, send it SIGTERM before waiting. Sometimes we do *not*
|
||||
// want this cleanup: tests intentionally do stop when majority of
|
||||
// safekeepers is down, so sync-safekeepers would hang otherwise. This
|
||||
// could be a separate flag though.
|
||||
self.wait_for_compute_ctl_to_exit(destroy)?;
|
||||
if destroy {
|
||||
println!(
|
||||
"Destroying postgres data directory '{}'",
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
//! Now it also provides init method which acts like a stub for proper installation
|
||||
//! script which will use local paths.
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use anyhow::{bail, ensure, Context};
|
||||
|
||||
use clap::ValueEnum;
|
||||
use postgres_backend::AuthType;
|
||||
@@ -17,17 +17,14 @@ use std::net::Ipv4Addr;
|
||||
use std::net::SocketAddr;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Command, Stdio};
|
||||
use std::time::Duration;
|
||||
use utils::{
|
||||
auth::{encode_from_key_file, Claims},
|
||||
id::{NodeId, TenantId, TenantTimelineId, TimelineId},
|
||||
};
|
||||
|
||||
use crate::pageserver::PageServerNode;
|
||||
use crate::pageserver::PAGESERVER_REMOTE_STORAGE_DIR;
|
||||
use crate::safekeeper::SafekeeperNode;
|
||||
|
||||
pub const DEFAULT_PG_VERSION: u32 = 16;
|
||||
pub const DEFAULT_PG_VERSION: u32 = 15;
|
||||
|
||||
//
|
||||
// This data structures represents neon_local CLI config
|
||||
@@ -36,107 +33,63 @@ pub const DEFAULT_PG_VERSION: u32 = 16;
|
||||
// to 'neon_local init --config=<path>' option. See control_plane/simple.conf for
|
||||
// an example.
|
||||
//
|
||||
#[derive(PartialEq, Eq, Clone, Debug)]
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
pub struct LocalEnv {
|
||||
// Base directory for all the nodes (the pageserver, safekeepers and
|
||||
// compute endpoints).
|
||||
//
|
||||
// This is not stored in the config file. Rather, this is the path where the
|
||||
// config file itself is. It is read from the NEON_REPO_DIR env variable which
|
||||
// must be an absolute path. If the env var is not set, $PWD/.neon is used.
|
||||
// config file itself is. It is read from the NEON_REPO_DIR env variable or
|
||||
// '.neon' if not given.
|
||||
#[serde(skip)]
|
||||
pub base_data_dir: PathBuf,
|
||||
|
||||
// Path to postgres distribution. It's expected that "bin", "include",
|
||||
// "lib", "share" from postgres distribution are there. If at some point
|
||||
// in time we will be able to run against vanilla postgres we may split that
|
||||
// to four separate paths and match OS-specific installation layout.
|
||||
#[serde(default)]
|
||||
pub pg_distrib_dir: PathBuf,
|
||||
|
||||
// Path to pageserver binary.
|
||||
#[serde(default)]
|
||||
pub neon_distrib_dir: PathBuf,
|
||||
|
||||
// Default tenant ID to use with the 'neon_local' command line utility, when
|
||||
// --tenant_id is not explicitly specified.
|
||||
#[serde(default)]
|
||||
pub default_tenant_id: Option<TenantId>,
|
||||
|
||||
// used to issue tokens during e.g pg start
|
||||
#[serde(default)]
|
||||
pub private_key_path: PathBuf,
|
||||
|
||||
pub broker: NeonBroker,
|
||||
|
||||
// Configuration for the storage controller (1 per neon_local environment)
|
||||
pub storage_controller: NeonStorageControllerConf,
|
||||
|
||||
/// This Vec must always contain at least one pageserver
|
||||
/// Populdated by [`Self::load_config`] from the individual `pageserver.toml`s.
|
||||
/// NB: not used anymore except for informing users that they need to change their `.neon/config`.
|
||||
pub pageservers: Vec<PageServerConf>,
|
||||
|
||||
#[serde(default)]
|
||||
pub safekeepers: Vec<SafekeeperConf>,
|
||||
|
||||
// Control plane upcall API for pageserver: if None, we will not run storage_controller If set, this will
|
||||
// be propagated into each pageserver's configuration.
|
||||
#[serde(default)]
|
||||
pub control_plane_api: Option<Url>,
|
||||
|
||||
// Control plane upcall API for storage controller. If set, this will be propagated into the
|
||||
// storage controller's configuration.
|
||||
#[serde(default)]
|
||||
pub control_plane_compute_hook_api: Option<Url>,
|
||||
|
||||
/// Keep human-readable aliases in memory (and persist them to config), to hide ZId hex strings from the user.
|
||||
#[serde(default)]
|
||||
// A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,
|
||||
// but deserialization into a generic toml object as `toml::Value::try_from` fails with an error.
|
||||
// https://toml.io/en/v1.0.0 does not contain a concept of "a table inside another table".
|
||||
pub branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
|
||||
}
|
||||
|
||||
/// On-disk state stored in `.neon/config`.
|
||||
#[derive(PartialEq, Eq, Clone, Debug, Default, Serialize, Deserialize)]
|
||||
#[serde(default, deny_unknown_fields)]
|
||||
pub struct OnDiskConfig {
|
||||
pub pg_distrib_dir: PathBuf,
|
||||
pub neon_distrib_dir: PathBuf,
|
||||
pub default_tenant_id: Option<TenantId>,
|
||||
pub private_key_path: PathBuf,
|
||||
pub broker: NeonBroker,
|
||||
pub storage_controller: NeonStorageControllerConf,
|
||||
#[serde(
|
||||
skip_serializing,
|
||||
deserialize_with = "fail_if_pageservers_field_specified"
|
||||
)]
|
||||
pub pageservers: Vec<PageServerConf>,
|
||||
pub safekeepers: Vec<SafekeeperConf>,
|
||||
pub control_plane_api: Option<Url>,
|
||||
pub control_plane_compute_hook_api: Option<Url>,
|
||||
branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
|
||||
}
|
||||
|
||||
fn fail_if_pageservers_field_specified<'de, D>(_: D) -> Result<Vec<PageServerConf>, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
Err(serde::de::Error::custom(
|
||||
"The 'pageservers' field is no longer used; pageserver.toml is now authoritative; \
|
||||
Please remove the `pageservers` from your .neon/config.",
|
||||
))
|
||||
}
|
||||
|
||||
/// The description of the neon_local env to be initialized by `neon_local init --config`.
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct NeonLocalInitConf {
|
||||
// TODO: do we need this? Seems unused
|
||||
pub pg_distrib_dir: Option<PathBuf>,
|
||||
// TODO: do we need this? Seems unused
|
||||
pub neon_distrib_dir: Option<PathBuf>,
|
||||
pub default_tenant_id: TenantId,
|
||||
pub broker: NeonBroker,
|
||||
pub storage_controller: Option<NeonStorageControllerConf>,
|
||||
pub pageservers: Vec<NeonLocalInitPageserverConf>,
|
||||
pub safekeepers: Vec<SafekeeperConf>,
|
||||
pub control_plane_api: Option<Option<Url>>,
|
||||
pub control_plane_compute_hook_api: Option<Option<Url>>,
|
||||
}
|
||||
|
||||
/// Broker config for cluster internal communication.
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
#[serde(default)]
|
||||
@@ -145,59 +98,6 @@ pub struct NeonBroker {
|
||||
pub listen_addr: SocketAddr,
|
||||
}
|
||||
|
||||
/// Broker config for cluster internal communication.
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
#[serde(default)]
|
||||
pub struct NeonStorageControllerConf {
|
||||
/// Heartbeat timeout before marking a node offline
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub max_offline: Duration,
|
||||
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub max_warming_up: Duration,
|
||||
|
||||
pub start_as_candidate: bool,
|
||||
|
||||
/// Database url used when running multiple storage controller instances
|
||||
pub database_url: Option<SocketAddr>,
|
||||
|
||||
/// Threshold for auto-splitting a tenant into shards
|
||||
pub split_threshold: Option<u64>,
|
||||
|
||||
pub max_secondary_lag_bytes: Option<u64>,
|
||||
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub heartbeat_interval: Duration,
|
||||
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub long_reconcile_threshold: Option<Duration>,
|
||||
}
|
||||
|
||||
impl NeonStorageControllerConf {
|
||||
// Use a shorter pageserver unavailability interval than the default to speed up tests.
|
||||
const DEFAULT_MAX_OFFLINE_INTERVAL: std::time::Duration = std::time::Duration::from_secs(10);
|
||||
|
||||
const DEFAULT_MAX_WARMING_UP_INTERVAL: std::time::Duration = std::time::Duration::from_secs(30);
|
||||
|
||||
// Very tight heartbeat interval to speed up tests
|
||||
const DEFAULT_HEARTBEAT_INTERVAL: std::time::Duration = std::time::Duration::from_millis(100);
|
||||
}
|
||||
|
||||
impl Default for NeonStorageControllerConf {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
max_offline: Self::DEFAULT_MAX_OFFLINE_INTERVAL,
|
||||
max_warming_up: Self::DEFAULT_MAX_WARMING_UP_INTERVAL,
|
||||
start_as_candidate: false,
|
||||
database_url: None,
|
||||
split_threshold: None,
|
||||
max_secondary_lag_bytes: None,
|
||||
heartbeat_interval: Self::DEFAULT_HEARTBEAT_INTERVAL,
|
||||
long_reconcile_threshold: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Dummy Default impl to satisfy Deserialize derive.
|
||||
impl Default for NeonBroker {
|
||||
fn default() -> Self {
|
||||
@@ -213,18 +113,22 @@ impl NeonBroker {
|
||||
}
|
||||
}
|
||||
|
||||
// neon_local needs to know this subset of pageserver configuration.
|
||||
// For legacy reasons, this information is duplicated from `pageserver.toml` into `.neon/config`.
|
||||
// It can get stale if `pageserver.toml` is changed.
|
||||
// TODO(christian): don't store this at all in `.neon/config`, always load it from `pageserver.toml`
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
#[serde(default, deny_unknown_fields)]
|
||||
pub struct PageServerConf {
|
||||
// node id
|
||||
pub id: NodeId,
|
||||
|
||||
// Pageserver connection settings
|
||||
pub listen_pg_addr: String,
|
||||
pub listen_http_addr: String,
|
||||
|
||||
// auth type used for the PG and HTTP ports
|
||||
pub pg_auth_type: AuthType,
|
||||
pub http_auth_type: AuthType,
|
||||
|
||||
pub(crate) virtual_file_io_engine: Option<String>,
|
||||
pub(crate) get_vectored_impl: Option<String>,
|
||||
}
|
||||
|
||||
impl Default for PageServerConf {
|
||||
@@ -235,40 +139,8 @@ impl Default for PageServerConf {
|
||||
listen_http_addr: String::new(),
|
||||
pg_auth_type: AuthType::Trust,
|
||||
http_auth_type: AuthType::Trust,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The toml that can be passed to `neon_local init --config`.
|
||||
/// This is a subset of the `pageserver.toml` configuration.
|
||||
// TODO(christian): use pageserver_api::config::ConfigToml (PR #7656)
|
||||
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
|
||||
pub struct NeonLocalInitPageserverConf {
|
||||
pub id: NodeId,
|
||||
pub listen_pg_addr: String,
|
||||
pub listen_http_addr: String,
|
||||
pub pg_auth_type: AuthType,
|
||||
pub http_auth_type: AuthType,
|
||||
#[serde(flatten)]
|
||||
pub other: HashMap<String, toml::Value>,
|
||||
}
|
||||
|
||||
impl From<&NeonLocalInitPageserverConf> for PageServerConf {
|
||||
fn from(conf: &NeonLocalInitPageserverConf) -> Self {
|
||||
let NeonLocalInitPageserverConf {
|
||||
id,
|
||||
listen_pg_addr,
|
||||
listen_http_addr,
|
||||
pg_auth_type,
|
||||
http_auth_type,
|
||||
other: _,
|
||||
} = conf;
|
||||
Self {
|
||||
id: *id,
|
||||
listen_pg_addr: listen_pg_addr.clone(),
|
||||
listen_http_addr: listen_http_addr.clone(),
|
||||
pg_auth_type: *pg_auth_type,
|
||||
http_auth_type: *http_auth_type,
|
||||
virtual_file_io_engine: None,
|
||||
get_vectored_impl: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -346,21 +218,16 @@ impl LocalEnv {
|
||||
|
||||
#[allow(clippy::manual_range_patterns)]
|
||||
match pg_version {
|
||||
14 | 15 | 16 | 17 => Ok(path.join(format!("v{pg_version}"))),
|
||||
14 | 15 | 16 => Ok(path.join(format!("v{pg_version}"))),
|
||||
_ => bail!("Unsupported postgres version: {}", pg_version),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn pg_dir(&self, pg_version: u32, dir_name: &str) -> anyhow::Result<PathBuf> {
|
||||
Ok(self.pg_distrib_dir(pg_version)?.join(dir_name))
|
||||
}
|
||||
|
||||
pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
|
||||
self.pg_dir(pg_version, "bin")
|
||||
Ok(self.pg_distrib_dir(pg_version)?.join("bin"))
|
||||
}
|
||||
|
||||
pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
|
||||
self.pg_dir(pg_version, "lib")
|
||||
Ok(self.pg_distrib_dir(pg_version)?.join("lib"))
|
||||
}
|
||||
|
||||
pub fn pageserver_bin(&self) -> PathBuf {
|
||||
@@ -410,36 +277,6 @@ impl LocalEnv {
|
||||
}
|
||||
}
|
||||
|
||||
/// Inspect the base data directory and extract the instance id and instance directory path
|
||||
/// for all storage controller instances
|
||||
pub async fn storage_controller_instances(&self) -> std::io::Result<Vec<(u8, PathBuf)>> {
|
||||
let mut instances = Vec::default();
|
||||
|
||||
let dir = std::fs::read_dir(self.base_data_dir.clone())?;
|
||||
for dentry in dir {
|
||||
let dentry = dentry?;
|
||||
let is_dir = dentry.metadata()?.is_dir();
|
||||
let filename = dentry.file_name().into_string().unwrap();
|
||||
let parsed_instance_id = match filename.strip_prefix("storage_controller_") {
|
||||
Some(suffix) => suffix.parse::<u8>().ok(),
|
||||
None => None,
|
||||
};
|
||||
|
||||
let is_instance_dir = is_dir && parsed_instance_id.is_some();
|
||||
|
||||
if !is_instance_dir {
|
||||
continue;
|
||||
}
|
||||
|
||||
instances.push((
|
||||
parsed_instance_id.expect("Checked previously"),
|
||||
dentry.path(),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(instances)
|
||||
}
|
||||
|
||||
pub fn register_branch_mapping(
|
||||
&mut self,
|
||||
branch_name: String,
|
||||
@@ -491,8 +328,44 @@ impl LocalEnv {
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Construct `Self` from on-disk state.
|
||||
pub fn load_config(repopath: &Path) -> anyhow::Result<Self> {
|
||||
/// Create a LocalEnv from a config file.
|
||||
///
|
||||
/// Unlike 'load_config', this function fills in any defaults that are missing
|
||||
/// from the config file.
|
||||
pub fn parse_config(toml: &str) -> anyhow::Result<Self> {
|
||||
let mut env: LocalEnv = toml::from_str(toml)?;
|
||||
|
||||
// Find postgres binaries.
|
||||
// Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "pg_install".
|
||||
// Note that later in the code we assume, that distrib dirs follow the same pattern
|
||||
// for all postgres versions.
|
||||
if env.pg_distrib_dir == Path::new("") {
|
||||
if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
|
||||
env.pg_distrib_dir = postgres_bin.into();
|
||||
} else {
|
||||
let cwd = env::current_dir()?;
|
||||
env.pg_distrib_dir = cwd.join("pg_install")
|
||||
}
|
||||
}
|
||||
|
||||
// Find neon binaries.
|
||||
if env.neon_distrib_dir == Path::new("") {
|
||||
env.neon_distrib_dir = env::current_exe()?.parent().unwrap().to_owned();
|
||||
}
|
||||
|
||||
if env.pageservers.is_empty() {
|
||||
anyhow::bail!("Configuration must contain at least one pageserver");
|
||||
}
|
||||
|
||||
env.base_data_dir = base_path();
|
||||
|
||||
Ok(env)
|
||||
}
|
||||
|
||||
/// Locate and load config
|
||||
pub fn load_config() -> anyhow::Result<Self> {
|
||||
let repopath = base_path();
|
||||
|
||||
if !repopath.exists() {
|
||||
bail!(
|
||||
"Neon config is not found in {}. You need to run 'neon_local init' first",
|
||||
@@ -503,140 +376,38 @@ impl LocalEnv {
|
||||
// TODO: check that it looks like a neon repository
|
||||
|
||||
// load and parse file
|
||||
let config_file_contents = fs::read_to_string(repopath.join("config"))?;
|
||||
let on_disk_config: OnDiskConfig = toml::from_str(config_file_contents.as_str())?;
|
||||
let mut env = {
|
||||
let OnDiskConfig {
|
||||
pg_distrib_dir,
|
||||
neon_distrib_dir,
|
||||
default_tenant_id,
|
||||
private_key_path,
|
||||
broker,
|
||||
storage_controller,
|
||||
pageservers,
|
||||
safekeepers,
|
||||
control_plane_api,
|
||||
control_plane_compute_hook_api,
|
||||
branch_name_mappings,
|
||||
} = on_disk_config;
|
||||
LocalEnv {
|
||||
base_data_dir: repopath.to_owned(),
|
||||
pg_distrib_dir,
|
||||
neon_distrib_dir,
|
||||
default_tenant_id,
|
||||
private_key_path,
|
||||
broker,
|
||||
storage_controller,
|
||||
pageservers,
|
||||
safekeepers,
|
||||
control_plane_api,
|
||||
control_plane_compute_hook_api,
|
||||
branch_name_mappings,
|
||||
}
|
||||
};
|
||||
let config = fs::read_to_string(repopath.join("config"))?;
|
||||
let mut env: LocalEnv = toml::from_str(config.as_str())?;
|
||||
|
||||
// The source of truth for pageserver configuration is the pageserver.toml.
|
||||
assert!(
|
||||
env.pageservers.is_empty(),
|
||||
"we ensure this during deserialization"
|
||||
);
|
||||
env.pageservers = {
|
||||
let iter = std::fs::read_dir(repopath).context("open dir")?;
|
||||
let mut pageservers = Vec::new();
|
||||
for res in iter {
|
||||
let dentry = res?;
|
||||
const PREFIX: &str = "pageserver_";
|
||||
let dentry_name = dentry
|
||||
.file_name()
|
||||
.into_string()
|
||||
.ok()
|
||||
.with_context(|| format!("non-utf8 dentry: {:?}", dentry.path()))
|
||||
.unwrap();
|
||||
if !dentry_name.starts_with(PREFIX) {
|
||||
continue;
|
||||
}
|
||||
if !dentry.file_type().context("determine file type")?.is_dir() {
|
||||
anyhow::bail!("expected a directory, got {:?}", dentry.path());
|
||||
}
|
||||
let id = dentry_name[PREFIX.len()..]
|
||||
.parse::<NodeId>()
|
||||
.with_context(|| format!("parse id from {:?}", dentry.path()))?;
|
||||
// TODO(christian): use pageserver_api::config::ConfigToml (PR #7656)
|
||||
#[derive(serde::Serialize, serde::Deserialize)]
|
||||
// (allow unknown fields, unlike PageServerConf)
|
||||
struct PageserverConfigTomlSubset {
|
||||
listen_pg_addr: String,
|
||||
listen_http_addr: String,
|
||||
pg_auth_type: AuthType,
|
||||
http_auth_type: AuthType,
|
||||
}
|
||||
let config_toml_path = dentry.path().join("pageserver.toml");
|
||||
let config_toml: PageserverConfigTomlSubset = toml_edit::de::from_str(
|
||||
&std::fs::read_to_string(&config_toml_path)
|
||||
.with_context(|| format!("read {:?}", config_toml_path))?,
|
||||
)
|
||||
.context("parse pageserver.toml")?;
|
||||
let identity_toml_path = dentry.path().join("identity.toml");
|
||||
#[derive(serde::Serialize, serde::Deserialize)]
|
||||
struct IdentityTomlSubset {
|
||||
id: NodeId,
|
||||
}
|
||||
let identity_toml: IdentityTomlSubset = toml_edit::de::from_str(
|
||||
&std::fs::read_to_string(&identity_toml_path)
|
||||
.with_context(|| format!("read {:?}", identity_toml_path))?,
|
||||
)
|
||||
.context("parse identity.toml")?;
|
||||
let PageserverConfigTomlSubset {
|
||||
listen_pg_addr,
|
||||
listen_http_addr,
|
||||
pg_auth_type,
|
||||
http_auth_type,
|
||||
} = config_toml;
|
||||
let IdentityTomlSubset {
|
||||
id: identity_toml_id,
|
||||
} = identity_toml;
|
||||
let conf = PageServerConf {
|
||||
id: {
|
||||
anyhow::ensure!(
|
||||
identity_toml_id == id,
|
||||
"id mismatch: identity.toml:id={identity_toml_id} pageserver_(.*) id={id}",
|
||||
);
|
||||
id
|
||||
},
|
||||
listen_pg_addr,
|
||||
listen_http_addr,
|
||||
pg_auth_type,
|
||||
http_auth_type,
|
||||
};
|
||||
pageservers.push(conf);
|
||||
}
|
||||
pageservers
|
||||
};
|
||||
env.base_data_dir = repopath;
|
||||
|
||||
Ok(env)
|
||||
}
|
||||
|
||||
pub fn persist_config(&self) -> anyhow::Result<()> {
|
||||
Self::persist_config_impl(
|
||||
&self.base_data_dir,
|
||||
&OnDiskConfig {
|
||||
pg_distrib_dir: self.pg_distrib_dir.clone(),
|
||||
neon_distrib_dir: self.neon_distrib_dir.clone(),
|
||||
default_tenant_id: self.default_tenant_id,
|
||||
private_key_path: self.private_key_path.clone(),
|
||||
broker: self.broker.clone(),
|
||||
storage_controller: self.storage_controller.clone(),
|
||||
pageservers: vec![], // it's skip_serializing anyway
|
||||
safekeepers: self.safekeepers.clone(),
|
||||
control_plane_api: self.control_plane_api.clone(),
|
||||
control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(),
|
||||
branch_name_mappings: self.branch_name_mappings.clone(),
|
||||
},
|
||||
)
|
||||
}
|
||||
pub fn persist_config(&self, base_path: &Path) -> anyhow::Result<()> {
|
||||
// Currently, the user first passes a config file with 'neon_local init --config=<path>'
|
||||
// We read that in, in `create_config`, and fill any missing defaults. Then it's saved
|
||||
// to .neon/config. TODO: We lose any formatting and comments along the way, which is
|
||||
// a bit sad.
|
||||
let mut conf_content = r#"# This file describes a local deployment of the page server
|
||||
# and safekeeeper node. It is read by the 'neon_local' command-line
|
||||
# utility.
|
||||
"#
|
||||
.to_string();
|
||||
|
||||
// Convert the LocalEnv to a toml file.
|
||||
//
|
||||
// This could be as simple as this:
|
||||
//
|
||||
// conf_content += &toml::to_string_pretty(env)?;
|
||||
//
|
||||
// But it results in a "values must be emitted before tables". I'm not sure
|
||||
// why, AFAICS the table, i.e. 'safekeepers: Vec<SafekeeperConf>' is last.
|
||||
// Maybe rust reorders the fields to squeeze avoid padding or something?
|
||||
// In any case, converting to toml::Value first, and serializing that, works.
|
||||
// See https://github.com/alexcrichton/toml-rs/issues/142
|
||||
conf_content += &toml::to_string_pretty(&toml::Value::try_from(self)?)?;
|
||||
|
||||
pub fn persist_config_impl(base_path: &Path, config: &OnDiskConfig) -> anyhow::Result<()> {
|
||||
let conf_content = &toml::to_string_pretty(config)?;
|
||||
let target_config_path = base_path.join("config");
|
||||
fs::write(&target_config_path, conf_content).with_context(|| {
|
||||
format!(
|
||||
@@ -661,13 +432,17 @@ impl LocalEnv {
|
||||
}
|
||||
}
|
||||
|
||||
/// Materialize the [`NeonLocalInitConf`] to disk. Called during [`neon_local init`].
|
||||
pub fn init(conf: NeonLocalInitConf, force: &InitForceMode) -> anyhow::Result<()> {
|
||||
let base_path = base_path();
|
||||
assert_ne!(base_path, Path::new(""));
|
||||
let base_path = &base_path;
|
||||
//
|
||||
// Initialize a new Neon repository
|
||||
//
|
||||
pub fn init(&mut self, pg_version: u32, force: &InitForceMode) -> anyhow::Result<()> {
|
||||
// check if config already exists
|
||||
let base_path = &self.base_data_dir;
|
||||
ensure!(
|
||||
base_path != Path::new(""),
|
||||
"repository base path is missing"
|
||||
);
|
||||
|
||||
// create base_path dir
|
||||
if base_path.exists() {
|
||||
match force {
|
||||
InitForceMode::MustNotExist => {
|
||||
@@ -699,115 +474,74 @@ impl LocalEnv {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !self.pg_bin_dir(pg_version)?.join("postgres").exists() {
|
||||
bail!(
|
||||
"Can't find postgres binary at {}",
|
||||
self.pg_bin_dir(pg_version)?.display()
|
||||
);
|
||||
}
|
||||
for binary in ["pageserver", "safekeeper"] {
|
||||
if !self.neon_distrib_dir.join(binary).exists() {
|
||||
bail!(
|
||||
"Can't find binary '{binary}' in neon distrib dir '{}'",
|
||||
self.neon_distrib_dir.display()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if !base_path.exists() {
|
||||
fs::create_dir(base_path)?;
|
||||
}
|
||||
|
||||
let NeonLocalInitConf {
|
||||
pg_distrib_dir,
|
||||
neon_distrib_dir,
|
||||
default_tenant_id,
|
||||
broker,
|
||||
storage_controller,
|
||||
pageservers,
|
||||
safekeepers,
|
||||
control_plane_api,
|
||||
control_plane_compute_hook_api,
|
||||
} = conf;
|
||||
|
||||
// Find postgres binaries.
|
||||
// Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "pg_install".
|
||||
// Note that later in the code we assume, that distrib dirs follow the same pattern
|
||||
// for all postgres versions.
|
||||
let pg_distrib_dir = pg_distrib_dir.unwrap_or_else(|| {
|
||||
if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
|
||||
postgres_bin.into()
|
||||
} else {
|
||||
let cwd = env::current_dir().unwrap();
|
||||
cwd.join("pg_install")
|
||||
}
|
||||
});
|
||||
|
||||
// Find neon binaries.
|
||||
let neon_distrib_dir = neon_distrib_dir
|
||||
.unwrap_or_else(|| env::current_exe().unwrap().parent().unwrap().to_owned());
|
||||
|
||||
// Generate keypair for JWT.
|
||||
//
|
||||
// The keypair is only needed if authentication is enabled in any of the
|
||||
// components. For convenience, we generate the keypair even if authentication
|
||||
// is not enabled, so that you can easily enable it after the initialization
|
||||
// step.
|
||||
generate_auth_keys(
|
||||
base_path.join("auth_private_key.pem").as_path(),
|
||||
base_path.join("auth_public_key.pem").as_path(),
|
||||
)
|
||||
.context("generate auth keys")?;
|
||||
let private_key_path = PathBuf::from("auth_private_key.pem");
|
||||
|
||||
// create the runtime type because the remaining initialization code below needs
|
||||
// a LocalEnv instance op operation
|
||||
// TODO: refactor to avoid this, LocalEnv should only be constructed from on-disk state
|
||||
let env = LocalEnv {
|
||||
base_data_dir: base_path.clone(),
|
||||
pg_distrib_dir,
|
||||
neon_distrib_dir,
|
||||
default_tenant_id: Some(default_tenant_id),
|
||||
private_key_path,
|
||||
broker,
|
||||
storage_controller: storage_controller.unwrap_or_default(),
|
||||
pageservers: pageservers.iter().map(Into::into).collect(),
|
||||
safekeepers,
|
||||
control_plane_api: control_plane_api.unwrap_or_default(),
|
||||
control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(),
|
||||
branch_name_mappings: Default::default(),
|
||||
};
|
||||
|
||||
// create endpoints dir
|
||||
fs::create_dir_all(env.endpoints_path())?;
|
||||
|
||||
// create safekeeper dirs
|
||||
for safekeeper in &env.safekeepers {
|
||||
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(&env, safekeeper.id))?;
|
||||
// step. However, if the key generation fails, we treat it as non-fatal if
|
||||
// authentication was not enabled.
|
||||
if self.private_key_path == PathBuf::new() {
|
||||
match generate_auth_keys(
|
||||
base_path.join("auth_private_key.pem").as_path(),
|
||||
base_path.join("auth_public_key.pem").as_path(),
|
||||
) {
|
||||
Ok(()) => {
|
||||
self.private_key_path = PathBuf::from("auth_private_key.pem");
|
||||
}
|
||||
Err(e) => {
|
||||
if !self.auth_keys_needed() {
|
||||
eprintln!("Could not generate keypair for JWT authentication: {e}");
|
||||
eprintln!("Continuing anyway because authentication was not enabled");
|
||||
self.private_key_path = PathBuf::from("auth_private_key.pem");
|
||||
} else {
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// initialize pageserver state
|
||||
for (i, ps) in pageservers.into_iter().enumerate() {
|
||||
let runtime_ps = &env.pageservers[i];
|
||||
assert_eq!(&PageServerConf::from(&ps), runtime_ps);
|
||||
fs::create_dir(env.pageserver_data_dir(ps.id))?;
|
||||
PageServerNode::from_env(&env, runtime_ps)
|
||||
.initialize(ps)
|
||||
.context("pageserver init failed")?;
|
||||
fs::create_dir_all(self.endpoints_path())?;
|
||||
|
||||
for safekeeper in &self.safekeepers {
|
||||
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(self, safekeeper.id))?;
|
||||
}
|
||||
|
||||
// setup remote remote location for default LocalFs remote storage
|
||||
std::fs::create_dir_all(env.base_data_dir.join(PAGESERVER_REMOTE_STORAGE_DIR))?;
|
||||
self.persist_config(base_path)
|
||||
}
|
||||
|
||||
env.persist_config()
|
||||
fn auth_keys_needed(&self) -> bool {
|
||||
self.pageservers.iter().any(|ps| {
|
||||
ps.pg_auth_type == AuthType::NeonJWT || ps.http_auth_type == AuthType::NeonJWT
|
||||
}) || self.safekeepers.iter().any(|sk| sk.auth_enabled)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn base_path() -> PathBuf {
|
||||
let path = match std::env::var_os("NEON_REPO_DIR") {
|
||||
Some(val) => {
|
||||
let path = PathBuf::from(val);
|
||||
if !path.is_absolute() {
|
||||
// repeat the env var in the error because our default is always absolute
|
||||
panic!("NEON_REPO_DIR must be an absolute path, got {path:?}");
|
||||
}
|
||||
path
|
||||
}
|
||||
None => {
|
||||
let pwd = std::env::current_dir()
|
||||
// technically this can fail but it's quite unlikeley
|
||||
.expect("determine current directory");
|
||||
let pwd_abs = pwd.canonicalize().expect("canonicalize current directory");
|
||||
pwd_abs.join(".neon")
|
||||
}
|
||||
};
|
||||
assert!(path.is_absolute());
|
||||
path
|
||||
fn base_path() -> PathBuf {
|
||||
match std::env::var_os("NEON_REPO_DIR") {
|
||||
Some(val) => PathBuf::from(val),
|
||||
None => PathBuf::from(".neon"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a public/private key pair for JWT authentication
|
||||
@@ -846,3 +580,31 @@ fn generate_auth_keys(private_key_path: &Path, public_key_path: &Path) -> anyhow
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn simple_conf_parsing() {
|
||||
let simple_conf_toml = include_str!("../simple.conf");
|
||||
let simple_conf_parse_result = LocalEnv::parse_config(simple_conf_toml);
|
||||
assert!(
|
||||
simple_conf_parse_result.is_ok(),
|
||||
"failed to parse simple config {simple_conf_toml}, reason: {simple_conf_parse_result:?}"
|
||||
);
|
||||
|
||||
let string_to_replace = "listen_addr = '127.0.0.1:50051'";
|
||||
let spoiled_url_str = "listen_addr = '!@$XOXO%^&'";
|
||||
let spoiled_url_toml = simple_conf_toml.replace(string_to_replace, spoiled_url_str);
|
||||
assert!(
|
||||
spoiled_url_toml.contains(spoiled_url_str),
|
||||
"Failed to replace string {string_to_replace} in the toml file {simple_conf_toml}"
|
||||
);
|
||||
let spoiled_url_parse_result = LocalEnv::parse_config(&spoiled_url_toml);
|
||||
assert!(
|
||||
spoiled_url_parse_result.is_err(),
|
||||
"expected toml with invalid Url {spoiled_url_toml} to fail the parsing, but got {spoiled_url_parse_result:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,35 +1,36 @@
|
||||
//! Code to manage pageservers
|
||||
//!
|
||||
//! In the local test environment, the data for each pageserver is stored in
|
||||
//! In the local test environment, the pageserver stores its data directly in
|
||||
//!
|
||||
//! ```text
|
||||
//! .neon/pageserver_<pageserver_id>
|
||||
//! ```
|
||||
//! .neon/
|
||||
//!
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
use std::num::NonZeroU64;
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
use std::process::Command;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use camino::Utf8PathBuf;
|
||||
use pageserver_api::models::{self, AuxFilePolicy, TenantInfo, TimelineInfo};
|
||||
use futures::SinkExt;
|
||||
use pageserver_api::models::{
|
||||
self, LocationConfig, ShardParameters, TenantHistorySize, TenantInfo, TimelineInfo,
|
||||
};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_client::mgmt_api;
|
||||
use postgres_backend::AuthType;
|
||||
use postgres_connection::{parse_host_port, PgConnectionConfig};
|
||||
use utils::auth::{Claims, Scope};
|
||||
use utils::id::NodeId;
|
||||
use utils::{
|
||||
id::{TenantId, TimelineId},
|
||||
lsn::Lsn,
|
||||
};
|
||||
|
||||
use crate::local_env::{NeonLocalInitPageserverConf, PageServerConf};
|
||||
use crate::local_env::PageServerConf;
|
||||
use crate::{background_process, local_env::LocalEnv};
|
||||
|
||||
/// Directory within .neon which will be used by default for LocalFs remote storage.
|
||||
@@ -73,27 +74,57 @@ impl PageServerNode {
|
||||
}
|
||||
}
|
||||
|
||||
fn pageserver_make_identity_toml(&self, node_id: NodeId) -> toml_edit::DocumentMut {
|
||||
toml_edit::DocumentMut::from_str(&format!("id={node_id}")).unwrap()
|
||||
}
|
||||
|
||||
fn pageserver_init_make_toml(
|
||||
&self,
|
||||
conf: NeonLocalInitPageserverConf,
|
||||
) -> anyhow::Result<toml_edit::DocumentMut> {
|
||||
assert_eq!(&PageServerConf::from(&conf), &self.conf, "during neon_local init, we derive the runtime state of ps conf (self.conf) from the --config flag fully");
|
||||
|
||||
// TODO(christian): instead of what we do here, create a pageserver_api::config::ConfigToml (PR #7656)
|
||||
|
||||
/// Merge overrides provided by the user on the command line with our default overides derived from neon_local configuration.
|
||||
///
|
||||
/// These all end up on the command line of the `pageserver` binary.
|
||||
fn neon_local_overrides(&self, cli_overrides: &[&str]) -> Vec<String> {
|
||||
// FIXME: the paths should be shell-escaped to handle paths with spaces, quotas etc.
|
||||
let pg_distrib_dir_param = format!(
|
||||
"pg_distrib_dir='{}'",
|
||||
self.env.pg_distrib_dir_raw().display()
|
||||
);
|
||||
|
||||
let PageServerConf {
|
||||
id,
|
||||
listen_pg_addr,
|
||||
listen_http_addr,
|
||||
pg_auth_type,
|
||||
http_auth_type,
|
||||
virtual_file_io_engine,
|
||||
get_vectored_impl,
|
||||
} = &self.conf;
|
||||
|
||||
let id = format!("id={}", id);
|
||||
|
||||
let http_auth_type_param = format!("http_auth_type='{}'", http_auth_type);
|
||||
let listen_http_addr_param = format!("listen_http_addr='{}'", listen_http_addr);
|
||||
|
||||
let pg_auth_type_param = format!("pg_auth_type='{}'", pg_auth_type);
|
||||
let listen_pg_addr_param = format!("listen_pg_addr='{}'", listen_pg_addr);
|
||||
let virtual_file_io_engine = if let Some(virtual_file_io_engine) = virtual_file_io_engine {
|
||||
format!("virtual_file_io_engine='{virtual_file_io_engine}'")
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
let get_vectored_impl = if let Some(get_vectored_impl) = get_vectored_impl {
|
||||
format!("get_vectored_impl='{get_vectored_impl}'")
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
||||
let broker_endpoint_param = format!("broker_endpoint='{}'", self.env.broker.client_url());
|
||||
|
||||
let mut overrides = vec![pg_distrib_dir_param, broker_endpoint_param];
|
||||
let mut overrides = vec![
|
||||
id,
|
||||
pg_distrib_dir_param,
|
||||
http_auth_type_param,
|
||||
pg_auth_type_param,
|
||||
listen_http_addr_param,
|
||||
listen_pg_addr_param,
|
||||
broker_endpoint_param,
|
||||
virtual_file_io_engine,
|
||||
get_vectored_impl,
|
||||
];
|
||||
|
||||
if let Some(control_plane_api) = &self.env.control_plane_api {
|
||||
overrides.push(format!(
|
||||
@@ -103,7 +134,7 @@ impl PageServerNode {
|
||||
|
||||
// Storage controller uses the same auth as pageserver: if JWT is enabled
|
||||
// for us, we will also need it to talk to them.
|
||||
if matches!(conf.http_auth_type, AuthType::NeonJWT) {
|
||||
if matches!(http_auth_type, AuthType::NeonJWT) {
|
||||
let jwt_token = self
|
||||
.env
|
||||
.generate_auth_token(&Claims::new(None, Scope::GenerationsApi))
|
||||
@@ -112,43 +143,31 @@ impl PageServerNode {
|
||||
}
|
||||
}
|
||||
|
||||
if !conf.other.contains_key("remote_storage") {
|
||||
if !cli_overrides
|
||||
.iter()
|
||||
.any(|c| c.starts_with("remote_storage"))
|
||||
{
|
||||
overrides.push(format!(
|
||||
"remote_storage={{local_path='../{PAGESERVER_REMOTE_STORAGE_DIR}'}}"
|
||||
));
|
||||
}
|
||||
|
||||
if conf.http_auth_type != AuthType::Trust || conf.pg_auth_type != AuthType::Trust {
|
||||
if *http_auth_type != AuthType::Trust || *pg_auth_type != AuthType::Trust {
|
||||
// Keys are generated in the toplevel repo dir, pageservers' workdirs
|
||||
// are one level below that, so refer to keys with ../
|
||||
overrides.push("auth_validation_public_key_path='../auth_public_key.pem'".to_owned());
|
||||
}
|
||||
|
||||
// Apply the user-provided overrides
|
||||
overrides.push({
|
||||
let mut doc =
|
||||
toml_edit::ser::to_document(&conf).expect("we deserialized this from toml earlier");
|
||||
// `id` is written out to `identity.toml` instead of `pageserver.toml`
|
||||
doc.remove("id").expect("it's part of the struct");
|
||||
doc.to_string()
|
||||
});
|
||||
overrides.extend(cli_overrides.iter().map(|&c| c.to_owned()));
|
||||
|
||||
// Turn `overrides` into a toml document.
|
||||
// TODO: above code is legacy code, it should be refactored to use toml_edit directly.
|
||||
let mut config_toml = toml_edit::DocumentMut::new();
|
||||
for fragment_str in overrides {
|
||||
let fragment = toml_edit::DocumentMut::from_str(&fragment_str)
|
||||
.expect("all fragments in `overrides` are valid toml documents, this function controls that");
|
||||
for (key, item) in fragment.iter() {
|
||||
config_toml.insert(key, item.clone());
|
||||
}
|
||||
}
|
||||
Ok(config_toml)
|
||||
overrides
|
||||
}
|
||||
|
||||
/// Initializes a pageserver node by creating its config with the overrides provided.
|
||||
pub fn initialize(&self, conf: NeonLocalInitPageserverConf) -> anyhow::Result<()> {
|
||||
self.pageserver_init(conf)
|
||||
pub fn initialize(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
|
||||
// First, run `pageserver --init` and wait for it to write a config into FS and exit.
|
||||
self.pageserver_init(config_overrides)
|
||||
.with_context(|| format!("Failed to run init for pageserver node {}", self.conf.id))
|
||||
}
|
||||
|
||||
@@ -164,11 +183,11 @@ impl PageServerNode {
|
||||
.expect("non-Unicode path")
|
||||
}
|
||||
|
||||
pub async fn start(&self, retry_timeout: &Duration) -> anyhow::Result<()> {
|
||||
self.start_node(retry_timeout).await
|
||||
pub async fn start(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
|
||||
self.start_node(config_overrides, false).await
|
||||
}
|
||||
|
||||
fn pageserver_init(&self, conf: NeonLocalInitPageserverConf) -> anyhow::Result<()> {
|
||||
fn pageserver_init(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
|
||||
let datadir = self.repo_path();
|
||||
let node_id = self.conf.id;
|
||||
println!(
|
||||
@@ -179,50 +198,29 @@ impl PageServerNode {
|
||||
);
|
||||
io::stdout().flush()?;
|
||||
|
||||
// If the config file we got as a CLI argument includes the `availability_zone`
|
||||
// config, then use that to populate the `metadata.json` file for the pageserver.
|
||||
// In production the deployment orchestrator does this for us.
|
||||
let az_id = conf
|
||||
.other
|
||||
.get("availability_zone")
|
||||
.map(|toml| {
|
||||
let az_str = toml.to_string();
|
||||
// Trim the (") chars from the toml representation
|
||||
if az_str.starts_with('"') && az_str.ends_with('"') {
|
||||
az_str[1..az_str.len() - 1].to_string()
|
||||
} else {
|
||||
az_str
|
||||
}
|
||||
})
|
||||
.unwrap_or("local".to_string());
|
||||
if !datadir.exists() {
|
||||
std::fs::create_dir(&datadir)?;
|
||||
}
|
||||
|
||||
let config = self
|
||||
.pageserver_init_make_toml(conf)
|
||||
.context("make pageserver toml")?;
|
||||
let config_file_path = datadir.join("pageserver.toml");
|
||||
let mut config_file = std::fs::OpenOptions::new()
|
||||
.create_new(true)
|
||||
.write(true)
|
||||
.open(&config_file_path)
|
||||
.with_context(|| format!("open pageserver toml for write: {config_file_path:?}"))?;
|
||||
config_file
|
||||
.write_all(config.to_string().as_bytes())
|
||||
.context("write pageserver toml")?;
|
||||
drop(config_file);
|
||||
let datadir_path_str = datadir.to_str().with_context(|| {
|
||||
format!("Cannot start pageserver node {node_id} in path that has no string representation: {datadir:?}")
|
||||
})?;
|
||||
let mut args = self.pageserver_basic_args(config_overrides, datadir_path_str);
|
||||
args.push(Cow::Borrowed("--init"));
|
||||
|
||||
let identity_file_path = datadir.join("identity.toml");
|
||||
let mut identity_file = std::fs::OpenOptions::new()
|
||||
.create_new(true)
|
||||
.write(true)
|
||||
.open(identity_file_path)
|
||||
.with_context(|| format!("open identity toml for write: {config_file_path:?}"))?;
|
||||
let identity_toml = self.pageserver_make_identity_toml(node_id);
|
||||
identity_file
|
||||
.write_all(identity_toml.to_string().as_bytes())
|
||||
.context("write identity toml")?;
|
||||
drop(identity_toml);
|
||||
let init_output = Command::new(self.env.pageserver_bin())
|
||||
.args(args.iter().map(Cow::as_ref))
|
||||
.envs(self.pageserver_env_variables()?)
|
||||
.output()
|
||||
.with_context(|| format!("Failed to run pageserver init for node {node_id}"))?;
|
||||
|
||||
// TODO: invoke a TBD config-check command to validate that pageserver will start with the written config
|
||||
anyhow::ensure!(
|
||||
init_output.status.success(),
|
||||
"Pageserver init for node {} did not finish successfully, stdout: {}, stderr: {}",
|
||||
node_id,
|
||||
String::from_utf8_lossy(&init_output.stdout),
|
||||
String::from_utf8_lossy(&init_output.stderr),
|
||||
);
|
||||
|
||||
// Write metadata file, used by pageserver on startup to register itself with
|
||||
// the storage controller
|
||||
@@ -231,22 +229,17 @@ impl PageServerNode {
|
||||
let (_http_host, http_port) =
|
||||
parse_host_port(&self.conf.listen_http_addr).expect("Unable to parse listen_http_addr");
|
||||
let http_port = http_port.unwrap_or(9898);
|
||||
|
||||
// Intentionally hand-craft JSON: this acts as an implicit format compat test
|
||||
// in case the pageserver-side structure is edited, and reflects the real life
|
||||
// situation: the metadata is written by some other script.
|
||||
std::fs::write(
|
||||
metadata_path,
|
||||
serde_json::to_vec(&pageserver_api::config::NodeMetadata {
|
||||
postgres_host: "localhost".to_string(),
|
||||
postgres_port: self.pg_connection_config.port(),
|
||||
http_host: "localhost".to_string(),
|
||||
http_port,
|
||||
other: HashMap::from([(
|
||||
"availability_zone_id".to_string(),
|
||||
serde_json::json!(az_id),
|
||||
)]),
|
||||
})
|
||||
serde_json::to_vec(&serde_json::json!({
|
||||
"host": "localhost",
|
||||
"port": self.pg_connection_config.port(),
|
||||
"http_host": "localhost",
|
||||
"http_port": http_port,
|
||||
}))
|
||||
.unwrap(),
|
||||
)
|
||||
.expect("Failed to write metadata file");
|
||||
@@ -254,15 +247,18 @@ impl PageServerNode {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn start_node(&self, retry_timeout: &Duration) -> anyhow::Result<()> {
|
||||
async fn start_node(
|
||||
&self,
|
||||
config_overrides: &[&str],
|
||||
update_config: bool,
|
||||
) -> anyhow::Result<()> {
|
||||
// TODO: using a thread here because start_process() is not async but we need to call check_status()
|
||||
let datadir = self.repo_path();
|
||||
print!(
|
||||
"Starting pageserver node {} at '{}' in {:?}, retrying for {:?}",
|
||||
"Starting pageserver node {} at '{}' in {:?}",
|
||||
self.conf.id,
|
||||
self.pg_connection_config.raw_address(),
|
||||
datadir,
|
||||
retry_timeout
|
||||
datadir
|
||||
);
|
||||
io::stdout().flush().context("flush stdout")?;
|
||||
|
||||
@@ -272,15 +268,17 @@ impl PageServerNode {
|
||||
self.conf.id, datadir,
|
||||
)
|
||||
})?;
|
||||
let args = vec!["-D", datadir_path_str];
|
||||
let mut args = self.pageserver_basic_args(config_overrides, datadir_path_str);
|
||||
if update_config {
|
||||
args.push(Cow::Borrowed("--update-config"));
|
||||
}
|
||||
background_process::start_process(
|
||||
"pageserver",
|
||||
&datadir,
|
||||
&self.env.pageserver_bin(),
|
||||
args,
|
||||
args.iter().map(Cow::as_ref),
|
||||
self.pageserver_env_variables()?,
|
||||
background_process::InitialPidFile::Expect(self.pid_file()),
|
||||
retry_timeout,
|
||||
|| async {
|
||||
let st = self.check_status().await;
|
||||
match st {
|
||||
@@ -295,6 +293,22 @@ impl PageServerNode {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn pageserver_basic_args<'a>(
|
||||
&self,
|
||||
config_overrides: &'a [&'a str],
|
||||
datadir_path_str: &'a str,
|
||||
) -> Vec<Cow<'a, str>> {
|
||||
let mut args = vec![Cow::Borrowed("-D"), Cow::Borrowed(datadir_path_str)];
|
||||
|
||||
let overrides = self.neon_local_overrides(config_overrides);
|
||||
for config_override in overrides {
|
||||
args.push(Cow::Borrowed("-c"));
|
||||
args.push(Cow::Owned(config_override));
|
||||
}
|
||||
|
||||
args
|
||||
}
|
||||
|
||||
fn pageserver_env_variables(&self) -> anyhow::Result<Vec<(String, String)>> {
|
||||
// FIXME: why is this tied to pageserver's auth type? Whether or not the safekeeper
|
||||
// needs a token, and how to generate that token, seems independent to whether
|
||||
@@ -322,6 +336,22 @@ impl PageServerNode {
|
||||
background_process::stop_process(immediate, "pageserver", &self.pid_file())
|
||||
}
|
||||
|
||||
pub async fn page_server_psql_client(
|
||||
&self,
|
||||
) -> anyhow::Result<(
|
||||
tokio_postgres::Client,
|
||||
tokio_postgres::Connection<tokio_postgres::Socket, tokio_postgres::tls::NoTlsStream>,
|
||||
)> {
|
||||
let mut config = self.pg_connection_config.clone();
|
||||
if self.conf.pg_auth_type == AuthType::NeonJWT {
|
||||
let token = self
|
||||
.env
|
||||
.generate_auth_token(&Claims::new(None, Scope::PageServerApi))?;
|
||||
config = config.set_password(Some(token));
|
||||
}
|
||||
Ok(config.connect_no_tls().await?)
|
||||
}
|
||||
|
||||
pub async fn check_status(&self) -> mgmt_api::Result<()> {
|
||||
self.http_client.status().await
|
||||
}
|
||||
@@ -375,6 +405,11 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<NonZeroU64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
|
||||
trace_read_requests: settings
|
||||
.remove("trace_read_requests")
|
||||
.map(|x| x.parse::<bool>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'trace_read_requests' as bool")?,
|
||||
eviction_policy: settings
|
||||
.remove("eviction_policy")
|
||||
.map(serde_json::from_str)
|
||||
@@ -399,15 +434,6 @@ impl PageServerNode {
|
||||
.map(serde_json::from_str)
|
||||
.transpose()
|
||||
.context("parse `timeline_get_throttle` from json")?,
|
||||
switch_aux_file_policy: settings
|
||||
.remove("switch_aux_file_policy")
|
||||
.map(|x| x.parse::<AuxFilePolicy>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'switch_aux_file_policy'")?,
|
||||
lsn_lease_length: settings.remove("lsn_lease_length").map(|x| x.to_string()),
|
||||
lsn_lease_length_for_ts: settings
|
||||
.remove("lsn_lease_length_for_ts")
|
||||
.map(|x| x.to_string()),
|
||||
};
|
||||
if !settings.is_empty() {
|
||||
bail!("Unrecognized tenant settings: {settings:?}")
|
||||
@@ -416,6 +442,28 @@ impl PageServerNode {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn tenant_create(
|
||||
&self,
|
||||
new_tenant_id: TenantId,
|
||||
generation: Option<u32>,
|
||||
settings: HashMap<&str, &str>,
|
||||
) -> anyhow::Result<TenantId> {
|
||||
let config = Self::parse_config(settings.clone())?;
|
||||
|
||||
let request = models::TenantCreateRequest {
|
||||
new_tenant_id: TenantShardId::unsharded(new_tenant_id),
|
||||
generation,
|
||||
config,
|
||||
shard_parameters: ShardParameters::default(),
|
||||
// Placement policy is not meaningful for creations not done via storage controller
|
||||
placement_policy: None,
|
||||
};
|
||||
if !settings.is_empty() {
|
||||
bail!("Unrecognized tenant settings: {settings:?}")
|
||||
}
|
||||
Ok(self.http_client.tenant_create(&request).await?)
|
||||
}
|
||||
|
||||
pub async fn tenant_config(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
@@ -475,6 +523,11 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<NonZeroU64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
|
||||
trace_read_requests: settings
|
||||
.remove("trace_read_requests")
|
||||
.map(|x| x.parse::<bool>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'trace_read_requests' as bool")?,
|
||||
eviction_policy: settings
|
||||
.remove("eviction_policy")
|
||||
.map(serde_json::from_str)
|
||||
@@ -499,15 +552,6 @@ impl PageServerNode {
|
||||
.map(serde_json::from_str)
|
||||
.transpose()
|
||||
.context("parse `timeline_get_throttle` from json")?,
|
||||
switch_aux_file_policy: settings
|
||||
.remove("switch_aux_file_policy")
|
||||
.map(|x| x.parse::<AuxFilePolicy>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'switch_aux_file_policy'")?,
|
||||
lsn_lease_length: settings.remove("lsn_lease_length").map(|x| x.to_string()),
|
||||
lsn_lease_length_for_ts: settings
|
||||
.remove("lsn_lease_length_for_ts")
|
||||
.map(|x| x.to_string()),
|
||||
}
|
||||
};
|
||||
|
||||
@@ -522,6 +566,19 @@ impl PageServerNode {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn location_config(
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
config: LocationConfig,
|
||||
flush_ms: Option<Duration>,
|
||||
lazy: bool,
|
||||
) -> anyhow::Result<()> {
|
||||
Ok(self
|
||||
.http_client
|
||||
.location_config(tenant_shard_id, config, flush_ms, lazy)
|
||||
.await?)
|
||||
}
|
||||
|
||||
pub async fn timeline_list(
|
||||
&self,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
@@ -568,41 +625,72 @@ impl PageServerNode {
|
||||
pg_wal: Option<(Lsn, PathBuf)>,
|
||||
pg_version: u32,
|
||||
) -> anyhow::Result<()> {
|
||||
let (client, conn) = self.page_server_psql_client().await?;
|
||||
// The connection object performs the actual communication with the database,
|
||||
// so spawn it off to run on its own.
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = conn.await {
|
||||
eprintln!("connection error: {}", e);
|
||||
}
|
||||
});
|
||||
let client = std::pin::pin!(client);
|
||||
|
||||
// Init base reader
|
||||
let (start_lsn, base_tarfile_path) = base;
|
||||
let base_tarfile = tokio::fs::File::open(base_tarfile_path).await?;
|
||||
let base_tarfile =
|
||||
mgmt_api::ReqwestBody::wrap_stream(tokio_util::io::ReaderStream::new(base_tarfile));
|
||||
let base_tarfile = tokio_util::io::ReaderStream::new(base_tarfile);
|
||||
|
||||
// Init wal reader if necessary
|
||||
let (end_lsn, wal_reader) = if let Some((end_lsn, wal_tarfile_path)) = pg_wal {
|
||||
let wal_tarfile = tokio::fs::File::open(wal_tarfile_path).await?;
|
||||
let wal_reader =
|
||||
mgmt_api::ReqwestBody::wrap_stream(tokio_util::io::ReaderStream::new(wal_tarfile));
|
||||
let wal_reader = tokio_util::io::ReaderStream::new(wal_tarfile);
|
||||
(end_lsn, Some(wal_reader))
|
||||
} else {
|
||||
(start_lsn, None)
|
||||
};
|
||||
|
||||
// Import base
|
||||
self.http_client
|
||||
.import_basebackup(
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
start_lsn,
|
||||
end_lsn,
|
||||
pg_version,
|
||||
base_tarfile,
|
||||
)
|
||||
.await?;
|
||||
let copy_in = |reader, cmd| {
|
||||
let client = &client;
|
||||
async move {
|
||||
let writer = client.copy_in(&cmd).await?;
|
||||
let writer = std::pin::pin!(writer);
|
||||
let mut writer = writer.sink_map_err(|e| {
|
||||
std::io::Error::new(std::io::ErrorKind::Other, format!("{e}"))
|
||||
});
|
||||
let mut reader = std::pin::pin!(reader);
|
||||
writer.send_all(&mut reader).await?;
|
||||
writer.into_inner().finish().await?;
|
||||
anyhow::Ok(())
|
||||
}
|
||||
};
|
||||
|
||||
// Import base
|
||||
copy_in(
|
||||
base_tarfile,
|
||||
format!(
|
||||
"import basebackup {tenant_id} {timeline_id} {start_lsn} {end_lsn} {pg_version}"
|
||||
),
|
||||
)
|
||||
.await?;
|
||||
// Import wal if necessary
|
||||
if let Some(wal_reader) = wal_reader {
|
||||
self.http_client
|
||||
.import_wal(tenant_id, timeline_id, start_lsn, end_lsn, wal_reader)
|
||||
.await?;
|
||||
copy_in(
|
||||
wal_reader,
|
||||
format!("import wal {tenant_id} {timeline_id} {start_lsn} {end_lsn}"),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn tenant_synthetic_size(
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
) -> anyhow::Result<TenantHistorySize> {
|
||||
Ok(self
|
||||
.http_client
|
||||
.tenant_synthetic_size(tenant_shard_id)
|
||||
.await?)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,10 +4,13 @@
|
||||
/// NOTE: This doesn't implement the full, correct postgresql.conf syntax. Just
|
||||
/// enough to extract a few settings we need in Neon, assuming you don't do
|
||||
/// funny stuff like include-directives or funny escaping.
|
||||
use anyhow::{bail, Context, Result};
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt;
|
||||
use std::io::BufRead;
|
||||
use std::str::FromStr;
|
||||
|
||||
/// In-memory representation of a postgresql.conf file
|
||||
#[derive(Default, Debug)]
|
||||
@@ -16,16 +19,84 @@ pub struct PostgresConf {
|
||||
hash: HashMap<String, String>,
|
||||
}
|
||||
|
||||
static CONF_LINE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^((?:\w|\.)+)\s*=\s*(\S+)$").unwrap());
|
||||
|
||||
impl PostgresConf {
|
||||
pub fn new() -> PostgresConf {
|
||||
PostgresConf::default()
|
||||
}
|
||||
|
||||
/// Read file into memory
|
||||
pub fn read(read: impl std::io::Read) -> Result<PostgresConf> {
|
||||
let mut result = Self::new();
|
||||
|
||||
for line in std::io::BufReader::new(read).lines() {
|
||||
let line = line?;
|
||||
|
||||
// Store each line in a vector, in original format
|
||||
result.lines.push(line.clone());
|
||||
|
||||
// Also parse each line and insert key=value lines into a hash map.
|
||||
//
|
||||
// FIXME: This doesn't match exactly the flex/bison grammar in PostgreSQL.
|
||||
// But it's close enough for our usage.
|
||||
let line = line.trim();
|
||||
if line.starts_with('#') {
|
||||
// comment, ignore
|
||||
continue;
|
||||
} else if let Some(caps) = CONF_LINE_RE.captures(line) {
|
||||
let name = caps.get(1).unwrap().as_str();
|
||||
let raw_val = caps.get(2).unwrap().as_str();
|
||||
|
||||
if let Ok(val) = deescape_str(raw_val) {
|
||||
// Note: if there's already an entry in the hash map for
|
||||
// this key, this will replace it. That's the behavior what
|
||||
// we want; when PostgreSQL reads the file, each line
|
||||
// overrides any previous value for the same setting.
|
||||
result.hash.insert(name.to_string(), val.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Return the current value of 'option'
|
||||
pub fn get(&self, option: &str) -> Option<&str> {
|
||||
self.hash.get(option).map(|x| x.as_ref())
|
||||
}
|
||||
|
||||
/// Return the current value of a field, parsed to the right datatype.
|
||||
///
|
||||
/// This calls the FromStr::parse() function on the value of the field. If
|
||||
/// the field does not exist, or parsing fails, returns an error.
|
||||
///
|
||||
pub fn parse_field<T>(&self, field_name: &str, context: &str) -> Result<T>
|
||||
where
|
||||
T: FromStr,
|
||||
<T as FromStr>::Err: std::error::Error + Send + Sync + 'static,
|
||||
{
|
||||
self.get(field_name)
|
||||
.with_context(|| format!("could not find '{}' option {}", field_name, context))?
|
||||
.parse::<T>()
|
||||
.with_context(|| format!("could not parse '{}' option {}", field_name, context))
|
||||
}
|
||||
|
||||
pub fn parse_field_optional<T>(&self, field_name: &str, context: &str) -> Result<Option<T>>
|
||||
where
|
||||
T: FromStr,
|
||||
<T as FromStr>::Err: std::error::Error + Send + Sync + 'static,
|
||||
{
|
||||
if let Some(val) = self.get(field_name) {
|
||||
let result = val
|
||||
.parse::<T>()
|
||||
.with_context(|| format!("could not parse '{}' option {}", field_name, context))?;
|
||||
|
||||
Ok(Some(result))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Note: if you call this multiple times for the same option, the config
|
||||
/// file will a line for each call. It would be nice to have a function
|
||||
@@ -83,8 +154,48 @@ fn escape_str(s: &str) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
/// De-escape a possibly-quoted value.
|
||||
///
|
||||
/// See `DeescapeQuotedString` function in PostgreSQL sources for how PostgreSQL
|
||||
/// does this.
|
||||
fn deescape_str(s: &str) -> Result<String> {
|
||||
// If the string has a quote at the beginning and end, strip them out.
|
||||
if s.len() >= 2 && s.starts_with('\'') && s.ends_with('\'') {
|
||||
let mut result = String::new();
|
||||
|
||||
let mut iter = s[1..(s.len() - 1)].chars().peekable();
|
||||
while let Some(c) = iter.next() {
|
||||
let newc = if c == '\\' {
|
||||
match iter.next() {
|
||||
Some('b') => '\x08',
|
||||
Some('f') => '\x0c',
|
||||
Some('n') => '\n',
|
||||
Some('r') => '\r',
|
||||
Some('t') => '\t',
|
||||
Some('0'..='7') => {
|
||||
// TODO
|
||||
bail!("octal escapes not supported");
|
||||
}
|
||||
Some(n) => n,
|
||||
None => break,
|
||||
}
|
||||
} else if c == '\'' && iter.peek() == Some(&'\'') {
|
||||
// doubled quote becomes just one quote
|
||||
iter.next().unwrap()
|
||||
} else {
|
||||
c
|
||||
};
|
||||
|
||||
result.push(newc);
|
||||
}
|
||||
Ok(result)
|
||||
} else {
|
||||
Ok(s.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_postgresql_conf_escapes() -> anyhow::Result<()> {
|
||||
fn test_postgresql_conf_escapes() -> Result<()> {
|
||||
assert_eq!(escape_str("foo bar"), "'foo bar'");
|
||||
// these don't need to be quoted
|
||||
assert_eq!(escape_str("foo"), "foo");
|
||||
@@ -103,5 +214,13 @@ fn test_postgresql_conf_escapes() -> anyhow::Result<()> {
|
||||
assert_eq!(escape_str("fo\\o"), "'fo\\\\o'");
|
||||
assert_eq!(escape_str("10 cats"), "'10 cats'");
|
||||
|
||||
// Test de-escaping
|
||||
assert_eq!(deescape_str(&escape_str("foo"))?, "foo");
|
||||
assert_eq!(deescape_str(&escape_str("fo'o\nba\\r"))?, "fo'o\nba\\r");
|
||||
assert_eq!(deescape_str("'\\b\\f\\n\\r\\t'")?, "\x08\x0c\n\r\t");
|
||||
|
||||
// octal-escapes are currently not supported
|
||||
assert!(deescape_str("'foo\\7\\07\\007'").is_err());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -5,10 +5,8 @@
|
||||
//! ```text
|
||||
//! .neon/safekeepers/<safekeeper id>
|
||||
//! ```
|
||||
use std::future::Future;
|
||||
use std::io::Write;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
use std::{io, result};
|
||||
|
||||
use anyhow::Context;
|
||||
@@ -16,7 +14,6 @@ use camino::Utf8PathBuf;
|
||||
use postgres_connection::PgConnectionConfig;
|
||||
use reqwest::{IntoUrl, Method};
|
||||
use thiserror::Error;
|
||||
use utils::auth::{Claims, Scope};
|
||||
use utils::{http::error::HttpErrorBody, id::NodeId};
|
||||
|
||||
use crate::{
|
||||
@@ -35,10 +32,12 @@ pub enum SafekeeperHttpError {
|
||||
|
||||
type Result<T> = result::Result<T, SafekeeperHttpError>;
|
||||
|
||||
pub(crate) trait ResponseErrorMessageExt: Sized {
|
||||
fn error_from_body(self) -> impl Future<Output = Result<Self>> + Send;
|
||||
#[async_trait::async_trait]
|
||||
pub trait ResponseErrorMessageExt: Sized {
|
||||
async fn error_from_body(self) -> Result<Self>;
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl ResponseErrorMessageExt for reqwest::Response {
|
||||
async fn error_from_body(self) -> Result<Self> {
|
||||
let status = self.status();
|
||||
@@ -111,16 +110,11 @@ impl SafekeeperNode {
|
||||
.expect("non-Unicode path")
|
||||
}
|
||||
|
||||
pub async fn start(
|
||||
&self,
|
||||
extra_opts: &[String],
|
||||
retry_timeout: &Duration,
|
||||
) -> anyhow::Result<()> {
|
||||
pub async fn start(&self, extra_opts: Vec<String>) -> anyhow::Result<()> {
|
||||
print!(
|
||||
"Starting safekeeper at '{}' in '{}', retrying for {:?}",
|
||||
"Starting safekeeper at '{}' in '{}'",
|
||||
self.pg_connection_config.raw_address(),
|
||||
self.datadir_path().display(),
|
||||
retry_timeout,
|
||||
self.datadir_path().display()
|
||||
);
|
||||
io::stdout().flush().unwrap();
|
||||
|
||||
@@ -196,16 +190,15 @@ impl SafekeeperNode {
|
||||
]);
|
||||
}
|
||||
|
||||
args.extend_from_slice(extra_opts);
|
||||
args.extend(extra_opts);
|
||||
|
||||
background_process::start_process(
|
||||
&format!("safekeeper-{id}"),
|
||||
&datadir,
|
||||
&self.env.safekeeper_bin(),
|
||||
&args,
|
||||
self.safekeeper_env_variables()?,
|
||||
[],
|
||||
background_process::InitialPidFile::Expect(self.pid_file()),
|
||||
retry_timeout,
|
||||
|| async {
|
||||
match self.check_status().await {
|
||||
Ok(()) => Ok(true),
|
||||
@@ -217,18 +210,6 @@ impl SafekeeperNode {
|
||||
.await
|
||||
}
|
||||
|
||||
fn safekeeper_env_variables(&self) -> anyhow::Result<Vec<(String, String)>> {
|
||||
// Generate a token to connect from safekeeper to peers
|
||||
if self.conf.auth_enabled {
|
||||
let token = self
|
||||
.env
|
||||
.generate_auth_token(&Claims::new(None, Scope::SafekeeperData))?;
|
||||
Ok(vec![("SAFEKEEPER_AUTH_TOKEN".to_owned(), token)])
|
||||
} else {
|
||||
Ok(Vec::new())
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Stop the server.
|
||||
///
|
||||
|
||||
@@ -1,26 +1,21 @@
|
||||
use crate::{
|
||||
background_process,
|
||||
local_env::{LocalEnv, NeonStorageControllerConf},
|
||||
};
|
||||
use crate::{background_process, local_env::LocalEnv};
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use hyper0::Uri;
|
||||
use nix::unistd::Pid;
|
||||
use hyper::Method;
|
||||
use pageserver_api::{
|
||||
controller_api::{
|
||||
NodeConfigureRequest, NodeDescribeResponse, NodeRegisterRequest, TenantCreateRequest,
|
||||
TenantCreateResponse, TenantLocateResponse, TenantShardMigrateRequest,
|
||||
TenantShardMigrateResponse,
|
||||
NodeConfigureRequest, NodeRegisterRequest, TenantCreateResponse, TenantLocateResponse,
|
||||
TenantShardMigrateRequest, TenantShardMigrateResponse,
|
||||
},
|
||||
models::{
|
||||
TenantShardSplitRequest, TenantShardSplitResponse, TimelineCreateRequest, TimelineInfo,
|
||||
TenantCreateRequest, TenantShardSplitRequest, TenantShardSplitResponse,
|
||||
TimelineCreateRequest, TimelineInfo,
|
||||
},
|
||||
shard::{ShardStripeSize, TenantShardId},
|
||||
};
|
||||
use pageserver_client::mgmt_api::ResponseErrorMessageExt;
|
||||
use postgres_backend::AuthType;
|
||||
use reqwest::Method;
|
||||
use serde::{de::DeserializeOwned, Deserialize, Serialize};
|
||||
use std::{fs, net::SocketAddr, path::PathBuf, str::FromStr, sync::OnceLock};
|
||||
use std::{fs, str::FromStr};
|
||||
use tokio::process::Command;
|
||||
use tracing::instrument;
|
||||
use url::Url;
|
||||
@@ -28,61 +23,28 @@ use utils::{
|
||||
auth::{encode_from_key_file, Claims, Scope},
|
||||
id::{NodeId, TenantId},
|
||||
};
|
||||
use whoami::username;
|
||||
|
||||
pub struct StorageController {
|
||||
env: LocalEnv,
|
||||
listen: String,
|
||||
path: Utf8PathBuf,
|
||||
private_key: Option<Vec<u8>>,
|
||||
public_key: Option<String>,
|
||||
postgres_port: u16,
|
||||
client: reqwest::Client,
|
||||
config: NeonStorageControllerConf,
|
||||
|
||||
// The listen addresses is learned when starting the storage controller,
|
||||
// hence the use of OnceLock to init it at the right time.
|
||||
listen: OnceLock<SocketAddr>,
|
||||
}
|
||||
|
||||
const COMMAND: &str = "storage_controller";
|
||||
|
||||
const STORAGE_CONTROLLER_POSTGRES_VERSION: u32 = 16;
|
||||
|
||||
const DB_NAME: &str = "storage_controller";
|
||||
|
||||
pub struct NeonStorageControllerStartArgs {
|
||||
pub instance_id: u8,
|
||||
pub base_port: Option<u16>,
|
||||
pub start_timeout: humantime::Duration,
|
||||
}
|
||||
|
||||
impl NeonStorageControllerStartArgs {
|
||||
pub fn with_default_instance_id(start_timeout: humantime::Duration) -> Self {
|
||||
Self {
|
||||
instance_id: 1,
|
||||
base_port: None,
|
||||
start_timeout,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct NeonStorageControllerStopArgs {
|
||||
pub instance_id: u8,
|
||||
pub immediate: bool,
|
||||
}
|
||||
|
||||
impl NeonStorageControllerStopArgs {
|
||||
pub fn with_default_instance_id(immediate: bool) -> Self {
|
||||
Self {
|
||||
instance_id: 1,
|
||||
immediate,
|
||||
}
|
||||
}
|
||||
}
|
||||
// Use a shorter pageserver unavailability interval than the default to speed up tests.
|
||||
const NEON_LOCAL_MAX_UNAVAILABLE_INTERVAL: std::time::Duration = std::time::Duration::from_secs(10);
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct AttachHookRequest {
|
||||
pub tenant_shard_id: TenantShardId,
|
||||
pub node_id: Option<NodeId>,
|
||||
pub generation_override: Option<i32>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
@@ -102,6 +64,27 @@ pub struct InspectResponse {
|
||||
|
||||
impl StorageController {
|
||||
pub fn from_env(env: &LocalEnv) -> Self {
|
||||
let path = Utf8PathBuf::from_path_buf(env.base_data_dir.clone())
|
||||
.unwrap()
|
||||
.join("attachments.json");
|
||||
|
||||
// Makes no sense to construct this if pageservers aren't going to use it: assume
|
||||
// pageservers have control plane API set
|
||||
let listen_url = env.control_plane_api.clone().unwrap();
|
||||
|
||||
let listen = format!(
|
||||
"{}:{}",
|
||||
listen_url.host_str().unwrap(),
|
||||
listen_url.port().unwrap()
|
||||
);
|
||||
|
||||
// Convention: NeonEnv in python tests reserves the next port after the control_plane_api
|
||||
// port, for use by our captive postgres.
|
||||
let postgres_port = listen_url
|
||||
.port()
|
||||
.expect("Control plane API setting should always have a port")
|
||||
+ 1;
|
||||
|
||||
// Assume all pageservers have symmetric auth configuration: this service
|
||||
// expects to use one JWT token to talk to all of them.
|
||||
let ps_conf = env
|
||||
@@ -144,28 +127,20 @@ impl StorageController {
|
||||
|
||||
Self {
|
||||
env: env.clone(),
|
||||
path,
|
||||
listen,
|
||||
private_key,
|
||||
public_key,
|
||||
postgres_port,
|
||||
client: reqwest::ClientBuilder::new()
|
||||
.build()
|
||||
.expect("Failed to construct http client"),
|
||||
config: env.storage_controller.clone(),
|
||||
listen: OnceLock::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn storage_controller_instance_dir(&self, instance_id: u8) -> PathBuf {
|
||||
self.env
|
||||
.base_data_dir
|
||||
.join(format!("storage_controller_{}", instance_id))
|
||||
}
|
||||
|
||||
fn pid_file(&self, instance_id: u8) -> Utf8PathBuf {
|
||||
Utf8PathBuf::from_path_buf(
|
||||
self.storage_controller_instance_dir(instance_id)
|
||||
.join("storage_controller.pid"),
|
||||
)
|
||||
.expect("non-Unicode path")
|
||||
fn pid_file(&self) -> Utf8PathBuf {
|
||||
Utf8PathBuf::from_path_buf(self.env.base_data_dir.join("storage_controller.pid"))
|
||||
.expect("non-Unicode path")
|
||||
}
|
||||
|
||||
/// PIDFile for the postgres instance used to store storage controller state
|
||||
@@ -178,16 +153,16 @@ impl StorageController {
|
||||
.expect("non-Unicode path")
|
||||
}
|
||||
|
||||
/// Find the directory containing postgres subdirectories, such `bin` and `lib`
|
||||
/// Find the directory containing postgres binaries, such as `initdb` and `pg_ctl`
|
||||
///
|
||||
/// This usually uses STORAGE_CONTROLLER_POSTGRES_VERSION of postgres, but will fall back
|
||||
/// to other versions if that one isn't found. Some automated tests create circumstances
|
||||
/// where only one version is available in pg_distrib_dir, such as `test_remote_extensions`.
|
||||
async fn get_pg_dir(&self, dir_name: &str) -> anyhow::Result<Utf8PathBuf> {
|
||||
let prefer_versions = [STORAGE_CONTROLLER_POSTGRES_VERSION, 16, 15, 14];
|
||||
pub async fn get_pg_bin_dir(&self) -> anyhow::Result<Utf8PathBuf> {
|
||||
let prefer_versions = [STORAGE_CONTROLLER_POSTGRES_VERSION, 15, 14];
|
||||
|
||||
for v in prefer_versions {
|
||||
let path = Utf8PathBuf::from_path_buf(self.env.pg_dir(v, dir_name)?).unwrap();
|
||||
let path = Utf8PathBuf::from_path_buf(self.env.pg_bin_dir(v)?).unwrap();
|
||||
if tokio::fs::try_exists(&path).await? {
|
||||
return Ok(path);
|
||||
}
|
||||
@@ -195,51 +170,30 @@ impl StorageController {
|
||||
|
||||
// Fall through
|
||||
anyhow::bail!(
|
||||
"Postgres directory '{}' not found in {}",
|
||||
dir_name,
|
||||
self.env.pg_distrib_dir.display(),
|
||||
"Postgres binaries not found in {}",
|
||||
self.env.pg_distrib_dir.display()
|
||||
);
|
||||
}
|
||||
|
||||
pub async fn get_pg_bin_dir(&self) -> anyhow::Result<Utf8PathBuf> {
|
||||
self.get_pg_dir("bin").await
|
||||
}
|
||||
|
||||
pub async fn get_pg_lib_dir(&self) -> anyhow::Result<Utf8PathBuf> {
|
||||
self.get_pg_dir("lib").await
|
||||
}
|
||||
|
||||
/// Readiness check for our postgres process
|
||||
async fn pg_isready(&self, pg_bin_dir: &Utf8Path, postgres_port: u16) -> anyhow::Result<bool> {
|
||||
async fn pg_isready(&self, pg_bin_dir: &Utf8Path) -> anyhow::Result<bool> {
|
||||
let bin_path = pg_bin_dir.join("pg_isready");
|
||||
let args = [
|
||||
"-h",
|
||||
"localhost",
|
||||
"-U",
|
||||
&username(),
|
||||
"-d",
|
||||
DB_NAME,
|
||||
"-p",
|
||||
&format!("{}", postgres_port),
|
||||
];
|
||||
let args = ["-h", "localhost", "-p", &format!("{}", self.postgres_port)];
|
||||
let exitcode = Command::new(bin_path).args(args).spawn()?.wait().await?;
|
||||
|
||||
Ok(exitcode.success())
|
||||
}
|
||||
|
||||
/// Create our database if it doesn't exist
|
||||
/// Create our database if it doesn't exist, and run migrations.
|
||||
///
|
||||
/// This function is equivalent to the `diesel setup` command in the diesel CLI. We implement
|
||||
/// the same steps by hand to avoid imposing a dependency on installing diesel-cli for developers
|
||||
/// who just want to run `cargo neon_local` without knowing about diesel.
|
||||
///
|
||||
/// Returns the database url
|
||||
pub async fn setup_database(&self, postgres_port: u16) -> anyhow::Result<String> {
|
||||
let database_url = format!(
|
||||
"postgresql://{}@localhost:{}/{DB_NAME}",
|
||||
&username(),
|
||||
postgres_port
|
||||
);
|
||||
pub async fn setup_database(&self) -> anyhow::Result<String> {
|
||||
const DB_NAME: &str = "storage_controller";
|
||||
let database_url = format!("postgresql://localhost:{}/{DB_NAME}", self.postgres_port);
|
||||
|
||||
let pg_bin_dir = self.get_pg_bin_dir().await?;
|
||||
let createdb_path = pg_bin_dir.join("createdb");
|
||||
@@ -248,11 +202,7 @@ impl StorageController {
|
||||
"-h",
|
||||
"localhost",
|
||||
"-p",
|
||||
&format!("{}", postgres_port),
|
||||
"-U",
|
||||
&username(),
|
||||
"-O",
|
||||
&username(),
|
||||
&format!("{}", self.postgres_port),
|
||||
DB_NAME,
|
||||
])
|
||||
.output()
|
||||
@@ -271,232 +221,78 @@ impl StorageController {
|
||||
Ok(database_url)
|
||||
}
|
||||
|
||||
pub async fn connect_to_database(
|
||||
&self,
|
||||
postgres_port: u16,
|
||||
) -> anyhow::Result<(
|
||||
tokio_postgres::Client,
|
||||
tokio_postgres::Connection<tokio_postgres::Socket, tokio_postgres::tls::NoTlsStream>,
|
||||
)> {
|
||||
tokio_postgres::Config::new()
|
||||
.host("localhost")
|
||||
.port(postgres_port)
|
||||
// The user is the ambient operating system user name.
|
||||
// That is an impurity which we want to fix in => TODO https://github.com/neondatabase/neon/issues/8400
|
||||
//
|
||||
// Until we get there, use the ambient operating system user name.
|
||||
// Recent tokio-postgres versions default to this if the user isn't specified.
|
||||
// But tokio-postgres fork doesn't have this upstream commit:
|
||||
// https://github.com/sfackler/rust-postgres/commit/cb609be758f3fb5af537f04b584a2ee0cebd5e79
|
||||
// => we should rebase our fork => TODO https://github.com/neondatabase/neon/issues/8399
|
||||
.user(&username())
|
||||
.dbname(DB_NAME)
|
||||
.connect(tokio_postgres::NoTls)
|
||||
.await
|
||||
.map_err(anyhow::Error::new)
|
||||
}
|
||||
pub async fn start(&self) -> anyhow::Result<()> {
|
||||
// Start a vanilla Postgres process used by the storage controller for persistence.
|
||||
let pg_data_path = Utf8PathBuf::from_path_buf(self.env.base_data_dir.clone())
|
||||
.unwrap()
|
||||
.join("storage_controller_db");
|
||||
let pg_bin_dir = self.get_pg_bin_dir().await?;
|
||||
let pg_log_path = pg_data_path.join("postgres.log");
|
||||
|
||||
pub async fn start(&self, start_args: NeonStorageControllerStartArgs) -> anyhow::Result<()> {
|
||||
let instance_dir = self.storage_controller_instance_dir(start_args.instance_id);
|
||||
if let Err(err) = tokio::fs::create_dir(&instance_dir).await {
|
||||
if err.kind() != std::io::ErrorKind::AlreadyExists {
|
||||
panic!("Failed to create instance dir {instance_dir:?}");
|
||||
if !tokio::fs::try_exists(&pg_data_path).await? {
|
||||
// Initialize empty database
|
||||
let initdb_path = pg_bin_dir.join("initdb");
|
||||
let mut child = Command::new(&initdb_path)
|
||||
.args(["-D", pg_data_path.as_ref()])
|
||||
.spawn()
|
||||
.expect("Failed to spawn initdb");
|
||||
let status = child.wait().await?;
|
||||
if !status.success() {
|
||||
anyhow::bail!("initdb failed with status {status}");
|
||||
}
|
||||
}
|
||||
|
||||
let (listen, postgres_port) = {
|
||||
if let Some(base_port) = start_args.base_port {
|
||||
(
|
||||
format!("127.0.0.1:{base_port}"),
|
||||
self.config
|
||||
.database_url
|
||||
.expect("--base-port requires NeonStorageControllerConf::database_url")
|
||||
.port(),
|
||||
)
|
||||
} else {
|
||||
let listen_url = self.env.control_plane_api.clone().unwrap();
|
||||
|
||||
let listen = format!(
|
||||
"{}:{}",
|
||||
listen_url.host_str().unwrap(),
|
||||
listen_url.port().unwrap()
|
||||
);
|
||||
|
||||
(listen, listen_url.port().unwrap() + 1)
|
||||
}
|
||||
};
|
||||
|
||||
let socket_addr = listen
|
||||
.parse()
|
||||
.expect("listen address is a valid socket address");
|
||||
self.listen
|
||||
.set(socket_addr)
|
||||
.expect("StorageController::listen is only set here");
|
||||
|
||||
// Do we remove the pid file on stop?
|
||||
let pg_started = self.is_postgres_running().await?;
|
||||
let pg_lib_dir = self.get_pg_lib_dir().await?;
|
||||
|
||||
if !pg_started {
|
||||
// Start a vanilla Postgres process used by the storage controller for persistence.
|
||||
let pg_data_path = Utf8PathBuf::from_path_buf(self.env.base_data_dir.clone())
|
||||
.unwrap()
|
||||
.join("storage_controller_db");
|
||||
let pg_bin_dir = self.get_pg_bin_dir().await?;
|
||||
let pg_log_path = pg_data_path.join("postgres.log");
|
||||
|
||||
if !tokio::fs::try_exists(&pg_data_path).await? {
|
||||
let initdb_args = [
|
||||
"--pgdata",
|
||||
pg_data_path.as_ref(),
|
||||
"--username",
|
||||
&username(),
|
||||
"--no-sync",
|
||||
"--no-instructions",
|
||||
];
|
||||
tracing::info!(
|
||||
"Initializing storage controller database with args: {:?}",
|
||||
initdb_args
|
||||
);
|
||||
|
||||
// Initialize empty database
|
||||
let initdb_path = pg_bin_dir.join("initdb");
|
||||
let mut child = Command::new(&initdb_path)
|
||||
.envs(vec![
|
||||
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
])
|
||||
.args(initdb_args)
|
||||
.spawn()
|
||||
.expect("Failed to spawn initdb");
|
||||
let status = child.wait().await?;
|
||||
if !status.success() {
|
||||
anyhow::bail!("initdb failed with status {status}");
|
||||
}
|
||||
};
|
||||
|
||||
// Write a minimal config file:
|
||||
// - Specify the port, since this is chosen dynamically
|
||||
// - Switch off fsync, since we're running on lightweight test environments and when e.g. scale testing
|
||||
// the storage controller we don't want a slow local disk to interfere with that.
|
||||
//
|
||||
// NB: it's important that we rewrite this file on each start command so we propagate changes
|
||||
// from `LocalEnv`'s config file (`.neon/config`).
|
||||
tokio::fs::write(
|
||||
&pg_data_path.join("postgresql.conf"),
|
||||
format!("port = {}\nfsync=off\n", postgres_port),
|
||||
format!("port = {}", self.postgres_port),
|
||||
)
|
||||
.await?;
|
||||
|
||||
println!("Starting storage controller database...");
|
||||
let db_start_args = [
|
||||
"-w",
|
||||
"-D",
|
||||
pg_data_path.as_ref(),
|
||||
"-l",
|
||||
pg_log_path.as_ref(),
|
||||
"-U",
|
||||
&username(),
|
||||
"start",
|
||||
];
|
||||
tracing::info!(
|
||||
"Starting storage controller database with args: {:?}",
|
||||
db_start_args
|
||||
);
|
||||
|
||||
background_process::start_process(
|
||||
"storage_controller_db",
|
||||
&self.env.base_data_dir,
|
||||
pg_bin_dir.join("pg_ctl").as_std_path(),
|
||||
db_start_args,
|
||||
vec![
|
||||
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
],
|
||||
background_process::InitialPidFile::Create(self.postgres_pid_file()),
|
||||
&start_args.start_timeout,
|
||||
|| self.pg_isready(&pg_bin_dir, postgres_port),
|
||||
)
|
||||
.await?;
|
||||
|
||||
self.setup_database(postgres_port).await?;
|
||||
}
|
||||
|
||||
let database_url = format!("postgresql://localhost:{}/{DB_NAME}", postgres_port);
|
||||
|
||||
// We support running a startup SQL script to fiddle with the database before we launch storcon.
|
||||
// This is used by the test suite.
|
||||
let startup_script_path = self
|
||||
.env
|
||||
.base_data_dir
|
||||
.join("storage_controller_db.startup.sql");
|
||||
let startup_script = match tokio::fs::read_to_string(&startup_script_path).await {
|
||||
Ok(script) => {
|
||||
tokio::fs::remove_file(startup_script_path).await?;
|
||||
script
|
||||
}
|
||||
Err(e) => {
|
||||
if e.kind() == std::io::ErrorKind::NotFound {
|
||||
// always run some startup script so that this code path doesn't bit rot
|
||||
"BEGIN; COMMIT;".to_string()
|
||||
} else {
|
||||
anyhow::bail!("Failed to read startup script: {e}")
|
||||
}
|
||||
}
|
||||
};
|
||||
let (mut client, conn) = self.connect_to_database(postgres_port).await?;
|
||||
let conn = tokio::spawn(conn);
|
||||
let tx = client.build_transaction();
|
||||
let tx = tx.start().await?;
|
||||
tx.batch_execute(&startup_script).await?;
|
||||
tx.commit().await?;
|
||||
drop(client);
|
||||
conn.await??;
|
||||
|
||||
let listen = self
|
||||
.listen
|
||||
.get()
|
||||
.expect("cell is set earlier in this function");
|
||||
let address_for_peers = Uri::builder()
|
||||
.scheme("http")
|
||||
.authority(format!("{}:{}", listen.ip(), listen.port()))
|
||||
.path_and_query("")
|
||||
.build()
|
||||
.unwrap();
|
||||
println!("Starting storage controller database...");
|
||||
let db_start_args = [
|
||||
"-w",
|
||||
"-D",
|
||||
pg_data_path.as_ref(),
|
||||
"-l",
|
||||
pg_log_path.as_ref(),
|
||||
"start",
|
||||
];
|
||||
|
||||
background_process::start_process(
|
||||
"storage_controller_db",
|
||||
&self.env.base_data_dir,
|
||||
pg_bin_dir.join("pg_ctl").as_std_path(),
|
||||
db_start_args,
|
||||
[],
|
||||
background_process::InitialPidFile::Create(self.postgres_pid_file()),
|
||||
|| self.pg_isready(&pg_bin_dir),
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Run migrations on every startup, in case something changed.
|
||||
let database_url = self.setup_database().await?;
|
||||
|
||||
let max_unavailable: humantime::Duration = NEON_LOCAL_MAX_UNAVAILABLE_INTERVAL.into();
|
||||
|
||||
let mut args = vec![
|
||||
"-l",
|
||||
&listen.to_string(),
|
||||
&self.listen,
|
||||
"-p",
|
||||
self.path.as_ref(),
|
||||
"--dev",
|
||||
"--database-url",
|
||||
&database_url,
|
||||
"--max-offline-interval",
|
||||
&humantime::Duration::from(self.config.max_offline).to_string(),
|
||||
"--max-warming-up-interval",
|
||||
&humantime::Duration::from(self.config.max_warming_up).to_string(),
|
||||
"--heartbeat-interval",
|
||||
&humantime::Duration::from(self.config.heartbeat_interval).to_string(),
|
||||
"--address-for-peers",
|
||||
&address_for_peers.to_string(),
|
||||
"--max-unavailable-interval",
|
||||
&max_unavailable.to_string(),
|
||||
]
|
||||
.into_iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if self.config.start_as_candidate {
|
||||
args.push("--start-as-candidate".to_string());
|
||||
}
|
||||
|
||||
if let Some(private_key) = &self.private_key {
|
||||
let claims = Claims::new(None, Scope::PageServerApi);
|
||||
let jwt_token =
|
||||
encode_from_key_file(&claims, private_key).expect("failed to generate jwt token");
|
||||
args.push(format!("--jwt-token={jwt_token}"));
|
||||
|
||||
let peer_claims = Claims::new(None, Scope::Admin);
|
||||
let peer_jwt_token = encode_from_key_file(&peer_claims, private_key)
|
||||
.expect("failed to generate jwt token");
|
||||
args.push(format!("--peer-jwt-token={peer_jwt_token}"));
|
||||
}
|
||||
|
||||
if let Some(public_key) = &self.public_key {
|
||||
@@ -509,37 +305,16 @@ impl StorageController {
|
||||
));
|
||||
}
|
||||
|
||||
if let Some(split_threshold) = self.config.split_threshold.as_ref() {
|
||||
args.push(format!("--split-threshold={split_threshold}"))
|
||||
}
|
||||
|
||||
if let Some(lag) = self.config.max_secondary_lag_bytes.as_ref() {
|
||||
args.push(format!("--max-secondary-lag-bytes={lag}"))
|
||||
}
|
||||
|
||||
if let Some(threshold) = self.config.long_reconcile_threshold {
|
||||
args.push(format!(
|
||||
"--long-reconcile-threshold={}",
|
||||
humantime::Duration::from(threshold)
|
||||
))
|
||||
}
|
||||
|
||||
args.push(format!(
|
||||
"--neon-local-repo-dir={}",
|
||||
self.env.base_data_dir.display()
|
||||
));
|
||||
|
||||
background_process::start_process(
|
||||
COMMAND,
|
||||
&instance_dir,
|
||||
&self.env.base_data_dir,
|
||||
&self.env.storage_controller_bin(),
|
||||
args,
|
||||
vec![
|
||||
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
],
|
||||
background_process::InitialPidFile::Create(self.pid_file(start_args.instance_id)),
|
||||
&start_args.start_timeout,
|
||||
[(
|
||||
"NEON_REPO_DIR".to_string(),
|
||||
self.env.base_data_dir.to_string_lossy().to_string(),
|
||||
)],
|
||||
background_process::InitialPidFile::Create(self.pid_file()),
|
||||
|| async {
|
||||
match self.ready().await {
|
||||
Ok(_) => Ok(true),
|
||||
@@ -552,35 +327,8 @@ impl StorageController {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn stop(&self, stop_args: NeonStorageControllerStopArgs) -> anyhow::Result<()> {
|
||||
background_process::stop_process(
|
||||
stop_args.immediate,
|
||||
COMMAND,
|
||||
&self.pid_file(stop_args.instance_id),
|
||||
)?;
|
||||
|
||||
let storcon_instances = self.env.storage_controller_instances().await?;
|
||||
for (instance_id, instanced_dir_path) in storcon_instances {
|
||||
if instance_id == stop_args.instance_id {
|
||||
continue;
|
||||
}
|
||||
|
||||
let pid_file = instanced_dir_path.join("storage_controller.pid");
|
||||
let pid = tokio::fs::read_to_string(&pid_file)
|
||||
.await
|
||||
.map_err(|err| {
|
||||
anyhow::anyhow!("Failed to read storcon pid file at {pid_file:?}: {err}")
|
||||
})?
|
||||
.parse::<i32>()
|
||||
.expect("pid is valid i32");
|
||||
|
||||
let other_proc_alive = !background_process::process_has_stopped(Pid::from_raw(pid))?;
|
||||
if other_proc_alive {
|
||||
// There is another storage controller instance running, so we return
|
||||
// and leave the database running.
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
pub async fn stop(&self, immediate: bool) -> anyhow::Result<()> {
|
||||
background_process::stop_process(immediate, COMMAND, &self.pid_file())?;
|
||||
|
||||
let pg_data_path = self.env.base_data_dir.join("storage_controller_db");
|
||||
let pg_bin_dir = self.get_pg_bin_dir().await?;
|
||||
@@ -593,51 +341,27 @@ impl StorageController {
|
||||
.wait()
|
||||
.await?;
|
||||
if !stop_status.success() {
|
||||
match self.is_postgres_running().await {
|
||||
Ok(false) => {
|
||||
println!("Storage controller database is already stopped");
|
||||
return Ok(());
|
||||
}
|
||||
Ok(true) => {
|
||||
anyhow::bail!("Failed to stop storage controller database");
|
||||
}
|
||||
Err(err) => {
|
||||
anyhow::bail!("Failed to stop storage controller database: {err}");
|
||||
}
|
||||
let pg_status_args = ["-D", &pg_data_path.to_string_lossy(), "status"];
|
||||
let status_exitcode = Command::new(pg_bin_dir.join("pg_ctl"))
|
||||
.args(pg_status_args)
|
||||
.spawn()?
|
||||
.wait()
|
||||
.await?;
|
||||
|
||||
// pg_ctl status returns this exit code if postgres is not running: in this case it is
|
||||
// fine that stop failed. Otherwise it is an error that stop failed.
|
||||
const PG_STATUS_NOT_RUNNING: i32 = 3;
|
||||
if Some(PG_STATUS_NOT_RUNNING) == status_exitcode.code() {
|
||||
println!("Storage controller database is already stopped");
|
||||
return Ok(());
|
||||
} else {
|
||||
anyhow::bail!("Failed to stop storage controller database: {stop_status}")
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn is_postgres_running(&self) -> anyhow::Result<bool> {
|
||||
let pg_data_path = self.env.base_data_dir.join("storage_controller_db");
|
||||
let pg_bin_dir = self.get_pg_bin_dir().await?;
|
||||
|
||||
let pg_status_args = ["-D", &pg_data_path.to_string_lossy(), "status"];
|
||||
let status_exitcode = Command::new(pg_bin_dir.join("pg_ctl"))
|
||||
.args(pg_status_args)
|
||||
.spawn()?
|
||||
.wait()
|
||||
.await?;
|
||||
|
||||
// pg_ctl status returns this exit code if postgres is not running: in this case it is
|
||||
// fine that stop failed. Otherwise it is an error that stop failed.
|
||||
const PG_STATUS_NOT_RUNNING: i32 = 3;
|
||||
const PG_NO_DATA_DIR: i32 = 4;
|
||||
const PG_STATUS_RUNNING: i32 = 0;
|
||||
match status_exitcode.code() {
|
||||
Some(PG_STATUS_NOT_RUNNING) => Ok(false),
|
||||
Some(PG_NO_DATA_DIR) => Ok(false),
|
||||
Some(PG_STATUS_RUNNING) => Ok(true),
|
||||
Some(code) => Err(anyhow::anyhow!(
|
||||
"pg_ctl status returned unexpected status code: {:?}",
|
||||
code
|
||||
)),
|
||||
None => Err(anyhow::anyhow!("pg_ctl status returned no status code")),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_claims_for_path(path: &str) -> anyhow::Result<Option<Claims>> {
|
||||
let category = match path.find('/') {
|
||||
Some(idx) => &path[..idx],
|
||||
@@ -655,7 +379,7 @@ impl StorageController {
|
||||
/// Simple HTTP request wrapper for calling into storage controller
|
||||
async fn dispatch<RQ, RS>(
|
||||
&self,
|
||||
method: reqwest::Method,
|
||||
method: hyper::Method,
|
||||
path: String,
|
||||
body: Option<RQ>,
|
||||
) -> anyhow::Result<RS>
|
||||
@@ -663,31 +387,15 @@ impl StorageController {
|
||||
RQ: Serialize + Sized,
|
||||
RS: DeserializeOwned + Sized,
|
||||
{
|
||||
// In the special case of the `storage_controller start` subcommand, we wish
|
||||
// to use the API endpoint of the newly started storage controller in order
|
||||
// to pass the readiness check. In this scenario [`Self::listen`] will be set
|
||||
// (see [`Self::start`]).
|
||||
//
|
||||
// Otherwise, we infer the storage controller api endpoint from the configured
|
||||
// control plane API.
|
||||
let url = if let Some(socket_addr) = self.listen.get() {
|
||||
Url::from_str(&format!(
|
||||
"http://{}:{}/{path}",
|
||||
socket_addr.ip().to_canonical(),
|
||||
socket_addr.port()
|
||||
))
|
||||
.unwrap()
|
||||
} else {
|
||||
// The configured URL has the /upcall path prefix for pageservers to use: we will strip that out
|
||||
// for general purpose API access.
|
||||
let listen_url = self.env.control_plane_api.clone().unwrap();
|
||||
Url::from_str(&format!(
|
||||
"http://{}:{}/{path}",
|
||||
listen_url.host_str().unwrap(),
|
||||
listen_url.port().unwrap()
|
||||
))
|
||||
.unwrap()
|
||||
};
|
||||
// The configured URL has the /upcall path prefix for pageservers to use: we will strip that out
|
||||
// for general purpose API access.
|
||||
let listen_url = self.env.control_plane_api.clone().unwrap();
|
||||
let url = Url::from_str(&format!(
|
||||
"http://{}:{}/{path}",
|
||||
listen_url.host_str().unwrap(),
|
||||
listen_url.port().unwrap()
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
let mut builder = self.client.request(method, url);
|
||||
if let Some(body) = body {
|
||||
@@ -724,7 +432,6 @@ impl StorageController {
|
||||
let request = AttachHookRequest {
|
||||
tenant_shard_id,
|
||||
node_id: Some(pageserver_id),
|
||||
generation_override: None,
|
||||
};
|
||||
|
||||
let response = self
|
||||
@@ -836,15 +543,6 @@ impl StorageController {
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn node_list(&self) -> anyhow::Result<Vec<NodeDescribeResponse>> {
|
||||
self.dispatch::<(), Vec<NodeDescribeResponse>>(
|
||||
Method::GET,
|
||||
"control/v1/node".to_string(),
|
||||
None,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
#[instrument(skip(self))]
|
||||
pub async fn ready(&self) -> anyhow::Result<()> {
|
||||
self.dispatch::<(), ()>(Method::GET, "ready".to_string(), None)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user