mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-20 14:40:37 +00:00
Compare commits
43 Commits
luist18/po
...
thesuhas/r
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
33a55ba8f8 | ||
|
|
90cf481ad8 | ||
|
|
9a9e479b32 | ||
|
|
db30e1669c | ||
|
|
fdf04d4d81 | ||
|
|
136cae76c2 | ||
|
|
15e63afe7d | ||
|
|
966abd3bd6 | ||
|
|
8566cad23b | ||
|
|
228bb75354 | ||
|
|
a5b00b87ba | ||
|
|
a674ed8caf | ||
|
|
53d50c7ea5 | ||
|
|
3168bd0e3a | ||
|
|
4a97cd0b7e | ||
|
|
b7c6738524 | ||
|
|
7fe5a689b4 | ||
|
|
b0922967e0 | ||
|
|
f68be2b5e2 | ||
|
|
04370b48b3 | ||
|
|
5359cf717c | ||
|
|
d6d78a050f | ||
|
|
4ff000c042 | ||
|
|
9a3020d2ce | ||
|
|
23b713900e | ||
|
|
b1a1be6a4c | ||
|
|
8afae9d03c | ||
|
|
066b0a1be9 | ||
|
|
398d2794eb | ||
|
|
3c3b9dc919 | ||
|
|
ed31dd2a3c | ||
|
|
3dec117572 | ||
|
|
b2286f5bcb | ||
|
|
c036fec065 | ||
|
|
8af19c6a13 | ||
|
|
2e7c56182f | ||
|
|
370dfee4c8 | ||
|
|
dc75717bc0 | ||
|
|
b1ef701a06 | ||
|
|
3a86c48367 | ||
|
|
418305d250 | ||
|
|
9ba7421ec5 | ||
|
|
f2d94e3cf3 |
2
.github/workflows/_meta.yml
vendored
2
.github/workflows/_meta.yml
vendored
@@ -125,5 +125,5 @@ jobs:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
run: |
|
||||
RELEASE_PR_RUN_ID=$(gh api "/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=$CURRENT_SHA" | jq '[.workflow_runs[] | select(.name == "Build and Test") | select(.head_branch | test("^rc/release(-(proxy)|(compute))?/[0-9]{4}-[0-9]{2}-[0-9]{2}$"; "s"))] | first | .id // ("Faied to find Build and Test run from RC PR!" | halt_error(1))')
|
||||
RELEASE_PR_RUN_ID=$(gh api "/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=$CURRENT_SHA" | jq '[.workflow_runs[] | select(.name == "Build and Test") | select(.head_branch | test("^rc/release(-(proxy|compute))?/[0-9]{4}-[0-9]{2}-[0-9]{2}$"; "s"))] | first | .id // ("Failed to find Build and Test run from RC PR!" | halt_error(1))')
|
||||
echo "release-pr-run-id=$RELEASE_PR_RUN_ID" | tee -a $GITHUB_OUTPUT
|
||||
|
||||
49
.github/workflows/build_and_test.yml
vendored
49
.github/workflows/build_and_test.yml
vendored
@@ -978,16 +978,55 @@ jobs:
|
||||
acr-registry-name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
|
||||
secrets: inherit
|
||||
|
||||
# This is a bit of a special case so we're not using a generated image map.
|
||||
add-latest-tag-to-neon-extensions-test-image:
|
||||
if: github.ref_name == 'main'
|
||||
push-neon-test-extensions-image-ghcr:
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
needs: [ meta, compute-node-image ]
|
||||
uses: ./.github/workflows/_push-to-container-registry.yml
|
||||
with:
|
||||
image-map: |
|
||||
{
|
||||
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v16:latest"],
|
||||
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v17:latest"]
|
||||
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": [
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}"
|
||||
],
|
||||
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": [
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}"
|
||||
]
|
||||
}
|
||||
secrets: inherit
|
||||
|
||||
add-latest-tag-to-neon-test-extensions-image:
|
||||
if: ${{ needs.meta.outputs.run-kind == 'push-main' }}
|
||||
needs: [ meta, compute-node-image ]
|
||||
uses: ./.github/workflows/_push-to-container-registry.yml
|
||||
with:
|
||||
image-map: |
|
||||
{
|
||||
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": [
|
||||
"docker.io/neondatabase/neon-test-extensions-v16:latest",
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v16:latest"
|
||||
],
|
||||
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": [
|
||||
"docker.io/neondatabase/neon-test-extensions-v17:latest",
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v17:latest"
|
||||
]
|
||||
}
|
||||
secrets: inherit
|
||||
|
||||
add-release-tag-to-neon-test-extensions-image:
|
||||
if: ${{ needs.meta.outputs.run-kind == 'compute-release' }}
|
||||
needs: [ meta, compute-node-image ]
|
||||
uses: ./.github/workflows/_push-to-container-registry.yml
|
||||
with:
|
||||
image-map: |
|
||||
{
|
||||
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.release-pr-run-id }}": [
|
||||
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}",
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}"
|
||||
],
|
||||
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.release-pr-run-id }}": [
|
||||
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}",
|
||||
"ghcr.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}"
|
||||
]
|
||||
}
|
||||
secrets: inherit
|
||||
|
||||
|
||||
89
.github/workflows/large_oltp_benchmark.yml
vendored
89
.github/workflows/large_oltp_benchmark.yml
vendored
@@ -2,8 +2,8 @@ name: large oltp benchmark
|
||||
|
||||
on:
|
||||
# uncomment to run on push for debugging your PR
|
||||
push:
|
||||
branches: [ bodobolero/synthetic_oltp_workload ]
|
||||
#push:
|
||||
# branches: [ bodobolero/synthetic_oltp_workload ]
|
||||
|
||||
schedule:
|
||||
# * is a special character in YAML so you have to quote this string
|
||||
@@ -12,7 +12,7 @@ on:
|
||||
# │ │ ┌───────────── day of the month (1 - 31)
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
- cron: '0 15 * * *' # run once a day, timezone is utc, avoid conflict with other benchmarks
|
||||
- cron: '0 15 * * 0,2,4' # run on Sunday, Tuesday, Thursday at 3 PM UTC
|
||||
workflow_dispatch: # adds ability to run this manually
|
||||
|
||||
defaults:
|
||||
@@ -22,7 +22,7 @@ defaults:
|
||||
concurrency:
|
||||
# Allow only one workflow globally because we need dedicated resources which only exist once
|
||||
group: large-oltp-bench-workflow
|
||||
cancel-in-progress: true
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
oltp:
|
||||
@@ -31,9 +31,9 @@ jobs:
|
||||
matrix:
|
||||
include:
|
||||
- target: new_branch
|
||||
custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4
|
||||
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
|
||||
- target: reuse_branch
|
||||
custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4
|
||||
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
|
||||
max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
|
||||
permissions:
|
||||
contents: write
|
||||
@@ -46,7 +46,6 @@ jobs:
|
||||
PG_VERSION: 16 # pre-determined by pre-determined project
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: remote
|
||||
SAVE_PERF_REPORT: ${{ github.ref_name == 'main' }}
|
||||
PLATFORM: ${{ matrix.target }}
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
@@ -57,8 +56,10 @@ jobs:
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
options: --init
|
||||
|
||||
# Increase timeout to 8h, default timeout is 6h
|
||||
timeout-minutes: 480
|
||||
# Increase timeout to 2 days, default timeout is 6h - database maintenance can take a long time
|
||||
# (normally 1h pgbench, 3h vacuum analyze 3.5h re-index) x 2 = 15h, leave some buffer for regressions
|
||||
# in one run vacuum didn't finish within 12 hours
|
||||
timeout-minutes: 2880
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -89,29 +90,45 @@ jobs:
|
||||
- name: Set up Connection String
|
||||
id: set-up-connstr
|
||||
run: |
|
||||
case "${{ matrix.target }}" in
|
||||
new_branch)
|
||||
CONNSTR=${{ steps.create-neon-branch-oltp-target.outputs.dsn }}
|
||||
;;
|
||||
reuse_branch)
|
||||
CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}
|
||||
;;
|
||||
*)
|
||||
echo >&2 "Unknown target=${{ matrix.target }}"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
case "${{ matrix.target }}" in
|
||||
new_branch)
|
||||
CONNSTR=${{ steps.create-neon-branch-oltp-target.outputs.dsn }}
|
||||
;;
|
||||
reuse_branch)
|
||||
CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}
|
||||
;;
|
||||
*)
|
||||
echo >&2 "Unknown target=${{ matrix.target }}"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
||||
CONNSTR_WITHOUT_POOLER="${CONNSTR//-pooler/}"
|
||||
|
||||
- name: Benchmark pgbench with custom-scripts
|
||||
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
||||
echo "connstr_without_pooler=${CONNSTR_WITHOUT_POOLER}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Delete rows from prior runs in reuse branch
|
||||
if: ${{ matrix.target == 'reuse_branch' }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }}
|
||||
PG_CONFIG: /tmp/neon/pg_install/v16/bin/pg_config
|
||||
PSQL: /tmp/neon/pg_install/v16/bin/psql
|
||||
PG_16_LIB_PATH: /tmp/neon/pg_install/v16/lib
|
||||
run: |
|
||||
echo "$(date '+%Y-%m-%d %H:%M:%S') - Deleting rows in table webhook.incoming_webhooks from prior runs"
|
||||
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
|
||||
${PSQL} "${BENCHMARK_CONNSTR}" -c "SET statement_timeout = 0; DELETE FROM webhook.incoming_webhooks WHERE created_at > '2025-02-27 23:59:59+00';"
|
||||
echo "$(date '+%Y-%m-%d %H:%M:%S') - Finished deleting rows in table webhook.incoming_webhooks from prior runs"
|
||||
|
||||
- name: Benchmark pgbench with custom-scripts
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: performance
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_perf_oltp_large_tenant
|
||||
save_perf_report: true
|
||||
extra_params: -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_pgbench
|
||||
pg_version: ${{ env.PG_VERSION }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
@@ -119,6 +136,21 @@ jobs:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
|
||||
- name: Benchmark database maintenance
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: performance
|
||||
run_in_parallel: false
|
||||
save_perf_report: true
|
||||
extra_params: -m remote_cluster --timeout 172800 -k test_perf_oltp_large_tenant_maintenance
|
||||
pg_version: ${{ env.PG_VERSION }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
|
||||
- name: Delete Neon Branch for large tenant
|
||||
if: ${{ always() && matrix.target == 'new_branch' }}
|
||||
uses: ./.github/actions/neon-branch-delete
|
||||
@@ -127,6 +159,13 @@ jobs:
|
||||
branch_id: ${{ steps.create-neon-branch-oltp-target.outputs.branch_id }}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
- name: Configure AWS credentials # again because prior steps could have exceeded 5 hours
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours
|
||||
|
||||
- name: Create Allure report
|
||||
id: create-allure-report
|
||||
if: ${{ !cancelled() }}
|
||||
|
||||
6
.github/workflows/pre-merge-checks.yml
vendored
6
.github/workflows/pre-merge-checks.yml
vendored
@@ -27,7 +27,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf # v45.0.4
|
||||
- uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1
|
||||
id: python-src
|
||||
with:
|
||||
files: |
|
||||
@@ -38,7 +38,7 @@ jobs:
|
||||
poetry.lock
|
||||
pyproject.toml
|
||||
|
||||
- uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf # v45.0.4
|
||||
- uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1
|
||||
id: rust-src
|
||||
with:
|
||||
files: |
|
||||
@@ -148,7 +148,7 @@ jobs:
|
||||
${{
|
||||
always()
|
||||
&& github.event_name == 'merge_group'
|
||||
&& contains(fromJson('["release", "release-proxy", "release-compute"]'), github.base_ref)
|
||||
&& contains(fromJson('["release", "release-proxy", "release-compute"]'), needs.meta.outputs.branch)
|
||||
}}
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
|
||||
81
Cargo.lock
generated
81
Cargo.lock
generated
@@ -1309,6 +1309,7 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
"indexmap 2.0.1",
|
||||
"jsonwebtoken",
|
||||
"regex",
|
||||
"remote_storage",
|
||||
@@ -1339,6 +1340,7 @@ dependencies = [
|
||||
"flate2",
|
||||
"futures",
|
||||
"http 1.1.0",
|
||||
"indexmap 2.0.1",
|
||||
"jsonwebtoken",
|
||||
"metrics",
|
||||
"nix 0.27.1",
|
||||
@@ -1347,17 +1349,20 @@ dependencies = [
|
||||
"once_cell",
|
||||
"opentelemetry",
|
||||
"opentelemetry_sdk",
|
||||
"p256 0.13.2",
|
||||
"postgres",
|
||||
"postgres_initdb",
|
||||
"regex",
|
||||
"remote_storage",
|
||||
"reqwest",
|
||||
"ring",
|
||||
"rlimit",
|
||||
"rust-ini",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_with",
|
||||
"signal-hook",
|
||||
"spki 0.7.3",
|
||||
"tar",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
@@ -1377,6 +1382,7 @@ dependencies = [
|
||||
"vm_monitor",
|
||||
"walkdir",
|
||||
"workspace_hack",
|
||||
"x509-cert",
|
||||
"zstd",
|
||||
]
|
||||
|
||||
@@ -1801,6 +1807,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fffa369a668c8af7dbf8b5e56c9f744fbd399949ed171606040001947de40b1c"
|
||||
dependencies = [
|
||||
"const-oid",
|
||||
"der_derive",
|
||||
"flagset",
|
||||
"pem-rfc7468",
|
||||
"zeroize",
|
||||
]
|
||||
@@ -1819,6 +1827,17 @@ dependencies = [
|
||||
"rusticata-macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "der_derive"
|
||||
version = "0.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8034092389675178f570469e6c3b0465d3d30b4505c294a6550db47f3c17ad18"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "deranged"
|
||||
version = "0.3.11"
|
||||
@@ -2282,6 +2301,12 @@ version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
|
||||
|
||||
[[package]]
|
||||
name = "flagset"
|
||||
version = "0.4.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b3ea1ec5f8307826a5b71094dd91fc04d4ae75d5709b20ad351c7fb4815c86ec"
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.0.26"
|
||||
@@ -2838,6 +2863,7 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
"camino",
|
||||
"fail",
|
||||
"futures",
|
||||
"hyper 0.14.30",
|
||||
@@ -2848,6 +2874,7 @@ dependencies = [
|
||||
"pprof",
|
||||
"regex",
|
||||
"routerify",
|
||||
"rustls-pemfile 2.1.1",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_path_to_error",
|
||||
@@ -2877,9 +2904,9 @@ checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"
|
||||
|
||||
[[package]]
|
||||
name = "humantime"
|
||||
version = "2.1.0"
|
||||
version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
|
||||
checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f"
|
||||
|
||||
[[package]]
|
||||
name = "humantime-serde"
|
||||
@@ -4302,8 +4329,6 @@ dependencies = [
|
||||
"reqwest",
|
||||
"rpds",
|
||||
"rustls 0.23.18",
|
||||
"rustls-pemfile 2.1.1",
|
||||
"rustls-pki-types",
|
||||
"scopeguard",
|
||||
"send-future",
|
||||
"serde",
|
||||
@@ -6019,6 +6044,7 @@ dependencies = [
|
||||
"regex",
|
||||
"remote_storage",
|
||||
"reqwest",
|
||||
"rustls 0.23.18",
|
||||
"safekeeper_api",
|
||||
"safekeeper_client",
|
||||
"scopeguard",
|
||||
@@ -6035,6 +6061,7 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-io-timeout",
|
||||
"tokio-postgres",
|
||||
"tokio-rustls 0.26.0",
|
||||
"tokio-stream",
|
||||
"tokio-tar",
|
||||
"tokio-util",
|
||||
@@ -6425,9 +6452,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sha1"
|
||||
version = "0.10.5"
|
||||
version = "0.10.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3"
|
||||
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
@@ -7135,6 +7162,27 @@ version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
|
||||
|
||||
[[package]]
|
||||
name = "tls_codec"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0de2e01245e2bb89d6f05801c564fa27624dbd7b1846859876c7dad82e90bf6b"
|
||||
dependencies = [
|
||||
"tls_codec_derive",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tls_codec_derive"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2d2e76690929402faae40aebdda620a2c0e25dd6d3b9afe48867dfd95991f4bd"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio"
|
||||
version = "1.43.0"
|
||||
@@ -8387,12 +8435,15 @@ dependencies = [
|
||||
"chrono",
|
||||
"clap",
|
||||
"clap_builder",
|
||||
"const-oid",
|
||||
"crypto-bigint 0.5.5",
|
||||
"der 0.7.8",
|
||||
"deranged",
|
||||
"digest",
|
||||
"displaydoc",
|
||||
"ecdsa 0.16.9",
|
||||
"either",
|
||||
"elliptic-curve 0.13.8",
|
||||
"env_filter",
|
||||
"env_logger",
|
||||
"fail",
|
||||
@@ -8427,6 +8478,7 @@ dependencies = [
|
||||
"num-rational",
|
||||
"num-traits",
|
||||
"once_cell",
|
||||
"p256 0.13.2",
|
||||
"parquet",
|
||||
"prettyplease",
|
||||
"proc-macro2",
|
||||
@@ -8439,6 +8491,7 @@ dependencies = [
|
||||
"reqwest",
|
||||
"rustls 0.23.18",
|
||||
"scopeguard",
|
||||
"sec1 0.7.3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2",
|
||||
@@ -8484,6 +8537,18 @@ version = "0.5.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"
|
||||
|
||||
[[package]]
|
||||
name = "x509-cert"
|
||||
version = "0.2.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1301e935010a701ae5f8655edc0ad17c44bad3ac5ce8c39185f75453b720ae94"
|
||||
dependencies = [
|
||||
"const-oid",
|
||||
"der 0.7.8",
|
||||
"spki 0.7.3",
|
||||
"tls_codec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "x509-certificate"
|
||||
version = "0.23.1"
|
||||
@@ -8612,9 +8677,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "zeroize"
|
||||
version = "1.7.0"
|
||||
version = "1.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d"
|
||||
checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"zeroize_derive",
|
||||
|
||||
@@ -106,13 +106,13 @@ hostname = "0.4"
|
||||
http = {version = "1.1.0", features = ["std"]}
|
||||
http-types = { version = "2", default-features = false }
|
||||
http-body-util = "0.1.2"
|
||||
humantime = "2.1"
|
||||
humantime = "2.2"
|
||||
humantime-serde = "1.1.1"
|
||||
hyper0 = { package = "hyper", version = "0.14" }
|
||||
hyper = "1.4"
|
||||
hyper-util = "0.1"
|
||||
tokio-tungstenite = "0.21.0"
|
||||
indexmap = "2"
|
||||
indexmap = { version = "2", features = ["serde"] }
|
||||
indoc = "2"
|
||||
ipnet = "2.10.0"
|
||||
itertools = "0.10"
|
||||
|
||||
@@ -1055,6 +1055,34 @@ RUN if [ -d pg_embedding-src ]; then \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install; \
|
||||
fi
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg_anon-build"
|
||||
# compile anon extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg_anon-src
|
||||
ARG PG_VERSION
|
||||
|
||||
# This is an experimental extension, never got to real production.
|
||||
# !Do not remove! It can be present in shared_preload_libraries and compute will fail to start if library is not found.
|
||||
WORKDIR /ext-src
|
||||
RUN case "${PG_VERSION:?}" in "v17") \
|
||||
echo "postgresql_anonymizer does not yet support PG17" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
|
||||
echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9 pg_anon.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C .
|
||||
|
||||
FROM pg-build AS pg_anon-build
|
||||
COPY --from=pg_anon-src /ext-src/ /ext-src/
|
||||
WORKDIR /ext-src
|
||||
RUN if [ -d pg_anon-src ]; then \
|
||||
cd pg_anon-src && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control; \
|
||||
fi
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg build with nonroot user and cargo installed"
|
||||
@@ -1352,27 +1380,6 @@ COPY --from=pg_session_jwt-src /ext-src/ /ext-src/
|
||||
WORKDIR /ext-src/pg_session_jwt-src
|
||||
RUN cargo pgrx install --release
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg-anon-pg-build"
|
||||
# compile anon extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM rust-extensions-build-pgrx12 AS pg-anon-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# This is an experimental extension, never got to real production.
|
||||
# !Do not remove! It can be present in shared_preload_libraries and compute will fail to start if library is not found.
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://gitlab.com/dalibo/postgresql_anonymizer/-/archive/latest/postgresql_anonymizer-latest.tar.gz -O pg_anon.tar.gz && \
|
||||
mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \
|
||||
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt && \
|
||||
sed -i 's/pgrx = "0.12.9"/pgrx = { version = "=0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) extension PG_CONFIG=/usr/local/pgsql/bin/pg_config PGVER=pg$(echo "$PG_VERSION" | sed 's/^v//') && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config PGVER=pg$(echo "$PG_VERSION" | sed 's/^v//') && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "wal2json-build"
|
||||
@@ -1670,6 +1677,7 @@ COPY --from=pg_roaringbitmap-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_semver-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_embedding-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=wal2json-build /usr/local/pgsql /usr/local/pgsql
|
||||
COPY --from=pg_anon-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_ivm-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_mooncake-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
@@ -1727,6 +1735,8 @@ RUN set -e \
|
||||
libevent-dev \
|
||||
libtool \
|
||||
pkg-config \
|
||||
libcurl4-openssl-dev \
|
||||
libssl-dev \
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Use `dist_man_MANS=` to skip manpage generation (which requires python3/pandoc)
|
||||
@@ -1735,7 +1745,7 @@ RUN set -e \
|
||||
&& git clone --recurse-submodules --depth 1 --branch ${PGBOUNCER_TAG} https://github.com/pgbouncer/pgbouncer.git pgbouncer \
|
||||
&& cd pgbouncer \
|
||||
&& ./autogen.sh \
|
||||
&& ./configure --prefix=/usr/local/pgbouncer --without-openssl \
|
||||
&& ./configure --prefix=/usr/local/pgbouncer \
|
||||
&& make -j $(nproc) dist_man_MANS= \
|
||||
&& make install dist_man_MANS=
|
||||
|
||||
|
||||
@@ -1,265 +0,0 @@
|
||||
commit 00aa659afc9c7336ab81036edec3017168aabf40
|
||||
Author: Heikki Linnakangas <heikki@neon.tech>
|
||||
Date: Tue Nov 12 16:59:19 2024 +0200
|
||||
|
||||
Temporarily disable test that depends on timezone
|
||||
|
||||
diff --git a/tests/expected/generalization.out b/tests/expected/generalization.out
|
||||
index 23ef5fa..9e60deb 100644
|
||||
--- a/ext-src/pg_anon-src/tests/expected/generalization.out
|
||||
+++ b/ext-src/pg_anon-src/tests/expected/generalization.out
|
||||
@@ -284,12 +284,9 @@ SELECT anon.generalize_tstzrange('19041107','century');
|
||||
["Tue Jan 01 00:00:00 1901 PST","Mon Jan 01 00:00:00 2001 PST")
|
||||
(1 row)
|
||||
|
||||
-SELECT anon.generalize_tstzrange('19041107','millennium');
|
||||
- generalize_tstzrange
|
||||
------------------------------------------------------------------
|
||||
- ["Thu Jan 01 00:00:00 1001 PST","Mon Jan 01 00:00:00 2001 PST")
|
||||
-(1 row)
|
||||
-
|
||||
+-- temporarily disabled, see:
|
||||
+-- https://gitlab.com/dalibo/postgresql_anonymizer/-/commit/199f0a392b37c59d92ae441fb8f037e094a11a52#note_2148017485
|
||||
+--SELECT anon.generalize_tstzrange('19041107','millennium');
|
||||
-- generalize_daterange
|
||||
SELECT anon.generalize_daterange('19041107');
|
||||
generalize_daterange
|
||||
diff --git a/tests/sql/generalization.sql b/tests/sql/generalization.sql
|
||||
index b868344..b4fc977 100644
|
||||
--- a/ext-src/pg_anon-src/tests/sql/generalization.sql
|
||||
+++ b/ext-src/pg_anon-src/tests/sql/generalization.sql
|
||||
@@ -61,7 +61,9 @@ SELECT anon.generalize_tstzrange('19041107','month');
|
||||
SELECT anon.generalize_tstzrange('19041107','year');
|
||||
SELECT anon.generalize_tstzrange('19041107','decade');
|
||||
SELECT anon.generalize_tstzrange('19041107','century');
|
||||
-SELECT anon.generalize_tstzrange('19041107','millennium');
|
||||
+-- temporarily disabled, see:
|
||||
+-- https://gitlab.com/dalibo/postgresql_anonymizer/-/commit/199f0a392b37c59d92ae441fb8f037e094a11a52#note_2148017485
|
||||
+--SELECT anon.generalize_tstzrange('19041107','millennium');
|
||||
|
||||
-- generalize_daterange
|
||||
SELECT anon.generalize_daterange('19041107');
|
||||
|
||||
commit 7dd414ee75f2875cffb1d6ba474df1f135a6fc6f
|
||||
Author: Alexey Masterov <alexeymasterov@neon.tech>
|
||||
Date: Fri May 31 06:34:26 2024 +0000
|
||||
|
||||
These alternative expected files were added to consider the neon features
|
||||
|
||||
diff --git a/ext-src/pg_anon-src/tests/expected/permissions_masked_role_1.out b/ext-src/pg_anon-src/tests/expected/permissions_masked_role_1.out
|
||||
new file mode 100644
|
||||
index 0000000..2539cfd
|
||||
--- /dev/null
|
||||
+++ b/ext-src/pg_anon-src/tests/expected/permissions_masked_role_1.out
|
||||
@@ -0,0 +1,101 @@
|
||||
+BEGIN;
|
||||
+CREATE EXTENSION anon CASCADE;
|
||||
+NOTICE: installing required extension "pgcrypto"
|
||||
+SELECT anon.init();
|
||||
+ init
|
||||
+------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+CREATE ROLE mallory_the_masked_user;
|
||||
+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS 'MASKED';
|
||||
+CREATE TABLE t1(i INT);
|
||||
+ALTER TABLE t1 ADD COLUMN t TEXT;
|
||||
+SECURITY LABEL FOR anon ON COLUMN t1.t
|
||||
+IS 'MASKED WITH VALUE NULL';
|
||||
+INSERT INTO t1 VALUES (1,'test');
|
||||
+--
|
||||
+-- We're checking the owner's permissions
|
||||
+--
|
||||
+-- see
|
||||
+-- https://postgresql-anonymizer.readthedocs.io/en/latest/SECURITY/#permissions
|
||||
+--
|
||||
+SET ROLE mallory_the_masked_user;
|
||||
+SELECT anon.pseudo_first_name(0) IS NOT NULL;
|
||||
+ ?column?
|
||||
+----------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+-- SHOULD FAIL
|
||||
+DO $$
|
||||
+BEGIN
|
||||
+ PERFORM anon.init();
|
||||
+ EXCEPTION WHEN insufficient_privilege
|
||||
+ THEN RAISE NOTICE 'insufficient_privilege';
|
||||
+END$$;
|
||||
+NOTICE: insufficient_privilege
|
||||
+-- SHOULD FAIL
|
||||
+DO $$
|
||||
+BEGIN
|
||||
+ PERFORM anon.anonymize_table('t1');
|
||||
+ EXCEPTION WHEN insufficient_privilege
|
||||
+ THEN RAISE NOTICE 'insufficient_privilege';
|
||||
+END$$;
|
||||
+NOTICE: insufficient_privilege
|
||||
+-- SHOULD FAIL
|
||||
+SAVEPOINT fail_start_engine;
|
||||
+SELECT anon.start_dynamic_masking();
|
||||
+ERROR: Only supersusers can start the dynamic masking engine.
|
||||
+CONTEXT: PL/pgSQL function anon.start_dynamic_masking(boolean) line 18 at RAISE
|
||||
+ROLLBACK TO fail_start_engine;
|
||||
+RESET ROLE;
|
||||
+SELECT anon.start_dynamic_masking();
|
||||
+ start_dynamic_masking
|
||||
+-----------------------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+SET ROLE mallory_the_masked_user;
|
||||
+SELECT * FROM mask.t1;
|
||||
+ i | t
|
||||
+---+---
|
||||
+ 1 |
|
||||
+(1 row)
|
||||
+
|
||||
+-- SHOULD FAIL
|
||||
+DO $$
|
||||
+BEGIN
|
||||
+ SELECT * FROM public.t1;
|
||||
+ EXCEPTION WHEN insufficient_privilege
|
||||
+ THEN RAISE NOTICE 'insufficient_privilege';
|
||||
+END$$;
|
||||
+NOTICE: insufficient_privilege
|
||||
+-- SHOULD FAIL
|
||||
+SAVEPOINT fail_stop_engine;
|
||||
+SELECT anon.stop_dynamic_masking();
|
||||
+ERROR: Only supersusers can stop the dynamic masking engine.
|
||||
+CONTEXT: PL/pgSQL function anon.stop_dynamic_masking() line 18 at RAISE
|
||||
+ROLLBACK TO fail_stop_engine;
|
||||
+RESET ROLE;
|
||||
+SELECT anon.stop_dynamic_masking();
|
||||
+NOTICE: The previous priviledges of 'mallory_the_masked_user' are not restored. You need to grant them manually.
|
||||
+ stop_dynamic_masking
|
||||
+----------------------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+SET ROLE mallory_the_masked_user;
|
||||
+SELECT COUNT(*)=1 FROM anon.pg_masking_rules;
|
||||
+ ?column?
|
||||
+----------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+-- SHOULD FAIL
|
||||
+SAVEPOINT fail_seclabel_on_role;
|
||||
+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS NULL;
|
||||
+ERROR: permission denied
|
||||
+DETAIL: The current user must have the CREATEROLE attribute.
|
||||
+ROLLBACK TO fail_seclabel_on_role;
|
||||
+ROLLBACK;
|
||||
diff --git a/ext-src/pg_anon-src/tests/expected/permissions_owner_1.out b/ext-src/pg_anon-src/tests/expected/permissions_owner_1.out
|
||||
new file mode 100644
|
||||
index 0000000..8b090fe
|
||||
--- /dev/null
|
||||
+++ b/ext-src/pg_anon-src/tests/expected/permissions_owner_1.out
|
||||
@@ -0,0 +1,104 @@
|
||||
+BEGIN;
|
||||
+CREATE EXTENSION anon CASCADE;
|
||||
+NOTICE: installing required extension "pgcrypto"
|
||||
+SELECT anon.init();
|
||||
+ init
|
||||
+------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+CREATE ROLE oscar_the_owner;
|
||||
+ALTER DATABASE :DBNAME OWNER TO oscar_the_owner;
|
||||
+CREATE ROLE mallory_the_masked_user;
|
||||
+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS 'MASKED';
|
||||
+--
|
||||
+-- We're checking the owner's permissions
|
||||
+--
|
||||
+-- see
|
||||
+-- https://postgresql-anonymizer.readthedocs.io/en/latest/SECURITY/#permissions
|
||||
+--
|
||||
+SET ROLE oscar_the_owner;
|
||||
+SELECT anon.pseudo_first_name(0) IS NOT NULL;
|
||||
+ ?column?
|
||||
+----------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+-- SHOULD FAIL
|
||||
+DO $$
|
||||
+BEGIN
|
||||
+ PERFORM anon.init();
|
||||
+ EXCEPTION WHEN insufficient_privilege
|
||||
+ THEN RAISE NOTICE 'insufficient_privilege';
|
||||
+END$$;
|
||||
+NOTICE: insufficient_privilege
|
||||
+CREATE TABLE t1(i INT);
|
||||
+ALTER TABLE t1 ADD COLUMN t TEXT;
|
||||
+SECURITY LABEL FOR anon ON COLUMN t1.t
|
||||
+IS 'MASKED WITH VALUE NULL';
|
||||
+INSERT INTO t1 VALUES (1,'test');
|
||||
+SELECT anon.anonymize_table('t1');
|
||||
+ anonymize_table
|
||||
+-----------------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+SELECT * FROM t1;
|
||||
+ i | t
|
||||
+---+---
|
||||
+ 1 |
|
||||
+(1 row)
|
||||
+
|
||||
+UPDATE t1 SET t='test' WHERE i=1;
|
||||
+-- SHOULD FAIL
|
||||
+SAVEPOINT fail_start_engine;
|
||||
+SELECT anon.start_dynamic_masking();
|
||||
+ start_dynamic_masking
|
||||
+-----------------------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+ROLLBACK TO fail_start_engine;
|
||||
+RESET ROLE;
|
||||
+SELECT anon.start_dynamic_masking();
|
||||
+ start_dynamic_masking
|
||||
+-----------------------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+SET ROLE oscar_the_owner;
|
||||
+SELECT * FROM t1;
|
||||
+ i | t
|
||||
+---+------
|
||||
+ 1 | test
|
||||
+(1 row)
|
||||
+
|
||||
+--SELECT * FROM mask.t1;
|
||||
+-- SHOULD FAIL
|
||||
+SAVEPOINT fail_stop_engine;
|
||||
+SELECT anon.stop_dynamic_masking();
|
||||
+ERROR: permission denied for schema mask
|
||||
+CONTEXT: SQL statement "DROP VIEW mask.t1;"
|
||||
+PL/pgSQL function anon.mask_drop_view(oid) line 3 at EXECUTE
|
||||
+SQL statement "SELECT anon.mask_drop_view(oid)
|
||||
+ FROM pg_catalog.pg_class
|
||||
+ WHERE relnamespace=quote_ident(pg_catalog.current_setting('anon.sourceschema'))::REGNAMESPACE
|
||||
+ AND relkind IN ('r','p','f')"
|
||||
+PL/pgSQL function anon.stop_dynamic_masking() line 22 at PERFORM
|
||||
+ROLLBACK TO fail_stop_engine;
|
||||
+RESET ROLE;
|
||||
+SELECT anon.stop_dynamic_masking();
|
||||
+NOTICE: The previous priviledges of 'mallory_the_masked_user' are not restored. You need to grant them manually.
|
||||
+ stop_dynamic_masking
|
||||
+----------------------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+SET ROLE oscar_the_owner;
|
||||
+-- SHOULD FAIL
|
||||
+SAVEPOINT fail_seclabel_on_role;
|
||||
+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS NULL;
|
||||
+ERROR: permission denied
|
||||
+DETAIL: The current user must have the CREATEROLE attribute.
|
||||
+ROLLBACK TO fail_seclabel_on_role;
|
||||
+ROLLBACK;
|
||||
@@ -39,6 +39,13 @@ commands:
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'
|
||||
# Rsyslog by default creates a unix socket under /dev/log . That's where Postgres sends logs also.
|
||||
# We run syslog with postgres user so it can't create /dev/log. Instead we configure rsyslog to
|
||||
# use a different path for the socket. The symlink actually points to our custom path.
|
||||
- name: rsyslogd-socket-symlink
|
||||
user: root
|
||||
sysvInitAction: sysinit
|
||||
shell: "ln -s /var/db/postgres/rsyslogpipe /dev/log"
|
||||
- name: rsyslogd
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
@@ -77,6 +84,9 @@ files:
|
||||
# compute_ctl will rewrite this file with the actual configuration, if needed.
|
||||
- filename: compute_rsyslog.conf
|
||||
content: |
|
||||
# Syslock.Name specifies a non-default pipe location that is writeable for the postgres user.
|
||||
module(load="imuxsock" SysSock.Name="/var/db/postgres/rsyslogpipe") # provides support for local system logging
|
||||
|
||||
*.* /dev/null
|
||||
$IncludeConfig /etc/rsyslog.d/*.conf
|
||||
build: |
|
||||
@@ -145,7 +155,7 @@ merge: |
|
||||
|
||||
COPY compute_rsyslog.conf /etc/compute_rsyslog.conf
|
||||
RUN chmod 0666 /etc/compute_rsyslog.conf
|
||||
RUN chmod 0666 /var/log/
|
||||
RUN mkdir /var/log/rsyslog && chown -R postgres /var/log/rsyslog
|
||||
|
||||
|
||||
COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/
|
||||
|
||||
@@ -39,6 +39,13 @@ commands:
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'
|
||||
# Rsyslog by default creates a unix socket under /dev/log . That's where Postgres sends logs also.
|
||||
# We run syslog with postgres user so it can't create /dev/log. Instead we configure rsyslog to
|
||||
# use a different path for the socket. The symlink actually points to our custom path.
|
||||
- name: rsyslogd-socket-symlink
|
||||
user: root
|
||||
sysvInitAction: sysinit
|
||||
shell: "ln -s /var/db/postgres/rsyslogpipe /dev/log"
|
||||
- name: rsyslogd
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
@@ -77,6 +84,9 @@ files:
|
||||
# compute_ctl will rewrite this file with the actual configuration, if needed.
|
||||
- filename: compute_rsyslog.conf
|
||||
content: |
|
||||
# Syslock.Name specifies a non-default pipe location that is writeable for the postgres user.
|
||||
module(load="imuxsock" SysSock.Name="/var/db/postgres/rsyslogpipe") # provides support for local system logging
|
||||
|
||||
*.* /dev/null
|
||||
$IncludeConfig /etc/rsyslog.d/*.conf
|
||||
build: |
|
||||
@@ -140,7 +150,7 @@ merge: |
|
||||
|
||||
COPY compute_rsyslog.conf /etc/compute_rsyslog.conf
|
||||
RUN chmod 0666 /etc/compute_rsyslog.conf
|
||||
RUN chmod 0666 /var/log/
|
||||
RUN mkdir /var/log/rsyslog && chown -R postgres /var/log/rsyslog
|
||||
|
||||
|
||||
COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/
|
||||
|
||||
@@ -26,6 +26,7 @@ fail.workspace = true
|
||||
flate2.workspace = true
|
||||
futures.workspace = true
|
||||
http.workspace = true
|
||||
indexmap.workspace = true
|
||||
jsonwebtoken.workspace = true
|
||||
metrics.workspace = true
|
||||
nix.workspace = true
|
||||
@@ -34,16 +35,19 @@ num_cpus.workspace = true
|
||||
once_cell.workspace = true
|
||||
opentelemetry.workspace = true
|
||||
opentelemetry_sdk.workspace = true
|
||||
p256 = { version = "0.13", features = ["pem"] }
|
||||
postgres.workspace = true
|
||||
regex.workspace = true
|
||||
reqwest = { workspace = true, features = ["json"] }
|
||||
ring = "0.17"
|
||||
serde.workspace = true
|
||||
serde_with.workspace = true
|
||||
serde_json.workspace = true
|
||||
signal-hook.workspace = true
|
||||
spki = { version = "0.7.3", features = ["std"] }
|
||||
tar.workspace = true
|
||||
tower.workspace = true
|
||||
tower-http.workspace = true
|
||||
reqwest = { workspace = true, features = ["json"] }
|
||||
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
|
||||
tokio-postgres.workspace = true
|
||||
tokio-util.workspace = true
|
||||
@@ -57,6 +61,7 @@ thiserror.workspace = true
|
||||
url.workspace = true
|
||||
uuid.workspace = true
|
||||
walkdir.workspace = true
|
||||
x509-cert = { version = "0.2.5" }
|
||||
|
||||
postgres_initdb.workspace = true
|
||||
compute_api.workspace = true
|
||||
|
||||
@@ -37,10 +37,14 @@ use crate::logger::startup_context_from_env;
|
||||
use crate::lsn_lease::launch_lsn_lease_bg_task_for_static;
|
||||
use crate::monitor::launch_monitor;
|
||||
use crate::pg_helpers::*;
|
||||
use crate::rsyslog::configure_audit_rsyslog;
|
||||
use crate::rsyslog::{
|
||||
PostgresLogsRsyslogConfig, configure_audit_rsyslog, configure_postgres_logs_export,
|
||||
launch_pgaudit_gc,
|
||||
};
|
||||
use crate::spec::*;
|
||||
use crate::swap::resize_swap;
|
||||
use crate::sync_sk::{check_if_synced, ping_safekeeper};
|
||||
use crate::tls::watch_cert_for_changes;
|
||||
use crate::{config, extension_server, local_proxy};
|
||||
|
||||
pub static SYNC_SAFEKEEPERS_PID: AtomicU32 = AtomicU32::new(0);
|
||||
@@ -112,6 +116,7 @@ pub struct ComputeNode {
|
||||
|
||||
// key: ext_archive_name, value: started download time, download_completed?
|
||||
pub ext_download_progress: RwLock<HashMap<String, (DateTime<Utc>, bool)>>,
|
||||
pub compute_ctl_config: ComputeCtlConfig,
|
||||
}
|
||||
|
||||
// store some metrics about download size that might impact startup time
|
||||
@@ -135,8 +140,6 @@ pub struct ComputeState {
|
||||
/// passed by the control plane with a /configure HTTP request.
|
||||
pub pspec: Option<ParsedSpec>,
|
||||
|
||||
pub compute_ctl_config: ComputeCtlConfig,
|
||||
|
||||
/// If the spec is passed by a /configure request, 'startup_span' is the
|
||||
/// /configure request's tracing span. The main thread enters it when it
|
||||
/// processes the compute startup, so that the compute startup is considered
|
||||
@@ -160,7 +163,6 @@ impl ComputeState {
|
||||
last_active: None,
|
||||
error: None,
|
||||
pspec: None,
|
||||
compute_ctl_config: ComputeCtlConfig::default(),
|
||||
startup_span: None,
|
||||
metrics: ComputeMetrics::default(),
|
||||
}
|
||||
@@ -314,7 +316,6 @@ impl ComputeNode {
|
||||
let pspec = ParsedSpec::try_from(cli_spec).map_err(|msg| anyhow::anyhow!(msg))?;
|
||||
new_state.pspec = Some(pspec);
|
||||
}
|
||||
new_state.compute_ctl_config = compute_ctl_config;
|
||||
|
||||
Ok(ComputeNode {
|
||||
params,
|
||||
@@ -323,6 +324,7 @@ impl ComputeNode {
|
||||
state: Mutex::new(new_state),
|
||||
state_changed: Condvar::new(),
|
||||
ext_download_progress: RwLock::new(HashMap::new()),
|
||||
compute_ctl_config,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -345,7 +347,7 @@ impl ComputeNode {
|
||||
// requests while configuration is still in progress.
|
||||
crate::http::server::Server::External {
|
||||
port: this.params.external_http_port,
|
||||
jwks: this.state.lock().unwrap().compute_ctl_config.jwks.clone(),
|
||||
config: this.compute_ctl_config.clone(),
|
||||
compute_id: this.params.compute_id.clone(),
|
||||
}
|
||||
.launch(&this);
|
||||
@@ -524,6 +526,16 @@ impl ComputeNode {
|
||||
// Collect all the tasks that must finish here
|
||||
let mut pre_tasks = tokio::task::JoinSet::new();
|
||||
|
||||
// Make sure TLS certificates are properly loaded and in the right place.
|
||||
if self.compute_ctl_config.tls.is_some() {
|
||||
let this = self.clone();
|
||||
pre_tasks.spawn(async move {
|
||||
this.watch_cert_for_changes().await;
|
||||
|
||||
Ok::<(), anyhow::Error>(())
|
||||
});
|
||||
}
|
||||
|
||||
// If there are any remote extensions in shared_preload_libraries, start downloading them
|
||||
if pspec.spec.remote_extensions.is_some() {
|
||||
let (this, spec) = (self.clone(), pspec.spec.clone());
|
||||
@@ -579,11 +591,13 @@ impl ComputeNode {
|
||||
if let Some(pgbouncer_settings) = &pspec.spec.pgbouncer_settings {
|
||||
info!("tuning pgbouncer");
|
||||
|
||||
let pgbouncer_settings = pgbouncer_settings.clone();
|
||||
let tls_config = self.compute_ctl_config.tls.clone();
|
||||
|
||||
// Spawn a background task to do the tuning,
|
||||
// so that we don't block the main thread that starts Postgres.
|
||||
let pgbouncer_settings = pgbouncer_settings.clone();
|
||||
let _handle = tokio::spawn(async move {
|
||||
let res = tune_pgbouncer(pgbouncer_settings).await;
|
||||
let res = tune_pgbouncer(pgbouncer_settings, tls_config).await;
|
||||
if let Err(err) = res {
|
||||
error!("error while tuning pgbouncer: {err:?}");
|
||||
// Continue with the startup anyway
|
||||
@@ -606,7 +620,7 @@ impl ComputeNode {
|
||||
});
|
||||
}
|
||||
|
||||
// Configure and start rsyslog if necessary
|
||||
// Configure and start rsyslog for HIPAA if necessary
|
||||
if let ComputeAudit::Hipaa = pspec.spec.audit_log_level {
|
||||
let remote_endpoint = std::env::var("AUDIT_LOGGING_ENDPOINT").unwrap_or("".to_string());
|
||||
if remote_endpoint.is_empty() {
|
||||
@@ -614,13 +628,22 @@ impl ComputeNode {
|
||||
}
|
||||
|
||||
let log_directory_path = Path::new(&self.params.pgdata).join("log");
|
||||
// TODO: make this more robust
|
||||
// now rsyslog starts once and there is no monitoring or restart if it fails
|
||||
configure_audit_rsyslog(
|
||||
log_directory_path.to_str().unwrap(),
|
||||
"hipaa",
|
||||
&remote_endpoint,
|
||||
)?;
|
||||
let log_directory_path = log_directory_path.to_string_lossy().to_string();
|
||||
configure_audit_rsyslog(log_directory_path.clone(), "hipaa", &remote_endpoint)?;
|
||||
|
||||
// Launch a background task to clean up the audit logs
|
||||
launch_pgaudit_gc(log_directory_path);
|
||||
}
|
||||
|
||||
// Configure and start rsyslog for Postgres logs export
|
||||
if self.has_feature(ComputeFeature::PostgresLogsExport) {
|
||||
if let Some(ref project_id) = pspec.spec.cluster.cluster_id {
|
||||
let host = PostgresLogsRsyslogConfig::default_host(project_id);
|
||||
let conf = PostgresLogsRsyslogConfig::new(Some(&host));
|
||||
configure_postgres_logs_export(conf)?;
|
||||
} else {
|
||||
warn!("not configuring rsyslog for Postgres logs export: project ID is missing")
|
||||
}
|
||||
}
|
||||
|
||||
// Launch remaining service threads
|
||||
@@ -1105,9 +1128,10 @@ impl ComputeNode {
|
||||
// Remove/create an empty pgdata directory and put configuration there.
|
||||
self.create_pgdata()?;
|
||||
config::write_postgres_conf(
|
||||
&pgdata_path.join("postgresql.conf"),
|
||||
pgdata_path,
|
||||
&pspec.spec,
|
||||
self.params.internal_http_port,
|
||||
&self.compute_ctl_config.tls,
|
||||
)?;
|
||||
|
||||
// Syncing safekeepers is only safe with primary nodes: if a primary
|
||||
@@ -1489,11 +1513,13 @@ impl ComputeNode {
|
||||
if let Some(ref pgbouncer_settings) = spec.pgbouncer_settings {
|
||||
info!("tuning pgbouncer");
|
||||
|
||||
let pgbouncer_settings = pgbouncer_settings.clone();
|
||||
let tls_config = self.compute_ctl_config.tls.clone();
|
||||
|
||||
// Spawn a background task to do the tuning,
|
||||
// so that we don't block the main thread that starts Postgres.
|
||||
let pgbouncer_settings = pgbouncer_settings.clone();
|
||||
tokio::spawn(async move {
|
||||
let res = tune_pgbouncer(pgbouncer_settings).await;
|
||||
let res = tune_pgbouncer(pgbouncer_settings, tls_config).await;
|
||||
if let Err(err) = res {
|
||||
error!("error while tuning pgbouncer: {err:?}");
|
||||
}
|
||||
@@ -1505,7 +1531,8 @@ impl ComputeNode {
|
||||
|
||||
// Spawn a background task to do the configuration,
|
||||
// so that we don't block the main thread that starts Postgres.
|
||||
let local_proxy = local_proxy.clone();
|
||||
let mut local_proxy = local_proxy.clone();
|
||||
local_proxy.tls = self.compute_ctl_config.tls.clone();
|
||||
tokio::spawn(async move {
|
||||
if let Err(err) = local_proxy::configure(&local_proxy) {
|
||||
error!("error while configuring local_proxy: {err:?}");
|
||||
@@ -1515,8 +1542,12 @@ impl ComputeNode {
|
||||
|
||||
// Write new config
|
||||
let pgdata_path = Path::new(&self.params.pgdata);
|
||||
let postgresql_conf_path = pgdata_path.join("postgresql.conf");
|
||||
config::write_postgres_conf(&postgresql_conf_path, &spec, self.params.internal_http_port)?;
|
||||
config::write_postgres_conf(
|
||||
pgdata_path,
|
||||
&spec,
|
||||
self.params.internal_http_port,
|
||||
&self.compute_ctl_config.tls,
|
||||
)?;
|
||||
|
||||
if !spec.skip_pg_catalog_updates {
|
||||
let max_concurrent_connections = spec.reconfigure_concurrency;
|
||||
@@ -1587,6 +1618,56 @@ impl ComputeNode {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn watch_cert_for_changes(self: Arc<Self>) {
|
||||
// update status on cert renewal
|
||||
if let Some(tls_config) = &self.compute_ctl_config.tls {
|
||||
let tls_config = tls_config.clone();
|
||||
|
||||
// wait until the cert exists.
|
||||
let mut cert_watch = watch_cert_for_changes(tls_config.cert_path.clone()).await;
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let handle = tokio::runtime::Handle::current();
|
||||
'cert_update: loop {
|
||||
// let postgres/pgbouncer/local_proxy know the new cert/key exists.
|
||||
// we need to wait until it's configurable first.
|
||||
|
||||
let mut state = self.state.lock().unwrap();
|
||||
'status_update: loop {
|
||||
match state.status {
|
||||
// let's update the state to config pending
|
||||
ComputeStatus::ConfigurationPending | ComputeStatus::Running => {
|
||||
state.set_status(
|
||||
ComputeStatus::ConfigurationPending,
|
||||
&self.state_changed,
|
||||
);
|
||||
break 'status_update;
|
||||
}
|
||||
|
||||
// exit loop
|
||||
ComputeStatus::Failed
|
||||
| ComputeStatus::TerminationPending
|
||||
| ComputeStatus::Terminated => break 'cert_update,
|
||||
|
||||
// wait
|
||||
ComputeStatus::Init
|
||||
| ComputeStatus::Configuration
|
||||
| ComputeStatus::Empty => {
|
||||
state = self.state_changed.wait(state).unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
drop(state);
|
||||
|
||||
// wait for a new certificate update
|
||||
if handle.block_on(cert_watch.changed()).is_err() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// Update the `last_active` in the shared state, but ensure that it's a more recent one.
|
||||
pub fn update_last_active(&self, last_active: Option<DateTime<Utc>>) {
|
||||
let mut state = self.state.lock().unwrap();
|
||||
|
||||
@@ -6,11 +6,13 @@ use std::io::Write;
|
||||
use std::io::prelude::*;
|
||||
use std::path::Path;
|
||||
|
||||
use compute_api::spec::{ComputeAudit, ComputeMode, ComputeSpec, GenericOption};
|
||||
use compute_api::responses::TlsConfig;
|
||||
use compute_api::spec::{ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, GenericOption};
|
||||
|
||||
use crate::pg_helpers::{
|
||||
GenericOptionExt, GenericOptionsSearch, PgOptionsSerialize, escape_conf_value,
|
||||
};
|
||||
use crate::tls::{self, SERVER_CRT, SERVER_KEY};
|
||||
|
||||
/// Check that `line` is inside a text file and put it there if it is not.
|
||||
/// Create file if it doesn't exist.
|
||||
@@ -38,10 +40,12 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
|
||||
|
||||
/// Create or completely rewrite configuration file specified by `path`
|
||||
pub fn write_postgres_conf(
|
||||
path: &Path,
|
||||
pgdata_path: &Path,
|
||||
spec: &ComputeSpec,
|
||||
extension_server_port: u16,
|
||||
tls_config: &Option<TlsConfig>,
|
||||
) -> Result<()> {
|
||||
let path = pgdata_path.join("postgresql.conf");
|
||||
// File::create() destroys the file content if it exists.
|
||||
let mut file = File::create(path)?;
|
||||
|
||||
@@ -86,6 +90,20 @@ pub fn write_postgres_conf(
|
||||
)?;
|
||||
}
|
||||
|
||||
// tls
|
||||
if let Some(tls_config) = tls_config {
|
||||
writeln!(file, "ssl = on")?;
|
||||
|
||||
// postgres requires the keyfile to be in a secure file,
|
||||
// currently too complicated to ensure that at the VM level,
|
||||
// so we just copy them to another file instead. :shrug:
|
||||
tls::update_key_path_blocking(pgdata_path, tls_config);
|
||||
|
||||
// these are the default, but good to be explicit.
|
||||
writeln!(file, "ssl_cert_file = '{}'", SERVER_CRT)?;
|
||||
writeln!(file, "ssl_key_file = '{}'", SERVER_KEY)?;
|
||||
}
|
||||
|
||||
// Locales
|
||||
if cfg!(target_os = "macos") {
|
||||
writeln!(file, "lc_messages='C'")?;
|
||||
@@ -149,7 +167,8 @@ pub fn write_postgres_conf(
|
||||
writeln!(file, "# Managed by compute_ctl audit settings: begin")?;
|
||||
// This log level is very verbose
|
||||
// but this is necessary for HIPAA compliance.
|
||||
writeln!(file, "pgaudit.log='all'")?;
|
||||
// Exclude 'misc' category, because it doesn't contain anythig relevant.
|
||||
writeln!(file, "pgaudit.log='all, -misc'")?;
|
||||
writeln!(file, "pgaudit.log_parameter=on")?;
|
||||
// Disable logging of catalog queries
|
||||
// The catalog doesn't contain sensitive data, so we don't need to audit it.
|
||||
@@ -197,6 +216,12 @@ pub fn write_postgres_conf(
|
||||
writeln!(file, "neon.disable_logical_replication_subscribers=false")?;
|
||||
}
|
||||
|
||||
// We need Postgres to send logs to rsyslog so that we can forward them
|
||||
// further to customers' log aggregation systems.
|
||||
if spec.features.contains(&ComputeFeature::PostgresLogsExport) {
|
||||
writeln!(file, "log_destination='stderr,syslog'")?;
|
||||
}
|
||||
|
||||
// This is essential to keep this line at the end of the file,
|
||||
// because it is intended to override any settings above.
|
||||
writeln!(file, "include_if_exists = 'compute_ctl_temp_override.conf'")?;
|
||||
|
||||
@@ -4,7 +4,8 @@ module(load="imfile")
|
||||
# Input configuration for log files in the specified directory
|
||||
# Replace {log_directory} with the directory containing the log files
|
||||
input(type="imfile" File="{log_directory}/*.log" Tag="{tag}" Severity="info" Facility="local0")
|
||||
global(workDirectory="/var/log")
|
||||
# the directory to store rsyslog state files
|
||||
global(workDirectory="/var/log/rsyslog")
|
||||
|
||||
# Forward logs to remote syslog server
|
||||
*.* @@{remote_endpoint}
|
||||
*.* @@{remote_endpoint}
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
# Program name comes from postgres' syslog_facility configuration: https://www.postgresql.org/docs/current/runtime-config-logging.html#GUC-SYSLOG-IDENT
|
||||
# Default value is 'postgres'.
|
||||
if $programname == 'postgres' then {{
|
||||
# Forward Postgres logs to telemetry otel collector
|
||||
action(type="omfwd" target="{logs_export_target}" port="{logs_export_port}" protocol="tcp"
|
||||
template="RSYSLOG_SyslogProtocol23Format"
|
||||
action.resumeRetryCount="3"
|
||||
queue.type="linkedList" queue.size="1000")
|
||||
stop
|
||||
}}
|
||||
@@ -306,6 +306,36 @@ paths:
|
||||
schema:
|
||||
$ref: "#/components/schemas/GenericError"
|
||||
|
||||
/configure_telemetry:
|
||||
post:
|
||||
tags:
|
||||
- Configure
|
||||
summary: Configure rsyslog
|
||||
description: |
|
||||
This API endpoint configures rsyslog to forward Postgres logs
|
||||
to a specified otel collector.
|
||||
operationId: configureTelemetry
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
logs_export_host:
|
||||
type: string
|
||||
description: |
|
||||
Hostname and the port of the otel collector. Leave empty to disable logs forwarding.
|
||||
Example: config-shy-breeze-123-collector-monitoring.neon-telemetry.svc.cluster.local:54526
|
||||
responses:
|
||||
204:
|
||||
description: "Telemetry configured successfully"
|
||||
500:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/GenericError"
|
||||
|
||||
components:
|
||||
securitySchemes:
|
||||
JWT:
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::body::Body;
|
||||
use axum::extract::State;
|
||||
use axum::response::Response;
|
||||
use compute_api::requests::ConfigurationRequest;
|
||||
use compute_api::requests::{ConfigurationRequest, ConfigureTelemetryRequest};
|
||||
use compute_api::responses::{ComputeStatus, ComputeStatusResponse};
|
||||
use compute_api::spec::ComputeFeature;
|
||||
use http::StatusCode;
|
||||
use tokio::task;
|
||||
use tracing::info;
|
||||
@@ -11,6 +13,7 @@ use tracing::info;
|
||||
use crate::compute::{ComputeNode, ParsedSpec};
|
||||
use crate::http::JsonResponse;
|
||||
use crate::http::extract::Json;
|
||||
use crate::rsyslog::{PostgresLogsRsyslogConfig, configure_postgres_logs_export};
|
||||
|
||||
// Accept spec in JSON format and request compute configuration. If anything
|
||||
// goes wrong after we set the compute status to `ConfigurationPending` and
|
||||
@@ -92,3 +95,25 @@ pub(in crate::http) async fn configure(
|
||||
|
||||
JsonResponse::success(StatusCode::OK, body)
|
||||
}
|
||||
|
||||
pub(in crate::http) async fn configure_telemetry(
|
||||
State(compute): State<Arc<ComputeNode>>,
|
||||
request: Json<ConfigureTelemetryRequest>,
|
||||
) -> Response {
|
||||
if !compute.has_feature(ComputeFeature::PostgresLogsExport) {
|
||||
return JsonResponse::error(
|
||||
StatusCode::PRECONDITION_FAILED,
|
||||
"Postgres logs export feature is not enabled".to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
let conf = PostgresLogsRsyslogConfig::new(request.logs_export_host.as_deref());
|
||||
if let Err(err) = configure_postgres_logs_export(conf) {
|
||||
return JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, err.to_string());
|
||||
}
|
||||
|
||||
Response::builder()
|
||||
.status(StatusCode::NO_CONTENT)
|
||||
.body(Body::from(""))
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
@@ -8,8 +8,8 @@ use axum::Router;
|
||||
use axum::middleware::{self};
|
||||
use axum::response::IntoResponse;
|
||||
use axum::routing::{get, post};
|
||||
use compute_api::responses::ComputeCtlConfig;
|
||||
use http::StatusCode;
|
||||
use jsonwebtoken::jwk::JwkSet;
|
||||
use tokio::net::TcpListener;
|
||||
use tower::ServiceBuilder;
|
||||
use tower_http::{
|
||||
@@ -41,7 +41,7 @@ pub enum Server {
|
||||
},
|
||||
External {
|
||||
port: u16,
|
||||
jwks: JwkSet,
|
||||
config: ComputeCtlConfig,
|
||||
compute_id: String,
|
||||
},
|
||||
}
|
||||
@@ -79,7 +79,7 @@ impl From<&Server> for Router<Arc<ComputeNode>> {
|
||||
router
|
||||
}
|
||||
Server::External {
|
||||
jwks, compute_id, ..
|
||||
config, compute_id, ..
|
||||
} => {
|
||||
let unauthenticated_router =
|
||||
Router::<Arc<ComputeNode>>::new().route("/metrics", get(metrics::get_metrics));
|
||||
@@ -87,6 +87,7 @@ impl From<&Server> for Router<Arc<ComputeNode>> {
|
||||
let authenticated_router = Router::<Arc<ComputeNode>>::new()
|
||||
.route("/check_writability", post(check_writability::is_writable))
|
||||
.route("/configure", post(configure::configure))
|
||||
.route("/configure_telemetry", post(configure::configure_telemetry))
|
||||
.route("/database_schema", get(database_schema::get_schema_dump))
|
||||
.route("/dbs_and_roles", get(dbs_and_roles::get_catalog_objects))
|
||||
.route("/insights", get(insights::get_insights))
|
||||
@@ -95,7 +96,7 @@ impl From<&Server> for Router<Arc<ComputeNode>> {
|
||||
.route("/terminate", post(terminate::terminate))
|
||||
.layer(AsyncRequireAuthorizationLayer::new(Authorize::new(
|
||||
compute_id.clone(),
|
||||
jwks.clone(),
|
||||
config.jwks.clone(),
|
||||
)));
|
||||
|
||||
router
|
||||
|
||||
@@ -26,3 +26,4 @@ pub mod spec;
|
||||
mod spec_apply;
|
||||
pub mod swap;
|
||||
pub mod sync_sk;
|
||||
pub mod tls;
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
use metrics::core::Collector;
|
||||
use metrics::core::{AtomicF64, Collector, GenericGauge};
|
||||
use metrics::proto::MetricFamily;
|
||||
use metrics::{IntCounterVec, UIntGaugeVec, register_int_counter_vec, register_uint_gauge_vec};
|
||||
use metrics::{
|
||||
IntCounterVec, UIntGaugeVec, register_gauge, register_int_counter_vec, register_uint_gauge_vec,
|
||||
};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
pub(crate) static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||
@@ -59,10 +61,20 @@ pub(crate) static REMOTE_EXT_REQUESTS_TOTAL: Lazy<IntCounterVec> = Lazy::new(||
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
// Size of audit log directory in bytes
|
||||
pub(crate) static AUDIT_LOG_DIR_SIZE: Lazy<GenericGauge<AtomicF64>> = Lazy::new(|| {
|
||||
register_gauge!(
|
||||
"compute_audit_log_dir_size",
|
||||
"Size of audit log directory in bytes",
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub fn collect() -> Vec<MetricFamily> {
|
||||
let mut metrics = INSTALLED_EXTENSIONS.collect();
|
||||
metrics.extend(CPLANE_REQUESTS_TOTAL.collect());
|
||||
metrics.extend(REMOTE_EXT_REQUESTS_TOTAL.collect());
|
||||
metrics.extend(DB_MIGRATION_FAILED.collect());
|
||||
metrics.extend(AUDIT_LOG_DIR_SIZE.collect());
|
||||
metrics
|
||||
}
|
||||
|
||||
@@ -10,8 +10,10 @@ use std::str::FromStr;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use anyhow::{Result, bail};
|
||||
use compute_api::responses::TlsConfig;
|
||||
use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
|
||||
use futures::StreamExt;
|
||||
use indexmap::IndexMap;
|
||||
use ini::Ini;
|
||||
use notify::{RecursiveMode, Watcher};
|
||||
use postgres::config::Config;
|
||||
@@ -206,8 +208,8 @@ impl Escaping for PgIdent {
|
||||
/// Here we somewhat mimic the logic of Postgres' `pg_get_functiondef()`,
|
||||
/// <https://github.com/postgres/postgres/blob/8b49392b270b4ac0b9f5c210e2a503546841e832/src/backend/utils/adt/ruleutils.c#L2924>
|
||||
fn pg_quote_dollar(&self) -> (String, String) {
|
||||
let mut tag: String = "".to_string();
|
||||
let mut outer_tag = "x".to_string();
|
||||
let mut tag: String = "x".to_string();
|
||||
let mut outer_tag = "xx".to_string();
|
||||
|
||||
// Find the first suitable tag that is not present in the string.
|
||||
// Postgres' max role/DB name length is 63 bytes, so even in the
|
||||
@@ -406,7 +408,7 @@ pub fn create_pgdata(pgdata: &str) -> Result<()> {
|
||||
|
||||
/// Update pgbouncer.ini with provided options
|
||||
fn update_pgbouncer_ini(
|
||||
pgbouncer_config: HashMap<String, String>,
|
||||
pgbouncer_config: IndexMap<String, String>,
|
||||
pgbouncer_ini_path: &str,
|
||||
) -> Result<()> {
|
||||
let mut conf = Ini::load_from_file(pgbouncer_ini_path)?;
|
||||
@@ -427,7 +429,10 @@ fn update_pgbouncer_ini(
|
||||
/// Tune pgbouncer.
|
||||
/// 1. Apply new config using pgbouncer admin console
|
||||
/// 2. Add new values to pgbouncer.ini to preserve them after restart
|
||||
pub async fn tune_pgbouncer(pgbouncer_config: HashMap<String, String>) -> Result<()> {
|
||||
pub async fn tune_pgbouncer(
|
||||
mut pgbouncer_config: IndexMap<String, String>,
|
||||
tls_config: Option<TlsConfig>,
|
||||
) -> Result<()> {
|
||||
let pgbouncer_connstr = if std::env::var_os("AUTOSCALING").is_some() {
|
||||
// for VMs use pgbouncer specific way to connect to
|
||||
// pgbouncer admin console without password
|
||||
@@ -473,19 +478,21 @@ pub async fn tune_pgbouncer(pgbouncer_config: HashMap<String, String>) -> Result
|
||||
}
|
||||
};
|
||||
|
||||
// Apply new config
|
||||
for (option_name, value) in pgbouncer_config.iter() {
|
||||
let query = format!("SET {}={}", option_name, value);
|
||||
// keep this log line for debugging purposes
|
||||
info!("Applying pgbouncer setting change: {}", query);
|
||||
if let Some(tls_config) = tls_config {
|
||||
// pgbouncer starts in a half-ok state if it cannot find these files.
|
||||
// It will default to client_tls_sslmode=deny, which causes proxy to error.
|
||||
// There is a small window at startup where these files don't yet exist in the VM.
|
||||
// Best to wait until it exists.
|
||||
loop {
|
||||
if let Ok(true) = tokio::fs::try_exists(&tls_config.key_path).await {
|
||||
break;
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(500)).await
|
||||
}
|
||||
|
||||
if let Err(err) = client.simple_query(&query).await {
|
||||
// Don't fail on error, just print it into log
|
||||
error!(
|
||||
"Failed to apply pgbouncer setting change: {}, {}",
|
||||
query, err
|
||||
);
|
||||
};
|
||||
pgbouncer_config.insert("client_tls_cert_file".to_string(), tls_config.cert_path);
|
||||
pgbouncer_config.insert("client_tls_key_file".to_string(), tls_config.key_path);
|
||||
pgbouncer_config.insert("client_tls_sslmode".to_string(), "allow".to_string());
|
||||
}
|
||||
|
||||
// save values to pgbouncer.ini
|
||||
@@ -501,6 +508,13 @@ pub async fn tune_pgbouncer(pgbouncer_config: HashMap<String, String>) -> Result
|
||||
};
|
||||
update_pgbouncer_ini(pgbouncer_config, &pgbouncer_ini_path)?;
|
||||
|
||||
info!("Applying pgbouncer setting change");
|
||||
|
||||
if let Err(err) = client.simple_query("RELOAD").await {
|
||||
// Don't fail on error, just print it into log
|
||||
error!("Failed to apply pgbouncer setting change, {err}",);
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -1,8 +1,14 @@
|
||||
use std::fs;
|
||||
use std::io::ErrorKind;
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
use std::time::Duration;
|
||||
use std::{fs::OpenOptions, io::Write};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use tracing::info;
|
||||
use anyhow::{Context, Result, anyhow};
|
||||
use tracing::{error, info, instrument, warn};
|
||||
|
||||
const POSTGRES_LOGS_CONF_PATH: &str = "/etc/rsyslog.d/postgres_logs.conf";
|
||||
|
||||
fn get_rsyslog_pid() -> Option<String> {
|
||||
let output = Command::new("pgrep")
|
||||
@@ -43,7 +49,7 @@ fn restart_rsyslog() -> Result<()> {
|
||||
}
|
||||
|
||||
pub fn configure_audit_rsyslog(
|
||||
log_directory: &str,
|
||||
log_directory: String,
|
||||
tag: &str,
|
||||
remote_endpoint: &str,
|
||||
) -> Result<()> {
|
||||
@@ -75,3 +81,196 @@ pub fn configure_audit_rsyslog(
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Configuration for enabling Postgres logs forwarding from rsyslogd
|
||||
pub struct PostgresLogsRsyslogConfig<'a> {
|
||||
pub host: Option<&'a str>,
|
||||
}
|
||||
|
||||
impl<'a> PostgresLogsRsyslogConfig<'a> {
|
||||
pub fn new(host: Option<&'a str>) -> Self {
|
||||
Self { host }
|
||||
}
|
||||
|
||||
pub fn build(&self) -> Result<String> {
|
||||
match self.host {
|
||||
Some(host) => {
|
||||
if let Some((target, port)) = host.split_once(":") {
|
||||
Ok(format!(
|
||||
include_str!(
|
||||
"config_template/compute_rsyslog_postgres_export_template.conf"
|
||||
),
|
||||
logs_export_target = target,
|
||||
logs_export_port = port,
|
||||
))
|
||||
} else {
|
||||
Err(anyhow!("Invalid host format for Postgres logs export"))
|
||||
}
|
||||
}
|
||||
None => Ok("".to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
fn current_config() -> Result<String> {
|
||||
let config_content = match std::fs::read_to_string(POSTGRES_LOGS_CONF_PATH) {
|
||||
Ok(c) => c,
|
||||
Err(err) if err.kind() == ErrorKind::NotFound => String::new(),
|
||||
Err(err) => return Err(err.into()),
|
||||
};
|
||||
Ok(config_content)
|
||||
}
|
||||
|
||||
/// Returns the default host for otel collector that receives Postgres logs
|
||||
pub fn default_host(project_id: &str) -> String {
|
||||
format!(
|
||||
"config-{}-collector.neon-telemetry.svc.cluster.local:10514",
|
||||
project_id
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn configure_postgres_logs_export(conf: PostgresLogsRsyslogConfig) -> Result<()> {
|
||||
let new_config = conf.build()?;
|
||||
let current_config = PostgresLogsRsyslogConfig::current_config()?;
|
||||
|
||||
if new_config == current_config {
|
||||
info!("postgres logs rsyslog configuration is up-to-date");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// When new config is empty we can simply remove the configuration file.
|
||||
if new_config.is_empty() {
|
||||
info!("removing rsyslog config file: {}", POSTGRES_LOGS_CONF_PATH);
|
||||
match std::fs::remove_file(POSTGRES_LOGS_CONF_PATH) {
|
||||
Ok(_) => {}
|
||||
Err(err) if err.kind() == ErrorKind::NotFound => {}
|
||||
Err(err) => return Err(err.into()),
|
||||
}
|
||||
restart_rsyslog()?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
info!(
|
||||
"configuring rsyslog for postgres logs export to: {:?}",
|
||||
conf.host
|
||||
);
|
||||
|
||||
let mut file = OpenOptions::new()
|
||||
.create(true)
|
||||
.write(true)
|
||||
.truncate(true)
|
||||
.open(POSTGRES_LOGS_CONF_PATH)?;
|
||||
file.write_all(new_config.as_bytes())?;
|
||||
|
||||
info!(
|
||||
"rsyslog configuration file {} added successfully. Starting rsyslogd",
|
||||
POSTGRES_LOGS_CONF_PATH
|
||||
);
|
||||
|
||||
restart_rsyslog()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[instrument(skip_all)]
|
||||
async fn pgaudit_gc_main_loop(log_directory: String) -> Result<()> {
|
||||
info!("running pgaudit GC main loop");
|
||||
loop {
|
||||
// Check log_directory for old pgaudit logs and delete them.
|
||||
// New log files are checked every 5 minutes, as set in pgaudit.log_rotation_age
|
||||
// Find files that were not modified in the last 15 minutes and delete them.
|
||||
// This should be enough time for rsyslog to process the logs and for us to catch the alerts.
|
||||
//
|
||||
// In case of a very high load, we might need to adjust this value and pgaudit.log_rotation_age.
|
||||
//
|
||||
// TODO: add some smarter logic to delete the files that are fully streamed according to rsyslog
|
||||
// imfile-state files, but for now just do a simple GC to avoid filling up the disk.
|
||||
let _ = Command::new("find")
|
||||
.arg(&log_directory)
|
||||
.arg("-name")
|
||||
.arg("audit*.log")
|
||||
.arg("-mmin")
|
||||
.arg("+15")
|
||||
.arg("-delete")
|
||||
.output()?;
|
||||
|
||||
// also collect the metric for the size of the log directory
|
||||
async fn get_log_files_size(path: &Path) -> Result<u64> {
|
||||
let mut total_size = 0;
|
||||
|
||||
for entry in fs::read_dir(path)? {
|
||||
let entry = entry?;
|
||||
let entry_path = entry.path();
|
||||
|
||||
if entry_path.is_file() && entry_path.to_string_lossy().ends_with("log") {
|
||||
total_size += entry.metadata()?.len();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(total_size)
|
||||
}
|
||||
|
||||
let log_directory_size = get_log_files_size(Path::new(&log_directory))
|
||||
.await
|
||||
.unwrap_or_else(|e| {
|
||||
warn!("Failed to get log directory size: {}", e);
|
||||
0
|
||||
});
|
||||
crate::metrics::AUDIT_LOG_DIR_SIZE.set(log_directory_size as f64);
|
||||
tokio::time::sleep(Duration::from_secs(60)).await;
|
||||
}
|
||||
}
|
||||
|
||||
// launch pgaudit GC thread to clean up the old pgaudit logs stored in the log_directory
|
||||
pub fn launch_pgaudit_gc(log_directory: String) {
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = pgaudit_gc_main_loop(log_directory).await {
|
||||
error!("pgaudit GC main loop failed: {}", e);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::rsyslog::PostgresLogsRsyslogConfig;
|
||||
|
||||
#[test]
|
||||
fn test_postgres_logs_config() {
|
||||
{
|
||||
// Verify empty config
|
||||
let conf = PostgresLogsRsyslogConfig::new(None);
|
||||
let res = conf.build();
|
||||
assert!(res.is_ok());
|
||||
let conf_str = res.unwrap();
|
||||
assert_eq!(&conf_str, "");
|
||||
}
|
||||
|
||||
{
|
||||
// Verify config
|
||||
let conf = PostgresLogsRsyslogConfig::new(Some("collector.cvc.local:514"));
|
||||
let res = conf.build();
|
||||
assert!(res.is_ok());
|
||||
let conf_str = res.unwrap();
|
||||
assert!(conf_str.contains("omfwd"));
|
||||
assert!(conf_str.contains(r#"target="collector.cvc.local""#));
|
||||
assert!(conf_str.contains(r#"port="514""#));
|
||||
}
|
||||
|
||||
{
|
||||
// Verify invalid config
|
||||
let conf = PostgresLogsRsyslogConfig::new(Some("invalid"));
|
||||
let res = conf.build();
|
||||
assert!(res.is_err());
|
||||
}
|
||||
|
||||
{
|
||||
// Verify config with default host
|
||||
let host = PostgresLogsRsyslogConfig::default_host("shy-breeze-123");
|
||||
let conf = PostgresLogsRsyslogConfig::new(Some(&host));
|
||||
let res = conf.build();
|
||||
assert!(res.is_ok());
|
||||
let conf_str = res.unwrap();
|
||||
assert!(conf_str.contains(r#"shy-breeze-123"#));
|
||||
assert!(conf_str.contains(r#"port="10514""#));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,13 +8,12 @@ use compute_api::responses::{
|
||||
use compute_api::spec::ComputeSpec;
|
||||
use reqwest::StatusCode;
|
||||
use tokio_postgres::Client;
|
||||
use tracing::{error, info, instrument, warn};
|
||||
use tracing::{error, info, instrument};
|
||||
|
||||
use crate::config;
|
||||
use crate::metrics::{CPLANE_REQUESTS_TOTAL, CPlaneRequestRPC, UNKNOWN_HTTP_STATUS};
|
||||
use crate::migration::MigrationRunner;
|
||||
use crate::params::PG_HBA_ALL_MD5;
|
||||
use crate::pg_helpers::*;
|
||||
|
||||
// Do control plane request and return response if any. In case of error it
|
||||
// returns a bool flag indicating whether it makes sense to retry the request
|
||||
@@ -212,122 +211,3 @@ pub async fn handle_migrations(client: &mut Client) -> Result<()> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Connect to the database as superuser and pre-create anon extension
|
||||
/// if it is present in shared_preload_libraries
|
||||
#[instrument(skip_all)]
|
||||
pub async fn handle_extension_anon(
|
||||
spec: &ComputeSpec,
|
||||
db_owner: &str,
|
||||
db_client: &mut Client,
|
||||
grants_only: bool,
|
||||
) -> Result<()> {
|
||||
info!("handle extension anon");
|
||||
|
||||
if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
|
||||
if libs.contains("anon") {
|
||||
if !grants_only {
|
||||
// check if extension is already initialized using anon.is_initialized()
|
||||
let query = "SELECT anon.is_initialized()";
|
||||
match db_client.query(query, &[]).await {
|
||||
Ok(rows) => {
|
||||
if !rows.is_empty() {
|
||||
let is_initialized: bool = rows[0].get(0);
|
||||
if is_initialized {
|
||||
info!("anon extension is already initialized");
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"anon extension is_installed check failed with expected error: {}",
|
||||
e
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
// Create anon extension if this compute needs it
|
||||
// Users cannot create it themselves, because superuser is required.
|
||||
let mut query = "CREATE EXTENSION IF NOT EXISTS anon CASCADE";
|
||||
info!("creating anon extension with query: {}", query);
|
||||
match db_client.query(query, &[]).await {
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
error!("anon extension creation failed with error: {}", e);
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
// check that extension is installed
|
||||
query = "SELECT extname FROM pg_extension WHERE extname = 'anon'";
|
||||
let rows = db_client.query(query, &[]).await?;
|
||||
if rows.is_empty() {
|
||||
error!("anon extension is not installed");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Initialize anon extension
|
||||
// This also requires superuser privileges, so users cannot do it themselves.
|
||||
query = "SELECT anon.init()";
|
||||
match db_client.query(query, &[]).await {
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
error!("anon.init() failed with error: {}", e);
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check that extension is installed, if not bail early
|
||||
let query = "SELECT extname FROM pg_extension WHERE extname = 'anon'";
|
||||
match db_client.query(query, &[]).await {
|
||||
Ok(rows) => {
|
||||
if rows.is_empty() {
|
||||
error!("anon extension is not installed");
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!("anon extension check failed with error: {}", e);
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
|
||||
let query = format!("GRANT ALL ON SCHEMA anon TO {}", db_owner);
|
||||
info!("granting anon extension permissions with query: {}", query);
|
||||
db_client.simple_query(&query).await?;
|
||||
|
||||
// Grant permissions to db_owner to use anon extension functions
|
||||
let query = format!("GRANT ALL ON ALL FUNCTIONS IN SCHEMA anon TO {}", db_owner);
|
||||
info!("granting anon extension permissions with query: {}", query);
|
||||
db_client.simple_query(&query).await?;
|
||||
|
||||
// This is needed, because some functions are defined as SECURITY DEFINER.
|
||||
// In Postgres SECURITY DEFINER functions are executed with the privileges
|
||||
// of the owner.
|
||||
// In anon extension this it is needed to access some GUCs, which are only accessible to
|
||||
// superuser. But we've patched postgres to allow db_owner to access them as well.
|
||||
// So we need to change owner of these functions to db_owner.
|
||||
let query = format!("
|
||||
SELECT 'ALTER FUNCTION '||nsp.nspname||'.'||p.proname||'('||pg_get_function_identity_arguments(p.oid)||') OWNER TO {};'
|
||||
from pg_proc p
|
||||
join pg_namespace nsp ON p.pronamespace = nsp.oid
|
||||
where nsp.nspname = 'anon';", db_owner);
|
||||
|
||||
info!("change anon extension functions owner to db owner");
|
||||
db_client.simple_query(&query).await?;
|
||||
|
||||
// affects views as well
|
||||
let query = format!("GRANT ALL ON ALL TABLES IN SCHEMA anon TO {}", db_owner);
|
||||
info!("granting anon extension permissions with query: {}", query);
|
||||
db_client.simple_query(&query).await?;
|
||||
|
||||
let query = format!("GRANT ALL ON ALL SEQUENCES IN SCHEMA anon TO {}", db_owner);
|
||||
info!("granting anon extension permissions with query: {}", query);
|
||||
db_client.simple_query(&query).await?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ use std::sync::Arc;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use compute_api::responses::ComputeStatus;
|
||||
use compute_api::spec::{ComputeAudit, ComputeFeature, ComputeSpec, Database, PgIdent, Role};
|
||||
use compute_api::spec::{ComputeAudit, ComputeSpec, Database, PgIdent, Role};
|
||||
use futures::future::join_all;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio_postgres::Client;
|
||||
@@ -26,7 +26,7 @@ use crate::spec_apply::ApplySpecPhase::{
|
||||
RunInEachDatabase,
|
||||
};
|
||||
use crate::spec_apply::PerDatabasePhase::{
|
||||
ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions, HandleAnonExtension,
|
||||
ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions,
|
||||
};
|
||||
|
||||
impl ComputeNode {
|
||||
@@ -238,7 +238,6 @@ impl ComputeNode {
|
||||
let mut phases = vec![
|
||||
DeleteDBRoleReferences,
|
||||
ChangeSchemaPerms,
|
||||
HandleAnonExtension,
|
||||
];
|
||||
|
||||
if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
|
||||
@@ -458,7 +457,6 @@ impl Debug for DB {
|
||||
pub enum PerDatabasePhase {
|
||||
DeleteDBRoleReferences,
|
||||
ChangeSchemaPerms,
|
||||
HandleAnonExtension,
|
||||
/// This is a shared phase, used for both i) dropping dangling LR subscriptions
|
||||
/// before dropping the DB, and ii) dropping all subscriptions after creating
|
||||
/// a fresh branch.
|
||||
@@ -1012,98 +1010,6 @@ async fn get_operations<'a>(
|
||||
]
|
||||
.into_iter();
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
// TODO: remove this completely https://github.com/neondatabase/cloud/issues/22663
|
||||
PerDatabasePhase::HandleAnonExtension => {
|
||||
// Only install Anon into user databases
|
||||
let db = match &db {
|
||||
DB::SystemDB => return Ok(Box::new(empty())),
|
||||
DB::UserDB(db) => db,
|
||||
};
|
||||
// Never install Anon when it's not enabled as feature
|
||||
if !spec.features.contains(&ComputeFeature::AnonExtension) {
|
||||
return Ok(Box::new(empty()));
|
||||
}
|
||||
|
||||
// Only install Anon when it's added in preload libraries
|
||||
let opt_libs = spec.cluster.settings.find("shared_preload_libraries");
|
||||
|
||||
let libs = match opt_libs {
|
||||
Some(libs) => libs,
|
||||
None => return Ok(Box::new(empty())),
|
||||
};
|
||||
|
||||
if !libs.contains("anon") {
|
||||
return Ok(Box::new(empty()));
|
||||
}
|
||||
|
||||
let db_owner = db.owner.pg_quote();
|
||||
|
||||
let operations = vec![
|
||||
// Create anon extension if this compute needs it
|
||||
// Users cannot create it themselves, because superuser is required.
|
||||
Operation {
|
||||
query: String::from("CREATE EXTENSION IF NOT EXISTS anon CASCADE"),
|
||||
comment: Some(String::from("creating anon extension")),
|
||||
},
|
||||
// Initialize anon extension
|
||||
// This also requires superuser privileges, so users cannot do it themselves.
|
||||
Operation {
|
||||
query: String::from("SELECT anon.init()"),
|
||||
comment: Some(String::from("initializing anon extension data")),
|
||||
},
|
||||
Operation {
|
||||
query: format!("GRANT ALL ON SCHEMA anon TO {}", db_owner),
|
||||
comment: Some(String::from(
|
||||
"granting anon extension schema permissions",
|
||||
)),
|
||||
},
|
||||
Operation {
|
||||
query: format!(
|
||||
"GRANT ALL ON ALL FUNCTIONS IN SCHEMA anon TO {}",
|
||||
db_owner
|
||||
),
|
||||
comment: Some(String::from(
|
||||
"granting anon extension schema functions permissions",
|
||||
)),
|
||||
},
|
||||
// We need this, because some functions are defined as SECURITY DEFINER.
|
||||
// In Postgres SECURITY DEFINER functions are executed with the privileges
|
||||
// of the owner.
|
||||
// In anon extension this it is needed to access some GUCs, which are only accessible to
|
||||
// superuser. But we've patched postgres to allow db_owner to access them as well.
|
||||
// So we need to change owner of these functions to db_owner.
|
||||
Operation {
|
||||
query: format!(
|
||||
include_str!("sql/anon_ext_fn_reassign.sql"),
|
||||
db_owner = db_owner,
|
||||
),
|
||||
comment: Some(String::from(
|
||||
"change anon extension functions owner to database_owner",
|
||||
)),
|
||||
},
|
||||
Operation {
|
||||
query: format!(
|
||||
"GRANT ALL ON ALL TABLES IN SCHEMA anon TO {}",
|
||||
db_owner,
|
||||
),
|
||||
comment: Some(String::from(
|
||||
"granting anon extension tables permissions",
|
||||
)),
|
||||
},
|
||||
Operation {
|
||||
query: format!(
|
||||
"GRANT ALL ON ALL SEQUENCES IN SCHEMA anon TO {}",
|
||||
db_owner,
|
||||
),
|
||||
comment: Some(String::from(
|
||||
"granting anon extension sequences permissions",
|
||||
)),
|
||||
},
|
||||
]
|
||||
.into_iter();
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
}
|
||||
|
||||
118
compute_tools/src/tls.rs
Normal file
118
compute_tools/src/tls.rs
Normal file
@@ -0,0 +1,118 @@
|
||||
use std::{io::Write, os::unix::fs::OpenOptionsExt, path::Path, time::Duration};
|
||||
|
||||
use anyhow::{Context, Result, bail};
|
||||
use compute_api::responses::TlsConfig;
|
||||
use ring::digest;
|
||||
use spki::ObjectIdentifier;
|
||||
use spki::der::{Decode, PemReader};
|
||||
use x509_cert::Certificate;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct CertDigest(digest::Digest);
|
||||
|
||||
pub async fn watch_cert_for_changes(cert_path: String) -> tokio::sync::watch::Receiver<CertDigest> {
|
||||
let mut digest = compute_digest(&cert_path).await;
|
||||
let (tx, rx) = tokio::sync::watch::channel(digest);
|
||||
tokio::spawn(async move {
|
||||
while !tx.is_closed() {
|
||||
let new_digest = compute_digest(&cert_path).await;
|
||||
if digest.0.as_ref() != new_digest.0.as_ref() {
|
||||
digest = new_digest;
|
||||
_ = tx.send(digest);
|
||||
}
|
||||
|
||||
tokio::time::sleep(Duration::from_secs(60)).await
|
||||
}
|
||||
});
|
||||
rx
|
||||
}
|
||||
|
||||
async fn compute_digest(cert_path: &str) -> CertDigest {
|
||||
loop {
|
||||
match try_compute_digest(cert_path).await {
|
||||
Ok(d) => break d,
|
||||
Err(e) => {
|
||||
tracing::error!("could not read cert file {e:?}");
|
||||
tokio::time::sleep(Duration::from_secs(1)).await
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn try_compute_digest(cert_path: &str) -> Result<CertDigest> {
|
||||
let data = tokio::fs::read(cert_path).await?;
|
||||
// sha256 is extremely collision resistent. can safely assume the digest to be unique
|
||||
Ok(CertDigest(digest::digest(&digest::SHA256, &data)))
|
||||
}
|
||||
|
||||
pub const SERVER_CRT: &str = "server.crt";
|
||||
pub const SERVER_KEY: &str = "server.key";
|
||||
|
||||
pub fn update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) {
|
||||
loop {
|
||||
match try_update_key_path_blocking(pg_data, tls_config) {
|
||||
Ok(()) => break,
|
||||
Err(e) => {
|
||||
tracing::error!("could not create key file {e:?}");
|
||||
std::thread::sleep(Duration::from_secs(1))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Postgres requires the keypath be "secure". This means
|
||||
// 1. Owned by the postgres user.
|
||||
// 2. Have permission 600.
|
||||
fn try_update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) -> Result<()> {
|
||||
let key = std::fs::read_to_string(&tls_config.key_path)?;
|
||||
let crt = std::fs::read_to_string(&tls_config.cert_path)?;
|
||||
|
||||
// to mitigate a race condition during renewal.
|
||||
verify_key_cert(&key, &crt)?;
|
||||
|
||||
let mut key_file = std::fs::OpenOptions::new()
|
||||
.write(true)
|
||||
.create(true)
|
||||
.truncate(true)
|
||||
.mode(0o600)
|
||||
.open(pg_data.join(SERVER_KEY))?;
|
||||
|
||||
let mut crt_file = std::fs::OpenOptions::new()
|
||||
.write(true)
|
||||
.create(true)
|
||||
.truncate(true)
|
||||
.mode(0o600)
|
||||
.open(pg_data.join(SERVER_CRT))?;
|
||||
|
||||
key_file.write_all(key.as_bytes())?;
|
||||
crt_file.write_all(crt.as_bytes())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn verify_key_cert(key: &str, cert: &str) -> Result<()> {
|
||||
const ECDSA_WITH_SHA256: ObjectIdentifier = ObjectIdentifier::new_unwrap("1.2.840.10045.4.3.2");
|
||||
|
||||
let cert = Certificate::decode(&mut PemReader::new(cert.as_bytes()).context("pem reader")?)
|
||||
.context("decode cert")?;
|
||||
|
||||
match cert.signature_algorithm.oid {
|
||||
ECDSA_WITH_SHA256 => {
|
||||
let key = p256::SecretKey::from_sec1_pem(key).context("parse key")?;
|
||||
|
||||
let a = key.public_key().to_sec1_bytes();
|
||||
let b = cert
|
||||
.tbs_certificate
|
||||
.subject_public_key_info
|
||||
.subject_public_key
|
||||
.raw_bytes();
|
||||
|
||||
if *a != *b {
|
||||
bail!("private key file does not match certificate")
|
||||
}
|
||||
}
|
||||
_ => bail!("unknown TLS key type"),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -64,7 +64,8 @@ test.escaping = 'here''s a backslash \\ and a quote '' and a double-quote " hoor
|
||||
#[test]
|
||||
fn ident_pg_quote_dollar() {
|
||||
let test_cases = vec![
|
||||
("name", ("$$name$$", "x")),
|
||||
("name", ("$x$name$x$", "xx")),
|
||||
("name$", ("$x$name$$x$", "xx")),
|
||||
("name$$", ("$x$name$$$x$", "xx")),
|
||||
("name$$$", ("$x$name$$$$x$", "xx")),
|
||||
("name$$$$", ("$x$name$$$$$x$", "xx")),
|
||||
|
||||
@@ -979,7 +979,7 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
|
||||
neon_distrib_dir: None,
|
||||
default_tenant_id: TenantId::from_array(std::array::from_fn(|_| 0)),
|
||||
storage_controller: None,
|
||||
control_plane_compute_hook_api: None,
|
||||
control_plane_hooks_api: None,
|
||||
generate_local_ssl_certs: false,
|
||||
}
|
||||
};
|
||||
|
||||
@@ -72,9 +72,9 @@ pub struct LocalEnv {
|
||||
// be propagated into each pageserver's configuration.
|
||||
pub control_plane_api: Url,
|
||||
|
||||
// Control plane upcall API for storage controller. If set, this will be propagated into the
|
||||
// Control plane upcall APIs for storage controller. If set, this will be propagated into the
|
||||
// storage controller's configuration.
|
||||
pub control_plane_compute_hook_api: Option<Url>,
|
||||
pub control_plane_hooks_api: Option<Url>,
|
||||
|
||||
/// Keep human-readable aliases in memory (and persist them to config), to hide ZId hex strings from the user.
|
||||
// A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,
|
||||
@@ -104,6 +104,7 @@ pub struct OnDiskConfig {
|
||||
pub pageservers: Vec<PageServerConf>,
|
||||
pub safekeepers: Vec<SafekeeperConf>,
|
||||
pub control_plane_api: Option<Url>,
|
||||
pub control_plane_hooks_api: Option<Url>,
|
||||
pub control_plane_compute_hook_api: Option<Url>,
|
||||
branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
|
||||
// Note: skip serializing because in compat tests old storage controller fails
|
||||
@@ -136,7 +137,7 @@ pub struct NeonLocalInitConf {
|
||||
pub pageservers: Vec<NeonLocalInitPageserverConf>,
|
||||
pub safekeepers: Vec<SafekeeperConf>,
|
||||
pub control_plane_api: Option<Url>,
|
||||
pub control_plane_compute_hook_api: Option<Option<Url>>,
|
||||
pub control_plane_hooks_api: Option<Url>,
|
||||
pub generate_local_ssl_certs: bool,
|
||||
}
|
||||
|
||||
@@ -148,7 +149,7 @@ pub struct NeonBroker {
|
||||
pub listen_addr: SocketAddr,
|
||||
}
|
||||
|
||||
/// Broker config for cluster internal communication.
|
||||
/// A part of storage controller's config the neon_local knows about.
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
#[serde(default)]
|
||||
pub struct NeonStorageControllerConf {
|
||||
@@ -175,10 +176,11 @@ pub struct NeonStorageControllerConf {
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub long_reconcile_threshold: Option<Duration>,
|
||||
|
||||
#[serde(default)]
|
||||
pub use_https_pageserver_api: bool,
|
||||
|
||||
pub timelines_onto_safekeepers: bool,
|
||||
|
||||
pub use_https_safekeeper_api: bool,
|
||||
}
|
||||
|
||||
impl NeonStorageControllerConf {
|
||||
@@ -204,6 +206,7 @@ impl Default for NeonStorageControllerConf {
|
||||
long_reconcile_threshold: None,
|
||||
use_https_pageserver_api: false,
|
||||
timelines_onto_safekeepers: false,
|
||||
use_https_safekeeper_api: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -301,6 +304,7 @@ pub struct SafekeeperConf {
|
||||
pub pg_port: u16,
|
||||
pub pg_tenant_only_port: Option<u16>,
|
||||
pub http_port: u16,
|
||||
pub https_port: Option<u16>,
|
||||
pub sync: bool,
|
||||
pub remote_storage: Option<String>,
|
||||
pub backup_threads: Option<u32>,
|
||||
@@ -315,6 +319,7 @@ impl Default for SafekeeperConf {
|
||||
pg_port: 0,
|
||||
pg_tenant_only_port: None,
|
||||
http_port: 0,
|
||||
https_port: None,
|
||||
sync: true,
|
||||
remote_storage: None,
|
||||
backup_threads: None,
|
||||
@@ -573,7 +578,8 @@ impl LocalEnv {
|
||||
pageservers,
|
||||
safekeepers,
|
||||
control_plane_api,
|
||||
control_plane_compute_hook_api,
|
||||
control_plane_hooks_api,
|
||||
control_plane_compute_hook_api: _,
|
||||
branch_name_mappings,
|
||||
generate_local_ssl_certs,
|
||||
} = on_disk_config;
|
||||
@@ -588,7 +594,7 @@ impl LocalEnv {
|
||||
pageservers,
|
||||
safekeepers,
|
||||
control_plane_api: control_plane_api.unwrap(),
|
||||
control_plane_compute_hook_api,
|
||||
control_plane_hooks_api,
|
||||
branch_name_mappings,
|
||||
generate_local_ssl_certs,
|
||||
}
|
||||
@@ -695,7 +701,8 @@ impl LocalEnv {
|
||||
pageservers: vec![], // it's skip_serializing anyway
|
||||
safekeepers: self.safekeepers.clone(),
|
||||
control_plane_api: Some(self.control_plane_api.clone()),
|
||||
control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(),
|
||||
control_plane_hooks_api: self.control_plane_hooks_api.clone(),
|
||||
control_plane_compute_hook_api: None,
|
||||
branch_name_mappings: self.branch_name_mappings.clone(),
|
||||
generate_local_ssl_certs: self.generate_local_ssl_certs,
|
||||
},
|
||||
@@ -779,8 +786,8 @@ impl LocalEnv {
|
||||
pageservers,
|
||||
safekeepers,
|
||||
control_plane_api,
|
||||
control_plane_compute_hook_api,
|
||||
generate_local_ssl_certs,
|
||||
control_plane_hooks_api,
|
||||
} = conf;
|
||||
|
||||
// Find postgres binaries.
|
||||
@@ -827,7 +834,7 @@ impl LocalEnv {
|
||||
pageservers: pageservers.iter().map(Into::into).collect(),
|
||||
safekeepers,
|
||||
control_plane_api: control_plane_api.unwrap(),
|
||||
control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(),
|
||||
control_plane_hooks_api,
|
||||
branch_name_mappings: Default::default(),
|
||||
generate_local_ssl_certs,
|
||||
};
|
||||
@@ -842,6 +849,9 @@ impl LocalEnv {
|
||||
// create safekeeper dirs
|
||||
for safekeeper in &env.safekeepers {
|
||||
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(&env, safekeeper.id))?;
|
||||
SafekeeperNode::from_env(&env, safekeeper)
|
||||
.initialize()
|
||||
.context("safekeeper init failed")?;
|
||||
}
|
||||
|
||||
// initialize pageserver state
|
||||
|
||||
@@ -111,6 +111,18 @@ impl SafekeeperNode {
|
||||
.expect("non-Unicode path")
|
||||
}
|
||||
|
||||
/// Initializes a safekeeper node by creating all necessary files,
|
||||
/// e.g. SSL certificates.
|
||||
pub fn initialize(&self) -> anyhow::Result<()> {
|
||||
if self.env.generate_local_ssl_certs {
|
||||
self.env.generate_ssl_cert(
|
||||
&self.datadir_path().join("server.crt"),
|
||||
&self.datadir_path().join("server.key"),
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn start(
|
||||
&self,
|
||||
extra_opts: &[String],
|
||||
@@ -196,6 +208,16 @@ impl SafekeeperNode {
|
||||
]);
|
||||
}
|
||||
|
||||
if let Some(https_port) = self.conf.https_port {
|
||||
args.extend([
|
||||
"--listen-https".to_owned(),
|
||||
format!("{}:{}", self.listen_addr, https_port),
|
||||
]);
|
||||
}
|
||||
if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() {
|
||||
args.push(format!("--ssl-ca-file={}", ssl_ca_file.to_str().unwrap()));
|
||||
}
|
||||
|
||||
args.extend_from_slice(extra_opts);
|
||||
|
||||
background_process::start_process(
|
||||
|
||||
@@ -538,6 +538,10 @@ impl StorageController {
|
||||
args.push("--use-https-pageserver-api".to_string());
|
||||
}
|
||||
|
||||
if self.config.use_https_safekeeper_api {
|
||||
args.push("--use-https-safekeeper-api".to_string());
|
||||
}
|
||||
|
||||
if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() {
|
||||
args.push(format!("--ssl-ca-file={}", ssl_ca_file.to_str().unwrap()));
|
||||
}
|
||||
@@ -558,10 +562,8 @@ impl StorageController {
|
||||
args.push(format!("--public-key=\"{public_key}\""));
|
||||
}
|
||||
|
||||
if let Some(control_plane_compute_hook_api) = &self.env.control_plane_compute_hook_api {
|
||||
args.push(format!(
|
||||
"--compute-hook-url={control_plane_compute_hook_api}"
|
||||
));
|
||||
if let Some(control_plane_hooks_api) = &self.env.control_plane_hooks_api {
|
||||
args.push(format!("--control-plane-url={control_plane_hooks_api}"));
|
||||
}
|
||||
|
||||
if let Some(split_threshold) = self.config.split_threshold.as_ref() {
|
||||
|
||||
@@ -31,10 +31,6 @@ reason = "the marvin attack only affects private key decryption, not public key
|
||||
id = "RUSTSEC-2024-0436"
|
||||
reason = "The paste crate is a build-only dependency with no runtime components. It is unlikely to have any security impact."
|
||||
|
||||
[[advisories.ignore]]
|
||||
id = "RUSTSEC-2025-0014"
|
||||
reason = "The humantime is widely used and is not easy to replace right now. It is unmaintained, but it has no known vulnerabilities to care about. #11179"
|
||||
|
||||
# This section is considered when running `cargo deny check licenses`
|
||||
# More documentation for the licenses section can be found here:
|
||||
# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
# Neon RFCs
|
||||
|
||||
## Overview
|
||||
|
||||
This directory contains Request for Comments documents, or RFCs, for
|
||||
features or concepts that have been proposed. Alternative names:
|
||||
technical design doc, ERD, one-pager
|
||||
@@ -59,37 +63,10 @@ RFC lifecycle:
|
||||
|
||||
### RFC template
|
||||
|
||||
Use template with `YYYY-MM-DD-copy-me.md` as a starting point. Timestamp prefix helps to avoid awkward 'id' collisions.
|
||||
|
||||
```sh
|
||||
cp docs/rfcs/YYYY-MM-DD-copy-me.md docs/rfcs/$(date +"%Y-%m-%d")-<name>.md
|
||||
```
|
||||
|
||||
Note, a lot of the sections are marked as ‘if relevant’. They are included into the template as a reminder and to help inspiration.
|
||||
|
||||
```
|
||||
# Name
|
||||
Created on ..
|
||||
Implemented on ..
|
||||
|
||||
## Summary
|
||||
|
||||
## Motivation
|
||||
|
||||
## Non Goals (if relevant)
|
||||
|
||||
## Impacted components (e.g. pageserver, safekeeper, console, etc)
|
||||
|
||||
## Proposed implementation
|
||||
|
||||
### Reliability, failure modes and corner cases (if relevant)
|
||||
|
||||
### Interaction/Sequence diagram (if relevant)
|
||||
|
||||
### Scalability (if relevant)
|
||||
|
||||
### Security implications (if relevant)
|
||||
|
||||
### Unresolved questions (if relevant)
|
||||
|
||||
## Alternative implementation (if relevant)
|
||||
|
||||
## Pros/cons of proposed approaches (if relevant)
|
||||
|
||||
## Definition of Done (if relevant)
|
||||
|
||||
```
|
||||
|
||||
30
docs/rfcs/YYYY-MM-DD-copy-me.md
Normal file
30
docs/rfcs/YYYY-MM-DD-copy-me.md
Normal file
@@ -0,0 +1,30 @@
|
||||
# Name
|
||||
|
||||
Created on YYYY-MM-DD
|
||||
Implemented on _TBD_
|
||||
|
||||
## Summary
|
||||
|
||||
## Motivation
|
||||
|
||||
## Non Goals (if relevant)
|
||||
|
||||
## Impacted components (e.g. pageserver, safekeeper, console, etc)
|
||||
|
||||
## Proposed implementation
|
||||
|
||||
### Reliability, failure modes and corner cases (if relevant)
|
||||
|
||||
### Interaction/Sequence diagram (if relevant)
|
||||
|
||||
### Scalability (if relevant)
|
||||
|
||||
### Security implications (if relevant)
|
||||
|
||||
### Unresolved questions (if relevant)
|
||||
|
||||
## Alternative implementation (if relevant)
|
||||
|
||||
## Pros/cons of proposed approaches (if relevant)
|
||||
|
||||
## Definition of Done (if relevant)
|
||||
@@ -101,15 +101,25 @@ changes such as a pageserver node becoming unavailable, or the tenant's shard co
|
||||
postgres clients to handle such changes, the storage controller calls an API hook when a tenant's pageserver
|
||||
location changes.
|
||||
|
||||
The hook is configured using the storage controller's `--control-plane-url` CLI option. If the hook requires
|
||||
JWT auth, the token may be provided with `--control-plane-jwt-token`. The hook will be invoked with a `PUT` request.
|
||||
The hook is configured using the storage controller's `--control-plane-url` CLI option, from which the hook URL is computed.
|
||||
|
||||
In the Neon cloud service, this hook is implemented by Neon's internal cloud control plane. In `neon_local` systems
|
||||
Currently, there is two hooks, each computed by appending the name to the provided control plane URL prefix:
|
||||
|
||||
- `notify-attach`, called whenever attachment for pageservers changes
|
||||
- `notify-safekeepers`, called whenever attachment for safekeepers changes
|
||||
|
||||
If the hooks require JWT auth, the token may be provided with `--control-plane-jwt-token`.
|
||||
The hooks will be invoked with a `PUT` request.
|
||||
|
||||
In the Neon cloud service, these hooks are implemented by Neon's internal cloud control plane. In `neon_local` systems,
|
||||
the storage controller integrates directly with neon_local to reconfigure local postgres processes instead of calling
|
||||
the compute hook.
|
||||
|
||||
When implementing an on-premise Neon deployment, you must implement a service that handles the compute hook. This is not complicated:
|
||||
the request body has format of the `ComputeHookNotifyRequest` structure, provided below for convenience.
|
||||
When implementing an on-premise Neon deployment, you must implement a service that handles the compute hooks. This is not complicated.
|
||||
|
||||
### `notify-attach` body
|
||||
|
||||
The `notify-attach` request body follows the format of the `ComputeHookNotifyRequest` structure, provided below for convenience.
|
||||
|
||||
```
|
||||
struct ComputeHookNotifyRequestShard {
|
||||
@@ -128,15 +138,15 @@ When a notification is received:
|
||||
|
||||
1. Modify postgres configuration for this tenant:
|
||||
|
||||
- set `neon.pageserver_connstr` to a comma-separated list of postgres connection strings to pageservers according to the `shards` list. The
|
||||
- set `neon.pageserver_connstring` to a comma-separated list of postgres connection strings to pageservers according to the `shards` list. The
|
||||
shards identified by `NodeId` must be converted to the address+port of the node.
|
||||
- if stripe_size is not None, set `neon.stripe_size` to this value
|
||||
- if stripe_size is not None, set `neon.shard_stripe_size` to this value
|
||||
|
||||
2. Send SIGHUP to postgres to reload configuration
|
||||
3. Respond with 200 to the notification request. Do not return success if postgres was not updated: if an error is returned, the controller
|
||||
will retry the notification until it succeeds..
|
||||
|
||||
### Example notification body
|
||||
Example body:
|
||||
|
||||
```
|
||||
{
|
||||
@@ -148,3 +158,34 @@ When a notification is received:
|
||||
],
|
||||
}
|
||||
```
|
||||
|
||||
### `notify-safekeepers` body
|
||||
|
||||
The `notify-safekeepers` request body forllows the format of the `SafekeepersNotifyRequest` structure, provided below for convenience.
|
||||
|
||||
```
|
||||
pub struct SafekeeperInfo {
|
||||
pub id: NodeId,
|
||||
pub hostname: String,
|
||||
}
|
||||
|
||||
pub struct SafekeepersNotifyRequest {
|
||||
pub tenant_id: TenantId,
|
||||
pub timeline_id: TimelineId,
|
||||
pub generation: u32,
|
||||
pub safekeepers: Vec<SafekeeperInfo>,
|
||||
}
|
||||
```
|
||||
|
||||
When a notification is received:
|
||||
|
||||
1. Modify postgres configuration for this tenant:
|
||||
|
||||
- set `neon.safekeeper_connstrings` to an array of postgres connection strings to safekeepers according to the `safekeepers` list. The
|
||||
safekeepers identified by `NodeId` must be converted to the address+port of the respective safekeeper.
|
||||
The hostname is provided for debugging purposes, so we reserve changes to how we pass it.
|
||||
- set `neon.safekeepers_generation` to the provided `generation` value.
|
||||
|
||||
2. Send SIGHUP to postgres to reload configuration
|
||||
3. Respond with 200 to the notification request. Do not return success if postgres was not updated: if an error is returned, the controller
|
||||
will retry the notification until it succeeds..
|
||||
@@ -7,6 +7,7 @@ license.workspace = true
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
chrono.workspace = true
|
||||
indexmap.workspace = true
|
||||
jsonwebtoken.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
|
||||
@@ -30,3 +30,9 @@ pub struct SetRoleGrantsRequest {
|
||||
pub privileges: Vec<Privilege>,
|
||||
pub role: PgIdent,
|
||||
}
|
||||
|
||||
/// Request of the /configure_telemetry API
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct ConfigureTelemetryRequest {
|
||||
pub logs_export_host: Option<String>,
|
||||
}
|
||||
|
||||
@@ -139,6 +139,7 @@ pub struct ComputeCtlConfig {
|
||||
/// Set of JSON web keys that the compute can use to authenticate
|
||||
/// communication from the control plane.
|
||||
pub jwks: JwkSet,
|
||||
pub tls: Option<TlsConfig>,
|
||||
}
|
||||
|
||||
impl Default for ComputeCtlConfig {
|
||||
@@ -147,10 +148,17 @@ impl Default for ComputeCtlConfig {
|
||||
jwks: JwkSet {
|
||||
keys: Vec::default(),
|
||||
},
|
||||
tls: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
pub struct TlsConfig {
|
||||
pub key_path: String,
|
||||
pub cert_path: String,
|
||||
}
|
||||
|
||||
/// Response of the `/computes/{compute_id}/spec` control-plane API.
|
||||
#[derive(Deserialize, Debug)]
|
||||
pub struct ControlPlaneSpecResponse {
|
||||
|
||||
@@ -5,12 +5,15 @@
|
||||
//! and connect it to the storage nodes.
|
||||
use std::collections::HashMap;
|
||||
|
||||
use indexmap::IndexMap;
|
||||
use regex::Regex;
|
||||
use remote_storage::RemotePath;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use crate::responses::TlsConfig;
|
||||
|
||||
/// String type alias representing Postgres identifier and
|
||||
/// intended to be used for DB / role names.
|
||||
pub type PgIdent = String;
|
||||
@@ -125,7 +128,7 @@ pub struct ComputeSpec {
|
||||
// information about available remote extensions
|
||||
pub remote_extensions: Option<RemoteExtSpec>,
|
||||
|
||||
pub pgbouncer_settings: Option<HashMap<String, String>>,
|
||||
pub pgbouncer_settings: Option<IndexMap<String, String>>,
|
||||
|
||||
// Stripe size for pageserver sharding, in pages
|
||||
#[serde(default)]
|
||||
@@ -176,8 +179,8 @@ pub enum ComputeFeature {
|
||||
/// track short-lived connections as user activity.
|
||||
ActivityMonitorExperimental,
|
||||
|
||||
/// Pre-install and initialize anon extension for every database in the cluster
|
||||
AnonExtension,
|
||||
/// Allow to configure rsyslog for Postgres logs export
|
||||
PostgresLogsExport,
|
||||
|
||||
/// This is a special feature flag that is used to represent unknown feature flags.
|
||||
/// Basically all unknown to enum flags are represented as this one. See unit test
|
||||
@@ -357,6 +360,9 @@ pub struct LocalProxySpec {
|
||||
#[serde(default)]
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub jwks: Option<Vec<JwksSettings>>,
|
||||
#[serde(default)]
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub tls: Option<TlsConfig>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
|
||||
@@ -208,7 +208,6 @@
|
||||
],
|
||||
"remote_extensions": {
|
||||
"library_index": {
|
||||
"anon": "anon",
|
||||
"postgis-3": "postgis",
|
||||
"libpgrouting-3.4": "postgis",
|
||||
"postgis_raster-3": "postgis",
|
||||
@@ -217,12 +216,6 @@
|
||||
"address_standardizer-3": "postgis"
|
||||
},
|
||||
"extension_data": {
|
||||
"anon": {
|
||||
"archive_path": "5834329303/v15/extensions/anon.tar.zst",
|
||||
"control_data": {
|
||||
"anon.control": "# PostgreSQL Anonymizer (anon) extension\ncomment = ''Data anonymization tools''\ndefault_version = ''1.1.0''\ndirectory=''extension/anon''\nrelocatable = false\nrequires = ''pgcrypto''\nsuperuser = false\nmodule_pathname = ''$libdir/anon''\ntrusted = true\n"
|
||||
}
|
||||
},
|
||||
"postgis": {
|
||||
"archive_path": "5834329303/v15/extensions/postgis.tar.zst",
|
||||
"control_data": {
|
||||
@@ -238,7 +231,6 @@
|
||||
}
|
||||
},
|
||||
"custom_extensions": [
|
||||
"anon"
|
||||
],
|
||||
"public_extensions": [
|
||||
"postgis"
|
||||
|
||||
@@ -7,6 +7,7 @@ license.workspace = true
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
bytes.workspace = true
|
||||
camino.workspace = true
|
||||
fail.workspace = true
|
||||
futures.workspace = true
|
||||
hyper0.workspace = true
|
||||
@@ -16,6 +17,7 @@ once_cell.workspace = true
|
||||
pprof.workspace = true
|
||||
regex.workspace = true
|
||||
routerify.workspace = true
|
||||
rustls-pemfile.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
serde_path_to_error.workspace = true
|
||||
|
||||
@@ -4,6 +4,7 @@ pub mod failpoints;
|
||||
pub mod json;
|
||||
pub mod request;
|
||||
pub mod server;
|
||||
pub mod tls_certs;
|
||||
|
||||
extern crate hyper0 as hyper;
|
||||
|
||||
|
||||
21
libs/http-utils/src/tls_certs.rs
Normal file
21
libs/http-utils/src/tls_certs.rs
Normal file
@@ -0,0 +1,21 @@
|
||||
use camino::Utf8Path;
|
||||
use tokio_rustls::rustls::pki_types::{CertificateDer, PrivateKeyDer};
|
||||
|
||||
pub fn load_cert_chain(filename: &Utf8Path) -> anyhow::Result<Vec<CertificateDer<'static>>> {
|
||||
let file = std::fs::File::open(filename)?;
|
||||
let mut reader = std::io::BufReader::new(file);
|
||||
|
||||
Ok(rustls_pemfile::certs(&mut reader).collect::<Result<Vec<_>, _>>()?)
|
||||
}
|
||||
|
||||
pub fn load_private_key(filename: &Utf8Path) -> anyhow::Result<PrivateKeyDer<'static>> {
|
||||
let file = std::fs::File::open(filename)?;
|
||||
let mut reader = std::io::BufReader::new(file);
|
||||
|
||||
let key = rustls_pemfile::private_key(&mut reader)?;
|
||||
|
||||
key.ok_or(anyhow::anyhow!(
|
||||
"no private key found in {}",
|
||||
filename.as_str(),
|
||||
))
|
||||
}
|
||||
@@ -272,15 +272,16 @@ pub struct TenantConfigToml {
|
||||
/// size exceeds `compaction_upper_limit * checkpoint_distance`.
|
||||
pub compaction_upper_limit: usize,
|
||||
pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
|
||||
/// If true, compact down L0 across all tenant timelines before doing regular compaction.
|
||||
/// If true, compact down L0 across all tenant timelines before doing regular compaction. L0
|
||||
/// compaction must be responsive to avoid read amp during heavy ingestion. Defaults to true.
|
||||
pub compaction_l0_first: bool,
|
||||
/// If true, use a separate semaphore (i.e. concurrency limit) for the L0 compaction pass. Only
|
||||
/// has an effect if `compaction_l0_first` is `true`.
|
||||
/// has an effect if `compaction_l0_first` is true. Defaults to true.
|
||||
pub compaction_l0_semaphore: bool,
|
||||
/// Level0 delta layer threshold at which to delay layer flushes for compaction backpressure,
|
||||
/// such that they take 2x as long, and start waiting for layer flushes during ephemeral layer
|
||||
/// rolls. This helps compaction keep up with WAL ingestion, and avoids read amplification
|
||||
/// blowing up. Should be >compaction_threshold. 0 to disable. Disabled by default.
|
||||
/// Level0 delta layer threshold at which to delay layer flushes such that they take 2x as long,
|
||||
/// and block on layer flushes during ephemeral layer rolls, for compaction backpressure. This
|
||||
/// helps compaction keep up with WAL ingestion, and avoids read amplification blowing up.
|
||||
/// Should be >compaction_threshold. 0 to disable. Defaults to 3x compaction_threshold.
|
||||
pub l0_flush_delay_threshold: Option<usize>,
|
||||
/// Level0 delta layer threshold at which to stall layer flushes. Must be >compaction_threshold
|
||||
/// to avoid deadlock. 0 to disable. Disabled by default.
|
||||
@@ -288,6 +289,8 @@ pub struct TenantConfigToml {
|
||||
/// If true, Level0 delta layer flushes will wait for S3 upload before flushing the next
|
||||
/// layer. This is a temporary backpressure mechanism which should be removed once
|
||||
/// l0_flush_{delay,stall}_threshold is fully enabled.
|
||||
///
|
||||
/// TODO: this is no longer enabled, remove it when the config option is no longer set.
|
||||
pub l0_flush_wait_upload: bool,
|
||||
// Determines how much history is retained, to allow
|
||||
// branching and read replicas at an older point in time.
|
||||
@@ -567,13 +570,15 @@ pub mod tenant_conf_defaults {
|
||||
// be reduced later by optimizing L0 hole calculation to avoid loading all keys into memory). So
|
||||
// with this config, we can get a maximum peak compaction usage of 9 GB.
|
||||
pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 20;
|
||||
pub const DEFAULT_COMPACTION_L0_FIRST: bool = false;
|
||||
// Enable L0 compaction pass and semaphore by default. L0 compaction must be responsive to avoid
|
||||
// read amp.
|
||||
pub const DEFAULT_COMPACTION_L0_FIRST: bool = true;
|
||||
pub const DEFAULT_COMPACTION_L0_SEMAPHORE: bool = true;
|
||||
|
||||
pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm =
|
||||
crate::models::CompactionAlgorithm::Legacy;
|
||||
|
||||
pub const DEFAULT_L0_FLUSH_WAIT_UPLOAD: bool = true;
|
||||
pub const DEFAULT_L0_FLUSH_WAIT_UPLOAD: bool = false;
|
||||
|
||||
pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
|
||||
|
||||
@@ -584,9 +589,8 @@ pub mod tenant_conf_defaults {
|
||||
pub const DEFAULT_GC_PERIOD: &str = "1 hr";
|
||||
pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
|
||||
// If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image
|
||||
// layer creation will end immediately. Set to 0 to disable. The target default will be 3 once we
|
||||
// want to enable this feature.
|
||||
pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 0;
|
||||
// layer creation will end immediately. Set to 0 to disable.
|
||||
pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 3;
|
||||
pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
|
||||
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
|
||||
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
|
||||
|
||||
@@ -176,6 +176,39 @@ impl LsnLease {
|
||||
}
|
||||
}
|
||||
|
||||
/// Controls the detach ancestor behavior.
|
||||
/// - When set to `NoAncestorAndReparent`, we will only detach a branch if its ancestor is a root branch. It will automatically reparent any children of the ancestor before and at the branch point.
|
||||
/// - When set to `MultiLevelAndNoReparent`, we will detach a branch from multiple levels of ancestors, and no reparenting will happen at all.
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
pub enum DetachBehavior {
|
||||
#[default]
|
||||
NoAncestorAndReparent,
|
||||
MultiLevelAndNoReparent,
|
||||
}
|
||||
|
||||
impl std::str::FromStr for DetachBehavior {
|
||||
type Err = &'static str;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s {
|
||||
"no_ancestor_and_reparent" => Ok(DetachBehavior::NoAncestorAndReparent),
|
||||
"multi_level_and_no_reparent" => Ok(DetachBehavior::MultiLevelAndNoReparent),
|
||||
"v1" => Ok(DetachBehavior::NoAncestorAndReparent),
|
||||
"v2" => Ok(DetachBehavior::MultiLevelAndNoReparent),
|
||||
_ => Err("cannot parse detach behavior"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for DetachBehavior {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
DetachBehavior::NoAncestorAndReparent => write!(f, "no_ancestor_and_reparent"),
|
||||
DetachBehavior::MultiLevelAndNoReparent => write!(f, "multi_level_and_no_reparent"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The only [`TenantState`] variants we could be `TenantState::Activating` from.
|
||||
///
|
||||
/// XXX: We used to have more variants here, but now it's just one, which makes this rather
|
||||
|
||||
@@ -221,6 +221,11 @@ pub struct TimelineMembershipSwitchResponse {
|
||||
pub current_conf: Configuration,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Serialize, Deserialize)]
|
||||
pub struct TimelineDeleteResult {
|
||||
pub dir_existed: bool,
|
||||
}
|
||||
|
||||
fn lsn_invalid() -> Lsn {
|
||||
Lsn::INVALID
|
||||
}
|
||||
|
||||
@@ -49,7 +49,13 @@ pub fn bench_log_slow(c: &mut Criterion) {
|
||||
// performance too. Use a simple noop future that yields once, to avoid any scheduler fast
|
||||
// paths for a ready future.
|
||||
if enabled {
|
||||
b.iter(|| runtime.block_on(log_slow("ready", THRESHOLD, tokio::task::yield_now())));
|
||||
b.iter(|| {
|
||||
runtime.block_on(log_slow(
|
||||
"ready",
|
||||
THRESHOLD,
|
||||
std::pin::pin!(tokio::task::yield_now()),
|
||||
))
|
||||
});
|
||||
} else {
|
||||
b.iter(|| runtime.block_on(tokio::task::yield_now()));
|
||||
}
|
||||
|
||||
@@ -331,37 +331,90 @@ impl std::fmt::Debug for SecretString {
|
||||
///
|
||||
/// TODO: consider upgrading this to a warning, but currently it fires too often.
|
||||
#[inline]
|
||||
pub async fn log_slow<O>(name: &str, threshold: Duration, f: impl Future<Output = O>) -> O {
|
||||
// TODO: we unfortunately have to pin the future on the heap, since GetPage futures are huge and
|
||||
// won't fit on the stack.
|
||||
let mut f = Box::pin(f);
|
||||
pub async fn log_slow<F, O>(name: &str, threshold: Duration, f: std::pin::Pin<&mut F>) -> O
|
||||
where
|
||||
F: Future<Output = O>,
|
||||
{
|
||||
monitor_slow_future(
|
||||
threshold,
|
||||
threshold, // period = threshold
|
||||
f,
|
||||
|MonitorSlowFutureCallback {
|
||||
ready,
|
||||
is_slow,
|
||||
elapsed_total,
|
||||
elapsed_since_last_callback: _,
|
||||
}| {
|
||||
if !is_slow {
|
||||
return;
|
||||
}
|
||||
if ready {
|
||||
info!(
|
||||
"slow {name} completed after {:.3}s",
|
||||
elapsed_total.as_secs_f64()
|
||||
);
|
||||
} else {
|
||||
info!(
|
||||
"slow {name} still running after {:.3}s",
|
||||
elapsed_total.as_secs_f64()
|
||||
);
|
||||
}
|
||||
},
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Poll future `fut` to completion, invoking callback `cb` at the given `threshold` and every
|
||||
/// `period` afterwards, and also unconditionally when the future completes.
|
||||
#[inline]
|
||||
pub async fn monitor_slow_future<F, O>(
|
||||
threshold: Duration,
|
||||
period: Duration,
|
||||
mut fut: std::pin::Pin<&mut F>,
|
||||
mut cb: impl FnMut(MonitorSlowFutureCallback),
|
||||
) -> O
|
||||
where
|
||||
F: Future<Output = O>,
|
||||
{
|
||||
let started = Instant::now();
|
||||
let mut attempt = 1;
|
||||
|
||||
let mut last_cb = started;
|
||||
loop {
|
||||
// NB: use timeout_at() instead of timeout() to avoid an extra clock reading in the common
|
||||
// case where the timeout doesn't fire.
|
||||
let deadline = started + attempt * threshold;
|
||||
if let Ok(output) = tokio::time::timeout_at(deadline, &mut f).await {
|
||||
// NB: we check if we exceeded the threshold even if the timeout never fired, because
|
||||
// scheduling or execution delays may cause the future to succeed even if it exceeds the
|
||||
// timeout. This costs an extra unconditional clock reading, but seems worth it to avoid
|
||||
// false negatives.
|
||||
let elapsed = started.elapsed();
|
||||
if elapsed >= threshold {
|
||||
info!("slow {name} completed after {:.3}s", elapsed.as_secs_f64());
|
||||
}
|
||||
let deadline = started + threshold + (attempt - 1) * period;
|
||||
// TODO: still call the callback if the future panics? Copy how we do it for the page_service flush_in_progress counter.
|
||||
let res = tokio::time::timeout_at(deadline, &mut fut).await;
|
||||
let now = Instant::now();
|
||||
let elapsed_total = now - started;
|
||||
cb(MonitorSlowFutureCallback {
|
||||
ready: res.is_ok(),
|
||||
is_slow: elapsed_total >= threshold,
|
||||
elapsed_total,
|
||||
elapsed_since_last_callback: now - last_cb,
|
||||
});
|
||||
last_cb = now;
|
||||
if let Ok(output) = res {
|
||||
return output;
|
||||
}
|
||||
|
||||
let elapsed = started.elapsed().as_secs_f64();
|
||||
info!("slow {name} still running after {elapsed:.3}s",);
|
||||
|
||||
attempt += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// See [`monitor_slow_future`].
|
||||
pub struct MonitorSlowFutureCallback {
|
||||
/// Whether the future completed. If true, there will be no more callbacks.
|
||||
pub ready: bool,
|
||||
/// Whether the future is taking `>=` the specififed threshold duration to complete.
|
||||
/// Monotonic: if true in one callback invocation, true in all subsequent onces.
|
||||
pub is_slow: bool,
|
||||
/// The time elapsed since the [`monitor_slow_future`] was first polled.
|
||||
pub elapsed_total: Duration,
|
||||
/// The time elapsed since the last callback invocation.
|
||||
/// For the initial callback invocation, the time elapsed since the [`monitor_slow_future`] was first polled.
|
||||
pub elapsed_since_last_callback: Duration,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use metrics::IntCounterVec;
|
||||
|
||||
@@ -48,8 +48,6 @@ pprof.workspace = true
|
||||
rand.workspace = true
|
||||
range-set-blaze = { version = "0.1.16", features = ["alloc"] }
|
||||
regex.workspace = true
|
||||
rustls-pemfile.workspace = true
|
||||
rustls-pki-types.workspace = true
|
||||
rustls.workspace = true
|
||||
scopeguard.workspace = true
|
||||
send-future.workspace = true
|
||||
|
||||
@@ -7,7 +7,7 @@ use http_utils::error::HttpErrorBody;
|
||||
use pageserver_api::models::*;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
pub use reqwest::Body as ReqwestBody;
|
||||
use reqwest::{Certificate, IntoUrl, Method, StatusCode};
|
||||
use reqwest::{Certificate, IntoUrl, Method, StatusCode, Url};
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
@@ -458,13 +458,21 @@ impl Client {
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
behavior: Option<DetachBehavior>,
|
||||
) -> Result<AncestorDetached> {
|
||||
let uri = format!(
|
||||
"{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/detach_ancestor",
|
||||
self.mgmt_api_endpoint
|
||||
);
|
||||
let mut uri = Url::parse(&uri)
|
||||
.map_err(|e| Error::ApiError(StatusCode::INTERNAL_SERVER_ERROR, format!("{e}")))?;
|
||||
|
||||
self.request(Method::PUT, &uri, ())
|
||||
if let Some(behavior) = behavior {
|
||||
uri.query_pairs_mut()
|
||||
.append_pair("detach_behavior", &behavior.to_string());
|
||||
}
|
||||
|
||||
self.request(Method::PUT, uri, ())
|
||||
.await?
|
||||
.json()
|
||||
.await
|
||||
|
||||
@@ -30,7 +30,6 @@ use pageserver::{
|
||||
};
|
||||
use postgres_backend::AuthType;
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use rustls_pki_types::{CertificateDer, PrivateKeyDer};
|
||||
use tokio::signal::unix::SignalKind;
|
||||
use tokio::time::Instant;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
@@ -622,8 +621,8 @@ fn start_pageserver(
|
||||
|
||||
let https_task = match https_listener {
|
||||
Some(https_listener) => {
|
||||
let certs = load_certs(&conf.ssl_cert_file)?;
|
||||
let key = load_private_key(&conf.ssl_key_file)?;
|
||||
let certs = http_utils::tls_certs::load_cert_chain(&conf.ssl_cert_file)?;
|
||||
let key = http_utils::tls_certs::load_private_key(&conf.ssl_key_file)?;
|
||||
|
||||
let server_config = rustls::ServerConfig::builder()
|
||||
.with_no_client_auth()
|
||||
@@ -735,25 +734,6 @@ fn start_pageserver(
|
||||
})
|
||||
}
|
||||
|
||||
fn load_certs(filename: &Utf8Path) -> std::io::Result<Vec<CertificateDer<'static>>> {
|
||||
let file = std::fs::File::open(filename)?;
|
||||
let mut reader = std::io::BufReader::new(file);
|
||||
|
||||
rustls_pemfile::certs(&mut reader).collect()
|
||||
}
|
||||
|
||||
fn load_private_key(filename: &Utf8Path) -> anyhow::Result<PrivateKeyDer<'static>> {
|
||||
let file = std::fs::File::open(filename)?;
|
||||
let mut reader = std::io::BufReader::new(file);
|
||||
|
||||
let key = rustls_pemfile::private_key(&mut reader)?;
|
||||
|
||||
key.ok_or(anyhow::anyhow!(
|
||||
"no private key found in {}",
|
||||
filename.as_str(),
|
||||
))
|
||||
}
|
||||
|
||||
async fn create_remote_storage_client(
|
||||
conf: &'static PageServerConf,
|
||||
) -> anyhow::Result<GenericRemoteStorage> {
|
||||
|
||||
@@ -28,9 +28,9 @@ use hyper::{Body, Request, Response, StatusCode, Uri, header};
|
||||
use metrics::launch_timestamp::LaunchTimestamp;
|
||||
use pageserver_api::models::virtual_file::IoMode;
|
||||
use pageserver_api::models::{
|
||||
DownloadRemoteLayersTaskSpawnRequest, IngestAuxFilesRequest, ListAuxFilesRequest,
|
||||
LocationConfig, LocationConfigListResponse, LocationConfigMode, LsnLease, LsnLeaseRequest,
|
||||
OffloadedTimelineInfo, PageTraceEvent, ShardParameters, StatusResponse,
|
||||
DetachBehavior, DownloadRemoteLayersTaskSpawnRequest, IngestAuxFilesRequest,
|
||||
ListAuxFilesRequest, LocationConfig, LocationConfigListResponse, LocationConfigMode, LsnLease,
|
||||
LsnLeaseRequest, OffloadedTimelineInfo, PageTraceEvent, ShardParameters, StatusResponse,
|
||||
TenantConfigPatchRequest, TenantConfigRequest, TenantDetails, TenantInfo,
|
||||
TenantLocationConfigRequest, TenantLocationConfigResponse, TenantScanRemoteStorageResponse,
|
||||
TenantScanRemoteStorageShard, TenantShardLocation, TenantShardSplitRequest,
|
||||
@@ -72,7 +72,6 @@ use crate::tenant::remote_timeline_client::{
|
||||
use crate::tenant::secondary::SecondaryController;
|
||||
use crate::tenant::size::ModelInputs;
|
||||
use crate::tenant::storage_layer::{IoConcurrency, LayerAccessStatsReset, LayerName};
|
||||
use crate::tenant::timeline::detach_ancestor::DetachBehavior;
|
||||
use crate::tenant::timeline::offload::{OffloadError, offload_timeline};
|
||||
use crate::tenant::timeline::{
|
||||
CompactFlags, CompactOptions, CompactRequest, CompactionError, Timeline, WaitLsnTimeout,
|
||||
@@ -2392,6 +2391,7 @@ async fn timeline_checkpoint_handler(
|
||||
let state = get_state(&request);
|
||||
|
||||
let mut flags = EnumSet::empty();
|
||||
flags |= CompactFlags::NoYield; // run compaction to completion
|
||||
if Some(true) == parse_query_param::<_, bool>(&request, "force_l0_compaction")? {
|
||||
flags |= CompactFlags::ForceL0Compaction;
|
||||
}
|
||||
@@ -2507,6 +2507,7 @@ async fn timeline_detach_ancestor_handler(
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
let behavior: Option<DetachBehavior> = parse_query_param(&request, "detach_behavior")?;
|
||||
|
||||
let behavior = behavior.unwrap_or_default();
|
||||
|
||||
let span = tracing::info_span!("detach_ancestor", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id);
|
||||
|
||||
@@ -465,12 +465,40 @@ pub(crate) fn page_cache_errors_inc(error_kind: PageCacheErrorKind) {
|
||||
pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
|
||||
register_histogram!(
|
||||
"pageserver_wait_lsn_seconds",
|
||||
"Time spent waiting for WAL to arrive",
|
||||
"Time spent waiting for WAL to arrive. Updated on completion of the wait_lsn operation.",
|
||||
CRITICAL_OP_BUCKETS.into(),
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) static WAIT_LSN_START_FINISH_COUNTERPAIR: Lazy<IntCounterPairVec> = Lazy::new(|| {
|
||||
register_int_counter_pair_vec!(
|
||||
"pageserver_wait_lsn_started_count",
|
||||
"Number of wait_lsn operations started.",
|
||||
"pageserver_wait_lsn_finished_count",
|
||||
"Number of wait_lsn operations finished.",
|
||||
&["tenant_id", "shard_id", "timeline_id"],
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) static WAIT_LSN_IN_PROGRESS_MICROS: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"pageserver_wait_lsn_in_progress_micros",
|
||||
"Time spent waiting for WAL to arrive, by timeline_id. Updated periodically while waiting.",
|
||||
&["tenant_id", "shard_id", "timeline_id"],
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) static WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS: Lazy<IntCounter> = Lazy::new(|| {
|
||||
register_int_counter!(
|
||||
"pageserver_wait_lsn_in_progress_micros_global",
|
||||
"Time spent waiting for WAL to arrive, globally. Updated periodically while waiting."
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static FLUSH_WAIT_UPLOAD_TIME: Lazy<GaugeVec> = Lazy::new(|| {
|
||||
register_gauge_vec!(
|
||||
"pageserver_flush_wait_upload_seconds",
|
||||
@@ -2830,7 +2858,6 @@ impl StorageTimeMetrics {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct TimelineMetrics {
|
||||
tenant_id: String,
|
||||
shard_id: String,
|
||||
@@ -2863,6 +2890,8 @@ pub(crate) struct TimelineMetrics {
|
||||
pub valid_lsn_lease_count_gauge: UIntGauge,
|
||||
pub wal_records_received: IntCounter,
|
||||
pub storage_io_size: StorageIoSizeMetrics,
|
||||
pub wait_lsn_in_progress_micros: GlobalAndPerTenantIntCounter,
|
||||
pub wait_lsn_start_finish_counterpair: IntCounterPair,
|
||||
shutdown: std::sync::atomic::AtomicBool,
|
||||
}
|
||||
|
||||
@@ -3000,6 +3029,17 @@ impl TimelineMetrics {
|
||||
|
||||
let storage_io_size = StorageIoSizeMetrics::new(&tenant_id, &shard_id, &timeline_id);
|
||||
|
||||
let wait_lsn_in_progress_micros = GlobalAndPerTenantIntCounter {
|
||||
global: WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS.clone(),
|
||||
per_tenant: WAIT_LSN_IN_PROGRESS_MICROS
|
||||
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
|
||||
.unwrap(),
|
||||
};
|
||||
|
||||
let wait_lsn_start_finish_counterpair = WAIT_LSN_START_FINISH_COUNTERPAIR
|
||||
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
|
||||
.unwrap();
|
||||
|
||||
TimelineMetrics {
|
||||
tenant_id,
|
||||
shard_id,
|
||||
@@ -3032,6 +3072,8 @@ impl TimelineMetrics {
|
||||
storage_io_size,
|
||||
valid_lsn_lease_count_gauge,
|
||||
wal_records_received,
|
||||
wait_lsn_in_progress_micros,
|
||||
wait_lsn_start_finish_counterpair,
|
||||
shutdown: std::sync::atomic::AtomicBool::default(),
|
||||
}
|
||||
}
|
||||
@@ -3224,6 +3266,15 @@ impl TimelineMetrics {
|
||||
let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
|
||||
}
|
||||
|
||||
let _ =
|
||||
WAIT_LSN_IN_PROGRESS_MICROS.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
|
||||
{
|
||||
let mut res = [Ok(()), Ok(())];
|
||||
WAIT_LSN_START_FINISH_COUNTERPAIR
|
||||
.remove_label_values(&mut res, &[tenant_id, shard_id, timeline_id]);
|
||||
}
|
||||
|
||||
let _ = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE.remove_label_values(&[
|
||||
SmgrQueryType::GetPageAtLsn.into(),
|
||||
tenant_id,
|
||||
@@ -3836,27 +3887,29 @@ pub mod tokio_epoll_uring {
|
||||
});
|
||||
}
|
||||
|
||||
pub(crate) struct GlobalAndPerTenantIntCounter {
|
||||
global: IntCounter,
|
||||
per_tenant: IntCounter,
|
||||
}
|
||||
|
||||
impl GlobalAndPerTenantIntCounter {
|
||||
#[inline(always)]
|
||||
pub(crate) fn inc(&self) {
|
||||
self.inc_by(1)
|
||||
}
|
||||
#[inline(always)]
|
||||
pub(crate) fn inc_by(&self, n: u64) {
|
||||
self.global.inc_by(n);
|
||||
self.per_tenant.inc_by(n);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) mod tenant_throttling {
|
||||
use metrics::{IntCounter, register_int_counter_vec};
|
||||
use metrics::register_int_counter_vec;
|
||||
use once_cell::sync::Lazy;
|
||||
use utils::shard::TenantShardId;
|
||||
|
||||
pub(crate) struct GlobalAndPerTenantIntCounter {
|
||||
global: IntCounter,
|
||||
per_tenant: IntCounter,
|
||||
}
|
||||
|
||||
impl GlobalAndPerTenantIntCounter {
|
||||
#[inline(always)]
|
||||
pub(crate) fn inc(&self) {
|
||||
self.inc_by(1)
|
||||
}
|
||||
#[inline(always)]
|
||||
pub(crate) fn inc_by(&self, n: u64) {
|
||||
self.global.inc_by(n);
|
||||
self.per_tenant.inc_by(n);
|
||||
}
|
||||
}
|
||||
use super::GlobalAndPerTenantIntCounter;
|
||||
|
||||
pub(crate) struct Metrics<const KIND: usize> {
|
||||
pub(super) count_accounted_start: GlobalAndPerTenantIntCounter,
|
||||
@@ -4102,6 +4155,7 @@ pub fn preinitialize_metrics(conf: &'static PageServerConf) {
|
||||
&CIRCUIT_BREAKERS_BROKEN,
|
||||
&CIRCUIT_BREAKERS_UNBROKEN,
|
||||
&PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL,
|
||||
&WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS,
|
||||
]
|
||||
.into_iter()
|
||||
.for_each(|c| {
|
||||
|
||||
@@ -1106,12 +1106,19 @@ impl PageServerHandler {
|
||||
};
|
||||
|
||||
// Dispatch the batch to the appropriate request handler.
|
||||
let (mut handler_results, span) = log_slow(
|
||||
batch.as_static_str(),
|
||||
LOG_SLOW_GETPAGE_THRESHOLD,
|
||||
self.pagestream_dispatch_batched_message(batch, io_concurrency, ctx),
|
||||
)
|
||||
.await?;
|
||||
let log_slow_name = batch.as_static_str();
|
||||
let (mut handler_results, span) = {
|
||||
// TODO: we unfortunately have to pin the future on the heap, since GetPage futures are huge and
|
||||
// won't fit on the stack.
|
||||
let mut boxpinned =
|
||||
Box::pin(self.pagestream_dispatch_batched_message(batch, io_concurrency, ctx));
|
||||
log_slow(
|
||||
log_slow_name,
|
||||
LOG_SLOW_GETPAGE_THRESHOLD,
|
||||
boxpinned.as_mut(),
|
||||
)
|
||||
.await?
|
||||
};
|
||||
|
||||
// We purposefully don't count flush time into the smgr operation timer.
|
||||
//
|
||||
|
||||
@@ -6559,7 +6559,11 @@ mod tests {
|
||||
|
||||
tline.freeze_and_flush().await?;
|
||||
tline
|
||||
.compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
|
||||
.compact(
|
||||
&CancellationToken::new(),
|
||||
CompactFlags::NoYield.into(),
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let mut writer = tline.writer().await;
|
||||
@@ -6576,7 +6580,11 @@ mod tests {
|
||||
|
||||
tline.freeze_and_flush().await?;
|
||||
tline
|
||||
.compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
|
||||
.compact(
|
||||
&CancellationToken::new(),
|
||||
CompactFlags::NoYield.into(),
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let mut writer = tline.writer().await;
|
||||
@@ -6593,7 +6601,11 @@ mod tests {
|
||||
|
||||
tline.freeze_and_flush().await?;
|
||||
tline
|
||||
.compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
|
||||
.compact(
|
||||
&CancellationToken::new(),
|
||||
CompactFlags::NoYield.into(),
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let mut writer = tline.writer().await;
|
||||
@@ -6610,7 +6622,11 @@ mod tests {
|
||||
|
||||
tline.freeze_and_flush().await?;
|
||||
tline
|
||||
.compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
|
||||
.compact(
|
||||
&CancellationToken::new(),
|
||||
CompactFlags::NoYield.into(),
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(
|
||||
@@ -6693,7 +6709,9 @@ mod tests {
|
||||
timeline.freeze_and_flush().await?;
|
||||
if compact {
|
||||
// this requires timeline to be &Arc<Timeline>
|
||||
timeline.compact(&cancel, EnumSet::empty(), ctx).await?;
|
||||
timeline
|
||||
.compact(&cancel, CompactFlags::NoYield.into(), ctx)
|
||||
.await?;
|
||||
}
|
||||
|
||||
// this doesn't really need to use the timeline_id target, but it is closer to what it
|
||||
@@ -7020,6 +7038,7 @@ mod tests {
|
||||
child_timeline.freeze_and_flush().await?;
|
||||
let mut flags = EnumSet::new();
|
||||
flags.insert(CompactFlags::ForceRepartition);
|
||||
flags.insert(CompactFlags::NoYield);
|
||||
child_timeline
|
||||
.compact(&CancellationToken::new(), flags, &ctx)
|
||||
.await?;
|
||||
@@ -7398,7 +7417,9 @@ mod tests {
|
||||
|
||||
// Perform a cycle of flush, compact, and GC
|
||||
tline.freeze_and_flush().await?;
|
||||
tline.compact(&cancel, EnumSet::empty(), &ctx).await?;
|
||||
tline
|
||||
.compact(&cancel, CompactFlags::NoYield.into(), &ctx)
|
||||
.await?;
|
||||
tenant
|
||||
.gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)
|
||||
.await?;
|
||||
@@ -7727,6 +7748,7 @@ mod tests {
|
||||
let mut flags = EnumSet::new();
|
||||
flags.insert(CompactFlags::ForceImageLayerCreation);
|
||||
flags.insert(CompactFlags::ForceRepartition);
|
||||
flags.insert(CompactFlags::NoYield);
|
||||
flags
|
||||
} else {
|
||||
EnumSet::empty()
|
||||
@@ -7777,7 +7799,9 @@ mod tests {
|
||||
let before_num_l0_delta_files =
|
||||
tline.layers.read().await.layer_map()?.level0_deltas().len();
|
||||
|
||||
tline.compact(&cancel, EnumSet::empty(), &ctx).await?;
|
||||
tline
|
||||
.compact(&cancel, CompactFlags::NoYield.into(), &ctx)
|
||||
.await?;
|
||||
|
||||
let after_num_l0_delta_files = tline.layers.read().await.layer_map()?.level0_deltas().len();
|
||||
|
||||
@@ -7893,7 +7917,6 @@ mod tests {
|
||||
Ok((res, reconstruct_state.get_delta_layers_visited() as usize))
|
||||
}
|
||||
|
||||
#[allow(clippy::needless_range_loop)]
|
||||
for blknum in 0..NUM_KEYS {
|
||||
lsn = Lsn(lsn.0 + 0x10);
|
||||
test_key.field6 = (blknum * STEP) as u32;
|
||||
@@ -7943,6 +7966,7 @@ mod tests {
|
||||
let mut flags = EnumSet::new();
|
||||
flags.insert(CompactFlags::ForceImageLayerCreation);
|
||||
flags.insert(CompactFlags::ForceRepartition);
|
||||
flags.insert(CompactFlags::NoYield);
|
||||
flags
|
||||
},
|
||||
&ctx,
|
||||
@@ -8405,6 +8429,7 @@ mod tests {
|
||||
let mut flags = EnumSet::new();
|
||||
flags.insert(CompactFlags::ForceImageLayerCreation);
|
||||
flags.insert(CompactFlags::ForceRepartition);
|
||||
flags.insert(CompactFlags::NoYield);
|
||||
flags
|
||||
},
|
||||
&ctx,
|
||||
@@ -8472,6 +8497,7 @@ mod tests {
|
||||
let mut flags = EnumSet::new();
|
||||
flags.insert(CompactFlags::ForceImageLayerCreation);
|
||||
flags.insert(CompactFlags::ForceRepartition);
|
||||
flags.insert(CompactFlags::NoYield);
|
||||
flags
|
||||
},
|
||||
&ctx,
|
||||
|
||||
@@ -14,7 +14,7 @@ use futures::StreamExt;
|
||||
use itertools::Itertools;
|
||||
use once_cell::sync::Lazy;
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::models::LocationConfigMode;
|
||||
use pageserver_api::models::{DetachBehavior, LocationConfigMode};
|
||||
use pageserver_api::shard::{
|
||||
ShardCount, ShardIdentity, ShardIndex, ShardNumber, ShardStripeSize, TenantShardId,
|
||||
};
|
||||
@@ -1914,7 +1914,7 @@ impl TenantManager {
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
prepared: PreparedTimelineDetach,
|
||||
behavior: detach_ancestor::DetachBehavior,
|
||||
behavior: DetachBehavior,
|
||||
mut attempt: detach_ancestor::Attempt,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<HashSet<TimelineId>, detach_ancestor::Error> {
|
||||
|
||||
@@ -45,8 +45,9 @@ use pageserver_api::key::{
|
||||
use pageserver_api::keyspace::{KeySpaceAccum, KeySpaceRandomAccum, SparseKeyPartitioning};
|
||||
use pageserver_api::models::{
|
||||
CompactKeyRange, CompactLsnRange, CompactionAlgorithm, CompactionAlgorithmSettings,
|
||||
DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy,
|
||||
InMemoryLayerInfo, LayerMapInfo, LsnLease, PageTraceEvent, RelSizeMigration, TimelineState,
|
||||
DetachBehavior, DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest,
|
||||
EvictionPolicy, InMemoryLayerInfo, LayerMapInfo, LsnLease, PageTraceEvent, RelSizeMigration,
|
||||
TimelineState,
|
||||
};
|
||||
use pageserver_api::reltag::{BlockNumber, RelTag};
|
||||
use pageserver_api::shard::{ShardIdentity, ShardIndex, ShardNumber, TenantShardId};
|
||||
@@ -67,6 +68,7 @@ use tracing::*;
|
||||
use utils::generation::Generation;
|
||||
use utils::guard_arc_swap::GuardArcSwap;
|
||||
use utils::id::TimelineId;
|
||||
use utils::logging::{MonitorSlowFutureCallback, monitor_slow_future};
|
||||
use utils::lsn::{AtomicLsn, Lsn, RecordLsn};
|
||||
use utils::postgres_client::PostgresClientProtocol;
|
||||
use utils::rate_limit::RateLimit;
|
||||
@@ -439,6 +441,8 @@ pub struct Timeline {
|
||||
heatmap_layers_downloader: Mutex<Option<heatmap_layers_downloader::HeatmapLayersDownloader>>,
|
||||
|
||||
pub(crate) rel_size_v2_status: ArcSwapOption<RelSizeMigration>,
|
||||
|
||||
wait_lsn_log_slow: tokio::sync::Semaphore,
|
||||
}
|
||||
|
||||
pub(crate) enum PreviousHeatmap {
|
||||
@@ -1479,17 +1483,67 @@ impl Timeline {
|
||||
WaitLsnTimeout::Default => self.conf.wait_lsn_timeout,
|
||||
};
|
||||
|
||||
let _timer = crate::metrics::WAIT_LSN_TIME.start_timer();
|
||||
let timer = crate::metrics::WAIT_LSN_TIME.start_timer();
|
||||
let start_finish_counterpair_guard = self.metrics.wait_lsn_start_finish_counterpair.guard();
|
||||
|
||||
match self.last_record_lsn.wait_for_timeout(lsn, timeout).await {
|
||||
let wait_for_timeout = self.last_record_lsn.wait_for_timeout(lsn, timeout);
|
||||
let wait_for_timeout = std::pin::pin!(wait_for_timeout);
|
||||
// Use threshold of 1 because even 1 second of wait for ingest is very much abnormal.
|
||||
let log_slow_threshold = Duration::from_secs(1);
|
||||
// Use period of 10 to avoid flooding logs during an outage that affects all timelines.
|
||||
let log_slow_period = Duration::from_secs(10);
|
||||
let mut logging_permit = None;
|
||||
let wait_for_timeout = monitor_slow_future(
|
||||
log_slow_threshold,
|
||||
log_slow_period,
|
||||
wait_for_timeout,
|
||||
|MonitorSlowFutureCallback {
|
||||
ready,
|
||||
is_slow,
|
||||
elapsed_total,
|
||||
elapsed_since_last_callback,
|
||||
}| {
|
||||
self.metrics
|
||||
.wait_lsn_in_progress_micros
|
||||
.inc_by(u64::try_from(elapsed_since_last_callback.as_micros()).unwrap());
|
||||
if !is_slow {
|
||||
return;
|
||||
}
|
||||
// It's slow, see if we should log it.
|
||||
// (We limit the logging to one per invocation per timeline to avoid excessive
|
||||
// logging during an extended broker / networking outage that affects all timelines.)
|
||||
if logging_permit.is_none() {
|
||||
logging_permit = self.wait_lsn_log_slow.try_acquire().ok();
|
||||
}
|
||||
if logging_permit.is_none() {
|
||||
return;
|
||||
}
|
||||
// We log it.
|
||||
if ready {
|
||||
info!(
|
||||
"slow wait_lsn completed after {:.3}s",
|
||||
elapsed_total.as_secs_f64()
|
||||
);
|
||||
} else {
|
||||
info!(
|
||||
"slow wait_lsn still running for {:.3}s",
|
||||
elapsed_total.as_secs_f64()
|
||||
);
|
||||
}
|
||||
},
|
||||
);
|
||||
let res = wait_for_timeout.await;
|
||||
// don't count the time spent waiting for lock below, and also in walreceiver.status(), towards the wait_lsn_time_histo
|
||||
drop(logging_permit);
|
||||
drop(start_finish_counterpair_guard);
|
||||
drop(timer);
|
||||
match res {
|
||||
Ok(()) => Ok(()),
|
||||
Err(e) => {
|
||||
use utils::seqwait::SeqWaitError::*;
|
||||
match e {
|
||||
Shutdown => Err(WaitLsnError::Shutdown),
|
||||
Timeout => {
|
||||
// don't count the time spent waiting for lock below, and also in walreceiver.status(), towards the wait_lsn_time_histo
|
||||
drop(_timer);
|
||||
let walreceiver_status = self.walreceiver_status();
|
||||
Err(WaitLsnError::Timeout(format!(
|
||||
"Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}, WalReceiver status: {}",
|
||||
@@ -2423,8 +2477,9 @@ impl Timeline {
|
||||
}
|
||||
|
||||
fn get_l0_flush_delay_threshold(&self) -> Option<usize> {
|
||||
// Disable L0 flushes by default. This and compaction needs further tuning.
|
||||
const DEFAULT_L0_FLUSH_DELAY_FACTOR: usize = 0; // TODO: default to e.g. 3
|
||||
// By default, delay L0 flushes at 3x the compaction threshold. The compaction threshold
|
||||
// defaults to 10, and L0 compaction is generally able to keep L0 counts below 30.
|
||||
const DEFAULT_L0_FLUSH_DELAY_FACTOR: usize = 3;
|
||||
|
||||
// If compaction is disabled, don't delay.
|
||||
if self.get_compaction_period() == Duration::ZERO {
|
||||
@@ -2452,8 +2507,9 @@ impl Timeline {
|
||||
}
|
||||
|
||||
fn get_l0_flush_stall_threshold(&self) -> Option<usize> {
|
||||
// Disable L0 stalls by default. In ingest benchmarks, we see image compaction take >10
|
||||
// minutes, blocking L0 compaction, and we can't stall L0 flushes for that long.
|
||||
// Disable L0 stalls by default. Stalling can cause unavailability if L0 compaction isn't
|
||||
// responsive, and it can e.g. block on other compaction via the compaction semaphore or
|
||||
// sibling timelines. We need more confidence before enabling this.
|
||||
const DEFAULT_L0_FLUSH_STALL_FACTOR: usize = 0; // TODO: default to e.g. 5
|
||||
|
||||
// If compaction is disabled, don't stall.
|
||||
@@ -2821,6 +2877,8 @@ impl Timeline {
|
||||
heatmap_layers_downloader: Mutex::new(None),
|
||||
|
||||
rel_size_v2_status: ArcSwapOption::from_pointee(rel_size_v2_status),
|
||||
|
||||
wait_lsn_log_slow: tokio::sync::Semaphore::new(1),
|
||||
};
|
||||
|
||||
result.repartition_threshold =
|
||||
@@ -5388,7 +5446,7 @@ impl Timeline {
|
||||
self: &Arc<Timeline>,
|
||||
tenant: &crate::tenant::Tenant,
|
||||
options: detach_ancestor::Options,
|
||||
behavior: detach_ancestor::DetachBehavior,
|
||||
behavior: DetachBehavior,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<detach_ancestor::Progress, detach_ancestor::Error> {
|
||||
detach_ancestor::prepare(self, tenant, behavior, options, ctx).await
|
||||
@@ -5409,7 +5467,7 @@ impl Timeline {
|
||||
prepared: detach_ancestor::PreparedTimelineDetach,
|
||||
ancestor_timeline_id: TimelineId,
|
||||
ancestor_lsn: Lsn,
|
||||
behavior: detach_ancestor::DetachBehavior,
|
||||
behavior: DetachBehavior,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<detach_ancestor::DetachingAndReparenting, detach_ancestor::Error> {
|
||||
detach_ancestor::detach_and_reparent(
|
||||
|
||||
@@ -3189,7 +3189,11 @@ impl Timeline {
|
||||
}
|
||||
|
||||
// TODO: move the below part to the loop body
|
||||
let last_key = last_key.expect("no keys produced during compaction");
|
||||
let Some(last_key) = last_key else {
|
||||
return Err(CompactionError::Other(anyhow!(
|
||||
"no keys produced during compaction"
|
||||
)));
|
||||
};
|
||||
stat.on_unique_key_visited();
|
||||
|
||||
let retention = self
|
||||
|
||||
@@ -3,6 +3,7 @@ use std::sync::Arc;
|
||||
|
||||
use anyhow::Context;
|
||||
use http_utils::error::ApiError;
|
||||
use pageserver_api::models::DetachBehavior;
|
||||
use pageserver_api::models::detach_ancestor::AncestorDetached;
|
||||
use pageserver_api::shard::ShardIdentity;
|
||||
use tokio::sync::Semaphore;
|
||||
@@ -139,30 +140,6 @@ pub(crate) struct Options {
|
||||
pub(crate) copy_concurrency: std::num::NonZeroUsize,
|
||||
}
|
||||
|
||||
/// Controls the detach ancestor behavior.
|
||||
/// - When set to `NoAncestorAndReparent`, we will only detach a branch if its ancestor is a root branch. It will automatically reparent any children of the ancestor before and at the branch point.
|
||||
/// - When set to `MultiLevelAndNoReparent`, we will detach a branch from multiple levels of ancestors, and no reparenting will happen at all.
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
pub enum DetachBehavior {
|
||||
#[default]
|
||||
NoAncestorAndReparent,
|
||||
MultiLevelAndNoReparent,
|
||||
}
|
||||
|
||||
impl std::str::FromStr for DetachBehavior {
|
||||
type Err = &'static str;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s {
|
||||
"no_ancestor_and_reparent" => Ok(DetachBehavior::NoAncestorAndReparent),
|
||||
"multi_level_and_no_reparent" => Ok(DetachBehavior::MultiLevelAndNoReparent),
|
||||
"v1" => Ok(DetachBehavior::NoAncestorAndReparent),
|
||||
"v2" => Ok(DetachBehavior::MultiLevelAndNoReparent),
|
||||
_ => Err("cannot parse detach behavior"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Options {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
|
||||
@@ -2898,6 +2898,11 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
|
||||
relpath(reln->smgr_rlocator, forkNum),
|
||||
InvalidBlockNumber)));
|
||||
|
||||
#ifdef DEBUG_COMPARE_LOCAL
|
||||
if (IS_LOCAL_REL(reln))
|
||||
mdzeroextend(reln, forkNum, blocknum, nblocks, skipFsync);
|
||||
#endif
|
||||
|
||||
/* Don't log any pages if we're not allowed to do so. */
|
||||
if (!XLogInsertAllowed())
|
||||
return;
|
||||
@@ -4171,8 +4176,10 @@ neon_start_unlogged_build(SMgrRelation reln)
|
||||
* FIXME: should we pass isRedo true to create the tablespace dir if it
|
||||
* doesn't exist? Is it needed?
|
||||
*/
|
||||
if (!IsParallelWorker())
|
||||
#ifndef DEBUG_COMPARE_LOCAL
|
||||
if (!IsParallelWorker())
|
||||
mdcreate(reln, MAIN_FORKNUM, false);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -4247,8 +4254,10 @@ neon_end_unlogged_build(SMgrRelation reln)
|
||||
|
||||
forget_cached_relsize(InfoFromNInfoB(rinfob), forknum);
|
||||
mdclose(reln, forknum);
|
||||
#ifndef DEBUG_COMPARE_LOCAL
|
||||
/* use isRedo == true, so that we drop it immediately */
|
||||
mdunlink(rinfob, forknum, true);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
34
poetry.lock
generated
34
poetry.lock
generated
@@ -1491,14 +1491,38 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "jsonnet"
|
||||
version = "0.20.0"
|
||||
description = "Python bindings for Jsonnet - The data templating language"
|
||||
version = "0.21.0rc2"
|
||||
description = "Python bindings for Jsonnet - The data templating language "
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
markers = "python_version < \"3.13\""
|
||||
files = [
|
||||
{file = "jsonnet-0.20.0.tar.gz", hash = "sha256:7e770c7bf3a366b97b650a39430450f77612e74406731eb75c5bd59f3f104d4f"},
|
||||
{file = "jsonnet-0.21.0rc2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8779ac6820fee44ef736df2baedc3ae93e8cd5d672ee105015c2a47fe627a727"},
|
||||
{file = "jsonnet-0.21.0rc2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:99affe8c71e2551465064a8039bb3d1cba27a0b73b2b9ff1b652e06f17d4ea8b"},
|
||||
{file = "jsonnet-0.21.0rc2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a9dffb9aa01013d100ddfb7230d1eeb80f2a8eef712b1825a60cad57106d8bd"},
|
||||
{file = "jsonnet-0.21.0rc2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cca6c95f2879dcab52650b7aa09a4e82a139b084931b1f6f8c840f834fecc08a"},
|
||||
{file = "jsonnet-0.21.0rc2-cp310-cp310-win_amd64.whl", hash = "sha256:016d6afdb302a6d00bf3bce6a0c3d9c093b992e33f9bc67c64a868035892258e"},
|
||||
{file = "jsonnet-0.21.0rc2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e893ab2c9bf10d8ec9e9b0cee8961879c88d0619cc6d8f75ea284a78e06ae32b"},
|
||||
{file = "jsonnet-0.21.0rc2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c06b353cd3daa2781e6cd308e05f2f116396376994bcb5f59aaadbc6a752c7f2"},
|
||||
{file = "jsonnet-0.21.0rc2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eb2bc8e62b73101329072da322f7e2a1bdb3ac530b94669128d1b480e311e55"},
|
||||
{file = "jsonnet-0.21.0rc2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:113766fd0c25620807bcf04d4c739f461c971a4f0e4aece9ba62b4e762de9598"},
|
||||
{file = "jsonnet-0.21.0rc2-cp311-cp311-win_amd64.whl", hash = "sha256:8dab208c2c2760be60f87d1ceb8b28c86b51ed0e31129a7d90cd5fe890b41225"},
|
||||
{file = "jsonnet-0.21.0rc2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:95f5b9dd26a41d6f258d1baa8d22e557051beeed8c52a6202584f1becca9dcb5"},
|
||||
{file = "jsonnet-0.21.0rc2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cecc6d76e2b377260fae0a060097c113e6ac361b8f739903ea7f3f5f64cdebdf"},
|
||||
{file = "jsonnet-0.21.0rc2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aaa2d18224af7e63872ef4a101e93962505456cf5f5439c3cfc25dad6845f8b1"},
|
||||
{file = "jsonnet-0.21.0rc2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2a9063f811554487ed552445e964aeec969cafb266b965029c8d6b091ce47950"},
|
||||
{file = "jsonnet-0.21.0rc2-cp312-cp312-win_amd64.whl", hash = "sha256:80d171182c169761f744ba50068a4ad35d48e52b91d25bf4c7bb9a72f0a04f71"},
|
||||
{file = "jsonnet-0.21.0rc2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3657938f87cb6bc6da20ca631d437b5faf469ca060a7c7def9c8fd2f25a5e06"},
|
||||
{file = "jsonnet-0.21.0rc2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3dcebc30cb991b58bc416ee05e9387004d04716d5c0b89714ff042bd069af5c8"},
|
||||
{file = "jsonnet-0.21.0rc2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ac52c95482df3ed93c908468ca2f40d4825b6baba284b395ddc47bd663b8c3a"},
|
||||
{file = "jsonnet-0.21.0rc2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8b34450823a7a1861de892fef9f29de1b4c19e1a79e27d81ffe7e57646cc89d6"},
|
||||
{file = "jsonnet-0.21.0rc2-cp313-cp313-win_amd64.whl", hash = "sha256:573fd2580e46f4875ec505f1732f9e804b7063cba790342ed6fdafe9a6b30556"},
|
||||
{file = "jsonnet-0.21.0rc2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:871ca1411de3626499bda60b330d37f85a592918f99ba4809089bbb8d4f5bfe4"},
|
||||
{file = "jsonnet-0.21.0rc2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5d33b25a9c5bf9099100b9b16cb385a2876d891fbe639ee9d476fc75c861903a"},
|
||||
{file = "jsonnet-0.21.0rc2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2bac374565c7f89a4675f19fd2b624ed1376519267f4e444f49b6fc0368f6e5"},
|
||||
{file = "jsonnet-0.21.0rc2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:fab7bbd88f9159f88a7350701a97bda24de9e3b9eef14c2501ba8b9224160d60"},
|
||||
{file = "jsonnet-0.21.0rc2-cp39-cp39-win_amd64.whl", hash = "sha256:ed71ffba0fd233a1bca7b0f7be79730792c5383e562a9dc7da152478d9ee1612"},
|
||||
{file = "jsonnet-0.21.0rc2.tar.gz", hash = "sha256:2b83ec4b5a771c3732e0972be23a71f042ad2940db6918d3a52aade69bc394fb"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3820,4 +3844,4 @@ cffi = ["cffi (>=1.11)"]
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = "^3.11"
|
||||
content-hash = "010ffce959bb256880ab5a267048c182e4612b3151f9a94e3bf5d3a7807962fe"
|
||||
content-hash = "715fc8c896dcfa1b15054deeddcdec557ef93af91b26e1c8e4688fe4dbef5296"
|
||||
|
||||
@@ -5,6 +5,7 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{Context, bail, ensure};
|
||||
use arc_swap::ArcSwapOption;
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use clap::Parser;
|
||||
use compute_api::spec::LocalProxySpec;
|
||||
@@ -27,6 +28,7 @@ use crate::config::{
|
||||
};
|
||||
use crate::control_plane::locks::ApiLocks;
|
||||
use crate::control_plane::messages::{EndpointJwksResponse, JwksSettings};
|
||||
use crate::ext::TaskExt;
|
||||
use crate::http::health_server::AppMetrics;
|
||||
use crate::intern::RoleNameInt;
|
||||
use crate::metrics::{Metrics, ThreadPoolMetrics};
|
||||
@@ -190,7 +192,11 @@ pub async fn run() -> anyhow::Result<()> {
|
||||
// 2. The config file is written but the signal hook is not yet received
|
||||
// 3. local_proxy completes startup but has no config loaded, despite there being a registerd config.
|
||||
refresh_config_notify.notify_one();
|
||||
tokio::spawn(refresh_config_loop(args.config_path, refresh_config_notify));
|
||||
tokio::spawn(refresh_config_loop(
|
||||
config,
|
||||
args.config_path,
|
||||
refresh_config_notify,
|
||||
));
|
||||
|
||||
maintenance_tasks.spawn(crate::http::health_server::task_main(
|
||||
metrics_listener,
|
||||
@@ -269,7 +275,7 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
|
||||
};
|
||||
|
||||
Ok(Box::leak(Box::new(ProxyConfig {
|
||||
tls_config: None,
|
||||
tls_config: ArcSwapOption::from(None),
|
||||
metric_collection: None,
|
||||
http_config,
|
||||
authentication_config: AuthenticationConfig {
|
||||
@@ -311,14 +317,16 @@ enum RefreshConfigError {
|
||||
Parse(#[from] serde_json::Error),
|
||||
#[error(transparent)]
|
||||
Validate(anyhow::Error),
|
||||
#[error(transparent)]
|
||||
Tls(anyhow::Error),
|
||||
}
|
||||
|
||||
async fn refresh_config_loop(path: Utf8PathBuf, rx: Arc<Notify>) {
|
||||
async fn refresh_config_loop(config: &ProxyConfig, path: Utf8PathBuf, rx: Arc<Notify>) {
|
||||
let mut init = true;
|
||||
loop {
|
||||
rx.notified().await;
|
||||
|
||||
match refresh_config_inner(&path).await {
|
||||
match refresh_config_inner(config, &path).await {
|
||||
Ok(()) => {}
|
||||
// don't log for file not found errors if this is the first time we are checking
|
||||
// for computes that don't use local_proxy, this is not an error.
|
||||
@@ -327,6 +335,9 @@ async fn refresh_config_loop(path: Utf8PathBuf, rx: Arc<Notify>) {
|
||||
{
|
||||
debug!(error=?e, ?path, "could not read config file");
|
||||
}
|
||||
Err(RefreshConfigError::Tls(e)) => {
|
||||
error!(error=?e, ?path, "could not read TLS certificates");
|
||||
}
|
||||
Err(e) => {
|
||||
error!(error=?e, ?path, "could not read config file");
|
||||
}
|
||||
@@ -336,7 +347,10 @@ async fn refresh_config_loop(path: Utf8PathBuf, rx: Arc<Notify>) {
|
||||
}
|
||||
}
|
||||
|
||||
async fn refresh_config_inner(path: &Utf8Path) -> Result<(), RefreshConfigError> {
|
||||
async fn refresh_config_inner(
|
||||
config: &ProxyConfig,
|
||||
path: &Utf8Path,
|
||||
) -> Result<(), RefreshConfigError> {
|
||||
let bytes = tokio::fs::read(&path).await?;
|
||||
let data: LocalProxySpec = serde_json::from_slice(&bytes)?;
|
||||
|
||||
@@ -406,5 +420,20 @@ async fn refresh_config_inner(path: &Utf8Path) -> Result<(), RefreshConfigError>
|
||||
info!("successfully loaded new config");
|
||||
JWKS_ROLE_MAP.store(Some(Arc::new(EndpointJwksResponse { jwks: jwks_set })));
|
||||
|
||||
if let Some(tls_config) = data.tls {
|
||||
let tls_config = tokio::task::spawn_blocking(move || {
|
||||
crate::tls::server_config::configure_tls(
|
||||
&tls_config.key_path,
|
||||
&tls_config.cert_path,
|
||||
None,
|
||||
false,
|
||||
)
|
||||
})
|
||||
.await
|
||||
.propagate_task_panic()
|
||||
.map_err(RefreshConfigError::Tls)?;
|
||||
config.tls_config.store(Some(Arc::new(tls_config)));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::bail;
|
||||
use arc_swap::ArcSwapOption;
|
||||
use futures::future::Either;
|
||||
use remote_storage::RemoteStorageConfig;
|
||||
use tokio::net::TcpListener;
|
||||
@@ -563,6 +564,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
|
||||
(None, None) => None,
|
||||
_ => bail!("either both or neither tls-key and tls-cert must be specified"),
|
||||
};
|
||||
let tls_config = ArcSwapOption::from(tls_config.map(Arc::new));
|
||||
|
||||
let backup_metric_collection_config = config::MetricBackupCollectionConfig {
|
||||
remote_storage_config: args.metric_backup_collection_remote_storage.clone(),
|
||||
|
||||
@@ -3,6 +3,7 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{Context, Ok, bail, ensure};
|
||||
use arc_swap::ArcSwapOption;
|
||||
use clap::ValueEnum;
|
||||
use remote_storage::RemoteStorageConfig;
|
||||
|
||||
@@ -17,7 +18,7 @@ pub use crate::tls::server_config::{TlsConfig, configure_tls};
|
||||
use crate::types::Host;
|
||||
|
||||
pub struct ProxyConfig {
|
||||
pub tls_config: Option<TlsConfig>,
|
||||
pub tls_config: ArcSwapOption<TlsConfig>,
|
||||
pub metric_collection: Option<MetricCollectionConfig>,
|
||||
pub http_config: HttpConfig,
|
||||
pub authentication_config: AuthenticationConfig,
|
||||
|
||||
@@ -177,7 +177,8 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
let proto = ctx.protocol();
|
||||
let request_gauge = metrics.connection_requests.guard(proto);
|
||||
|
||||
let tls = config.tls_config.as_ref();
|
||||
let tls = config.tls_config.load();
|
||||
let tls = tls.as_deref();
|
||||
|
||||
let record_handshake_error = !ctx.has_private_peer_addr();
|
||||
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
|
||||
|
||||
@@ -30,7 +30,16 @@ pub struct Metrics {
|
||||
static SELF: OnceLock<Metrics> = OnceLock::new();
|
||||
impl Metrics {
|
||||
pub fn install(thread_pool: Arc<ThreadPoolMetrics>) {
|
||||
SELF.set(Metrics::new(thread_pool))
|
||||
let mut metrics = Metrics::new(thread_pool);
|
||||
|
||||
metrics.proxy.errors_total.init_all_dense();
|
||||
metrics.proxy.redis_errors_total.init_all_dense();
|
||||
metrics.proxy.redis_events_count.init_all_dense();
|
||||
metrics.proxy.retries_metric.init_all_dense();
|
||||
metrics.proxy.invalid_endpoints_total.init_all_dense();
|
||||
metrics.proxy.connection_failures_total.init_all_dense();
|
||||
|
||||
SELF.set(metrics)
|
||||
.ok()
|
||||
.expect("proxy metrics must not be installed more than once");
|
||||
}
|
||||
|
||||
@@ -114,7 +114,7 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
|
||||
let mut read_buf = read_buf.reader();
|
||||
let mut res = Ok(());
|
||||
let accept = tokio_rustls::TlsAcceptor::from(tls.to_server_config())
|
||||
let accept = tokio_rustls::TlsAcceptor::from(tls.pg_config.clone())
|
||||
.accept_with(raw, |session| {
|
||||
// push the early data to the tls session
|
||||
while !read_buf.get_ref().is_empty() {
|
||||
|
||||
@@ -278,7 +278,8 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
let proto = ctx.protocol();
|
||||
let request_gauge = metrics.connection_requests.guard(proto);
|
||||
|
||||
let tls = config.tls_config.as_ref();
|
||||
let tls = config.tls_config.load();
|
||||
let tls = tls.as_deref();
|
||||
|
||||
let record_handshake_error = !ctx.has_private_peer_addr();
|
||||
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
|
||||
|
||||
@@ -10,7 +10,7 @@ use crate::config::ComputeConfig;
|
||||
use crate::control_plane::messages::MetricsAuxInfo;
|
||||
use crate::metrics::{Direction, Metrics, NumClientConnectionsGuard, NumConnectionRequestsGuard};
|
||||
use crate::stream::Stream;
|
||||
use crate::usage_metrics::{Ids, MetricCounterRecorder, TrafficDirection, USAGE_METRICS};
|
||||
use crate::usage_metrics::{Ids, MetricCounterRecorder, USAGE_METRICS};
|
||||
|
||||
/// Forward bytes in both directions (client <-> compute).
|
||||
#[tracing::instrument(skip_all)]
|
||||
@@ -24,7 +24,6 @@ pub(crate) async fn proxy_pass(
|
||||
let usage_tx = USAGE_METRICS.register(Ids {
|
||||
endpoint_id: aux.endpoint_id,
|
||||
branch_id: aux.branch_id,
|
||||
direction: TrafficDirection::Egress,
|
||||
private_link_id,
|
||||
});
|
||||
|
||||
@@ -47,6 +46,7 @@ pub(crate) async fn proxy_pass(
|
||||
|cnt| {
|
||||
// Number of bytes the client sent to the compute node (inbound).
|
||||
metrics.get_metric(m_recv).inc_by(cnt as u64);
|
||||
usage_tx.record_ingress(cnt as u64);
|
||||
},
|
||||
);
|
||||
|
||||
|
||||
@@ -96,16 +96,18 @@ fn generate_tls_config<'a>(
|
||||
.with_safe_default_protocol_versions()
|
||||
.context("ring should support the default protocol versions")?
|
||||
.with_no_client_auth()
|
||||
.with_single_cert(vec![cert.clone()], key.clone_key())?
|
||||
.into();
|
||||
.with_single_cert(vec![cert.clone()], key.clone_key())?;
|
||||
|
||||
let mut cert_resolver = CertResolver::new();
|
||||
cert_resolver.add_cert(key, vec![cert], true)?;
|
||||
|
||||
let common_names = cert_resolver.get_common_names();
|
||||
|
||||
let config = Arc::new(config);
|
||||
|
||||
TlsConfig {
|
||||
config,
|
||||
http_config: config.clone(),
|
||||
pg_config: config,
|
||||
common_names,
|
||||
cert_resolver: Arc::new(cert_resolver),
|
||||
}
|
||||
|
||||
@@ -22,7 +22,7 @@ use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo};
|
||||
use crate::metrics::{HttpEndpointPoolsGuard, Metrics};
|
||||
use crate::protocol2::ConnectionInfoExtra;
|
||||
use crate::types::{DbName, EndpointCacheKey, RoleName};
|
||||
use crate::usage_metrics::{Ids, MetricCounter, TrafficDirection, USAGE_METRICS};
|
||||
use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct ConnInfo {
|
||||
@@ -639,11 +639,7 @@ impl<C: ClientInnerExt> Client<C> {
|
||||
(&mut inner.inner, Discard { conn_info, pool })
|
||||
}
|
||||
|
||||
pub(crate) fn metrics(
|
||||
&self,
|
||||
direction: TrafficDirection,
|
||||
ctx: &RequestContext,
|
||||
) -> Arc<MetricCounter> {
|
||||
pub(crate) fn metrics(&self, ctx: &RequestContext) -> Arc<MetricCounter> {
|
||||
let aux = &self
|
||||
.inner
|
||||
.as_ref()
|
||||
@@ -659,7 +655,6 @@ impl<C: ClientInnerExt> Client<C> {
|
||||
USAGE_METRICS.register(Ids {
|
||||
endpoint_id: aux.endpoint_id,
|
||||
branch_id: aux.branch_id,
|
||||
direction,
|
||||
private_link_id,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo};
|
||||
use crate::metrics::{HttpEndpointPoolsGuard, Metrics};
|
||||
use crate::protocol2::ConnectionInfoExtra;
|
||||
use crate::types::EndpointCacheKey;
|
||||
use crate::usage_metrics::{Ids, MetricCounter, TrafficDirection, USAGE_METRICS};
|
||||
use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS};
|
||||
|
||||
pub(crate) type Send = http2::SendRequest<hyper::body::Incoming>;
|
||||
pub(crate) type Connect = http2::Connection<TokioIo<AsyncRW>, hyper::body::Incoming, TokioExecutor>;
|
||||
@@ -265,11 +265,7 @@ impl<C: ClientInnerExt + Clone> Client<C> {
|
||||
Self { inner }
|
||||
}
|
||||
|
||||
pub(crate) fn metrics(
|
||||
&self,
|
||||
direction: TrafficDirection,
|
||||
ctx: &RequestContext,
|
||||
) -> Arc<MetricCounter> {
|
||||
pub(crate) fn metrics(&self, ctx: &RequestContext) -> Arc<MetricCounter> {
|
||||
let aux = &self.inner.aux;
|
||||
|
||||
let private_link_id = match ctx.extra() {
|
||||
@@ -281,7 +277,6 @@ impl<C: ClientInnerExt + Clone> Client<C> {
|
||||
USAGE_METRICS.register(Ids {
|
||||
endpoint_id: aux.endpoint_id,
|
||||
branch_id: aux.branch_id,
|
||||
direction,
|
||||
private_link_id,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@ use std::pin::{Pin, pin};
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Context;
|
||||
use arc_swap::ArcSwapOption;
|
||||
use async_trait::async_trait;
|
||||
use atomic_take::AtomicTake;
|
||||
use bytes::Bytes;
|
||||
@@ -117,18 +118,7 @@ pub async fn task_main(
|
||||
auth_backend,
|
||||
endpoint_rate_limiter: Arc::clone(&endpoint_rate_limiter),
|
||||
});
|
||||
let tls_acceptor: Arc<dyn MaybeTlsAcceptor> = match config.tls_config.as_ref() {
|
||||
Some(config) => {
|
||||
let mut tls_server_config = rustls::ServerConfig::clone(&config.to_server_config());
|
||||
// prefer http2, but support http/1.1
|
||||
tls_server_config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
|
||||
Arc::new(tls_server_config)
|
||||
}
|
||||
None => {
|
||||
warn!("TLS config is missing");
|
||||
Arc::new(NoTls)
|
||||
}
|
||||
};
|
||||
let tls_acceptor: Arc<dyn MaybeTlsAcceptor> = Arc::new(&config.tls_config);
|
||||
|
||||
let connections = tokio_util::task::task_tracker::TaskTracker::new();
|
||||
connections.close(); // allows `connections.wait to complete`
|
||||
@@ -216,22 +206,20 @@ pub(crate) type AsyncRW = Pin<Box<dyn AsyncReadWrite>>;
|
||||
|
||||
#[async_trait]
|
||||
trait MaybeTlsAcceptor: Send + Sync + 'static {
|
||||
async fn accept(self: Arc<Self>, conn: ChainRW<TcpStream>) -> std::io::Result<AsyncRW>;
|
||||
async fn accept(&self, conn: ChainRW<TcpStream>) -> std::io::Result<AsyncRW>;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl MaybeTlsAcceptor for rustls::ServerConfig {
|
||||
async fn accept(self: Arc<Self>, conn: ChainRW<TcpStream>) -> std::io::Result<AsyncRW> {
|
||||
Ok(Box::pin(TlsAcceptor::from(self).accept(conn).await?))
|
||||
}
|
||||
}
|
||||
|
||||
struct NoTls;
|
||||
|
||||
#[async_trait]
|
||||
impl MaybeTlsAcceptor for NoTls {
|
||||
async fn accept(self: Arc<Self>, conn: ChainRW<TcpStream>) -> std::io::Result<AsyncRW> {
|
||||
Ok(Box::pin(conn))
|
||||
impl MaybeTlsAcceptor for &'static ArcSwapOption<crate::config::TlsConfig> {
|
||||
async fn accept(&self, conn: ChainRW<TcpStream>) -> std::io::Result<AsyncRW> {
|
||||
match &*self.load() {
|
||||
Some(config) => Ok(Box::pin(
|
||||
TlsAcceptor::from(config.http_config.clone())
|
||||
.accept(conn)
|
||||
.await?,
|
||||
)),
|
||||
None => Ok(Box::pin(conn)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@ use crate::metrics::{HttpDirection, Metrics};
|
||||
use crate::proxy::{NeonOptions, run_until_cancelled};
|
||||
use crate::serverless::backend::HttpConnError;
|
||||
use crate::types::{DbName, RoleName};
|
||||
use crate::usage_metrics::{MetricCounter, MetricCounterRecorder, TrafficDirection};
|
||||
use crate::usage_metrics::{MetricCounter, MetricCounterRecorder};
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
@@ -614,7 +614,9 @@ async fn handle_inner(
|
||||
&config.authentication_config,
|
||||
ctx,
|
||||
request.headers(),
|
||||
config.tls_config.as_ref(),
|
||||
// todo: race condition?
|
||||
// we're unlikely to change the common names.
|
||||
config.tls_config.load().as_deref(),
|
||||
)?;
|
||||
info!(
|
||||
user = conn_info.conn_info.user_info.user.as_str(),
|
||||
@@ -661,6 +663,7 @@ async fn handle_db_inner(
|
||||
|
||||
let parsed_headers = HttpHeaders::try_parse(headers)?;
|
||||
|
||||
let mut request_len = 0;
|
||||
let fetch_and_process_request = Box::pin(
|
||||
async {
|
||||
let body = read_body_with_limit(
|
||||
@@ -669,6 +672,8 @@ async fn handle_db_inner(
|
||||
)
|
||||
.await?;
|
||||
|
||||
request_len = body.len();
|
||||
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.http_conn_content_length_bytes
|
||||
@@ -763,7 +768,7 @@ async fn handle_db_inner(
|
||||
}
|
||||
};
|
||||
|
||||
let metrics = client.metrics(TrafficDirection::Egress, ctx);
|
||||
let metrics = client.metrics(ctx);
|
||||
|
||||
let len = json_output.len();
|
||||
let response = response
|
||||
@@ -779,6 +784,8 @@ async fn handle_db_inner(
|
||||
// count the egress bytes - we miss the TLS and header overhead but oh well...
|
||||
// moving this later in the stack is going to be a lot of effort and ehhhh
|
||||
metrics.record_egress(len as u64);
|
||||
metrics.record_ingress(request_len as u64);
|
||||
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.http_conn_content_length_bytes
|
||||
@@ -836,7 +843,7 @@ async fn handle_auth_broker_inner(
|
||||
.expect("all headers and params received via hyper should be valid for request");
|
||||
|
||||
// todo: map body to count egress
|
||||
let _metrics = client.metrics(TrafficDirection::Egress, ctx);
|
||||
let _metrics = client.metrics(ctx);
|
||||
|
||||
Ok(client
|
||||
.inner
|
||||
@@ -1166,10 +1173,10 @@ enum Discard<'a> {
|
||||
}
|
||||
|
||||
impl Client {
|
||||
fn metrics(&self, direction: TrafficDirection, ctx: &RequestContext) -> Arc<MetricCounter> {
|
||||
fn metrics(&self, ctx: &RequestContext) -> Arc<MetricCounter> {
|
||||
match self {
|
||||
Client::Remote(client) => client.metrics(direction, ctx),
|
||||
Client::Local(local_client) => local_client.metrics(direction, ctx),
|
||||
Client::Remote(client) => client.metrics(ctx),
|
||||
Client::Local(local_client) => local_client.metrics(ctx),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -9,17 +9,14 @@ use rustls::pki_types::{CertificateDer, PrivateKeyDer};
|
||||
use super::{PG_ALPN_PROTOCOL, TlsServerEndPoint};
|
||||
|
||||
pub struct TlsConfig {
|
||||
pub config: Arc<rustls::ServerConfig>,
|
||||
// unfortunate split since we cannot change the ALPN on demand.
|
||||
// <https://github.com/rustls/rustls/issues/2260>
|
||||
pub http_config: Arc<rustls::ServerConfig>,
|
||||
pub pg_config: Arc<rustls::ServerConfig>,
|
||||
pub common_names: HashSet<String>,
|
||||
pub cert_resolver: Arc<CertResolver>,
|
||||
}
|
||||
|
||||
impl TlsConfig {
|
||||
pub fn to_server_config(&self) -> Arc<rustls::ServerConfig> {
|
||||
self.config.clone()
|
||||
}
|
||||
}
|
||||
|
||||
/// Configure TLS for the main endpoint.
|
||||
pub fn configure_tls(
|
||||
key_path: &str,
|
||||
@@ -71,8 +68,15 @@ pub fn configure_tls(
|
||||
config.key_log = Arc::new(rustls::KeyLogFile::new());
|
||||
}
|
||||
|
||||
let mut http_config = config.clone();
|
||||
let mut pg_config = config;
|
||||
|
||||
http_config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
|
||||
pg_config.alpn_protocols = vec![b"postgresql".to_vec()];
|
||||
|
||||
Ok(TlsConfig {
|
||||
config: Arc::new(config),
|
||||
http_config: Arc::new(http_config),
|
||||
pg_config: Arc::new(pg_config),
|
||||
common_names,
|
||||
cert_resolver,
|
||||
})
|
||||
|
||||
@@ -44,11 +44,17 @@ const HTTP_REPORTING_RETRY_DURATION: Duration = Duration::from_secs(60);
|
||||
pub(crate) struct Ids {
|
||||
pub(crate) endpoint_id: EndpointIdInt,
|
||||
pub(crate) branch_id: BranchIdInt,
|
||||
pub(crate) direction: TrafficDirection,
|
||||
#[serde(with = "none_as_empty_string")]
|
||||
pub(crate) private_link_id: Option<SmolStr>,
|
||||
}
|
||||
|
||||
#[derive(Eq, Hash, PartialEq, Serialize, Deserialize, Debug, Clone)]
|
||||
struct Extra {
|
||||
#[serde(flatten)]
|
||||
ids: Ids,
|
||||
direction: TrafficDirection,
|
||||
}
|
||||
|
||||
mod none_as_empty_string {
|
||||
use serde::Deserialize;
|
||||
use smol_str::SmolStr;
|
||||
@@ -76,18 +82,23 @@ pub(crate) enum TrafficDirection {
|
||||
pub(crate) trait MetricCounterRecorder {
|
||||
/// Record that some bytes were sent from the proxy to the client
|
||||
fn record_egress(&self, bytes: u64);
|
||||
|
||||
/// Record that some bytes were sent from the client to the proxy
|
||||
fn record_ingress(&self, bytes: u64);
|
||||
|
||||
/// Record that some connections were opened
|
||||
fn record_connection(&self, count: usize);
|
||||
}
|
||||
|
||||
trait MetricCounterReporter {
|
||||
fn get_metrics(&mut self) -> (u64, usize);
|
||||
fn move_metrics(&self) -> (u64, usize);
|
||||
fn get_metrics(&mut self) -> MetricsData;
|
||||
fn move_metrics(&self) -> MetricsData;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct MetricCounter {
|
||||
transmitted: AtomicU64,
|
||||
received: AtomicU64,
|
||||
opened_connections: AtomicUsize,
|
||||
}
|
||||
|
||||
@@ -97,6 +108,11 @@ impl MetricCounterRecorder for MetricCounter {
|
||||
self.transmitted.fetch_add(bytes, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
/// Record that some bytes were sent from the proxy to the client
|
||||
fn record_ingress(&self, bytes: u64) {
|
||||
self.received.fetch_add(bytes, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
/// Record that some connections were opened
|
||||
fn record_connection(&self, count: usize) {
|
||||
self.opened_connections.fetch_add(count, Ordering::Relaxed);
|
||||
@@ -104,29 +120,43 @@ impl MetricCounterRecorder for MetricCounter {
|
||||
}
|
||||
|
||||
impl MetricCounterReporter for MetricCounter {
|
||||
fn get_metrics(&mut self) -> (u64, usize) {
|
||||
(
|
||||
*self.transmitted.get_mut(),
|
||||
*self.opened_connections.get_mut(),
|
||||
)
|
||||
fn get_metrics(&mut self) -> MetricsData {
|
||||
MetricsData {
|
||||
received: *self.received.get_mut(),
|
||||
transmitted: *self.transmitted.get_mut(),
|
||||
connections: *self.opened_connections.get_mut(),
|
||||
}
|
||||
}
|
||||
fn move_metrics(&self) -> (u64, usize) {
|
||||
(
|
||||
self.transmitted.swap(0, Ordering::Relaxed),
|
||||
self.opened_connections.swap(0, Ordering::Relaxed),
|
||||
)
|
||||
|
||||
fn move_metrics(&self) -> MetricsData {
|
||||
MetricsData {
|
||||
received: self.received.swap(0, Ordering::Relaxed),
|
||||
transmitted: self.transmitted.swap(0, Ordering::Relaxed),
|
||||
connections: self.opened_connections.swap(0, Ordering::Relaxed),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct MetricsData {
|
||||
transmitted: u64,
|
||||
received: u64,
|
||||
connections: usize,
|
||||
}
|
||||
|
||||
struct BytesSent {
|
||||
transmitted: u64,
|
||||
received: u64,
|
||||
}
|
||||
|
||||
trait Clearable {
|
||||
/// extract the value that should be reported
|
||||
fn should_report(self: &Arc<Self>) -> Option<u64>;
|
||||
fn should_report(self: &Arc<Self>) -> Option<BytesSent>;
|
||||
/// Determine whether the counter should be cleared from the global map.
|
||||
fn should_clear(self: &mut Arc<Self>) -> bool;
|
||||
}
|
||||
|
||||
impl<C: MetricCounterReporter> Clearable for C {
|
||||
fn should_report(self: &Arc<Self>) -> Option<u64> {
|
||||
fn should_report(self: &Arc<Self>) -> Option<BytesSent> {
|
||||
// heuristic to see if the branch is still open
|
||||
// if a clone happens while we are observing, the heuristic will be incorrect.
|
||||
//
|
||||
@@ -139,14 +169,21 @@ impl<C: MetricCounterReporter> Clearable for C {
|
||||
// (to avoid sending the same metrics twice)
|
||||
// see the relevant discussion on why to do so even if the status is not success:
|
||||
// https://github.com/neondatabase/neon/pull/4563#discussion_r1246710956
|
||||
let (value, opened) = self.move_metrics();
|
||||
let MetricsData {
|
||||
transmitted,
|
||||
received,
|
||||
connections,
|
||||
} = self.move_metrics();
|
||||
|
||||
// Our only requirement is that we report in every interval if there was an open connection
|
||||
// if there were no opened connections since, then we don't need to report
|
||||
if value == 0 && !is_open && opened == 0 {
|
||||
if transmitted == 0 && received == 0 && !is_open && connections == 0 {
|
||||
None
|
||||
} else {
|
||||
Some(value)
|
||||
Some(BytesSent {
|
||||
transmitted,
|
||||
received,
|
||||
})
|
||||
}
|
||||
}
|
||||
fn should_clear(self: &mut Arc<Self>) -> bool {
|
||||
@@ -154,9 +191,13 @@ impl<C: MetricCounterReporter> Clearable for C {
|
||||
let Some(counter) = Arc::get_mut(self) else {
|
||||
return false;
|
||||
};
|
||||
let (opened, value) = counter.get_metrics();
|
||||
let MetricsData {
|
||||
transmitted,
|
||||
received,
|
||||
connections,
|
||||
} = counter.get_metrics();
|
||||
// clear if there's no data to report
|
||||
value == 0 && opened == 0
|
||||
transmitted == 0 && received == 0 && connections == 0
|
||||
}
|
||||
}
|
||||
|
||||
@@ -178,6 +219,7 @@ impl Metrics {
|
||||
.entry(ids)
|
||||
.or_insert_with(|| {
|
||||
Arc::new(MetricCounter {
|
||||
received: AtomicU64::new(0),
|
||||
transmitted: AtomicU64::new(0),
|
||||
opened_connections: AtomicUsize::new(0),
|
||||
})
|
||||
@@ -242,10 +284,10 @@ pub async fn task_main(config: &MetricCollectionConfig) -> anyhow::Result<Infall
|
||||
|
||||
fn collect_and_clear_metrics<C: Clearable>(
|
||||
endpoints: &ClashMap<Ids, Arc<C>, FastHasher>,
|
||||
) -> Vec<(Ids, u64)> {
|
||||
) -> Vec<(Ids, BytesSent)> {
|
||||
let mut metrics_to_clear = Vec::new();
|
||||
|
||||
let metrics_to_send: Vec<(Ids, u64)> = endpoints
|
||||
let metrics_to_send: Vec<(Ids, BytesSent)> = endpoints
|
||||
.iter()
|
||||
.filter_map(|counter| {
|
||||
let key = counter.key().clone();
|
||||
@@ -271,26 +313,46 @@ fn collect_and_clear_metrics<C: Clearable>(
|
||||
}
|
||||
|
||||
fn create_event_chunks<'a>(
|
||||
metrics_to_send: &'a [(Ids, u64)],
|
||||
metrics_to_send: &'a [(Ids, BytesSent)],
|
||||
hostname: &'a str,
|
||||
prev: DateTime<Utc>,
|
||||
now: DateTime<Utc>,
|
||||
chunk_size: usize,
|
||||
) -> impl Iterator<Item = EventChunk<'a, Event<Ids, &'static str>>> + 'a {
|
||||
) -> impl Iterator<Item = EventChunk<'a, Event<Extra, &'static str>>> + 'a {
|
||||
metrics_to_send
|
||||
.chunks(chunk_size)
|
||||
.map(move |chunk| EventChunk {
|
||||
events: chunk
|
||||
.iter()
|
||||
.map(|(ids, value)| Event {
|
||||
kind: EventType::Incremental {
|
||||
start_time: prev,
|
||||
stop_time: now,
|
||||
},
|
||||
metric: PROXY_IO_BYTES_PER_CLIENT,
|
||||
idempotency_key: idempotency_key(hostname),
|
||||
value: *value,
|
||||
extra: ids.clone(),
|
||||
.flat_map(|(ids, bytes)| {
|
||||
[
|
||||
Event {
|
||||
kind: EventType::Incremental {
|
||||
start_time: prev,
|
||||
stop_time: now,
|
||||
},
|
||||
metric: PROXY_IO_BYTES_PER_CLIENT,
|
||||
idempotency_key: idempotency_key(hostname),
|
||||
value: bytes.transmitted,
|
||||
extra: Extra {
|
||||
ids: ids.clone(),
|
||||
direction: TrafficDirection::Egress,
|
||||
},
|
||||
},
|
||||
Event {
|
||||
kind: EventType::Incremental {
|
||||
start_time: prev,
|
||||
stop_time: now,
|
||||
},
|
||||
metric: PROXY_IO_BYTES_PER_CLIENT,
|
||||
idempotency_key: idempotency_key(hostname),
|
||||
value: bytes.received,
|
||||
extra: Extra {
|
||||
ids: ids.clone(),
|
||||
direction: TrafficDirection::Ingress,
|
||||
},
|
||||
},
|
||||
]
|
||||
})
|
||||
.collect(),
|
||||
})
|
||||
@@ -350,7 +412,7 @@ fn create_remote_path_prefix(now: DateTime<Utc>) -> String {
|
||||
async fn upload_main_events_chunked(
|
||||
client: &http::ClientWithMiddleware,
|
||||
metric_collection_endpoint: &reqwest::Url,
|
||||
chunk: &EventChunk<'_, Event<Ids, &str>>,
|
||||
chunk: &EventChunk<'_, Event<Extra, &str>>,
|
||||
subchunk_size: usize,
|
||||
) {
|
||||
// Split into smaller chunks to avoid exceeding the max request size
|
||||
@@ -384,7 +446,7 @@ async fn upload_main_events_chunked(
|
||||
|
||||
async fn upload_backup_events(
|
||||
storage: Option<&GenericRemoteStorage>,
|
||||
chunk: &EventChunk<'_, Event<Ids, &'static str>>,
|
||||
chunk: &EventChunk<'_, Event<Extra, &'static str>>,
|
||||
path_prefix: &str,
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
@@ -461,7 +523,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn metrics() {
|
||||
type Report = EventChunk<'static, Event<Ids, String>>;
|
||||
type Report = EventChunk<'static, Event<Extra, String>>;
|
||||
let reports: Arc<Mutex<Vec<Report>>> = Arc::default();
|
||||
|
||||
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
@@ -533,7 +595,6 @@ mod tests {
|
||||
let counter = metrics.register(Ids {
|
||||
endpoint_id: (&EndpointId::from("e1")).into(),
|
||||
branch_id: (&BranchId::from("b1")).into(),
|
||||
direction: TrafficDirection::Egress,
|
||||
private_link_id: None,
|
||||
});
|
||||
|
||||
@@ -551,13 +612,19 @@ mod tests {
|
||||
.await;
|
||||
let r = std::mem::take(&mut *reports.lock().unwrap());
|
||||
assert_eq!(r.len(), 1);
|
||||
assert_eq!(r[0].events.len(), 1);
|
||||
assert_eq!(r[0].events.len(), 2);
|
||||
assert_eq!(r[0].events[0].value, 0);
|
||||
assert_eq!(r[0].events[0].extra.direction, TrafficDirection::Egress);
|
||||
assert_eq!(r[0].events[1].value, 0);
|
||||
assert_eq!(r[0].events[1].extra.direction, TrafficDirection::Ingress);
|
||||
pushed_chunks.extend(r);
|
||||
|
||||
// record egress
|
||||
counter.record_egress(1);
|
||||
|
||||
// record ingress
|
||||
counter.record_ingress(2);
|
||||
|
||||
// egress should be observered
|
||||
collect_metrics_iteration(
|
||||
&metrics.endpoints,
|
||||
@@ -572,8 +639,11 @@ mod tests {
|
||||
.await;
|
||||
let r = std::mem::take(&mut *reports.lock().unwrap());
|
||||
assert_eq!(r.len(), 1);
|
||||
assert_eq!(r[0].events.len(), 1);
|
||||
assert_eq!(r[0].events.len(), 2);
|
||||
assert_eq!(r[0].events[0].value, 1);
|
||||
assert_eq!(r[0].events[0].extra.direction, TrafficDirection::Egress);
|
||||
assert_eq!(r[0].events[1].value, 2);
|
||||
assert_eq!(r[0].events[1].extra.direction, TrafficDirection::Ingress);
|
||||
pushed_chunks.extend(r);
|
||||
|
||||
// release counter
|
||||
|
||||
@@ -48,8 +48,8 @@ types-jwcrypto = "^1.5.0.20240925"
|
||||
pyyaml = "^6.0.2"
|
||||
types-pyyaml = "^6.0.12.20240917"
|
||||
testcontainers = "^4.9.0"
|
||||
# Jsonnet doesn't support Python 3.13 yet
|
||||
jsonnet = { version = "^0.20.0", markers = "python_version < '3.13'" }
|
||||
# Install a release candidate of `jsonnet`, as it supports Python 3.13
|
||||
jsonnet = "^0.21.0-rc2"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
mypy = "==1.13.0"
|
||||
|
||||
@@ -35,8 +35,9 @@ postgres-protocol.workspace = true
|
||||
pprof.workspace = true
|
||||
rand.workspace = true
|
||||
regex.workspace = true
|
||||
scopeguard.workspace = true
|
||||
reqwest = { workspace = true, features = ["json"] }
|
||||
rustls.workspace = true
|
||||
scopeguard.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
smallvec.workspace = true
|
||||
@@ -45,10 +46,11 @@ strum_macros.workspace = true
|
||||
thiserror.workspace = true
|
||||
tikv-jemallocator.workspace = true
|
||||
tokio = { workspace = true, features = ["fs"] }
|
||||
tokio-util = { workspace = true }
|
||||
tokio-io-timeout.workspace = true
|
||||
tokio-postgres.workspace = true
|
||||
tokio-rustls.workspace = true
|
||||
tokio-tar.workspace = true
|
||||
tokio-util = { workspace = true }
|
||||
tracing.workspace = true
|
||||
url.workspace = true
|
||||
metrics.workspace = true
|
||||
|
||||
@@ -8,7 +8,7 @@ use std::error::Error as _;
|
||||
use http_utils::error::HttpErrorBody;
|
||||
use reqwest::{IntoUrl, Method, StatusCode};
|
||||
use safekeeper_api::models::{
|
||||
PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest,
|
||||
self, PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest,
|
||||
TimelineStatus,
|
||||
};
|
||||
use utils::id::{NodeId, TenantId, TimelineId};
|
||||
@@ -96,11 +96,25 @@ impl Client {
|
||||
resp.json().await.map_err(Error::ReceiveBody)
|
||||
}
|
||||
|
||||
pub async fn exclude_timeline(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
req: &models::TimelineMembershipSwitchRequest,
|
||||
) -> Result<models::TimelineDeleteResult> {
|
||||
let uri = format!(
|
||||
"{}/v1/tenant/{}/timeline/{}/exclude",
|
||||
self.mgmt_api_endpoint, tenant_id, timeline_id
|
||||
);
|
||||
let resp = self.put(&uri, req).await?;
|
||||
resp.json().await.map_err(Error::ReceiveBody)
|
||||
}
|
||||
|
||||
pub async fn delete_timeline(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
) -> Result<TimelineStatus> {
|
||||
) -> Result<models::TimelineDeleteResult> {
|
||||
let uri = format!(
|
||||
"{}/v1/tenant/{}/timeline/{}",
|
||||
self.mgmt_api_endpoint, tenant_id, timeline_id
|
||||
@@ -109,6 +123,20 @@ impl Client {
|
||||
resp.json().await.map_err(Error::ReceiveBody)
|
||||
}
|
||||
|
||||
pub async fn bump_timeline_term(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
req: &models::TimelineTermBumpRequest,
|
||||
) -> Result<models::TimelineTermBumpResponse> {
|
||||
let uri = format!(
|
||||
"{}/v1/tenant/{}/timeline/{}/term_bump",
|
||||
self.mgmt_api_endpoint, tenant_id, timeline_id
|
||||
);
|
||||
let resp = self.post(&uri, req).await?;
|
||||
resp.json().await.map_err(Error::ReceiveBody)
|
||||
}
|
||||
|
||||
pub async fn timeline_status(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
@@ -149,6 +177,14 @@ impl Client {
|
||||
self.request(Method::POST, uri, body).await
|
||||
}
|
||||
|
||||
async fn put<B: serde::Serialize, U: IntoUrl>(
|
||||
&self,
|
||||
uri: U,
|
||||
body: B,
|
||||
) -> Result<reqwest::Response> {
|
||||
self.request(Method::PUT, uri, body).await
|
||||
}
|
||||
|
||||
async fn get<U: IntoUrl>(&self, uri: U) -> Result<reqwest::Response> {
|
||||
self.request(Method::GET, uri, ()).await
|
||||
}
|
||||
|
||||
@@ -16,10 +16,12 @@ use futures::stream::FuturesUnordered;
|
||||
use futures::{FutureExt, StreamExt};
|
||||
use metrics::set_build_info_metric;
|
||||
use remote_storage::RemoteStorageConfig;
|
||||
use reqwest::Certificate;
|
||||
use safekeeper::defaults::{
|
||||
DEFAULT_CONTROL_FILE_SAVE_INTERVAL, DEFAULT_EVICTION_MIN_RESIDENT, DEFAULT_HEARTBEAT_TIMEOUT,
|
||||
DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_MAX_OFFLOADER_LAG_BYTES, DEFAULT_PARTIAL_BACKUP_CONCURRENCY,
|
||||
DEFAULT_PARTIAL_BACKUP_TIMEOUT, DEFAULT_PG_LISTEN_ADDR,
|
||||
DEFAULT_PARTIAL_BACKUP_TIMEOUT, DEFAULT_PG_LISTEN_ADDR, DEFAULT_SSL_CERT_FILE,
|
||||
DEFAULT_SSL_KEY_FILE,
|
||||
};
|
||||
use safekeeper::{
|
||||
BROKER_RUNTIME, GlobalTimelines, HTTP_RUNTIME, SafeKeeperConf, WAL_SERVICE_RUNTIME, broker,
|
||||
@@ -94,6 +96,9 @@ struct Args {
|
||||
/// Listen http endpoint for management and metrics in the form host:port.
|
||||
#[arg(long, default_value = DEFAULT_HTTP_LISTEN_ADDR)]
|
||||
listen_http: String,
|
||||
/// Listen https endpoint for management and metrics in the form host:port.
|
||||
#[arg(long, default_value = None)]
|
||||
listen_https: Option<String>,
|
||||
/// Advertised endpoint for receiving/sending WAL in the form host:port. If not
|
||||
/// specified, listen_pg is used to advertise instead.
|
||||
#[arg(long, default_value = None)]
|
||||
@@ -203,6 +208,15 @@ struct Args {
|
||||
/// and the current position of the reader is smaller than this value.
|
||||
#[arg(long)]
|
||||
max_delta_for_fanout: Option<u64>,
|
||||
/// Path to a file with certificate's private key for https API.
|
||||
#[arg(long, default_value = DEFAULT_SSL_KEY_FILE)]
|
||||
ssl_key_file: Utf8PathBuf,
|
||||
/// Path to a file with a X509 certificate for https API.
|
||||
#[arg(long, default_value = DEFAULT_SSL_CERT_FILE)]
|
||||
ssl_cert_file: Utf8PathBuf,
|
||||
/// Trusted root CA certificate to use in https APIs.
|
||||
#[arg(long)]
|
||||
ssl_ca_file: Option<Utf8PathBuf>,
|
||||
}
|
||||
|
||||
// Like PathBufValueParser, but allows empty string.
|
||||
@@ -336,12 +350,22 @@ async fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
};
|
||||
|
||||
let ssl_ca_cert = match args.ssl_ca_file.as_ref() {
|
||||
Some(ssl_ca_file) => {
|
||||
tracing::info!("Using ssl root CA file: {ssl_ca_file:?}");
|
||||
let buf = tokio::fs::read(ssl_ca_file).await?;
|
||||
Some(Certificate::from_pem(&buf)?)
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
let conf = Arc::new(SafeKeeperConf {
|
||||
workdir,
|
||||
my_id: id,
|
||||
listen_pg_addr: args.listen_pg,
|
||||
listen_pg_addr_tenant_only: args.listen_pg_tenant_only,
|
||||
listen_http_addr: args.listen_http,
|
||||
listen_https_addr: args.listen_https,
|
||||
advertise_pg_addr: args.advertise_pg,
|
||||
availability_zone: args.availability_zone,
|
||||
no_sync: args.no_sync,
|
||||
@@ -368,6 +392,9 @@ async fn main() -> anyhow::Result<()> {
|
||||
eviction_min_resident: args.eviction_min_resident,
|
||||
wal_reader_fanout: args.wal_reader_fanout,
|
||||
max_delta_for_fanout: args.max_delta_for_fanout,
|
||||
ssl_key_file: args.ssl_key_file,
|
||||
ssl_cert_file: args.ssl_cert_file,
|
||||
ssl_ca_cert,
|
||||
});
|
||||
|
||||
// initialize sentry if SENTRY_DSN is provided
|
||||
@@ -428,6 +455,17 @@ async fn start_safekeeper(conf: Arc<SafeKeeperConf>) -> Result<()> {
|
||||
e
|
||||
})?;
|
||||
|
||||
let https_listener = match conf.listen_https_addr.as_ref() {
|
||||
Some(listen_https_addr) => {
|
||||
info!("starting safekeeper HTTPS service on {}", listen_https_addr);
|
||||
Some(tcp_listener::bind(listen_https_addr).map_err(|e| {
|
||||
error!("failed to bind to address {}: {}", listen_https_addr, e);
|
||||
e
|
||||
})?)
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
let global_timelines = Arc::new(GlobalTimelines::new(conf.clone()));
|
||||
|
||||
// Register metrics collector for active timelines. It's important to do this
|
||||
@@ -501,7 +539,7 @@ async fn start_safekeeper(conf: Arc<SafeKeeperConf>) -> Result<()> {
|
||||
let http_handle = current_thread_rt
|
||||
.as_ref()
|
||||
.unwrap_or_else(|| HTTP_RUNTIME.handle())
|
||||
.spawn(http::task_main(
|
||||
.spawn(http::task_main_http(
|
||||
conf.clone(),
|
||||
http_listener,
|
||||
global_timelines.clone(),
|
||||
@@ -509,6 +547,19 @@ async fn start_safekeeper(conf: Arc<SafeKeeperConf>) -> Result<()> {
|
||||
.map(|res| ("HTTP service main".to_owned(), res));
|
||||
tasks_handles.push(Box::pin(http_handle));
|
||||
|
||||
if let Some(https_listener) = https_listener {
|
||||
let https_handle = current_thread_rt
|
||||
.as_ref()
|
||||
.unwrap_or_else(|| HTTP_RUNTIME.handle())
|
||||
.spawn(http::task_main_https(
|
||||
conf.clone(),
|
||||
https_listener,
|
||||
global_timelines.clone(),
|
||||
))
|
||||
.map(|res| ("HTTPS service main".to_owned(), res));
|
||||
tasks_handles.push(Box::pin(https_handle));
|
||||
}
|
||||
|
||||
let broker_task_handle = current_thread_rt
|
||||
.as_ref()
|
||||
.unwrap_or_else(|| BROKER_RUNTIME.handle())
|
||||
|
||||
@@ -3,10 +3,11 @@ use std::sync::Arc;
|
||||
|
||||
pub use routes::make_router;
|
||||
pub use safekeeper_api::models;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use crate::{GlobalTimelines, SafeKeeperConf};
|
||||
|
||||
pub async fn task_main(
|
||||
pub async fn task_main_http(
|
||||
conf: Arc<SafeKeeperConf>,
|
||||
http_listener: std::net::TcpListener,
|
||||
global_timelines: Arc<GlobalTimelines>,
|
||||
@@ -14,8 +15,37 @@ pub async fn task_main(
|
||||
let router = make_router(conf, global_timelines)
|
||||
.build()
|
||||
.map_err(|err| anyhow::anyhow!(err))?;
|
||||
let service = http_utils::RouterService::new(router).unwrap();
|
||||
let server = hyper::Server::from_tcp(http_listener)?;
|
||||
server.serve(service).await?;
|
||||
|
||||
let service = Arc::new(
|
||||
http_utils::RequestServiceBuilder::new(router).map_err(|err| anyhow::anyhow!(err))?,
|
||||
);
|
||||
let server = http_utils::server::Server::new(service, http_listener, None)?;
|
||||
server.serve(CancellationToken::new()).await?;
|
||||
Ok(()) // unreachable
|
||||
}
|
||||
|
||||
pub async fn task_main_https(
|
||||
conf: Arc<SafeKeeperConf>,
|
||||
https_listener: std::net::TcpListener,
|
||||
global_timelines: Arc<GlobalTimelines>,
|
||||
) -> anyhow::Result<()> {
|
||||
let certs = http_utils::tls_certs::load_cert_chain(&conf.ssl_cert_file)?;
|
||||
let key = http_utils::tls_certs::load_private_key(&conf.ssl_key_file)?;
|
||||
|
||||
let server_config = rustls::ServerConfig::builder()
|
||||
.with_no_client_auth()
|
||||
.with_single_cert(certs, key)?;
|
||||
|
||||
let tls_acceptor = tokio_rustls::TlsAcceptor::from(Arc::new(server_config));
|
||||
|
||||
let router = make_router(conf, global_timelines)
|
||||
.build()
|
||||
.map_err(|err| anyhow::anyhow!(err))?;
|
||||
|
||||
let service = Arc::new(
|
||||
http_utils::RequestServiceBuilder::new(router).map_err(|err| anyhow::anyhow!(err))?,
|
||||
);
|
||||
let server = http_utils::server::Server::new(service, https_listener, Some(tls_acceptor))?;
|
||||
server.serve(CancellationToken::new()).await?;
|
||||
Ok(()) // unreachable
|
||||
}
|
||||
|
||||
@@ -17,7 +17,8 @@ use hyper::{Body, Request, Response, StatusCode};
|
||||
use postgres_ffi::WAL_SEGMENT_SIZE;
|
||||
use safekeeper_api::models::{
|
||||
AcceptorStateStatus, PullTimelineRequest, SafekeeperStatus, SkTimelineInfo, TermSwitchApiEntry,
|
||||
TimelineCopyRequest, TimelineCreateRequest, TimelineStatus, TimelineTermBumpRequest,
|
||||
TimelineCopyRequest, TimelineCreateRequest, TimelineDeleteResult, TimelineStatus,
|
||||
TimelineTermBumpRequest,
|
||||
};
|
||||
use safekeeper_api::{ServerInfo, membership, models};
|
||||
use storage_broker::proto::{SafekeeperTimelineInfo, TenantTimelineId as ProtoTenantTimelineId};
|
||||
@@ -32,7 +33,7 @@ use utils::lsn::Lsn;
|
||||
|
||||
use crate::debug_dump::TimelineDigestRequest;
|
||||
use crate::safekeeper::TermLsn;
|
||||
use crate::timelines_global_map::{DeleteOrExclude, TimelineDeleteResult};
|
||||
use crate::timelines_global_map::DeleteOrExclude;
|
||||
use crate::{
|
||||
GlobalTimelines, SafeKeeperConf, copy_timeline, debug_dump, patch_control_file, pull_timeline,
|
||||
};
|
||||
@@ -231,9 +232,14 @@ async fn timeline_pull_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
let conf = get_conf(&request);
|
||||
let global_timelines = get_global_timelines(&request);
|
||||
|
||||
let resp = pull_timeline::handle_request(data, conf.sk_auth_token.clone(), global_timelines)
|
||||
.await
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
let resp = pull_timeline::handle_request(
|
||||
data,
|
||||
conf.sk_auth_token.clone(),
|
||||
conf.ssl_ca_cert.clone(),
|
||||
global_timelines,
|
||||
)
|
||||
.await
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
json_response(StatusCode::OK, resp)
|
||||
}
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ use std::time::Duration;
|
||||
use camino::Utf8PathBuf;
|
||||
use once_cell::sync::Lazy;
|
||||
use remote_storage::RemoteStorageConfig;
|
||||
use reqwest::Certificate;
|
||||
use storage_broker::Uri;
|
||||
use tokio::runtime::Runtime;
|
||||
use utils::auth::SwappableJwtAuth;
|
||||
@@ -69,6 +70,9 @@ pub mod defaults {
|
||||
// before uploading a partial segment, so that in normal operation the eviction can happen
|
||||
// as soon as we have done the partial segment upload.
|
||||
pub const DEFAULT_EVICTION_MIN_RESIDENT: &str = DEFAULT_PARTIAL_BACKUP_TIMEOUT;
|
||||
|
||||
pub const DEFAULT_SSL_KEY_FILE: &str = "server.key";
|
||||
pub const DEFAULT_SSL_CERT_FILE: &str = "server.crt";
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -84,6 +88,7 @@ pub struct SafeKeeperConf {
|
||||
pub listen_pg_addr: String,
|
||||
pub listen_pg_addr_tenant_only: Option<String>,
|
||||
pub listen_http_addr: String,
|
||||
pub listen_https_addr: Option<String>,
|
||||
pub advertise_pg_addr: Option<String>,
|
||||
pub availability_zone: Option<String>,
|
||||
pub no_sync: bool,
|
||||
@@ -111,6 +116,9 @@ pub struct SafeKeeperConf {
|
||||
pub eviction_min_resident: Duration,
|
||||
pub wal_reader_fanout: bool,
|
||||
pub max_delta_for_fanout: Option<u64>,
|
||||
pub ssl_key_file: Utf8PathBuf,
|
||||
pub ssl_cert_file: Utf8PathBuf,
|
||||
pub ssl_ca_cert: Option<Certificate>,
|
||||
}
|
||||
|
||||
impl SafeKeeperConf {
|
||||
@@ -127,6 +135,7 @@ impl SafeKeeperConf {
|
||||
listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
|
||||
listen_pg_addr_tenant_only: None,
|
||||
listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
|
||||
listen_https_addr: None,
|
||||
advertise_pg_addr: None,
|
||||
availability_zone: None,
|
||||
remote_storage: None,
|
||||
@@ -155,6 +164,9 @@ impl SafeKeeperConf {
|
||||
eviction_min_resident: Duration::ZERO,
|
||||
wal_reader_fanout: false,
|
||||
max_delta_for_fanout: None,
|
||||
ssl_key_file: Utf8PathBuf::from(defaults::DEFAULT_SSL_KEY_FILE),
|
||||
ssl_cert_file: Utf8PathBuf::from(defaults::DEFAULT_SSL_CERT_FILE),
|
||||
ssl_ca_cert: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ use camino::Utf8PathBuf;
|
||||
use chrono::{DateTime, Utc};
|
||||
use futures::{SinkExt, StreamExt, TryStreamExt};
|
||||
use postgres_ffi::{PG_TLI, XLogFileName, XLogSegNo};
|
||||
use reqwest::Certificate;
|
||||
use safekeeper_api::Term;
|
||||
use safekeeper_api::models::{PullTimelineRequest, PullTimelineResponse, TimelineStatus};
|
||||
use safekeeper_client::mgmt_api;
|
||||
@@ -392,6 +393,7 @@ pub struct DebugDumpResponse {
|
||||
pub async fn handle_request(
|
||||
request: PullTimelineRequest,
|
||||
sk_auth_token: Option<SecretString>,
|
||||
ssl_ca_cert: Option<Certificate>,
|
||||
global_timelines: Arc<GlobalTimelines>,
|
||||
) -> Result<PullTimelineResponse> {
|
||||
let existing_tli = global_timelines.get(TenantTimelineId::new(
|
||||
@@ -402,9 +404,11 @@ pub async fn handle_request(
|
||||
bail!("Timeline {} already exists", request.timeline_id);
|
||||
}
|
||||
|
||||
// TODO(DimasKovas): add ssl root CA certificate when implementing safekeeper's
|
||||
// part of https support (#24836).
|
||||
let http_client = reqwest::Client::new();
|
||||
let mut http_client = reqwest::Client::builder();
|
||||
if let Some(ssl_ca_cert) = ssl_ca_cert {
|
||||
http_client = http_client.add_root_certificate(ssl_ca_cert);
|
||||
}
|
||||
let http_client = http_client.build()?;
|
||||
|
||||
let http_hosts = request.http_hosts.clone();
|
||||
|
||||
@@ -441,13 +445,21 @@ pub async fn handle_request(
|
||||
assert!(status.tenant_id == request.tenant_id);
|
||||
assert!(status.timeline_id == request.timeline_id);
|
||||
|
||||
pull_timeline(status, safekeeper_host, sk_auth_token, global_timelines).await
|
||||
pull_timeline(
|
||||
status,
|
||||
safekeeper_host,
|
||||
sk_auth_token,
|
||||
http_client,
|
||||
global_timelines,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn pull_timeline(
|
||||
status: TimelineStatus,
|
||||
host: String,
|
||||
sk_auth_token: Option<SecretString>,
|
||||
http_client: reqwest::Client,
|
||||
global_timelines: Arc<GlobalTimelines>,
|
||||
) -> Result<PullTimelineResponse> {
|
||||
let ttid = TenantTimelineId::new(status.tenant_id, status.timeline_id);
|
||||
@@ -464,9 +476,6 @@ async fn pull_timeline(
|
||||
let conf = &global_timelines.get_global_config();
|
||||
|
||||
let (_tmp_dir, tli_dir_path) = create_temp_timeline_dir(conf, ttid).await?;
|
||||
// TODO(DimasKovas): add ssl root CA certificate when implementing safekeeper's
|
||||
// part of https support (#24836).
|
||||
let http_client = reqwest::Client::new();
|
||||
let client = Client::new(http_client, host.clone(), sk_auth_token.clone());
|
||||
// Request stream with basebackup archive.
|
||||
let bb_resp = client
|
||||
|
||||
@@ -11,9 +11,8 @@ use anyhow::{Context, Result, bail};
|
||||
use camino::Utf8PathBuf;
|
||||
use camino_tempfile::Utf8TempDir;
|
||||
use safekeeper_api::membership::Configuration;
|
||||
use safekeeper_api::models::SafekeeperUtilization;
|
||||
use safekeeper_api::models::{SafekeeperUtilization, TimelineDeleteResult};
|
||||
use safekeeper_api::{ServerInfo, membership};
|
||||
use serde::Serialize;
|
||||
use tokio::fs;
|
||||
use tracing::*;
|
||||
use utils::crashsafe::{durable_rename, fsync_async_opt};
|
||||
@@ -579,11 +578,6 @@ impl GlobalTimelines {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Serialize)]
|
||||
pub struct TimelineDeleteResult {
|
||||
pub dir_existed: bool,
|
||||
}
|
||||
|
||||
/// Action for delete_or_exclude.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum DeleteOrExclude {
|
||||
|
||||
@@ -152,6 +152,7 @@ pub fn run_server(os: NodeOs, disk: Arc<SafekeeperDisk>) -> Result<()> {
|
||||
my_id: NodeId(os.id() as u64),
|
||||
listen_pg_addr: String::new(),
|
||||
listen_http_addr: String::new(),
|
||||
listen_https_addr: None,
|
||||
no_sync: false,
|
||||
broker_endpoint: "/".parse::<Uri>().unwrap(),
|
||||
broker_keepalive_interval: Duration::from_secs(0),
|
||||
@@ -179,6 +180,9 @@ pub fn run_server(os: NodeOs, disk: Arc<SafekeeperDisk>) -> Result<()> {
|
||||
eviction_min_resident: Duration::ZERO,
|
||||
wal_reader_fanout: false,
|
||||
max_delta_for_fanout: None,
|
||||
ssl_key_file: Utf8PathBuf::from(""),
|
||||
ssl_cert_file: Utf8PathBuf::from(""),
|
||||
ssl_ca_cert: None,
|
||||
};
|
||||
|
||||
let mut global = GlobalMap::new(disk, conf.clone())?;
|
||||
|
||||
@@ -24,7 +24,7 @@ use pageserver_api::controller_api::{
|
||||
ShardsPreferredAzsRequest, TenantCreateRequest, TenantPolicyRequest, TenantShardMigrateRequest,
|
||||
};
|
||||
use pageserver_api::models::{
|
||||
TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest,
|
||||
DetachBehavior, TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest,
|
||||
TenantShardSplitRequest, TenantTimeTravelRequest, TimelineArchivalConfigRequest,
|
||||
TimelineCreateRequest,
|
||||
};
|
||||
@@ -525,6 +525,7 @@ async fn handle_tenant_timeline_detach_ancestor(
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
|
||||
let behavior: Option<DetachBehavior> = parse_query_param(&req, "detach_behavior")?;
|
||||
|
||||
check_permissions(&req, Scope::PageServerApi)?;
|
||||
maybe_rate_limit(&req, tenant_id).await;
|
||||
@@ -537,7 +538,7 @@ async fn handle_tenant_timeline_detach_ancestor(
|
||||
};
|
||||
|
||||
let res = service
|
||||
.tenant_timeline_detach_ancestor(tenant_id, timeline_id)
|
||||
.tenant_timeline_detach_ancestor(tenant_id, timeline_id, behavior)
|
||||
.await?;
|
||||
|
||||
json_response(StatusCode::OK, res)
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use pageserver_api::models::detach_ancestor::AncestorDetached;
|
||||
use pageserver_api::models::{
|
||||
LocationConfig, LocationConfigListResponse, PageserverUtilization, SecondaryProgress,
|
||||
TenantScanRemoteStorageResponse, TenantShardSplitRequest, TenantShardSplitResponse,
|
||||
TenantWaitLsnRequest, TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineInfo,
|
||||
TopTenantShardsRequest, TopTenantShardsResponse,
|
||||
DetachBehavior, LocationConfig, LocationConfigListResponse, PageserverUtilization,
|
||||
SecondaryProgress, TenantScanRemoteStorageResponse, TenantShardSplitRequest,
|
||||
TenantShardSplitResponse, TenantWaitLsnRequest, TimelineArchivalConfigRequest,
|
||||
TimelineCreateRequest, TimelineInfo, TopTenantShardsRequest, TopTenantShardsResponse,
|
||||
};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_client::BlockUnblock;
|
||||
@@ -252,13 +252,14 @@ impl PageserverClient {
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
behavior: Option<DetachBehavior>,
|
||||
) -> Result<AncestorDetached> {
|
||||
measured_request!(
|
||||
"timeline_detach_ancestor",
|
||||
crate::metrics::Method::Put,
|
||||
&self.node_id_label,
|
||||
self.inner
|
||||
.timeline_detach_ancestor(tenant_shard_id, timeline_id)
|
||||
.timeline_detach_ancestor(tenant_shard_id, timeline_id, behavior)
|
||||
.await
|
||||
)
|
||||
}
|
||||
|
||||
@@ -967,10 +967,26 @@ impl Persistence {
|
||||
&self,
|
||||
split_tenant_id: TenantId,
|
||||
old_shard_count: ShardCount,
|
||||
new_shard_count: ShardCount,
|
||||
) -> DatabaseResult<()> {
|
||||
use crate::schema::tenant_shards::dsl::*;
|
||||
self.with_measured_conn(DatabaseOperation::CompleteShardSplit, move |conn| {
|
||||
Box::pin(async move {
|
||||
// Sanity: child shards must still exist, as we're deleting parent shards
|
||||
let child_shards_query = tenant_shards
|
||||
.filter(tenant_id.eq(split_tenant_id.to_string()))
|
||||
.filter(shard_count.eq(new_shard_count.literal() as i32));
|
||||
let child_shards = child_shards_query
|
||||
.load::<TenantShardPersistence>(conn)
|
||||
.await?;
|
||||
if child_shards.len() != new_shard_count.count() as usize {
|
||||
return Err(DatabaseError::Logical(format!(
|
||||
"Unexpected child shard count {} while completing split to \
|
||||
count {new_shard_count:?} on tenant {split_tenant_id}",
|
||||
child_shards.len()
|
||||
)));
|
||||
}
|
||||
|
||||
// Drop parent shards
|
||||
diesel::delete(tenant_shards)
|
||||
.filter(tenant_id.eq(split_tenant_id.to_string()))
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use safekeeper_api::models::{
|
||||
PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest,
|
||||
self, PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest,
|
||||
TimelineStatus,
|
||||
};
|
||||
use safekeeper_client::mgmt_api::{Client, Result};
|
||||
@@ -69,11 +69,28 @@ impl SafekeeperClient {
|
||||
)
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub(crate) async fn exclude_timeline(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
req: &models::TimelineMembershipSwitchRequest,
|
||||
) -> Result<models::TimelineDeleteResult> {
|
||||
measured_request!(
|
||||
"exclude_timeline",
|
||||
crate::metrics::Method::Post,
|
||||
&self.node_id_label,
|
||||
self.inner
|
||||
.exclude_timeline(tenant_id, timeline_id, req)
|
||||
.await
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) async fn delete_timeline(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
) -> Result<TimelineStatus> {
|
||||
) -> Result<models::TimelineDeleteResult> {
|
||||
measured_request!(
|
||||
"delete_timeline",
|
||||
crate::metrics::Method::Delete,
|
||||
@@ -94,6 +111,23 @@ impl SafekeeperClient {
|
||||
)
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub(crate) async fn bump_timeline_term(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
req: &models::TimelineTermBumpRequest,
|
||||
) -> Result<models::TimelineTermBumpResponse> {
|
||||
measured_request!(
|
||||
"term_bump",
|
||||
crate::metrics::Method::Post,
|
||||
&self.node_id_label,
|
||||
self.inner
|
||||
.bump_timeline_term(tenant_id, timeline_id, req)
|
||||
.await
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) async fn get_utilization(&self) -> Result<SafekeeperUtilization> {
|
||||
measured_request!(
|
||||
"utilization",
|
||||
|
||||
@@ -34,9 +34,9 @@ use pageserver_api::controller_api::{
|
||||
TenantShardMigrateResponse,
|
||||
};
|
||||
use pageserver_api::models::{
|
||||
self, LocationConfig, LocationConfigListResponse, LocationConfigMode, PageserverUtilization,
|
||||
SafekeeperInfo, SafekeepersInfo, SecondaryProgress, ShardParameters, TenantConfig,
|
||||
TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest,
|
||||
self, DetachBehavior, LocationConfig, LocationConfigListResponse, LocationConfigMode,
|
||||
PageserverUtilization, SafekeeperInfo, SafekeepersInfo, SecondaryProgress, ShardParameters,
|
||||
TenantConfig, TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest,
|
||||
TenantLocationConfigResponse, TenantShardLocation, TenantShardSplitRequest,
|
||||
TenantShardSplitResponse, TenantSorting, TenantTimeTravelRequest,
|
||||
TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineCreateResponseStorcon,
|
||||
@@ -4041,6 +4041,7 @@ impl Service {
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
behavior: Option<DetachBehavior>,
|
||||
) -> Result<models::detach_ancestor::AncestorDetached, ApiError> {
|
||||
tracing::info!("Detaching timeline {tenant_id}/{timeline_id}",);
|
||||
|
||||
@@ -4064,6 +4065,7 @@ impl Service {
|
||||
node: Node,
|
||||
jwt: Option<String>,
|
||||
ssl_ca_cert: Option<Certificate>,
|
||||
behavior: Option<DetachBehavior>,
|
||||
) -> Result<(ShardNumber, models::detach_ancestor::AncestorDetached), ApiError> {
|
||||
tracing::info!(
|
||||
"Detaching timeline on shard {tenant_shard_id}/{timeline_id}, attached to node {node}",
|
||||
@@ -4073,7 +4075,7 @@ impl Service {
|
||||
.map_err(|e| passthrough_api_error(&node, e))?;
|
||||
|
||||
client
|
||||
.timeline_detach_ancestor(tenant_shard_id, timeline_id)
|
||||
.timeline_detach_ancestor(tenant_shard_id, timeline_id, behavior)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
use mgmt_api::Error;
|
||||
@@ -4111,6 +4113,7 @@ impl Service {
|
||||
node,
|
||||
self.config.pageserver_jwt_token.clone(),
|
||||
self.config.ssl_ca_cert.clone(),
|
||||
behavior,
|
||||
))
|
||||
})
|
||||
.await?;
|
||||
@@ -4265,7 +4268,8 @@ impl Service {
|
||||
|
||||
/// Helper for concurrently calling a pageserver API on a number of shards, such as timeline creation.
|
||||
///
|
||||
/// On success, the returned vector contains exactly the same number of elements as the input `locations`.
|
||||
/// On success, the returned vector contains exactly the same number of elements as the input `locations`
|
||||
/// and returned element at index `i` is the result for `req_fn(op(locations[i])`.
|
||||
async fn tenant_for_shards<F, R>(
|
||||
&self,
|
||||
locations: Vec<(TenantShardId, Node)>,
|
||||
@@ -4281,18 +4285,23 @@ impl Service {
|
||||
let mut futs = FuturesUnordered::new();
|
||||
let mut results = Vec::with_capacity(locations.len());
|
||||
|
||||
for (tenant_shard_id, node) in locations {
|
||||
futs.push(req_fn(tenant_shard_id, node));
|
||||
for (idx, (tenant_shard_id, node)) in locations.into_iter().enumerate() {
|
||||
let fut = req_fn(tenant_shard_id, node);
|
||||
futs.push(async move { (idx, fut.await) });
|
||||
}
|
||||
|
||||
while let Some(r) = futs.next().await {
|
||||
results.push(r?);
|
||||
while let Some((idx, r)) = futs.next().await {
|
||||
results.push((idx, r?));
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
results.sort_by_key(|(idx, _)| *idx);
|
||||
Ok(results.into_iter().map(|(_, r)| r).collect())
|
||||
}
|
||||
|
||||
/// Concurrently invoke a pageserver API call on many shards at once
|
||||
/// Concurrently invoke a pageserver API call on many shards at once.
|
||||
///
|
||||
/// The returned Vec has the same length as the `locations` Vec,
|
||||
/// and returned element at index `i` is the result for `op(locations[i])`.
|
||||
pub(crate) async fn tenant_for_shards_api<T, O, F>(
|
||||
&self,
|
||||
locations: Vec<(TenantShardId, Node)>,
|
||||
@@ -4309,27 +4318,29 @@ impl Service {
|
||||
let mut futs = FuturesUnordered::new();
|
||||
let mut results = Vec::with_capacity(locations.len());
|
||||
|
||||
for (tenant_shard_id, node) in locations {
|
||||
for (idx, (tenant_shard_id, node)) in locations.into_iter().enumerate() {
|
||||
futs.push(async move {
|
||||
node.with_client_retries(
|
||||
|client| op(tenant_shard_id, client),
|
||||
&self.config.pageserver_jwt_token,
|
||||
&self.config.ssl_ca_cert,
|
||||
warn_threshold,
|
||||
max_retries,
|
||||
timeout,
|
||||
cancel,
|
||||
)
|
||||
.await
|
||||
let r = node
|
||||
.with_client_retries(
|
||||
|client| op(tenant_shard_id, client),
|
||||
&self.config.pageserver_jwt_token,
|
||||
&self.config.ssl_ca_cert,
|
||||
warn_threshold,
|
||||
max_retries,
|
||||
timeout,
|
||||
cancel,
|
||||
)
|
||||
.await;
|
||||
(idx, r)
|
||||
});
|
||||
}
|
||||
|
||||
while let Some(r) = futs.next().await {
|
||||
let r = r.unwrap_or(Err(mgmt_api::Error::Cancelled));
|
||||
results.push(r);
|
||||
while let Some((idx, r)) = futs.next().await {
|
||||
results.push((idx, r.unwrap_or(Err(mgmt_api::Error::Cancelled))));
|
||||
}
|
||||
|
||||
results
|
||||
results.sort_by_key(|(idx, _)| *idx);
|
||||
results.into_iter().map(|(_, r)| r).collect()
|
||||
}
|
||||
|
||||
/// Helper for safely working with the shards in a tenant remotely on pageservers, for example
|
||||
@@ -5742,7 +5753,7 @@ impl Service {
|
||||
// it doesn't match, but that requires more retry logic on this side)
|
||||
|
||||
self.persistence
|
||||
.complete_shard_split(tenant_id, old_shard_count)
|
||||
.complete_shard_split(tenant_id, old_shard_count, new_shard_count)
|
||||
.await?;
|
||||
|
||||
fail::fail_point!("shard-split-post-complete", |_| Err(
|
||||
|
||||
@@ -19,7 +19,7 @@ if TYPE_CHECKING:
|
||||
class ComputeReconfigure:
|
||||
def __init__(self, server: HTTPServer):
|
||||
self.server = server
|
||||
self.control_plane_compute_hook_api = f"http://{server.host}:{server.port}/notify-attach"
|
||||
self.control_plane_hooks_api = f"http://{server.host}:{server.port}/"
|
||||
self.workloads: dict[TenantId, Any] = {}
|
||||
self.on_notify: Callable[[Any], None] | None = None
|
||||
|
||||
|
||||
@@ -175,6 +175,9 @@ PAGESERVER_PER_TENANT_METRICS: tuple[str, ...] = (
|
||||
counter("pageserver_tenant_throttling_count"),
|
||||
counter("pageserver_timeline_wal_records_received"),
|
||||
counter("pageserver_page_service_pagestream_flush_in_progress_micros"),
|
||||
counter("pageserver_wait_lsn_in_progress_micros"),
|
||||
counter("pageserver_wait_lsn_started_count"),
|
||||
counter("pageserver_wait_lsn_finished_count"),
|
||||
*histogram("pageserver_page_service_batch_size"),
|
||||
*histogram("pageserver_page_service_pagestream_batch_wait_time_seconds"),
|
||||
*PAGESERVER_PER_TENANT_REMOTE_TIMELINE_CLIENT_METRICS,
|
||||
|
||||
@@ -460,12 +460,15 @@ class NeonEnvBuilder:
|
||||
self.overlay_mounts_created_by_us: list[tuple[str, Path]] = []
|
||||
self.config_init_force: str | None = None
|
||||
self.top_output_dir = top_output_dir
|
||||
self.control_plane_compute_hook_api: str | None = None
|
||||
self.control_plane_hooks_api: str | None = None
|
||||
self.storage_controller_config: dict[Any, Any] | None = None
|
||||
|
||||
# Flag to enable https listener in pageserver, generate local ssl certs,
|
||||
# and force storage controller to use https for pageserver api.
|
||||
self.use_https_pageserver_api: bool = False
|
||||
# Flag to enable https listener in safekeeper, generate local ssl certs,
|
||||
# and force storage controller to use https for safekeeper api.
|
||||
self.use_https_safekeeper_api: bool = False
|
||||
|
||||
self.pageserver_virtual_file_io_engine: str | None = pageserver_virtual_file_io_engine
|
||||
self.pageserver_get_vectored_concurrent_io: str | None = (
|
||||
@@ -1063,7 +1066,9 @@ class NeonEnv:
|
||||
self.initial_tenant = config.initial_tenant
|
||||
self.initial_timeline = config.initial_timeline
|
||||
|
||||
self.generate_local_ssl_certs = config.use_https_pageserver_api
|
||||
self.generate_local_ssl_certs = (
|
||||
config.use_https_pageserver_api or config.use_https_safekeeper_api
|
||||
)
|
||||
self.ssl_ca_file = (
|
||||
self.repo_dir.joinpath("rootCA.crt") if self.generate_local_ssl_certs else None
|
||||
)
|
||||
@@ -1116,7 +1121,7 @@ class NeonEnv:
|
||||
self.control_plane_api: str = self.storage_controller.upcall_api_endpoint()
|
||||
|
||||
# For testing this with a fake HTTP server, enable passing through a URL from config
|
||||
self.control_plane_compute_hook_api = config.control_plane_compute_hook_api
|
||||
self.control_plane_hooks_api = config.control_plane_hooks_api
|
||||
|
||||
self.pageserver_virtual_file_io_engine = config.pageserver_virtual_file_io_engine
|
||||
self.pageserver_virtual_file_io_mode = config.pageserver_virtual_file_io_mode
|
||||
@@ -1137,8 +1142,8 @@ class NeonEnv:
|
||||
if self.control_plane_api is not None:
|
||||
cfg["control_plane_api"] = self.control_plane_api
|
||||
|
||||
if self.control_plane_compute_hook_api is not None:
|
||||
cfg["control_plane_compute_hook_api"] = self.control_plane_compute_hook_api
|
||||
if self.control_plane_hooks_api is not None:
|
||||
cfg["control_plane_hooks_api"] = self.control_plane_hooks_api
|
||||
|
||||
storage_controller_config = self.storage_controller_config
|
||||
|
||||
@@ -1146,6 +1151,10 @@ class NeonEnv:
|
||||
storage_controller_config = storage_controller_config or {}
|
||||
storage_controller_config["use_https_pageserver_api"] = True
|
||||
|
||||
if config.use_https_safekeeper_api:
|
||||
storage_controller_config = storage_controller_config or {}
|
||||
storage_controller_config["use_https_safekeeper_api"] = True
|
||||
|
||||
if storage_controller_config is not None:
|
||||
cfg["storage_controller"] = storage_controller_config
|
||||
|
||||
@@ -1248,6 +1257,7 @@ class NeonEnv:
|
||||
pg=self.port_distributor.get_port(),
|
||||
pg_tenant_only=self.port_distributor.get_port(),
|
||||
http=self.port_distributor.get_port(),
|
||||
https=self.port_distributor.get_port() if config.use_https_safekeeper_api else None,
|
||||
)
|
||||
id = config.safekeepers_id_start + i # assign ids sequentially
|
||||
sk_cfg: dict[str, Any] = {
|
||||
@@ -1255,6 +1265,7 @@ class NeonEnv:
|
||||
"pg_port": port.pg,
|
||||
"pg_tenant_only_port": port.pg_tenant_only,
|
||||
"http_port": port.http,
|
||||
"https_port": port.https,
|
||||
"sync": config.safekeepers_enable_fsync,
|
||||
}
|
||||
if config.auth_enabled:
|
||||
@@ -4475,6 +4486,7 @@ class SafekeeperPort:
|
||||
pg: int
|
||||
pg_tenant_only: int
|
||||
http: int
|
||||
https: int | None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -1070,14 +1070,14 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
|
||||
tenant_id: TenantId | TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
batch_size: int | None = None,
|
||||
behavior_v2: bool = False,
|
||||
detach_behavior: str | None = None,
|
||||
**kwargs,
|
||||
) -> set[TimelineId]:
|
||||
params: dict[str, Any] = {}
|
||||
if batch_size is not None:
|
||||
params["batch_size"] = batch_size
|
||||
if behavior_v2:
|
||||
params["detach_behavior"] = "v2"
|
||||
if detach_behavior:
|
||||
params["detach_behavior"] = detach_behavior
|
||||
res = self.put(
|
||||
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/detach_ancestor",
|
||||
params=params,
|
||||
|
||||
@@ -0,0 +1,162 @@
|
||||
\set min_id 1
|
||||
\set max_id 1500000000
|
||||
\set range_size 100
|
||||
|
||||
-- Use uniform random instead of random_zipfian
|
||||
\set random_id random(:min_id, :max_id)
|
||||
\set random_mar_id random(1, 65536)
|
||||
\set random_delete_id random(:min_id, :max_id)
|
||||
|
||||
-- Update exactly one row (if it exists) using the uniformly chosen random_id
|
||||
UPDATE transaction.transaction
|
||||
SET state = 'COMPLETED',
|
||||
settlement_date = CURRENT_DATE,
|
||||
mar_identifier = (:random_mar_id)::int
|
||||
WHERE id = (:random_id)::bigint;
|
||||
|
||||
-- Insert exactly one row
|
||||
INSERT INTO transaction.transaction (
|
||||
user_id,
|
||||
card_id,
|
||||
business_id,
|
||||
preceding_transaction_id,
|
||||
is_last,
|
||||
is_mocked,
|
||||
type,
|
||||
state,
|
||||
network,
|
||||
subnetwork,
|
||||
user_transaction_time,
|
||||
settlement_date,
|
||||
request_amount,
|
||||
amount,
|
||||
currency_code,
|
||||
approval_code,
|
||||
response,
|
||||
gpa,
|
||||
gpa_order_unload,
|
||||
gpa_order,
|
||||
program_transfer,
|
||||
fee_transfer,
|
||||
peer_transfer,
|
||||
msa_orders,
|
||||
risk_assessment,
|
||||
auto_reload,
|
||||
direct_deposit,
|
||||
polarity,
|
||||
real_time_fee_group,
|
||||
fee,
|
||||
chargeback,
|
||||
standin_approved_by,
|
||||
acquirer_fee_amount,
|
||||
funded_account_holder,
|
||||
digital_wallet_token,
|
||||
network_fees,
|
||||
card_security_code_verification,
|
||||
fraud,
|
||||
cardholder_authentication_data,
|
||||
currency_conversion,
|
||||
merchant,
|
||||
store,
|
||||
card_acceptor,
|
||||
acquirer,
|
||||
pos,
|
||||
avs,
|
||||
mar_token,
|
||||
mar_preceding_related_transaction_token,
|
||||
mar_business_token,
|
||||
mar_acting_user_token,
|
||||
mar_card_token,
|
||||
mar_duration,
|
||||
mar_created_time,
|
||||
issuer_interchange_amount,
|
||||
offer_orders,
|
||||
transaction_canonical_id,
|
||||
mar_identifier,
|
||||
created_at,
|
||||
card_acceptor_mid,
|
||||
card_acceptor_name,
|
||||
address_verification,
|
||||
issuing_product,
|
||||
mar_enhanced_data_token,
|
||||
standin_reason
|
||||
)
|
||||
SELECT
|
||||
(:random_id % 100000) + 1 AS user_id,
|
||||
(:random_id % 500000) + 1 AS card_id,
|
||||
(:random_id % 20000) + 1 AS business_id,
|
||||
NULL AS preceding_transaction_id,
|
||||
(:random_id % 2) = 0 AS is_last,
|
||||
(:random_id % 5) = 0 AS is_mocked,
|
||||
'authorization' AS type,
|
||||
'PENDING' AS state,
|
||||
'VISA' AS network,
|
||||
'VISANET' AS subnetwork,
|
||||
now() - ((:random_id % 100) || ' days')::interval AS user_transaction_time,
|
||||
now() - ((:random_id % 100) || ' days')::interval AS settlement_date,
|
||||
random() * 1000 AS request_amount,
|
||||
random() * 1000 AS amount,
|
||||
'USD' AS currency_code,
|
||||
md5((:random_id)::text) AS approval_code,
|
||||
'{}'::jsonb AS response,
|
||||
'{}'::jsonb AS gpa,
|
||||
'{}'::jsonb AS gpa_order_unload,
|
||||
'{}'::jsonb AS gpa_order,
|
||||
'{}'::jsonb AS program_transfer,
|
||||
'{}'::jsonb AS fee_transfer,
|
||||
'{}'::jsonb AS peer_transfer,
|
||||
'{}'::jsonb AS msa_orders,
|
||||
'{}'::jsonb AS risk_assessment,
|
||||
'{}'::jsonb AS auto_reload,
|
||||
'{}'::jsonb AS direct_deposit,
|
||||
'{}'::jsonb AS polarity,
|
||||
'{}'::jsonb AS real_time_fee_group,
|
||||
'{}'::jsonb AS fee,
|
||||
'{}'::jsonb AS chargeback,
|
||||
NULL AS standin_approved_by,
|
||||
random() * 100 AS acquirer_fee_amount,
|
||||
'{}'::jsonb AS funded_account_holder,
|
||||
'{}'::jsonb AS digital_wallet_token,
|
||||
'{}'::jsonb AS network_fees,
|
||||
'{}'::jsonb AS card_security_code_verification,
|
||||
'{}'::jsonb AS fraud,
|
||||
'{}'::jsonb AS cardholder_authentication_data,
|
||||
'{}'::jsonb AS currency_conversion,
|
||||
'{}'::jsonb AS merchant,
|
||||
'{}'::jsonb AS store,
|
||||
'{}'::jsonb AS card_acceptor,
|
||||
'{}'::jsonb AS acquirer,
|
||||
'{}'::jsonb AS pos,
|
||||
'{}'::jsonb AS avs,
|
||||
md5((:random_id)::text || 'token') AS mar_token,
|
||||
NULL AS mar_preceding_related_transaction_token,
|
||||
NULL AS mar_business_token,
|
||||
NULL AS mar_acting_user_token,
|
||||
NULL AS mar_card_token,
|
||||
random() * 1000 AS mar_duration,
|
||||
now() AS mar_created_time,
|
||||
random() * 100 AS issuer_interchange_amount,
|
||||
'{}'::jsonb AS offer_orders,
|
||||
(:random_id % 500) + 1 AS transaction_canonical_id,
|
||||
:random_id::integer AS mar_identifier,
|
||||
now() AS created_at,
|
||||
NULL AS card_acceptor_mid,
|
||||
NULL AS card_acceptor_name,
|
||||
'{}'::jsonb AS address_verification,
|
||||
'DEFAULT_PRODUCT' AS issuing_product,
|
||||
NULL AS mar_enhanced_data_token,
|
||||
NULL AS standin_reason
|
||||
FROM (SELECT 1) AS dummy;
|
||||
|
||||
-- Delete exactly one row using the uniformly chosen random_delete_id
|
||||
WITH to_delete AS (
|
||||
SELECT id
|
||||
FROM transaction.transaction
|
||||
WHERE id >= (:random_delete_id)::bigint
|
||||
AND id < ((:random_delete_id)::bigint + :range_size)
|
||||
ORDER BY id
|
||||
LIMIT 1
|
||||
)
|
||||
DELETE FROM transaction.transaction
|
||||
USING to_delete
|
||||
WHERE transaction.transaction.id = to_delete.id;
|
||||
@@ -0,0 +1,25 @@
|
||||
-- enforce a controlled number of getpages prefetch requests from a range of
|
||||
-- 40 million first pages (320 GB) of a 500 GiB table
|
||||
-- the table has 55 million pages
|
||||
|
||||
|
||||
-- Zipfian distributions model real-world access patterns where:
|
||||
-- A few values (popular IDs) are accessed frequently.
|
||||
-- Many values are accessed rarely.
|
||||
-- This is useful for simulating realistic workloads
|
||||
|
||||
\set alpha 1.2
|
||||
\set min_page 1
|
||||
\set max_page 40000000
|
||||
|
||||
\set zipf_random_page random_zipfian(:min_page, :max_page, :alpha)
|
||||
|
||||
-- Read 500 consecutive pages from a Zipfian-distributed random start page
|
||||
-- This enforces PostgreSQL prefetching
|
||||
WITH random_page AS (
|
||||
SELECT :zipf_random_page::int AS start_page
|
||||
)
|
||||
SELECT MAX(created_at)
|
||||
FROM webhook.incoming_webhooks
|
||||
WHERE ctid >= (SELECT format('(%s,1)', start_page)::tid FROM random_page)
|
||||
AND ctid < (SELECT format('(%s,1)', start_page + 500)::tid FROM random_page);
|
||||
@@ -2,11 +2,13 @@ from __future__ import annotations
|
||||
|
||||
import os
|
||||
import timeit
|
||||
from contextlib import closing
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from fixtures.benchmark_fixture import PgBenchRunResult
|
||||
from fixtures.compare_fixtures import PgCompare
|
||||
from fixtures.log_helper import log
|
||||
|
||||
from performance.test_perf_pgbench import get_durations_matrix, utc_now_timestamp
|
||||
|
||||
@@ -82,9 +84,81 @@ def run_pgbench(env: PgCompare, prefix: str, cmdline, password: None):
|
||||
env.zenbenchmark.record_pg_bench_result(prefix, res)
|
||||
|
||||
|
||||
def run_database_maintenance(env: PgCompare):
|
||||
with closing(env.pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
log.info("start vacuum analyze transaction.transaction")
|
||||
with env.zenbenchmark.record_duration("vacuum_analyze"):
|
||||
cur.execute("SET statement_timeout = 0;")
|
||||
cur.execute("SET max_parallel_maintenance_workers = 7;")
|
||||
cur.execute("SET maintenance_work_mem = '10GB';")
|
||||
cur.execute("vacuum analyze transaction.transaction;")
|
||||
log.info("finished vacuum analyze transaction.transaction")
|
||||
|
||||
# recover previously failed or canceled re-indexing
|
||||
cur.execute(
|
||||
"""
|
||||
DO $$
|
||||
DECLARE
|
||||
invalid_index TEXT;
|
||||
BEGIN
|
||||
FOR invalid_index IN
|
||||
SELECT c.relname
|
||||
FROM pg_class c
|
||||
JOIN pg_index i ON i.indexrelid = c.oid
|
||||
JOIN pg_namespace n ON n.oid = c.relnamespace
|
||||
WHERE n.nspname = 'transaction'
|
||||
AND i.indisvalid = FALSE
|
||||
AND c.relname LIKE '%_ccnew%'
|
||||
LOOP
|
||||
EXECUTE 'DROP INDEX IF EXISTS transaction.' || invalid_index;
|
||||
END LOOP;
|
||||
END $$;
|
||||
"""
|
||||
)
|
||||
# also recover failed or canceled re-indexing on toast part of table
|
||||
cur.execute(
|
||||
"""
|
||||
DO $$
|
||||
DECLARE
|
||||
invalid_index TEXT;
|
||||
BEGIN
|
||||
FOR invalid_index IN
|
||||
SELECT c.relname
|
||||
FROM pg_class c
|
||||
JOIN pg_index i ON i.indexrelid = c.oid
|
||||
JOIN pg_namespace n ON n.oid = c.relnamespace
|
||||
WHERE n.nspname = 'pg_toast'
|
||||
AND i.indisvalid = FALSE
|
||||
AND c.relname LIKE '%_ccnew%'
|
||||
AND i.indrelid = (
|
||||
SELECT reltoastrelid FROM pg_class
|
||||
WHERE relname = 'transaction'
|
||||
AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'transaction')
|
||||
)
|
||||
LOOP
|
||||
EXECUTE 'DROP INDEX IF EXISTS pg_toast.' || invalid_index;
|
||||
END LOOP;
|
||||
END $$;
|
||||
"""
|
||||
)
|
||||
|
||||
log.info("start REINDEX TABLE CONCURRENTLY transaction.transaction")
|
||||
with env.zenbenchmark.record_duration("reindex concurrently"):
|
||||
cur.execute("REINDEX TABLE CONCURRENTLY transaction.transaction;")
|
||||
log.info("finished REINDEX TABLE CONCURRENTLY transaction.transaction")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("custom_scripts", get_custom_scripts())
|
||||
@pytest.mark.parametrize("duration", get_durations_matrix())
|
||||
@pytest.mark.remote_cluster
|
||||
def test_perf_oltp_large_tenant(remote_compare: PgCompare, custom_scripts: str, duration: int):
|
||||
def test_perf_oltp_large_tenant_pgbench(
|
||||
remote_compare: PgCompare, custom_scripts: str, duration: int
|
||||
):
|
||||
run_test_pgbench(remote_compare, custom_scripts, duration)
|
||||
# todo: run re-index, analyze, vacuum, etc. after the test and measure and report its duration
|
||||
|
||||
|
||||
@pytest.mark.remote_cluster
|
||||
def test_perf_oltp_large_tenant_maintenance(remote_compare: PgCompare):
|
||||
# run analyze, vacuum, re-index after the test and measure and report its duration
|
||||
run_database_maintenance(remote_compare)
|
||||
|
||||
@@ -83,9 +83,7 @@ def test_storage_controller_many_tenants(
|
||||
"max_offline": "30s",
|
||||
"max_warming_up": "300s",
|
||||
}
|
||||
neon_env_builder.control_plane_compute_hook_api = (
|
||||
compute_reconfigure_listener.control_plane_compute_hook_api
|
||||
)
|
||||
neon_env_builder.control_plane_hooks_api = compute_reconfigure_listener.control_plane_hooks_api
|
||||
|
||||
AZS = ["alpha", "bravo", "charlie"]
|
||||
|
||||
|
||||
@@ -144,7 +144,7 @@ def test_fully_custom_config(positive_env: NeonEnv):
|
||||
"compaction_l0_semaphore": False,
|
||||
"l0_flush_delay_threshold": 25,
|
||||
"l0_flush_stall_threshold": 42,
|
||||
"l0_flush_wait_upload": False,
|
||||
"l0_flush_wait_upload": True,
|
||||
"compaction_target_size": 1048576,
|
||||
"checkpoint_distance": 10000,
|
||||
"checkpoint_timeout": "13m",
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user