mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-22 07:30:37 +00:00
Compare commits
34 Commits
arpad/sk_t
...
tristan957
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9eaaca4834 | ||
|
|
707a926057 | ||
|
|
5bcefb4ee1 | ||
|
|
34322b2424 | ||
|
|
fdfbc7b358 | ||
|
|
190c19c034 | ||
|
|
34e560fe37 | ||
|
|
7922458b98 | ||
|
|
34d9e2d8e3 | ||
|
|
2f82c21c63 | ||
|
|
222cc181e9 | ||
|
|
4d2328ebe3 | ||
|
|
9f81828429 | ||
|
|
9ab13d6e2c | ||
|
|
983e18e63e | ||
|
|
b735df6ff0 | ||
|
|
68cf0ba439 | ||
|
|
d04d924649 | ||
|
|
f5fdaa6dc6 | ||
|
|
c54cd9e76a | ||
|
|
1010b8add4 | ||
|
|
ae4b2af299 | ||
|
|
15fecb8474 | ||
|
|
47677ba578 | ||
|
|
83b6bfa229 | ||
|
|
ed942b05f7 | ||
|
|
62a717a2ca | ||
|
|
c8fbbb9b65 | ||
|
|
d73f4a6470 | ||
|
|
5477d7db93 | ||
|
|
eb9832d846 | ||
|
|
3d36dfe533 | ||
|
|
ebf44210ba | ||
|
|
aabf455dfb |
1
.github/actionlint.yml
vendored
1
.github/actionlint.yml
vendored
@@ -4,6 +4,7 @@ self-hosted-runner:
|
||||
- large
|
||||
- large-arm64
|
||||
- small
|
||||
- small-metal
|
||||
- small-arm64
|
||||
- us-east-2
|
||||
config-variables:
|
||||
|
||||
48
.github/actions/neon-project-create/action.yml
vendored
48
.github/actions/neon-project-create/action.yml
vendored
@@ -17,6 +17,31 @@ inputs:
|
||||
compute_units:
|
||||
description: '[Min, Max] compute units'
|
||||
default: '[1, 1]'
|
||||
# settings below only needed if you want the project to be sharded from the beginning
|
||||
shard_split_project:
|
||||
description: 'by default new projects are not shard-split, specify true to shard-split'
|
||||
required: false
|
||||
default: 'false'
|
||||
admin_api_key:
|
||||
description: 'Admin API Key needed for shard-splitting. Must be specified if shard_split_project is true'
|
||||
required: false
|
||||
shard_count:
|
||||
description: 'Number of shards to split the project into, only applies if shard_split_project is true'
|
||||
required: false
|
||||
default: '8'
|
||||
stripe_size:
|
||||
description: 'Stripe size, optional, in 8kiB pages. e.g. set 2048 for 16MB stripes. Default is 128 MiB, only applies if shard_split_project is true'
|
||||
required: false
|
||||
default: '32768'
|
||||
psql_path:
|
||||
description: 'Path to psql binary - it is caller responsibility to provision the psql binary'
|
||||
required: false
|
||||
default: '/tmp/neon/pg_install/v16/bin/psql'
|
||||
libpq_lib_path:
|
||||
description: 'Path to directory containing libpq library - it is caller responsibility to provision the libpq library'
|
||||
required: false
|
||||
default: '/tmp/neon/pg_install/v16/lib'
|
||||
|
||||
|
||||
outputs:
|
||||
dsn:
|
||||
@@ -63,6 +88,23 @@ runs:
|
||||
echo "project_id=${project_id}" >> $GITHUB_OUTPUT
|
||||
|
||||
echo "Project ${project_id} has been created"
|
||||
|
||||
if [ "${SHARD_SPLIT_PROJECT}" = "true" ]; then
|
||||
# determine tenant ID
|
||||
TENANT_ID=`${PSQL} ${dsn} -t -A -c "SHOW neon.tenant_id"`
|
||||
|
||||
echo "Splitting project ${project_id} with tenant_id ${TENANT_ID} into $((SHARD_COUNT)) shards with stripe size $((STRIPE_SIZE))"
|
||||
|
||||
echo "Sending PUT request to https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/shard_split"
|
||||
echo "with body {\"new_shard_count\": $((SHARD_COUNT)), \"new_stripe_size\": $((STRIPE_SIZE))}"
|
||||
|
||||
# we need an ADMIN API KEY to invoke storage controller API for shard splitting (bash -u above checks that the variable is set)
|
||||
curl -X PUT \
|
||||
"https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/shard_split" \
|
||||
-H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
|
||||
-d "{\"new_shard_count\": $SHARD_COUNT, \"new_stripe_size\": $STRIPE_SIZE}"
|
||||
fi
|
||||
|
||||
env:
|
||||
API_HOST: ${{ inputs.api_host }}
|
||||
API_KEY: ${{ inputs.api_key }}
|
||||
@@ -70,3 +112,9 @@ runs:
|
||||
POSTGRES_VERSION: ${{ inputs.postgres_version }}
|
||||
MIN_CU: ${{ fromJSON(inputs.compute_units)[0] }}
|
||||
MAX_CU: ${{ fromJSON(inputs.compute_units)[1] }}
|
||||
SHARD_SPLIT_PROJECT: ${{ inputs.shard_split_project }}
|
||||
ADMIN_API_KEY: ${{ inputs.admin_api_key }}
|
||||
SHARD_COUNT: ${{ inputs.shard_count }}
|
||||
STRIPE_SIZE: ${{ inputs.stripe_size }}
|
||||
PSQL: ${{ inputs.psql_path }}
|
||||
LD_LIBRARY_PATH: ${{ inputs.libpq_lib_path }}
|
||||
|
||||
@@ -158,8 +158,6 @@ jobs:
|
||||
|
||||
- name: Run cargo build
|
||||
run: |
|
||||
PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
|
||||
export PQ_LIB_DIR
|
||||
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins --tests
|
||||
|
||||
# Do install *before* running rust tests because they might recompile the
|
||||
@@ -217,8 +215,6 @@ jobs:
|
||||
env:
|
||||
NEXTEST_RETRIES: 3
|
||||
run: |
|
||||
PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
|
||||
export PQ_LIB_DIR
|
||||
LD_LIBRARY_PATH=$(pwd)/pg_install/v17/lib
|
||||
export LD_LIBRARY_PATH
|
||||
|
||||
|
||||
2
.github/workflows/build-macos.yml
vendored
2
.github/workflows/build-macos.yml
vendored
@@ -235,7 +235,7 @@ jobs:
|
||||
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
|
||||
|
||||
- name: Run cargo build (only for v17)
|
||||
run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release -j$(sysctl -n hw.ncpu)
|
||||
run: cargo build --all --release -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Check that no warnings are produced (only for v17)
|
||||
run: ./run_clippy.sh
|
||||
|
||||
70
.github/workflows/build_and_test.yml
vendored
70
.github/workflows/build_and_test.yml
vendored
@@ -242,7 +242,7 @@ jobs:
|
||||
statuses: write
|
||||
contents: write
|
||||
pull-requests: write
|
||||
runs-on: [ self-hosted, small ]
|
||||
runs-on: [ self-hosted, small-metal ]
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
credentials:
|
||||
@@ -636,6 +636,8 @@ jobs:
|
||||
file: compute/compute-node.Dockerfile
|
||||
cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }}
|
||||
secrets: |
|
||||
PG_SEARCH_ENTERPRISE_GITHUB_PAT=${{ secrets.PG_SEARCH_ENTERPRISE_GITHUB_PAT }}
|
||||
tags: |
|
||||
neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
|
||||
@@ -786,6 +788,17 @@ jobs:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
- name: Get the last compute release tag
|
||||
id: get-last-compute-release-tag
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
run: |
|
||||
tag=$(gh api -q '[.[].tag_name | select(startswith("release-compute"))][0]'\
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
"/repos/${{ github.repository }}/releases")
|
||||
echo tag=${tag} >> ${GITHUB_OUTPUT}
|
||||
|
||||
# `neondatabase/neon` contains multiple binaries, all of them use the same input for the version into the same version formatting library.
|
||||
# Pick pageserver as currently the only binary with extra "version" features printed in the string to verify.
|
||||
# Regular pageserver version string looks like
|
||||
@@ -817,6 +830,20 @@ jobs:
|
||||
TEST_VERSION_ONLY: ${{ matrix.pg_version }}
|
||||
run: ./docker-compose/docker_compose_test.sh
|
||||
|
||||
- name: Print logs and clean up docker-compose test
|
||||
if: always()
|
||||
run: |
|
||||
docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml logs || true
|
||||
docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml down
|
||||
|
||||
- name: Test extension upgrade
|
||||
timeout-minutes: 20
|
||||
if: ${{ needs.tag.outputs.build-tag == github.run_id }}
|
||||
env:
|
||||
NEWTAG: ${{ needs.tag.outputs.build-tag }}
|
||||
OLDTAG: ${{ steps.get-last-compute-release-tag.outputs.tag }}
|
||||
run: ./docker-compose/test_extensions_upgrade.sh
|
||||
|
||||
- name: Print logs and clean up
|
||||
if: always()
|
||||
run: |
|
||||
@@ -1050,6 +1077,7 @@ jobs:
|
||||
retries: 5
|
||||
script: |
|
||||
const tag = "${{ needs.tag.outputs.build-tag }}";
|
||||
const branch = "${{ github.ref_name }}";
|
||||
|
||||
try {
|
||||
const existingRef = await github.rest.git.getRef({
|
||||
@@ -1092,12 +1120,48 @@ jobs:
|
||||
}
|
||||
|
||||
console.log(`Release for tag ${tag} does not exist. Creating it...`);
|
||||
|
||||
// Find the PR number using the commit SHA
|
||||
const pullRequests = await github.rest.pulls.list({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
state: 'closed',
|
||||
base: branch,
|
||||
});
|
||||
|
||||
const pr = pullRequests.data.find(pr => pr.merge_commit_sha === context.sha);
|
||||
const prNumber = pr ? pr.number : null;
|
||||
|
||||
// Find the previous release on the branch
|
||||
const releases = await github.rest.repos.listReleases({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
per_page: 100,
|
||||
});
|
||||
|
||||
const branchReleases = releases.data
|
||||
.filter((release) => {
|
||||
const regex = new RegExp(`^${branch}-\\d+$`);
|
||||
return regex.test(release.tag_name) && !release.draft && !release.prerelease;
|
||||
})
|
||||
.sort((a, b) => new Date(b.created_at) - new Date(a.created_at));
|
||||
|
||||
const previousTag = branchReleases.length > 0 ? branchReleases[0].tag_name : null;
|
||||
|
||||
const releaseNotes = [
|
||||
prNumber
|
||||
? `Release PR https://github.com/${context.repo.owner}/${context.repo.repo}/pull/${prNumber}.`
|
||||
: 'Release PR not found.',
|
||||
previousTag
|
||||
? `Diff with the previous release https://github.com/${context.repo.owner}/${context.repo.repo}/compare/${previousTag}...${tag}.`
|
||||
: `No previous release found on branch ${branch}.`,
|
||||
].join('\n\n');
|
||||
|
||||
await github.rest.repos.createRelease({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
tag_name: tag,
|
||||
// TODO: Automate release notes properly
|
||||
generate_release_notes: false,
|
||||
body: releaseNotes,
|
||||
});
|
||||
console.log(`Release for tag ${tag} created successfully.`);
|
||||
}
|
||||
|
||||
33
.github/workflows/ingest_benchmark.yml
vendored
33
.github/workflows/ingest_benchmark.yml
vendored
@@ -28,7 +28,24 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false # allow other variants to continue even if one fails
|
||||
matrix:
|
||||
target_project: [new_empty_project, large_existing_project]
|
||||
include:
|
||||
- target_project: new_empty_project_stripe_size_2048
|
||||
stripe_size: 2048 # 16 MiB
|
||||
postgres_version: 16
|
||||
- target_project: new_empty_project_stripe_size_32768
|
||||
stripe_size: 32768 # 256 MiB # note that this is different from null because using null will shard_split the project only if it reaches the threshold
|
||||
# while here it is sharded from the beginning with a shard size of 256 MiB
|
||||
postgres_version: 16
|
||||
- target_project: new_empty_project
|
||||
stripe_size: null # run with neon defaults which will shard split only when reaching the threshold
|
||||
postgres_version: 16
|
||||
- target_project: new_empty_project
|
||||
stripe_size: null # run with neon defaults which will shard split only when reaching the threshold
|
||||
postgres_version: 17
|
||||
- target_project: large_existing_project
|
||||
stripe_size: null # cannot re-shared or choose different stripe size for existing, already sharded project
|
||||
postgres_version: 16
|
||||
max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
@@ -67,17 +84,21 @@ jobs:
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Create Neon Project
|
||||
if: ${{ matrix.target_project == 'new_empty_project' }}
|
||||
if: ${{ startsWith(matrix.target_project, 'new_empty_project') }}
|
||||
id: create-neon-project-ingest-target
|
||||
uses: ./.github/actions/neon-project-create
|
||||
with:
|
||||
region_id: aws-us-east-2
|
||||
postgres_version: 16
|
||||
postgres_version: ${{ matrix.postgres_version }}
|
||||
compute_units: '[7, 7]' # we want to test large compute here to avoid compute-side bottleneck
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
shard_split_project: ${{ matrix.stripe_size != null && 'true' || 'false' }}
|
||||
admin_api_key: ${{ secrets.NEON_STAGING_ADMIN_API_KEY }}
|
||||
shard_count: 8
|
||||
stripe_size: ${{ matrix.stripe_size }}
|
||||
|
||||
- name: Initialize Neon project
|
||||
if: ${{ matrix.target_project == 'new_empty_project' }}
|
||||
if: ${{ startsWith(matrix.target_project, 'new_empty_project') }}
|
||||
env:
|
||||
BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }}
|
||||
NEW_PROJECT_ID: ${{ steps.create-neon-project-ingest-target.outputs.project_id }}
|
||||
@@ -130,7 +151,7 @@ jobs:
|
||||
test_selection: performance/test_perf_ingest_using_pgcopydb.py
|
||||
run_in_parallel: false
|
||||
extra_params: -s -m remote_cluster --timeout 86400 -k test_ingest_performance_using_pgcopydb
|
||||
pg_version: v16
|
||||
pg_version: v${{ matrix.postgres_version }}
|
||||
save_perf_report: true
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
@@ -146,7 +167,7 @@ jobs:
|
||||
${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "\dt+"
|
||||
|
||||
- name: Delete Neon Project
|
||||
if: ${{ always() && matrix.target_project == 'new_empty_project' }}
|
||||
if: ${{ always() && startsWith(matrix.target_project, 'new_empty_project') }}
|
||||
uses: ./.github/actions/neon-project-delete
|
||||
with:
|
||||
project_id: ${{ steps.create-neon-project-ingest-target.outputs.project_id }}
|
||||
|
||||
2
.github/workflows/neon_extra_builds.yml
vendored
2
.github/workflows/neon_extra_builds.yml
vendored
@@ -114,7 +114,7 @@ jobs:
|
||||
run: make walproposer-lib -j$(nproc)
|
||||
|
||||
- name: Produce the build stats
|
||||
run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release --timings -j$(nproc)
|
||||
run: cargo build --all --release --timings -j$(nproc)
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
|
||||
185
Cargo.lock
generated
185
Cargo.lock
generated
@@ -179,7 +179,7 @@ dependencies = [
|
||||
"nom",
|
||||
"num-traits",
|
||||
"rusticata-macros",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"time",
|
||||
]
|
||||
|
||||
@@ -718,14 +718,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "axum"
|
||||
version = "0.7.9"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
|
||||
checksum = "6d6fd624c75e18b3b4c6b9caf42b1afe24437daaee904069137d8bab077be8b8"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"axum-core",
|
||||
"base64 0.22.1",
|
||||
"bytes",
|
||||
"form_urlencoded",
|
||||
"futures-util",
|
||||
"http 1.1.0",
|
||||
"http-body 1.0.0",
|
||||
@@ -733,7 +733,7 @@ dependencies = [
|
||||
"hyper 1.4.1",
|
||||
"hyper-util",
|
||||
"itoa",
|
||||
"matchit 0.7.0",
|
||||
"matchit",
|
||||
"memchr",
|
||||
"mime",
|
||||
"percent-encoding",
|
||||
@@ -746,7 +746,7 @@ dependencies = [
|
||||
"sha1",
|
||||
"sync_wrapper 1.0.1",
|
||||
"tokio",
|
||||
"tokio-tungstenite 0.24.0",
|
||||
"tokio-tungstenite 0.26.1",
|
||||
"tower 0.5.2",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
@@ -755,11 +755,10 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "axum-core"
|
||||
version = "0.4.5"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
|
||||
checksum = "df1362f362fd16024ae199c1970ce98f9661bf5ef94b9808fee734bc3698b733"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bytes",
|
||||
"futures-util",
|
||||
"http 1.1.0",
|
||||
@@ -1130,7 +1129,7 @@ dependencies = [
|
||||
"log",
|
||||
"nix 0.25.1",
|
||||
"regex",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1311,9 +1310,9 @@ dependencies = [
|
||||
"serde_with",
|
||||
"signal-hook",
|
||||
"tar",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres 0.7.9",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"tower 0.5.2",
|
||||
@@ -1420,9 +1419,9 @@ dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
"storage_broker",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres 0.7.9",
|
||||
"tokio-util",
|
||||
"toml",
|
||||
"toml_edit",
|
||||
@@ -2264,7 +2263,7 @@ dependencies = [
|
||||
"pin-project",
|
||||
"rand 0.8.5",
|
||||
"sha1",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
]
|
||||
@@ -3390,12 +3389,6 @@ dependencies = [
|
||||
"regex-automata 0.1.10",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "matchit"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b87248edafb776e59e6ee64a79086f65890d3510f2c656c000bf2a7e8a0aea40"
|
||||
|
||||
[[package]]
|
||||
name = "matchit"
|
||||
version = "0.8.4"
|
||||
@@ -3786,7 +3779,7 @@ dependencies = [
|
||||
"serde_json",
|
||||
"serde_path_to_error",
|
||||
"sha2",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"url",
|
||||
]
|
||||
|
||||
@@ -3836,7 +3829,7 @@ dependencies = [
|
||||
"futures-sink",
|
||||
"js-sys",
|
||||
"pin-project-lite",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
@@ -3868,7 +3861,7 @@ dependencies = [
|
||||
"opentelemetry_sdk",
|
||||
"prost",
|
||||
"reqwest",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3904,7 +3897,7 @@ dependencies = [
|
||||
"percent-encoding",
|
||||
"rand 0.8.5",
|
||||
"serde_json",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tracing",
|
||||
@@ -4018,7 +4011,7 @@ dependencies = [
|
||||
"remote_storage",
|
||||
"serde_json",
|
||||
"svg_fmt",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"utils",
|
||||
@@ -4067,8 +4060,8 @@ dependencies = [
|
||||
"pageserver_compaction",
|
||||
"pin-project-lite",
|
||||
"postgres",
|
||||
"postgres-protocol 0.6.4",
|
||||
"postgres-types 0.2.4",
|
||||
"postgres-protocol 0.6.6",
|
||||
"postgres-types 0.2.6",
|
||||
"postgres_backend",
|
||||
"postgres_connection",
|
||||
"postgres_ffi",
|
||||
@@ -4094,12 +4087,12 @@ dependencies = [
|
||||
"strum_macros",
|
||||
"sysinfo",
|
||||
"tenant_size_model",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tikv-jemallocator",
|
||||
"tokio",
|
||||
"tokio-epoll-uring",
|
||||
"tokio-io-timeout",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres 0.7.9",
|
||||
"tokio-stream",
|
||||
"tokio-tar",
|
||||
"tokio-util",
|
||||
@@ -4140,7 +4133,7 @@ dependencies = [
|
||||
"storage_broker",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"utils",
|
||||
]
|
||||
|
||||
@@ -4155,9 +4148,9 @@ dependencies = [
|
||||
"postgres",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres 0.7.9",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"utils",
|
||||
@@ -4455,23 +4448,23 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "postgres"
|
||||
version = "0.19.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#511f998c00148ab7c847bd7e6cfd3a906d0e7473"
|
||||
version = "0.19.6"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#8b44892f7851e705810b2cb54504325699966070"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
"futures-util",
|
||||
"log",
|
||||
"tokio",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres 0.7.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "postgres-protocol"
|
||||
version = "0.6.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#511f998c00148ab7c847bd7e6cfd3a906d0e7473"
|
||||
version = "0.6.6"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#8b44892f7851e705810b2cb54504325699966070"
|
||||
dependencies = [
|
||||
"base64 0.20.0",
|
||||
"base64 0.21.1",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
@@ -4520,12 +4513,13 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "postgres-types"
|
||||
version = "0.2.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#511f998c00148ab7c847bd7e6cfd3a906d0e7473"
|
||||
version = "0.2.6"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#8b44892f7851e705810b2cb54504325699966070"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"chrono",
|
||||
"fallible-iterator",
|
||||
"postgres-protocol 0.6.4",
|
||||
"postgres-protocol 0.6.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4559,9 +4553,9 @@ dependencies = [
|
||||
"rustls 0.23.18",
|
||||
"rustls-pemfile 2.1.1",
|
||||
"serde",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres 0.7.9",
|
||||
"tokio-postgres-rustls",
|
||||
"tokio-rustls 0.26.0",
|
||||
"tokio-util",
|
||||
@@ -4576,7 +4570,7 @@ dependencies = [
|
||||
"itertools 0.10.5",
|
||||
"once_cell",
|
||||
"postgres",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres 0.7.9",
|
||||
"url",
|
||||
]
|
||||
|
||||
@@ -4597,7 +4591,7 @@ dependencies = [
|
||||
"pprof",
|
||||
"regex",
|
||||
"serde",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tracing",
|
||||
"utils",
|
||||
]
|
||||
@@ -4608,7 +4602,7 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"camino",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"workspace_hack",
|
||||
]
|
||||
@@ -4641,7 +4635,7 @@ dependencies = [
|
||||
"smallvec",
|
||||
"symbolic-demangle",
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4670,10 +4664,10 @@ dependencies = [
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"itertools 0.10.5",
|
||||
"postgres-protocol 0.6.4",
|
||||
"postgres-protocol 0.6.6",
|
||||
"rand 0.8.5",
|
||||
"serde",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
@@ -4744,7 +4738,7 @@ dependencies = [
|
||||
"memchr",
|
||||
"parking_lot 0.12.1",
|
||||
"procfs",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4914,11 +4908,11 @@ dependencies = [
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"subtle",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tikv-jemalloc-ctl",
|
||||
"tikv-jemallocator",
|
||||
"tokio",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres 0.7.9",
|
||||
"tokio-postgres2",
|
||||
"tokio-rustls 0.26.0",
|
||||
"tokio-tungstenite 0.21.0",
|
||||
@@ -5311,7 +5305,7 @@ dependencies = [
|
||||
"http 1.1.0",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
@@ -5331,7 +5325,7 @@ dependencies = [
|
||||
"reqwest",
|
||||
"reqwest-middleware",
|
||||
"retry-policies",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"wasm-timer",
|
||||
@@ -5347,7 +5341,7 @@ dependencies = [
|
||||
"async-trait",
|
||||
"getrandom 0.2.11",
|
||||
"http 1.1.0",
|
||||
"matchit 0.8.4",
|
||||
"matchit",
|
||||
"opentelemetry",
|
||||
"reqwest",
|
||||
"reqwest-middleware",
|
||||
@@ -5706,7 +5700,7 @@ dependencies = [
|
||||
"pageserver_api",
|
||||
"parking_lot 0.12.1",
|
||||
"postgres",
|
||||
"postgres-protocol 0.6.4",
|
||||
"postgres-protocol 0.6.6",
|
||||
"postgres_backend",
|
||||
"postgres_ffi",
|
||||
"pprof",
|
||||
@@ -5726,11 +5720,11 @@ dependencies = [
|
||||
"storage_broker",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tikv-jemallocator",
|
||||
"tokio",
|
||||
"tokio-io-timeout",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres 0.7.9",
|
||||
"tokio-stream",
|
||||
"tokio-tar",
|
||||
"tokio-util",
|
||||
@@ -5765,7 +5759,7 @@ dependencies = [
|
||||
"reqwest",
|
||||
"safekeeper_api",
|
||||
"serde",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"utils",
|
||||
"workspace_hack",
|
||||
]
|
||||
@@ -5974,7 +5968,7 @@ dependencies = [
|
||||
"rand 0.8.5",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"time",
|
||||
"url",
|
||||
"uuid",
|
||||
@@ -6046,7 +6040,7 @@ checksum = "c7715380eec75f029a4ef7de39a9200e0a63823176b759d055b613f5a87df6a6"
|
||||
dependencies = [
|
||||
"percent-encoding",
|
||||
"serde",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6208,7 +6202,7 @@ checksum = "adc4e5204eb1910f40f9cfa375f6f05b68c3abac4b6fd879c8ff5e7ae8a0a085"
|
||||
dependencies = [
|
||||
"num-bigint",
|
||||
"num-traits",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"time",
|
||||
]
|
||||
|
||||
@@ -6353,7 +6347,7 @@ dependencies = [
|
||||
"serde_json",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
@@ -6400,7 +6394,7 @@ dependencies = [
|
||||
"serde_json",
|
||||
"storage_controller_client",
|
||||
"tokio",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres 0.7.9",
|
||||
"tokio-postgres-rustls",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
@@ -6645,7 +6639,16 @@ version = "1.0.69"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
"thiserror-impl 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "2.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc"
|
||||
dependencies = [
|
||||
"thiserror-impl 2.0.11",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6659,6 +6662,17 @@ dependencies = [
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "2.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.90",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thread_local"
|
||||
version = "1.1.7"
|
||||
@@ -6815,7 +6829,7 @@ dependencies = [
|
||||
"nix 0.26.4",
|
||||
"once_cell",
|
||||
"scopeguard",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
@@ -6845,8 +6859,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tokio-postgres"
|
||||
version = "0.7.7"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#511f998c00148ab7c847bd7e6cfd3a906d0e7473"
|
||||
version = "0.7.9"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#8b44892f7851e705810b2cb54504325699966070"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
@@ -6859,11 +6873,13 @@ dependencies = [
|
||||
"percent-encoding",
|
||||
"phf",
|
||||
"pin-project-lite",
|
||||
"postgres-protocol 0.6.4",
|
||||
"postgres-types 0.2.4",
|
||||
"postgres-protocol 0.6.6",
|
||||
"postgres-types 0.2.6",
|
||||
"rand 0.8.5",
|
||||
"socket2",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"whoami",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6901,7 +6917,7 @@ dependencies = [
|
||||
"ring",
|
||||
"rustls 0.23.18",
|
||||
"tokio",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres 0.7.9",
|
||||
"tokio-rustls 0.26.0",
|
||||
"x509-certificate",
|
||||
]
|
||||
@@ -6922,6 +6938,7 @@ dependencies = [
|
||||
"pin-project-lite",
|
||||
"postgres-protocol2",
|
||||
"postgres-types2",
|
||||
"serde",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
]
|
||||
@@ -6998,14 +7015,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tokio-tungstenite"
|
||||
version = "0.24.0"
|
||||
version = "0.26.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "edc5f74e248dc973e0dbb7b74c7e0d6fcc301c694ff50049504004ef4d0cdcd9"
|
||||
checksum = "be4bf6fecd69fcdede0ec680aaf474cdab988f9de6bc73d3758f0160e3b7025a"
|
||||
dependencies = [
|
||||
"futures-util",
|
||||
"log",
|
||||
"tokio",
|
||||
"tungstenite 0.24.0",
|
||||
"tungstenite 0.26.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -7315,16 +7332,16 @@ dependencies = [
|
||||
"log",
|
||||
"rand 0.8.5",
|
||||
"sha1",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"url",
|
||||
"utf-8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tungstenite"
|
||||
version = "0.24.0"
|
||||
version = "0.26.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "18e5b8366ee7a95b16d32197d0b2604b43a0be89dc5fac9f8e96ccafbaedda8a"
|
||||
checksum = "413083a99c579593656008130e29255e54dcaae495be556cc26888f211648c24"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"bytes",
|
||||
@@ -7334,7 +7351,7 @@ dependencies = [
|
||||
"log",
|
||||
"rand 0.8.5",
|
||||
"sha1",
|
||||
"thiserror",
|
||||
"thiserror 2.0.11",
|
||||
"utf-8",
|
||||
]
|
||||
|
||||
@@ -7529,7 +7546,7 @@ dependencies = [
|
||||
"signal-hook",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tokio-tar",
|
||||
@@ -7578,7 +7595,7 @@ dependencies = [
|
||||
"serde_json",
|
||||
"sysinfo",
|
||||
"tokio",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres 0.7.9",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
@@ -7629,7 +7646,7 @@ dependencies = [
|
||||
"remote_storage",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"tikv-jemallocator",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
@@ -8158,7 +8175,7 @@ dependencies = [
|
||||
"ring",
|
||||
"signature 2.2.0",
|
||||
"spki 0.7.3",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
@@ -8175,7 +8192,7 @@ dependencies = [
|
||||
"nom",
|
||||
"oid-registry",
|
||||
"rusticata-macros",
|
||||
"thiserror",
|
||||
"thiserror 1.0.69",
|
||||
"time",
|
||||
]
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ aws-smithy-types = "1.2"
|
||||
aws-credential-types = "1.2.0"
|
||||
aws-sigv4 = { version = "1.2", features = ["sign-http"] }
|
||||
aws-types = "1.3"
|
||||
axum = { version = "0.7.9", features = ["ws"] }
|
||||
axum = { version = "0.8.1", features = ["ws"] }
|
||||
base64 = "0.13.0"
|
||||
bincode = "1.3"
|
||||
bindgen = "0.70"
|
||||
|
||||
@@ -45,7 +45,7 @@ COPY --chown=nonroot . .
|
||||
|
||||
ARG ADDITIONAL_RUSTFLAGS
|
||||
RUN set -e \
|
||||
&& PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo build \
|
||||
&& RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo build \
|
||||
--bin pg_sni_router \
|
||||
--bin pageserver \
|
||||
--bin pagectl \
|
||||
|
||||
@@ -21,8 +21,10 @@ The Neon storage engine consists of two major components:
|
||||
|
||||
See developer documentation in [SUMMARY.md](/docs/SUMMARY.md) for more information.
|
||||
|
||||
## Running local installation
|
||||
## Running a local development environment
|
||||
|
||||
Neon can be run on a workstation for small experiments and to test code changes, by
|
||||
following these instructions.
|
||||
|
||||
#### Installing dependencies on Linux
|
||||
1. Install build dependencies and other applicable packages
|
||||
@@ -238,7 +240,7 @@ postgres=# select * from t;
|
||||
> cargo neon stop
|
||||
```
|
||||
|
||||
More advanced usages can be found at [Control Plane and Neon Local](./control_plane/README.md).
|
||||
More advanced usages can be found at [Local Development Control Plane (`neon_local`))](./control_plane/README.md).
|
||||
|
||||
#### Handling build failures
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ ARG TAG=pinned
|
||||
ARG BUILD_TAG
|
||||
ARG DEBIAN_VERSION=bookworm
|
||||
ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim
|
||||
ARG ALPINE_CURL_VERSION=8.11.1
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
@@ -882,6 +883,39 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
|
||||
|
||||
USER root
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "rust extensions pgrx12.7"
|
||||
#
|
||||
# Essentially, this layer is the same as above, but instead of pgrx 0.12.6, it
|
||||
# uses 0.12.7. 0.12.7, specifically, is necessary for building pg_search from
|
||||
# ParadeDB, according to the ParadeDB team. Eventually, we can remove this layer
|
||||
# when ParadeDB gets various pgrx changes upstreamed.
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS rust-extensions-build-pgrx12_7
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN apt update && \
|
||||
apt install --no-install-recommends --no-install-suggests -y curl libclang-dev && \
|
||||
apt clean && rm -rf /var/lib/apt/lists/* && \
|
||||
useradd -ms /bin/bash nonroot -b /home
|
||||
|
||||
ENV HOME=/home/nonroot
|
||||
ENV PATH="/home/nonroot/.cargo/bin:/usr/local/pgsql/bin/:$PATH"
|
||||
USER nonroot
|
||||
WORKDIR /home/nonroot
|
||||
|
||||
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
|
||||
chmod +x rustup-init && \
|
||||
./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
|
||||
rm rustup-init && \
|
||||
cargo install --locked --version 0.12.7 cargo-pgrx && \
|
||||
/bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
|
||||
|
||||
USER root
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layers "pg-onnx-build" and "pgrag-pg-build"
|
||||
@@ -1154,6 +1188,24 @@ RUN wget https://github.com/reorg/pg_repack/archive/refs/tags/ver_1.5.2.tar.gz -
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg_search-build"
|
||||
# compile "pg_search" extension
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM rust-extensions-build-pgrx12_7 AS pg-search-build
|
||||
ARG PG_VERSION
|
||||
ARG PARADEDB_TAG=v0.14.1
|
||||
|
||||
RUN --mount=type=secret,id=PG_SEARCH_ENTERPRISE_GITHUB_PAT \
|
||||
git clone --recurse-submodules --depth 1 --branch ${PARADEDB_TAG} https://$(cat /run/secrets/PG_SEARCH_ENTERPRISE_GITHUB_PAT)@github.com/paradedb/paradedb-enterprise.git pg_search-src && \
|
||||
cd pg_search-src && \
|
||||
cargo pgrx install --package pg_search --no-default-features --features unsafe-postgres --release && \
|
||||
sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_search.control && \
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_search.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "neon-pg-ext-build"
|
||||
@@ -1201,6 +1253,7 @@ COPY --from=pg-ivm-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-partman-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-mooncake-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-repack-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-search-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY pgxn/ pgxn/
|
||||
|
||||
RUN make -j $(getconf _NPROCESSORS_ONLN) \
|
||||
@@ -1266,16 +1319,31 @@ RUN set -e \
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layers "postgres-exporter", "pgbouncer-exporter", and "sql-exporter"
|
||||
# Layer "exporters"
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM quay.io/prometheuscommunity/postgres-exporter:v0.16.0 AS postgres-exporter
|
||||
FROM quay.io/prometheuscommunity/pgbouncer-exporter:v0.10.2 AS pgbouncer-exporter
|
||||
|
||||
# Keep the version the same as in build-tools.Dockerfile and
|
||||
# test_runner/regress/test_compute_metrics.py.
|
||||
FROM burningalchemist/sql_exporter:0.17.0 AS sql-exporter
|
||||
FROM alpine/curl:${ALPINE_CURL_VERSION} AS exporters
|
||||
ARG TARGETARCH
|
||||
# Keep sql_exporter version same as in build-tools.Dockerfile and
|
||||
# test_runner/regress/test_compute_metrics.py
|
||||
RUN if [ "$TARGETARCH" = "amd64" ]; then\
|
||||
postgres_exporter_sha256='027e75dda7af621237ff8f5ac66b78a40b0093595f06768612b92b1374bd3105';\
|
||||
pgbouncer_exporter_sha256='c9f7cf8dcff44f0472057e9bf52613d93f3ffbc381ad7547a959daa63c5e84ac';\
|
||||
sql_exporter_sha256='38e439732bbf6e28ca4a94d7bc3686d3fa1abdb0050773d5617a9efdb9e64d08';\
|
||||
else\
|
||||
postgres_exporter_sha256='131a376d25778ff9701a4c81f703f179e0b58db5c2c496e66fa43f8179484786';\
|
||||
pgbouncer_exporter_sha256='217c4afd7e6492ae904055bc14fe603552cf9bac458c063407e991d68c519da3';\
|
||||
sql_exporter_sha256='11918b00be6e2c3a67564adfdb2414fdcbb15a5db76ea17d1d1a944237a893c6';\
|
||||
fi\
|
||||
&& curl -sL https://github.com/prometheus-community/postgres_exporter/releases/download/v0.16.0/postgres_exporter-0.16.0.linux-${TARGETARCH}.tar.gz\
|
||||
| tar xzf - --strip-components=1 -C.\
|
||||
&& curl -sL https://github.com/prometheus-community/pgbouncer_exporter/releases/download/v0.10.2/pgbouncer_exporter-0.10.2.linux-${TARGETARCH}.tar.gz\
|
||||
| tar xzf - --strip-components=1 -C.\
|
||||
&& curl -sL https://github.com/burningalchemist/sql_exporter/releases/download/0.17.0/sql_exporter-0.17.0.linux-${TARGETARCH}.tar.gz\
|
||||
| tar xzf - --strip-components=1 -C.\
|
||||
&& echo "${postgres_exporter_sha256} postgres_exporter" | sha256sum -c -\
|
||||
&& echo "${pgbouncer_exporter_sha256} pgbouncer_exporter" | sha256sum -c -\
|
||||
&& echo "${sql_exporter_sha256} sql_exporter" | sha256sum -c -
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
@@ -1330,7 +1398,8 @@ COPY --from=vector-pg-build /pgvector.patch /ext-src/
|
||||
COPY --from=pgjwt-pg-build /pgjwt.tar.gz /ext-src
|
||||
#COPY --from=pgrag-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
#COPY --from=pg-jsonschema-pg-build /home/nonroot/pg_jsonschema.tar.gz /ext-src
|
||||
#COPY --from=pg-graphql-pg-build /home/nonroot/pg_graphql.tar.gz /ext-src
|
||||
COPY --from=pg-graphql-pg-build /home/nonroot/pg_graphql.tar.gz /ext-src
|
||||
COPY compute/patches/pg_graphql.patch /ext-src
|
||||
#COPY --from=pg-tiktoken-pg-build /home/nonroot/pg_tiktoken.tar.gz /ext-src
|
||||
COPY --from=hypopg-pg-build /hypopg.tar.gz /ext-src
|
||||
COPY --from=pg-hashids-pg-build /pg_hashids.tar.gz /ext-src
|
||||
@@ -1364,6 +1433,7 @@ RUN cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
|
||||
RUN cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan_${PG_VERSION}.patch
|
||||
COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
|
||||
RUN patch -p1 </ext-src/pg_cron.patch
|
||||
RUN cd /ext-src/pg_graphql-src && patch -p1 </ext-src/pg_graphql.patch
|
||||
ENV PATH=/usr/local/pgsql/bin:$PATH
|
||||
ENV PGHOST=compute
|
||||
ENV PGPORT=55433
|
||||
@@ -1401,10 +1471,10 @@ COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini
|
||||
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/local_proxy /usr/local/bin/local_proxy
|
||||
RUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy
|
||||
|
||||
# Metrics exporter binaries and configuration files
|
||||
COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter
|
||||
COPY --from=pgbouncer-exporter /bin/pgbouncer_exporter /bin/pgbouncer_exporter
|
||||
COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter
|
||||
# Metrics exporter binaries and configuration files
|
||||
COPY --from=exporters ./postgres_exporter /bin/postgres_exporter
|
||||
COPY --from=exporters ./pgbouncer_exporter /bin/pgbouncer_exporter
|
||||
COPY --from=exporters ./sql_exporter /bin/sql_exporter
|
||||
|
||||
COPY --chown=postgres compute/etc/postgres_exporter.yml /etc/postgres_exporter.yml
|
||||
|
||||
|
||||
19
compute/patches/pg_graphql.patch
Normal file
19
compute/patches/pg_graphql.patch
Normal file
@@ -0,0 +1,19 @@
|
||||
commit ec6a491d126882966a696f9ad5d3698935361d55
|
||||
Author: Alexey Masterov <alexeymasterov@neon.tech>
|
||||
Date: Tue Dec 17 10:25:00 2024 +0100
|
||||
|
||||
Changes required to run tests on Neon
|
||||
|
||||
diff --git a/test/expected/permissions_functions.out b/test/expected/permissions_functions.out
|
||||
index 1e9fbc2..94cbe25 100644
|
||||
--- a/test/expected/permissions_functions.out
|
||||
+++ b/test/expected/permissions_functions.out
|
||||
@@ -64,7 +64,7 @@ begin;
|
||||
select current_user;
|
||||
current_user
|
||||
--------------
|
||||
- postgres
|
||||
+ cloud_admin
|
||||
(1 row)
|
||||
|
||||
-- revoke default access from the public role for new functions
|
||||
@@ -85,6 +85,8 @@ use tracing::info;
|
||||
use tracing::log::warn;
|
||||
use zstd::stream::read::Decoder;
|
||||
|
||||
use crate::metrics::{REMOTE_EXT_REQUESTS_TOTAL, UNKNOWN_HTTP_STATUS};
|
||||
|
||||
fn get_pg_config(argument: &str, pgbin: &str) -> String {
|
||||
// gives the result of `pg_config [argument]`
|
||||
// where argument is a flag like `--version` or `--sharedir`
|
||||
@@ -258,21 +260,58 @@ async fn download_extension_tar(ext_remote_storage: &str, ext_path: &str) -> Res
|
||||
|
||||
info!("Download extension {:?} from uri {:?}", ext_path, uri);
|
||||
|
||||
let resp = reqwest::get(uri).await?;
|
||||
match do_extension_server_request(&uri).await {
|
||||
Ok(resp) => {
|
||||
info!(
|
||||
"Successfully downloaded remote extension data {:?}",
|
||||
ext_path
|
||||
);
|
||||
REMOTE_EXT_REQUESTS_TOTAL
|
||||
.with_label_values(&[&StatusCode::OK.to_string()])
|
||||
.inc();
|
||||
Ok(resp)
|
||||
}
|
||||
Err((msg, status)) => {
|
||||
REMOTE_EXT_REQUESTS_TOTAL
|
||||
.with_label_values(&[&status])
|
||||
.inc();
|
||||
bail!(msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match resp.status() {
|
||||
// Do a single remote extensions server request.
|
||||
// Return result or (error message + stringified status code) in case of any failures.
|
||||
async fn do_extension_server_request(uri: &str) -> Result<Bytes, (String, String)> {
|
||||
let resp = reqwest::get(uri).await.map_err(|e| {
|
||||
(
|
||||
format!("could not perform remote extensions server request: {}", e),
|
||||
UNKNOWN_HTTP_STATUS.to_string(),
|
||||
)
|
||||
})?;
|
||||
let status = resp.status();
|
||||
|
||||
match status {
|
||||
StatusCode::OK => match resp.bytes().await {
|
||||
Ok(resp) => {
|
||||
info!("Download extension {:?} completed successfully", ext_path);
|
||||
Ok(resp)
|
||||
}
|
||||
Err(e) => bail!("could not deserialize remote extension response: {}", e),
|
||||
Ok(resp) => Ok(resp),
|
||||
Err(e) => Err((
|
||||
format!("could not read remote extensions server response: {}", e),
|
||||
// It's fine to return and report error with status as 200 OK,
|
||||
// because we still failed to read the response.
|
||||
status.to_string(),
|
||||
)),
|
||||
},
|
||||
StatusCode::SERVICE_UNAVAILABLE => bail!("remote extension is temporarily unavailable"),
|
||||
_ => bail!(
|
||||
"unexpected remote extension response status code: {}",
|
||||
resp.status()
|
||||
),
|
||||
StatusCode::SERVICE_UNAVAILABLE => Err((
|
||||
"remote extensions server is temporarily unavailable".to_string(),
|
||||
status.to_string(),
|
||||
)),
|
||||
_ => Err((
|
||||
format!(
|
||||
"unexpected remote extensions server response status code: {}",
|
||||
status
|
||||
),
|
||||
status.to_string(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,9 +1,6 @@
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
use axum::{
|
||||
async_trait,
|
||||
extract::{rejection::JsonRejection, FromRequest, Request},
|
||||
};
|
||||
use axum::extract::{rejection::JsonRejection, FromRequest, Request};
|
||||
use compute_api::responses::GenericAPIError;
|
||||
use http::StatusCode;
|
||||
|
||||
@@ -12,7 +9,6 @@ use http::StatusCode;
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
pub(crate) struct Json<T>(pub T);
|
||||
|
||||
#[async_trait]
|
||||
impl<S, T> FromRequest<S> for Json<T>
|
||||
where
|
||||
axum::Json<T>: FromRequest<S, Rejection = JsonRejection>,
|
||||
|
||||
@@ -1,9 +1,6 @@
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
use axum::{
|
||||
async_trait,
|
||||
extract::{rejection::PathRejection, FromRequestParts},
|
||||
};
|
||||
use axum::extract::{rejection::PathRejection, FromRequestParts};
|
||||
use compute_api::responses::GenericAPIError;
|
||||
use http::{request::Parts, StatusCode};
|
||||
|
||||
@@ -12,7 +9,6 @@ use http::{request::Parts, StatusCode};
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
pub(crate) struct Path<T>(pub T);
|
||||
|
||||
#[async_trait]
|
||||
impl<S, T> FromRequestParts<S> for Path<T>
|
||||
where
|
||||
axum::extract::Path<T>: FromRequestParts<S, Rejection = PathRejection>,
|
||||
|
||||
@@ -1,9 +1,6 @@
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
use axum::{
|
||||
async_trait,
|
||||
extract::{rejection::QueryRejection, FromRequestParts},
|
||||
};
|
||||
use axum::extract::{rejection::QueryRejection, FromRequestParts};
|
||||
use compute_api::responses::GenericAPIError;
|
||||
use http::{request::Parts, StatusCode};
|
||||
|
||||
@@ -12,7 +9,6 @@ use http::{request::Parts, StatusCode};
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
pub(crate) struct Query<T>(pub T);
|
||||
|
||||
#[async_trait]
|
||||
impl<S, T> FromRequestParts<S> for Query<T>
|
||||
where
|
||||
axum::extract::Query<T>: FromRequestParts<S, Rejection = QueryRejection>,
|
||||
|
||||
@@ -68,35 +68,6 @@ paths:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ComputeInsights"
|
||||
|
||||
/installed_extensions:
|
||||
get:
|
||||
tags:
|
||||
- Info
|
||||
summary: Get installed extensions.
|
||||
description: ""
|
||||
operationId: getInstalledExtensions
|
||||
responses:
|
||||
200:
|
||||
description: List of installed extensions
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/InstalledExtensions"
|
||||
/info:
|
||||
get:
|
||||
tags:
|
||||
- Info
|
||||
summary: Get info about the compute pod / VM.
|
||||
description: ""
|
||||
operationId: getInfo
|
||||
responses:
|
||||
200:
|
||||
description: Info
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Info"
|
||||
|
||||
/dbs_and_roles:
|
||||
get:
|
||||
tags:
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
use axum::response::Response;
|
||||
use compute_api::responses::InfoResponse;
|
||||
use http::StatusCode;
|
||||
|
||||
use crate::http::JsonResponse;
|
||||
|
||||
/// Get information about the physical characteristics about the compute.
|
||||
pub(in crate::http) async fn get_info() -> Response {
|
||||
let num_cpus = num_cpus::get_physical();
|
||||
JsonResponse::success(StatusCode::OK, &InfoResponse { num_cpus })
|
||||
}
|
||||
@@ -1,33 +0,0 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::{extract::State, response::Response};
|
||||
use compute_api::responses::ComputeStatus;
|
||||
use http::StatusCode;
|
||||
use tokio::task;
|
||||
|
||||
use crate::{compute::ComputeNode, http::JsonResponse, installed_extensions};
|
||||
|
||||
/// Get a list of installed extensions.
|
||||
pub(in crate::http) async fn get_installed_extensions(
|
||||
State(compute): State<Arc<ComputeNode>>,
|
||||
) -> Response {
|
||||
let status = compute.get_status();
|
||||
if status != ComputeStatus::Running {
|
||||
return JsonResponse::invalid_status(status);
|
||||
}
|
||||
|
||||
let conf = compute.get_conn_conf(None);
|
||||
let res = task::spawn_blocking(move || installed_extensions::get_installed_extensions(conf))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
match res {
|
||||
Ok(installed_extensions) => {
|
||||
JsonResponse::success(StatusCode::OK, Some(installed_extensions))
|
||||
}
|
||||
Err(e) => JsonResponse::error(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("failed to get list of installed extensions: {e}"),
|
||||
),
|
||||
}
|
||||
}
|
||||
@@ -2,17 +2,16 @@ use axum::{body::Body, response::Response};
|
||||
use http::header::CONTENT_TYPE;
|
||||
use http::StatusCode;
|
||||
use metrics::proto::MetricFamily;
|
||||
use metrics::Encoder;
|
||||
use metrics::TextEncoder;
|
||||
use metrics::{Encoder, TextEncoder};
|
||||
|
||||
use crate::{http::JsonResponse, installed_extensions};
|
||||
use crate::{http::JsonResponse, metrics::collect};
|
||||
|
||||
/// Expose Prometheus metrics.
|
||||
pub(in crate::http) async fn get_metrics() -> Response {
|
||||
// When we call TextEncoder::encode() below, it will immediately return an
|
||||
// error if a metric family has no metrics, so we need to preemptively
|
||||
// filter out metric families with no metrics.
|
||||
let metrics = installed_extensions::collect()
|
||||
let metrics = collect()
|
||||
.into_iter()
|
||||
.filter(|m| !m.get_metric().is_empty())
|
||||
.collect::<Vec<MetricFamily>>();
|
||||
|
||||
@@ -10,9 +10,7 @@ pub(in crate::http) mod extension_server;
|
||||
pub(in crate::http) mod extensions;
|
||||
pub(in crate::http) mod failpoints;
|
||||
pub(in crate::http) mod grants;
|
||||
pub(in crate::http) mod info;
|
||||
pub(in crate::http) mod insights;
|
||||
pub(in crate::http) mod installed_extensions;
|
||||
pub(in crate::http) mod metrics;
|
||||
pub(in crate::http) mod metrics_json;
|
||||
pub(in crate::http) mod status;
|
||||
|
||||
@@ -22,8 +22,7 @@ use uuid::Uuid;
|
||||
|
||||
use super::routes::{
|
||||
check_writability, configure, database_schema, dbs_and_roles, extension_server, extensions,
|
||||
grants, info as info_route, insights, installed_extensions, metrics, metrics_json, status,
|
||||
terminate,
|
||||
grants, insights, metrics, metrics_json, status, terminate,
|
||||
};
|
||||
use crate::compute::ComputeNode;
|
||||
|
||||
@@ -55,17 +54,12 @@ async fn serve(port: u16, compute: Arc<ComputeNode>) {
|
||||
.route("/database_schema", get(database_schema::get_schema_dump))
|
||||
.route("/dbs_and_roles", get(dbs_and_roles::get_catalog_objects))
|
||||
.route(
|
||||
"/extension_server/*filename",
|
||||
"/extension_server/{*filename}",
|
||||
post(extension_server::download_extension),
|
||||
)
|
||||
.route("/extensions", post(extensions::install_extension))
|
||||
.route("/grants", post(grants::add_grant))
|
||||
.route("/info", get(info_route::get_info))
|
||||
.route("/insights", get(insights::get_insights))
|
||||
.route(
|
||||
"/installed_extensions",
|
||||
get(installed_extensions::get_installed_extensions),
|
||||
)
|
||||
.route("/metrics", get(metrics::get_metrics))
|
||||
.route("/metrics.json", get(metrics_json::get_metrics))
|
||||
.route("/status", get(status::get_status))
|
||||
|
||||
@@ -1,13 +1,10 @@
|
||||
use compute_api::responses::{InstalledExtension, InstalledExtensions};
|
||||
use metrics::proto::MetricFamily;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use anyhow::Result;
|
||||
use postgres::{Client, NoTls};
|
||||
|
||||
use metrics::core::Collector;
|
||||
use metrics::{register_uint_gauge_vec, UIntGaugeVec};
|
||||
use once_cell::sync::Lazy;
|
||||
use crate::metrics::INSTALLED_EXTENSIONS;
|
||||
|
||||
/// We don't reuse get_existing_dbs() just for code clarity
|
||||
/// and to make database listing query here more explicit.
|
||||
@@ -102,16 +99,3 @@ pub fn get_installed_extensions(mut conf: postgres::config::Config) -> Result<In
|
||||
extensions: extensions_map.into_values().collect(),
|
||||
})
|
||||
}
|
||||
|
||||
static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||
register_uint_gauge_vec!(
|
||||
"compute_installed_extensions",
|
||||
"Number of databases where the version of extension is installed",
|
||||
&["extension_name", "version", "owned_by_superuser"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub fn collect() -> Vec<MetricFamily> {
|
||||
INSTALLED_EXTENSIONS.collect()
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@ pub mod extension_server;
|
||||
pub mod installed_extensions;
|
||||
pub mod local_proxy;
|
||||
pub mod lsn_lease;
|
||||
pub mod metrics;
|
||||
mod migration;
|
||||
pub mod monitor;
|
||||
pub mod params;
|
||||
|
||||
70
compute_tools/src/metrics.rs
Normal file
70
compute_tools/src/metrics.rs
Normal file
@@ -0,0 +1,70 @@
|
||||
use metrics::core::Collector;
|
||||
use metrics::proto::MetricFamily;
|
||||
use metrics::{register_int_counter_vec, register_uint_gauge_vec, IntCounterVec, UIntGaugeVec};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
pub(crate) static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||
register_uint_gauge_vec!(
|
||||
"compute_installed_extensions",
|
||||
"Number of databases where the version of extension is installed",
|
||||
&["extension_name", "version", "owned_by_superuser"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
// Normally, any HTTP API request is described by METHOD (e.g. GET, POST, etc.) + PATH,
|
||||
// but for all our APIs we defined a 'slug'/method/operationId in the OpenAPI spec.
|
||||
// And it's fair to call it a 'RPC' (Remote Procedure Call).
|
||||
pub enum CPlaneRequestRPC {
|
||||
GetSpec,
|
||||
}
|
||||
|
||||
impl CPlaneRequestRPC {
|
||||
pub fn as_str(&self) -> &str {
|
||||
match self {
|
||||
CPlaneRequestRPC::GetSpec => "GetSpec",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub const UNKNOWN_HTTP_STATUS: &str = "unknown";
|
||||
|
||||
pub(crate) static CPLANE_REQUESTS_TOTAL: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"compute_ctl_cplane_requests_total",
|
||||
"Total number of control plane requests made by compute_ctl by status",
|
||||
&["rpc", "http_status"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
/// Total number of failed database migrations. Per-compute, this is actually a boolean metric,
|
||||
/// either empty or with a single value (1, migration_id) because we stop at the first failure.
|
||||
/// Yet, the sum over the fleet will provide the total number of failures.
|
||||
pub(crate) static DB_MIGRATION_FAILED: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"compute_ctl_db_migration_failed_total",
|
||||
"Total number of failed database migrations",
|
||||
&["migration_id"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) static REMOTE_EXT_REQUESTS_TOTAL: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"compute_ctl_remote_ext_requests_total",
|
||||
"Total number of requests made by compute_ctl to download extensions from S3 proxy by status",
|
||||
// Do not use any labels like extension name yet.
|
||||
// We can add them later if needed.
|
||||
&["http_status"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub fn collect() -> Vec<MetricFamily> {
|
||||
let mut metrics = INSTALLED_EXTENSIONS.collect();
|
||||
metrics.extend(CPLANE_REQUESTS_TOTAL.collect());
|
||||
metrics.extend(REMOTE_EXT_REQUESTS_TOTAL.collect());
|
||||
metrics.extend(DB_MIGRATION_FAILED.collect());
|
||||
metrics
|
||||
}
|
||||
@@ -1,7 +1,9 @@
|
||||
use anyhow::{Context, Result};
|
||||
use fail::fail_point;
|
||||
use postgres::{Client, Transaction};
|
||||
use tracing::info;
|
||||
use tracing::{error, info};
|
||||
|
||||
use crate::metrics::DB_MIGRATION_FAILED;
|
||||
|
||||
/// Runs a series of migrations on a target database
|
||||
pub(crate) struct MigrationRunner<'m> {
|
||||
@@ -78,24 +80,31 @@ impl<'m> MigrationRunner<'m> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Run an individual migration
|
||||
fn run_migration(txn: &mut Transaction, migration_id: i64, migration: &str) -> Result<()> {
|
||||
/// Run an individual migration in a separate transaction block.
|
||||
fn run_migration(client: &mut Client, migration_id: i64, migration: &str) -> Result<()> {
|
||||
let mut txn = client
|
||||
.transaction()
|
||||
.with_context(|| format!("begin transaction for migration {migration_id}"))?;
|
||||
|
||||
if migration.starts_with("-- SKIP") {
|
||||
info!("Skipping migration id={}", migration_id);
|
||||
|
||||
// Even though we are skipping the migration, updating the
|
||||
// migration ID should help keep logic easy to understand when
|
||||
// trying to understand the state of a cluster.
|
||||
Self::update_migration_id(txn, migration_id)?;
|
||||
Self::update_migration_id(&mut txn, migration_id)?;
|
||||
} else {
|
||||
info!("Running migration id={}:\n{}\n", migration_id, migration);
|
||||
|
||||
txn.simple_query(migration)
|
||||
.with_context(|| format!("apply migration {migration_id}"))?;
|
||||
|
||||
Self::update_migration_id(txn, migration_id)?;
|
||||
Self::update_migration_id(&mut txn, migration_id)?;
|
||||
}
|
||||
|
||||
txn.commit()
|
||||
.with_context(|| format!("commit transaction for migration {migration_id}"))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -109,19 +118,20 @@ impl<'m> MigrationRunner<'m> {
|
||||
// The index lags the migration ID by 1, so the current migration
|
||||
// ID is also the next index
|
||||
let migration_id = (current_migration + 1) as i64;
|
||||
let migration = self.migrations[current_migration];
|
||||
|
||||
let mut txn = self
|
||||
.client
|
||||
.transaction()
|
||||
.with_context(|| format!("begin transaction for migration {migration_id}"))?;
|
||||
|
||||
Self::run_migration(&mut txn, migration_id, self.migrations[current_migration])
|
||||
.with_context(|| format!("running migration {migration_id}"))?;
|
||||
|
||||
txn.commit()
|
||||
.with_context(|| format!("commit transaction for migration {migration_id}"))?;
|
||||
|
||||
info!("Finished migration id={}", migration_id);
|
||||
match Self::run_migration(self.client, migration_id, migration) {
|
||||
Ok(_) => {
|
||||
info!("Finished migration id={}", migration_id);
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to run migration id={}: {}", migration_id, e);
|
||||
DB_MIGRATION_FAILED
|
||||
.with_label_values(&[migration_id.to_string().as_str()])
|
||||
.inc();
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
|
||||
current_migration += 1;
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ use std::path::Path;
|
||||
use tracing::{error, info, instrument, warn};
|
||||
|
||||
use crate::config;
|
||||
use crate::metrics::{CPlaneRequestRPC, CPLANE_REQUESTS_TOTAL, UNKNOWN_HTTP_STATUS};
|
||||
use crate::migration::MigrationRunner;
|
||||
use crate::params::PG_HBA_ALL_MD5;
|
||||
use crate::pg_helpers::*;
|
||||
@@ -19,7 +20,7 @@ use compute_api::spec::ComputeSpec;
|
||||
fn do_control_plane_request(
|
||||
uri: &str,
|
||||
jwt: &str,
|
||||
) -> Result<ControlPlaneSpecResponse, (bool, String)> {
|
||||
) -> Result<ControlPlaneSpecResponse, (bool, String, String)> {
|
||||
let resp = reqwest::blocking::Client::new()
|
||||
.get(uri)
|
||||
.header("Authorization", format!("Bearer {}", jwt))
|
||||
@@ -28,34 +29,41 @@ fn do_control_plane_request(
|
||||
(
|
||||
true,
|
||||
format!("could not perform spec request to control plane: {}", e),
|
||||
UNKNOWN_HTTP_STATUS.to_string(),
|
||||
)
|
||||
})?;
|
||||
|
||||
match resp.status() {
|
||||
let status = resp.status();
|
||||
match status {
|
||||
StatusCode::OK => match resp.json::<ControlPlaneSpecResponse>() {
|
||||
Ok(spec_resp) => Ok(spec_resp),
|
||||
Err(e) => Err((
|
||||
true,
|
||||
format!("could not deserialize control plane response: {}", e),
|
||||
status.to_string(),
|
||||
)),
|
||||
},
|
||||
StatusCode::SERVICE_UNAVAILABLE => {
|
||||
Err((true, "control plane is temporarily unavailable".to_string()))
|
||||
}
|
||||
StatusCode::SERVICE_UNAVAILABLE => Err((
|
||||
true,
|
||||
"control plane is temporarily unavailable".to_string(),
|
||||
status.to_string(),
|
||||
)),
|
||||
StatusCode::BAD_GATEWAY => {
|
||||
// We have a problem with intermittent 502 errors now
|
||||
// https://github.com/neondatabase/cloud/issues/2353
|
||||
// It's fine to retry GET request in this case.
|
||||
Err((true, "control plane request failed with 502".to_string()))
|
||||
Err((
|
||||
true,
|
||||
"control plane request failed with 502".to_string(),
|
||||
status.to_string(),
|
||||
))
|
||||
}
|
||||
// Another code, likely 500 or 404, means that compute is unknown to the control plane
|
||||
// or some internal failure happened. Doesn't make much sense to retry in this case.
|
||||
_ => Err((
|
||||
false,
|
||||
format!(
|
||||
"unexpected control plane response status code: {}",
|
||||
resp.status()
|
||||
),
|
||||
format!("unexpected control plane response status code: {}", status),
|
||||
status.to_string(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
@@ -83,17 +91,28 @@ pub fn get_spec_from_control_plane(
|
||||
// - got spec -> return Ok(Some(spec))
|
||||
while attempt < 4 {
|
||||
spec = match do_control_plane_request(&cp_uri, &jwt) {
|
||||
Ok(spec_resp) => match spec_resp.status {
|
||||
ControlPlaneComputeStatus::Empty => Ok(None),
|
||||
ControlPlaneComputeStatus::Attached => {
|
||||
if let Some(spec) = spec_resp.spec {
|
||||
Ok(Some(spec))
|
||||
} else {
|
||||
bail!("compute is attached, but spec is empty")
|
||||
Ok(spec_resp) => {
|
||||
CPLANE_REQUESTS_TOTAL
|
||||
.with_label_values(&[
|
||||
CPlaneRequestRPC::GetSpec.as_str(),
|
||||
&StatusCode::OK.to_string(),
|
||||
])
|
||||
.inc();
|
||||
match spec_resp.status {
|
||||
ControlPlaneComputeStatus::Empty => Ok(None),
|
||||
ControlPlaneComputeStatus::Attached => {
|
||||
if let Some(spec) = spec_resp.spec {
|
||||
Ok(Some(spec))
|
||||
} else {
|
||||
bail!("compute is attached, but spec is empty")
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
Err((retry, msg)) => {
|
||||
}
|
||||
Err((retry, msg, status)) => {
|
||||
CPLANE_REQUESTS_TOTAL
|
||||
.with_label_values(&[CPlaneRequestRPC::GetSpec.as_str(), &status])
|
||||
.inc();
|
||||
if retry {
|
||||
Err(anyhow!(msg))
|
||||
} else {
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
# Control Plane and Neon Local
|
||||
# Local Development Control Plane (`neon_local`)
|
||||
|
||||
This crate contains tools to start a Neon development environment locally. This utility can be used with the `cargo neon` command.
|
||||
This crate contains tools to start a Neon development environment locally. This utility can be used with the `cargo neon` command. This is a convenience to invoke
|
||||
the `neon_local` binary.
|
||||
|
||||
**Note**: this is a dev/test tool -- a minimal control plane suitable for testing
|
||||
code changes locally, but not suitable for running production systems.
|
||||
|
||||
## Example: Start with Postgres 16
|
||||
|
||||
|
||||
@@ -347,6 +347,11 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'compaction_threshold' as an integer")?,
|
||||
compaction_upper_limit: settings
|
||||
.remove("compaction_upper_limit")
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'compaction_upper_limit' as an integer")?,
|
||||
compaction_algorithm: settings
|
||||
.remove("compaction_algorithm")
|
||||
.map(serde_json::from_str)
|
||||
@@ -357,6 +362,11 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'l0_flush_delay_threshold' as an integer")?,
|
||||
l0_flush_wait_upload: settings
|
||||
.remove("l0_flush_wait_upload")
|
||||
.map(|x| x.parse::<bool>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'l0_flush_wait_upload' as a boolean")?,
|
||||
l0_flush_stall_threshold: settings
|
||||
.remove("l0_flush_stall_threshold")
|
||||
.map(|x| x.parse::<usize>())
|
||||
|
||||
@@ -41,8 +41,8 @@ allow = [
|
||||
"MIT",
|
||||
"MPL-2.0",
|
||||
"OpenSSL",
|
||||
"Unicode-DFS-2016",
|
||||
"Unicode-3.0",
|
||||
"Zlib",
|
||||
]
|
||||
confidence-threshold = 0.8
|
||||
exceptions = [
|
||||
|
||||
@@ -20,30 +20,55 @@ while ! nc -z pageserver 6400; do
|
||||
done
|
||||
echo "Page server is ready."
|
||||
|
||||
echo "Create a tenant and timeline"
|
||||
generate_id tenant_id
|
||||
PARAMS=(
|
||||
-X PUT
|
||||
-H "Content-Type: application/json"
|
||||
-d "{\"mode\": \"AttachedSingle\", \"generation\": 1, \"tenant_conf\": {}}"
|
||||
"http://pageserver:9898/v1/tenant/${tenant_id}/location_config"
|
||||
)
|
||||
result=$(curl "${PARAMS[@]}")
|
||||
echo $result | jq .
|
||||
cp ${SPEC_FILE_ORG} ${SPEC_FILE}
|
||||
|
||||
generate_id timeline_id
|
||||
PARAMS=(
|
||||
-sbf
|
||||
-X POST
|
||||
-H "Content-Type: application/json"
|
||||
-d "{\"new_timeline_id\": \"${timeline_id}\", \"pg_version\": ${PG_VERSION}}"
|
||||
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline/"
|
||||
)
|
||||
result=$(curl "${PARAMS[@]}")
|
||||
echo $result | jq .
|
||||
if [ -n "${TENANT_ID:-}" ] && [ -n "${TIMELINE_ID:-}" ]; then
|
||||
tenant_id=${TENANT_ID}
|
||||
timeline_id=${TIMELINE_ID}
|
||||
else
|
||||
echo "Check if a tenant present"
|
||||
PARAMS=(
|
||||
-X GET
|
||||
-H "Content-Type: application/json"
|
||||
"http://pageserver:9898/v1/tenant"
|
||||
)
|
||||
tenant_id=$(curl "${PARAMS[@]}" | jq -r .[0].id)
|
||||
if [ -z "${tenant_id}" ] || [ "${tenant_id}" = null ]; then
|
||||
echo "Create a tenant"
|
||||
generate_id tenant_id
|
||||
PARAMS=(
|
||||
-X PUT
|
||||
-H "Content-Type: application/json"
|
||||
-d "{\"mode\": \"AttachedSingle\", \"generation\": 1, \"tenant_conf\": {}}"
|
||||
"http://pageserver:9898/v1/tenant/${tenant_id}/location_config"
|
||||
)
|
||||
result=$(curl "${PARAMS[@]}")
|
||||
echo $result | jq .
|
||||
fi
|
||||
|
||||
echo "Check if a timeline present"
|
||||
PARAMS=(
|
||||
-X GET
|
||||
-H "Content-Type: application/json"
|
||||
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline"
|
||||
)
|
||||
timeline_id=$(curl "${PARAMS[@]}" | jq -r .[0].timeline_id)
|
||||
if [ -z "${timeline_id}" ] || [ "${timeline_id}" = null ]; then
|
||||
generate_id timeline_id
|
||||
PARAMS=(
|
||||
-sbf
|
||||
-X POST
|
||||
-H "Content-Type: application/json"
|
||||
-d "{\"new_timeline_id\": \"${timeline_id}\", \"pg_version\": ${PG_VERSION}}"
|
||||
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline/"
|
||||
)
|
||||
result=$(curl "${PARAMS[@]}")
|
||||
echo $result | jq .
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "Overwrite tenant id and timeline id in spec file"
|
||||
sed "s/TENANT_ID/${tenant_id}/" ${SPEC_FILE_ORG} > ${SPEC_FILE}
|
||||
sed -i "s/TENANT_ID/${tenant_id}/" ${SPEC_FILE}
|
||||
sed -i "s/TIMELINE_ID/${timeline_id}/" ${SPEC_FILE}
|
||||
|
||||
cat ${SPEC_FILE}
|
||||
|
||||
@@ -149,11 +149,13 @@ services:
|
||||
args:
|
||||
- REPOSITORY=${REPOSITORY:-neondatabase}
|
||||
- COMPUTE_IMAGE=compute-node-v${PG_VERSION:-16}
|
||||
- TAG=${TAG:-latest}
|
||||
- TAG=${COMPUTE_TAG:-${TAG:-latest}}
|
||||
- http_proxy=${http_proxy:-}
|
||||
- https_proxy=${https_proxy:-}
|
||||
environment:
|
||||
- PG_VERSION=${PG_VERSION:-16}
|
||||
- TENANT_ID=${TENANT_ID:-}
|
||||
- TIMELINE_ID=${TIMELINE_ID:-}
|
||||
#- RUST_BACKTRACE=1
|
||||
# Mount the test files directly, for faster editing cycle.
|
||||
volumes:
|
||||
|
||||
@@ -31,7 +31,7 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
|
||||
echo "clean up containers if exists"
|
||||
cleanup
|
||||
PG_TEST_VERSION=$((pg_version < 16 ? 16 : pg_version))
|
||||
PG_VERSION=$pg_version PG_TEST_VERSION=$PG_TEST_VERSION docker compose --profile test-extensions -f $COMPOSE_FILE up --build -d
|
||||
PG_VERSION=$pg_version PG_TEST_VERSION=$PG_TEST_VERSION docker compose --profile test-extensions -f $COMPOSE_FILE up --quiet-pull --build -d
|
||||
|
||||
echo "wait until the compute is ready. timeout after 60s. "
|
||||
cnt=0
|
||||
@@ -51,6 +51,7 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
|
||||
done
|
||||
|
||||
if [ $pg_version -ge 16 ]; then
|
||||
docker cp ext-src $TEST_CONTAINER_NAME:/
|
||||
# This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail
|
||||
# It cannot be moved to Dockerfile now because the database directory is created after the start of the container
|
||||
echo Adding dummy config
|
||||
@@ -61,7 +62,7 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
|
||||
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/ext-src/pg_hint_plan-src/
|
||||
rm -rf $TMPDIR
|
||||
# We are running tests now
|
||||
if ! docker exec -e SKIP=timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pgtap-src,pg_tiktoken-src,pg_jsonschema-src,pg_graphql-src,kq_imcx-src,wal2json_2_5-src \
|
||||
if ! docker exec -e SKIP=timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pgtap-src,pg_tiktoken-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src \
|
||||
$TEST_CONTAINER_NAME /run-tests.sh | tee testout.txt
|
||||
then
|
||||
FAILED=$(tail -1 testout.txt)
|
||||
|
||||
5
docker-compose/ext-src/hll-src/test-upgrade.sh
Executable file
5
docker-compose/ext-src/hll-src/test-upgrade.sh
Executable file
@@ -0,0 +1,5 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression add_agg agg_oob auto_sparse card_op cast_shape copy_binary cumulative_add_cardinality_correction cumulative_add_comprehensive_promotion cumulative_add_sparse_edge cumulative_add_sparse_random cumulative_add_sparse_step cumulative_union_comprehensive cumulative_union_explicit_explicit cumulative_union_explicit_promotion cumulative_union_probabilistic_probabilistic cumulative_union_sparse_full_representation cumulative_union_sparse_promotion cumulative_union_sparse_sparse disable_hashagg equal explicit_thresh hash hash_any meta_func murmur_bigint murmur_bytea nosparse notequal scalar_oob storedproc transaction typmod typmod_insert union_op
|
||||
27
docker-compose/ext-src/hypopg-src/test-upgrade.patch
Normal file
27
docker-compose/ext-src/hypopg-src/test-upgrade.patch
Normal file
@@ -0,0 +1,27 @@
|
||||
diff --git a/expected/hypopg.out b/expected/hypopg.out
|
||||
index 90121d0..859260b 100644
|
||||
--- a/expected/hypopg.out
|
||||
+++ b/expected/hypopg.out
|
||||
@@ -11,7 +11,8 @@ BEGIN
|
||||
END;
|
||||
$_$
|
||||
LANGUAGE plpgsql;
|
||||
-CREATE EXTENSION hypopg;
|
||||
+CREATE EXTENSION IF NOT EXISTS hypopg;
|
||||
+NOTICE: extension "hypopg" already exists, skipping
|
||||
CREATE TABLE hypo (id integer, val text, "Id2" bigint);
|
||||
INSERT INTO hypo SELECT i, 'line ' || i
|
||||
FROM generate_series(1,100000) f(i);
|
||||
diff --git a/test/sql/hypopg.sql b/test/sql/hypopg.sql
|
||||
index 99722b0..8d6bacb 100644
|
||||
--- a/test/sql/hypopg.sql
|
||||
+++ b/test/sql/hypopg.sql
|
||||
@@ -12,7 +12,7 @@ END;
|
||||
$_$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
-CREATE EXTENSION hypopg;
|
||||
+CREATE EXTENSION IF NOT EXISTS hypopg;
|
||||
|
||||
CREATE TABLE hypo (id integer, val text, "Id2" bigint);
|
||||
|
||||
6
docker-compose/ext-src/hypopg-src/test-upgrade.sh
Executable file
6
docker-compose/ext-src/hypopg-src/test-upgrade.sh
Executable file
@@ -0,0 +1,6 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
patch -p1 <test-upgrade.patch
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --inputdir=test --dbname=contrib_regression hypopg hypo_brin hypo_index_part hypo_include hypo_hash hypo_hide_index
|
||||
23
docker-compose/ext-src/ip4r-src/test-upgrade.patch
Normal file
23
docker-compose/ext-src/ip4r-src/test-upgrade.patch
Normal file
@@ -0,0 +1,23 @@
|
||||
diff --git a/expected/ip4r.out b/expected/ip4r.out
|
||||
index 7527af3..b38ed29 100644
|
||||
--- a/expected/ip4r.out
|
||||
+++ b/expected/ip4r.out
|
||||
@@ -1,6 +1,5 @@
|
||||
--
|
||||
/*CUT-HERE*/
|
||||
-CREATE EXTENSION ip4r;
|
||||
-- Check whether any of our opclasses fail amvalidate
|
||||
DO $d$
|
||||
DECLARE
|
||||
diff --git a/sql/ip4r.sql b/sql/ip4r.sql
|
||||
index 65c49ec..24ade09 100644
|
||||
--- a/sql/ip4r.sql
|
||||
+++ b/sql/ip4r.sql
|
||||
@@ -1,7 +1,6 @@
|
||||
--
|
||||
|
||||
/*CUT-HERE*/
|
||||
-CREATE EXTENSION ip4r;
|
||||
|
||||
-- Check whether any of our opclasses fail amvalidate
|
||||
|
||||
6
docker-compose/ext-src/ip4r-src/test-upgrade.sh
Executable file
6
docker-compose/ext-src/ip4r-src/test-upgrade.sh
Executable file
@@ -0,0 +1,6 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
patch -p1 <test-upgrade.patch
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression ip4r ip4r-softerr ip4r-v11
|
||||
75
docker-compose/ext-src/pg_cron-src/test-upgrade.patch
Normal file
75
docker-compose/ext-src/pg_cron-src/test-upgrade.patch
Normal file
@@ -0,0 +1,75 @@
|
||||
diff --git a/expected/pg_cron-test.out b/expected/pg_cron-test.out
|
||||
index d79d542..1663886 100644
|
||||
--- a/expected/pg_cron-test.out
|
||||
+++ b/expected/pg_cron-test.out
|
||||
@@ -1,30 +1,3 @@
|
||||
-CREATE EXTENSION pg_cron VERSION '1.0';
|
||||
-SELECT extversion FROM pg_extension WHERE extname='pg_cron';
|
||||
- extversion
|
||||
-------------
|
||||
- 1.0
|
||||
-(1 row)
|
||||
-
|
||||
--- Test binary compatibility with v1.4 function signature.
|
||||
-ALTER EXTENSION pg_cron UPDATE TO '1.4';
|
||||
-SELECT cron.unschedule(job_name := 'no_such_job');
|
||||
-ERROR: could not find valid entry for job 'no_such_job'
|
||||
-SELECT cron.schedule('testjob', '* * * * *', 'SELECT 1');
|
||||
- schedule
|
||||
-----------
|
||||
- 1
|
||||
-(1 row)
|
||||
-
|
||||
-SELECT cron.unschedule('testjob');
|
||||
- unschedule
|
||||
-------------
|
||||
- t
|
||||
-(1 row)
|
||||
-
|
||||
--- Test cache invalidation
|
||||
-DROP EXTENSION pg_cron;
|
||||
-CREATE EXTENSION pg_cron VERSION '1.4';
|
||||
-ALTER EXTENSION pg_cron UPDATE;
|
||||
-- Vacuum every day at 10:00am (GMT)
|
||||
SELECT cron.schedule('0 10 * * *', 'VACUUM');
|
||||
schedule
|
||||
@@ -300,8 +273,3 @@ SELECT jobid, jobname, schedule, command FROM cron.job ORDER BY jobid;
|
||||
SELECT cron.schedule('bad-last-dom-job1', '0 11 $foo * *', 'VACUUM FULL');
|
||||
ERROR: invalid schedule: 0 11 $foo * *
|
||||
HINT: Use cron format (e.g. 5 4 * * *), or interval format '[1-59] seconds'
|
||||
--- cleaning
|
||||
-DROP EXTENSION pg_cron;
|
||||
-drop user pgcron_cront;
|
||||
-drop database pgcron_dbno;
|
||||
-drop database pgcron_dbyes;
|
||||
diff --git a/sql/pg_cron-test.sql b/sql/pg_cron-test.sql
|
||||
index 45f94d9..241cf73 100644
|
||||
--- a/sql/pg_cron-test.sql
|
||||
+++ b/sql/pg_cron-test.sql
|
||||
@@ -1,17 +1,3 @@
|
||||
-CREATE EXTENSION pg_cron VERSION '1.0';
|
||||
-SELECT extversion FROM pg_extension WHERE extname='pg_cron';
|
||||
--- Test binary compatibility with v1.4 function signature.
|
||||
-ALTER EXTENSION pg_cron UPDATE TO '1.4';
|
||||
-SELECT cron.unschedule(job_name := 'no_such_job');
|
||||
-SELECT cron.schedule('testjob', '* * * * *', 'SELECT 1');
|
||||
-SELECT cron.unschedule('testjob');
|
||||
-
|
||||
--- Test cache invalidation
|
||||
-DROP EXTENSION pg_cron;
|
||||
-CREATE EXTENSION pg_cron VERSION '1.4';
|
||||
-
|
||||
-ALTER EXTENSION pg_cron UPDATE;
|
||||
-
|
||||
-- Vacuum every day at 10:00am (GMT)
|
||||
SELECT cron.schedule('0 10 * * *', 'VACUUM');
|
||||
|
||||
@@ -156,8 +142,3 @@ SELECT jobid, jobname, schedule, command FROM cron.job ORDER BY jobid;
|
||||
-- invalid last of day job
|
||||
SELECT cron.schedule('bad-last-dom-job1', '0 11 $foo * *', 'VACUUM FULL');
|
||||
|
||||
--- cleaning
|
||||
-DROP EXTENSION pg_cron;
|
||||
-drop user pgcron_cront;
|
||||
-drop database pgcron_dbno;
|
||||
-drop database pgcron_dbyes;
|
||||
6
docker-compose/ext-src/pg_cron-src/test-upgrade.sh
Executable file
6
docker-compose/ext-src/pg_cron-src/test-upgrade.sh
Executable file
@@ -0,0 +1,6 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
patch -p1 <test-upgrade.patch
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression pg_cron-test
|
||||
13
docker-compose/ext-src/pg_graphql-src/neon-test.sh
Executable file
13
docker-compose/ext-src/pg_graphql-src/neon-test.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
cd "$(dirname "${0}")"
|
||||
dropdb --if-exists contrib_regression
|
||||
createdb contrib_regression
|
||||
PGXS="$(dirname "$(pg_config --pgxs)" )"
|
||||
REGRESS="${PGXS}/../test/regress/pg_regress"
|
||||
TESTDIR="test"
|
||||
TESTS=$(ls "${TESTDIR}/sql" | sort )
|
||||
TESTS=${TESTS//\.sql/}
|
||||
psql -v ON_ERROR_STOP=1 -f test/fixtures.sql -d contrib_regression
|
||||
${REGRESS} --use-existing --dbname=contrib_regression --inputdir=${TESTDIR} ${TESTS}
|
||||
|
||||
18
docker-compose/ext-src/pg_ivm-src/test-upgrade.patch
Normal file
18
docker-compose/ext-src/pg_ivm-src/test-upgrade.patch
Normal file
@@ -0,0 +1,18 @@
|
||||
diff --git a/expected/pg_ivm.out b/expected/pg_ivm.out
|
||||
index e8798ee..cca58d0 100644
|
||||
--- a/expected/pg_ivm.out
|
||||
+++ b/expected/pg_ivm.out
|
||||
@@ -1,4 +1,3 @@
|
||||
-CREATE EXTENSION pg_ivm;
|
||||
GRANT ALL ON SCHEMA public TO public;
|
||||
-- create a table to use as a basis for views and materialized views in various combinations
|
||||
CREATE TABLE mv_base_a (i int, j int);
|
||||
diff --git a/sql/pg_ivm.sql b/sql/pg_ivm.sql
|
||||
index d3c1a01..9382d7f 100644
|
||||
--- a/sql/pg_ivm.sql
|
||||
+++ b/sql/pg_ivm.sql
|
||||
@@ -1,4 +1,3 @@
|
||||
-CREATE EXTENSION pg_ivm;
|
||||
GRANT ALL ON SCHEMA public TO public;
|
||||
|
||||
-- create a table to use as a basis for views and materialized views in various combinations
|
||||
6
docker-compose/ext-src/pg_ivm-src/test-upgrade.sh
Executable file
6
docker-compose/ext-src/pg_ivm-src/test-upgrade.sh
Executable file
@@ -0,0 +1,6 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
patch -p1 <test-upgrade.patch
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression pg_ivm create_immv refresh_immv
|
||||
@@ -0,0 +1,25 @@
|
||||
diff --git a/expected/roaringbitmap.out b/expected/roaringbitmap.out
|
||||
index de70531..a5f7c15 100644
|
||||
--- a/expected/roaringbitmap.out
|
||||
+++ b/expected/roaringbitmap.out
|
||||
@@ -1,7 +1,6 @@
|
||||
--
|
||||
-- Test roaringbitmap extension
|
||||
--
|
||||
-CREATE EXTENSION if not exists roaringbitmap;
|
||||
-- Test input and output
|
||||
set roaringbitmap.output_format='array';
|
||||
set extra_float_digits = 0;
|
||||
diff --git a/sql/roaringbitmap.sql b/sql/roaringbitmap.sql
|
||||
index a0e9c74..84bc966 100644
|
||||
--- a/sql/roaringbitmap.sql
|
||||
+++ b/sql/roaringbitmap.sql
|
||||
@@ -2,8 +2,6 @@
|
||||
-- Test roaringbitmap extension
|
||||
--
|
||||
|
||||
-CREATE EXTENSION if not exists roaringbitmap;
|
||||
-
|
||||
-- Test input and output
|
||||
|
||||
set roaringbitmap.output_format='array';
|
||||
6
docker-compose/ext-src/pg_roaringbitmap-src/test-upgrade.sh
Executable file
6
docker-compose/ext-src/pg_roaringbitmap-src/test-upgrade.sh
Executable file
@@ -0,0 +1,6 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
patch -p1 <test-upgrade.patch
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression roaringbitmap
|
||||
24
docker-compose/ext-src/pg_semver-src/test-upgrade.patch
Normal file
24
docker-compose/ext-src/pg_semver-src/test-upgrade.patch
Normal file
@@ -0,0 +1,24 @@
|
||||
diff --git a/test/sql/base.sql b/test/sql/base.sql
|
||||
index af599d8..2eed91b 100644
|
||||
--- a/test/sql/base.sql
|
||||
+++ b/test/sql/base.sql
|
||||
@@ -2,7 +2,6 @@
|
||||
BEGIN;
|
||||
|
||||
\i test/pgtap-core.sql
|
||||
-\i sql/semver.sql
|
||||
|
||||
SELECT plan(334);
|
||||
--SELECT * FROM no_plan();
|
||||
diff --git a/test/sql/corpus.sql b/test/sql/corpus.sql
|
||||
index 1f5f637..a519905 100644
|
||||
--- a/test/sql/corpus.sql
|
||||
+++ b/test/sql/corpus.sql
|
||||
@@ -4,7 +4,6 @@ BEGIN;
|
||||
-- Test the SemVer corpus from https://regex101.com/r/Ly7O1x/3/.
|
||||
|
||||
\i test/pgtap-core.sql
|
||||
-\i sql/semver.sql
|
||||
|
||||
SELECT plan(71);
|
||||
--SELECT * FROM no_plan();
|
||||
6
docker-compose/ext-src/pg_semver-src/test-upgrade.sh
Executable file
6
docker-compose/ext-src/pg_semver-src/test-upgrade.sh
Executable file
@@ -0,0 +1,6 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
patch -p1 <test-upgrade.patch
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --dbname=contrib_regression base corpus
|
||||
5
docker-compose/ext-src/pg_uuidv7-src/test-upgrade.sh
Executable file
5
docker-compose/ext-src/pg_uuidv7-src/test-upgrade.sh
Executable file
@@ -0,0 +1,5 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --dbname=contrib_regression 002_uuid_generate_v7 003_uuid_v7_to_timestamptz 004_uuid_timestamptz_to_v7 005_uuid_v7_to_timestamp 006_uuid_timestamp_to_v7
|
||||
5
docker-compose/ext-src/pgvector-src/test-upgrade.sh
Executable file
5
docker-compose/ext-src/pgvector-src/test-upgrade.sh
Executable file
@@ -0,0 +1,5 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --use-existing --dbname=contrib_regression bit btree cast copy halfvec hnsw_bit hnsw_halfvec hnsw_sparsevec hnsw_vector ivfflat_bit ivfflat_halfvec ivfflat_vector sparsevec vector_type
|
||||
5
docker-compose/ext-src/plv8-src/test-upgrade.sh
Executable file
5
docker-compose/ext-src/plv8-src/test-upgrade.sh
Executable file
@@ -0,0 +1,5 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression plv8 plv8-errors scalar_args inline json startup_pre startup varparam json_conv jsonb_conv window guc es6 arraybuffer composites currentresource startup_perms bytea find_function_perms memory_limits reset show array_spread regression dialect bigint procedure
|
||||
5
docker-compose/ext-src/postgresql-unit-src/test-upgrade.sh
Executable file
5
docker-compose/ext-src/postgresql-unit-src/test-upgrade.sh
Executable file
@@ -0,0 +1,5 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression extension tables unit binary unicode prefix units time temperature functions language_functions round derived compare aggregate iec custom crosstab convert
|
||||
5
docker-compose/ext-src/prefix-src/test-upgrade.sh
Executable file
5
docker-compose/ext-src/prefix-src/test-upgrade.sh
Executable file
@@ -0,0 +1,5 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression prefix falcon explain queries
|
||||
19
docker-compose/ext-src/rum-src/test-upgrade.patch
Normal file
19
docker-compose/ext-src/rum-src/test-upgrade.patch
Normal file
@@ -0,0 +1,19 @@
|
||||
diff --git a/expected/rum.out b/expected/rum.out
|
||||
index 5966d19..8860b79 100644
|
||||
--- a/expected/rum.out
|
||||
+++ b/expected/rum.out
|
||||
@@ -1,4 +1,3 @@
|
||||
-CREATE EXTENSION rum;
|
||||
CREATE TABLE test_rum( t text, a tsvector );
|
||||
CREATE TRIGGER tsvectorupdate
|
||||
BEFORE UPDATE OR INSERT ON test_rum
|
||||
diff --git a/sql/rum.sql b/sql/rum.sql
|
||||
index 8414bb9..898e6ab 100644
|
||||
--- a/sql/rum.sql
|
||||
+++ b/sql/rum.sql
|
||||
@@ -1,5 +1,3 @@
|
||||
-CREATE EXTENSION rum;
|
||||
-
|
||||
CREATE TABLE test_rum( t text, a tsvector );
|
||||
|
||||
CREATE TRIGGER tsvectorupdate
|
||||
6
docker-compose/ext-src/rum-src/test-upgrade.sh
Executable file
6
docker-compose/ext-src/rum-src/test-upgrade.sh
Executable file
@@ -0,0 +1,6 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
patch -p1 <test-upgrade.patch
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression rum rum_validate rum_hash ruminv timestamp orderby orderby_hash altorder altorder_hash limits int2 int4 int8 float4 float8 money oid time timetz date interval macaddr inet cidr text varchar char bytea bit varbit numeric rum_weight expr array
|
||||
@@ -4,14 +4,17 @@ set -x
|
||||
cd /ext-src || exit 2
|
||||
FAILED=
|
||||
LIST=$( (echo -e "${SKIP//","/"\n"}"; ls -d -- *-src) | sort | uniq -u)
|
||||
for d in ${LIST}
|
||||
do
|
||||
[ -d "${d}" ] || continue
|
||||
if ! psql -w -c "select 1" >/dev/null; then
|
||||
FAILED="${d} ${FAILED}"
|
||||
break
|
||||
fi
|
||||
for d in ${LIST}; do
|
||||
[ -d "${d}" ] || continue
|
||||
if ! psql -w -c "select 1" >/dev/null; then
|
||||
FAILED="${d} ${FAILED}"
|
||||
break
|
||||
fi
|
||||
if [ -f "${d}/neon-test.sh" ]; then
|
||||
"${d}/neon-test.sh" || FAILED="${d} ${FAILED}"
|
||||
else
|
||||
USE_PGXS=1 make -C "${d}" installcheck || FAILED="${d} ${FAILED}"
|
||||
fi
|
||||
done
|
||||
[ -z "${FAILED}" ] && exit 0
|
||||
echo "${FAILED}"
|
||||
|
||||
93
docker-compose/test_extensions_upgrade.sh
Executable file
93
docker-compose/test_extensions_upgrade.sh
Executable file
@@ -0,0 +1,93 @@
|
||||
#!/bin/bash
|
||||
set -eux -o pipefail
|
||||
cd "$(dirname "${0}")"
|
||||
# Takes a variable name as argument. The result is stored in that variable.
|
||||
generate_id() {
|
||||
local -n resvar=$1
|
||||
printf -v resvar '%08x%08x%08x%08x' $SRANDOM $SRANDOM $SRANDOM $SRANDOM
|
||||
}
|
||||
if [ -z ${OLDTAG+x} ] || [ -z ${NEWTAG+x} ] || [ -z "${OLDTAG}" ] || [ -z "${NEWTAG}" ]; then
|
||||
echo OLDTAG and NEWTAG must be defined
|
||||
exit 1
|
||||
fi
|
||||
export PG_VERSION=${PG_VERSION:-16}
|
||||
function wait_for_ready {
|
||||
TIME=0
|
||||
while ! docker compose logs compute_is_ready | grep -q "accepting connections" && [ ${TIME} -le 300 ] ; do
|
||||
((TIME += 1 ))
|
||||
sleep 1
|
||||
done
|
||||
if [ ${TIME} -gt 300 ]; then
|
||||
echo Time is out.
|
||||
exit 2
|
||||
fi
|
||||
}
|
||||
function create_extensions() {
|
||||
for ext in ${1}; do
|
||||
docker compose exec neon-test-extensions psql -X -v ON_ERROR_STOP=1 -d contrib_regression -c "CREATE EXTENSION IF NOT EXISTS ${ext}"
|
||||
done
|
||||
}
|
||||
EXTENSIONS='[
|
||||
{"extname": "plv8", "extdir": "plv8-src"},
|
||||
{"extname": "vector", "extdir": "pgvector-src"},
|
||||
{"extname": "unit", "extdir": "postgresql-unit-src"},
|
||||
{"extname": "hypopg", "extdir": "hypopg-src"},
|
||||
{"extname": "rum", "extdir": "rum-src"},
|
||||
{"extname": "ip4r", "extdir": "ip4r-src"},
|
||||
{"extname": "prefix", "extdir": "prefix-src"},
|
||||
{"extname": "hll", "extdir": "hll-src"},
|
||||
{"extname": "pg_cron", "extdir": "pg_cron-src"},
|
||||
{"extname": "pg_uuidv7", "extdir": "pg_uuidv7-src"},
|
||||
{"extname": "roaringbitmap", "extdir": "pg_roaringbitmap-src"},
|
||||
{"extname": "semver", "extdir": "pg_semver-src"},
|
||||
{"extname": "pg_ivm", "extdir": "pg_ivm-src"}
|
||||
]'
|
||||
EXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -)
|
||||
TAG=${NEWTAG} docker compose --profile test-extensions up --quiet-pull --build -d
|
||||
wait_for_ready
|
||||
docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression"
|
||||
docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression"
|
||||
create_extensions "${EXTNAMES}"
|
||||
query="select json_object_agg(extname,extversion) from pg_extension where extname in ('${EXTNAMES// /\',\'}')"
|
||||
new_vers=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c "$query")
|
||||
docker compose --profile test-extensions down
|
||||
TAG=${OLDTAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate
|
||||
wait_for_ready
|
||||
docker compose cp ext-src neon-test-extensions:/
|
||||
docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression"
|
||||
docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression"
|
||||
create_extensions "${EXTNAMES}"
|
||||
query="select pge.extname from pg_extension pge join (select key as extname, value as extversion from json_each_text('${new_vers}')) x on pge.extname=x.extname and pge.extversion <> x.extversion"
|
||||
exts=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c "$query")
|
||||
if [ -z "${exts}" ]; then
|
||||
echo "No extensions were upgraded"
|
||||
else
|
||||
tenant_id=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.tenant_id")
|
||||
timeline_id=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id")
|
||||
for ext in ${exts}; do
|
||||
echo Testing ${ext}...
|
||||
EXTDIR=$(echo ${EXTENSIONS} | jq -r '.[] | select(.extname=="'${ext}'") | .extdir')
|
||||
generate_id new_timeline_id
|
||||
PARAMS=(
|
||||
-sbf
|
||||
-X POST
|
||||
-H "Content-Type: application/json"
|
||||
-d "{\"new_timeline_id\": \"${new_timeline_id}\", \"pg_version\": ${PG_VERSION}, \"ancestor_timeline_id\": \"${timeline_id}\"}"
|
||||
"http://127.0.0.1:9898/v1/tenant/${tenant_id}/timeline/"
|
||||
)
|
||||
result=$(curl "${PARAMS[@]}")
|
||||
echo $result | jq .
|
||||
TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} TAG=${OLDTAG} docker compose down compute compute_is_ready
|
||||
COMPUTE_TAG=${NEWTAG} TAG=${OLDTAG} TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} docker compose up --quiet-pull -d --build compute compute_is_ready
|
||||
wait_for_ready
|
||||
TID=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id")
|
||||
if [ ${TID} != ${new_timeline_id} ]; then
|
||||
echo Timeline mismatch
|
||||
exit 1
|
||||
fi
|
||||
docker compose exec neon-test-extensions psql -d contrib_regression -c "\dx ${ext}"
|
||||
docker compose exec neon-test-extensions sh -c /ext-src/${EXTDIR}/test-upgrade.sh
|
||||
docker compose exec neon-test-extensions psql -d contrib_regression -c "alter extension ${ext} update"
|
||||
docker compose exec neon-test-extensions psql -d contrib_regression -c "\dx ${ext}"
|
||||
done
|
||||
fi
|
||||
@@ -15,11 +15,6 @@ pub struct GenericAPIError {
|
||||
pub error: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct InfoResponse {
|
||||
pub num_cpus: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct ExtensionInstallResponse {
|
||||
pub extension: PgIdent,
|
||||
|
||||
@@ -256,16 +256,24 @@ pub struct TenantConfigToml {
|
||||
pub compaction_period: Duration,
|
||||
/// Level0 delta layer threshold for compaction.
|
||||
pub compaction_threshold: usize,
|
||||
/// Controls the amount of L0 included in a single compaction iteration.
|
||||
/// The unit is `checkpoint_distance`, i.e., a size.
|
||||
/// We add L0s to the set of layers to compact until their cumulative
|
||||
/// size exceeds `compaction_upper_limit * checkpoint_distance`.
|
||||
pub compaction_upper_limit: usize,
|
||||
pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
|
||||
/// Level0 delta layer threshold at which to delay layer flushes for compaction backpressure,
|
||||
/// such that they take 2x as long, and start waiting for layer flushes during ephemeral layer
|
||||
/// rolls. This helps compaction keep up with WAL ingestion, and avoids read amplification
|
||||
/// blowing up. Should be >compaction_threshold. If None, defaults to 2 * compaction_threshold.
|
||||
/// 0 to disable.
|
||||
/// blowing up. Should be >compaction_threshold. 0 to disable. Disabled by default.
|
||||
pub l0_flush_delay_threshold: Option<usize>,
|
||||
/// Level0 delta layer threshold at which to stall layer flushes. 0 to disable. If None,
|
||||
/// defaults to 4 * compaction_threshold. Must be >compaction_threshold to avoid deadlock.
|
||||
/// Level0 delta layer threshold at which to stall layer flushes. Must be >compaction_threshold
|
||||
/// to avoid deadlock. 0 to disable. Disabled by default.
|
||||
pub l0_flush_stall_threshold: Option<usize>,
|
||||
/// If true, Level0 delta layer flushes will wait for S3 upload before flushing the next
|
||||
/// layer. This is a temporary backpressure mechanism which should be removed once
|
||||
/// l0_flush_{delay,stall}_threshold is fully enabled.
|
||||
pub l0_flush_wait_upload: bool,
|
||||
// Determines how much history is retained, to allow
|
||||
// branching and read replicas at an older point in time.
|
||||
// The unit is #of bytes of WAL.
|
||||
@@ -520,9 +528,17 @@ pub mod tenant_conf_defaults {
|
||||
|
||||
pub const DEFAULT_COMPACTION_PERIOD: &str = "20 s";
|
||||
pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;
|
||||
|
||||
// This value needs to be tuned to avoid OOM. We have 3/4 of the total CPU threads to do background works, that's 16*3/4=9 on
|
||||
// most of our pageservers. Compaction ~50 layers requires about 2GB memory (could be reduced later by optimizing L0 hole
|
||||
// calculation to avoid loading all keys into the memory). So with this config, we can get a maximum peak compaction usage of 18GB.
|
||||
pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 50;
|
||||
|
||||
pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm =
|
||||
crate::models::CompactionAlgorithm::Legacy;
|
||||
|
||||
pub const DEFAULT_L0_FLUSH_WAIT_UPLOAD: bool = true;
|
||||
|
||||
pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
|
||||
|
||||
// Large DEFAULT_GC_PERIOD is fine as long as PITR_INTERVAL is larger.
|
||||
@@ -558,11 +574,13 @@ impl Default for TenantConfigToml {
|
||||
compaction_period: humantime::parse_duration(DEFAULT_COMPACTION_PERIOD)
|
||||
.expect("cannot parse default compaction period"),
|
||||
compaction_threshold: DEFAULT_COMPACTION_THRESHOLD,
|
||||
compaction_upper_limit: DEFAULT_COMPACTION_UPPER_LIMIT,
|
||||
compaction_algorithm: crate::models::CompactionAlgorithmSettings {
|
||||
kind: DEFAULT_COMPACTION_ALGORITHM,
|
||||
},
|
||||
l0_flush_delay_threshold: None,
|
||||
l0_flush_stall_threshold: None,
|
||||
l0_flush_wait_upload: DEFAULT_L0_FLUSH_WAIT_UPLOAD,
|
||||
gc_horizon: DEFAULT_GC_HORIZON,
|
||||
gc_period: humantime::parse_duration(DEFAULT_GC_PERIOD)
|
||||
.expect("cannot parse default gc period"),
|
||||
|
||||
@@ -458,6 +458,8 @@ pub struct TenantConfigPatch {
|
||||
pub compaction_period: FieldPatch<String>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub compaction_threshold: FieldPatch<usize>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub compaction_upper_limit: FieldPatch<usize>,
|
||||
// defer parsing compaction_algorithm, like eviction_policy
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub compaction_algorithm: FieldPatch<CompactionAlgorithmSettings>,
|
||||
@@ -466,6 +468,8 @@ pub struct TenantConfigPatch {
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub l0_flush_stall_threshold: FieldPatch<usize>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub l0_flush_wait_upload: FieldPatch<bool>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub gc_horizon: FieldPatch<u64>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub gc_period: FieldPatch<String>,
|
||||
@@ -520,10 +524,12 @@ pub struct TenantConfig {
|
||||
pub compaction_target_size: Option<u64>,
|
||||
pub compaction_period: Option<String>,
|
||||
pub compaction_threshold: Option<usize>,
|
||||
pub compaction_upper_limit: Option<usize>,
|
||||
// defer parsing compaction_algorithm, like eviction_policy
|
||||
pub compaction_algorithm: Option<CompactionAlgorithmSettings>,
|
||||
pub l0_flush_delay_threshold: Option<usize>,
|
||||
pub l0_flush_stall_threshold: Option<usize>,
|
||||
pub l0_flush_wait_upload: Option<bool>,
|
||||
pub gc_horizon: Option<u64>,
|
||||
pub gc_period: Option<String>,
|
||||
pub image_creation_threshold: Option<usize>,
|
||||
@@ -556,9 +562,11 @@ impl TenantConfig {
|
||||
mut compaction_target_size,
|
||||
mut compaction_period,
|
||||
mut compaction_threshold,
|
||||
mut compaction_upper_limit,
|
||||
mut compaction_algorithm,
|
||||
mut l0_flush_delay_threshold,
|
||||
mut l0_flush_stall_threshold,
|
||||
mut l0_flush_wait_upload,
|
||||
mut gc_horizon,
|
||||
mut gc_period,
|
||||
mut image_creation_threshold,
|
||||
@@ -590,6 +598,9 @@ impl TenantConfig {
|
||||
.apply(&mut compaction_target_size);
|
||||
patch.compaction_period.apply(&mut compaction_period);
|
||||
patch.compaction_threshold.apply(&mut compaction_threshold);
|
||||
patch
|
||||
.compaction_upper_limit
|
||||
.apply(&mut compaction_upper_limit);
|
||||
patch.compaction_algorithm.apply(&mut compaction_algorithm);
|
||||
patch
|
||||
.l0_flush_delay_threshold
|
||||
@@ -597,6 +608,7 @@ impl TenantConfig {
|
||||
patch
|
||||
.l0_flush_stall_threshold
|
||||
.apply(&mut l0_flush_stall_threshold);
|
||||
patch.l0_flush_wait_upload.apply(&mut l0_flush_wait_upload);
|
||||
patch.gc_horizon.apply(&mut gc_horizon);
|
||||
patch.gc_period.apply(&mut gc_period);
|
||||
patch
|
||||
@@ -648,9 +660,11 @@ impl TenantConfig {
|
||||
compaction_target_size,
|
||||
compaction_period,
|
||||
compaction_threshold,
|
||||
compaction_upper_limit,
|
||||
compaction_algorithm,
|
||||
l0_flush_delay_threshold,
|
||||
l0_flush_stall_threshold,
|
||||
l0_flush_wait_upload,
|
||||
gc_horizon,
|
||||
gc_period,
|
||||
image_creation_threshold,
|
||||
@@ -1015,6 +1029,13 @@ pub struct TenantConfigPatchRequest {
|
||||
pub config: TenantConfigPatch, // as we have a flattened field, we should reject all unknown fields in it
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct TenantWaitLsnRequest {
|
||||
#[serde(flatten)]
|
||||
pub timelines: HashMap<TimelineId, Lsn>,
|
||||
pub timeout: Duration,
|
||||
}
|
||||
|
||||
/// See [`TenantState::attachment_status`] and the OpenAPI docs for context.
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
#[serde(tag = "slug", content = "data", rename_all = "snake_case")]
|
||||
|
||||
@@ -182,6 +182,13 @@ pub struct CancelKeyData {
|
||||
pub cancel_key: i32,
|
||||
}
|
||||
|
||||
pub fn id_to_cancel_key(id: u64) -> CancelKeyData {
|
||||
CancelKeyData {
|
||||
backend_pid: (id >> 32) as i32,
|
||||
cancel_key: (id & 0xffffffff) as i32,
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for CancelKeyData {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let hi = (self.backend_pid as u64) << 32;
|
||||
|
||||
@@ -19,3 +19,4 @@ postgres-protocol2 = { path = "../postgres-protocol2" }
|
||||
postgres-types2 = { path = "../postgres-types2" }
|
||||
tokio = { workspace = true, features = ["io-util", "time", "net"] }
|
||||
tokio-util = { workspace = true, features = ["codec"] }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
@@ -3,12 +3,13 @@ use crate::tls::TlsConnect;
|
||||
|
||||
use crate::{cancel_query, client::SocketConfig, tls::MakeTlsConnect};
|
||||
use crate::{cancel_query_raw, Error};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tokio::net::TcpStream;
|
||||
|
||||
/// The capability to request cancellation of in-progress queries on a
|
||||
/// connection.
|
||||
#[derive(Clone)]
|
||||
#[derive(Clone, Serialize, Deserialize)]
|
||||
pub struct CancelToken {
|
||||
pub socket_config: Option<SocketConfig>,
|
||||
pub ssl_mode: SslMode,
|
||||
|
||||
@@ -18,6 +18,7 @@ use fallible_iterator::FallibleIterator;
|
||||
use futures_util::{future, ready, TryStreamExt};
|
||||
use parking_lot::Mutex;
|
||||
use postgres_protocol2::message::{backend::Message, frontend};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
@@ -137,7 +138,7 @@ impl InnerClient {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
#[derive(Clone, Serialize, Deserialize)]
|
||||
pub struct SocketConfig {
|
||||
pub host: Host,
|
||||
pub port: u16,
|
||||
|
||||
@@ -7,6 +7,7 @@ use crate::tls::MakeTlsConnect;
|
||||
use crate::tls::TlsConnect;
|
||||
use crate::{Client, Connection, Error};
|
||||
use postgres_protocol2::message::frontend::StartupMessageParams;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt;
|
||||
use std::str;
|
||||
use std::time::Duration;
|
||||
@@ -16,7 +17,7 @@ pub use postgres_protocol2::authentication::sasl::ScramKeys;
|
||||
use tokio::net::TcpStream;
|
||||
|
||||
/// TLS configuration.
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[non_exhaustive]
|
||||
pub enum SslMode {
|
||||
/// Do not use TLS.
|
||||
@@ -50,7 +51,7 @@ pub enum ReplicationMode {
|
||||
}
|
||||
|
||||
/// A host specification.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub enum Host {
|
||||
/// A TCP hostname.
|
||||
Tcp(String),
|
||||
|
||||
@@ -377,7 +377,8 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
|
||||
let next_item = next_item?;
|
||||
|
||||
if timeout_try_cnt >= 2 {
|
||||
// Log a warning if we saw two timeouts in a row before a successful request
|
||||
if timeout_try_cnt > 2 {
|
||||
tracing::warn!("Azure Blob Storage list timed out and succeeded after {} tries", timeout_try_cnt);
|
||||
}
|
||||
timeout_try_cnt = 1;
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
//! (notifying it of upscale).
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use axum::extract::ws::{Message, WebSocket};
|
||||
use axum::extract::ws::{Message, Utf8Bytes, WebSocket};
|
||||
use futures::{
|
||||
stream::{SplitSink, SplitStream},
|
||||
SinkExt, StreamExt,
|
||||
@@ -82,21 +82,21 @@ impl Dispatcher {
|
||||
|
||||
let highest_shared_version = match monitor_range.highest_shared_version(&agent_range) {
|
||||
Ok(version) => {
|
||||
sink.send(Message::Text(
|
||||
sink.send(Message::Text(Utf8Bytes::from(
|
||||
serde_json::to_string(&ProtocolResponse::Version(version)).unwrap(),
|
||||
))
|
||||
)))
|
||||
.await
|
||||
.context("failed to notify agent of negotiated protocol version")?;
|
||||
version
|
||||
}
|
||||
Err(e) => {
|
||||
sink.send(Message::Text(
|
||||
sink.send(Message::Text(Utf8Bytes::from(
|
||||
serde_json::to_string(&ProtocolResponse::Error(format!(
|
||||
"Received protocol version range {} which does not overlap with {}",
|
||||
agent_range, monitor_range
|
||||
)))
|
||||
.unwrap(),
|
||||
))
|
||||
)))
|
||||
.await
|
||||
.context("failed to notify agent of no overlap between protocol version ranges")?;
|
||||
Err(e).context("error determining suitable protocol version range")?
|
||||
@@ -126,7 +126,7 @@ impl Dispatcher {
|
||||
|
||||
let json = serde_json::to_string(&message).context("failed to serialize message")?;
|
||||
self.sink
|
||||
.send(Message::Text(json))
|
||||
.send(Message::Text(Utf8Bytes::from(json)))
|
||||
.await
|
||||
.context("stream error sending message")
|
||||
}
|
||||
|
||||
@@ -36,7 +36,7 @@ itertools.workspace = true
|
||||
md5.workspace = true
|
||||
nix.workspace = true
|
||||
# hack to get the number of worker threads tokio uses
|
||||
num_cpus = { version = "1.15" }
|
||||
num_cpus.workspace = true
|
||||
num-traits.workspace = true
|
||||
once_cell.workspace = true
|
||||
pin-project-lite.workspace = true
|
||||
|
||||
@@ -763,4 +763,19 @@ impl Client {
|
||||
.await
|
||||
.map_err(Error::ReceiveBody)
|
||||
}
|
||||
|
||||
pub async fn wait_lsn(
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
request: TenantWaitLsnRequest,
|
||||
) -> Result<StatusCode> {
|
||||
let uri = format!(
|
||||
"{}/v1/tenant/{tenant_shard_id}/wait_lsn",
|
||||
self.mgmt_api_endpoint,
|
||||
);
|
||||
|
||||
self.request_noerror(Method::POST, uri, request)
|
||||
.await
|
||||
.map(|resp| resp.status())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -160,9 +160,12 @@ pub fn draw_history<W: std::io::Write>(history: &[LayerTraceEvent], mut output:
|
||||
|
||||
// Fill in and thicken rectangle if it's an
|
||||
// image layer so that we can see it.
|
||||
let mut style = Style::default();
|
||||
style.fill = Fill::Color(rgb(0x80, 0x80, 0x80));
|
||||
style.stroke = Stroke::Color(rgb(0, 0, 0), 0.5);
|
||||
let mut style = Style {
|
||||
fill: Fill::Color(rgb(0x80, 0x80, 0x80)),
|
||||
stroke: Stroke::Color(rgb(0, 0, 0), 0.5),
|
||||
opacity: 1.0,
|
||||
stroke_opacity: 1.0,
|
||||
};
|
||||
|
||||
let y_start = lsn_max - lsn_start;
|
||||
let y_end = lsn_max - lsn_end;
|
||||
@@ -214,10 +217,6 @@ pub fn draw_history<W: std::io::Write>(history: &[LayerTraceEvent], mut output:
|
||||
files_seen.insert(f);
|
||||
}
|
||||
|
||||
let mut record_style = Style::default();
|
||||
record_style.fill = Fill::Color(rgb(0x80, 0x80, 0x80));
|
||||
record_style.stroke = Stroke::None;
|
||||
|
||||
writeln!(svg, "{}", EndSvg)?;
|
||||
|
||||
let mut layer_events_str = String::new();
|
||||
|
||||
@@ -984,6 +984,8 @@ components:
|
||||
type: string
|
||||
compaction_threshold:
|
||||
type: string
|
||||
compaction_upper_limit:
|
||||
type: string
|
||||
image_creation_threshold:
|
||||
type: integer
|
||||
walreceiver_connect_timeout:
|
||||
|
||||
@@ -10,6 +10,7 @@ use std::time::Duration;
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use enumset::EnumSet;
|
||||
use futures::future::join_all;
|
||||
use futures::StreamExt;
|
||||
use futures::TryFutureExt;
|
||||
use humantime::format_rfc3339;
|
||||
@@ -40,6 +41,7 @@ use pageserver_api::models::TenantShardSplitRequest;
|
||||
use pageserver_api::models::TenantShardSplitResponse;
|
||||
use pageserver_api::models::TenantSorting;
|
||||
use pageserver_api::models::TenantState;
|
||||
use pageserver_api::models::TenantWaitLsnRequest;
|
||||
use pageserver_api::models::TimelineArchivalConfigRequest;
|
||||
use pageserver_api::models::TimelineCreateRequestMode;
|
||||
use pageserver_api::models::TimelineCreateRequestModeImportPgdata;
|
||||
@@ -95,6 +97,8 @@ use crate::tenant::timeline::CompactOptions;
|
||||
use crate::tenant::timeline::CompactRequest;
|
||||
use crate::tenant::timeline::CompactionError;
|
||||
use crate::tenant::timeline::Timeline;
|
||||
use crate::tenant::timeline::WaitLsnTimeout;
|
||||
use crate::tenant::timeline::WaitLsnWaiter;
|
||||
use crate::tenant::GetTimelineError;
|
||||
use crate::tenant::OffloadedTimeline;
|
||||
use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError};
|
||||
@@ -2790,6 +2794,63 @@ async fn secondary_download_handler(
|
||||
json_response(status, progress)
|
||||
}
|
||||
|
||||
async fn wait_lsn_handler(
|
||||
mut request: Request<Body>,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let wait_lsn_request: TenantWaitLsnRequest = json_request(&mut request).await?;
|
||||
|
||||
let state = get_state(&request);
|
||||
let tenant = state
|
||||
.tenant_manager
|
||||
.get_attached_tenant_shard(tenant_shard_id)?;
|
||||
|
||||
let mut wait_futures = Vec::default();
|
||||
for timeline in tenant.list_timelines() {
|
||||
let Some(lsn) = wait_lsn_request.timelines.get(&timeline.timeline_id) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let fut = {
|
||||
let timeline = timeline.clone();
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Error);
|
||||
async move {
|
||||
timeline
|
||||
.wait_lsn(
|
||||
*lsn,
|
||||
WaitLsnWaiter::HttpEndpoint,
|
||||
WaitLsnTimeout::Custom(wait_lsn_request.timeout),
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
}
|
||||
};
|
||||
wait_futures.push(fut);
|
||||
}
|
||||
|
||||
if wait_futures.is_empty() {
|
||||
return json_response(StatusCode::NOT_FOUND, ());
|
||||
}
|
||||
|
||||
let all_done = tokio::select! {
|
||||
results = join_all(wait_futures) => {
|
||||
results.iter().all(|res| res.is_ok())
|
||||
},
|
||||
_ = cancel.cancelled() => {
|
||||
return Err(ApiError::Cancelled);
|
||||
}
|
||||
};
|
||||
|
||||
let status = if all_done {
|
||||
StatusCode::OK
|
||||
} else {
|
||||
StatusCode::ACCEPTED
|
||||
};
|
||||
|
||||
json_response(status, ())
|
||||
}
|
||||
|
||||
async fn secondary_status_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
@@ -3577,6 +3638,9 @@ pub fn make_router(
|
||||
.post("/v1/tenant/:tenant_shard_id/secondary/download", |r| {
|
||||
api_handler(r, secondary_download_handler)
|
||||
})
|
||||
.post("/v1/tenant/:tenant_shard_id/wait_lsn", |r| {
|
||||
api_handler(r, wait_lsn_handler)
|
||||
})
|
||||
.put("/v1/tenant/:tenant_shard_id/break", |r| {
|
||||
testing_api_handler("set tenant state to broken", r, handle_tenant_break)
|
||||
})
|
||||
|
||||
@@ -3,7 +3,7 @@ use metrics::{
|
||||
register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
|
||||
register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
|
||||
register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
|
||||
Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
|
||||
Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
|
||||
IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
|
||||
};
|
||||
use once_cell::sync::Lazy;
|
||||
@@ -398,6 +398,15 @@ pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static FLUSH_WAIT_UPLOAD_TIME: Lazy<GaugeVec> = Lazy::new(|| {
|
||||
register_gauge_vec!(
|
||||
"pageserver_flush_wait_upload_seconds",
|
||||
"Time spent waiting for preceding uploads during layer flush",
|
||||
&["tenant_id", "shard_id", "timeline_id"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
|
||||
register_int_gauge_vec!(
|
||||
"pageserver_last_record_lsn",
|
||||
@@ -2569,6 +2578,7 @@ pub(crate) struct TimelineMetrics {
|
||||
timeline_id: String,
|
||||
pub flush_time_histo: StorageTimeMetrics,
|
||||
pub flush_delay_histo: StorageTimeMetrics,
|
||||
pub flush_wait_upload_time_gauge: Gauge,
|
||||
pub compact_time_histo: StorageTimeMetrics,
|
||||
pub create_images_time_histo: StorageTimeMetrics,
|
||||
pub logical_size_histo: StorageTimeMetrics,
|
||||
@@ -2620,6 +2630,9 @@ impl TimelineMetrics {
|
||||
&shard_id,
|
||||
&timeline_id,
|
||||
);
|
||||
let flush_wait_upload_time_gauge = FLUSH_WAIT_UPLOAD_TIME
|
||||
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
|
||||
.unwrap();
|
||||
let compact_time_histo = StorageTimeMetrics::new(
|
||||
StorageTimeOperation::Compact,
|
||||
&tenant_id,
|
||||
@@ -2766,6 +2779,7 @@ impl TimelineMetrics {
|
||||
timeline_id,
|
||||
flush_time_histo,
|
||||
flush_delay_histo,
|
||||
flush_wait_upload_time_gauge,
|
||||
compact_time_histo,
|
||||
create_images_time_histo,
|
||||
logical_size_histo,
|
||||
@@ -2815,6 +2829,14 @@ impl TimelineMetrics {
|
||||
self.resident_physical_size_gauge.get()
|
||||
}
|
||||
|
||||
pub(crate) fn flush_wait_upload_time_gauge_add(&self, duration: f64) {
|
||||
self.flush_wait_upload_time_gauge.add(duration);
|
||||
crate::metrics::FLUSH_WAIT_UPLOAD_TIME
|
||||
.get_metric_with_label_values(&[&self.tenant_id, &self.shard_id, &self.timeline_id])
|
||||
.unwrap()
|
||||
.add(duration);
|
||||
}
|
||||
|
||||
pub(crate) fn shutdown(&self) {
|
||||
let was_shutdown = self
|
||||
.shutdown
|
||||
@@ -2832,6 +2854,7 @@ impl TimelineMetrics {
|
||||
let shard_id = &self.shard_id;
|
||||
let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
let _ = FLUSH_WAIT_UPLOAD_TIME.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
{
|
||||
RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
|
||||
|
||||
@@ -1708,6 +1708,7 @@ impl PageServerHandler {
|
||||
.wait_lsn(
|
||||
not_modified_since,
|
||||
crate::tenant::timeline::WaitLsnWaiter::PageService,
|
||||
timeline::WaitLsnTimeout::Default,
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
@@ -2044,6 +2045,7 @@ impl PageServerHandler {
|
||||
.wait_lsn(
|
||||
lsn,
|
||||
crate::tenant::timeline::WaitLsnWaiter::PageService,
|
||||
crate::tenant::timeline::WaitLsnTimeout::Default,
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
@@ -37,6 +37,8 @@ use remote_timeline_client::manifest::{
|
||||
OffloadedTimelineManifest, TenantManifest, LATEST_TENANT_MANIFEST_VERSION,
|
||||
};
|
||||
use remote_timeline_client::UploadQueueNotReadyError;
|
||||
use remote_timeline_client::FAILED_REMOTE_OP_RETRIES;
|
||||
use remote_timeline_client::FAILED_UPLOAD_WARN_THRESHOLD;
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt;
|
||||
use std::future::Future;
|
||||
@@ -2558,7 +2560,12 @@ impl Tenant {
|
||||
// sizes etc. and that would get confused if the previous page versions
|
||||
// are not in the repository yet.
|
||||
ancestor_timeline
|
||||
.wait_lsn(*lsn, timeline::WaitLsnWaiter::Tenant, ctx)
|
||||
.wait_lsn(
|
||||
*lsn,
|
||||
timeline::WaitLsnWaiter::Tenant,
|
||||
timeline::WaitLsnTimeout::Default,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
e @ (WaitLsnError::Timeout(_) | WaitLsnError::BadState { .. }) => {
|
||||
@@ -3809,6 +3816,13 @@ impl Tenant {
|
||||
.unwrap_or(self.conf.default_tenant_conf.compaction_threshold)
|
||||
}
|
||||
|
||||
pub fn get_compaction_upper_limit(&self) -> usize {
|
||||
let tenant_conf = self.tenant_conf.load().tenant_conf.clone();
|
||||
tenant_conf
|
||||
.compaction_upper_limit
|
||||
.unwrap_or(self.conf.default_tenant_conf.compaction_upper_limit)
|
||||
}
|
||||
|
||||
pub fn get_gc_horizon(&self) -> u64 {
|
||||
let tenant_conf = self.tenant_conf.load().tenant_conf.clone();
|
||||
tenant_conf
|
||||
@@ -5308,27 +5322,37 @@ impl Tenant {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
upload_tenant_manifest(
|
||||
&self.remote_storage,
|
||||
&self.tenant_shard_id,
|
||||
self.generation,
|
||||
&manifest,
|
||||
// Remote storage does no retries internally, so wrap it
|
||||
match backoff::retry(
|
||||
|| async {
|
||||
upload_tenant_manifest(
|
||||
&self.remote_storage,
|
||||
&self.tenant_shard_id,
|
||||
self.generation,
|
||||
&manifest,
|
||||
&self.cancel,
|
||||
)
|
||||
.await
|
||||
},
|
||||
|_e| self.cancel.is_cancelled(),
|
||||
FAILED_UPLOAD_WARN_THRESHOLD,
|
||||
FAILED_REMOTE_OP_RETRIES,
|
||||
"uploading tenant manifest",
|
||||
&self.cancel,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
if self.cancel.is_cancelled() {
|
||||
TenantManifestError::Cancelled
|
||||
} else {
|
||||
TenantManifestError::RemoteStorage(e)
|
||||
{
|
||||
None => Err(TenantManifestError::Cancelled),
|
||||
Some(Err(_)) if self.cancel.is_cancelled() => Err(TenantManifestError::Cancelled),
|
||||
Some(Err(e)) => Err(TenantManifestError::RemoteStorage(e)),
|
||||
Some(Ok(_)) => {
|
||||
// Store the successfully uploaded manifest, so that future callers can avoid
|
||||
// re-uploading the same thing.
|
||||
*guard = Some(manifest);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
})?;
|
||||
|
||||
// Store the successfully uploaded manifest, so that future callers can avoid
|
||||
// re-uploading the same thing.
|
||||
*guard = Some(manifest);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5452,9 +5476,11 @@ pub(crate) mod harness {
|
||||
compaction_target_size: Some(tenant_conf.compaction_target_size),
|
||||
compaction_period: Some(tenant_conf.compaction_period),
|
||||
compaction_threshold: Some(tenant_conf.compaction_threshold),
|
||||
compaction_upper_limit: Some(tenant_conf.compaction_upper_limit),
|
||||
compaction_algorithm: Some(tenant_conf.compaction_algorithm),
|
||||
l0_flush_delay_threshold: tenant_conf.l0_flush_delay_threshold,
|
||||
l0_flush_stall_threshold: tenant_conf.l0_flush_stall_threshold,
|
||||
l0_flush_wait_upload: Some(tenant_conf.l0_flush_wait_upload),
|
||||
gc_horizon: Some(tenant_conf.gc_horizon),
|
||||
gc_period: Some(tenant_conf.gc_period),
|
||||
image_creation_threshold: Some(tenant_conf.image_creation_threshold),
|
||||
|
||||
@@ -277,6 +277,10 @@ pub struct TenantConfOpt {
|
||||
#[serde(default)]
|
||||
pub compaction_threshold: Option<usize>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(default)]
|
||||
pub compaction_upper_limit: Option<usize>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(default)]
|
||||
pub compaction_algorithm: Option<CompactionAlgorithmSettings>,
|
||||
@@ -289,6 +293,10 @@ pub struct TenantConfOpt {
|
||||
#[serde(default)]
|
||||
pub l0_flush_stall_threshold: Option<usize>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(default)]
|
||||
pub l0_flush_wait_upload: Option<bool>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(default)]
|
||||
pub gc_horizon: Option<u64>,
|
||||
@@ -397,6 +405,9 @@ impl TenantConfOpt {
|
||||
compaction_threshold: self
|
||||
.compaction_threshold
|
||||
.unwrap_or(global_conf.compaction_threshold),
|
||||
compaction_upper_limit: self
|
||||
.compaction_upper_limit
|
||||
.unwrap_or(global_conf.compaction_upper_limit),
|
||||
compaction_algorithm: self
|
||||
.compaction_algorithm
|
||||
.as_ref()
|
||||
@@ -408,6 +419,9 @@ impl TenantConfOpt {
|
||||
l0_flush_stall_threshold: self
|
||||
.l0_flush_stall_threshold
|
||||
.or(global_conf.l0_flush_stall_threshold),
|
||||
l0_flush_wait_upload: self
|
||||
.l0_flush_wait_upload
|
||||
.unwrap_or(global_conf.l0_flush_wait_upload),
|
||||
gc_horizon: self.gc_horizon.unwrap_or(global_conf.gc_horizon),
|
||||
gc_period: self.gc_period.unwrap_or(global_conf.gc_period),
|
||||
image_creation_threshold: self
|
||||
@@ -471,9 +485,11 @@ impl TenantConfOpt {
|
||||
mut compaction_target_size,
|
||||
mut compaction_period,
|
||||
mut compaction_threshold,
|
||||
mut compaction_upper_limit,
|
||||
mut compaction_algorithm,
|
||||
mut l0_flush_delay_threshold,
|
||||
mut l0_flush_stall_threshold,
|
||||
mut l0_flush_wait_upload,
|
||||
mut gc_horizon,
|
||||
mut gc_period,
|
||||
mut image_creation_threshold,
|
||||
@@ -511,6 +527,9 @@ impl TenantConfOpt {
|
||||
.map(|v| humantime::parse_duration(&v))?
|
||||
.apply(&mut compaction_period);
|
||||
patch.compaction_threshold.apply(&mut compaction_threshold);
|
||||
patch
|
||||
.compaction_upper_limit
|
||||
.apply(&mut compaction_upper_limit);
|
||||
patch.compaction_algorithm.apply(&mut compaction_algorithm);
|
||||
patch
|
||||
.l0_flush_delay_threshold
|
||||
@@ -518,6 +537,7 @@ impl TenantConfOpt {
|
||||
patch
|
||||
.l0_flush_stall_threshold
|
||||
.apply(&mut l0_flush_stall_threshold);
|
||||
patch.l0_flush_wait_upload.apply(&mut l0_flush_wait_upload);
|
||||
patch.gc_horizon.apply(&mut gc_horizon);
|
||||
patch
|
||||
.gc_period
|
||||
@@ -587,9 +607,11 @@ impl TenantConfOpt {
|
||||
compaction_target_size,
|
||||
compaction_period,
|
||||
compaction_threshold,
|
||||
compaction_upper_limit,
|
||||
compaction_algorithm,
|
||||
l0_flush_delay_threshold,
|
||||
l0_flush_stall_threshold,
|
||||
l0_flush_wait_upload,
|
||||
gc_horizon,
|
||||
gc_period,
|
||||
image_creation_threshold,
|
||||
@@ -647,8 +669,10 @@ impl From<TenantConfOpt> for models::TenantConfig {
|
||||
compaction_target_size: value.compaction_target_size,
|
||||
compaction_period: value.compaction_period.map(humantime),
|
||||
compaction_threshold: value.compaction_threshold,
|
||||
compaction_upper_limit: value.compaction_upper_limit,
|
||||
l0_flush_delay_threshold: value.l0_flush_delay_threshold,
|
||||
l0_flush_stall_threshold: value.l0_flush_stall_threshold,
|
||||
l0_flush_wait_upload: value.l0_flush_wait_upload,
|
||||
gc_horizon: value.gc_horizon,
|
||||
gc_period: value.gc_period.map(humantime),
|
||||
image_creation_threshold: value.image_creation_threshold,
|
||||
|
||||
@@ -1643,6 +1643,7 @@ impl TenantManager {
|
||||
.wait_lsn(
|
||||
*target_lsn,
|
||||
crate::tenant::timeline::WaitLsnWaiter::Tenant,
|
||||
crate::tenant::timeline::WaitLsnTimeout::Default,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
|
||||
@@ -222,6 +222,10 @@ impl LayerFileMetadata {
|
||||
shard,
|
||||
}
|
||||
}
|
||||
/// Helper to get both generation and file size in a tuple
|
||||
pub fn generation_file_size(&self) -> (Generation, u64) {
|
||||
(self.generation, self.file_size)
|
||||
}
|
||||
}
|
||||
|
||||
/// Limited history of earlier ancestors.
|
||||
|
||||
@@ -559,6 +559,13 @@ impl JobGenerator<PendingDownload, RunningDownload, CompleteDownload, DownloadCo
|
||||
}
|
||||
}
|
||||
|
||||
enum LayerAction {
|
||||
Download,
|
||||
NoAction,
|
||||
Skip,
|
||||
Touch,
|
||||
}
|
||||
|
||||
/// This type is a convenience to group together the various functions involved in
|
||||
/// freshening a secondary tenant.
|
||||
struct TenantDownloader<'a> {
|
||||
@@ -1008,69 +1015,17 @@ impl<'a> TenantDownloader<'a> {
|
||||
return (Err(UpdateError::Restart), touched);
|
||||
}
|
||||
|
||||
// Existing on-disk layers: just update their access time.
|
||||
if let Some(on_disk) = timeline_state.on_disk_layers.get(&layer.name) {
|
||||
tracing::debug!("Layer {} is already on disk", layer.name);
|
||||
|
||||
if cfg!(debug_assertions) {
|
||||
// Debug for https://github.com/neondatabase/neon/issues/6966: check that the files we think
|
||||
// are already present on disk are really there.
|
||||
match tokio::fs::metadata(&on_disk.local_path).await {
|
||||
Ok(meta) => {
|
||||
tracing::debug!(
|
||||
"Layer {} present at {}, size {}",
|
||||
layer.name,
|
||||
on_disk.local_path,
|
||||
meta.len(),
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"Layer {} not found at {} ({})",
|
||||
layer.name,
|
||||
on_disk.local_path,
|
||||
e
|
||||
);
|
||||
debug_assert!(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if on_disk.metadata != layer.metadata || on_disk.access_time != layer.access_time {
|
||||
// We already have this layer on disk. Update its access time.
|
||||
tracing::debug!(
|
||||
"Access time updated for layer {}: {} -> {}",
|
||||
layer.name,
|
||||
strftime(&on_disk.access_time),
|
||||
strftime(&layer.access_time)
|
||||
);
|
||||
touched.push(layer);
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
tracing::debug!("Layer {} not present on disk yet", layer.name);
|
||||
}
|
||||
|
||||
// Eviction: if we evicted a layer, then do not re-download it unless it was accessed more
|
||||
// recently than it was evicted.
|
||||
if let Some(evicted_at) = timeline_state.evicted_at.get(&layer.name) {
|
||||
if &layer.access_time > evicted_at {
|
||||
tracing::info!(
|
||||
"Re-downloading evicted layer {}, accessed at {}, evicted at {}",
|
||||
layer.name,
|
||||
strftime(&layer.access_time),
|
||||
strftime(evicted_at)
|
||||
);
|
||||
} else {
|
||||
tracing::trace!(
|
||||
"Not re-downloading evicted layer {}, accessed at {}, evicted at {}",
|
||||
layer.name,
|
||||
strftime(&layer.access_time),
|
||||
strftime(evicted_at)
|
||||
);
|
||||
match self.layer_action(&timeline_state, &layer).await {
|
||||
LayerAction::Download => (),
|
||||
LayerAction::NoAction => continue,
|
||||
LayerAction::Skip => {
|
||||
self.skip_layer(layer);
|
||||
continue;
|
||||
}
|
||||
LayerAction::Touch => {
|
||||
touched.push(layer);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
match self
|
||||
@@ -1091,6 +1046,86 @@ impl<'a> TenantDownloader<'a> {
|
||||
(Ok(()), touched)
|
||||
}
|
||||
|
||||
async fn layer_action(
|
||||
&self,
|
||||
timeline_state: &SecondaryDetailTimeline,
|
||||
layer: &HeatMapLayer,
|
||||
) -> LayerAction {
|
||||
// Existing on-disk layers: just update their access time.
|
||||
if let Some(on_disk) = timeline_state.on_disk_layers.get(&layer.name) {
|
||||
tracing::debug!("Layer {} is already on disk", layer.name);
|
||||
|
||||
if cfg!(debug_assertions) {
|
||||
// Debug for https://github.com/neondatabase/neon/issues/6966: check that the files we think
|
||||
// are already present on disk are really there.
|
||||
match tokio::fs::metadata(&on_disk.local_path).await {
|
||||
Ok(meta) => {
|
||||
tracing::debug!(
|
||||
"Layer {} present at {}, size {}",
|
||||
layer.name,
|
||||
on_disk.local_path,
|
||||
meta.len(),
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"Layer {} not found at {} ({})",
|
||||
layer.name,
|
||||
on_disk.local_path,
|
||||
e
|
||||
);
|
||||
debug_assert!(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if on_disk.metadata.generation_file_size() != on_disk.metadata.generation_file_size() {
|
||||
tracing::info!(
|
||||
"Re-downloading layer {} with changed size or generation: {:?}->{:?}",
|
||||
layer.name,
|
||||
on_disk.metadata.generation_file_size(),
|
||||
on_disk.metadata.generation_file_size()
|
||||
);
|
||||
return LayerAction::Download;
|
||||
}
|
||||
if on_disk.metadata != layer.metadata || on_disk.access_time != layer.access_time {
|
||||
// We already have this layer on disk. Update its access time.
|
||||
tracing::debug!(
|
||||
"Access time updated for layer {}: {} -> {}",
|
||||
layer.name,
|
||||
strftime(&on_disk.access_time),
|
||||
strftime(&layer.access_time)
|
||||
);
|
||||
return LayerAction::Touch;
|
||||
}
|
||||
return LayerAction::NoAction;
|
||||
} else {
|
||||
tracing::debug!("Layer {} not present on disk yet", layer.name);
|
||||
}
|
||||
|
||||
// Eviction: if we evicted a layer, then do not re-download it unless it was accessed more
|
||||
// recently than it was evicted.
|
||||
if let Some(evicted_at) = timeline_state.evicted_at.get(&layer.name) {
|
||||
if &layer.access_time > evicted_at {
|
||||
tracing::info!(
|
||||
"Re-downloading evicted layer {}, accessed at {}, evicted at {}",
|
||||
layer.name,
|
||||
strftime(&layer.access_time),
|
||||
strftime(evicted_at)
|
||||
);
|
||||
} else {
|
||||
tracing::trace!(
|
||||
"Not re-downloading evicted layer {}, accessed at {}, evicted at {}",
|
||||
layer.name,
|
||||
strftime(&layer.access_time),
|
||||
strftime(evicted_at)
|
||||
);
|
||||
return LayerAction::Skip;
|
||||
}
|
||||
}
|
||||
LayerAction::Download
|
||||
}
|
||||
|
||||
async fn download_timeline(
|
||||
&self,
|
||||
timeline: HeatMapTimeline,
|
||||
|
||||
@@ -33,6 +33,7 @@ use utils::sync::gate::GateGuard;
|
||||
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
pub use batch_split_writer::{BatchLayerWriter, SplitDeltaLayerWriter, SplitImageLayerWriter};
|
||||
pub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef};
|
||||
pub use image_layer::{ImageLayer, ImageLayerWriter};
|
||||
pub use inmemory_layer::InMemoryLayer;
|
||||
|
||||
@@ -87,6 +87,23 @@ impl BatchLayerWriter {
|
||||
));
|
||||
}
|
||||
|
||||
pub(crate) async fn finish(
|
||||
self,
|
||||
tline: &Arc<Timeline>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Vec<ResidentLayer>> {
|
||||
let res = self
|
||||
.finish_with_discard_fn(tline, ctx, |_| async { false })
|
||||
.await?;
|
||||
let mut output = Vec::new();
|
||||
for r in res {
|
||||
if let BatchWriterResult::Produced(layer) = r {
|
||||
output.push(layer);
|
||||
}
|
||||
}
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
pub(crate) async fn finish_with_discard_fn<D, F>(
|
||||
self,
|
||||
tline: &Arc<Timeline>,
|
||||
|
||||
@@ -70,6 +70,7 @@ use std::sync::{Arc, Mutex, OnceLock, RwLock, Weak};
|
||||
use std::time::{Duration, Instant, SystemTime};
|
||||
|
||||
use crate::l0_flush::{self, L0FlushGlobalState};
|
||||
use crate::tenant::storage_layer::ImageLayerName;
|
||||
use crate::{
|
||||
aux_file::AuxFileSizeEstimator,
|
||||
page_service::TenantManagerTypes,
|
||||
@@ -78,7 +79,7 @@ use crate::{
|
||||
layer_map::{LayerMap, SearchResult},
|
||||
metadata::TimelineMetadata,
|
||||
storage_layer::{
|
||||
inmemory_layer::IndexEntry, IoConcurrency, PersistentLayerDesc,
|
||||
inmemory_layer::IndexEntry, BatchLayerWriter, IoConcurrency, PersistentLayerDesc,
|
||||
ValueReconstructSituation,
|
||||
},
|
||||
},
|
||||
@@ -144,15 +145,19 @@ use self::layer_manager::LayerManager;
|
||||
use self::logical_size::LogicalSize;
|
||||
use self::walreceiver::{WalReceiver, WalReceiverConf};
|
||||
|
||||
use super::config::TenantConf;
|
||||
use super::remote_timeline_client::index::IndexPart;
|
||||
use super::remote_timeline_client::RemoteTimelineClient;
|
||||
use super::secondary::heatmap::{HeatMapLayer, HeatMapTimeline};
|
||||
use super::storage_layer::{LayerFringe, LayerVisibilityHint, ReadableLayer};
|
||||
use super::upload_queue::NotInitialized;
|
||||
use super::GcError;
|
||||
use super::{
|
||||
debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf, MaybeOffloaded,
|
||||
config::TenantConf, storage_layer::LayerVisibilityHint, upload_queue::NotInitialized,
|
||||
MaybeOffloaded,
|
||||
};
|
||||
use super::{debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf};
|
||||
use super::{remote_timeline_client::index::IndexPart, storage_layer::LayerFringe};
|
||||
use super::{
|
||||
remote_timeline_client::RemoteTimelineClient, remote_timeline_client::WaitCompletionError,
|
||||
storage_layer::ReadableLayer,
|
||||
};
|
||||
use super::{
|
||||
secondary::heatmap::{HeatMapLayer, HeatMapTimeline},
|
||||
GcError,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -897,10 +902,17 @@ impl From<GetReadyAncestorError> for PageReconstructError {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) enum WaitLsnTimeout {
|
||||
Custom(Duration),
|
||||
// Use the [`PageServerConf::wait_lsn_timeout`] default
|
||||
Default,
|
||||
}
|
||||
|
||||
pub(crate) enum WaitLsnWaiter<'a> {
|
||||
Timeline(&'a Timeline),
|
||||
Tenant,
|
||||
PageService,
|
||||
HttpEndpoint,
|
||||
}
|
||||
|
||||
/// Argument to [`Timeline::shutdown`].
|
||||
@@ -922,7 +934,7 @@ pub(crate) enum ShutdownMode {
|
||||
}
|
||||
|
||||
struct ImageLayerCreationOutcome {
|
||||
image: Option<ResidentLayer>,
|
||||
unfinished_image_layer: Option<ImageLayerWriter>,
|
||||
next_start_key: Key,
|
||||
}
|
||||
|
||||
@@ -1297,6 +1309,7 @@ impl Timeline {
|
||||
&self,
|
||||
lsn: Lsn,
|
||||
who_is_waiting: WaitLsnWaiter<'_>,
|
||||
timeout: WaitLsnTimeout,
|
||||
ctx: &RequestContext, /* Prepare for use by cancellation */
|
||||
) -> Result<(), WaitLsnError> {
|
||||
let state = self.current_state();
|
||||
@@ -1313,7 +1326,7 @@ impl Timeline {
|
||||
| TaskKind::WalReceiverConnectionPoller => {
|
||||
let is_myself = match who_is_waiting {
|
||||
WaitLsnWaiter::Timeline(waiter) => Weak::ptr_eq(&waiter.myself, &self.myself),
|
||||
WaitLsnWaiter::Tenant | WaitLsnWaiter::PageService => unreachable!("tenant or page_service context are not expected to have task kind {:?}", ctx.task_kind()),
|
||||
WaitLsnWaiter::Tenant | WaitLsnWaiter::PageService | WaitLsnWaiter::HttpEndpoint => unreachable!("tenant or page_service context are not expected to have task kind {:?}", ctx.task_kind()),
|
||||
};
|
||||
if is_myself {
|
||||
if let Err(current) = self.last_record_lsn.would_wait_for(lsn) {
|
||||
@@ -1329,13 +1342,14 @@ impl Timeline {
|
||||
}
|
||||
}
|
||||
|
||||
let timeout = match timeout {
|
||||
WaitLsnTimeout::Custom(t) => t,
|
||||
WaitLsnTimeout::Default => self.conf.wait_lsn_timeout,
|
||||
};
|
||||
|
||||
let _timer = crate::metrics::WAIT_LSN_TIME.start_timer();
|
||||
|
||||
match self
|
||||
.last_record_lsn
|
||||
.wait_for_timeout(lsn, self.conf.wait_lsn_timeout)
|
||||
.await
|
||||
{
|
||||
match self.last_record_lsn.wait_for_timeout(lsn, timeout).await {
|
||||
Ok(()) => Ok(()),
|
||||
Err(e) => {
|
||||
use utils::seqwait::SeqWaitError::*;
|
||||
@@ -2167,9 +2181,17 @@ impl Timeline {
|
||||
.unwrap_or(self.conf.default_tenant_conf.compaction_threshold)
|
||||
}
|
||||
|
||||
fn get_compaction_upper_limit(&self) -> usize {
|
||||
let tenant_conf = self.tenant_conf.load();
|
||||
tenant_conf
|
||||
.tenant_conf
|
||||
.compaction_upper_limit
|
||||
.unwrap_or(self.conf.default_tenant_conf.compaction_upper_limit)
|
||||
}
|
||||
|
||||
fn get_l0_flush_delay_threshold(&self) -> Option<usize> {
|
||||
// Default to delay L0 flushes at 3x compaction threshold.
|
||||
const DEFAULT_L0_FLUSH_DELAY_FACTOR: usize = 3;
|
||||
// Disable L0 flushes by default. This and compaction needs further tuning.
|
||||
const DEFAULT_L0_FLUSH_DELAY_FACTOR: usize = 0; // TODO: default to e.g. 3
|
||||
|
||||
// If compaction is disabled, don't delay.
|
||||
if self.get_compaction_period() == Duration::ZERO {
|
||||
@@ -2197,10 +2219,9 @@ impl Timeline {
|
||||
}
|
||||
|
||||
fn get_l0_flush_stall_threshold(&self) -> Option<usize> {
|
||||
// Default to stall L0 flushes at 5x compaction threshold.
|
||||
// TODO: stalls are temporarily disabled by default, see below.
|
||||
#[allow(unused)]
|
||||
const DEFAULT_L0_FLUSH_STALL_FACTOR: usize = 5;
|
||||
// Disable L0 stalls by default. In ingest benchmarks, we see image compaction take >10
|
||||
// minutes, blocking L0 compaction, and we can't stall L0 flushes for that long.
|
||||
const DEFAULT_L0_FLUSH_STALL_FACTOR: usize = 0; // TODO: default to e.g. 5
|
||||
|
||||
// If compaction is disabled, don't stall.
|
||||
if self.get_compaction_period() == Duration::ZERO {
|
||||
@@ -2232,13 +2253,8 @@ impl Timeline {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Disable stalls by default. In ingest benchmarks, we see image compaction take >10
|
||||
// minutes, blocking L0 compaction, and we can't stall L0 flushes for that long.
|
||||
//
|
||||
// TODO: fix this.
|
||||
// let l0_flush_stall_threshold = l0_flush_stall_threshold
|
||||
// .unwrap_or(DEFAULT_L0_FLUSH_STALL_FACTOR * compaction_threshold);
|
||||
let l0_flush_stall_threshold = l0_flush_stall_threshold?;
|
||||
let l0_flush_stall_threshold = l0_flush_stall_threshold
|
||||
.unwrap_or(DEFAULT_L0_FLUSH_STALL_FACTOR * compaction_threshold);
|
||||
|
||||
// 0 disables backpressure.
|
||||
if l0_flush_stall_threshold == 0 {
|
||||
@@ -2252,6 +2268,14 @@ impl Timeline {
|
||||
Some(max(l0_flush_stall_threshold, compaction_threshold))
|
||||
}
|
||||
|
||||
fn get_l0_flush_wait_upload(&self) -> bool {
|
||||
let tenant_conf = self.tenant_conf.load();
|
||||
tenant_conf
|
||||
.tenant_conf
|
||||
.l0_flush_wait_upload
|
||||
.unwrap_or(self.conf.default_tenant_conf.l0_flush_wait_upload)
|
||||
}
|
||||
|
||||
fn get_image_creation_threshold(&self) -> usize {
|
||||
let tenant_conf = self.tenant_conf.load();
|
||||
tenant_conf
|
||||
@@ -3444,6 +3468,13 @@ impl Timeline {
|
||||
let mut completed_keyspace = KeySpace::default();
|
||||
let mut image_covered_keyspace = KeySpaceRandomAccum::new();
|
||||
|
||||
// Prevent GC from progressing while visiting the current timeline.
|
||||
// If we are GC-ing because a new image layer was added while traversing
|
||||
// the timeline, then it will remove layers that are required for fulfilling
|
||||
// the current get request (read-path cannot "look back" and notice the new
|
||||
// image layer).
|
||||
let _gc_cutoff_holder = timeline.get_latest_gc_cutoff_lsn();
|
||||
|
||||
loop {
|
||||
if cancel.is_cancelled() {
|
||||
return Err(GetVectoredError::Cancelled);
|
||||
@@ -3584,7 +3615,12 @@ impl Timeline {
|
||||
}
|
||||
}
|
||||
ancestor
|
||||
.wait_lsn(self.ancestor_lsn, WaitLsnWaiter::Timeline(self), ctx)
|
||||
.wait_lsn(
|
||||
self.ancestor_lsn,
|
||||
WaitLsnWaiter::Timeline(self),
|
||||
WaitLsnTimeout::Default,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
e @ WaitLsnError::Timeout(_) => GetReadyAncestorError::AncestorLsnTimeout(e),
|
||||
@@ -4034,6 +4070,27 @@ impl Timeline {
|
||||
// release lock on 'layers'
|
||||
};
|
||||
|
||||
// Backpressure mechanism: wait with continuation of the flush loop until we have uploaded all layer files.
|
||||
// This makes us refuse ingest until the new layers have been persisted to the remote
|
||||
// TODO: remove this, and rely on l0_flush_{delay,stall}_threshold instead.
|
||||
if self.get_l0_flush_wait_upload() {
|
||||
let start = Instant::now();
|
||||
self.remote_client
|
||||
.wait_completion()
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
WaitCompletionError::UploadQueueShutDownOrStopped
|
||||
| WaitCompletionError::NotInitialized(
|
||||
NotInitialized::ShuttingDown | NotInitialized::Stopped,
|
||||
) => FlushLayerError::Cancelled,
|
||||
WaitCompletionError::NotInitialized(NotInitialized::Uninitialized) => {
|
||||
FlushLayerError::Other(anyhow!(e).into())
|
||||
}
|
||||
})?;
|
||||
let duration = start.elapsed().as_secs_f64();
|
||||
self.metrics.flush_wait_upload_time_gauge_add(duration);
|
||||
}
|
||||
|
||||
// FIXME: between create_delta_layer and the scheduling of the upload in `update_metadata_file`,
|
||||
// a compaction can delete the file and then it won't be available for uploads any more.
|
||||
// We still schedule the upload, resulting in an error, but ideally we'd somehow avoid this
|
||||
@@ -4364,11 +4421,15 @@ impl Timeline {
|
||||
if wrote_keys {
|
||||
// Normal path: we have written some data into the new image layer for this
|
||||
// partition, so flush it to disk.
|
||||
let (desc, path) = image_layer_writer.finish(ctx).await?;
|
||||
let image_layer = Layer::finish_creating(self.conf, self, desc, &path)?;
|
||||
info!("created image layer for rel {}", image_layer.local_path());
|
||||
info!(
|
||||
"produced image layer for rel {}",
|
||||
ImageLayerName {
|
||||
key_range: img_range.clone(),
|
||||
lsn
|
||||
},
|
||||
);
|
||||
Ok(ImageLayerCreationOutcome {
|
||||
image: Some(image_layer),
|
||||
unfinished_image_layer: Some(image_layer_writer),
|
||||
next_start_key: img_range.end,
|
||||
})
|
||||
} else {
|
||||
@@ -4378,7 +4439,7 @@ impl Timeline {
|
||||
// layer we write will cover the key range that we just scanned.
|
||||
tracing::debug!("no data in range {}-{}", img_range.start, img_range.end);
|
||||
Ok(ImageLayerCreationOutcome {
|
||||
image: None,
|
||||
unfinished_image_layer: None,
|
||||
next_start_key: start,
|
||||
})
|
||||
}
|
||||
@@ -4427,7 +4488,7 @@ impl Timeline {
|
||||
|
||||
if !trigger_generation && mode == ImageLayerCreationMode::Try {
|
||||
return Ok(ImageLayerCreationOutcome {
|
||||
image: None,
|
||||
unfinished_image_layer: None,
|
||||
next_start_key: img_range.end,
|
||||
});
|
||||
}
|
||||
@@ -4453,14 +4514,15 @@ impl Timeline {
|
||||
if wrote_any_image {
|
||||
// Normal path: we have written some data into the new image layer for this
|
||||
// partition, so flush it to disk.
|
||||
let (desc, path) = image_layer_writer.finish(ctx).await?;
|
||||
let image_layer = Layer::finish_creating(self.conf, self, desc, &path)?;
|
||||
info!(
|
||||
"created image layer for metadata {}",
|
||||
image_layer.local_path()
|
||||
ImageLayerName {
|
||||
key_range: img_range.clone(),
|
||||
lsn
|
||||
}
|
||||
);
|
||||
Ok(ImageLayerCreationOutcome {
|
||||
image: Some(image_layer),
|
||||
unfinished_image_layer: Some(image_layer_writer),
|
||||
next_start_key: img_range.end,
|
||||
})
|
||||
} else {
|
||||
@@ -4470,7 +4532,7 @@ impl Timeline {
|
||||
// layer we write will cover the key range that we just scanned.
|
||||
tracing::debug!("no data in range {}-{}", img_range.start, img_range.end);
|
||||
Ok(ImageLayerCreationOutcome {
|
||||
image: None,
|
||||
unfinished_image_layer: None,
|
||||
next_start_key: start,
|
||||
})
|
||||
}
|
||||
@@ -4537,7 +4599,6 @@ impl Timeline {
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Vec<ResidentLayer>, CreateImageLayersError> {
|
||||
let timer = self.metrics.create_images_time_histo.start_timer();
|
||||
let mut image_layers = Vec::new();
|
||||
|
||||
// We need to avoid holes between generated image layers.
|
||||
// Otherwise LayerMap::image_layer_exists will return false if key range of some layer is covered by more than one
|
||||
@@ -4552,6 +4613,8 @@ impl Timeline {
|
||||
|
||||
let check_for_image_layers = self.should_check_if_image_layers_required(lsn);
|
||||
|
||||
let mut batch_image_writer = BatchLayerWriter::new(self.conf).await?;
|
||||
|
||||
for partition in partitioning.parts.iter() {
|
||||
if self.cancel.is_cancelled() {
|
||||
return Err(CreateImageLayersError::Cancelled);
|
||||
@@ -4624,45 +4687,45 @@ impl Timeline {
|
||||
.map_err(|_| CreateImageLayersError::Cancelled)?,
|
||||
);
|
||||
|
||||
if !compact_metadata {
|
||||
let ImageLayerCreationOutcome {
|
||||
image,
|
||||
next_start_key,
|
||||
} = self
|
||||
.create_image_layer_for_rel_blocks(
|
||||
partition,
|
||||
image_layer_writer,
|
||||
lsn,
|
||||
ctx,
|
||||
img_range,
|
||||
start,
|
||||
io_concurrency,
|
||||
)
|
||||
.await?;
|
||||
|
||||
start = next_start_key;
|
||||
image_layers.extend(image);
|
||||
let ImageLayerCreationOutcome {
|
||||
unfinished_image_layer,
|
||||
next_start_key,
|
||||
} = if !compact_metadata {
|
||||
self.create_image_layer_for_rel_blocks(
|
||||
partition,
|
||||
image_layer_writer,
|
||||
lsn,
|
||||
ctx,
|
||||
img_range.clone(),
|
||||
start,
|
||||
io_concurrency,
|
||||
)
|
||||
.await?
|
||||
} else {
|
||||
let ImageLayerCreationOutcome {
|
||||
image,
|
||||
next_start_key,
|
||||
} = self
|
||||
.create_image_layer_for_metadata_keys(
|
||||
partition,
|
||||
image_layer_writer,
|
||||
lsn,
|
||||
ctx,
|
||||
img_range,
|
||||
mode,
|
||||
start,
|
||||
io_concurrency,
|
||||
)
|
||||
.await?;
|
||||
start = next_start_key;
|
||||
image_layers.extend(image);
|
||||
self.create_image_layer_for_metadata_keys(
|
||||
partition,
|
||||
image_layer_writer,
|
||||
lsn,
|
||||
ctx,
|
||||
img_range.clone(),
|
||||
mode,
|
||||
start,
|
||||
io_concurrency,
|
||||
)
|
||||
.await?
|
||||
};
|
||||
start = next_start_key;
|
||||
if let Some(unfinished_image_layer) = unfinished_image_layer {
|
||||
batch_image_writer.add_unfinished_image_writer(
|
||||
unfinished_image_layer,
|
||||
img_range,
|
||||
lsn,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let image_layers = batch_image_writer.finish(self, ctx).await?;
|
||||
|
||||
let mut guard = self.layers.write().await;
|
||||
|
||||
// FIXME: we could add the images to be uploaded *before* returning from here, but right
|
||||
|
||||
@@ -47,9 +47,7 @@ use crate::tenant::timeline::{ImageLayerCreationOutcome, IoConcurrency};
|
||||
use crate::tenant::timeline::{Layer, ResidentLayer};
|
||||
use crate::tenant::{gc_block, DeltaLayer, MaybeOffloaded};
|
||||
use crate::virtual_file::{MaybeFatalIo, VirtualFile};
|
||||
use pageserver_api::config::tenant_conf_defaults::{
|
||||
DEFAULT_CHECKPOINT_DISTANCE, DEFAULT_COMPACTION_THRESHOLD,
|
||||
};
|
||||
use pageserver_api::config::tenant_conf_defaults::DEFAULT_CHECKPOINT_DISTANCE;
|
||||
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::keyspace::KeySpace;
|
||||
@@ -1114,17 +1112,10 @@ impl Timeline {
|
||||
// Accumulate the size of layers in `deltas_to_compact`
|
||||
let mut deltas_to_compact_bytes = 0;
|
||||
|
||||
// Under normal circumstances, we will accumulate up to compaction_interval L0s of size
|
||||
// Under normal circumstances, we will accumulate up to compaction_upper_limit L0s of size
|
||||
// checkpoint_distance each. To avoid edge cases using extra system resources, bound our
|
||||
// work in this function to only operate on this much delta data at once.
|
||||
//
|
||||
// Take the max of the configured value & the default, so that tests that configure tiny values
|
||||
// can still use a sensible amount of memory, but if a deployed system configures bigger values we
|
||||
// still let them compact a full stack of L0s in one go.
|
||||
let delta_size_limit = std::cmp::max(
|
||||
self.get_compaction_threshold(),
|
||||
DEFAULT_COMPACTION_THRESHOLD,
|
||||
) as u64
|
||||
let delta_size_limit = self.get_compaction_upper_limit() as u64
|
||||
* std::cmp::max(self.get_checkpoint_distance(), DEFAULT_CHECKPOINT_DISTANCE);
|
||||
|
||||
let mut fully_compacted = true;
|
||||
@@ -3197,7 +3188,7 @@ impl TimelineAdaptor {
|
||||
// TODO set proper (stateful) start. The create_image_layer_for_rel_blocks function mostly
|
||||
let start = Key::MIN;
|
||||
let ImageLayerCreationOutcome {
|
||||
image,
|
||||
unfinished_image_layer,
|
||||
next_start_key: _,
|
||||
} = self
|
||||
.timeline
|
||||
@@ -3212,7 +3203,10 @@ impl TimelineAdaptor {
|
||||
)
|
||||
.await?;
|
||||
|
||||
if let Some(image_layer) = image {
|
||||
if let Some(image_layer_writer) = unfinished_image_layer {
|
||||
let (desc, path) = image_layer_writer.finish(ctx).await?;
|
||||
let image_layer =
|
||||
Layer::finish_creating(self.timeline.conf, &self.timeline, desc, &path)?;
|
||||
self.new_images.push(image_layer);
|
||||
}
|
||||
|
||||
|
||||
@@ -113,7 +113,7 @@ pub async fn doit(
|
||||
match res {
|
||||
Ok(_) => break,
|
||||
Err(err) => {
|
||||
info!(?err, "indefintely waiting for pgdata to finish");
|
||||
info!(?err, "indefinitely waiting for pgdata to finish");
|
||||
if tokio::time::timeout(std::time::Duration::from_secs(10), cancel.cancelled())
|
||||
.await
|
||||
.is_ok()
|
||||
|
||||
@@ -308,7 +308,7 @@ impl ControlFile {
|
||||
202107181 => 14,
|
||||
202209061 => 15,
|
||||
202307071 => 16,
|
||||
/* XXX pg17 */
|
||||
202406281 => 17,
|
||||
catversion => {
|
||||
anyhow::bail!("unrecognized catalog version {catversion}")
|
||||
}
|
||||
|
||||
@@ -164,9 +164,10 @@ pub(super) async fn connection_manager_loop_step(
|
||||
Ok(Some(broker_update)) => connection_manager_state.register_timeline_update(broker_update),
|
||||
Err(status) => {
|
||||
match status.code() {
|
||||
Code::Unknown if status.message().contains("stream closed because of a broken pipe") || status.message().contains("connection reset") => {
|
||||
Code::Unknown if status.message().contains("stream closed because of a broken pipe") || status.message().contains("connection reset") || status.message().contains("error reading a body from connection") => {
|
||||
// tonic's error handling doesn't provide a clear code for disconnections: we get
|
||||
// "h2 protocol error: error reading a body from connection: stream closed because of a broken pipe"
|
||||
// => https://github.com/neondatabase/neon/issues/9562
|
||||
info!("broker disconnected: {status}");
|
||||
},
|
||||
_ => {
|
||||
@@ -273,7 +274,7 @@ pub(super) async fn connection_manager_loop_step(
|
||||
};
|
||||
|
||||
last_discovery_ts = Some(std::time::Instant::now());
|
||||
debug!("No active connection and no candidates, sending discovery request to the broker");
|
||||
info!("No active connection and no candidates, sending discovery request to the broker");
|
||||
|
||||
// Cancellation safety: we want to send a message to the broker, but publish_one()
|
||||
// function can get cancelled by the other select! arm. This is absolutely fine, because
|
||||
|
||||
@@ -118,7 +118,7 @@ pub(super) async fn handle_walreceiver_connection(
|
||||
cancellation: CancellationToken,
|
||||
connect_timeout: Duration,
|
||||
ctx: RequestContext,
|
||||
node: NodeId,
|
||||
safekeeper_node: NodeId,
|
||||
ingest_batch_size: u64,
|
||||
) -> Result<(), WalReceiverError> {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
@@ -140,7 +140,7 @@ pub(super) async fn handle_walreceiver_connection(
|
||||
|
||||
let (replication_client, connection) = {
|
||||
let mut config = wal_source_connconf.to_tokio_postgres_config();
|
||||
config.application_name(format!("pageserver-{}", node.0).as_str());
|
||||
config.application_name(format!("pageserver-{}", timeline.conf.id.0).as_str());
|
||||
config.replication_mode(tokio_postgres::config::ReplicationMode::Physical);
|
||||
match time::timeout(connect_timeout, config.connect(postgres::NoTls)).await {
|
||||
Ok(client_and_conn) => client_and_conn?,
|
||||
@@ -162,7 +162,7 @@ pub(super) async fn handle_walreceiver_connection(
|
||||
latest_wal_update: Utc::now().naive_utc(),
|
||||
streaming_lsn: None,
|
||||
commit_lsn: None,
|
||||
node,
|
||||
node: safekeeper_node,
|
||||
};
|
||||
if let Err(e) = events_sender.send(TaskStateUpdate::Progress(connection_status)) {
|
||||
warn!("Wal connection event listener dropped right after connection init, aborting the connection: {e}");
|
||||
|
||||
@@ -480,7 +480,7 @@ lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
|
||||
if (LFC_ENABLED())
|
||||
{
|
||||
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
|
||||
found = entry != NULL && (entry->bitmap[chunk_offs >> 5] & (1 << (chunk_offs & 31))) != 0;
|
||||
found = entry != NULL && (entry->bitmap[chunk_offs >> 5] & ((uint32)1 << (chunk_offs & 31))) != 0;
|
||||
}
|
||||
LWLockRelease(lfc_lock);
|
||||
return found;
|
||||
@@ -527,7 +527,7 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
for (; chunk_offs < BLOCKS_PER_CHUNK && i < nblocks; chunk_offs++, i++)
|
||||
{
|
||||
if ((entry->bitmap[chunk_offs >> 5] &
|
||||
(1 << (chunk_offs & 31))) != 0)
|
||||
((uint32)1 << (chunk_offs & 31))) != 0)
|
||||
{
|
||||
BITMAP_SET(bitmap, i);
|
||||
found++;
|
||||
@@ -620,7 +620,7 @@ lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
|
||||
}
|
||||
|
||||
/* remove the page from the cache */
|
||||
entry->bitmap[chunk_offs >> 5] &= ~(1 << (chunk_offs & (32 - 1)));
|
||||
entry->bitmap[chunk_offs >> 5] &= ~((uint32)1 << (chunk_offs & (32 - 1)));
|
||||
|
||||
if (entry->access_count == 0)
|
||||
{
|
||||
@@ -774,7 +774,7 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
* If the page is valid, we consider it "read".
|
||||
* All other pages will be fetched separately by the next cache
|
||||
*/
|
||||
if (entry->bitmap[(chunk_offs + i) / 32] & (1 << ((chunk_offs + i) % 32)))
|
||||
if (entry->bitmap[(chunk_offs + i) / 32] & ((uint32)1 << ((chunk_offs + i) % 32)))
|
||||
{
|
||||
BITMAP_SET(mask, buf_offset + i);
|
||||
iteration_hits++;
|
||||
@@ -1034,7 +1034,7 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
{
|
||||
lfc_ctl->used_pages += 1 - ((entry->bitmap[(chunk_offs + i) >> 5] >> ((chunk_offs + i) & 31)) & 1);
|
||||
entry->bitmap[(chunk_offs + i) >> 5] |=
|
||||
(1 << ((chunk_offs + i) & 31));
|
||||
((uint32)1 << ((chunk_offs + i) & 31));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1282,7 +1282,7 @@ local_cache_pages(PG_FUNCTION_ARGS)
|
||||
{
|
||||
for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
|
||||
{
|
||||
if (entry->bitmap[i >> 5] & (1 << (i & 31)))
|
||||
if (entry->bitmap[i >> 5] & ((uint32)1 << (i & 31)))
|
||||
{
|
||||
fctx->record[n].pageoffs = entry->offset * BLOCKS_PER_CHUNK + i;
|
||||
fctx->record[n].relfilenode = NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key));
|
||||
|
||||
@@ -1024,7 +1024,8 @@ DetermineEpochStartLsn(WalProposer *wp)
|
||||
dth = &wp->safekeeper[wp->donor].voteResponse.termHistory;
|
||||
wp->propTermHistory.n_entries = dth->n_entries + 1;
|
||||
wp->propTermHistory.entries = palloc(sizeof(TermSwitchEntry) * wp->propTermHistory.n_entries);
|
||||
memcpy(wp->propTermHistory.entries, dth->entries, sizeof(TermSwitchEntry) * dth->n_entries);
|
||||
if (dth->n_entries > 0)
|
||||
memcpy(wp->propTermHistory.entries, dth->entries, sizeof(TermSwitchEntry) * dth->n_entries);
|
||||
wp->propTermHistory.entries[wp->propTermHistory.n_entries - 1].term = wp->propTerm;
|
||||
wp->propTermHistory.entries[wp->propTermHistory.n_entries - 1].lsn = wp->propEpochStartLsn;
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ pub(crate) use console_redirect::ConsoleRedirectError;
|
||||
use ipnet::{Ipv4Net, Ipv6Net};
|
||||
use local::LocalBackend;
|
||||
use postgres_client::config::AuthKeys;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
@@ -133,7 +134,7 @@ pub(crate) struct ComputeUserInfoNoEndpoint {
|
||||
pub(crate) options: NeonOptions,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
pub(crate) struct ComputeUserInfo {
|
||||
pub(crate) endpoint: EndpointId,
|
||||
pub(crate) user: RoleName,
|
||||
|
||||
@@ -7,12 +7,11 @@ use std::time::Duration;
|
||||
use anyhow::{bail, ensure, Context};
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use compute_api::spec::LocalProxySpec;
|
||||
use dashmap::DashMap;
|
||||
use futures::future::Either;
|
||||
use proxy::auth::backend::jwt::JwkCache;
|
||||
use proxy::auth::backend::local::{LocalBackend, JWKS_ROLE_MAP};
|
||||
use proxy::auth::{self};
|
||||
use proxy::cancellation::CancellationHandlerMain;
|
||||
use proxy::cancellation::CancellationHandler;
|
||||
use proxy::config::{
|
||||
self, AuthenticationConfig, ComputeConfig, HttpConfig, ProxyConfig, RetryConfig,
|
||||
};
|
||||
@@ -211,12 +210,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
auth_backend,
|
||||
http_listener,
|
||||
shutdown.clone(),
|
||||
Arc::new(CancellationHandlerMain::new(
|
||||
&config.connect_to_compute,
|
||||
Arc::new(DashMap::new()),
|
||||
None,
|
||||
proxy::metrics::CancellationSource::Local,
|
||||
)),
|
||||
Arc::new(CancellationHandler::new(&config.connect_to_compute, None)),
|
||||
endpoint_rate_limiter,
|
||||
);
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ use anyhow::bail;
|
||||
use futures::future::Either;
|
||||
use proxy::auth::backend::jwt::JwkCache;
|
||||
use proxy::auth::backend::{AuthRateLimiter, ConsoleRedirectBackend, MaybeOwned};
|
||||
use proxy::cancellation::{CancelMap, CancellationHandler};
|
||||
use proxy::cancellation::{handle_cancel_messages, CancellationHandler};
|
||||
use proxy::config::{
|
||||
self, remote_storage_from_toml, AuthenticationConfig, CacheOptions, ComputeConfig, HttpConfig,
|
||||
ProjectInfoCacheOptions, ProxyConfig, ProxyProtocolV2,
|
||||
@@ -18,8 +18,8 @@ use proxy::metrics::Metrics;
|
||||
use proxy::rate_limiter::{
|
||||
EndpointRateLimiter, LeakyBucketConfig, RateBucketInfo, WakeComputeRateLimiter,
|
||||
};
|
||||
use proxy::redis::cancellation_publisher::RedisPublisherClient;
|
||||
use proxy::redis::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
|
||||
use proxy::redis::kv_ops::RedisKVClient;
|
||||
use proxy::redis::{elasticache, notifications};
|
||||
use proxy::scram::threadpool::ThreadPool;
|
||||
use proxy::serverless::cancel_set::CancelSet;
|
||||
@@ -28,7 +28,6 @@ use proxy::tls::client_config::compute_client_config_with_root_certs;
|
||||
use proxy::{auth, control_plane, http, serverless, usage_metrics};
|
||||
use remote_storage::RemoteStorageConfig;
|
||||
use tokio::net::TcpListener;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::task::JoinSet;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{info, warn, Instrument};
|
||||
@@ -158,8 +157,11 @@ struct ProxyCliArgs {
|
||||
#[clap(long, default_value_t = 64)]
|
||||
auth_rate_limit_ip_subnet: u8,
|
||||
/// Redis rate limiter max number of requests per second.
|
||||
#[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]
|
||||
#[clap(long, default_values_t = RateBucketInfo::DEFAULT_REDIS_SET)]
|
||||
redis_rps_limit: Vec<RateBucketInfo>,
|
||||
/// Cancellation channel size (max queue size for redis kv client)
|
||||
#[clap(long, default_value = "1024")]
|
||||
cancellation_ch_size: usize,
|
||||
/// cache for `allowed_ips` (use `size=0` to disable)
|
||||
#[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
|
||||
allowed_ips_cache: String,
|
||||
@@ -382,27 +384,19 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
let cancellation_token = CancellationToken::new();
|
||||
|
||||
let cancel_map = CancelMap::default();
|
||||
|
||||
let redis_rps_limit = Vec::leak(args.redis_rps_limit.clone());
|
||||
RateBucketInfo::validate(redis_rps_limit)?;
|
||||
|
||||
let redis_publisher = match ®ional_redis_client {
|
||||
Some(redis_publisher) => Some(Arc::new(Mutex::new(RedisPublisherClient::new(
|
||||
redis_publisher.clone(),
|
||||
args.region.clone(),
|
||||
redis_rps_limit,
|
||||
)?))),
|
||||
None => None,
|
||||
};
|
||||
let redis_kv_client = regional_redis_client
|
||||
.as_ref()
|
||||
.map(|redis_publisher| RedisKVClient::new(redis_publisher.clone(), redis_rps_limit));
|
||||
|
||||
let cancellation_handler = Arc::new(CancellationHandler::<
|
||||
Option<Arc<Mutex<RedisPublisherClient>>>,
|
||||
>::new(
|
||||
// channel size should be higher than redis client limit to avoid blocking
|
||||
let cancel_ch_size = args.cancellation_ch_size;
|
||||
let (tx_cancel, rx_cancel) = tokio::sync::mpsc::channel(cancel_ch_size);
|
||||
let cancellation_handler = Arc::new(CancellationHandler::new(
|
||||
&config.connect_to_compute,
|
||||
cancel_map.clone(),
|
||||
redis_publisher,
|
||||
proxy::metrics::CancellationSource::FromClient,
|
||||
Some(tx_cancel),
|
||||
));
|
||||
|
||||
// bit of a hack - find the min rps and max rps supported and turn it into
|
||||
@@ -495,25 +489,29 @@ async fn main() -> anyhow::Result<()> {
|
||||
let cache = api.caches.project_info.clone();
|
||||
if let Some(client) = client1 {
|
||||
maintenance_tasks.spawn(notifications::task_main(
|
||||
config,
|
||||
client,
|
||||
cache.clone(),
|
||||
cancel_map.clone(),
|
||||
args.region.clone(),
|
||||
));
|
||||
}
|
||||
if let Some(client) = client2 {
|
||||
maintenance_tasks.spawn(notifications::task_main(
|
||||
config,
|
||||
client,
|
||||
cache.clone(),
|
||||
cancel_map.clone(),
|
||||
args.region.clone(),
|
||||
));
|
||||
}
|
||||
maintenance_tasks.spawn(async move { cache.clone().gc_worker().await });
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(mut redis_kv_client) = redis_kv_client {
|
||||
maintenance_tasks.spawn(async move {
|
||||
redis_kv_client.try_connect().await?;
|
||||
handle_cancel_messages(&mut redis_kv_client, rx_cancel).await
|
||||
});
|
||||
}
|
||||
|
||||
if let Some(regional_redis_client) = regional_redis_client {
|
||||
let cache = api.caches.endpoints_cache.clone();
|
||||
let con = regional_redis_client;
|
||||
|
||||
@@ -1,48 +1,124 @@
|
||||
use std::net::{IpAddr, SocketAddr};
|
||||
use std::sync::Arc;
|
||||
|
||||
use dashmap::DashMap;
|
||||
use ipnet::{IpNet, Ipv4Net, Ipv6Net};
|
||||
use postgres_client::tls::MakeTlsConnect;
|
||||
use postgres_client::CancelToken;
|
||||
use pq_proto::CancelKeyData;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use thiserror::Error;
|
||||
use tokio::net::TcpStream;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::sync::mpsc;
|
||||
use tracing::{debug, info};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::auth::backend::{BackendIpAllowlist, ComputeUserInfo};
|
||||
use crate::auth::{check_peer_addr_is_in_list, AuthError, IpPattern};
|
||||
use crate::auth::{check_peer_addr_is_in_list, AuthError};
|
||||
use crate::config::ComputeConfig;
|
||||
use crate::context::RequestContext;
|
||||
use crate::error::ReportableError;
|
||||
use crate::ext::LockExt;
|
||||
use crate::metrics::{CancellationRequest, CancellationSource, Metrics};
|
||||
use crate::metrics::CancelChannelSizeGuard;
|
||||
use crate::metrics::{CancellationRequest, Metrics, RedisMsgKind};
|
||||
use crate::rate_limiter::LeakyBucketRateLimiter;
|
||||
use crate::redis::cancellation_publisher::{
|
||||
CancellationPublisher, CancellationPublisherMut, RedisPublisherClient,
|
||||
};
|
||||
use crate::redis::keys::KeyPrefix;
|
||||
use crate::redis::kv_ops::RedisKVClient;
|
||||
use crate::tls::postgres_rustls::MakeRustlsConnect;
|
||||
|
||||
pub type CancelMap = Arc<DashMap<CancelKeyData, Option<CancelClosure>>>;
|
||||
pub type CancellationHandlerMain = CancellationHandler<Option<Arc<Mutex<RedisPublisherClient>>>>;
|
||||
pub(crate) type CancellationHandlerMainInternal = Option<Arc<Mutex<RedisPublisherClient>>>;
|
||||
use std::convert::Infallible;
|
||||
use tokio::sync::oneshot;
|
||||
|
||||
type IpSubnetKey = IpNet;
|
||||
|
||||
const CANCEL_KEY_TTL: i64 = 1_209_600; // 2 weeks cancellation key expire time
|
||||
const REDIS_SEND_TIMEOUT: std::time::Duration = std::time::Duration::from_millis(10);
|
||||
|
||||
// Message types for sending through mpsc channel
|
||||
pub enum CancelKeyOp {
|
||||
StoreCancelKey {
|
||||
key: String,
|
||||
field: String,
|
||||
value: String,
|
||||
resp_tx: Option<oneshot::Sender<anyhow::Result<()>>>,
|
||||
_guard: CancelChannelSizeGuard<'static>,
|
||||
expire: i64, // TTL for key
|
||||
},
|
||||
GetCancelData {
|
||||
key: String,
|
||||
resp_tx: oneshot::Sender<anyhow::Result<Vec<(String, String)>>>,
|
||||
_guard: CancelChannelSizeGuard<'static>,
|
||||
},
|
||||
RemoveCancelKey {
|
||||
key: String,
|
||||
field: String,
|
||||
resp_tx: Option<oneshot::Sender<anyhow::Result<()>>>,
|
||||
_guard: CancelChannelSizeGuard<'static>,
|
||||
},
|
||||
}
|
||||
|
||||
// Running as a separate task to accept messages through the rx channel
|
||||
// In case of problems with RTT: switch to recv_many() + redis pipeline
|
||||
pub async fn handle_cancel_messages(
|
||||
client: &mut RedisKVClient,
|
||||
mut rx: mpsc::Receiver<CancelKeyOp>,
|
||||
) -> anyhow::Result<Infallible> {
|
||||
loop {
|
||||
if let Some(msg) = rx.recv().await {
|
||||
match msg {
|
||||
CancelKeyOp::StoreCancelKey {
|
||||
key,
|
||||
field,
|
||||
value,
|
||||
resp_tx,
|
||||
_guard,
|
||||
expire: _,
|
||||
} => {
|
||||
if let Some(resp_tx) = resp_tx {
|
||||
resp_tx
|
||||
.send(client.hset(key, field, value).await)
|
||||
.inspect_err(|e| {
|
||||
tracing::debug!("failed to send StoreCancelKey response: {:?}", e);
|
||||
})
|
||||
.ok();
|
||||
} else {
|
||||
drop(client.hset(key, field, value).await);
|
||||
}
|
||||
}
|
||||
CancelKeyOp::GetCancelData {
|
||||
key,
|
||||
resp_tx,
|
||||
_guard,
|
||||
} => {
|
||||
drop(resp_tx.send(client.hget_all(key).await));
|
||||
}
|
||||
CancelKeyOp::RemoveCancelKey {
|
||||
key,
|
||||
field,
|
||||
resp_tx,
|
||||
_guard,
|
||||
} => {
|
||||
if let Some(resp_tx) = resp_tx {
|
||||
resp_tx
|
||||
.send(client.hdel(key, field).await)
|
||||
.inspect_err(|e| {
|
||||
tracing::debug!("failed to send StoreCancelKey response: {:?}", e);
|
||||
})
|
||||
.ok();
|
||||
} else {
|
||||
drop(client.hdel(key, field).await);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Enables serving `CancelRequest`s.
|
||||
///
|
||||
/// If `CancellationPublisher` is available, cancel request will be used to publish the cancellation key to other proxy instances.
|
||||
pub struct CancellationHandler<P> {
|
||||
pub struct CancellationHandler {
|
||||
compute_config: &'static ComputeConfig,
|
||||
map: CancelMap,
|
||||
client: P,
|
||||
/// This field used for the monitoring purposes.
|
||||
/// Represents the source of the cancellation request.
|
||||
from: CancellationSource,
|
||||
// rate limiter of cancellation requests
|
||||
limiter: Arc<std::sync::Mutex<LeakyBucketRateLimiter<IpSubnetKey>>>,
|
||||
tx: Option<mpsc::Sender<CancelKeyOp>>, // send messages to the redis KV client task
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
@@ -61,6 +137,12 @@ pub(crate) enum CancelError {
|
||||
|
||||
#[error("Authentication backend error")]
|
||||
AuthError(#[from] AuthError),
|
||||
|
||||
#[error("key not found")]
|
||||
NotFound,
|
||||
|
||||
#[error("proxy service error")]
|
||||
InternalError,
|
||||
}
|
||||
|
||||
impl ReportableError for CancelError {
|
||||
@@ -73,274 +155,191 @@ impl ReportableError for CancelError {
|
||||
CancelError::Postgres(_) => crate::error::ErrorKind::Compute,
|
||||
CancelError::RateLimit => crate::error::ErrorKind::RateLimit,
|
||||
CancelError::IpNotAllowed => crate::error::ErrorKind::User,
|
||||
CancelError::NotFound => crate::error::ErrorKind::User,
|
||||
CancelError::AuthError(_) => crate::error::ErrorKind::ControlPlane,
|
||||
CancelError::InternalError => crate::error::ErrorKind::Service,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<P: CancellationPublisher> CancellationHandler<P> {
|
||||
/// Run async action within an ephemeral session identified by [`CancelKeyData`].
|
||||
pub(crate) fn get_session(self: Arc<Self>) -> Session<P> {
|
||||
impl CancellationHandler {
|
||||
pub fn new(
|
||||
compute_config: &'static ComputeConfig,
|
||||
tx: Option<mpsc::Sender<CancelKeyOp>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
compute_config,
|
||||
tx,
|
||||
limiter: Arc::new(std::sync::Mutex::new(
|
||||
LeakyBucketRateLimiter::<IpSubnetKey>::new_with_shards(
|
||||
LeakyBucketRateLimiter::<IpSubnetKey>::DEFAULT,
|
||||
64,
|
||||
),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_key(self: &Arc<Self>) -> Session {
|
||||
// we intentionally generate a random "backend pid" and "secret key" here.
|
||||
// we use the corresponding u64 as an identifier for the
|
||||
// actual endpoint+pid+secret for postgres/pgbouncer.
|
||||
//
|
||||
// if we forwarded the backend_pid from postgres to the client, there would be a lot
|
||||
// of overlap between our computes as most pids are small (~100).
|
||||
let key = loop {
|
||||
let key = rand::random();
|
||||
|
||||
// Random key collisions are unlikely to happen here, but they're still possible,
|
||||
// which is why we have to take care not to rewrite an existing key.
|
||||
match self.map.entry(key) {
|
||||
dashmap::mapref::entry::Entry::Occupied(_) => continue,
|
||||
dashmap::mapref::entry::Entry::Vacant(e) => {
|
||||
e.insert(None);
|
||||
}
|
||||
}
|
||||
break key;
|
||||
};
|
||||
let key: CancelKeyData = rand::random();
|
||||
|
||||
let prefix_key: KeyPrefix = KeyPrefix::Cancel(key);
|
||||
let redis_key = prefix_key.build_redis_key();
|
||||
|
||||
debug!("registered new query cancellation key {key}");
|
||||
Session {
|
||||
key,
|
||||
cancellation_handler: self,
|
||||
redis_key,
|
||||
cancellation_handler: Arc::clone(self),
|
||||
}
|
||||
}
|
||||
|
||||
/// Cancelling only in notification, will be removed
|
||||
pub(crate) async fn cancel_session(
|
||||
async fn get_cancel_key(
|
||||
&self,
|
||||
key: CancelKeyData,
|
||||
session_id: Uuid,
|
||||
peer_addr: IpAddr,
|
||||
check_allowed: bool,
|
||||
) -> Result<(), CancelError> {
|
||||
// TODO: check for unspecified address is only for backward compatibility, should be removed
|
||||
if !peer_addr.is_unspecified() {
|
||||
let subnet_key = match peer_addr {
|
||||
IpAddr::V4(ip) => IpNet::V4(Ipv4Net::new_assert(ip, 24).trunc()), // use defaut mask here
|
||||
IpAddr::V6(ip) => IpNet::V6(Ipv6Net::new_assert(ip, 64).trunc()),
|
||||
};
|
||||
if !self.limiter.lock_propagate_poison().check(subnet_key, 1) {
|
||||
// log only the subnet part of the IP address to know which subnet is rate limited
|
||||
tracing::warn!("Rate limit exceeded. Skipping cancellation message, {subnet_key}");
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.cancellation_requests_total
|
||||
.inc(CancellationRequest {
|
||||
source: self.from,
|
||||
kind: crate::metrics::CancellationOutcome::RateLimitExceeded,
|
||||
});
|
||||
return Err(CancelError::RateLimit);
|
||||
}
|
||||
}
|
||||
) -> Result<Option<CancelClosure>, CancelError> {
|
||||
let prefix_key: KeyPrefix = KeyPrefix::Cancel(key);
|
||||
let redis_key = prefix_key.build_redis_key();
|
||||
|
||||
// NB: we should immediately release the lock after cloning the token.
|
||||
let cancel_state = self.map.get(&key).and_then(|x| x.clone());
|
||||
let Some(cancel_closure) = cancel_state else {
|
||||
tracing::warn!("query cancellation key not found: {key}");
|
||||
Metrics::get()
|
||||
let (resp_tx, resp_rx) = tokio::sync::oneshot::channel();
|
||||
let op = CancelKeyOp::GetCancelData {
|
||||
key: redis_key,
|
||||
resp_tx,
|
||||
_guard: Metrics::get()
|
||||
.proxy
|
||||
.cancellation_requests_total
|
||||
.inc(CancellationRequest {
|
||||
source: self.from,
|
||||
kind: crate::metrics::CancellationOutcome::NotFound,
|
||||
});
|
||||
|
||||
if session_id == Uuid::nil() {
|
||||
// was already published, do not publish it again
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
match self.client.try_publish(key, session_id, peer_addr).await {
|
||||
Ok(()) => {} // do nothing
|
||||
Err(e) => {
|
||||
// log it here since cancel_session could be spawned in a task
|
||||
tracing::error!("failed to publish cancellation key: {key}, error: {e}");
|
||||
return Err(CancelError::IO(std::io::Error::new(
|
||||
std::io::ErrorKind::Other,
|
||||
e.to_string(),
|
||||
)));
|
||||
}
|
||||
}
|
||||
return Ok(());
|
||||
.cancel_channel_size
|
||||
.guard(RedisMsgKind::HGetAll),
|
||||
};
|
||||
|
||||
if check_allowed
|
||||
&& !check_peer_addr_is_in_list(&peer_addr, cancel_closure.ip_allowlist.as_slice())
|
||||
{
|
||||
// log it here since cancel_session could be spawned in a task
|
||||
tracing::warn!("IP is not allowed to cancel the query: {key}");
|
||||
return Err(CancelError::IpNotAllowed);
|
||||
}
|
||||
let Some(tx) = &self.tx else {
|
||||
tracing::warn!("cancellation handler is not available");
|
||||
return Err(CancelError::InternalError);
|
||||
};
|
||||
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.cancellation_requests_total
|
||||
.inc(CancellationRequest {
|
||||
source: self.from,
|
||||
kind: crate::metrics::CancellationOutcome::Found,
|
||||
});
|
||||
info!(
|
||||
"cancelling query per user's request using key {key}, hostname {}, address: {}",
|
||||
cancel_closure.hostname, cancel_closure.socket_addr
|
||||
);
|
||||
cancel_closure.try_cancel_query(self.compute_config).await
|
||||
tx.send_timeout(op, REDIS_SEND_TIMEOUT)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
tracing::warn!("failed to send GetCancelData for {key}: {e}");
|
||||
})
|
||||
.map_err(|()| CancelError::InternalError)?;
|
||||
|
||||
let result = resp_rx.await.map_err(|e| {
|
||||
tracing::warn!("failed to receive GetCancelData response: {e}");
|
||||
CancelError::InternalError
|
||||
})?;
|
||||
|
||||
let cancel_state_str: Option<String> = match result {
|
||||
Ok(mut state) => {
|
||||
if state.len() == 1 {
|
||||
Some(state.remove(0).1)
|
||||
} else {
|
||||
tracing::warn!("unexpected number of entries in cancel state: {state:?}");
|
||||
return Err(CancelError::InternalError);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("failed to receive cancel state from redis: {e}");
|
||||
return Err(CancelError::InternalError);
|
||||
}
|
||||
};
|
||||
|
||||
let cancel_state: Option<CancelClosure> = match cancel_state_str {
|
||||
Some(state) => {
|
||||
let cancel_closure: CancelClosure = serde_json::from_str(&state).map_err(|e| {
|
||||
tracing::warn!("failed to deserialize cancel state: {e}");
|
||||
CancelError::InternalError
|
||||
})?;
|
||||
Some(cancel_closure)
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
Ok(cancel_state)
|
||||
}
|
||||
|
||||
/// Try to cancel a running query for the corresponding connection.
|
||||
/// If the cancellation key is not found, it will be published to Redis.
|
||||
/// check_allowed - if true, check if the IP is allowed to cancel the query.
|
||||
/// Will fetch IP allowlist internally.
|
||||
///
|
||||
/// return Result primarily for tests
|
||||
pub(crate) async fn cancel_session_auth<T: BackendIpAllowlist>(
|
||||
pub(crate) async fn cancel_session<T: BackendIpAllowlist>(
|
||||
&self,
|
||||
key: CancelKeyData,
|
||||
ctx: RequestContext,
|
||||
check_allowed: bool,
|
||||
auth_backend: &T,
|
||||
) -> Result<(), CancelError> {
|
||||
// TODO: check for unspecified address is only for backward compatibility, should be removed
|
||||
if !ctx.peer_addr().is_unspecified() {
|
||||
let subnet_key = match ctx.peer_addr() {
|
||||
IpAddr::V4(ip) => IpNet::V4(Ipv4Net::new_assert(ip, 24).trunc()), // use defaut mask here
|
||||
IpAddr::V6(ip) => IpNet::V6(Ipv6Net::new_assert(ip, 64).trunc()),
|
||||
};
|
||||
if !self.limiter.lock_propagate_poison().check(subnet_key, 1) {
|
||||
// log only the subnet part of the IP address to know which subnet is rate limited
|
||||
tracing::warn!("Rate limit exceeded. Skipping cancellation message, {subnet_key}");
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.cancellation_requests_total
|
||||
.inc(CancellationRequest {
|
||||
source: self.from,
|
||||
kind: crate::metrics::CancellationOutcome::RateLimitExceeded,
|
||||
});
|
||||
return Err(CancelError::RateLimit);
|
||||
}
|
||||
let subnet_key = match ctx.peer_addr() {
|
||||
IpAddr::V4(ip) => IpNet::V4(Ipv4Net::new_assert(ip, 24).trunc()), // use defaut mask here
|
||||
IpAddr::V6(ip) => IpNet::V6(Ipv6Net::new_assert(ip, 64).trunc()),
|
||||
};
|
||||
if !self.limiter.lock_propagate_poison().check(subnet_key, 1) {
|
||||
// log only the subnet part of the IP address to know which subnet is rate limited
|
||||
tracing::warn!("Rate limit exceeded. Skipping cancellation message, {subnet_key}");
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.cancellation_requests_total
|
||||
.inc(CancellationRequest {
|
||||
kind: crate::metrics::CancellationOutcome::RateLimitExceeded,
|
||||
});
|
||||
return Err(CancelError::RateLimit);
|
||||
}
|
||||
|
||||
// NB: we should immediately release the lock after cloning the token.
|
||||
let cancel_state = self.map.get(&key).and_then(|x| x.clone());
|
||||
let cancel_state = self.get_cancel_key(key).await.map_err(|e| {
|
||||
tracing::warn!("failed to receive RedisOp response: {e}");
|
||||
CancelError::InternalError
|
||||
})?;
|
||||
|
||||
let Some(cancel_closure) = cancel_state else {
|
||||
tracing::warn!("query cancellation key not found: {key}");
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.cancellation_requests_total
|
||||
.inc(CancellationRequest {
|
||||
source: self.from,
|
||||
kind: crate::metrics::CancellationOutcome::NotFound,
|
||||
});
|
||||
|
||||
if ctx.session_id() == Uuid::nil() {
|
||||
// was already published, do not publish it again
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
match self
|
||||
.client
|
||||
.try_publish(key, ctx.session_id(), ctx.peer_addr())
|
||||
.await
|
||||
{
|
||||
Ok(()) => {} // do nothing
|
||||
Err(e) => {
|
||||
// log it here since cancel_session could be spawned in a task
|
||||
tracing::error!("failed to publish cancellation key: {key}, error: {e}");
|
||||
return Err(CancelError::IO(std::io::Error::new(
|
||||
std::io::ErrorKind::Other,
|
||||
e.to_string(),
|
||||
)));
|
||||
}
|
||||
}
|
||||
return Ok(());
|
||||
return Err(CancelError::NotFound);
|
||||
};
|
||||
|
||||
let ip_allowlist = auth_backend
|
||||
.get_allowed_ips(&ctx, &cancel_closure.user_info)
|
||||
.await
|
||||
.map_err(CancelError::AuthError)?;
|
||||
if check_allowed {
|
||||
let ip_allowlist = auth_backend
|
||||
.get_allowed_ips(&ctx, &cancel_closure.user_info)
|
||||
.await
|
||||
.map_err(CancelError::AuthError)?;
|
||||
|
||||
if check_allowed && !check_peer_addr_is_in_list(&ctx.peer_addr(), &ip_allowlist) {
|
||||
// log it here since cancel_session could be spawned in a task
|
||||
tracing::warn!("IP is not allowed to cancel the query: {key}");
|
||||
return Err(CancelError::IpNotAllowed);
|
||||
if !check_peer_addr_is_in_list(&ctx.peer_addr(), &ip_allowlist) {
|
||||
// log it here since cancel_session could be spawned in a task
|
||||
tracing::warn!(
|
||||
"IP is not allowed to cancel the query: {key}, address: {}",
|
||||
ctx.peer_addr()
|
||||
);
|
||||
return Err(CancelError::IpNotAllowed);
|
||||
}
|
||||
}
|
||||
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.cancellation_requests_total
|
||||
.inc(CancellationRequest {
|
||||
source: self.from,
|
||||
kind: crate::metrics::CancellationOutcome::Found,
|
||||
});
|
||||
info!("cancelling query per user's request using key {key}");
|
||||
cancel_closure.try_cancel_query(self.compute_config).await
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
fn contains(&self, session: &Session<P>) -> bool {
|
||||
self.map.contains_key(&session.key)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
fn is_empty(&self) -> bool {
|
||||
self.map.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
impl CancellationHandler<()> {
|
||||
pub fn new(
|
||||
compute_config: &'static ComputeConfig,
|
||||
map: CancelMap,
|
||||
from: CancellationSource,
|
||||
) -> Self {
|
||||
Self {
|
||||
compute_config,
|
||||
map,
|
||||
client: (),
|
||||
from,
|
||||
limiter: Arc::new(std::sync::Mutex::new(
|
||||
LeakyBucketRateLimiter::<IpSubnetKey>::new_with_shards(
|
||||
LeakyBucketRateLimiter::<IpSubnetKey>::DEFAULT,
|
||||
64,
|
||||
),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<P: CancellationPublisherMut> CancellationHandler<Option<Arc<Mutex<P>>>> {
|
||||
pub fn new(
|
||||
compute_config: &'static ComputeConfig,
|
||||
map: CancelMap,
|
||||
client: Option<Arc<Mutex<P>>>,
|
||||
from: CancellationSource,
|
||||
) -> Self {
|
||||
Self {
|
||||
compute_config,
|
||||
map,
|
||||
client,
|
||||
from,
|
||||
limiter: Arc::new(std::sync::Mutex::new(
|
||||
LeakyBucketRateLimiter::<IpSubnetKey>::new_with_shards(
|
||||
LeakyBucketRateLimiter::<IpSubnetKey>::DEFAULT,
|
||||
64,
|
||||
),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// This should've been a [`std::future::Future`], but
|
||||
/// it's impossible to name a type of an unboxed future
|
||||
/// (we'd need something like `#![feature(type_alias_impl_trait)]`).
|
||||
#[derive(Clone)]
|
||||
#[derive(Clone, Serialize, Deserialize)]
|
||||
pub struct CancelClosure {
|
||||
socket_addr: SocketAddr,
|
||||
cancel_token: CancelToken,
|
||||
ip_allowlist: Vec<IpPattern>,
|
||||
hostname: String, // for pg_sni router
|
||||
user_info: ComputeUserInfo,
|
||||
}
|
||||
@@ -349,14 +348,12 @@ impl CancelClosure {
|
||||
pub(crate) fn new(
|
||||
socket_addr: SocketAddr,
|
||||
cancel_token: CancelToken,
|
||||
ip_allowlist: Vec<IpPattern>,
|
||||
hostname: String,
|
||||
user_info: ComputeUserInfo,
|
||||
) -> Self {
|
||||
Self {
|
||||
socket_addr,
|
||||
cancel_token,
|
||||
ip_allowlist,
|
||||
hostname,
|
||||
user_info,
|
||||
}
|
||||
@@ -385,99 +382,75 @@ impl CancelClosure {
|
||||
debug!("query was cancelled");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Obsolete (will be removed after moving CancelMap to Redis), only for notifications
|
||||
pub(crate) fn set_ip_allowlist(&mut self, ip_allowlist: Vec<IpPattern>) {
|
||||
self.ip_allowlist = ip_allowlist;
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper for registering query cancellation tokens.
|
||||
pub(crate) struct Session<P> {
|
||||
pub(crate) struct Session {
|
||||
/// The user-facing key identifying this session.
|
||||
key: CancelKeyData,
|
||||
/// The [`CancelMap`] this session belongs to.
|
||||
cancellation_handler: Arc<CancellationHandler<P>>,
|
||||
redis_key: String,
|
||||
cancellation_handler: Arc<CancellationHandler>,
|
||||
}
|
||||
|
||||
impl<P> Session<P> {
|
||||
/// Store the cancel token for the given session.
|
||||
/// This enables query cancellation in `crate::proxy::prepare_client_connection`.
|
||||
pub(crate) fn enable_query_cancellation(&self, cancel_closure: CancelClosure) -> CancelKeyData {
|
||||
debug!("enabling query cancellation for this session");
|
||||
self.cancellation_handler
|
||||
.map
|
||||
.insert(self.key, Some(cancel_closure));
|
||||
|
||||
self.key
|
||||
impl Session {
|
||||
pub(crate) fn key(&self) -> &CancelKeyData {
|
||||
&self.key
|
||||
}
|
||||
}
|
||||
|
||||
impl<P> Drop for Session<P> {
|
||||
fn drop(&mut self) {
|
||||
self.cancellation_handler.map.remove(&self.key);
|
||||
debug!("dropped query cancellation key {}", &self.key);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[expect(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use std::time::Duration;
|
||||
|
||||
use super::*;
|
||||
use crate::config::RetryConfig;
|
||||
use crate::tls::client_config::compute_client_config_with_certs;
|
||||
|
||||
fn config() -> ComputeConfig {
|
||||
let retry = RetryConfig {
|
||||
base_delay: Duration::from_secs(1),
|
||||
max_retries: 5,
|
||||
backoff_factor: 2.0,
|
||||
// Send the store key op to the cancellation handler
|
||||
pub(crate) async fn write_cancel_key(
|
||||
&self,
|
||||
cancel_closure: CancelClosure,
|
||||
) -> Result<(), CancelError> {
|
||||
let Some(tx) = &self.cancellation_handler.tx else {
|
||||
tracing::warn!("cancellation handler is not available");
|
||||
return Err(CancelError::InternalError);
|
||||
};
|
||||
|
||||
ComputeConfig {
|
||||
retry,
|
||||
tls: Arc::new(compute_client_config_with_certs(std::iter::empty())),
|
||||
timeout: Duration::from_secs(2),
|
||||
}
|
||||
}
|
||||
let closure_json = serde_json::to_string(&cancel_closure).map_err(|e| {
|
||||
tracing::warn!("failed to serialize cancel closure: {e}");
|
||||
CancelError::InternalError
|
||||
})?;
|
||||
|
||||
#[tokio::test]
|
||||
async fn check_session_drop() -> anyhow::Result<()> {
|
||||
let cancellation_handler = Arc::new(CancellationHandler::<()>::new(
|
||||
Box::leak(Box::new(config())),
|
||||
CancelMap::default(),
|
||||
CancellationSource::FromRedis,
|
||||
));
|
||||
|
||||
let session = cancellation_handler.clone().get_session();
|
||||
assert!(cancellation_handler.contains(&session));
|
||||
drop(session);
|
||||
// Check that the session has been dropped.
|
||||
assert!(cancellation_handler.is_empty());
|
||||
let op = CancelKeyOp::StoreCancelKey {
|
||||
key: self.redis_key.clone(),
|
||||
field: "data".to_string(),
|
||||
value: closure_json,
|
||||
resp_tx: None,
|
||||
_guard: Metrics::get()
|
||||
.proxy
|
||||
.cancel_channel_size
|
||||
.guard(RedisMsgKind::HSet),
|
||||
expire: CANCEL_KEY_TTL,
|
||||
};
|
||||
|
||||
let _ = tx.send_timeout(op, REDIS_SEND_TIMEOUT).await.map_err(|e| {
|
||||
let key = self.key;
|
||||
tracing::warn!("failed to send StoreCancelKey for {key}: {e}");
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn cancel_session_noop_regression() {
|
||||
let handler = CancellationHandler::<()>::new(
|
||||
Box::leak(Box::new(config())),
|
||||
CancelMap::default(),
|
||||
CancellationSource::Local,
|
||||
);
|
||||
handler
|
||||
.cancel_session(
|
||||
CancelKeyData {
|
||||
backend_pid: 0,
|
||||
cancel_key: 0,
|
||||
},
|
||||
Uuid::new_v4(),
|
||||
"127.0.0.1".parse().unwrap(),
|
||||
true,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
pub(crate) async fn remove_cancel_key(&self) -> Result<(), CancelError> {
|
||||
let Some(tx) = &self.cancellation_handler.tx else {
|
||||
tracing::warn!("cancellation handler is not available");
|
||||
return Err(CancelError::InternalError);
|
||||
};
|
||||
|
||||
let op = CancelKeyOp::RemoveCancelKey {
|
||||
key: self.redis_key.clone(),
|
||||
field: "data".to_string(),
|
||||
resp_tx: None,
|
||||
_guard: Metrics::get()
|
||||
.proxy
|
||||
.cancel_channel_size
|
||||
.guard(RedisMsgKind::HSet),
|
||||
};
|
||||
|
||||
let _ = tx.send_timeout(op, REDIS_SEND_TIMEOUT).await.map_err(|e| {
|
||||
let key = self.key;
|
||||
tracing::warn!("failed to send RemoveCancelKey for {key}: {e}");
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -296,7 +296,6 @@ impl ConnCfg {
|
||||
process_id,
|
||||
secret_key,
|
||||
},
|
||||
vec![], // TODO: deprecated, will be removed
|
||||
host.to_string(),
|
||||
user_info,
|
||||
);
|
||||
|
||||
@@ -6,7 +6,7 @@ use tokio_util::sync::CancellationToken;
|
||||
use tracing::{debug, error, info, Instrument};
|
||||
|
||||
use crate::auth::backend::ConsoleRedirectBackend;
|
||||
use crate::cancellation::{CancellationHandlerMain, CancellationHandlerMainInternal};
|
||||
use crate::cancellation::CancellationHandler;
|
||||
use crate::config::{ProxyConfig, ProxyProtocolV2};
|
||||
use crate::context::RequestContext;
|
||||
use crate::error::ReportableError;
|
||||
@@ -24,7 +24,7 @@ pub async fn task_main(
|
||||
backend: &'static ConsoleRedirectBackend,
|
||||
listener: tokio::net::TcpListener,
|
||||
cancellation_token: CancellationToken,
|
||||
cancellation_handler: Arc<CancellationHandlerMain>,
|
||||
cancellation_handler: Arc<CancellationHandler>,
|
||||
) -> anyhow::Result<()> {
|
||||
scopeguard::defer! {
|
||||
info!("proxy has shut down");
|
||||
@@ -140,15 +140,16 @@ pub async fn task_main(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
config: &'static ProxyConfig,
|
||||
backend: &'static ConsoleRedirectBackend,
|
||||
ctx: &RequestContext,
|
||||
cancellation_handler: Arc<CancellationHandlerMain>,
|
||||
cancellation_handler: Arc<CancellationHandler>,
|
||||
stream: S,
|
||||
conn_gauge: NumClientConnectionsGuard<'static>,
|
||||
cancellations: tokio_util::task::task_tracker::TaskTracker,
|
||||
) -> Result<Option<ProxyPassthrough<CancellationHandlerMainInternal, S>>, ClientRequestError> {
|
||||
) -> Result<Option<ProxyPassthrough<S>>, ClientRequestError> {
|
||||
debug!(
|
||||
protocol = %ctx.protocol(),
|
||||
"handling interactive connection from client"
|
||||
@@ -171,13 +172,13 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
HandshakeData::Cancel(cancel_key_data) => {
|
||||
// spawn a task to cancel the session, but don't wait for it
|
||||
cancellations.spawn({
|
||||
let cancellation_handler_clone = Arc::clone(&cancellation_handler);
|
||||
let cancellation_handler_clone = Arc::clone(&cancellation_handler);
|
||||
let ctx = ctx.clone();
|
||||
let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?ctx.session_id());
|
||||
cancel_span.follows_from(tracing::Span::current());
|
||||
async move {
|
||||
cancellation_handler_clone
|
||||
.cancel_session_auth(
|
||||
.cancel_session(
|
||||
cancel_key_data,
|
||||
ctx,
|
||||
config.authentication_config.ip_allowlist_check_enabled,
|
||||
@@ -195,7 +196,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
|
||||
ctx.set_db_options(params.clone());
|
||||
|
||||
let (node_info, user_info, ip_allowlist) = match backend
|
||||
let (node_info, user_info, _ip_allowlist) = match backend
|
||||
.authenticate(ctx, &config.authentication_config, &mut stream)
|
||||
.await
|
||||
{
|
||||
@@ -220,10 +221,14 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
.or_else(|e| stream.throw_error(e))
|
||||
.await?;
|
||||
|
||||
node.cancel_closure
|
||||
.set_ip_allowlist(ip_allowlist.unwrap_or_default());
|
||||
let session = cancellation_handler.get_session();
|
||||
prepare_client_connection(&node, &session, &mut stream).await?;
|
||||
let cancellation_handler_clone = Arc::clone(&cancellation_handler);
|
||||
let session = cancellation_handler_clone.get_key();
|
||||
|
||||
session
|
||||
.write_cancel_key(node.cancel_closure.clone())
|
||||
.await?;
|
||||
|
||||
prepare_client_connection(&node, *session.key(), &mut stream).await?;
|
||||
|
||||
// Before proxy passing, forward to compute whatever data is left in the
|
||||
// PqStream input buffer. Normally there is none, but our serverless npm
|
||||
@@ -237,8 +242,8 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
aux: node.aux.clone(),
|
||||
compute: node,
|
||||
session_id: ctx.session_id(),
|
||||
cancel: session,
|
||||
_req: request_gauge,
|
||||
_conn: conn_gauge,
|
||||
_cancel: session,
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -423,11 +423,11 @@ async fn upload_parquet(
|
||||
.await
|
||||
.ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))
|
||||
.and_then(|x| x)
|
||||
.context("request_data_upload")
|
||||
.with_context(|| format!("request_data_upload: path={path}"))
|
||||
.err();
|
||||
|
||||
if let Some(err) = maybe_err {
|
||||
tracing::error!(%id, error = ?err, "failed to upload request data");
|
||||
tracing::error!(%id, %path, error = ?err, "failed to upload request data");
|
||||
}
|
||||
|
||||
Ok(buffer.writer())
|
||||
|
||||
@@ -56,6 +56,8 @@ pub struct ProxyMetrics {
|
||||
pub connection_requests: CounterPairVec<NumConnectionRequestsGauge>,
|
||||
#[metric(flatten)]
|
||||
pub http_endpoint_pools: HttpEndpointPools,
|
||||
#[metric(flatten)]
|
||||
pub cancel_channel_size: CounterPairVec<CancelChannelSizeGauge>,
|
||||
|
||||
/// Time it took for proxy to establish a connection to the compute endpoint.
|
||||
// largest bucket = 2^16 * 0.5ms = 32s
|
||||
@@ -294,6 +296,16 @@ impl CounterPairAssoc for NumConnectionRequestsGauge {
|
||||
pub type NumConnectionRequestsGuard<'a> =
|
||||
metrics::MeasuredCounterPairGuard<'a, NumConnectionRequestsGauge>;
|
||||
|
||||
pub struct CancelChannelSizeGauge;
|
||||
impl CounterPairAssoc for CancelChannelSizeGauge {
|
||||
const INC_NAME: &'static MetricName = MetricName::from_str("opened_msgs_cancel_channel_total");
|
||||
const DEC_NAME: &'static MetricName = MetricName::from_str("closed_msgs_cancel_channel_total");
|
||||
const INC_HELP: &'static str = "Number of processing messages in the cancellation channel.";
|
||||
const DEC_HELP: &'static str = "Number of closed messages in the cancellation channel.";
|
||||
type LabelGroupSet = StaticLabelSet<RedisMsgKind>;
|
||||
}
|
||||
pub type CancelChannelSizeGuard<'a> = metrics::MeasuredCounterPairGuard<'a, CancelChannelSizeGauge>;
|
||||
|
||||
#[derive(LabelGroup)]
|
||||
#[label(set = ComputeConnectionLatencySet)]
|
||||
pub struct ComputeConnectionLatencyGroup {
|
||||
@@ -340,13 +352,6 @@ pub struct RedisErrors<'a> {
|
||||
pub channel: &'a str,
|
||||
}
|
||||
|
||||
#[derive(FixedCardinalityLabel, Copy, Clone)]
|
||||
pub enum CancellationSource {
|
||||
FromClient,
|
||||
FromRedis,
|
||||
Local,
|
||||
}
|
||||
|
||||
#[derive(FixedCardinalityLabel, Copy, Clone)]
|
||||
pub enum CancellationOutcome {
|
||||
NotFound,
|
||||
@@ -357,7 +362,6 @@ pub enum CancellationOutcome {
|
||||
#[derive(LabelGroup)]
|
||||
#[label(set = CancellationRequestSet)]
|
||||
pub struct CancellationRequest {
|
||||
pub source: CancellationSource,
|
||||
pub kind: CancellationOutcome,
|
||||
}
|
||||
|
||||
@@ -369,6 +373,16 @@ pub enum Waiting {
|
||||
RetryTimeout,
|
||||
}
|
||||
|
||||
#[derive(FixedCardinalityLabel, Copy, Clone)]
|
||||
#[label(singleton = "kind")]
|
||||
pub enum RedisMsgKind {
|
||||
HSet,
|
||||
HSetMultiple,
|
||||
HGet,
|
||||
HGetAll,
|
||||
HDel,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct Accumulated {
|
||||
cplane: time::Duration,
|
||||
|
||||
@@ -13,8 +13,9 @@ pub use copy_bidirectional::{copy_bidirectional_client_compute, ErrorSource};
|
||||
use futures::{FutureExt, TryFutureExt};
|
||||
use itertools::Itertools;
|
||||
use once_cell::sync::OnceCell;
|
||||
use pq_proto::{BeMessage as Be, StartupMessageParams};
|
||||
use pq_proto::{BeMessage as Be, CancelKeyData, StartupMessageParams};
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use smol_str::{format_smolstr, SmolStr};
|
||||
use thiserror::Error;
|
||||
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
|
||||
@@ -23,7 +24,7 @@ use tracing::{debug, error, info, warn, Instrument};
|
||||
|
||||
use self::connect_compute::{connect_to_compute, TcpMechanism};
|
||||
use self::passthrough::ProxyPassthrough;
|
||||
use crate::cancellation::{self, CancellationHandlerMain, CancellationHandlerMainInternal};
|
||||
use crate::cancellation::{self, CancellationHandler};
|
||||
use crate::config::{ProxyConfig, ProxyProtocolV2, TlsConfig};
|
||||
use crate::context::RequestContext;
|
||||
use crate::error::ReportableError;
|
||||
@@ -57,7 +58,7 @@ pub async fn task_main(
|
||||
auth_backend: &'static auth::Backend<'static, ()>,
|
||||
listener: tokio::net::TcpListener,
|
||||
cancellation_token: CancellationToken,
|
||||
cancellation_handler: Arc<CancellationHandlerMain>,
|
||||
cancellation_handler: Arc<CancellationHandler>,
|
||||
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
|
||||
) -> anyhow::Result<()> {
|
||||
scopeguard::defer! {
|
||||
@@ -243,13 +244,13 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
config: &'static ProxyConfig,
|
||||
auth_backend: &'static auth::Backend<'static, ()>,
|
||||
ctx: &RequestContext,
|
||||
cancellation_handler: Arc<CancellationHandlerMain>,
|
||||
cancellation_handler: Arc<CancellationHandler>,
|
||||
stream: S,
|
||||
mode: ClientMode,
|
||||
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
|
||||
conn_gauge: NumClientConnectionsGuard<'static>,
|
||||
cancellations: tokio_util::task::task_tracker::TaskTracker,
|
||||
) -> Result<Option<ProxyPassthrough<CancellationHandlerMainInternal, S>>, ClientRequestError> {
|
||||
) -> Result<Option<ProxyPassthrough<S>>, ClientRequestError> {
|
||||
debug!(
|
||||
protocol = %ctx.protocol(),
|
||||
"handling interactive connection from client"
|
||||
@@ -278,7 +279,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
cancel_span.follows_from(tracing::Span::current());
|
||||
async move {
|
||||
cancellation_handler_clone
|
||||
.cancel_session_auth(
|
||||
.cancel_session(
|
||||
cancel_key_data,
|
||||
ctx,
|
||||
config.authentication_config.ip_allowlist_check_enabled,
|
||||
@@ -312,7 +313,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
};
|
||||
|
||||
let user = user_info.get_user().to_owned();
|
||||
let (user_info, ip_allowlist) = match user_info
|
||||
let (user_info, _ip_allowlist) = match user_info
|
||||
.authenticate(
|
||||
ctx,
|
||||
&mut stream,
|
||||
@@ -356,10 +357,14 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
.or_else(|e| stream.throw_error(e))
|
||||
.await?;
|
||||
|
||||
node.cancel_closure
|
||||
.set_ip_allowlist(ip_allowlist.unwrap_or_default());
|
||||
let session = cancellation_handler.get_session();
|
||||
prepare_client_connection(&node, &session, &mut stream).await?;
|
||||
let cancellation_handler_clone = Arc::clone(&cancellation_handler);
|
||||
let session = cancellation_handler_clone.get_key();
|
||||
|
||||
session
|
||||
.write_cancel_key(node.cancel_closure.clone())
|
||||
.await?;
|
||||
|
||||
prepare_client_connection(&node, *session.key(), &mut stream).await?;
|
||||
|
||||
// Before proxy passing, forward to compute whatever data is left in the
|
||||
// PqStream input buffer. Normally there is none, but our serverless npm
|
||||
@@ -373,23 +378,19 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
||||
aux: node.aux.clone(),
|
||||
compute: node,
|
||||
session_id: ctx.session_id(),
|
||||
cancel: session,
|
||||
_req: request_gauge,
|
||||
_conn: conn_gauge,
|
||||
_cancel: session,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Finish client connection initialization: confirm auth success, send params, etc.
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub(crate) async fn prepare_client_connection<P>(
|
||||
pub(crate) async fn prepare_client_connection(
|
||||
node: &compute::PostgresConnection,
|
||||
session: &cancellation::Session<P>,
|
||||
cancel_key_data: CancelKeyData,
|
||||
stream: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
|
||||
) -> Result<(), std::io::Error> {
|
||||
// Register compute's query cancellation token and produce a new, unique one.
|
||||
// The new token (cancel_key_data) will be sent to the client.
|
||||
let cancel_key_data = session.enable_query_cancellation(node.cancel_closure.clone());
|
||||
|
||||
// Forward all deferred notices to the client.
|
||||
for notice in &node.delayed_notice {
|
||||
stream.write_message_noflush(&Be::Raw(b'N', notice.as_bytes()))?;
|
||||
@@ -411,7 +412,7 @@ pub(crate) async fn prepare_client_connection<P>(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Default)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
|
||||
pub(crate) struct NeonOptions(Vec<(SmolStr, SmolStr)>);
|
||||
|
||||
impl NeonOptions {
|
||||
|
||||
@@ -56,18 +56,18 @@ pub(crate) async fn proxy_pass(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) struct ProxyPassthrough<P, S> {
|
||||
pub(crate) struct ProxyPassthrough<S> {
|
||||
pub(crate) client: Stream<S>,
|
||||
pub(crate) compute: PostgresConnection,
|
||||
pub(crate) aux: MetricsAuxInfo,
|
||||
pub(crate) session_id: uuid::Uuid,
|
||||
pub(crate) cancel: cancellation::Session,
|
||||
|
||||
pub(crate) _req: NumConnectionRequestsGuard<'static>,
|
||||
pub(crate) _conn: NumClientConnectionsGuard<'static>,
|
||||
pub(crate) _cancel: cancellation::Session<P>,
|
||||
}
|
||||
|
||||
impl<P, S: AsyncRead + AsyncWrite + Unpin> ProxyPassthrough<P, S> {
|
||||
impl<S: AsyncRead + AsyncWrite + Unpin> ProxyPassthrough<S> {
|
||||
pub(crate) async fn proxy_pass(
|
||||
self,
|
||||
compute_config: &ComputeConfig,
|
||||
@@ -81,6 +81,9 @@ impl<P, S: AsyncRead + AsyncWrite + Unpin> ProxyPassthrough<P, S> {
|
||||
{
|
||||
tracing::warn!(session_id = ?self.session_id, ?err, "could not cancel the query in the database");
|
||||
}
|
||||
|
||||
drop(self.cancel.remove_cancel_key().await); // we don't need a result. If the queue is full, we just log the error
|
||||
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
@@ -138,6 +138,12 @@ impl RateBucketInfo {
|
||||
Self::new(200, Duration::from_secs(600)),
|
||||
];
|
||||
|
||||
// For all the sessions will be cancel key. So this limit is essentially global proxy limit.
|
||||
pub const DEFAULT_REDIS_SET: [Self; 2] = [
|
||||
Self::new(100_000, Duration::from_secs(1)),
|
||||
Self::new(50_000, Duration::from_secs(10)),
|
||||
];
|
||||
|
||||
/// All of these are per endpoint-maskedip pair.
|
||||
/// Context: 4096 rounds of pbkdf2 take about 1ms of cpu time to execute (1 milli-cpu-second or 1mcpus).
|
||||
///
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user