Mirror of https://github.com/neondatabase/neon.git, synced 2026-01-23 21:30:36 +00:00.

Compare commits (1 commit): vlad/debug...arpad/remo

| Author | SHA1 | Date |
|---|---|---|
| | 3bc3f71418 | |
@@ -19,7 +19,7 @@
!pageserver/
!pgxn/
!proxy/
!endpoint_storage/
!object_storage/
!storage_scrubber/
!safekeeper/
!storage_broker/
.github/actions/run-python-test-set/action.yml (14 changed lines, vendored)

@@ -113,6 +113,8 @@ runs:
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: ${{ inputs.build_type }}
COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage')
ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
RERUN_FAILED: ${{ inputs.rerun_failed }}
PG_VERSION: ${{ inputs.pg_version }}
SANITIZERS: ${{ inputs.sanitizers }}

@@ -133,7 +135,6 @@ runs:
fi

PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)"
echo "PERF_REPORT_DIR=${PERF_REPORT_DIR}" >> ${GITHUB_ENV}
rm -rf $PERF_REPORT_DIR

TEST_SELECTION="test_runner/${{ inputs.test_selection }}"

@@ -210,12 +211,11 @@ runs:
--verbose \
-rA $TEST_SELECTION $EXTRA_PARAMS

- name: Upload performance report
if: ${{ !cancelled() && inputs.save_perf_report == 'true' }}
shell: bash -euxo pipefail {0}
run: |
export REPORT_FROM="${PERF_REPORT_DIR}"
scripts/generate_and_push_perf_report.sh
if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
export REPORT_FROM="$PERF_REPORT_DIR"
export REPORT_TO="$PLATFORM"
scripts/generate_and_push_perf_report.sh
fi

- name: Upload compatibility snapshot
# Note, that we use `github.base_ref` which is a target branch for a PR
.github/workflows/_build-and-test-locally.yml (14 changed lines, vendored)

@@ -272,13 +272,10 @@ jobs:
# run pageserver tests with different settings
for get_vectored_concurrent_io in sequential sidecar-task; do
for io_engine in std-fs tokio-epoll-uring ; do
for io_mode in buffered direct direct-rw ; do
NEON_PAGESERVER_UNIT_TEST_GET_VECTORED_CONCURRENT_IO=$get_vectored_concurrent_io \
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine \
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IO_MODE=$io_mode \
${cov_prefix} \
cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
done
NEON_PAGESERVER_UNIT_TEST_GET_VECTORED_CONCURRENT_IO=$get_vectored_concurrent_io \
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine \
${cov_prefix} \
cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
done
done

@@ -349,7 +346,7 @@ jobs:
contents: read
statuses: write
needs: [ build-neon ]
runs-on: ${{ fromJSON(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large-metal')) }}
runs-on: ${{ fromJSON(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
container:
image: ${{ inputs.build-tools-image }}
credentials:

@@ -395,7 +392,6 @@ jobs:
BUILD_TAG: ${{ inputs.build-tag }}
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
PAGESERVER_VIRTUAL_FILE_IO_MODE: direct-rw
USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}

# Temporary disable this step until we figure out why it's so flaky
.github/workflows/_create-release-pr.yml (11 changed lines, vendored)

@@ -53,13 +53,10 @@ jobs:
|| inputs.component-name == 'Compute' && 'release-compute'
}}
run: |
now_date=$(date -u +'%Y-%m-%d')
now_time=$(date -u +'%H-%M-%Z')
{
echo "title=${COMPONENT_NAME} release ${now_date}"
echo "rc-branch=rc/${RELEASE_BRANCH}/${now_date}_${now_time}"
echo "release-branch=${RELEASE_BRANCH}"
} | tee -a ${GITHUB_OUTPUT}
today=$(date +'%Y-%m-%d')
echo "title=${COMPONENT_NAME} release ${today}" | tee -a ${GITHUB_OUTPUT}
echo "rc-branch=rc/${RELEASE_BRANCH}/${today}" | tee -a ${GITHUB_OUTPUT}
echo "release-branch=${RELEASE_BRANCH}" | tee -a ${GITHUB_OUTPUT}

- name: Configure git
run: |
.github/workflows/_meta.yml (2 changed lines, vendored)

@@ -165,5 +165,5 @@ jobs:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
CURRENT_SHA: ${{ github.sha }}
run: |
RELEASE_PR_RUN_ID=$(gh api "/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=$CURRENT_SHA" | jq '[.workflow_runs[] | select(.name == "Build and Test") | select(.head_branch | test("^rc/release.*$"; "s"))] | first | .id // ("Failed to find Build and Test run from RC PR!" | halt_error(1))')
RELEASE_PR_RUN_ID=$(gh api "/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=$CURRENT_SHA" | jq '[.workflow_runs[] | select(.name == "Build and Test") | select(.head_branch | test("^rc/release(-(proxy|compute))?/[0-9]{4}-[0-9]{2}-[0-9]{2}$"; "s"))] | first | .id // ("Failed to find Build and Test run from RC PR!" | halt_error(1))')
echo "release-pr-run-id=$RELEASE_PR_RUN_ID" | tee -a $GITHUB_OUTPUT
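The two `test(...)` filters above differ only in how strictly they match RC branch names. As a rough illustration (a standalone Rust sketch using the `regex` crate rather than jq's regex engine; the sample branch names are made up, not taken from this diff):

```rust
// Hypothetical illustration of the two branch-name patterns in the jq
// filters above. Requires the `regex` crate (regex = "1" in Cargo.toml).
use regex::Regex;

fn main() {
    let loose = Regex::new(r"^rc/release.*$").unwrap();
    let strict =
        Regex::new(r"^rc/release(-(proxy|compute))?/[0-9]{4}-[0-9]{2}-[0-9]{2}$").unwrap();

    // Example branch names, chosen for illustration only.
    for branch in [
        "rc/release/2025-01-23",           // matches both patterns
        "rc/release-proxy/2025-01-23",     // matches both patterns
        "rc/release/2025-01-23_12-30-UTC", // matches only the loose pattern
        "rc/release-compute",              // matches only the loose pattern
    ] {
        println!(
            "{branch}: loose={} strict={}",
            loose.is_match(branch),
            strict.is_match(branch)
        );
    }
}
```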
63
.github/workflows/build-macos.yml
vendored
63
.github/workflows/build-macos.yml
vendored
@@ -63,8 +63,13 @@ jobs:
|
||||
|
||||
- name: Cache postgres ${{ matrix.postgres-version }} build
|
||||
id: cache_pg
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/${{ matrix.postgres-version }}
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ matrix.postgres-version }}-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
@@ -129,15 +134,25 @@ jobs:
|
||||
|
||||
- name: Cache postgres v17 build
|
||||
id: cache_pg
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v17
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Cache walproposer-lib
|
||||
id: cache_walproposer_lib
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/build/walproposer-lib
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
@@ -203,32 +218,57 @@ jobs:
|
||||
|
||||
- name: Cache postgres v14 build
|
||||
id: cache_pg
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v14
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v14-${{ steps.pg_rev_v14.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
- name: Cache postgres v15 build
|
||||
id: cache_pg_v15
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v15
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v15-${{ steps.pg_rev_v15.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
- name: Cache postgres v16 build
|
||||
id: cache_pg_v16
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v16
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v16-${{ steps.pg_rev_v16.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
- name: Cache postgres v17 build
|
||||
id: cache_pg_v17
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v17
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Cache cargo deps (only for v17)
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: |
|
||||
~/.cargo/registry
|
||||
!~/.cargo/registry/src
|
||||
@@ -238,8 +278,13 @@ jobs:
|
||||
|
||||
- name: Cache walproposer-lib
|
||||
id: cache_walproposer_lib
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/build/walproposer-lib
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
|
||||
.github/workflows/build_and_test.yml (4 changed lines, vendored)

@@ -323,8 +323,6 @@ jobs:
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
PAGESERVER_VIRTUAL_FILE_IO_MODE: direct-rw
SYNC_BETWEEN_TESTS: true
# XXX: no coverage data handling here, since benchmarks are run on release builds,
# while coverage is currently collected for the debug ones

@@ -1238,7 +1236,7 @@ jobs:
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
TIMEOUT=5400 # 90 minutes, usually it takes ~2-3 minutes, but if runners are busy, it might take longer
TIMEOUT=1800 # 30 minutes, usually it takes ~2-3 minutes, but if runners are busy, it might take longer
INTERVAL=15 # try each N seconds

last_status="" # a variable to carry the last status of the "build-and-upload-extensions" context
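For context, the step being changed here polls the "build-and-upload-extensions" status until it resolves or the timeout expires. A minimal sketch of that poll-with-timeout pattern (plain Rust; `check_status` is a hypothetical stand-in for the `gh api` call in the workflow, not real code from this repository):

```rust
use std::thread::sleep;
use std::time::{Duration, Instant};

// Hypothetical stand-in for the `gh api` call that reads the commit status.
fn check_status() -> Option<String> {
    None // e.g. Some("success".to_string()) once the context is reported
}

fn main() {
    let timeout = Duration::from_secs(1800); // 30 minutes, as in the workflow
    let interval = Duration::from_secs(15);  // try each N seconds
    let start = Instant::now();
    let mut last_status = String::new();     // carries the last observed status

    while start.elapsed() < timeout {
        if let Some(status) = check_status() {
            last_status = status;
            if last_status == "success" {
                println!("build-and-upload-extensions succeeded");
                return;
            }
        }
        sleep(interval);
    }
    eprintln!("timed out; last observed status: {last_status:?}");
    std::process::exit(1);
}
```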
.github/workflows/fast-forward.yml (8 changed lines, vendored)

@@ -27,17 +27,15 @@ jobs:
- name: Fast forwarding
uses: sequoia-pgp/fast-forward@ea7628bedcb0b0b96e94383ada458d812fca4979
# See https://docs.github.com/en/graphql/reference/enums#mergestatestatus
if: ${{ contains(fromJSON('["clean", "unstable"]'), github.event.pull_request.mergeable_state) }}
if: ${{ github.event.pull_request.mergeable_state == 'clean' }}
with:
merge: true
comment: on-error
github_token: ${{ secrets.CI_ACCESS_TOKEN }}

- name: Comment if mergeable_state is not clean
if: ${{ !contains(fromJSON('["clean", "unstable"]'), github.event.pull_request.mergeable_state) }}
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
if: ${{ github.event.pull_request.mergeable_state != 'clean' }}
run: |
gh pr comment ${{ github.event.pull_request.number }} \
--repo "${GITHUB_REPOSITORY}" \
--body "Not trying to forward pull-request, because \`mergeable_state\` is \`${{ github.event.pull_request.mergeable_state }}\`, not \`clean\` or \`unstable\`."
--body "Not trying to forward pull-request, because \`mergeable_state\` is \`${{ github.event.pull_request.mergeable_state }}\`, not \`clean\`."
.github/workflows/pg-clients.yml (4 changed lines, vendored)

@@ -30,7 +30,7 @@ permissions:
statuses: write # require for posting a status update

env:
DEFAULT_PG_VERSION: 17
DEFAULT_PG_VERSION: 16
PLATFORM: neon-captest-new
AWS_DEFAULT_REGION: eu-central-1

@@ -42,8 +42,6 @@ jobs:
github-event-name: ${{ github.event_name }}

build-build-tools-image:
permissions:
packages: write
needs: [ check-permissions ]
uses: ./.github/workflows/build-build-tools-image.yml
secrets: inherit
93
.github/workflows/random-ops-test.yml
vendored
93
.github/workflows/random-ops-test.yml
vendored
@@ -1,93 +0,0 @@
|
||||
name: Random Operations Test
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# * is a special character in YAML so you have to quote this string
|
||||
# ┌───────────── minute (0 - 59)
|
||||
# │ ┌───────────── hour (0 - 23)
|
||||
# │ │ ┌───────────── day of the month (1 - 31)
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
- cron: '23 */2 * * *' # runs every 2 hours
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
random_seed:
|
||||
type: number
|
||||
description: 'The random seed'
|
||||
required: false
|
||||
default: 0
|
||||
num_operations:
|
||||
type: number
|
||||
description: "The number of operations to test"
|
||||
default: 250
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
permissions: {}
|
||||
|
||||
env:
|
||||
DEFAULT_PG_VERSION: 16
|
||||
PLATFORM: neon-captest-new
|
||||
AWS_DEFAULT_REGION: eu-central-1
|
||||
|
||||
jobs:
|
||||
run-random-rests:
|
||||
env:
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
runs-on: small
|
||||
permissions:
|
||||
id-token: write
|
||||
statuses: write
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
pg-version: [16, 17]
|
||||
|
||||
container:
|
||||
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Run tests
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: remote
|
||||
test_selection: random_ops
|
||||
run_in_parallel: false
|
||||
extra_params: -m remote_cluster
|
||||
pg_version: ${{ matrix.pg-version }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
RANDOM_SEED: ${{ inputs.random_seed }}
|
||||
NUM_OPERATIONS: ${{ inputs.num_operations }}
|
||||
|
||||
- name: Create Allure report
|
||||
if: ${{ !cancelled() }}
|
||||
id: create-allure-report
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
store-test-results-into-db: true
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
161
Cargo.lock
generated
161
Cargo.lock
generated
@@ -40,7 +40,7 @@ dependencies = [
|
||||
"getrandom 0.2.11",
|
||||
"once_cell",
|
||||
"version_check",
|
||||
"zerocopy 0.7.31",
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1323,6 +1323,7 @@ dependencies = [
|
||||
"serde_json",
|
||||
"serde_with",
|
||||
"signal-hook",
|
||||
"spki 0.7.3",
|
||||
"tar",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
@@ -1415,7 +1416,6 @@ name = "control_plane"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"base64 0.13.1",
|
||||
"camino",
|
||||
"clap",
|
||||
"comfy-table",
|
||||
@@ -1425,12 +1425,10 @@ dependencies = [
|
||||
"humantime",
|
||||
"humantime-serde",
|
||||
"hyper 0.14.30",
|
||||
"jsonwebtoken",
|
||||
"nix 0.27.1",
|
||||
"once_cell",
|
||||
"pageserver_api",
|
||||
"pageserver_client",
|
||||
"pem",
|
||||
"postgres_backend",
|
||||
"postgres_connection",
|
||||
"regex",
|
||||
@@ -1439,8 +1437,6 @@ dependencies = [
|
||||
"scopeguard",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2",
|
||||
"spki 0.7.3",
|
||||
"storage_broker",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
@@ -2036,33 +2032,6 @@ dependencies = [
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "endpoint_storage"
|
||||
version = "0.0.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"axum",
|
||||
"axum-extra",
|
||||
"camino",
|
||||
"camino-tempfile",
|
||||
"futures",
|
||||
"http-body-util",
|
||||
"itertools 0.10.5",
|
||||
"jsonwebtoken",
|
||||
"prometheus",
|
||||
"rand 0.8.5",
|
||||
"remote_storage",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"test-log",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tower 0.5.2",
|
||||
"tracing",
|
||||
"utils",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "enum-map"
|
||||
version = "2.5.0"
|
||||
@@ -2848,7 +2817,6 @@ dependencies = [
|
||||
"hyper 0.14.30",
|
||||
"itertools 0.10.5",
|
||||
"jemalloc_pprof",
|
||||
"jsonwebtoken",
|
||||
"metrics",
|
||||
"once_cell",
|
||||
"pprof",
|
||||
@@ -4024,6 +3992,33 @@ dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "object_storage"
|
||||
version = "0.0.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"axum",
|
||||
"axum-extra",
|
||||
"camino",
|
||||
"camino-tempfile",
|
||||
"futures",
|
||||
"http-body-util",
|
||||
"itertools 0.10.5",
|
||||
"jsonwebtoken",
|
||||
"prometheus",
|
||||
"rand 0.8.5",
|
||||
"remote_storage",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"test-log",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tower 0.5.2",
|
||||
"tracing",
|
||||
"utils",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.20.2"
|
||||
@@ -4274,7 +4269,6 @@ dependencies = [
|
||||
"hyper 0.14.30",
|
||||
"indoc",
|
||||
"itertools 0.10.5",
|
||||
"jsonwebtoken",
|
||||
"md5",
|
||||
"metrics",
|
||||
"nix 0.27.1",
|
||||
@@ -4284,7 +4278,6 @@ dependencies = [
|
||||
"pageserver_api",
|
||||
"pageserver_client",
|
||||
"pageserver_compaction",
|
||||
"pem",
|
||||
"pin-project-lite",
|
||||
"postgres-protocol",
|
||||
"postgres-types",
|
||||
@@ -4301,7 +4294,6 @@ dependencies = [
|
||||
"remote_storage",
|
||||
"reqwest",
|
||||
"rpds",
|
||||
"rstest",
|
||||
"rustls 0.23.18",
|
||||
"scopeguard",
|
||||
"send-future",
|
||||
@@ -4353,7 +4345,6 @@ dependencies = [
|
||||
"humantime-serde",
|
||||
"itertools 0.10.5",
|
||||
"nix 0.27.1",
|
||||
"once_cell",
|
||||
"postgres_backend",
|
||||
"postgres_ffi",
|
||||
"rand 0.8.5",
|
||||
@@ -4415,9 +4406,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "papaya"
|
||||
version = "0.2.1"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6827e3fc394523c21d4464d02c0bb1c19966ea4a58a9844ad6d746214179d2bc"
|
||||
checksum = "aab21828b6b5952fdadd6c377728ffae53ec3a21b2febc47319ab65741f7e2fd"
|
||||
dependencies = [
|
||||
"equivalent",
|
||||
"seize",
|
||||
@@ -5204,7 +5195,7 @@ dependencies = [
|
||||
"walkdir",
|
||||
"workspace_hack",
|
||||
"x509-cert",
|
||||
"zerocopy 0.8.24",
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -5504,6 +5495,17 @@ version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c707298afce11da2efef2f600116fa93ffa7a032b5d7b628aa17711ec81383ca"
|
||||
|
||||
[[package]]
|
||||
name = "remote_keys"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"aws-config",
|
||||
"aws-sdk-kms",
|
||||
"aws-smithy-types",
|
||||
"utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "remote_storage"
|
||||
version = "0.1.0"
|
||||
@@ -5594,7 +5596,7 @@ dependencies = [
|
||||
"wasm-bindgen-futures",
|
||||
"wasm-streams",
|
||||
"web-sys",
|
||||
"webpki-roots",
|
||||
"webpki-roots 0.26.1",
|
||||
"winreg",
|
||||
]
|
||||
|
||||
@@ -5694,9 +5696,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "ring"
|
||||
version = "0.17.14"
|
||||
version = "0.17.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
|
||||
checksum = "70ac5d832aa16abd7d1def883a8545280c20a60f523a370aa3a9617c2b8550ee"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"cfg-if",
|
||||
@@ -5997,12 +5999,10 @@ dependencies = [
|
||||
"humantime",
|
||||
"hyper 0.14.30",
|
||||
"itertools 0.10.5",
|
||||
"jsonwebtoken",
|
||||
"metrics",
|
||||
"once_cell",
|
||||
"pageserver_api",
|
||||
"parking_lot 0.12.1",
|
||||
"pem",
|
||||
"postgres-protocol",
|
||||
"postgres_backend",
|
||||
"postgres_ffi",
|
||||
@@ -6195,13 +6195,13 @@ checksum = "224e328af6e080cddbab3c770b1cf50f0351ba0577091ef2410c3951d835ff87"
|
||||
|
||||
[[package]]
|
||||
name = "sentry"
|
||||
version = "0.37.0"
|
||||
version = "0.32.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "255914a8e53822abd946e2ce8baa41d4cded6b8e938913b7f7b9da5b7ab44335"
|
||||
checksum = "00421ed8fa0c995f07cde48ba6c89e80f2b312f74ff637326f392fbfd23abe02"
|
||||
dependencies = [
|
||||
"httpdate",
|
||||
"reqwest",
|
||||
"rustls 0.23.18",
|
||||
"rustls 0.21.12",
|
||||
"sentry-backtrace",
|
||||
"sentry-contexts",
|
||||
"sentry-core",
|
||||
@@ -6209,14 +6209,14 @@ dependencies = [
|
||||
"sentry-tracing",
|
||||
"tokio",
|
||||
"ureq",
|
||||
"webpki-roots",
|
||||
"webpki-roots 0.25.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sentry-backtrace"
|
||||
version = "0.37.0"
|
||||
version = "0.32.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "00293cd332a859961f24fd69258f7e92af736feaeb91020cff84dac4188a4302"
|
||||
checksum = "a79194074f34b0cbe5dd33896e5928bbc6ab63a889bd9df2264af5acb186921e"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"once_cell",
|
||||
@@ -6226,9 +6226,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sentry-contexts"
|
||||
version = "0.37.0"
|
||||
version = "0.32.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "961990f9caa76476c481de130ada05614cd7f5aa70fb57c2142f0e09ad3fb2aa"
|
||||
checksum = "eba8870c5dba2bfd9db25c75574a11429f6b95957b0a78ac02e2970dd7a5249a"
|
||||
dependencies = [
|
||||
"hostname",
|
||||
"libc",
|
||||
@@ -6240,9 +6240,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sentry-core"
|
||||
version = "0.37.0"
|
||||
version = "0.32.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a6409d845707d82415c800290a5d63be5e3df3c2e417b0997c60531dfbd35ef"
|
||||
checksum = "46a75011ea1c0d5c46e9e57df03ce81f5c7f0a9e199086334a1f9c0a541e0826"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"rand 0.8.5",
|
||||
@@ -6253,9 +6253,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sentry-panic"
|
||||
version = "0.37.0"
|
||||
version = "0.32.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "609b1a12340495ce17baeec9e08ff8ed423c337c1a84dffae36a178c783623f3"
|
||||
checksum = "2eaa3ecfa3c8750c78dcfd4637cfa2598b95b52897ed184b4dc77fcf7d95060d"
|
||||
dependencies = [
|
||||
"sentry-backtrace",
|
||||
"sentry-core",
|
||||
@@ -6263,9 +6263,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sentry-tracing"
|
||||
version = "0.37.0"
|
||||
version = "0.32.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49f4e86402d5c50239dc7d8fd3f6d5e048221d5fcb4e026d8d50ab57fe4644cb"
|
||||
checksum = "f715932bf369a61b7256687c6f0554141b7ce097287e30e3f7ed6e9de82498fe"
|
||||
dependencies = [
|
||||
"sentry-backtrace",
|
||||
"sentry-core",
|
||||
@@ -6275,9 +6275,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sentry-types"
|
||||
version = "0.37.0"
|
||||
version = "0.32.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3d3f117b8755dbede8260952de2aeb029e20f432e72634e8969af34324591631"
|
||||
checksum = "4519c900ce734f7a0eb7aba0869dfb225a7af8820634a7dd51449e3b093cfb7c"
|
||||
dependencies = [
|
||||
"debugid",
|
||||
"hex",
|
||||
@@ -6711,6 +6711,8 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-stream",
|
||||
"aws-config",
|
||||
"aws-sdk-s3",
|
||||
"camino",
|
||||
"chrono",
|
||||
"clap",
|
||||
@@ -7799,7 +7801,7 @@ dependencies = [
|
||||
"rustls 0.23.18",
|
||||
"rustls-pki-types",
|
||||
"url",
|
||||
"webpki-roots",
|
||||
"webpki-roots 0.26.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -7881,7 +7883,6 @@ dependencies = [
|
||||
"metrics",
|
||||
"nix 0.27.1",
|
||||
"once_cell",
|
||||
"pem",
|
||||
"pin-project-lite",
|
||||
"postgres_connection",
|
||||
"pprof",
|
||||
@@ -8167,6 +8168,12 @@ dependencies = [
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "webpki-roots"
|
||||
version = "0.25.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "14247bb57be4f377dfb94c72830b8ce8fc6beac03cf4bf7b9732eadd414123fc"
|
||||
|
||||
[[package]]
|
||||
name = "webpki-roots"
|
||||
version = "0.26.1"
|
||||
@@ -8474,8 +8481,6 @@ dependencies = [
|
||||
"regex-syntax 0.8.2",
|
||||
"reqwest",
|
||||
"rustls 0.23.18",
|
||||
"rustls-pki-types",
|
||||
"rustls-webpki 0.102.8",
|
||||
"scopeguard",
|
||||
"sec1 0.7.3",
|
||||
"serde",
|
||||
@@ -8504,6 +8509,7 @@ dependencies = [
|
||||
"tracing-log",
|
||||
"url",
|
||||
"uuid",
|
||||
"zerocopy",
|
||||
"zeroize",
|
||||
"zstd",
|
||||
"zstd-safe",
|
||||
@@ -8607,16 +8613,8 @@ version = "0.7.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1c4061bedbb353041c12f413700357bec76df2c7e2ca8e4df8bac24c6bf68e3d"
|
||||
dependencies = [
|
||||
"zerocopy-derive 0.7.31",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy"
|
||||
version = "0.8.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2586fea28e186957ef732a5f8b3be2da217d65c5969d4b1e17f973ebbe876879"
|
||||
dependencies = [
|
||||
"zerocopy-derive 0.8.24",
|
||||
"byteorder",
|
||||
"zerocopy-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -8630,17 +8628,6 @@ dependencies = [
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy-derive"
|
||||
version = "0.8.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a996a8f63c5c4448cd959ac1bab0aaa3306ccfd060472f85943ee0750f0169be"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerofrom"
|
||||
version = "0.1.5"
|
||||
|
||||
@@ -30,6 +30,7 @@ members = [
"libs/tenant_size_model",
"libs/metrics",
"libs/postgres_connection",
"libs/remote_keys",
"libs/remote_storage",
"libs/tracing-utils",
"libs/postgres_ffi/wal_craft",

@@ -40,7 +41,7 @@ members = [
"libs/proxy/postgres-protocol2",
"libs/proxy/postgres-types2",
"libs/proxy/tokio-postgres2",
"endpoint_storage",
"object_storage",
]

[workspace.package]

@@ -141,7 +142,6 @@ parking_lot = "0.12"
parquet = { version = "53", default-features = false, features = ["zstd"] }
parquet_derive = "53"
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
pem = "3.0.3"
pin-project-lite = "0.2"
pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "prost-codec"] }
procfs = "0.16"

@@ -164,7 +164,7 @@ scopeguard = "1.1"
sysinfo = "0.29.2"
sd-notify = "0.4.1"
send-future = "0.1.0"
sentry = { version = "0.37", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
sentry = { version = "0.32", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
serde_path_to_error = "0.1"

@@ -175,7 +175,6 @@ signal-hook = "0.3"
smallvec = "1.11"
smol_str = { version = "0.2.0", features = ["serde"] }
socket2 = "0.5"
spki = "0.7.3"
strum = "0.26"
strum_macros = "0.26"
"subtle" = "2.5.0"

@@ -220,7 +219,7 @@ uuid = { version = "1.6.1", features = ["v4", "v7", "serde"] }
walkdir = "2.3.2"
rustls-native-certs = "0.8"
whoami = "1.5.1"
zerocopy = { version = "0.8", features = ["derive", "simd"] }
zerocopy = { version = "0.7", features = ["derive"] }
json-structural-diff = { version = "0.2.0" }
x509-cert = { version = "0.2.5" }
@@ -89,7 +89,7 @@ RUN set -e \
--bin storage_broker \
--bin storage_controller \
--bin proxy \
--bin endpoint_storage \
--bin object_storage \
--bin neon_local \
--bin storage_scrubber \
--locked --release

@@ -122,7 +122,7 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper
COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_broker /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_controller /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/endpoint_storage /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/object_storage /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/neon_local /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_scrubber /usr/local/bin

@@ -270,7 +270,7 @@ By default, this runs both debug and release modes, and all supported postgres v
testing locally, it is convenient to run just one set of permutations, like this:

```sh
DEFAULT_PG_VERSION=17 BUILD_TYPE=release ./scripts/pytest
DEFAULT_PG_VERSION=16 BUILD_TYPE=release ./scripts/pytest
```

## Flamegraphs
@@ -173,7 +173,7 @@ RUN curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v$
&& rm -rf protoc.zip protoc

# s5cmd
ENV S5CMD_VERSION=2.3.0
ENV S5CMD_VERSION=2.2.2
RUN curl -sL "https://github.com/peak/s5cmd/releases/download/v${S5CMD_VERSION}/s5cmd_${S5CMD_VERSION}_Linux-$(uname -m | sed 's/x86_64/64bit/g' | sed 's/aarch64/arm64/g').tar.gz" | tar zxvf - s5cmd \
&& chmod +x s5cmd \
&& mv s5cmd /usr/local/bin/s5cmd

@@ -206,7 +206,7 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "aws
&& rm awscliv2.zip

# Mold: A Modern Linker
ENV MOLD_VERSION=v2.37.1
ENV MOLD_VERSION=v2.34.1
RUN set -e \
&& git clone https://github.com/rui314/mold.git \
&& mkdir mold/build \

@@ -268,7 +268,7 @@ WORKDIR /home/nonroot
RUN echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /home/nonroot/.curlrc

# Python
ENV PYTHON_VERSION=3.11.12 \
ENV PYTHON_VERSION=3.11.10 \
PYENV_ROOT=/home/nonroot/.pyenv \
PATH=/home/nonroot/.pyenv/shims:/home/nonroot/.pyenv/bin:/home/nonroot/.poetry/bin:$PATH
RUN set -e \

@@ -296,12 +296,12 @@ ENV RUSTC_VERSION=1.86.0
ENV RUSTUP_HOME="/home/nonroot/.rustup"
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
ARG RUSTFILT_VERSION=0.2.1
ARG CARGO_HAKARI_VERSION=0.9.36
ARG CARGO_DENY_VERSION=0.18.2
ARG CARGO_HACK_VERSION=0.6.36
ARG CARGO_NEXTEST_VERSION=0.9.94
ARG CARGO_HAKARI_VERSION=0.9.33
ARG CARGO_DENY_VERSION=0.16.2
ARG CARGO_HACK_VERSION=0.6.33
ARG CARGO_NEXTEST_VERSION=0.9.85
ARG CARGO_CHEF_VERSION=0.1.71
ARG CARGO_DIESEL_CLI_VERSION=2.2.9
ARG CARGO_DIESEL_CLI_VERSION=2.2.6
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
chmod +x rustup-init && \
./rustup-init -y --default-toolchain ${RUSTC_VERSION} && \

@@ -12,5 +12,3 @@ disallowed-macros = [
# cannot disallow this, because clippy finds used from tokio macros
#"tokio::pin",
]

allow-unwrap-in-tests = true

@@ -1677,7 +1677,7 @@ RUN set -e \
&& apt clean && rm -rf /var/lib/apt/lists/*

# Use `dist_man_MANS=` to skip manpage generation (which requires python3/pandoc)
ENV PGBOUNCER_TAG=pgbouncer_1_24_1
ENV PGBOUNCER_TAG=pgbouncer_1_22_1
RUN set -e \
&& git clone --recurse-submodules --depth 1 --branch ${PGBOUNCER_TAG} https://github.com/pgbouncer/pgbouncer.git pgbouncer \
&& cd pgbouncer \
265
compute/patches/pg_anon.patch
Normal file
265
compute/patches/pg_anon.patch
Normal file
@@ -0,0 +1,265 @@
|
||||
commit 00aa659afc9c7336ab81036edec3017168aabf40
|
||||
Author: Heikki Linnakangas <heikki@neon.tech>
|
||||
Date: Tue Nov 12 16:59:19 2024 +0200
|
||||
|
||||
Temporarily disable test that depends on timezone
|
||||
|
||||
diff --git a/tests/expected/generalization.out b/tests/expected/generalization.out
|
||||
index 23ef5fa..9e60deb 100644
|
||||
--- a/ext-src/pg_anon-src/tests/expected/generalization.out
|
||||
+++ b/ext-src/pg_anon-src/tests/expected/generalization.out
|
||||
@@ -284,12 +284,9 @@ SELECT anon.generalize_tstzrange('19041107','century');
|
||||
["Tue Jan 01 00:00:00 1901 PST","Mon Jan 01 00:00:00 2001 PST")
|
||||
(1 row)
|
||||
|
||||
-SELECT anon.generalize_tstzrange('19041107','millennium');
|
||||
- generalize_tstzrange
|
||||
------------------------------------------------------------------
|
||||
- ["Thu Jan 01 00:00:00 1001 PST","Mon Jan 01 00:00:00 2001 PST")
|
||||
-(1 row)
|
||||
-
|
||||
+-- temporarily disabled, see:
|
||||
+-- https://gitlab.com/dalibo/postgresql_anonymizer/-/commit/199f0a392b37c59d92ae441fb8f037e094a11a52#note_2148017485
|
||||
+--SELECT anon.generalize_tstzrange('19041107','millennium');
|
||||
-- generalize_daterange
|
||||
SELECT anon.generalize_daterange('19041107');
|
||||
generalize_daterange
|
||||
diff --git a/tests/sql/generalization.sql b/tests/sql/generalization.sql
|
||||
index b868344..b4fc977 100644
|
||||
--- a/ext-src/pg_anon-src/tests/sql/generalization.sql
|
||||
+++ b/ext-src/pg_anon-src/tests/sql/generalization.sql
|
||||
@@ -61,7 +61,9 @@ SELECT anon.generalize_tstzrange('19041107','month');
|
||||
SELECT anon.generalize_tstzrange('19041107','year');
|
||||
SELECT anon.generalize_tstzrange('19041107','decade');
|
||||
SELECT anon.generalize_tstzrange('19041107','century');
|
||||
-SELECT anon.generalize_tstzrange('19041107','millennium');
|
||||
+-- temporarily disabled, see:
|
||||
+-- https://gitlab.com/dalibo/postgresql_anonymizer/-/commit/199f0a392b37c59d92ae441fb8f037e094a11a52#note_2148017485
|
||||
+--SELECT anon.generalize_tstzrange('19041107','millennium');
|
||||
|
||||
-- generalize_daterange
|
||||
SELECT anon.generalize_daterange('19041107');
|
||||
|
||||
commit 7dd414ee75f2875cffb1d6ba474df1f135a6fc6f
|
||||
Author: Alexey Masterov <alexeymasterov@neon.tech>
|
||||
Date: Fri May 31 06:34:26 2024 +0000
|
||||
|
||||
These alternative expected files were added to consider the neon features
|
||||
|
||||
diff --git a/ext-src/pg_anon-src/tests/expected/permissions_masked_role_1.out b/ext-src/pg_anon-src/tests/expected/permissions_masked_role_1.out
|
||||
new file mode 100644
|
||||
index 0000000..2539cfd
|
||||
--- /dev/null
|
||||
+++ b/ext-src/pg_anon-src/tests/expected/permissions_masked_role_1.out
|
||||
@@ -0,0 +1,101 @@
|
||||
+BEGIN;
|
||||
+CREATE EXTENSION anon CASCADE;
|
||||
+NOTICE: installing required extension "pgcrypto"
|
||||
+SELECT anon.init();
|
||||
+ init
|
||||
+------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+CREATE ROLE mallory_the_masked_user;
|
||||
+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS 'MASKED';
|
||||
+CREATE TABLE t1(i INT);
|
||||
+ALTER TABLE t1 ADD COLUMN t TEXT;
|
||||
+SECURITY LABEL FOR anon ON COLUMN t1.t
|
||||
+IS 'MASKED WITH VALUE NULL';
|
||||
+INSERT INTO t1 VALUES (1,'test');
|
||||
+--
|
||||
+-- We're checking the owner's permissions
|
||||
+--
|
||||
+-- see
|
||||
+-- https://postgresql-anonymizer.readthedocs.io/en/latest/SECURITY/#permissions
|
||||
+--
|
||||
+SET ROLE mallory_the_masked_user;
|
||||
+SELECT anon.pseudo_first_name(0) IS NOT NULL;
|
||||
+ ?column?
|
||||
+----------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+-- SHOULD FAIL
|
||||
+DO $$
|
||||
+BEGIN
|
||||
+ PERFORM anon.init();
|
||||
+ EXCEPTION WHEN insufficient_privilege
|
||||
+ THEN RAISE NOTICE 'insufficient_privilege';
|
||||
+END$$;
|
||||
+NOTICE: insufficient_privilege
|
||||
+-- SHOULD FAIL
|
||||
+DO $$
|
||||
+BEGIN
|
||||
+ PERFORM anon.anonymize_table('t1');
|
||||
+ EXCEPTION WHEN insufficient_privilege
|
||||
+ THEN RAISE NOTICE 'insufficient_privilege';
|
||||
+END$$;
|
||||
+NOTICE: insufficient_privilege
|
||||
+-- SHOULD FAIL
|
||||
+SAVEPOINT fail_start_engine;
|
||||
+SELECT anon.start_dynamic_masking();
|
||||
+ERROR: Only supersusers can start the dynamic masking engine.
|
||||
+CONTEXT: PL/pgSQL function anon.start_dynamic_masking(boolean) line 18 at RAISE
|
||||
+ROLLBACK TO fail_start_engine;
|
||||
+RESET ROLE;
|
||||
+SELECT anon.start_dynamic_masking();
|
||||
+ start_dynamic_masking
|
||||
+-----------------------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+SET ROLE mallory_the_masked_user;
|
||||
+SELECT * FROM mask.t1;
|
||||
+ i | t
|
||||
+---+---
|
||||
+ 1 |
|
||||
+(1 row)
|
||||
+
|
||||
+-- SHOULD FAIL
|
||||
+DO $$
|
||||
+BEGIN
|
||||
+ SELECT * FROM public.t1;
|
||||
+ EXCEPTION WHEN insufficient_privilege
|
||||
+ THEN RAISE NOTICE 'insufficient_privilege';
|
||||
+END$$;
|
||||
+NOTICE: insufficient_privilege
|
||||
+-- SHOULD FAIL
|
||||
+SAVEPOINT fail_stop_engine;
|
||||
+SELECT anon.stop_dynamic_masking();
|
||||
+ERROR: Only supersusers can stop the dynamic masking engine.
|
||||
+CONTEXT: PL/pgSQL function anon.stop_dynamic_masking() line 18 at RAISE
|
||||
+ROLLBACK TO fail_stop_engine;
|
||||
+RESET ROLE;
|
||||
+SELECT anon.stop_dynamic_masking();
|
||||
+NOTICE: The previous priviledges of 'mallory_the_masked_user' are not restored. You need to grant them manually.
|
||||
+ stop_dynamic_masking
|
||||
+----------------------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+SET ROLE mallory_the_masked_user;
|
||||
+SELECT COUNT(*)=1 FROM anon.pg_masking_rules;
|
||||
+ ?column?
|
||||
+----------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+-- SHOULD FAIL
|
||||
+SAVEPOINT fail_seclabel_on_role;
|
||||
+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS NULL;
|
||||
+ERROR: permission denied
|
||||
+DETAIL: The current user must have the CREATEROLE attribute.
|
||||
+ROLLBACK TO fail_seclabel_on_role;
|
||||
+ROLLBACK;
|
||||
diff --git a/ext-src/pg_anon-src/tests/expected/permissions_owner_1.out b/ext-src/pg_anon-src/tests/expected/permissions_owner_1.out
|
||||
new file mode 100644
|
||||
index 0000000..8b090fe
|
||||
--- /dev/null
|
||||
+++ b/ext-src/pg_anon-src/tests/expected/permissions_owner_1.out
|
||||
@@ -0,0 +1,104 @@
|
||||
+BEGIN;
|
||||
+CREATE EXTENSION anon CASCADE;
|
||||
+NOTICE: installing required extension "pgcrypto"
|
||||
+SELECT anon.init();
|
||||
+ init
|
||||
+------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+CREATE ROLE oscar_the_owner;
|
||||
+ALTER DATABASE :DBNAME OWNER TO oscar_the_owner;
|
||||
+CREATE ROLE mallory_the_masked_user;
|
||||
+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS 'MASKED';
|
||||
+--
|
||||
+-- We're checking the owner's permissions
|
||||
+--
|
||||
+-- see
|
||||
+-- https://postgresql-anonymizer.readthedocs.io/en/latest/SECURITY/#permissions
|
||||
+--
|
||||
+SET ROLE oscar_the_owner;
|
||||
+SELECT anon.pseudo_first_name(0) IS NOT NULL;
|
||||
+ ?column?
|
||||
+----------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+-- SHOULD FAIL
|
||||
+DO $$
|
||||
+BEGIN
|
||||
+ PERFORM anon.init();
|
||||
+ EXCEPTION WHEN insufficient_privilege
|
||||
+ THEN RAISE NOTICE 'insufficient_privilege';
|
||||
+END$$;
|
||||
+NOTICE: insufficient_privilege
|
||||
+CREATE TABLE t1(i INT);
|
||||
+ALTER TABLE t1 ADD COLUMN t TEXT;
|
||||
+SECURITY LABEL FOR anon ON COLUMN t1.t
|
||||
+IS 'MASKED WITH VALUE NULL';
|
||||
+INSERT INTO t1 VALUES (1,'test');
|
||||
+SELECT anon.anonymize_table('t1');
|
||||
+ anonymize_table
|
||||
+-----------------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+SELECT * FROM t1;
|
||||
+ i | t
|
||||
+---+---
|
||||
+ 1 |
|
||||
+(1 row)
|
||||
+
|
||||
+UPDATE t1 SET t='test' WHERE i=1;
|
||||
+-- SHOULD FAIL
|
||||
+SAVEPOINT fail_start_engine;
|
||||
+SELECT anon.start_dynamic_masking();
|
||||
+ start_dynamic_masking
|
||||
+-----------------------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+ROLLBACK TO fail_start_engine;
|
||||
+RESET ROLE;
|
||||
+SELECT anon.start_dynamic_masking();
|
||||
+ start_dynamic_masking
|
||||
+-----------------------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+SET ROLE oscar_the_owner;
|
||||
+SELECT * FROM t1;
|
||||
+ i | t
|
||||
+---+------
|
||||
+ 1 | test
|
||||
+(1 row)
|
||||
+
|
||||
+--SELECT * FROM mask.t1;
|
||||
+-- SHOULD FAIL
|
||||
+SAVEPOINT fail_stop_engine;
|
||||
+SELECT anon.stop_dynamic_masking();
|
||||
+ERROR: permission denied for schema mask
|
||||
+CONTEXT: SQL statement "DROP VIEW mask.t1;"
|
||||
+PL/pgSQL function anon.mask_drop_view(oid) line 3 at EXECUTE
|
||||
+SQL statement "SELECT anon.mask_drop_view(oid)
|
||||
+ FROM pg_catalog.pg_class
|
||||
+ WHERE relnamespace=quote_ident(pg_catalog.current_setting('anon.sourceschema'))::REGNAMESPACE
|
||||
+ AND relkind IN ('r','p','f')"
|
||||
+PL/pgSQL function anon.stop_dynamic_masking() line 22 at PERFORM
|
||||
+ROLLBACK TO fail_stop_engine;
|
||||
+RESET ROLE;
|
||||
+SELECT anon.stop_dynamic_masking();
|
||||
+NOTICE: The previous priviledges of 'mallory_the_masked_user' are not restored. You need to grant them manually.
|
||||
+ stop_dynamic_masking
|
||||
+----------------------
|
||||
+ t
|
||||
+(1 row)
|
||||
+
|
||||
+SET ROLE oscar_the_owner;
|
||||
+-- SHOULD FAIL
|
||||
+SAVEPOINT fail_seclabel_on_role;
|
||||
+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS NULL;
|
||||
+ERROR: permission denied
|
||||
+DETAIL: The current user must have the CREATEROLE attribute.
|
||||
+ROLLBACK TO fail_seclabel_on_role;
|
||||
+ROLLBACK;
|
||||
@@ -11,14 +11,6 @@ index bf6edcb..89b4c7f 100644
|
||||
|
||||
USE_PGXS = 1 # use pgxs if not in contrib directory
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
diff --git a/regress/expected/init-extension.out b/regress/expected/init-extension.out
|
||||
index 9f2e171..f6e4f8d 100644
|
||||
--- a/regress/expected/init-extension.out
|
||||
+++ b/regress/expected/init-extension.out
|
||||
@@ -1,3 +1,2 @@
|
||||
SET client_min_messages = warning;
|
||||
CREATE EXTENSION pg_repack;
|
||||
-RESET client_min_messages;
|
||||
diff --git a/regress/expected/nosuper.out b/regress/expected/nosuper.out
|
||||
index 8d0a94e..63b68bf 100644
|
||||
--- a/regress/expected/nosuper.out
|
||||
@@ -50,14 +42,6 @@ index 8d0a94e..63b68bf 100644
|
||||
INFO: repacking table "public.tbl_cluster"
|
||||
ERROR: query failed: ERROR: current transaction is aborted, commands ignored until end of transaction block
|
||||
DETAIL: query was: RESET lock_timeout
|
||||
diff --git a/regress/sql/init-extension.sql b/regress/sql/init-extension.sql
|
||||
index 9f2e171..f6e4f8d 100644
|
||||
--- a/regress/sql/init-extension.sql
|
||||
+++ b/regress/sql/init-extension.sql
|
||||
@@ -1,3 +1,2 @@
|
||||
SET client_min_messages = warning;
|
||||
CREATE EXTENSION pg_repack;
|
||||
-RESET client_min_messages;
|
||||
diff --git a/regress/sql/nosuper.sql b/regress/sql/nosuper.sql
|
||||
index 072f0fa..dbe60f8 100644
|
||||
--- a/regress/sql/nosuper.sql
|
||||
|
||||
@@ -15,7 +15,7 @@ index 7a4b88c..56678af 100644
|
||||
HEADERS = src/halfvec.h src/sparsevec.h src/vector.h
|
||||
|
||||
diff --git a/src/hnswbuild.c b/src/hnswbuild.c
|
||||
index b667478..1298aa1 100644
|
||||
index b667478..dc95d89 100644
|
||||
--- a/src/hnswbuild.c
|
||||
+++ b/src/hnswbuild.c
|
||||
@@ -843,9 +843,17 @@ HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc)
|
||||
@@ -36,7 +36,7 @@ index b667478..1298aa1 100644
|
||||
/* Close relations within worker */
|
||||
index_close(indexRel, indexLockmode);
|
||||
table_close(heapRel, heapLockmode);
|
||||
@@ -1100,13 +1108,25 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,
|
||||
@@ -1100,12 +1108,39 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,
|
||||
SeedRandom(42);
|
||||
#endif
|
||||
|
||||
@@ -48,17 +48,32 @@ index b667478..1298aa1 100644
|
||||
|
||||
BuildGraph(buildstate, forkNum);
|
||||
|
||||
- if (RelationNeedsWAL(index) || forkNum == INIT_FORKNUM)
|
||||
+#ifdef NEON_SMGR
|
||||
+ smgr_finish_unlogged_build_phase_1(RelationGetSmgr(index));
|
||||
+#endif
|
||||
+
|
||||
if (RelationNeedsWAL(index) || forkNum == INIT_FORKNUM)
|
||||
+ if (RelationNeedsWAL(index) || forkNum == INIT_FORKNUM) {
|
||||
log_newpage_range(index, forkNum, 0, RelationGetNumberOfBlocksInFork(index, forkNum), true);
|
||||
|
||||
+#ifdef NEON_SMGR
|
||||
+ {
|
||||
+#if PG_VERSION_NUM >= 160000
|
||||
+ RelFileLocator rlocator = RelationGetSmgr(index)->smgr_rlocator.locator;
|
||||
+#else
|
||||
+ RelFileNode rlocator = RelationGetSmgr(index)->smgr_rnode.node;
|
||||
+#endif
|
||||
+ if (set_lwlsn_block_range_hook)
|
||||
+ set_lwlsn_block_range_hook(XactLastRecEnd, rlocator,
|
||||
+ MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
|
||||
+ if (set_lwlsn_relation_hook)
|
||||
+ set_lwlsn_relation_hook(XactLastRecEnd, rlocator, MAIN_FORKNUM);
|
||||
+ }
|
||||
+#endif
|
||||
+ }
|
||||
+
|
||||
+#ifdef NEON_SMGR
|
||||
+ smgr_end_unlogged_build(RelationGetSmgr(index));
|
||||
+#endif
|
||||
+
|
||||
|
||||
FreeBuildState(buildstate);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
diff --git a/src/ruminsert.c b/src/ruminsert.c
|
||||
index 255e616..1c6edb7 100644
|
||||
index 255e616..7a2240f 100644
|
||||
--- a/src/ruminsert.c
|
||||
+++ b/src/ruminsert.c
|
||||
@@ -628,6 +628,10 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)
|
||||
@@ -24,12 +24,24 @@ index 255e616..1c6edb7 100644
|
||||
/*
|
||||
* Write index to xlog
|
||||
*/
|
||||
@@ -713,6 +721,10 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)
|
||||
@@ -713,6 +721,22 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
|
||||
+#ifdef NEON_SMGR
|
||||
+ smgr_end_unlogged_build(index->rd_smgr);
|
||||
+ {
|
||||
+#if PG_VERSION_NUM >= 160000
|
||||
+ RelFileLocator rlocator = RelationGetSmgr(index)->smgr_rlocator.locator;
|
||||
+#else
|
||||
+ RelFileNode rlocator = RelationGetSmgr(index)->smgr_rnode.node;
|
||||
+#endif
|
||||
+ if (set_lwlsn_block_range_hook)
|
||||
+ set_lwlsn_block_range_hook(XactLastRecEnd, rlocator, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
|
||||
+ if (set_lwlsn_relation_hook)
|
||||
+ set_lwlsn_relation_hook(XactLastRecEnd, rlocator, MAIN_FORKNUM);
|
||||
+
|
||||
+ smgr_end_unlogged_build(index->rd_smgr);
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
/*
|
||||
|
||||
@@ -22,7 +22,7 @@ commands:
- name: local_proxy
user: postgres
sysvInitAction: respawn
shell: 'RUST_LOG="info,proxy::serverless::sql_over_http=warn" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
shell: '/usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
- name: postgres-exporter
user: nobody
sysvInitAction: respawn

@@ -22,7 +22,7 @@ commands:
- name: local_proxy
user: postgres
sysvInitAction: respawn
shell: 'RUST_LOG="info,proxy::serverless::sql_over_http=warn" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
shell: '/usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
- name: postgres-exporter
user: nobody
sysvInitAction: respawn
@@ -44,6 +44,7 @@ serde.workspace = true
serde_with.workspace = true
serde_json.workspace = true
signal-hook.workspace = true
spki = { version = "0.7.3", features = ["std"] }
tar.workspace = true
tower.workspace = true
tower-http.workspace = true
@@ -116,7 +116,9 @@ struct Cli {
#[arg(long)]
pub set_disk_quota_for_fs: Option<String>,

#[arg(short = 'c', long)]
// TODO(tristan957): remove alias after compatibility tests are no longer
// an issue
#[arg(short = 'c', long, alias = "spec-path")]
pub config: Option<OsString>,

#[arg(short = 'i', long, group = "compute-id")]
@@ -641,26 +641,7 @@ impl ComputeNode {

let log_directory_path = Path::new(&self.params.pgdata).join("log");
let log_directory_path = log_directory_path.to_string_lossy().to_string();

// Add project_id,endpoint_id tag to identify the logs.
//
// These ids are passed from cplane,
// for backwards compatibility (old computes that don't have them),
// we set them to None.
// TODO: Clean up this code when all computes have them.
let tag: Option<String> = match (
pspec.spec.project_id.as_deref(),
pspec.spec.endpoint_id.as_deref(),
) {
(Some(project_id), Some(endpoint_id)) => {
Some(format!("{project_id}/{endpoint_id}"))
}
(Some(project_id), None) => Some(format!("{project_id}/None")),
(None, Some(endpoint_id)) => Some(format!("None,{endpoint_id}")),
(None, None) => None,
};

configure_audit_rsyslog(log_directory_path.clone(), tag, &remote_endpoint)?;
configure_audit_rsyslog(log_directory_path.clone(), "hipaa", &remote_endpoint)?;

// Launch a background task to clean up the audit logs
launch_pgaudit_gc(log_directory_path);
@@ -6,5 +6,4 @@ pub(crate) mod request_id;
pub(crate) use json::Json;
pub(crate) use path::Path;
pub(crate) use query::Query;
#[allow(unused)]
pub(crate) use request_id::RequestId;
@@ -1,7 +1,7 @@
use std::collections::HashSet;
use std::{collections::HashSet, net::SocketAddr};

use anyhow::{Result, anyhow};
use axum::{RequestExt, body::Body};
use axum::{RequestExt, body::Body, extract::ConnectInfo};
use axum_extra::{
TypedHeader,
headers::{Authorization, authorization::Bearer},

@@ -13,7 +13,7 @@ use jsonwebtoken::{Algorithm, DecodingKey, TokenData, Validation, jwk::JwkSet};
use tower_http::auth::AsyncAuthorizeRequest;
use tracing::{debug, warn};

use crate::http::JsonResponse;
use crate::http::{JsonResponse, extract::RequestId};

#[derive(Clone, Debug)]
pub(in crate::http) struct Authorize {

@@ -52,6 +52,31 @@ impl AsyncAuthorizeRequest<Body> for Authorize {
let validation = self.validation.clone();

Box::pin(async move {
let request_id = request.extract_parts::<RequestId>().await.unwrap();

// TODO: Remove this stanza after teaching neon_local and the
// regression tests to use a JWT + JWKS.
//
// https://github.com/neondatabase/neon/issues/11316
if cfg!(feature = "testing") {
warn!(%request_id, "Skipping compute_ctl authorization check");

return Ok(request);
}

let connect_info = request
.extract_parts::<ConnectInfo<SocketAddr>>()
.await
.unwrap();

// In the event the request is coming from the loopback interface,
// allow all requests
if connect_info.ip().is_loopback() {
warn!(%request_id, "Bypassed authorization because request is coming from the loopback interface");

return Ok(request);
}

let TypedHeader(Authorization(bearer)) = request
.extract_parts::<TypedHeader<Authorization<Bearer>>>()
.await

@@ -87,8 +112,6 @@ impl Authorize {
token: &str,
validation: &Validation,
) -> Result<TokenData<ComputeClaims>> {
debug_assert!(!jwks.keys.is_empty());

debug!("verifying token {}", token);

for jwk in jwks.keys.iter() {
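The stanza in the hunk above bypasses JWT verification when the request arrives over the loopback interface. A minimal standalone sketch of that check (plain std Rust, independent of the axum `ConnectInfo` extractor used in the diff):

```rust
use std::net::SocketAddr;

/// Returns true when a request's peer address is a loopback address
/// (127.0.0.0/8 or ::1), i.e. the request originated on the same host.
fn is_local_request(peer: SocketAddr) -> bool {
    peer.ip().is_loopback()
}

fn main() {
    // Example addresses, chosen for illustration only.
    let local: SocketAddr = "127.0.0.1:3080".parse().unwrap();
    let remote: SocketAddr = "10.0.0.5:3080".parse().unwrap();
    assert!(is_local_request(local));
    assert!(!is_local_request(remote));
    println!("loopback check behaves as expected");
}
```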
@@ -1,8 +1,8 @@
use metrics::core::{AtomicF64, AtomicU64, Collector, GenericCounter, GenericGauge};
use metrics::core::{AtomicF64, Collector, GenericGauge};
use metrics::proto::MetricFamily;
use metrics::{
    IntCounterVec, IntGaugeVec, UIntGaugeVec, register_gauge, register_int_counter,
    register_int_counter_vec, register_int_gauge_vec, register_uint_gauge_vec,
    IntCounterVec, IntGaugeVec, UIntGaugeVec, register_gauge, register_int_counter_vec,
    register_int_gauge_vec, register_uint_gauge_vec,
};
use once_cell::sync::Lazy;

@@ -81,22 +81,6 @@ pub(crate) static COMPUTE_CTL_UP: Lazy<IntGaugeVec> = Lazy::new(|| {
    .expect("failed to define a metric")
});

pub(crate) static PG_CURR_DOWNTIME_MS: Lazy<GenericGauge<AtomicF64>> = Lazy::new(|| {
    register_gauge!(
        "compute_pg_current_downtime_ms",
        "Non-cumulative duration of Postgres downtime in ms; resets after successful check",
    )
    .expect("failed to define a metric")
});

pub(crate) static PG_TOTAL_DOWNTIME_MS: Lazy<GenericCounter<AtomicU64>> = Lazy::new(|| {
    register_int_counter!(
        "compute_pg_downtime_ms_total",
        "Cumulative duration of Postgres downtime in ms",
    )
    .expect("failed to define a metric")
});

pub fn collect() -> Vec<MetricFamily> {
    let mut metrics = COMPUTE_CTL_UP.collect();
    metrics.extend(INSTALLED_EXTENSIONS.collect());
@@ -104,7 +88,5 @@ pub fn collect() -> Vec<MetricFamily> {
    metrics.extend(REMOTE_EXT_REQUESTS_TOTAL.collect());
    metrics.extend(DB_MIGRATION_FAILED.collect());
    metrics.extend(AUDIT_LOG_DIR_SIZE.collect());
    metrics.extend(PG_CURR_DOWNTIME_MS.collect());
    metrics.extend(PG_TOTAL_DOWNTIME_MS.collect());
    metrics
}
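The two downtime metrics above follow the usual gauge-versus-counter split: the gauge holds the length of the current outage and resets, the counter only ever grows. A hedged fragment showing how the monitor drives them (it assumes the PG_* statics defined above are in scope; the argument values come from the caller):

```rust
// Fragment, not a full program: assumes PG_CURR_DOWNTIME_MS and
// PG_TOTAL_DOWNTIME_MS from the metrics module above are in scope.
fn report_down(ms_since_last_up: f64, ms_since_last_check: u64) {
    PG_CURR_DOWNTIME_MS.set(ms_since_last_up); // current outage length, resettable
    PG_TOTAL_DOWNTIME_MS.inc_by(ms_since_last_check); // lifetime total, monotonic
}

fn report_up() {
    PG_CURR_DOWNTIME_MS.set(0.0); // healthy again: current downtime goes back to zero
}
```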
@@ -6,294 +6,197 @@ use chrono::{DateTime, Utc};
|
||||
use compute_api::responses::ComputeStatus;
|
||||
use compute_api::spec::ComputeFeature;
|
||||
use postgres::{Client, NoTls};
|
||||
use tracing::{Level, error, info, instrument, span};
|
||||
use tracing::{debug, error, info, warn};
|
||||
|
||||
use crate::compute::ComputeNode;
|
||||
use crate::metrics::{PG_CURR_DOWNTIME_MS, PG_TOTAL_DOWNTIME_MS};
|
||||
|
||||
const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500);
|
||||
|
||||
struct ComputeMonitor {
|
||||
compute: Arc<ComputeNode>,
|
||||
// Spin in a loop and figure out the last activity time in the Postgres.
|
||||
// Then update it in the shared state. This function never errors out.
|
||||
// NB: the only expected panic is at `Mutex` unwrap(), all other errors
|
||||
// should be handled gracefully.
|
||||
fn watch_compute_activity(compute: &ComputeNode) {
|
||||
// Suppose that `connstr` doesn't change
|
||||
let connstr = compute.params.connstr.clone();
|
||||
let conf = compute.get_conn_conf(Some("compute_ctl:activity_monitor"));
|
||||
|
||||
/// The moment when Postgres had some activity,
|
||||
/// that should prevent compute from being suspended.
|
||||
last_active: Option<DateTime<Utc>>,
|
||||
// During startup and configuration we connect to every Postgres database,
|
||||
// but we don't want to count this as some user activity. So wait until
|
||||
// the compute fully started before monitoring activity.
|
||||
wait_for_postgres_start(compute);
|
||||
|
||||
/// The moment when we last tried to check Postgres.
|
||||
last_checked: DateTime<Utc>,
|
||||
/// The last moment we did a successful Postgres check.
|
||||
last_up: DateTime<Utc>,
|
||||
// Define `client` outside of the loop to reuse existing connection if it's active.
|
||||
let mut client = conf.connect(NoTls);
|
||||
|
||||
/// Only used for internal statistics change tracking
|
||||
/// between monitor runs and can be outdated.
|
||||
active_time: Option<f64>,
|
||||
/// Only used for internal statistics change tracking
|
||||
/// between monitor runs and can be outdated.
|
||||
sessions: Option<i64>,
|
||||
let mut sleep = false;
|
||||
let mut prev_active_time: Option<f64> = None;
|
||||
let mut prev_sessions: Option<i64> = None;
|
||||
|
||||
/// Use experimental statistics-based activity monitor. It's no longer
|
||||
/// 'experimental' per se, as it's enabled for everyone, but we still
|
||||
/// keep the flag as an option to turn it off in some cases if it will
|
||||
/// misbehave.
|
||||
experimental: bool,
|
||||
}
|
||||
|
||||
impl ComputeMonitor {
|
||||
fn report_down(&self) {
|
||||
let now = Utc::now();
|
||||
|
||||
// Calculate and report current downtime
|
||||
// (since the last time Postgres was up)
|
||||
let downtime = now.signed_duration_since(self.last_up);
|
||||
PG_CURR_DOWNTIME_MS.set(downtime.num_milliseconds() as f64);
|
||||
|
||||
// Calculate and update total downtime
|
||||
// (cumulative duration of Postgres downtime in ms)
|
||||
let inc = now
|
||||
.signed_duration_since(self.last_checked)
|
||||
.num_milliseconds();
|
||||
PG_TOTAL_DOWNTIME_MS.inc_by(inc as u64);
|
||||
if compute.has_feature(ComputeFeature::ActivityMonitorExperimental) {
|
||||
info!("starting experimental activity monitor for {}", connstr);
|
||||
} else {
|
||||
info!("starting activity monitor for {}", connstr);
|
||||
}
|
||||
|
||||
fn report_up(&mut self) {
|
||||
self.last_up = Utc::now();
|
||||
PG_CURR_DOWNTIME_MS.set(0.0);
|
||||
}
|
||||
|
||||
fn downtime_info(&self) -> String {
|
||||
format!(
|
||||
"total_ms: {}, current_ms: {}, last_up: {}",
|
||||
PG_TOTAL_DOWNTIME_MS.get(),
|
||||
PG_CURR_DOWNTIME_MS.get(),
|
||||
self.last_up
|
||||
)
|
||||
}
|
||||
|
||||
/// Spin in a loop and figure out the last activity time in the Postgres.
|
||||
/// Then update it in the shared state. This function never errors out.
|
||||
/// NB: the only expected panic is at `Mutex` unwrap(), all other errors
|
||||
/// should be handled gracefully.
|
||||
#[instrument(skip_all)]
|
||||
pub fn run(&mut self) {
|
||||
// Suppose that `connstr` doesn't change
|
||||
let connstr = self.compute.params.connstr.clone();
|
||||
let conf = self
|
||||
.compute
|
||||
.get_conn_conf(Some("compute_ctl:compute_monitor"));
|
||||
|
||||
// During startup and configuration we connect to every Postgres database,
|
||||
// but we don't want to count this as some user activity. So wait until
|
||||
// the compute fully started before monitoring activity.
|
||||
wait_for_postgres_start(&self.compute);
|
||||
|
||||
// Define `client` outside of the loop to reuse existing connection if it's active.
|
||||
let mut client = conf.connect(NoTls);
|
||||
|
||||
info!("starting compute monitor for {}", connstr);
|
||||
|
||||
loop {
|
||||
match &mut client {
|
||||
Ok(cli) => {
|
||||
if cli.is_closed() {
|
||||
info!(
|
||||
downtime_info = self.downtime_info(),
|
||||
"connection to Postgres is closed, trying to reconnect"
|
||||
);
|
||||
self.report_down();
|
||||
|
||||
// Connection is closed, reconnect and try again.
|
||||
client = conf.connect(NoTls);
|
||||
} else {
|
||||
match self.check(cli) {
|
||||
Ok(_) => {
|
||||
self.report_up();
|
||||
self.compute.update_last_active(self.last_active);
|
||||
}
|
||||
Err(e) => {
|
||||
// Although we have many places where we can return errors in `check()`,
|
||||
// normally it shouldn't happen. I.e., we will likely return error if
|
||||
// connection got broken, query timed out, Postgres returned invalid data, etc.
|
||||
// In all such cases it's suspicious, so let's report this as downtime.
|
||||
self.report_down();
|
||||
error!(
|
||||
downtime_info = self.downtime_info(),
|
||||
"could not check Postgres: {}", e
|
||||
);
|
||||
|
||||
// Reconnect to Postgres just in case. During tests, I noticed
|
||||
// that queries in `check()` can fail with `connection closed`,
|
||||
// but `cli.is_closed()` above doesn't detect it. Even if old
|
||||
// connection is still alive, it will be dropped when we reassign
|
||||
// `client` to a new connection.
|
||||
client = conf.connect(NoTls);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
info!(
|
||||
downtime_info = self.downtime_info(),
|
||||
"could not connect to Postgres: {}, retrying", e
|
||||
);
|
||||
self.report_down();
|
||||
|
||||
// Establish a new connection and try again.
|
||||
client = conf.connect(NoTls);
|
||||
}
|
||||
}
|
||||
|
||||
// Reset the `last_checked` timestamp and sleep before the next iteration.
|
||||
self.last_checked = Utc::now();
|
||||
loop {
|
||||
// We use `continue` a lot, so it's more convenient to sleep at the top of the loop.
|
||||
// But skip the first sleep, so we can connect to Postgres immediately.
|
||||
if sleep {
|
||||
// Should be outside of the mutex lock to allow others to read while we sleep.
|
||||
thread::sleep(MONITOR_CHECK_INTERVAL);
|
||||
} else {
|
||||
sleep = true;
|
||||
}
|
||||
}
|
||||
|
||||
#[instrument(skip_all)]
|
||||
fn check(&mut self, cli: &mut Client) -> anyhow::Result<()> {
|
||||
// This is new logic, only enable if the feature flag is set.
|
||||
// TODO: remove this once we are sure that it works OR drop it altogether.
|
||||
if self.experimental {
|
||||
// Check if the total active time or sessions across all databases has changed.
|
||||
// If it did, it means that user executed some queries. In theory, it can even go down if
|
||||
// some databases were dropped, but it's still user activity.
|
||||
match get_database_stats(cli) {
|
||||
Ok((active_time, sessions)) => {
|
||||
let mut detected_activity = false;
|
||||
match &mut client {
|
||||
Ok(cli) => {
|
||||
if cli.is_closed() {
|
||||
info!("connection to Postgres is closed, trying to reconnect");
|
||||
|
||||
if let Some(prev_active_time) = self.active_time {
|
||||
if active_time != prev_active_time {
|
||||
detected_activity = true;
|
||||
// Connection is closed, reconnect and try again.
|
||||
client = conf.connect(NoTls);
|
||||
continue;
|
||||
}
|
||||
|
||||
// This is a new logic, only enable if the feature flag is set.
|
||||
// TODO: remove this once we are sure that it works OR drop it altogether.
|
||||
if compute.has_feature(ComputeFeature::ActivityMonitorExperimental) {
|
||||
// First, check if the total active time or sessions across all databases has changed.
|
||||
// If it did, it means that user executed some queries. In theory, it can even go down if
|
||||
// some databases were dropped, but it's still a user activity.
|
||||
match get_database_stats(cli) {
|
||||
Ok((active_time, sessions)) => {
|
||||
let mut detected_activity = false;
|
||||
|
||||
prev_active_time = match prev_active_time {
|
||||
Some(prev_active_time) => {
|
||||
if active_time != prev_active_time {
|
||||
detected_activity = true;
|
||||
}
|
||||
Some(active_time)
|
||||
}
|
||||
None => Some(active_time),
|
||||
};
|
||||
prev_sessions = match prev_sessions {
|
||||
Some(prev_sessions) => {
|
||||
if sessions != prev_sessions {
|
||||
detected_activity = true;
|
||||
}
|
||||
Some(sessions)
|
||||
}
|
||||
None => Some(sessions),
|
||||
};
|
||||
|
||||
if detected_activity {
|
||||
// Update the last active time and continue, we don't need to
|
||||
// check backends state change.
|
||||
compute.update_last_active(Some(Utc::now()));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!("could not get database statistics: {}", e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
self.active_time = Some(active_time);
|
||||
}
|
||||
|
||||
if let Some(prev_sessions) = self.sessions {
|
||||
if sessions != prev_sessions {
|
||||
detected_activity = true;
|
||||
// Second, if database statistics is the same, check all backends state change,
|
||||
// maybe there is some with more recent activity. `get_backends_state_change()`
|
||||
// can return None or stale timestamp, so it's `compute.update_last_active()`
|
||||
// responsibility to check if the new timestamp is more recent than the current one.
|
||||
// This helps us to discover new sessions, that did nothing yet.
|
||||
match get_backends_state_change(cli) {
|
||||
Ok(last_active) => {
|
||||
compute.update_last_active(last_active);
|
||||
}
|
||||
Err(e) => {
|
||||
error!("could not get backends state change: {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
// Finally, if there are existing (logical) walsenders, do not suspend.
|
||||
//
|
||||
// walproposer doesn't currently show up in pg_stat_replication,
|
||||
// but protect if it will be
|
||||
let ws_count_query = "select count(*) from pg_stat_replication where application_name != 'walproposer';";
|
||||
match cli.query_one(ws_count_query, &[]) {
|
||||
Ok(r) => match r.try_get::<&str, i64>("count") {
|
||||
Ok(num_ws) => {
|
||||
if num_ws > 0 {
|
||||
compute.update_last_active(Some(Utc::now()));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
self.sessions = Some(sessions);
|
||||
|
||||
if detected_activity {
|
||||
// Update the last active time and continue, we don't need to
|
||||
// check backends state change.
|
||||
self.last_active = Some(Utc::now());
|
||||
return Ok(());
|
||||
Err(e) => {
|
||||
warn!("failed to parse walsenders count: {:?}", e);
|
||||
continue;
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
warn!("failed to get list of walsenders: {:?}", e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
return Err(anyhow::anyhow!("could not get database statistics: {}", e));
|
||||
//
|
||||
// Don't suspend compute if there is an active logical replication subscription
|
||||
//
|
||||
// `where pid is not null` – to filter out read only computes and subscription on branches
|
||||
//
|
||||
let logical_subscriptions_query =
|
||||
"select count(*) from pg_stat_subscription where pid is not null;";
|
||||
match cli.query_one(logical_subscriptions_query, &[]) {
|
||||
Ok(row) => match row.try_get::<&str, i64>("count") {
|
||||
Ok(num_subscribers) => {
|
||||
if num_subscribers > 0 {
|
||||
compute.update_last_active(Some(Utc::now()));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("failed to parse `pg_stat_subscription` count: {:?}", e);
|
||||
continue;
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"failed to get list of active logical replication subscriptions: {:?}",
|
||||
e
|
||||
);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
//
|
||||
// Do not suspend compute if autovacuum is running
|
||||
//
|
||||
let autovacuum_count_query = "select count(*) from pg_stat_activity where backend_type = 'autovacuum worker'";
|
||||
match cli.query_one(autovacuum_count_query, &[]) {
|
||||
Ok(r) => match r.try_get::<&str, i64>("count") {
|
||||
Ok(num_workers) => {
|
||||
if num_workers > 0 {
|
||||
compute.update_last_active(Some(Utc::now()));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("failed to parse autovacuum workers count: {:?}", e);
|
||||
continue;
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
warn!("failed to get list of autovacuum workers: {:?}", e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If database statistics are the same, check all backends for state changes.
|
||||
// Maybe there are some with more recent activity. `get_backends_state_change()`
|
||||
// can return None or stale timestamp, so it's `compute.update_last_active()`
|
||||
// responsibility to check if the new timestamp is more recent than the current one.
|
||||
// This helps us to discover new sessions that have not done anything yet.
|
||||
match get_backends_state_change(cli) {
|
||||
Ok(last_active) => match (last_active, self.last_active) {
|
||||
(Some(last_active), Some(prev_last_active)) => {
|
||||
if last_active > prev_last_active {
|
||||
self.last_active = Some(last_active);
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
(Some(last_active), None) => {
|
||||
self.last_active = Some(last_active);
|
||||
return Ok(());
|
||||
}
|
||||
_ => {}
|
||||
},
|
||||
Err(e) => {
|
||||
return Err(anyhow::anyhow!(
|
||||
"could not get backends state change: {}",
|
||||
e
|
||||
));
|
||||
debug!("could not connect to Postgres: {}, retrying", e);
|
||||
|
||||
// Establish a new connection and try again.
|
||||
client = conf.connect(NoTls);
|
||||
}
|
||||
}
|
||||
|
||||
// If there are existing (logical) walsenders, do not suspend.
|
||||
//
|
||||
// N.B. walproposer doesn't currently show up in pg_stat_replication,
|
||||
// but protect if it will.
|
||||
const WS_COUNT_QUERY: &str =
|
||||
"select count(*) from pg_stat_replication where application_name != 'walproposer';";
|
||||
match cli.query_one(WS_COUNT_QUERY, &[]) {
|
||||
Ok(r) => match r.try_get::<&str, i64>("count") {
|
||||
Ok(num_ws) => {
|
||||
if num_ws > 0 {
|
||||
self.last_active = Some(Utc::now());
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
let err: anyhow::Error = e.into();
|
||||
return Err(err.context("failed to parse walsenders count"));
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
return Err(anyhow::anyhow!("failed to get list of walsenders: {}", e));
|
||||
}
|
||||
}
|
||||
|
||||
// Don't suspend compute if there is an active logical replication subscription
|
||||
//
|
||||
// `where pid is not null` – to filter out read only computes and subscription on branches
|
||||
const LOGICAL_SUBSCRIPTIONS_QUERY: &str =
|
||||
"select count(*) from pg_stat_subscription where pid is not null;";
|
||||
match cli.query_one(LOGICAL_SUBSCRIPTIONS_QUERY, &[]) {
|
||||
Ok(row) => match row.try_get::<&str, i64>("count") {
|
||||
Ok(num_subscribers) => {
|
||||
if num_subscribers > 0 {
|
||||
self.last_active = Some(Utc::now());
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
return Err(anyhow::anyhow!(
|
||||
"failed to parse 'pg_stat_subscription' count: {}",
|
||||
e
|
||||
));
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
return Err(anyhow::anyhow!(
|
||||
"failed to get list of active logical replication subscriptions: {}",
|
||||
e
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Do not suspend compute if autovacuum is running
|
||||
const AUTOVACUUM_COUNT_QUERY: &str =
|
||||
"select count(*) from pg_stat_activity where backend_type = 'autovacuum worker'";
|
||||
match cli.query_one(AUTOVACUUM_COUNT_QUERY, &[]) {
|
||||
Ok(r) => match r.try_get::<&str, i64>("count") {
|
||||
Ok(num_workers) => {
|
||||
if num_workers > 0 {
|
||||
self.last_active = Some(Utc::now());
|
||||
return Ok(());
|
||||
};
|
||||
}
|
||||
Err(e) => {
|
||||
return Err(anyhow::anyhow!(
|
||||
"failed to parse autovacuum workers count: {}",
|
||||
e
|
||||
));
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
return Err(anyhow::anyhow!(
|
||||
"failed to get list of autovacuum workers: {}",
|
||||
e
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
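The monitor's `check()` above boils down to a handful of catalog queries (walsenders, logical subscriptions, autovacuum workers). A hedged stand-alone sketch of one of them; the connection string is a placeholder, the query text is taken from the code above:

```rust
use postgres::{Client, NoTls};

fn has_active_walsenders(cli: &mut Client) -> anyhow::Result<bool> {
    // Same query as in check(): walproposer is excluded because it is not a
    // user-visible walsender.
    let row = cli.query_one(
        "select count(*) from pg_stat_replication where application_name != 'walproposer';",
        &[],
    )?;
    Ok(row.try_get::<&str, i64>("count")? > 0)
}

fn main() -> anyhow::Result<()> {
    // Placeholder connection string.
    let mut cli = Client::connect("postgresql://cloud_admin@localhost:5432/postgres", NoTls)?;
    println!("walsenders active: {}", has_active_walsenders(&mut cli)?);
    Ok(())
}
```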
@@ -412,24 +315,9 @@ fn get_backends_state_change(cli: &mut Client) -> anyhow::Result<Option<DateTime
/// Launch a separate compute monitor thread and return its `JoinHandle`.
pub fn launch_monitor(compute: &Arc<ComputeNode>) -> thread::JoinHandle<()> {
    let compute = Arc::clone(compute);
    let experimental = compute.has_feature(ComputeFeature::ActivityMonitorExperimental);
    let now = Utc::now();
    let mut monitor = ComputeMonitor {
        compute,
        last_active: None,
        last_checked: now,
        last_up: now,
        active_time: None,
        sessions: None,
        experimental,
    };

    let span = span!(Level::INFO, "compute_monitor");
    thread::Builder::new()
        .name("compute-monitor".into())
        .spawn(move || {
            let _enter = span.enter();
            monitor.run();
        })
        .spawn(move || watch_compute_activity(&compute))
        .expect("cannot launch compute monitor thread")
}
@@ -50,13 +50,13 @@ fn restart_rsyslog() -> Result<()> {

pub fn configure_audit_rsyslog(
    log_directory: String,
    tag: Option<String>,
    tag: &str,
    remote_endpoint: &str,
) -> Result<()> {
    let config_content: String = format!(
        include_str!("config_template/compute_audit_rsyslog_template.conf"),
        log_directory = log_directory,
        tag = tag.unwrap_or("".to_string()),
        tag = tag,
        remote_endpoint = remote_endpoint
    );

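The `format!`/`include_str!` pattern above substitutes named placeholders into a template that is embedded at compile time. A tiny hedged sketch, with an inline template standing in for the real config file:

```rust
fn main() {
    // Stand-in template; the real one lives in
    // config_template/compute_audit_rsyslog_template.conf.
    let config_content = format!(
        "tag: {tag}\ndir: {log_directory}\nremote: {remote_endpoint}\n",
        tag = "proj/ep",
        log_directory = "/var/log/pgaudit",
        remote_endpoint = "syslog.example.invalid:5514",
    );
    print!("{config_content}");
}
```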
@@ -3,6 +3,7 @@ use std::{io::Write, os::unix::fs::OpenOptionsExt, path::Path, time::Duration};
|
||||
use anyhow::{Context, Result, bail};
|
||||
use compute_api::responses::TlsConfig;
|
||||
use ring::digest;
|
||||
use spki::der::{Decode, PemReader};
|
||||
use x509_cert::Certificate;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
@@ -51,7 +52,7 @@ pub fn update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) {
|
||||
match try_update_key_path_blocking(pg_data, tls_config) {
|
||||
Ok(()) => break,
|
||||
Err(e) => {
|
||||
tracing::error!(error = ?e, "could not create key file");
|
||||
tracing::error!("could not create key file {e:?}");
|
||||
std::thread::sleep(Duration::from_secs(1))
|
||||
}
|
||||
}
|
||||
@@ -91,14 +92,8 @@ fn try_update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) -> Resul
|
||||
fn verify_key_cert(key: &str, cert: &str) -> Result<()> {
|
||||
use x509_cert::der::oid::db::rfc5912::ECDSA_WITH_SHA_256;
|
||||
|
||||
let certs = Certificate::load_pem_chain(cert.as_bytes())
|
||||
.context("decoding PEM encoded certificates")?;
|
||||
|
||||
// First certificate is our server-cert,
|
||||
// all the rest of the certs are the CA cert chain.
|
||||
let Some(cert) = certs.first() else {
|
||||
bail!("no certificates found");
|
||||
};
|
||||
let cert = Certificate::decode(&mut PemReader::new(cert.as_bytes()).context("pem reader")?)
|
||||
.context("decode cert")?;
|
||||
|
||||
match cert.signature_algorithm.oid {
|
||||
ECDSA_WITH_SHA_256 => {
|
||||
@@ -120,82 +115,3 @@ fn verify_key_cert(key: &str, cert: &str) -> Result<()> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::verify_key_cert;
|
||||
|
||||
/// Real certificate chain file, generated by cert-manager in dev.
|
||||
/// The server auth certificate has expired since 2025-04-24T15:41:35Z.
|
||||
const CERT: &str = "
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIICCDCCAa+gAwIBAgIQKhLomFcNULbZA/bPdGzaSzAKBggqhkjOPQQDAjBEMQsw
|
||||
CQYDVQQGEwJVUzESMBAGA1UEChMJTmVvbiBJbmMuMSEwHwYDVQQDExhOZW9uIEs4
|
||||
cyBJbnRlcm1lZGlhdGUgQ0EwHhcNMjUwNDIzMTU0MTM1WhcNMjUwNDI0MTU0MTM1
|
||||
WjBBMT8wPQYDVQQDEzZjb21wdXRlLXdpc3B5LWdyYXNzLXcwY21laWp3LmRlZmF1
|
||||
bHQuc3ZjLmNsdXN0ZXIubG9jYWwwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAATF
|
||||
QCcG2m/EVHAiZtSsYgVnHgoTjUL/Jtwfdrpvz2t0bVRZmBmSKhlo53uPV9Y5eKFG
|
||||
AmR54p9/gT2eO3xU7vAgo4GFMIGCMA4GA1UdDwEB/wQEAwIFoDAMBgNVHRMBAf8E
|
||||
AjAAMB8GA1UdIwQYMBaAFFR2JAhXkeiNQNEixTvAYIwxUu3QMEEGA1UdEQQ6MDiC
|
||||
NmNvbXB1dGUtd2lzcHktZ3Jhc3MtdzBjbWVpancuZGVmYXVsdC5zdmMuY2x1c3Rl
|
||||
ci5sb2NhbDAKBggqhkjOPQQDAgNHADBEAiBLG22wKG8XS9e9RxBT+kmUx/kIThcP
|
||||
DIpp7jx0PrFcdQIgEMTdnXpx5Cv/Z0NIEDxtMHUD7G0vuRPfztki36JuakM=
|
||||
-----END CERTIFICATE-----
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIICFzCCAb6gAwIBAgIUbbX98N2Ip6lWAONRk8dU9hSz+YIwCgYIKoZIzj0EAwIw
|
||||
RDELMAkGA1UEBhMCVVMxEjAQBgNVBAoTCU5lb24gSW5jLjEhMB8GA1UEAxMYTmVv
|
||||
biBBV1MgSW50ZXJtZWRpYXRlIENBMB4XDTI1MDQyMjE1MTAxMFoXDTI1MDcyMTE1
|
||||
MTAxMFowRDELMAkGA1UEBhMCVVMxEjAQBgNVBAoTCU5lb24gSW5jLjEhMB8GA1UE
|
||||
AxMYTmVvbiBLOHMgSW50ZXJtZWRpYXRlIENBMFkwEwYHKoZIzj0CAQYIKoZIzj0D
|
||||
AQcDQgAE5++m5owqNI4BPMTVNIUQH0qvU7pYhdpHGVGhdj/Lgars6ROvE6uSNQV4
|
||||
SAmJN5HBzj5/6kLQaTPWpXW7EHXjK6OBjTCBijAOBgNVHQ8BAf8EBAMCAQYwEgYD
|
||||
VR0TAQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQUVHYkCFeR6I1A0SLFO8BgjDFS7dAw
|
||||
HwYDVR0jBBgwFoAUgHfNXfyKtHO0V9qoLOWCjkNiaI8wJAYDVR0eAQH/BBowGKAW
|
||||
MBSCEi5zdmMuY2x1c3Rlci5sb2NhbDAKBggqhkjOPQQDAgNHADBEAiBObVFFdXaL
|
||||
QpOXmN60dYUNnQRwjKreFduEkQgOdOlssgIgVAdJJQFgvlrvEOBhY8j5WyeKRwUN
|
||||
k/ALs6KpgaFBCGY=
|
||||
-----END CERTIFICATE-----
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIB4jCCAYegAwIBAgIUFlxWFn/11yoGdmD+6gf+yQMToS0wCgYIKoZIzj0EAwIw
|
||||
ODELMAkGA1UEBhMCVVMxEjAQBgNVBAoTCU5lb24gSW5jLjEVMBMGA1UEAxMMTmVv
|
||||
biBSb290IENBMB4XDTI1MDQwMzA3MTUyMloXDTI2MDQwMzA3MTUyMlowRDELMAkG
|
||||
A1UEBhMCVVMxEjAQBgNVBAoTCU5lb24gSW5jLjEhMB8GA1UEAxMYTmVvbiBBV1Mg
|
||||
SW50ZXJtZWRpYXRlIENBMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEqonG/IQ6
|
||||
ZxtEtOUTkkoNopPieXDO5CBKUkNFTGeJEB7OxRlSpYJgsBpaYIaD6Vc4sVk3thIF
|
||||
p+pLw52idQOIN6NjMGEwDgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8w
|
||||
HQYDVR0OBBYEFIB3zV38irRztFfaqCzlgo5DYmiPMB8GA1UdIwQYMBaAFKh7M4/G
|
||||
FHvr/ORDQZt4bMLlJvHCMAoGCCqGSM49BAMCA0kAMEYCIQCbS4x7QPslONzBYbjC
|
||||
UQaQ0QLDW4CJHvQ4u4gbWFG87wIhAJMsHQHjP9qTT27Q65zQCR7O8QeLAfha1jrH
|
||||
Ag/LsxSr
|
||||
-----END CERTIFICATE-----
|
||||
";
|
||||
|
||||
/// The key corresponding to [`CERT`]
|
||||
const KEY: &str = "
|
||||
-----BEGIN EC PRIVATE KEY-----
|
||||
MHcCAQEEIDnAnrqmIJjndCLWP1iIO5X3X63Aia48TGpGuMXwvm6IoAoGCCqGSM49
|
||||
AwEHoUQDQgAExUAnBtpvxFRwImbUrGIFZx4KE41C/ybcH3a6b89rdG1UWZgZkioZ
|
||||
aOd7j1fWOXihRgJkeeKff4E9njt8VO7wIA==
|
||||
-----END EC PRIVATE KEY-----
|
||||
";
|
||||
|
||||
/// An incorrect key.
|
||||
const INCORRECT_KEY: &str = "
|
||||
-----BEGIN EC PRIVATE KEY-----
|
||||
MHcCAQEEIL6WqqBDyvM0HWz7Ir5M5+jhFWB7IzOClGn26OPrzHCXoAoGCCqGSM49
|
||||
AwEHoUQDQgAE7XVvdOy5lfwtNKb+gJEUtnG+DrnnXLY5LsHDeGQKV9PTRcEMeCrG
|
||||
YZzHyML4P6Sr4yi2ts+4B9i47uvAG8+XwQ==
|
||||
-----END EC PRIVATE KEY-----
|
||||
";
|
||||
|
||||
#[test]
|
||||
fn certificate_verification() {
|
||||
verify_key_cert(KEY, CERT).unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(expected = "private key file does not match certificate")]
|
||||
fn certificate_verification_fail() {
|
||||
verify_key_cert(INCORRECT_KEY, CERT).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,16 +6,13 @@ license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
base64.workspace = true
|
||||
camino.workspace = true
|
||||
clap.workspace = true
|
||||
comfy-table.workspace = true
|
||||
futures.workspace = true
|
||||
humantime.workspace = true
|
||||
jsonwebtoken.workspace = true
|
||||
nix.workspace = true
|
||||
once_cell.workspace = true
|
||||
pem.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
hyper0.workspace = true
|
||||
regex.workspace = true
|
||||
@@ -23,8 +20,6 @@ reqwest = { workspace = true, features = ["blocking", "json"] }
|
||||
scopeguard.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
sha2.workspace = true
|
||||
spki.workspace = true
|
||||
thiserror.workspace = true
|
||||
toml.workspace = true
|
||||
toml_edit.workspace = true
|
||||
|
||||
@@ -18,11 +18,12 @@ use anyhow::{Context, Result, anyhow, bail};
use clap::Parser;
use compute_api::spec::ComputeMode;
use control_plane::endpoint::ComputeControlPlane;
use control_plane::endpoint_storage::{ENDPOINT_STORAGE_DEFAULT_PORT, EndpointStorage};
use control_plane::local_env::{
    EndpointStorageConf, InitForceMode, LocalEnv, NeonBroker, NeonLocalInitConf,
    NeonLocalInitPageserverConf, SafekeeperConf,
    InitForceMode, LocalEnv, NeonBroker, NeonLocalInitConf, NeonLocalInitPageserverConf,
    ObjectStorageConf, SafekeeperConf,
};
use control_plane::object_storage::OBJECT_STORAGE_DEFAULT_PORT;
use control_plane::object_storage::ObjectStorage;
use control_plane::pageserver::PageServerNode;
use control_plane::safekeeper::SafekeeperNode;
use control_plane::storage_controller::{
@@ -62,7 +63,7 @@ const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);
const DEFAULT_BRANCH_NAME: &str = "main";
project_git_version!(GIT_VERSION);

const DEFAULT_PG_VERSION: u32 = 17;
const DEFAULT_PG_VERSION: u32 = 16;

const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";
@@ -92,7 +93,7 @@ enum NeonLocalCmd {
|
||||
#[command(subcommand)]
|
||||
Safekeeper(SafekeeperCmd),
|
||||
#[command(subcommand)]
|
||||
EndpointStorage(EndpointStorageCmd),
|
||||
ObjectStorage(ObjectStorageCmd),
|
||||
#[command(subcommand)]
|
||||
Endpoint(EndpointCmd),
|
||||
#[command(subcommand)]
|
||||
@@ -459,14 +460,14 @@ enum SafekeeperCmd {
|
||||
|
||||
#[derive(clap::Subcommand)]
|
||||
#[clap(about = "Manage object storage")]
|
||||
enum EndpointStorageCmd {
|
||||
Start(EndpointStorageStartCmd),
|
||||
Stop(EndpointStorageStopCmd),
|
||||
enum ObjectStorageCmd {
|
||||
Start(ObjectStorageStartCmd),
|
||||
Stop(ObjectStorageStopCmd),
|
||||
}
|
||||
|
||||
#[derive(clap::Args)]
|
||||
#[clap(about = "Start object storage")]
|
||||
struct EndpointStorageStartCmd {
|
||||
struct ObjectStorageStartCmd {
|
||||
#[clap(short = 't', long, help = "timeout until we fail the command")]
|
||||
#[arg(default_value = "10s")]
|
||||
start_timeout: humantime::Duration,
|
||||
@@ -474,7 +475,7 @@ struct EndpointStorageStartCmd {
|
||||
|
||||
#[derive(clap::Args)]
|
||||
#[clap(about = "Stop object storage")]
|
||||
struct EndpointStorageStopCmd {
|
||||
struct ObjectStorageStopCmd {
|
||||
#[arg(value_enum, default_value = "fast")]
|
||||
#[clap(
|
||||
short = 'm',
|
||||
@@ -551,7 +552,6 @@ enum EndpointCmd {
|
||||
Start(EndpointStartCmdArgs),
|
||||
Reconfigure(EndpointReconfigureCmdArgs),
|
||||
Stop(EndpointStopCmdArgs),
|
||||
GenerateJwt(EndpointGenerateJwtCmdArgs),
|
||||
}
|
||||
|
||||
#[derive(clap::Args)]
|
||||
@@ -699,13 +699,6 @@ struct EndpointStopCmdArgs {
|
||||
mode: String,
|
||||
}
|
||||
|
||||
#[derive(clap::Args)]
|
||||
#[clap(about = "Generate a JWT for an endpoint")]
|
||||
struct EndpointGenerateJwtCmdArgs {
|
||||
#[clap(help = "Postgres endpoint id")]
|
||||
endpoint_id: String,
|
||||
}
|
||||
|
||||
#[derive(clap::Subcommand)]
|
||||
#[clap(about = "Manage neon_local branch name mappings")]
|
||||
enum MappingsCmd {
|
||||
@@ -796,9 +789,7 @@ fn main() -> Result<()> {
|
||||
}
|
||||
NeonLocalCmd::StorageBroker(subcmd) => rt.block_on(handle_storage_broker(&subcmd, env)),
|
||||
NeonLocalCmd::Safekeeper(subcmd) => rt.block_on(handle_safekeeper(&subcmd, env)),
|
||||
NeonLocalCmd::EndpointStorage(subcmd) => {
|
||||
rt.block_on(handle_endpoint_storage(&subcmd, env))
|
||||
}
|
||||
NeonLocalCmd::ObjectStorage(subcmd) => rt.block_on(handle_object_storage(&subcmd, env)),
|
||||
NeonLocalCmd::Endpoint(subcmd) => rt.block_on(handle_endpoint(&subcmd, env)),
|
||||
NeonLocalCmd::Mappings(subcmd) => handle_mappings(&subcmd, env),
|
||||
};
|
||||
@@ -1015,8 +1006,8 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
|
||||
}
|
||||
})
|
||||
.collect(),
|
||||
endpoint_storage: EndpointStorageConf {
|
||||
port: ENDPOINT_STORAGE_DEFAULT_PORT,
|
||||
object_storage: ObjectStorageConf {
|
||||
port: OBJECT_STORAGE_DEFAULT_PORT,
|
||||
},
|
||||
pg_distrib_dir: None,
|
||||
neon_distrib_dir: None,
|
||||
@@ -1537,16 +1528,6 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
.with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
|
||||
endpoint.stop(&args.mode, args.destroy)?;
|
||||
}
|
||||
EndpointCmd::GenerateJwt(args) => {
|
||||
let endpoint_id = &args.endpoint_id;
|
||||
let endpoint = cplane
|
||||
.endpoints
|
||||
.get(endpoint_id)
|
||||
.with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
|
||||
let jwt = endpoint.generate_jwt()?;
|
||||
|
||||
print!("{jwt}");
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -1736,15 +1717,12 @@ async fn handle_safekeeper(subcmd: &SafekeeperCmd, env: &local_env::LocalEnv) ->
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_endpoint_storage(
|
||||
subcmd: &EndpointStorageCmd,
|
||||
env: &local_env::LocalEnv,
|
||||
) -> Result<()> {
|
||||
use EndpointStorageCmd::*;
|
||||
let storage = EndpointStorage::from_env(env);
|
||||
async fn handle_object_storage(subcmd: &ObjectStorageCmd, env: &local_env::LocalEnv) -> Result<()> {
|
||||
use ObjectStorageCmd::*;
|
||||
let storage = ObjectStorage::from_env(env);
|
||||
|
||||
// In tests like test_forward_compatibility or test_graceful_cluster_restart
|
||||
// old neon binaries (without endpoint_storage) are present
|
||||
// old neon binaries (without object_storage) are present
|
||||
if !storage.bin.exists() {
|
||||
eprintln!(
|
||||
"{} binary not found. Ignore if this is a compatibility test",
|
||||
@@ -1754,13 +1732,13 @@ async fn handle_endpoint_storage(
|
||||
}
|
||||
|
||||
match subcmd {
|
||||
Start(EndpointStorageStartCmd { start_timeout }) => {
|
||||
Start(ObjectStorageStartCmd { start_timeout }) => {
|
||||
if let Err(e) = storage.start(start_timeout).await {
|
||||
eprintln!("endpoint_storage start failed: {e}");
|
||||
eprintln!("object_storage start failed: {e}");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
Stop(EndpointStorageStopCmd { stop_mode }) => {
|
||||
Stop(ObjectStorageStopCmd { stop_mode }) => {
|
||||
let immediate = match stop_mode {
|
||||
StopMode::Fast => false,
|
||||
StopMode::Immediate => true,
|
||||
@@ -1870,10 +1848,10 @@ async fn handle_start_all_impl(
|
||||
}
|
||||
|
||||
js.spawn(async move {
|
||||
EndpointStorage::from_env(env)
|
||||
ObjectStorage::from_env(env)
|
||||
.start(&retry_timeout)
|
||||
.await
|
||||
.map_err(|e| e.context("start endpoint_storage"))
|
||||
.map_err(|e| e.context("start object_storage"))
|
||||
});
|
||||
})();
|
||||
|
||||
@@ -1972,9 +1950,9 @@ async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
|
||||
}
|
||||
}
|
||||
|
||||
let storage = EndpointStorage::from_env(env);
|
||||
let storage = ObjectStorage::from_env(env);
|
||||
if let Err(e) = storage.stop(immediate) {
|
||||
eprintln!("endpoint_storage stop failed: {:#}", e);
|
||||
eprintln!("object_storage stop failed: {:#}", e);
|
||||
}
|
||||
|
||||
for ps_conf in &env.pageservers {
|
||||
|
||||
@@ -42,30 +42,22 @@ use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
||||
|
||||
use anyhow::{Context, Result, anyhow, bail};
|
||||
use compute_api::requests::{ComputeClaims, ConfigurationRequest};
|
||||
use compute_api::requests::ConfigurationRequest;
|
||||
use compute_api::responses::{
|
||||
ComputeConfig, ComputeCtlConfig, ComputeStatus, ComputeStatusResponse, TlsConfig,
|
||||
ComputeConfig, ComputeCtlConfig, ComputeStatus, ComputeStatusResponse,
|
||||
};
|
||||
use compute_api::spec::{
|
||||
Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PgIdent,
|
||||
RemoteExtSpec, Role,
|
||||
};
|
||||
use jsonwebtoken::jwk::{
|
||||
AlgorithmParameters, CommonParameters, EllipticCurve, Jwk, JwkSet, KeyAlgorithm, KeyOperations,
|
||||
OctetKeyPairParameters, OctetKeyPairType, PublicKeyUse,
|
||||
};
|
||||
use nix::sys::signal::{Signal, kill};
|
||||
use pageserver_api::shard::ShardStripeSize;
|
||||
use pem::Pem;
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use safekeeper_api::membership::SafekeeperGeneration;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sha2::{Digest, Sha256};
|
||||
use spki::der::Decode;
|
||||
use spki::{SubjectPublicKeyInfo, SubjectPublicKeyInfoRef};
|
||||
use tracing::debug;
|
||||
use url::Host;
|
||||
use utils::id::{NodeId, TenantId, TimelineId};
|
||||
@@ -90,7 +82,6 @@ pub struct EndpointConf {
|
||||
drop_subscriptions_before_start: bool,
|
||||
features: Vec<ComputeFeature>,
|
||||
cluster: Option<Cluster>,
|
||||
compute_ctl_config: ComputeCtlConfig,
|
||||
}
|
||||
|
||||
//
|
||||
@@ -146,37 +137,6 @@ impl ComputeControlPlane {
|
||||
.unwrap_or(self.base_port)
|
||||
}
|
||||
|
||||
/// Create a JSON Web Key Set. This ideally matches the way we create a JWKS
|
||||
/// from the production control plane.
|
||||
fn create_jwks_from_pem(pem: &Pem) -> Result<JwkSet> {
|
||||
let spki: SubjectPublicKeyInfoRef = SubjectPublicKeyInfo::from_der(pem.contents())?;
|
||||
let public_key = spki.subject_public_key.raw_bytes();
|
||||
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(public_key);
|
||||
let key_hash = hasher.finalize();
|
||||
|
||||
Ok(JwkSet {
|
||||
keys: vec![Jwk {
|
||||
common: CommonParameters {
|
||||
public_key_use: Some(PublicKeyUse::Signature),
|
||||
key_operations: Some(vec![KeyOperations::Verify]),
|
||||
key_algorithm: Some(KeyAlgorithm::EdDSA),
|
||||
key_id: Some(base64::encode_config(key_hash, base64::URL_SAFE_NO_PAD)),
|
||||
x509_url: None::<String>,
|
||||
x509_chain: None::<Vec<String>>,
|
||||
x509_sha1_fingerprint: None::<String>,
|
||||
x509_sha256_fingerprint: None::<String>,
|
||||
},
|
||||
algorithm: AlgorithmParameters::OctetKeyPair(OctetKeyPairParameters {
|
||||
key_type: OctetKeyPairType::OctetKeyPair,
|
||||
curve: EllipticCurve::Ed25519,
|
||||
x: base64::encode_config(public_key, base64::URL_SAFE_NO_PAD),
|
||||
}),
|
||||
}],
|
||||
})
|
||||
}
|
||||
|
||||
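The key id in `create_jwks_from_pem` above is just a URL-safe base64 encoding of the SHA-256 of the raw public key. A hedged sketch of that derivation; the key bytes are a placeholder, and the base64 0.13-style `encode_config` call mirrors the code above:

```rust
use sha2::{Digest, Sha256};

fn main() {
    // Placeholder for the raw Ed25519 public key bytes extracted from the PEM.
    let public_key: &[u8] = &[0u8; 32];

    let mut hasher = Sha256::new();
    hasher.update(public_key);
    let key_hash = hasher.finalize();

    // Matches the kid construction in create_jwks_from_pem above.
    let kid = base64::encode_config(key_hash, base64::URL_SAFE_NO_PAD);
    println!("kid = {kid}");
}
```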
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new_endpoint(
|
||||
&mut self,
|
||||
@@ -194,10 +154,6 @@ impl ComputeControlPlane {
|
||||
let pg_port = pg_port.unwrap_or_else(|| self.get_port());
|
||||
let external_http_port = external_http_port.unwrap_or_else(|| self.get_port() + 1);
|
||||
let internal_http_port = internal_http_port.unwrap_or_else(|| external_http_port + 1);
|
||||
let compute_ctl_config = ComputeCtlConfig {
|
||||
jwks: Self::create_jwks_from_pem(&self.env.read_public_key()?)?,
|
||||
tls: None::<TlsConfig>,
|
||||
};
|
||||
let ep = Arc::new(Endpoint {
|
||||
endpoint_id: endpoint_id.to_owned(),
|
||||
pg_address: SocketAddr::new(IpAddr::from(Ipv4Addr::LOCALHOST), pg_port),
|
||||
@@ -225,7 +181,6 @@ impl ComputeControlPlane {
|
||||
reconfigure_concurrency: 1,
|
||||
features: vec![],
|
||||
cluster: None,
|
||||
compute_ctl_config: compute_ctl_config.clone(),
|
||||
});
|
||||
|
||||
ep.create_endpoint_dir()?;
|
||||
@@ -245,7 +200,6 @@ impl ComputeControlPlane {
|
||||
reconfigure_concurrency: 1,
|
||||
features: vec![],
|
||||
cluster: None,
|
||||
compute_ctl_config,
|
||||
})?,
|
||||
)?;
|
||||
std::fs::write(
|
||||
@@ -288,6 +242,7 @@ impl ComputeControlPlane {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Endpoint {
|
||||
/// used as the directory name
|
||||
endpoint_id: String,
|
||||
@@ -316,9 +271,6 @@ pub struct Endpoint {
|
||||
features: Vec<ComputeFeature>,
|
||||
// Cluster settings
|
||||
cluster: Option<Cluster>,
|
||||
|
||||
/// The compute_ctl config for the endpoint's compute.
|
||||
compute_ctl_config: ComputeCtlConfig,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq)]
|
||||
@@ -381,7 +333,6 @@ impl Endpoint {
|
||||
drop_subscriptions_before_start: conf.drop_subscriptions_before_start,
|
||||
features: conf.features,
|
||||
cluster: conf.cluster,
|
||||
compute_ctl_config: conf.compute_ctl_config,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -629,13 +580,6 @@ impl Endpoint {
|
||||
Ok(safekeeper_connstrings)
|
||||
}
|
||||
|
||||
/// Generate a JWT with the correct claims.
|
||||
pub fn generate_jwt(&self) -> Result<String> {
|
||||
self.env.generate_auth_token(&ComputeClaims {
|
||||
compute_id: self.endpoint_id.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
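`generate_jwt` above is a thin wrapper over the shared signing helper in `local_env`. A hedged usage fragment; `env` and the endpoint id are placeholders, and the claims struct is assumed to carry only the compute_id field shown above:

```rust
// Fragment: the JWT carries the compute_id claim and is signed with the
// environment's private key via generate_auth_token (see local_env further down).
let claims = compute_api::requests::ComputeClaims {
    compute_id: "ep-snowy-sun-123456".to_string(),
};
let jwt: String = env.generate_auth_token(&claims)?;
println!("{jwt}");
```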
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn start(
|
||||
&self,
|
||||
@@ -762,10 +706,14 @@ impl Endpoint {
|
||||
|
||||
ComputeConfig {
|
||||
spec: Some(spec),
|
||||
compute_ctl_config: self.compute_ctl_config.clone(),
|
||||
compute_ctl_config: ComputeCtlConfig::default(),
|
||||
}
|
||||
};
|
||||
|
||||
// TODO(tristan957): Remove the write to spec.json after compatibility
|
||||
// tests work themselves out
|
||||
let spec_path = self.endpoint_path().join("spec.json");
|
||||
std::fs::write(spec_path, serde_json::to_string_pretty(&config.spec)?)?;
|
||||
let config_path = self.endpoint_path().join("config.json");
|
||||
std::fs::write(config_path, serde_json::to_string_pretty(&config)?)?;
|
||||
|
||||
@@ -775,6 +723,16 @@ impl Endpoint {
|
||||
.append(true)
|
||||
.open(self.endpoint_path().join("compute.log"))?;
|
||||
|
||||
// TODO(tristan957): Remove when compatibility tests are no longer an
|
||||
// issue
|
||||
let old_compute_ctl = {
|
||||
let mut cmd = Command::new(self.env.neon_distrib_dir.join("compute_ctl"));
|
||||
let help_output = cmd.arg("--help").output()?;
|
||||
let help_output = String::from_utf8_lossy(&help_output.stdout);
|
||||
|
||||
!help_output.contains("--config")
|
||||
};
|
||||
|
||||
// Launch compute_ctl
|
||||
let conn_str = self.connstr("cloud_admin", "postgres");
|
||||
println!("Starting postgres node at '{}'", conn_str);
|
||||
@@ -793,8 +751,19 @@ impl Endpoint {
|
||||
])
|
||||
.args(["--pgdata", self.pgdata().to_str().unwrap()])
|
||||
.args(["--connstr", &conn_str])
|
||||
.arg("--config")
|
||||
.arg(self.endpoint_path().join("config.json").as_os_str())
|
||||
// TODO(tristan957): Change this to --config when compatibility tests
|
||||
// are no longer an issue
|
||||
.args([
|
||||
"--spec-path",
|
||||
self.endpoint_path()
|
||||
.join(if old_compute_ctl {
|
||||
"spec.json"
|
||||
} else {
|
||||
"config.json"
|
||||
})
|
||||
.to_str()
|
||||
.unwrap(),
|
||||
])
|
||||
.args([
|
||||
"--pgbin",
|
||||
self.env
|
||||
@@ -805,7 +774,16 @@ impl Endpoint {
|
||||
])
|
||||
// TODO: It would be nice if we generated compute IDs with the same
|
||||
// algorithm as the real control plane.
|
||||
.args(["--compute-id", &self.endpoint_id])
|
||||
.args([
|
||||
"--compute-id",
|
||||
&format!(
|
||||
"compute-{}",
|
||||
SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs()
|
||||
),
|
||||
])
|
||||
.stdin(std::process::Stdio::null())
|
||||
.stderr(logfile.try_clone()?)
|
||||
.stdout(logfile);
|
||||
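The `--help` probe above decides between the old `--spec-path` flag and the newer `--config` flag. A hedged stand-alone sketch of the same detection; the binary path is invented:

```rust
use std::process::Command;

fn supports_config_flag(compute_ctl: &std::path::Path) -> std::io::Result<bool> {
    // Mirror of the probe above: old compute_ctl binaries do not mention
    // --config in their --help output.
    let out = Command::new(compute_ctl).arg("--help").output()?;
    Ok(String::from_utf8_lossy(&out.stdout).contains("--config"))
}

fn main() -> std::io::Result<()> {
    let flag = if supports_config_flag(std::path::Path::new("/usr/local/bin/compute_ctl"))? {
        "--config"
    } else {
        "--spec-path"
    };
    println!("using {flag}");
    Ok(())
}
```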
@@ -903,7 +881,6 @@ impl Endpoint {
|
||||
self.external_http_address.port()
|
||||
),
|
||||
)
|
||||
.bearer_auth(self.generate_jwt()?)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
@@ -980,7 +957,6 @@ impl Endpoint {
|
||||
self.external_http_address.port()
|
||||
))
|
||||
.header(CONTENT_TYPE.as_str(), "application/json")
|
||||
.bearer_auth(self.generate_jwt()?)
|
||||
.body(
|
||||
serde_json::to_string(&ConfigurationRequest {
|
||||
spec,
|
||||
|
||||
@@ -9,8 +9,8 @@
|
||||
mod background_process;
|
||||
pub mod broker;
|
||||
pub mod endpoint;
|
||||
pub mod endpoint_storage;
|
||||
pub mod local_env;
|
||||
pub mod object_storage;
|
||||
pub mod pageserver;
|
||||
pub mod postgresql_conf;
|
||||
pub mod safekeeper;
|
||||
|
||||
@@ -12,18 +12,17 @@ use std::{env, fs};
|
||||
|
||||
use anyhow::{Context, bail};
|
||||
use clap::ValueEnum;
|
||||
use pem::Pem;
|
||||
use postgres_backend::AuthType;
|
||||
use reqwest::Url;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utils::auth::encode_from_key_file;
|
||||
use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};
|
||||
|
||||
use crate::endpoint_storage::{ENDPOINT_STORAGE_REMOTE_STORAGE_DIR, EndpointStorage};
|
||||
use crate::object_storage::{OBJECT_STORAGE_REMOTE_STORAGE_DIR, ObjectStorage};
|
||||
use crate::pageserver::{PAGESERVER_REMOTE_STORAGE_DIR, PageServerNode};
|
||||
use crate::safekeeper::SafekeeperNode;
|
||||
|
||||
pub const DEFAULT_PG_VERSION: u32 = 17;
|
||||
pub const DEFAULT_PG_VERSION: u32 = 16;
|
||||
|
||||
//
|
||||
// This data structures represents neon_local CLI config
|
||||
@@ -57,7 +56,6 @@ pub struct LocalEnv {
|
||||
|
||||
// used to issue tokens during e.g pg start
|
||||
pub private_key_path: PathBuf,
|
||||
/// Path to environment's public key
|
||||
pub public_key_path: PathBuf,
|
||||
|
||||
pub broker: NeonBroker,
|
||||
@@ -72,7 +70,7 @@ pub struct LocalEnv {
|
||||
|
||||
pub safekeepers: Vec<SafekeeperConf>,
|
||||
|
||||
pub endpoint_storage: EndpointStorageConf,
|
||||
pub object_storage: ObjectStorageConf,
|
||||
|
||||
// Control plane upcall API for pageserver: if None, we will not run storage_controller If set, this will
|
||||
// be propagated into each pageserver's configuration.
|
||||
@@ -110,7 +108,7 @@ pub struct OnDiskConfig {
|
||||
)]
|
||||
pub pageservers: Vec<PageServerConf>,
|
||||
pub safekeepers: Vec<SafekeeperConf>,
|
||||
pub endpoint_storage: EndpointStorageConf,
|
||||
pub object_storage: ObjectStorageConf,
|
||||
pub control_plane_api: Option<Url>,
|
||||
pub control_plane_hooks_api: Option<Url>,
|
||||
pub control_plane_compute_hook_api: Option<Url>,
|
||||
@@ -144,7 +142,7 @@ pub struct NeonLocalInitConf {
|
||||
pub storage_controller: Option<NeonStorageControllerConf>,
|
||||
pub pageservers: Vec<NeonLocalInitPageserverConf>,
|
||||
pub safekeepers: Vec<SafekeeperConf>,
|
||||
pub endpoint_storage: EndpointStorageConf,
|
||||
pub object_storage: ObjectStorageConf,
|
||||
pub control_plane_api: Option<Url>,
|
||||
pub control_plane_hooks_api: Option<Url>,
|
||||
pub generate_local_ssl_certs: bool,
|
||||
@@ -152,7 +150,7 @@ pub struct NeonLocalInitConf {
|
||||
|
||||
#[derive(Serialize, Default, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
#[serde(default)]
|
||||
pub struct EndpointStorageConf {
|
||||
pub struct ObjectStorageConf {
|
||||
pub port: u16,
|
||||
}
|
||||
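The one-field conf struct above deserializes from a small TOML table in the on-disk neon_local config. A hedged sketch of that round trip; the table location inside the config file is assumed, not quoted from the repository:

```rust
// Hedged sketch: mirrors the struct definition above.
#[derive(serde::Deserialize, Default, Debug)]
#[serde(default)]
struct ObjectStorageConf {
    port: u16,
}

fn main() {
    let conf: ObjectStorageConf = toml::from_str("port = 9993").unwrap();
    assert_eq!(conf.port, 9993);
    println!("{conf:?}");
}
```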
|
||||
@@ -413,8 +411,8 @@ impl LocalEnv {
|
||||
self.pg_dir(pg_version, "lib")
|
||||
}
|
||||
|
||||
pub fn endpoint_storage_bin(&self) -> PathBuf {
|
||||
self.neon_distrib_dir.join("endpoint_storage")
|
||||
pub fn object_storage_bin(&self) -> PathBuf {
|
||||
self.neon_distrib_dir.join("object_storage")
|
||||
}
|
||||
|
||||
pub fn pageserver_bin(&self) -> PathBuf {
|
||||
@@ -450,8 +448,8 @@ impl LocalEnv {
|
||||
self.base_data_dir.join("safekeepers").join(data_dir_name)
|
||||
}
|
||||
|
||||
pub fn endpoint_storage_data_dir(&self) -> PathBuf {
|
||||
self.base_data_dir.join("endpoint_storage")
|
||||
pub fn object_storage_data_dir(&self) -> PathBuf {
|
||||
self.base_data_dir.join("object_storage")
|
||||
}
|
||||
|
||||
pub fn get_pageserver_conf(&self, id: NodeId) -> anyhow::Result<&PageServerConf> {
|
||||
@@ -615,7 +613,7 @@ impl LocalEnv {
|
||||
control_plane_compute_hook_api: _,
|
||||
branch_name_mappings,
|
||||
generate_local_ssl_certs,
|
||||
endpoint_storage,
|
||||
object_storage,
|
||||
} = on_disk_config;
|
||||
LocalEnv {
|
||||
base_data_dir: repopath.to_owned(),
|
||||
@@ -632,7 +630,7 @@ impl LocalEnv {
|
||||
control_plane_hooks_api,
|
||||
branch_name_mappings,
|
||||
generate_local_ssl_certs,
|
||||
endpoint_storage,
|
||||
object_storage,
|
||||
}
|
||||
};
|
||||
|
||||
@@ -742,7 +740,7 @@ impl LocalEnv {
|
||||
control_plane_compute_hook_api: None,
|
||||
branch_name_mappings: self.branch_name_mappings.clone(),
|
||||
generate_local_ssl_certs: self.generate_local_ssl_certs,
|
||||
endpoint_storage: self.endpoint_storage.clone(),
|
||||
object_storage: self.object_storage.clone(),
|
||||
},
|
||||
)
|
||||
}
|
||||
@@ -760,11 +758,11 @@ impl LocalEnv {
|
||||
|
||||
// this function is used only for testing purposes in CLI e g generate tokens during init
|
||||
pub fn generate_auth_token<S: Serialize>(&self, claims: &S) -> anyhow::Result<String> {
|
||||
let key = self.read_private_key()?;
|
||||
encode_from_key_file(claims, &key)
|
||||
let private_key_path = self.get_private_key_path();
|
||||
let key_data = fs::read(private_key_path)?;
|
||||
encode_from_key_file(claims, &key_data)
|
||||
}
|
||||
|
||||
/// Get the path to the private key.
|
||||
pub fn get_private_key_path(&self) -> PathBuf {
|
||||
if self.private_key_path.is_absolute() {
|
||||
self.private_key_path.to_path_buf()
|
||||
@@ -773,29 +771,6 @@ impl LocalEnv {
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the path to the public key.
|
||||
pub fn get_public_key_path(&self) -> PathBuf {
|
||||
if self.public_key_path.is_absolute() {
|
||||
self.public_key_path.to_path_buf()
|
||||
} else {
|
||||
self.base_data_dir.join(&self.public_key_path)
|
||||
}
|
||||
}
|
||||
|
||||
/// Read the contents of the private key file.
|
||||
pub fn read_private_key(&self) -> anyhow::Result<Pem> {
|
||||
let private_key_path = self.get_private_key_path();
|
||||
let pem = pem::parse(fs::read(private_key_path)?)?;
|
||||
Ok(pem)
|
||||
}
|
||||
|
||||
/// Read the contents of the public key file.
|
||||
pub fn read_public_key(&self) -> anyhow::Result<Pem> {
|
||||
let public_key_path = self.get_public_key_path();
|
||||
let pem = pem::parse(fs::read(public_key_path)?)?;
|
||||
Ok(pem)
|
||||
}
|
||||
|
||||
/// Materialize the [`NeonLocalInitConf`] to disk. Called during [`neon_local init`].
|
||||
pub fn init(conf: NeonLocalInitConf, force: &InitForceMode) -> anyhow::Result<()> {
|
||||
let base_path = base_path();
|
||||
@@ -849,7 +824,7 @@ impl LocalEnv {
|
||||
control_plane_api,
|
||||
generate_local_ssl_certs,
|
||||
control_plane_hooks_api,
|
||||
endpoint_storage,
|
||||
object_storage,
|
||||
} = conf;
|
||||
|
||||
// Find postgres binaries.
|
||||
@@ -901,7 +876,7 @@ impl LocalEnv {
|
||||
control_plane_hooks_api,
|
||||
branch_name_mappings: Default::default(),
|
||||
generate_local_ssl_certs,
|
||||
endpoint_storage,
|
||||
object_storage,
|
||||
};
|
||||
|
||||
if generate_local_ssl_certs {
|
||||
@@ -929,13 +904,13 @@ impl LocalEnv {
|
||||
.context("pageserver init failed")?;
|
||||
}
|
||||
|
||||
EndpointStorage::from_env(&env)
|
||||
ObjectStorage::from_env(&env)
|
||||
.init()
|
||||
.context("object storage init failed")?;
|
||||
|
||||
// setup remote remote location for default LocalFs remote storage
|
||||
std::fs::create_dir_all(env.base_data_dir.join(PAGESERVER_REMOTE_STORAGE_DIR))?;
|
||||
std::fs::create_dir_all(env.base_data_dir.join(ENDPOINT_STORAGE_REMOTE_STORAGE_DIR))?;
|
||||
std::fs::create_dir_all(env.base_data_dir.join(OBJECT_STORAGE_REMOTE_STORAGE_DIR))?;
|
||||
|
||||
env.persist_config()
|
||||
}
|
||||
@@ -981,7 +956,6 @@ fn generate_auth_keys(private_key_path: &Path, public_key_path: &Path) -> anyhow
|
||||
String::from_utf8_lossy(&keygen_output.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
// Extract the public key from the private key file
|
||||
//
|
||||
// openssl pkey -in auth_private_key.pem -pubout -out auth_public_key.pem
|
||||
@@ -998,7 +972,6 @@ fn generate_auth_keys(private_key_path: &Path, public_key_path: &Path) -> anyhow
|
||||
String::from_utf8_lossy(&keygen_output.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -1,33 +1,34 @@
|
||||
use crate::background_process::{self, start_process, stop_process};
|
||||
use crate::local_env::LocalEnv;
|
||||
use anyhow::anyhow;
|
||||
use anyhow::{Context, Result};
|
||||
use camino::Utf8PathBuf;
|
||||
use std::io::Write;
|
||||
use std::time::Duration;
|
||||
|
||||
/// Directory within .neon which will be used by default for LocalFs remote storage.
|
||||
pub const ENDPOINT_STORAGE_REMOTE_STORAGE_DIR: &str = "local_fs_remote_storage/endpoint_storage";
|
||||
pub const ENDPOINT_STORAGE_DEFAULT_PORT: u16 = 9993;
|
||||
pub const OBJECT_STORAGE_REMOTE_STORAGE_DIR: &str = "local_fs_remote_storage/object_storage";
|
||||
pub const OBJECT_STORAGE_DEFAULT_PORT: u16 = 9993;
|
||||
|
||||
pub struct EndpointStorage {
|
||||
pub struct ObjectStorage {
|
||||
pub bin: Utf8PathBuf,
|
||||
pub data_dir: Utf8PathBuf,
|
||||
pub pemfile: Utf8PathBuf,
|
||||
pub port: u16,
|
||||
}
|
||||
|
||||
impl EndpointStorage {
|
||||
pub fn from_env(env: &LocalEnv) -> EndpointStorage {
|
||||
EndpointStorage {
|
||||
bin: Utf8PathBuf::from_path_buf(env.endpoint_storage_bin()).unwrap(),
|
||||
data_dir: Utf8PathBuf::from_path_buf(env.endpoint_storage_data_dir()).unwrap(),
|
||||
impl ObjectStorage {
|
||||
pub fn from_env(env: &LocalEnv) -> ObjectStorage {
|
||||
ObjectStorage {
|
||||
bin: Utf8PathBuf::from_path_buf(env.object_storage_bin()).unwrap(),
|
||||
data_dir: Utf8PathBuf::from_path_buf(env.object_storage_data_dir()).unwrap(),
|
||||
pemfile: Utf8PathBuf::from_path_buf(env.public_key_path.clone()).unwrap(),
|
||||
port: env.endpoint_storage.port,
|
||||
port: env.object_storage.port,
|
||||
}
|
||||
}
|
||||
|
||||
fn config_path(&self) -> Utf8PathBuf {
|
||||
self.data_dir.join("endpoint_storage.json")
|
||||
self.data_dir.join("object_storage.json")
|
||||
}
|
||||
|
||||
fn listen_addr(&self) -> Utf8PathBuf {
|
||||
@@ -48,7 +49,7 @@ impl EndpointStorage {
|
||||
let cfg = Cfg {
|
||||
listen: self.listen_addr(),
|
||||
pemfile: parent.join(self.pemfile.clone()),
|
||||
local_path: parent.join(ENDPOINT_STORAGE_REMOTE_STORAGE_DIR),
|
||||
local_path: parent.join(OBJECT_STORAGE_REMOTE_STORAGE_DIR),
|
||||
r#type: "LocalFs".to_string(),
|
||||
};
|
||||
std::fs::create_dir_all(self.config_path().parent().unwrap())?;
|
||||
@@ -58,19 +59,24 @@ impl EndpointStorage {
|
||||
}
|
||||
|
||||
pub async fn start(&self, retry_timeout: &Duration) -> Result<()> {
|
||||
println!("Starting endpoint_storage at {}", self.listen_addr());
|
||||
println!("Starting s3 proxy at {}", self.listen_addr());
|
||||
std::io::stdout().flush().context("flush stdout")?;
|
||||
|
||||
let process_status_check = || async {
|
||||
let res = reqwest::Client::new().get(format!("http://{}/metrics", self.listen_addr()));
|
||||
match res.send().await {
|
||||
Ok(res) => Ok(res.status().is_success()),
|
||||
Err(_) => Ok(false),
|
||||
tokio::time::sleep(Duration::from_millis(500)).await;
|
||||
let res = reqwest::Client::new()
|
||||
.get(format!("http://{}/metrics", self.listen_addr()))
|
||||
.send()
|
||||
.await;
|
||||
match res {
|
||||
Ok(response) if response.status().is_success() => Ok(true),
|
||||
Ok(_) => Err(anyhow!("Failed to query /metrics")),
|
||||
Err(e) => Err(anyhow!("Failed to check node status: {e}")),
|
||||
}
|
||||
};
|
||||
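The readiness closure above simply polls the service's `/metrics` endpoint until it answers with a success status. A hedged stand-alone sketch; the listen address is a placeholder:

```rust
// Minimal async sketch of the health probe used above.
async fn metrics_ok(addr: &str) -> bool {
    match reqwest::Client::new()
        .get(format!("http://{addr}/metrics"))
        .send()
        .await
    {
        Ok(resp) => resp.status().is_success(),
        Err(_) => false,
    }
}

#[tokio::main]
async fn main() {
    // Placeholder listen address.
    println!("ready: {}", metrics_ok("127.0.0.1:9993").await);
}
```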
|
||||
let res = start_process(
|
||||
"endpoint_storage",
|
||||
"object_storage",
|
||||
&self.data_dir.clone().into_std_path_buf(),
|
||||
&self.bin.clone().into_std_path_buf(),
|
||||
vec![self.config_path().to_string()],
|
||||
@@ -88,14 +94,14 @@ impl EndpointStorage {
|
||||
}
|
||||
|
||||
pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {
|
||||
stop_process(immediate, "endpoint_storage", &self.pid_file())
|
||||
stop_process(immediate, "object_storage", &self.pid_file())
|
||||
}
|
||||
|
||||
fn log_file(&self) -> Utf8PathBuf {
|
||||
self.data_dir.join("endpoint_storage.log")
|
||||
self.data_dir.join("object_storage.log")
|
||||
}
|
||||
|
||||
fn pid_file(&self) -> Utf8PathBuf {
|
||||
self.data_dir.join("endpoint_storage.pid")
|
||||
self.data_dir.join("object_storage.pid")
|
||||
}
|
||||
}
|
||||
@@ -413,11 +413,6 @@ impl PageServerNode {
|
||||
.map(serde_json::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse 'compaction_algorithm' json")?,
|
||||
compaction_shard_ancestor: settings
|
||||
.remove("compaction_shard_ancestor")
|
||||
.map(|x| x.parse::<bool>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'compaction_shard_ancestor' as a bool")?,
|
||||
compaction_l0_first: settings
|
||||
.remove("compaction_l0_first")
|
||||
.map(|x| x.parse::<bool>())
|
||||
|
||||
@@ -18,7 +18,6 @@ use pageserver_api::models::{
|
||||
};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_client::mgmt_api::ResponseErrorMessageExt;
|
||||
use pem::Pem;
|
||||
use postgres_backend::AuthType;
|
||||
use reqwest::{Certificate, Method};
|
||||
use serde::de::DeserializeOwned;
|
||||
@@ -35,8 +34,8 @@ use crate::local_env::{LocalEnv, NeonStorageControllerConf};
|
||||
|
||||
pub struct StorageController {
|
||||
env: LocalEnv,
|
||||
private_key: Option<Pem>,
|
||||
public_key: Option<Pem>,
|
||||
private_key: Option<Vec<u8>>,
|
||||
public_key: Option<String>,
|
||||
client: reqwest::Client,
|
||||
config: NeonStorageControllerConf,
|
||||
|
||||
@@ -117,9 +116,7 @@ impl StorageController {
|
||||
AuthType::Trust => (None, None),
|
||||
AuthType::NeonJWT => {
|
||||
let private_key_path = env.get_private_key_path();
|
||||
let private_key =
|
||||
pem::parse(fs::read(private_key_path).expect("failed to read private key"))
|
||||
.expect("failed to parse PEM file");
|
||||
let private_key = fs::read(private_key_path).expect("failed to read private key");
|
||||
|
||||
// If pageserver auth is enabled, this implicitly enables auth for this service,
|
||||
// using the same credentials.
|
||||
@@ -141,13 +138,9 @@ impl StorageController {
|
||||
.expect("Empty key dir")
|
||||
.expect("Error reading key dir");
|
||||
|
||||
pem::parse(std::fs::read_to_string(dent.path()).expect("Can't read public key"))
|
||||
.expect("Failed to parse PEM file")
|
||||
std::fs::read_to_string(dent.path()).expect("Can't read public key")
|
||||
} else {
|
||||
pem::parse(
|
||||
std::fs::read_to_string(&public_key_path).expect("Can't read public key"),
|
||||
)
|
||||
.expect("Failed to parse PEM file")
|
||||
std::fs::read_to_string(&public_key_path).expect("Can't read public key")
|
||||
};
|
||||
(Some(private_key), Some(public_key))
|
||||
}
|
||||
|
||||
16
deny.toml
16
deny.toml
@@ -45,7 +45,9 @@ allow = [
"ISC",
"MIT",
"MPL-2.0",
"OpenSSL",
"Unicode-3.0",
"Zlib",
]
confidence-threshold = 0.8
exceptions = [
@@ -54,6 +56,14 @@ exceptions = [
{ allow = ["Zlib"], name = "const_format", version = "*" },
]

[[licenses.clarify]]
name = "ring"
version = "*"
expression = "MIT AND ISC AND OpenSSL"
license-files = [
{ path = "LICENSE", hash = 0xbd0eed23 }
]

[licenses.private]
ignore = true
registries = []
@@ -106,11 +116,7 @@ name = "openssl"
unknown-registry = "warn"
unknown-git = "warn"
allow-registry = ["https://github.com/rust-lang/crates.io-index"]
allow-git = [
# Crate pinned to commit in origin repo due to opentelemetry version.
# TODO: Remove this once crate is fetched from crates.io again.
"https://github.com/mattiapenati/tower-otel",
]
allow-git = []

[sources.allow-org]
github = [

@@ -1,3 +1,4 @@

# Example docker compose configuration

The configuration in this directory is used for testing Neon docker images: it is
@@ -7,13 +8,3 @@ you can experiment with a miniature Neon system, use `cargo neon` rather than co
This configuration does not start the storage controller, because the controller
needs a way to reconfigure running computes, and no such thing exists in this setup.

## Generating the JWKS for a compute

```shell
openssl genpkey -algorithm Ed25519 -out private-key.pem
openssl pkey -in private-key.pem -pubout -out public-key.pem
openssl pkey -pubin -inform pem -in public-key.pem -pubout -outform der -out public-key.der
key="$(xxd -plain -cols 32 -s -32 public-key.der)"
key_id="$(printf '%s' "$key" | sha256sum | awk '{ print $1 }' | basenc --base64url --wrap=0)"
x="$(printf '%s' "$key" | basenc --base64url --wrap=0)"
```
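
Editor's note, not part of the diff above: as a minimal hedged sketch, and assuming `jq` is available and `key_id` and `x` are still set from the commands above, the computed values can be folded into a JWKS document of the same shape as the `compute_ctl_config.jwks` object that appears later in this diff:

```shell
# Sketch only: assemble a JWKS from the key_id and x computed above.
jq -n --arg kid "$key_id" --arg x "$x" '{
  "keys": [{
    "use": "sig",
    "key_ops": ["verify"],
    "alg": "EdDSA",
    "kid": $kid,
    "kty": "OKP",
    "crv": "Ed25519",
    "x": $x
  }]
}' > jwks.json
```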
@@ -1,3 +0,0 @@
-----BEGIN PRIVATE KEY-----
MC4CAQAwBQYDK2VwBCIEIOmnRbzt2AJ0d+S3aU1hiYOl/tXpvz1FmWBfwHYBgOma
-----END PRIVATE KEY-----
Binary file not shown.
@@ -1,3 +0,0 @@
-----BEGIN PUBLIC KEY-----
MCowBQYDK2VwAyEADY0al/U0bgB3+9fUGk+3PKWnsck9OyxN5DjHIN6Xep0=
-----END PUBLIC KEY-----
@@ -81,9 +81,19 @@ sed -i "s/TIMELINE_ID/${timeline_id}/" ${CONFIG_FILE}

cat ${CONFIG_FILE}

# TODO(tristan957): Remove these workarounds for backwards compatibility after
# the next compute release. That includes these next few lines and the
# --spec-path in the compute_ctl invocation.
if compute_ctl --help | grep --quiet -- '--config'; then
SPEC_PATH="$CONFIG_FILE"
else
jq '.spec' < "$CONFIG_FILE" > /tmp/spec.json
SPEC_PATH=/tmp/spec.json
fi

echo "Start compute node"
/usr/local/bin/compute_ctl --pgdata /var/db/postgres/compute \
-C "postgresql://cloud_admin@localhost:55433/postgres" \
-b /usr/local/bin/postgres \
--compute-id "compute-$RANDOM" \
--config "$CONFIG_FILE"
--spec-path "$SPEC_PATH"

@@ -142,19 +142,7 @@
},
"compute_ctl_config": {
"jwks": {
"keys": [
{
"use": "sig",
"key_ops": [
"verify"
],
"alg": "EdDSA",
"kid": "ZGIxMzAzOGY0YWQwODk2ODU1MTk1NzMxMDFkYmUyOWU2NzZkOWNjNjMyMGRkZGJjOWY0MjdjYWVmNzE1MjUyOAo=",
"kty": "OKP",
"crv": "Ed25519",
"x": "MGQ4ZDFhOTdmNTM0NmUwMDc3ZmJkN2Q0MWE0ZmI3M2NhNWE3YjFjOTNkM2IyYzRkZTQzOGM3MjBkZTk3N2E5ZAo="
}
]
"keys": []
}
}
}

@@ -9,20 +9,21 @@
# to verify custom image builds (e.g pre-published ones).
#
# A test script for postgres extensions
# Currently supports only v16+
# Currently supports only v16
#
set -eux -o pipefail

export COMPOSE_FILE='docker-compose.yml'
export COMPOSE_PROFILES=test-extensions
cd "$(dirname "${0}")"
COMPOSE_FILE='docker-compose.yml'
cd $(dirname $0)
COMPUTE_CONTAINER_NAME=docker-compose-compute-1
TEST_CONTAINER_NAME=docker-compose-neon-test-extensions-1
PSQL_OPTION="-h localhost -U cloud_admin -p 55433 -d postgres"

function cleanup() {
cleanup() {
echo "show container information"
docker ps
echo "stop containers..."
docker compose down
docker compose --profile test-extensions -f $COMPOSE_FILE down
}

for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
@@ -30,55 +31,55 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
echo "clean up containers if exists"
cleanup
PG_TEST_VERSION=$((pg_version < 16 ? 16 : pg_version))
PG_VERSION=${pg_version} PG_TEST_VERSION=${PG_TEST_VERSION} docker compose up --quiet-pull --build -d
PG_VERSION=$pg_version PG_TEST_VERSION=$PG_TEST_VERSION docker compose --profile test-extensions -f $COMPOSE_FILE up --quiet-pull --build -d

echo "wait until the compute is ready. timeout after 60s. "
cnt=0
while sleep 3; do
# check timeout
(( cnt += 3 ))
if [[ ${cnt} -gt 60 ]]; then
cnt=`expr $cnt + 3`
if [ $cnt -gt 60 ]; then
echo "timeout before the compute is ready."
exit 1
fi
if docker compose logs "compute_is_ready" | grep -q "accepting connections"; then
if docker compose --profile test-extensions -f $COMPOSE_FILE logs "compute_is_ready" | grep -q "accepting connections"; then
echo "OK. The compute is ready to connect."
echo "execute simple queries."
docker compose exec compute /bin/bash -c "psql ${PSQL_OPTION} -c 'SELECT 1'"
docker exec $COMPUTE_CONTAINER_NAME /bin/bash -c "psql $PSQL_OPTION"
break
fi
done

if [[ ${pg_version} -ge 16 ]]; then
if [ $pg_version -ge 16 ]; then
# This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail
# It cannot be moved to Dockerfile now because the database directory is created after the start of the container
echo Adding dummy config
docker compose exec compute touch /var/db/postgres/compute/compute_ctl_temp_override.conf
docker exec $COMPUTE_CONTAINER_NAME touch /var/db/postgres/compute/compute_ctl_temp_override.conf
# The following block copies the files for the pg_hintplan test to the compute node for the extension test in an isolated docker-compose environment
TMPDIR=$(mktemp -d)
docker compose cp neon-test-extensions:/ext-src/pg_hint_plan-src/data "${TMPDIR}/data"
docker compose cp "${TMPDIR}/data" compute:/ext-src/pg_hint_plan-src/
rm -rf "${TMPDIR}"
docker cp $TEST_CONTAINER_NAME:/ext-src/pg_hint_plan-src/data $TMPDIR/data
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/ext-src/pg_hint_plan-src/
rm -rf $TMPDIR
# The following block does the same for the contrib/file_fdw test
TMPDIR=$(mktemp -d)
docker compose cp neon-test-extensions:/postgres/contrib/file_fdw/data "${TMPDIR}/data"
docker compose cp "${TMPDIR}/data" compute:/postgres/contrib/file_fdw/data
rm -rf "${TMPDIR}"
docker cp $TEST_CONTAINER_NAME:/postgres/contrib/file_fdw/data $TMPDIR/data
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/postgres/contrib/file_fdw/data
rm -rf $TMPDIR
# Apply patches
docker compose exec -i neon-test-extensions bash -c "(cd /postgres && patch -p1)" <"../compute/patches/contrib_pg${pg_version}.patch"
cat ../compute/patches/contrib_pg${pg_version}.patch | docker exec -i $TEST_CONTAINER_NAME bash -c "(cd /postgres && patch -p1)"
# We are running tests now
rm -f testout.txt testout_contrib.txt
docker compose exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \
neon-test-extensions /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0
docker compose exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \
neon-test-extensions /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0
if [[ ${EXT_SUCCESS} -eq 0 || ${CONTRIB_SUCCESS} -eq 0 ]]; then
docker exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \
$TEST_CONTAINER_NAME /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0
docker exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \
$TEST_CONTAINER_NAME /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0
if [ $EXT_SUCCESS -eq 0 ] || [ $CONTRIB_SUCCESS -eq 0 ]; then
CONTRIB_FAILED=
FAILED=
[[ ${EXT_SUCCESS} -eq 0 ]] && FAILED=$(tail -1 testout.txt | awk '{for(i=1;i<=NF;i++){print "/ext-src/"$i;}}')
[[ ${CONTRIB_SUCCESS} -eq 0 ]] && CONTRIB_FAILED=$(tail -1 testout_contrib.txt | awk '{for(i=0;i<=NF;i++){print "/postgres/contrib/"$i;}}')
for d in ${FAILED} ${CONTRIB_FAILED}; do
docker compose exec neon-test-extensions bash -c 'for file in $(find '"${d}"' -name regression.diffs -o -name regression.out); do cat ${file}; done' || [[ ${?} -eq 1 ]]
[ $EXT_SUCCESS -eq 0 ] && FAILED=$(tail -1 testout.txt | awk '{for(i=1;i<=NF;i++){print "/ext-src/"$i;}}')
[ $CONTRIB_SUCCESS -eq 0 ] && CONTRIB_FAILED=$(tail -1 testout_contrib.txt | awk '{for(i=0;i<=NF;i++){print "/postgres/contrib/"$i;}}')
for d in $FAILED $CONTRIB_FAILED; do
docker exec $TEST_CONTAINER_NAME bash -c 'for file in $(find '"$d"' -name regression.diffs -o -name regression.out); do cat $file; done' || [ $? -eq 1 ]
done
exit 1
fi

@@ -1,8 +0,0 @@
|
||||
EXTENSION = pg_jsonschema
|
||||
DATA = pg_jsonschema--1.0.sql
|
||||
REGRESS = jsonschema_valid_api jsonschema_edge_cases
|
||||
REGRESS_OPTS = --load-extension=pg_jsonschema
|
||||
|
||||
PG_CONFIG ?= pg_config
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
include $(PGXS)
|
||||
@@ -1,87 +0,0 @@
|
||||
-- Schema with enums, nulls, extra properties disallowed
|
||||
SELECT jsonschema_is_valid('{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"status": { "type": "string", "enum": ["active", "inactive", "pending"] },
|
||||
"email": { "type": ["string", "null"], "format": "email" }
|
||||
},
|
||||
"required": ["status"],
|
||||
"additionalProperties": false
|
||||
}'::json);
|
||||
jsonschema_is_valid
|
||||
---------------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
-- Valid enum and null email
|
||||
SELECT jsonschema_validation_errors(
|
||||
'{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"status": { "type": "string", "enum": ["active", "inactive", "pending"] },
|
||||
"email": { "type": ["string", "null"], "format": "email" }
|
||||
},
|
||||
"required": ["status"],
|
||||
"additionalProperties": false
|
||||
}'::json,
|
||||
'{"status": "active", "email": null}'::json
|
||||
);
|
||||
jsonschema_validation_errors
|
||||
------------------------------
|
||||
{}
|
||||
(1 row)
|
||||
|
||||
-- Invalid enum value
|
||||
SELECT jsonschema_validation_errors(
|
||||
'{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"status": { "type": "string", "enum": ["active", "inactive", "pending"] },
|
||||
"email": { "type": ["string", "null"], "format": "email" }
|
||||
},
|
||||
"required": ["status"],
|
||||
"additionalProperties": false
|
||||
}'::json,
|
||||
'{"status": "disabled", "email": null}'::json
|
||||
);
|
||||
jsonschema_validation_errors
|
||||
----------------------------------------------------------------------
|
||||
{"\"disabled\" is not one of [\"active\",\"inactive\",\"pending\"]"}
|
||||
(1 row)
|
||||
|
||||
-- Invalid email format (assuming format is validated)
|
||||
SELECT jsonschema_validation_errors(
|
||||
'{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"status": { "type": "string", "enum": ["active", "inactive", "pending"] },
|
||||
"email": { "type": ["string", "null"], "format": "email" }
|
||||
},
|
||||
"required": ["status"],
|
||||
"additionalProperties": false
|
||||
}'::json,
|
||||
'{"status": "active", "email": "not-an-email"}'::json
|
||||
);
|
||||
jsonschema_validation_errors
|
||||
-----------------------------------------
|
||||
{"\"not-an-email\" is not a \"email\""}
|
||||
(1 row)
|
||||
|
||||
-- Extra property not allowed
|
||||
SELECT jsonschema_validation_errors(
|
||||
'{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"status": { "type": "string", "enum": ["active", "inactive", "pending"] },
|
||||
"email": { "type": ["string", "null"], "format": "email" }
|
||||
},
|
||||
"required": ["status"],
|
||||
"additionalProperties": false
|
||||
}'::json,
|
||||
'{"status": "active", "extra": "should not be here"}'::json
|
||||
);
|
||||
jsonschema_validation_errors
|
||||
--------------------------------------------------------------------
|
||||
{"Additional properties are not allowed ('extra' was unexpected)"}
|
||||
(1 row)
|
||||
|
||||
@@ -1,65 +0,0 @@
|
||||
-- Define schema
|
||||
SELECT jsonschema_is_valid('{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"username": { "type": "string" },
|
||||
"age": { "type": "integer" }
|
||||
},
|
||||
"required": ["username"]
|
||||
}'::json);
|
||||
jsonschema_is_valid
|
||||
---------------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
-- Valid instance
|
||||
SELECT jsonschema_validation_errors(
|
||||
'{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"username": { "type": "string" },
|
||||
"age": { "type": "integer" }
|
||||
},
|
||||
"required": ["username"]
|
||||
}'::json,
|
||||
'{"username": "alice", "age": 25}'::json
|
||||
);
|
||||
jsonschema_validation_errors
|
||||
------------------------------
|
||||
{}
|
||||
(1 row)
|
||||
|
||||
-- Invalid instance: missing required "username"
|
||||
SELECT jsonschema_validation_errors(
|
||||
'{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"username": { "type": "string" },
|
||||
"age": { "type": "integer" }
|
||||
},
|
||||
"required": ["username"]
|
||||
}'::json,
|
||||
'{"age": 25}'::json
|
||||
);
|
||||
jsonschema_validation_errors
|
||||
-----------------------------------------
|
||||
{"\"username\" is a required property"}
|
||||
(1 row)
|
||||
|
||||
-- Invalid instance: wrong type for "age"
|
||||
SELECT jsonschema_validation_errors(
|
||||
'{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"username": { "type": "string" },
|
||||
"age": { "type": "integer" }
|
||||
},
|
||||
"required": ["username"]
|
||||
}'::json,
|
||||
'{"username": "bob", "age": "twenty"}'::json
|
||||
);
|
||||
jsonschema_validation_errors
|
||||
-------------------------------------------
|
||||
{"\"twenty\" is not of type \"integer\""}
|
||||
(1 row)
|
||||
|
||||
@@ -1,66 +0,0 @@
|
||||
-- Schema with enums, nulls, extra properties disallowed
|
||||
SELECT jsonschema_is_valid('{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"status": { "type": "string", "enum": ["active", "inactive", "pending"] },
|
||||
"email": { "type": ["string", "null"], "format": "email" }
|
||||
},
|
||||
"required": ["status"],
|
||||
"additionalProperties": false
|
||||
}'::json);
|
||||
|
||||
-- Valid enum and null email
|
||||
SELECT jsonschema_validation_errors(
|
||||
'{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"status": { "type": "string", "enum": ["active", "inactive", "pending"] },
|
||||
"email": { "type": ["string", "null"], "format": "email" }
|
||||
},
|
||||
"required": ["status"],
|
||||
"additionalProperties": false
|
||||
}'::json,
|
||||
'{"status": "active", "email": null}'::json
|
||||
);
|
||||
|
||||
-- Invalid enum value
|
||||
SELECT jsonschema_validation_errors(
|
||||
'{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"status": { "type": "string", "enum": ["active", "inactive", "pending"] },
|
||||
"email": { "type": ["string", "null"], "format": "email" }
|
||||
},
|
||||
"required": ["status"],
|
||||
"additionalProperties": false
|
||||
}'::json,
|
||||
'{"status": "disabled", "email": null}'::json
|
||||
);
|
||||
|
||||
-- Invalid email format (assuming format is validated)
|
||||
SELECT jsonschema_validation_errors(
|
||||
'{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"status": { "type": "string", "enum": ["active", "inactive", "pending"] },
|
||||
"email": { "type": ["string", "null"], "format": "email" }
|
||||
},
|
||||
"required": ["status"],
|
||||
"additionalProperties": false
|
||||
}'::json,
|
||||
'{"status": "active", "email": "not-an-email"}'::json
|
||||
);
|
||||
|
||||
-- Extra property not allowed
|
||||
SELECT jsonschema_validation_errors(
|
||||
'{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"status": { "type": "string", "enum": ["active", "inactive", "pending"] },
|
||||
"email": { "type": ["string", "null"], "format": "email" }
|
||||
},
|
||||
"required": ["status"],
|
||||
"additionalProperties": false
|
||||
}'::json,
|
||||
'{"status": "active", "extra": "should not be here"}'::json
|
||||
);
|
||||
@@ -1,48 +0,0 @@
|
||||
-- Define schema
|
||||
SELECT jsonschema_is_valid('{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"username": { "type": "string" },
|
||||
"age": { "type": "integer" }
|
||||
},
|
||||
"required": ["username"]
|
||||
}'::json);
|
||||
|
||||
-- Valid instance
|
||||
SELECT jsonschema_validation_errors(
|
||||
'{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"username": { "type": "string" },
|
||||
"age": { "type": "integer" }
|
||||
},
|
||||
"required": ["username"]
|
||||
}'::json,
|
||||
'{"username": "alice", "age": 25}'::json
|
||||
);
|
||||
|
||||
-- Invalid instance: missing required "username"
|
||||
SELECT jsonschema_validation_errors(
|
||||
'{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"username": { "type": "string" },
|
||||
"age": { "type": "integer" }
|
||||
},
|
||||
"required": ["username"]
|
||||
}'::json,
|
||||
'{"age": 25}'::json
|
||||
);
|
||||
|
||||
-- Invalid instance: wrong type for "age"
|
||||
SELECT jsonschema_validation_errors(
|
||||
'{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"username": { "type": "string" },
|
||||
"age": { "type": "integer" }
|
||||
},
|
||||
"required": ["username"]
|
||||
}'::json,
|
||||
'{"username": "bob", "age": "twenty"}'::json
|
||||
);
|
||||
@@ -1,9 +0,0 @@
|
||||
EXTENSION = pg_session_jwt
|
||||
|
||||
REGRESS = basic_functions
|
||||
REGRESS_OPTS = --load-extension=$(EXTENSION)
|
||||
export PGOPTIONS = -c pg_session_jwt.jwk={"crv":"Ed25519","kty":"OKP","x":"R_Abz-63zJ00l-IraL5fQhwkhGVZCSooQFV5ntC3C7M"}
|
||||
|
||||
PG_CONFIG ?= pg_config
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
include $(PGXS)
|
||||
@@ -1,35 +0,0 @@
|
||||
-- Basic functionality tests for pg_session_jwt
|
||||
-- Test auth.init() function
|
||||
SELECT auth.init();
|
||||
init
|
||||
------
|
||||
|
||||
(1 row)
|
||||
|
||||
-- Test an invalid JWT
|
||||
SELECT auth.jwt_session_init('INVALID-JWT');
|
||||
ERROR: invalid JWT encoding
|
||||
-- Test creating a session with an expired JWT
|
||||
SELECT auth.jwt_session_init('eyJhbGciOiJFZERTQSJ9.eyJleHAiOjE3NDI1NjQ0MzIsImlhdCI6MTc0MjU2NDI1MiwianRpIjo0MjQyNDIsInN1YiI6InVzZXIxMjMifQ.A6FwKuaSduHB9O7Gz37g0uoD_U9qVS0JNtT7YABGVgB7HUD1AMFc9DeyhNntWBqncg8k5brv-hrNTuUh5JYMAw');
|
||||
ERROR: Token used after it has expired
|
||||
-- Test creating a session with a valid JWT
|
||||
SELECT auth.jwt_session_init('eyJhbGciOiJFZERTQSJ9.eyJleHAiOjQ4OTYxNjQyNTIsImlhdCI6MTc0MjU2NDI1MiwianRpIjo0MzQzNDMsInN1YiI6InVzZXIxMjMifQ.2TXVgjb6JSUq6_adlvp-m_SdOxZSyGS30RS9TLB0xu2N83dMSs2NybwE1NMU8Fb0tcAZR_ET7M2rSxbTrphfCg');
|
||||
jwt_session_init
|
||||
------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
-- Test auth.session() function
|
||||
SELECT auth.session();
|
||||
session
|
||||
-------------------------------------------------------------------------
|
||||
{"exp": 4896164252, "iat": 1742564252, "jti": 434343, "sub": "user123"}
|
||||
(1 row)
|
||||
|
||||
-- Test auth.user_id() function
|
||||
SELECT auth.user_id() AS user_id;
|
||||
user_id
|
||||
---------
|
||||
user123
|
||||
(1 row)
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
-- Basic functionality tests for pg_session_jwt
|
||||
|
||||
-- Test auth.init() function
|
||||
SELECT auth.init();
|
||||
|
||||
-- Test an invalid JWT
|
||||
SELECT auth.jwt_session_init('INVALID-JWT');
|
||||
|
||||
-- Test creating a session with an expired JWT
|
||||
SELECT auth.jwt_session_init('eyJhbGciOiJFZERTQSJ9.eyJleHAiOjE3NDI1NjQ0MzIsImlhdCI6MTc0MjU2NDI1MiwianRpIjo0MjQyNDIsInN1YiI6InVzZXIxMjMifQ.A6FwKuaSduHB9O7Gz37g0uoD_U9qVS0JNtT7YABGVgB7HUD1AMFc9DeyhNntWBqncg8k5brv-hrNTuUh5JYMAw');
|
||||
|
||||
-- Test creating a session with a valid JWT
|
||||
SELECT auth.jwt_session_init('eyJhbGciOiJFZERTQSJ9.eyJleHAiOjQ4OTYxNjQyNTIsImlhdCI6MTc0MjU2NDI1MiwianRpIjo0MzQzNDMsInN1YiI6InVzZXIxMjMifQ.2TXVgjb6JSUq6_adlvp-m_SdOxZSyGS30RS9TLB0xu2N83dMSs2NybwE1NMU8Fb0tcAZR_ET7M2rSxbTrphfCg');
|
||||
|
||||
-- Test auth.session() function
|
||||
SELECT auth.session();
|
||||
|
||||
-- Test auth.user_id() function
|
||||
SELECT auth.user_id() AS user_id;
|
||||
explained_queries.sql (new empty file)
@@ -160,7 +160,7 @@ pub struct CatalogObjects {
|
||||
pub databases: Vec<Database>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
pub struct ComputeCtlConfig {
|
||||
/// Set of JSON web keys that the compute can use to authenticate
|
||||
/// communication from the control plane.
|
||||
@@ -179,7 +179,7 @@ impl Default for ComputeCtlConfig {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
pub struct TlsConfig {
|
||||
pub key_path: String,
|
||||
pub cert_path: String,
|
||||
|
||||
@@ -242,22 +242,13 @@ impl RemoteExtSpec {
|
||||
|
||||
match self.extension_data.get(real_ext_name) {
|
||||
Some(_ext_data) => {
|
||||
// We have decided to use the Go naming convention due to Kubernetes.
|
||||
|
||||
let arch = match std::env::consts::ARCH {
|
||||
"x86_64" => "amd64",
|
||||
"aarch64" => "arm64",
|
||||
arch => arch,
|
||||
};
|
||||
|
||||
// Construct the path to the extension archive
|
||||
// BUILD_TAG/PG_MAJOR_VERSION/extensions/EXTENSION_NAME.tar.zst
|
||||
//
|
||||
// Keep it in sync with path generation in
|
||||
// https://github.com/neondatabase/build-custom-extensions/tree/main
|
||||
let archive_path_str = format!(
|
||||
"{build_tag}/{arch}/{pg_major_version}/extensions/{real_ext_name}.tar.zst"
|
||||
);
|
||||
let archive_path_str =
|
||||
format!("{build_tag}/{pg_major_version}/extensions/{real_ext_name}.tar.zst");
|
||||
Ok((
|
||||
real_ext_name.to_string(),
|
||||
RemotePath::from_string(&archive_path_str)?,
|
||||
|
||||
@@ -14,7 +14,6 @@ futures.workspace = true
|
||||
hyper0.workspace = true
|
||||
itertools.workspace = true
|
||||
jemalloc_pprof.workspace = true
|
||||
jsonwebtoken.workspace = true
|
||||
once_cell.workspace = true
|
||||
pprof.workspace = true
|
||||
regex.workspace = true
|
||||
|
||||
@@ -8,7 +8,6 @@ use bytes::{Bytes, BytesMut};
|
||||
use hyper::header::{AUTHORIZATION, CONTENT_DISPOSITION, CONTENT_TYPE, HeaderName};
|
||||
use hyper::http::HeaderValue;
|
||||
use hyper::{Body, Method, Request, Response};
|
||||
use jsonwebtoken::TokenData;
|
||||
use metrics::{Encoder, IntCounter, TextEncoder, register_int_counter};
|
||||
use once_cell::sync::Lazy;
|
||||
use pprof::ProfilerGuardBuilder;
|
||||
@@ -619,7 +618,7 @@ pub fn auth_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
|
||||
})?;
|
||||
let token = parse_token(header_value)?;
|
||||
|
||||
let data: TokenData<Claims> = auth.decode(token).map_err(|err| {
|
||||
let data = auth.decode(token).map_err(|err| {
|
||||
warn!("Authentication error: {err}");
|
||||
// Rely on From<AuthError> for ApiError impl
|
||||
err
|
||||
|
||||
@@ -35,7 +35,6 @@ nix = {workspace = true, optional = true}
|
||||
reqwest.workspace = true
|
||||
rand.workspace = true
|
||||
tracing-utils.workspace = true
|
||||
once_cell.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
bincode.workspace = true
|
||||
|
||||
@@ -181,7 +181,6 @@ pub struct ConfigToml {
|
||||
pub generate_unarchival_heatmap: Option<bool>,
|
||||
pub tracing: Option<Tracing>,
|
||||
pub enable_tls_page_service_api: bool,
|
||||
pub dev_mode: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
@@ -380,8 +379,6 @@ pub struct TenantConfigToml {
|
||||
/// size exceeds `compaction_upper_limit * checkpoint_distance`.
|
||||
pub compaction_upper_limit: usize,
|
||||
pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
|
||||
/// If true, enable shard ancestor compaction (enabled by default).
|
||||
pub compaction_shard_ancestor: bool,
|
||||
/// If true, compact down L0 across all tenant timelines before doing regular compaction. L0
|
||||
/// compaction must be responsive to avoid read amp during heavy ingestion. Defaults to true.
|
||||
pub compaction_l0_first: bool,
|
||||
@@ -658,7 +655,6 @@ impl Default for ConfigToml {
|
||||
generate_unarchival_heatmap: None,
|
||||
tracing: None,
|
||||
enable_tls_page_service_api: false,
|
||||
dev_mode: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -681,13 +677,12 @@ pub mod tenant_conf_defaults {
|
||||
|
||||
pub const DEFAULT_COMPACTION_PERIOD: &str = "20 s";
|
||||
pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;
|
||||
pub const DEFAULT_COMPACTION_SHARD_ANCESTOR: bool = true;
|
||||
|
||||
// This value needs to be tuned to avoid OOM. We have 3/4*CPUs threads for L0 compaction, that's
|
||||
// 3/4*8=6 on most of our pageservers. Compacting 10 layers requires a maximum of
|
||||
// DEFAULT_CHECKPOINT_DISTANCE*10 memory, that's 2560MB. So with this config, we can get a maximum peak
|
||||
// compaction usage of 15360MB.
|
||||
pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 10;
|
||||
// 3/4*16=9 on most of our pageservers. Compacting 20 layers requires about 1 GB memory (could
|
||||
// be reduced later by optimizing L0 hole calculation to avoid loading all keys into memory). So
|
||||
// with this config, we can get a maximum peak compaction usage of 9 GB.
|
||||
pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 20;
|
||||
// Enable L0 compaction pass and semaphore by default. L0 compaction must be responsive to avoid
|
||||
// read amp.
|
||||
pub const DEFAULT_COMPACTION_L0_FIRST: bool = true;
|
||||
@@ -704,11 +699,8 @@ pub mod tenant_conf_defaults {
|
||||
// Relevant: https://github.com/neondatabase/neon/issues/3394
|
||||
pub const DEFAULT_GC_PERIOD: &str = "1 hr";
|
||||
pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
|
||||
// Currently, any value other than 0 will trigger image layer creation preemption immediately with L0 backpressure
|
||||
// without looking at the exact number of L0 layers.
|
||||
// It was expected to have the following behavior:
|
||||
// > If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image
|
||||
// > layer creation will end immediately. Set to 0 to disable.
|
||||
// If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image
|
||||
// layer creation will end immediately. Set to 0 to disable.
|
||||
pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 3;
|
||||
pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
|
||||
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
|
||||
@@ -742,7 +734,6 @@ impl Default for TenantConfigToml {
|
||||
compaction_algorithm: crate::models::CompactionAlgorithmSettings {
|
||||
kind: DEFAULT_COMPACTION_ALGORITHM,
|
||||
},
|
||||
compaction_shard_ancestor: DEFAULT_COMPACTION_SHARD_ANCESTOR,
|
||||
compaction_l0_first: DEFAULT_COMPACTION_L0_FIRST,
|
||||
compaction_l0_semaphore: DEFAULT_COMPACTION_L0_SEMAPHORE,
|
||||
l0_flush_delay_threshold: None,
|
||||
|
||||
@@ -320,35 +320,6 @@ pub struct TimelineCreateRequest {
|
||||
pub mode: TimelineCreateRequestMode,
|
||||
}
|
||||
|
||||
impl TimelineCreateRequest {
|
||||
pub fn mode_tag(&self) -> &'static str {
|
||||
match &self.mode {
|
||||
TimelineCreateRequestMode::Branch { .. } => "branch",
|
||||
TimelineCreateRequestMode::ImportPgdata { .. } => "import",
|
||||
TimelineCreateRequestMode::Bootstrap { .. } => "bootstrap",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_import(&self) -> bool {
|
||||
matches!(self.mode, TimelineCreateRequestMode::ImportPgdata { .. })
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
pub enum ShardImportStatus {
|
||||
InProgress,
|
||||
Done,
|
||||
Error(String),
|
||||
}
|
||||
impl ShardImportStatus {
|
||||
pub fn is_terminal(&self) -> bool {
|
||||
match self {
|
||||
ShardImportStatus::InProgress => false,
|
||||
ShardImportStatus::Done | ShardImportStatus::Error(_) => true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Storage controller specific extensions to [`TimelineInfo`].
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct TimelineCreateResponseStorcon {
|
||||
@@ -555,8 +526,6 @@ pub struct TenantConfigPatch {
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub compaction_algorithm: FieldPatch<CompactionAlgorithmSettings>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub compaction_shard_ancestor: FieldPatch<bool>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub compaction_l0_first: FieldPatch<bool>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub compaction_l0_semaphore: FieldPatch<bool>,
|
||||
@@ -646,9 +615,6 @@ pub struct TenantConfig {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub compaction_algorithm: Option<CompactionAlgorithmSettings>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub compaction_shard_ancestor: Option<bool>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub compaction_l0_first: Option<bool>,
|
||||
|
||||
@@ -758,7 +724,6 @@ impl TenantConfig {
|
||||
mut compaction_threshold,
|
||||
mut compaction_upper_limit,
|
||||
mut compaction_algorithm,
|
||||
mut compaction_shard_ancestor,
|
||||
mut compaction_l0_first,
|
||||
mut compaction_l0_semaphore,
|
||||
mut l0_flush_delay_threshold,
|
||||
@@ -807,9 +772,6 @@ impl TenantConfig {
|
||||
.compaction_upper_limit
|
||||
.apply(&mut compaction_upper_limit);
|
||||
patch.compaction_algorithm.apply(&mut compaction_algorithm);
|
||||
patch
|
||||
.compaction_shard_ancestor
|
||||
.apply(&mut compaction_shard_ancestor);
|
||||
patch.compaction_l0_first.apply(&mut compaction_l0_first);
|
||||
patch
|
||||
.compaction_l0_semaphore
|
||||
@@ -898,7 +860,6 @@ impl TenantConfig {
|
||||
compaction_threshold,
|
||||
compaction_upper_limit,
|
||||
compaction_algorithm,
|
||||
compaction_shard_ancestor,
|
||||
compaction_l0_first,
|
||||
compaction_l0_semaphore,
|
||||
l0_flush_delay_threshold,
|
||||
@@ -959,9 +920,6 @@ impl TenantConfig {
|
||||
.as_ref()
|
||||
.unwrap_or(&global_conf.compaction_algorithm)
|
||||
.clone(),
|
||||
compaction_shard_ancestor: self
|
||||
.compaction_shard_ancestor
|
||||
.unwrap_or(global_conf.compaction_shard_ancestor),
|
||||
compaction_l0_first: self
|
||||
.compaction_l0_first
|
||||
.unwrap_or(global_conf.compaction_l0_first),
|
||||
@@ -1803,8 +1761,6 @@ pub struct TopTenantShardsResponse {
|
||||
}
|
||||
|
||||
pub mod virtual_file {
|
||||
use std::sync::LazyLock;
|
||||
|
||||
#[derive(
|
||||
Copy,
|
||||
Clone,
|
||||
@@ -1842,38 +1798,14 @@ pub mod virtual_file {
|
||||
pub enum IoMode {
|
||||
/// Uses buffered IO.
|
||||
Buffered,
|
||||
/// Uses direct IO for reads only.
|
||||
/// Uses direct IO, error out if the operation fails.
|
||||
#[cfg(target_os = "linux")]
|
||||
Direct,
|
||||
/// Use direct IO for reads and writes.
|
||||
#[cfg(target_os = "linux")]
|
||||
DirectRw,
|
||||
}
|
||||
|
||||
impl IoMode {
|
||||
pub fn preferred() -> Self {
|
||||
// The default behavior when running Rust unit tests without any further
|
||||
// flags is to use the newest behavior (DirectRw).
|
||||
// The CI uses the following environment variable to unit tests for all
|
||||
// different modes.
|
||||
// NB: the Python regression & perf tests have their own defaults management
|
||||
// that writes pageserver.toml; they do not use this variable.
|
||||
if cfg!(test) {
|
||||
static CACHED: LazyLock<IoMode> = LazyLock::new(|| {
|
||||
utils::env::var_serde_json_string(
|
||||
"NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IO_MODE",
|
||||
)
|
||||
.unwrap_or(
|
||||
#[cfg(target_os = "linux")]
|
||||
IoMode::DirectRw,
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
IoMode::Buffered,
|
||||
)
|
||||
});
|
||||
*CACHED
|
||||
} else {
|
||||
IoMode::Buffered
|
||||
}
|
||||
pub const fn preferred() -> Self {
|
||||
Self::Buffered
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1885,8 +1817,6 @@ pub mod virtual_file {
|
||||
v if v == (IoMode::Buffered as u8) => IoMode::Buffered,
|
||||
#[cfg(target_os = "linux")]
|
||||
v if v == (IoMode::Direct as u8) => IoMode::Direct,
|
||||
#[cfg(target_os = "linux")]
|
||||
v if v == (IoMode::DirectRw as u8) => IoMode::DirectRw,
|
||||
x => return Err(x),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -4,10 +4,10 @@
//! See docs/rfcs/025-generation-numbers.md

use serde::{Deserialize, Serialize};
use utils::id::{NodeId, TimelineId};
use utils::id::NodeId;

use crate::controller_api::NodeRegisterRequest;
use crate::models::{LocationConfigMode, ShardImportStatus};
use crate::models::LocationConfigMode;
use crate::shard::TenantShardId;

/// Upcall message sent by the pageserver to the configured `control_plane_api` on
@@ -62,10 +62,3 @@ pub struct ValidateResponseTenant {
pub id: TenantShardId,
pub valid: bool,
}

#[derive(Serialize, Deserialize)]
pub struct PutTimelineImportStatusRequest {
pub tenant_shard_id: TenantShardId,
pub timeline_id: TimelineId,
pub status: ShardImportStatus,
}

libs/remote_keys/Cargo.toml (new file, 12 lines)
@@ -0,0 +1,12 @@
[package]
name = "remote_keys"
version = "0.1.0"
edition = "2024"
license.workspace = true

[dependencies]
anyhow.workspace = true
aws-smithy-types.workspace = true
aws-sdk-kms.workspace = true
aws-config.workspace = true
utils.workspace = true
libs/remote_keys/src/aws_keys.rs (new file, 47 lines)
@@ -0,0 +1,47 @@
|
||||
use aws_config::BehaviorVersion;
|
||||
|
||||
use crate::KeyId;
|
||||
|
||||
pub struct AwsRemoteKeyClient {
|
||||
client: aws_sdk_kms::Client,
|
||||
}
|
||||
|
||||
impl AwsRemoteKeyClient {
|
||||
pub async fn new() -> Self {
|
||||
let sdk_config = aws_config::defaults(BehaviorVersion::v2024_03_28())
|
||||
.retry_config(
|
||||
aws_config::retry::RetryConfig::standard()
|
||||
.with_max_attempts(5) // Retry up to 5 times
|
||||
.with_initial_backoff(std::time::Duration::from_millis(200)) // Start with 200ms delay
|
||||
.with_max_backoff(std::time::Duration::from_secs(5)), // Cap at 5 seconds
|
||||
)
|
||||
.load()
|
||||
.await;
|
||||
let client = aws_sdk_kms::Client::new(&sdk_config);
|
||||
Self { client }
|
||||
}
|
||||
|
||||
pub async fn decrypt(&self, key_id: &KeyId, ciphertext: impl Into<Vec<u8>>) -> Vec<u8> {
|
||||
let output = self
|
||||
.client
|
||||
.decrypt()
|
||||
.key_id(&key_id.0)
|
||||
.ciphertext_blob(aws_smithy_types::Blob::new(ciphertext.into()))
|
||||
.send()
|
||||
.await
|
||||
.expect("decrypt");
|
||||
output.plaintext.expect("plaintext").into_inner()
|
||||
}
|
||||
|
||||
pub async fn encrypt(&self, key_id: &KeyId, ciphertext: impl Into<Vec<u8>>) -> Vec<u8> {
|
||||
let output = self
|
||||
.client
|
||||
.encrypt()
|
||||
.key_id(&key_id.0)
|
||||
.plaintext(aws_smithy_types::Blob::new(ciphertext.into()))
|
||||
.send()
|
||||
.await
|
||||
.expect("decrypt");
|
||||
output.ciphertext_blob.expect("ciphertext").into_inner()
|
||||
}
|
||||
}
|
||||
libs/remote_keys/src/lib.rs (new file, 6 lines)
@@ -0,0 +1,6 @@
mod aws_keys;
pub use aws_keys::AwsRemoteKeyClient;

/// A string uniquely identifying a key
#[derive(Debug, PartialEq, Eq)]
pub struct KeyId(pub String);
@@ -14,9 +14,8 @@ use anyhow::{Context, Result};
|
||||
use azure_core::request_options::{IfMatchCondition, MaxResults, Metadata, Range};
|
||||
use azure_core::{Continuable, HttpClient, RetryOptions, TransportOptions};
|
||||
use azure_storage::StorageCredentials;
|
||||
use azure_storage_blobs::blob::CopyStatus;
|
||||
use azure_storage_blobs::blob::operations::GetBlobBuilder;
|
||||
use azure_storage_blobs::blob::{Blob, CopyStatus};
|
||||
use azure_storage_blobs::container::operations::ListBlobsBuilder;
|
||||
use azure_storage_blobs::prelude::{ClientBuilder, ContainerClient};
|
||||
use bytes::Bytes;
|
||||
use futures::FutureExt;
|
||||
@@ -254,15 +253,53 @@ impl AzureBlobStorage {
|
||||
download
|
||||
}
|
||||
|
||||
fn list_streaming_for_fn<T: Default + ListingCollector>(
|
||||
async fn permit(
|
||||
&self,
|
||||
kind: RequestKind,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<tokio::sync::SemaphorePermit<'_>, Cancelled> {
|
||||
let acquire = self.concurrency_limiter.acquire(kind);
|
||||
|
||||
tokio::select! {
|
||||
permit = acquire => Ok(permit.expect("never closed")),
|
||||
_ = cancel.cancelled() => Err(Cancelled),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn container_name(&self) -> &str {
|
||||
&self.container_name
|
||||
}
|
||||
}
|
||||
|
||||
fn to_azure_metadata(metadata: StorageMetadata) -> Metadata {
|
||||
let mut res = Metadata::new();
|
||||
for (k, v) in metadata.0.into_iter() {
|
||||
res.insert(k, v);
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
fn to_download_error(error: azure_core::Error) -> DownloadError {
|
||||
if let Some(http_err) = error.as_http_error() {
|
||||
match http_err.status() {
|
||||
StatusCode::NotFound => DownloadError::NotFound,
|
||||
StatusCode::NotModified => DownloadError::Unmodified,
|
||||
StatusCode::BadRequest => DownloadError::BadInput(anyhow::Error::new(error)),
|
||||
_ => DownloadError::Other(anyhow::Error::new(error)),
|
||||
}
|
||||
} else {
|
||||
DownloadError::Other(error.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl RemoteStorage for AzureBlobStorage {
|
||||
fn list_streaming(
|
||||
&self,
|
||||
prefix: Option<&RemotePath>,
|
||||
mode: ListingMode,
|
||||
max_keys: Option<NonZeroU32>,
|
||||
cancel: &CancellationToken,
|
||||
request_kind: RequestKind,
|
||||
customize_builder: impl Fn(ListBlobsBuilder) -> ListBlobsBuilder,
|
||||
) -> impl Stream<Item = Result<T, DownloadError>> {
|
||||
) -> impl Stream<Item = Result<Listing, DownloadError>> {
|
||||
// get the passed prefix or if it is not set use prefix_in_bucket value
|
||||
let list_prefix = prefix.map(|p| self.relative_path_to_name(p)).or_else(|| {
|
||||
self.prefix_in_container.clone().map(|mut s| {
|
||||
@@ -274,7 +311,7 @@ impl AzureBlobStorage {
|
||||
});
|
||||
|
||||
async_stream::stream! {
|
||||
let _permit = self.permit(request_kind, cancel).await?;
|
||||
let _permit = self.permit(RequestKind::List, cancel).await?;
|
||||
|
||||
let mut builder = self.client.list_blobs();
|
||||
|
||||
@@ -290,8 +327,6 @@ impl AzureBlobStorage {
|
||||
builder = builder.max_results(MaxResults::new(limit));
|
||||
}
|
||||
|
||||
builder = customize_builder(builder);
|
||||
|
||||
let mut next_marker = None;
|
||||
|
||||
let mut timeout_try_cnt = 1;
|
||||
@@ -347,20 +382,26 @@ impl AzureBlobStorage {
|
||||
break;
|
||||
};
|
||||
|
||||
let mut res = T::default();
|
||||
let mut res = Listing::default();
|
||||
next_marker = entry.continuation();
|
||||
let prefix_iter = entry
|
||||
.blobs
|
||||
.prefixes()
|
||||
.map(|prefix| self.name_to_relative_path(&prefix.name));
|
||||
res.add_prefixes(self, prefix_iter);
|
||||
res.prefixes.extend(prefix_iter);
|
||||
|
||||
let blob_iter = entry
|
||||
.blobs
|
||||
.blobs();
|
||||
.blobs()
|
||||
.map(|k| ListingObject{
|
||||
key: self.name_to_relative_path(&k.name),
|
||||
last_modified: k.properties.last_modified.into(),
|
||||
size: k.properties.content_length,
|
||||
}
|
||||
);
|
||||
|
||||
for key in blob_iter {
|
||||
res.add_blob(self, key);
|
||||
res.keys.push(key);
|
||||
|
||||
if let Some(mut mk) = max_keys {
|
||||
assert!(mk > 0);
|
||||
@@ -382,128 +423,6 @@ impl AzureBlobStorage {
|
||||
}
|
||||
}
|
||||
|
||||
async fn permit(
|
||||
&self,
|
||||
kind: RequestKind,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<tokio::sync::SemaphorePermit<'_>, Cancelled> {
|
||||
let acquire = self.concurrency_limiter.acquire(kind);
|
||||
|
||||
tokio::select! {
|
||||
permit = acquire => Ok(permit.expect("never closed")),
|
||||
_ = cancel.cancelled() => Err(Cancelled),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn container_name(&self) -> &str {
|
||||
&self.container_name
|
||||
}
|
||||
}
|
||||
|
||||
trait ListingCollector {
|
||||
fn add_prefixes(&mut self, abs: &AzureBlobStorage, prefix_it: impl Iterator<Item = RemotePath>);
|
||||
fn add_blob(&mut self, abs: &AzureBlobStorage, blob: &Blob);
|
||||
}
|
||||
|
||||
impl ListingCollector for Listing {
|
||||
fn add_prefixes(
|
||||
&mut self,
|
||||
_abs: &AzureBlobStorage,
|
||||
prefix_it: impl Iterator<Item = RemotePath>,
|
||||
) {
|
||||
self.prefixes.extend(prefix_it);
|
||||
}
|
||||
fn add_blob(&mut self, abs: &AzureBlobStorage, blob: &Blob) {
|
||||
self.keys.push(ListingObject {
|
||||
key: abs.name_to_relative_path(&blob.name),
|
||||
last_modified: blob.properties.last_modified.into(),
|
||||
size: blob.properties.content_length,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
impl ListingCollector for crate::VersionListing {
|
||||
fn add_prefixes(
|
||||
&mut self,
|
||||
_abs: &AzureBlobStorage,
|
||||
_prefix_it: impl Iterator<Item = RemotePath>,
|
||||
) {
|
||||
// nothing
|
||||
}
|
||||
fn add_blob(&mut self, abs: &AzureBlobStorage, blob: &Blob) {
|
||||
let id = crate::VersionId(blob.version_id.clone().expect("didn't find version ID"));
|
||||
self.versions.push(crate::Version {
|
||||
key: abs.name_to_relative_path(&blob.name),
|
||||
last_modified: blob.properties.last_modified.into(),
|
||||
kind: crate::VersionKind::Version(id),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
fn to_azure_metadata(metadata: StorageMetadata) -> Metadata {
|
||||
let mut res = Metadata::new();
|
||||
for (k, v) in metadata.0.into_iter() {
|
||||
res.insert(k, v);
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
fn to_download_error(error: azure_core::Error) -> DownloadError {
|
||||
if let Some(http_err) = error.as_http_error() {
|
||||
match http_err.status() {
|
||||
StatusCode::NotFound => DownloadError::NotFound,
|
||||
StatusCode::NotModified => DownloadError::Unmodified,
|
||||
StatusCode::BadRequest => DownloadError::BadInput(anyhow::Error::new(error)),
|
||||
_ => DownloadError::Other(anyhow::Error::new(error)),
|
||||
}
|
||||
} else {
|
||||
DownloadError::Other(error.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl RemoteStorage for AzureBlobStorage {
|
||||
fn list_streaming(
|
||||
&self,
|
||||
prefix: Option<&RemotePath>,
|
||||
mode: ListingMode,
|
||||
max_keys: Option<NonZeroU32>,
|
||||
cancel: &CancellationToken,
|
||||
) -> impl Stream<Item = Result<Listing, DownloadError>> {
|
||||
let customize_builder = |builder| builder;
|
||||
let kind = RequestKind::ListVersions;
|
||||
self.list_streaming_for_fn(prefix, mode, max_keys, cancel, kind, customize_builder)
|
||||
}
|
||||
|
||||
async fn list_versions(
|
||||
&self,
|
||||
prefix: Option<&RemotePath>,
|
||||
mode: ListingMode,
|
||||
max_keys: Option<NonZeroU32>,
|
||||
cancel: &CancellationToken,
|
||||
) -> std::result::Result<crate::VersionListing, DownloadError> {
|
||||
let customize_builder = |mut builder: ListBlobsBuilder| {
|
||||
builder = builder.include_versions(true);
|
||||
builder
|
||||
};
|
||||
let kind = RequestKind::ListVersions;
|
||||
|
||||
let mut stream = std::pin::pin!(self.list_streaming_for_fn(
|
||||
prefix,
|
||||
mode,
|
||||
max_keys,
|
||||
cancel,
|
||||
kind,
|
||||
customize_builder
|
||||
));
|
||||
let mut combined: crate::VersionListing =
|
||||
stream.next().await.expect("At least one item required")?;
|
||||
while let Some(list) = stream.next().await {
|
||||
let list = list?;
|
||||
combined.versions.extend(list.versions.into_iter());
|
||||
}
|
||||
Ok(combined)
|
||||
}
|
||||
|
||||
async fn head_object(
|
||||
&self,
|
||||
key: &RemotePath,
|
||||
@@ -613,12 +532,7 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
let mut builder = blob_client.get();
|
||||
|
||||
if let Some(ref etag) = opts.etag {
|
||||
builder = builder.if_match(IfMatchCondition::NotMatch(etag.to_string()));
|
||||
}
|
||||
|
||||
if let Some(ref version_id) = opts.version_id {
|
||||
let version_id = azure_storage_blobs::prelude::VersionId::new(version_id.0.clone());
|
||||
builder = builder.blob_versioning(version_id);
|
||||
builder = builder.if_match(IfMatchCondition::NotMatch(etag.to_string()))
|
||||
}
|
||||
|
||||
if let Some((start, end)) = opts.byte_range() {
|
||||
|
||||
@@ -176,32 +176,6 @@ pub struct Listing {
|
||||
pub keys: Vec<ListingObject>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct VersionListing {
|
||||
pub versions: Vec<Version>,
|
||||
}
|
||||
|
||||
pub struct Version {
|
||||
pub key: RemotePath,
|
||||
pub last_modified: SystemTime,
|
||||
pub kind: VersionKind,
|
||||
}
|
||||
|
||||
impl Version {
|
||||
pub fn version_id(&self) -> Option<&VersionId> {
|
||||
match &self.kind {
|
||||
VersionKind::Version(id) => Some(id),
|
||||
VersionKind::DeletionMarker => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum VersionKind {
|
||||
DeletionMarker,
|
||||
Version(VersionId),
|
||||
}
|
||||
|
||||
/// Options for downloads. The default value is a plain GET.
|
||||
pub struct DownloadOpts {
|
||||
/// If given, returns [`DownloadError::Unmodified`] if the object still has
|
||||
@@ -212,8 +186,6 @@ pub struct DownloadOpts {
|
||||
/// The end of the byte range to download, or unbounded. Must be after the
|
||||
/// start bound.
|
||||
pub byte_end: Bound<u64>,
|
||||
/// Optionally request a specific version of a key
|
||||
pub version_id: Option<VersionId>,
|
||||
/// Indicate whether we're downloading something small or large: this indirectly controls
|
||||
/// timeouts: for something like an index/manifest/heatmap, we should time out faster than
|
||||
/// for layer files
|
||||
@@ -225,16 +197,12 @@ pub enum DownloadKind {
|
||||
Small,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct VersionId(pub String);
|
||||
|
||||
impl Default for DownloadOpts {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
etag: Default::default(),
|
||||
byte_start: Bound::Unbounded,
|
||||
byte_end: Bound::Unbounded,
|
||||
version_id: None,
|
||||
kind: DownloadKind::Large,
|
||||
}
|
||||
}
|
||||
@@ -327,14 +295,6 @@ pub trait RemoteStorage: Send + Sync + 'static {
|
||||
Ok(combined)
|
||||
}
|
||||
|
||||
async fn list_versions(
|
||||
&self,
|
||||
prefix: Option<&RemotePath>,
|
||||
mode: ListingMode,
|
||||
max_keys: Option<NonZeroU32>,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<VersionListing, DownloadError>;
|
||||
|
||||
/// Obtain metadata information about an object.
|
||||
async fn head_object(
|
||||
&self,
|
||||
@@ -515,22 +475,6 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
|
||||
}
|
||||
}
|
||||
|
||||
// See [`RemoteStorage::list_versions`].
|
||||
pub async fn list_versions<'a>(
|
||||
&'a self,
|
||||
prefix: Option<&'a RemotePath>,
|
||||
mode: ListingMode,
|
||||
max_keys: Option<NonZeroU32>,
|
||||
cancel: &'a CancellationToken,
|
||||
) -> Result<VersionListing, DownloadError> {
|
||||
match self {
|
||||
Self::LocalFs(s) => s.list_versions(prefix, mode, max_keys, cancel).await,
|
||||
Self::AwsS3(s) => s.list_versions(prefix, mode, max_keys, cancel).await,
|
||||
Self::AzureBlob(s) => s.list_versions(prefix, mode, max_keys, cancel).await,
|
||||
Self::Unreliable(s) => s.list_versions(prefix, mode, max_keys, cancel).await,
|
||||
}
|
||||
}
|
||||
|
||||
// See [`RemoteStorage::head_object`].
|
||||
pub async fn head_object(
|
||||
&self,
|
||||
@@ -783,7 +727,6 @@ impl ConcurrencyLimiter {
|
||||
RequestKind::Copy => &self.write,
|
||||
RequestKind::TimeTravel => &self.write,
|
||||
RequestKind::Head => &self.read,
|
||||
RequestKind::ListVersions => &self.read,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -445,16 +445,6 @@ impl RemoteStorage for LocalFs {
|
||||
}
|
||||
}
|
||||
|
||||
async fn list_versions(
|
||||
&self,
|
||||
_prefix: Option<&RemotePath>,
|
||||
_mode: ListingMode,
|
||||
_max_keys: Option<NonZeroU32>,
|
||||
_cancel: &CancellationToken,
|
||||
) -> Result<crate::VersionListing, DownloadError> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn head_object(
|
||||
&self,
|
||||
key: &RemotePath,
|
||||
|
||||
@@ -14,7 +14,6 @@ pub(crate) enum RequestKind {
|
||||
Copy = 4,
|
||||
TimeTravel = 5,
|
||||
Head = 6,
|
||||
ListVersions = 7,
|
||||
}
|
||||
|
||||
use RequestKind::*;
|
||||
@@ -30,7 +29,6 @@ impl RequestKind {
|
||||
Copy => "copy_object",
|
||||
TimeTravel => "time_travel_recover",
|
||||
Head => "head_object",
|
||||
ListVersions => "list_versions",
|
||||
}
|
||||
}
|
||||
const fn as_index(&self) -> usize {
|
||||
@@ -38,10 +36,7 @@ impl RequestKind {
|
||||
}
|
||||
}
|
||||
|
||||
const REQUEST_KIND_LIST: &[RequestKind] =
|
||||
&[Get, Put, Delete, List, Copy, TimeTravel, Head, ListVersions];
|
||||
|
||||
const REQUEST_KIND_COUNT: usize = REQUEST_KIND_LIST.len();
|
||||
const REQUEST_KIND_COUNT: usize = 7;
|
||||
pub(crate) struct RequestTyped<C>([C; REQUEST_KIND_COUNT]);
|
||||
|
||||
impl<C> RequestTyped<C> {
|
||||
@@ -50,11 +45,12 @@ impl<C> RequestTyped<C> {
|
||||
}
|
||||
|
||||
fn build_with(mut f: impl FnMut(RequestKind) -> C) -> Self {
|
||||
let mut it = REQUEST_KIND_LIST.iter();
|
||||
use RequestKind::*;
|
||||
let mut it = [Get, Put, Delete, List, Copy, TimeTravel, Head].into_iter();
|
||||
let arr = std::array::from_fn::<C, REQUEST_KIND_COUNT, _>(|index| {
|
||||
let next = it.next().unwrap();
|
||||
assert_eq!(index, next.as_index());
|
||||
f(*next)
|
||||
f(next)
|
||||
});
|
||||
|
||||
if let Some(next) = it.next() {
|
||||
|
||||
@@ -21,8 +21,9 @@ use aws_sdk_s3::config::{AsyncSleep, IdentityCache, Region, SharedAsyncSleep};
|
||||
use aws_sdk_s3::error::SdkError;
|
||||
use aws_sdk_s3::operation::get_object::GetObjectError;
|
||||
use aws_sdk_s3::operation::head_object::HeadObjectError;
|
||||
use aws_sdk_s3::types::{Delete, ObjectIdentifier, StorageClass};
|
||||
use aws_sdk_s3::types::{Delete, DeleteMarkerEntry, ObjectIdentifier, ObjectVersion, StorageClass};
|
||||
use aws_smithy_async::rt::sleep::TokioSleep;
|
||||
use aws_smithy_types::DateTime;
|
||||
use aws_smithy_types::body::SdkBody;
|
||||
use aws_smithy_types::byte_stream::ByteStream;
|
||||
use aws_smithy_types::date_time::ConversionError;
|
||||
@@ -45,7 +46,7 @@ use crate::support::PermitCarrying;
|
||||
use crate::{
|
||||
ConcurrencyLimiter, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject,
|
||||
MAX_KEYS_PER_DELETE_S3, REMOTE_STORAGE_PREFIX_SEPARATOR, RemotePath, RemoteStorage,
|
||||
TimeTravelError, TimeoutOrCancel, Version, VersionId, VersionKind, VersionListing,
|
||||
TimeTravelError, TimeoutOrCancel,
|
||||
};
|
||||
|
||||
/// AWS S3 storage.
|
||||
@@ -65,7 +66,6 @@ struct GetObjectRequest {
|
||||
key: String,
|
||||
etag: Option<String>,
|
||||
range: Option<String>,
|
||||
version_id: Option<String>,
|
||||
}
|
||||
impl S3Bucket {
|
||||
/// Creates the S3 storage, errors if incorrect AWS S3 configuration provided.
|
||||
@@ -251,7 +251,6 @@ impl S3Bucket {
|
||||
.get_object()
|
||||
.bucket(request.bucket)
|
||||
.key(request.key)
|
||||
.set_version_id(request.version_id)
|
||||
.set_range(request.range);
|
||||
|
||||
if let Some(etag) = request.etag {
|
||||
@@ -406,124 +405,6 @@ impl S3Bucket {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn list_versions_with_permit(
|
||||
&self,
|
||||
_permit: &tokio::sync::SemaphorePermit<'_>,
|
||||
prefix: Option<&RemotePath>,
|
||||
mode: ListingMode,
|
||||
max_keys: Option<NonZeroU32>,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<crate::VersionListing, DownloadError> {
|
||||
// get the passed prefix or if it is not set use prefix_in_bucket value
|
||||
        let prefix = prefix
            .map(|p| self.relative_path_to_s3_object(p))
            .or_else(|| self.prefix_in_bucket.clone());

        let warn_threshold = 3;
        let max_retries = 10;
        let is_permanent = |e: &_| matches!(e, DownloadError::Cancelled);

        let mut key_marker = None;
        let mut version_id_marker = None;
        let mut versions_and_deletes = Vec::new();

        loop {
            let response = backoff::retry(
                || async {
                    let mut request = self
                        .client
                        .list_object_versions()
                        .bucket(self.bucket_name.clone())
                        .set_prefix(prefix.clone())
                        .set_key_marker(key_marker.clone())
                        .set_version_id_marker(version_id_marker.clone());

                    if let ListingMode::WithDelimiter = mode {
                        request = request.delimiter(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string());
                    }

                    let op = request.send();

                    tokio::select! {
                        res = op => res.map_err(|e| DownloadError::Other(e.into())),
                        _ = cancel.cancelled() => Err(DownloadError::Cancelled),
                    }
                },
                is_permanent,
                warn_threshold,
                max_retries,
                "listing object versions",
                cancel,
            )
            .await
            .ok_or_else(|| DownloadError::Cancelled)
            .and_then(|x| x)?;

            tracing::trace!(
                " Got List response version_id_marker={:?}, key_marker={:?}",
                response.version_id_marker,
                response.key_marker
            );
            let versions = response
                .versions
                .unwrap_or_default()
                .into_iter()
                .map(|version| {
                    let key = version.key.expect("response does not contain a key");
                    let key = self.s3_object_to_relative_path(&key);
                    let version_id = VersionId(version.version_id.expect("needing version id"));
                    let last_modified =
                        SystemTime::try_from(version.last_modified.expect("no last_modified"))?;
                    Ok(Version {
                        key,
                        last_modified,
                        kind: crate::VersionKind::Version(version_id),
                    })
                });
            let deletes = response
                .delete_markers
                .unwrap_or_default()
                .into_iter()
                .map(|version| {
                    let key = version.key.expect("response does not contain a key");
                    let key = self.s3_object_to_relative_path(&key);
                    let last_modified =
                        SystemTime::try_from(version.last_modified.expect("no last_modified"))?;
                    Ok(Version {
                        key,
                        last_modified,
                        kind: crate::VersionKind::DeletionMarker,
                    })
                });
            itertools::process_results(versions.chain(deletes), |n_vds| {
                versions_and_deletes.extend(n_vds)
            })
            .map_err(DownloadError::Other)?;
            fn none_if_empty(v: Option<String>) -> Option<String> {
                v.filter(|v| !v.is_empty())
            }
            version_id_marker = none_if_empty(response.next_version_id_marker);
            key_marker = none_if_empty(response.next_key_marker);
            if version_id_marker.is_none() {
                // The final response is not supposed to be truncated
                if response.is_truncated.unwrap_or_default() {
                    return Err(DownloadError::Other(anyhow::anyhow!(
                        "Received truncated ListObjectVersions response for prefix={prefix:?}"
                    )));
                }
                break;
            }
            if let Some(max_keys) = max_keys {
                if versions_and_deletes.len() >= max_keys.get().try_into().unwrap() {
                    return Err(DownloadError::Other(anyhow::anyhow!("too many versions")));
                }
            }
        }
        Ok(VersionListing {
            versions: versions_and_deletes,
        })
    }

    pub fn bucket_name(&self) -> &str {
        &self.bucket_name
    }
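The helper above pages through ListObjectVersions by threading key_marker/version_id_marker through repeated requests until the markers run out. Below is a minimal, self-contained sketch of that same pagination pattern against the aws-sdk-s3 fluent client; the function name and the bucket/prefix parameters are illustrative only (not part of this diff), and error handling is simplified to anyhow.

// Sketch of ListObjectVersions pagination, assuming the aws-sdk-s3 client used above.
async fn collect_version_ids(
    client: &aws_sdk_s3::Client,
    bucket: &str,
    prefix: &str,
) -> anyhow::Result<Vec<String>> {
    let mut key_marker: Option<String> = None;
    let mut version_id_marker: Option<String> = None;
    let mut out = Vec::new();
    loop {
        let response = client
            .list_object_versions()
            .bucket(bucket)
            .prefix(prefix)
            .set_key_marker(key_marker.clone())
            .set_version_id_marker(version_id_marker.clone())
            .send()
            .await?;
        // Object versions and delete markers arrive in separate lists; this
        // sketch only collects plain version ids.
        for version in response.versions.unwrap_or_default() {
            if let Some(id) = version.version_id {
                out.push(id);
            }
        }
        // Empty markers mean there is no further page (same as none_if_empty above).
        key_marker = response.next_key_marker.filter(|s| !s.is_empty());
        version_id_marker = response.next_version_id_marker.filter(|s| !s.is_empty());
        if key_marker.is_none() && version_id_marker.is_none() {
            break;
        }
    }
    Ok(out)
}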
@@ -740,19 +621,6 @@ impl RemoteStorage for S3Bucket {
        }
    }

    async fn list_versions(
        &self,
        prefix: Option<&RemotePath>,
        mode: ListingMode,
        max_keys: Option<NonZeroU32>,
        cancel: &CancellationToken,
    ) -> Result<crate::VersionListing, DownloadError> {
        let kind = RequestKind::ListVersions;
        let permit = self.permit(kind, cancel).await?;
        self.list_versions_with_permit(&permit, prefix, mode, max_keys, cancel)
            .await
    }

    async fn head_object(
        &self,
        key: &RemotePath,
@@ -933,7 +801,6 @@ impl RemoteStorage for S3Bucket {
                key: self.relative_path_to_s3_object(from),
                etag: opts.etag.as_ref().map(|e| e.to_string()),
                range: opts.byte_range_header(),
                version_id: opts.version_id.as_ref().map(|v| v.0.to_owned()),
            },
            cancel,
        )
@@ -978,25 +845,94 @@ impl RemoteStorage for S3Bucket {
|
||||
let kind = RequestKind::TimeTravel;
|
||||
let permit = self.permit(kind, cancel).await?;
|
||||
|
||||
let timestamp = DateTime::from(timestamp);
|
||||
let done_if_after = DateTime::from(done_if_after);
|
||||
|
||||
tracing::trace!("Target time: {timestamp:?}, done_if_after {done_if_after:?}");
|
||||
|
||||
// Limit the number of versions deletions, mostly so that we don't
|
||||
// keep requesting forever if the list is too long, as we'd put the
|
||||
// list in RAM.
|
||||
// Building a list of 100k entries that reaches the limit roughly takes
|
||||
// 40 seconds, and roughly corresponds to tenants of 2 TiB physical size.
|
||||
const COMPLEXITY_LIMIT: Option<NonZeroU32> = NonZeroU32::new(100_000);
|
||||
// get the passed prefix or if it is not set use prefix_in_bucket value
|
||||
let prefix = prefix
|
||||
.map(|p| self.relative_path_to_s3_object(p))
|
||||
.or_else(|| self.prefix_in_bucket.clone());
|
||||
|
||||
let mode = ListingMode::NoDelimiter;
|
||||
let version_listing = self
|
||||
.list_versions_with_permit(&permit, prefix, mode, COMPLEXITY_LIMIT, cancel)
|
||||
let warn_threshold = 3;
|
||||
let max_retries = 10;
|
||||
let is_permanent = |e: &_| matches!(e, TimeTravelError::Cancelled);
|
||||
|
||||
let mut key_marker = None;
|
||||
let mut version_id_marker = None;
|
||||
let mut versions_and_deletes = Vec::new();
|
||||
|
||||
loop {
|
||||
let response = backoff::retry(
|
||||
|| async {
|
||||
let op = self
|
||||
.client
|
||||
.list_object_versions()
|
||||
.bucket(self.bucket_name.clone())
|
||||
.set_prefix(prefix.clone())
|
||||
.set_key_marker(key_marker.clone())
|
||||
.set_version_id_marker(version_id_marker.clone())
|
||||
.send();
|
||||
|
||||
tokio::select! {
|
||||
res = op => res.map_err(|e| TimeTravelError::Other(e.into())),
|
||||
_ = cancel.cancelled() => Err(TimeTravelError::Cancelled),
|
||||
}
|
||||
},
|
||||
is_permanent,
|
||||
warn_threshold,
|
||||
max_retries,
|
||||
"listing object versions for time_travel_recover",
|
||||
cancel,
|
||||
)
|
||||
.await
|
||||
.map_err(|err| match err {
|
||||
DownloadError::Other(e) => TimeTravelError::Other(e),
|
||||
DownloadError::Cancelled => TimeTravelError::Cancelled,
|
||||
other => TimeTravelError::Other(other.into()),
|
||||
})?;
|
||||
let versions_and_deletes = version_listing.versions;
|
||||
.ok_or_else(|| TimeTravelError::Cancelled)
|
||||
.and_then(|x| x)?;
|
||||
|
||||
tracing::trace!(
|
||||
" Got List response version_id_marker={:?}, key_marker={:?}",
|
||||
response.version_id_marker,
|
||||
response.key_marker
|
||||
);
|
||||
let versions = response
|
||||
.versions
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(VerOrDelete::from_version);
|
||||
let deletes = response
|
||||
.delete_markers
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(VerOrDelete::from_delete_marker);
|
||||
itertools::process_results(versions.chain(deletes), |n_vds| {
|
||||
versions_and_deletes.extend(n_vds)
|
||||
})
|
||||
.map_err(TimeTravelError::Other)?;
|
||||
fn none_if_empty(v: Option<String>) -> Option<String> {
|
||||
v.filter(|v| !v.is_empty())
|
||||
}
|
||||
version_id_marker = none_if_empty(response.next_version_id_marker);
|
||||
key_marker = none_if_empty(response.next_key_marker);
|
||||
if version_id_marker.is_none() {
|
||||
// The final response is not supposed to be truncated
|
||||
if response.is_truncated.unwrap_or_default() {
|
||||
return Err(TimeTravelError::Other(anyhow::anyhow!(
|
||||
"Received truncated ListObjectVersions response for prefix={prefix:?}"
|
||||
)));
|
||||
}
|
||||
break;
|
||||
}
|
||||
// Limit the number of versions deletions, mostly so that we don't
|
||||
// keep requesting forever if the list is too long, as we'd put the
|
||||
// list in RAM.
|
||||
// Building a list of 100k entries that reaches the limit roughly takes
|
||||
// 40 seconds, and roughly corresponds to tenants of 2 TiB physical size.
|
||||
const COMPLEXITY_LIMIT: usize = 100_000;
|
||||
if versions_and_deletes.len() >= COMPLEXITY_LIMIT {
|
||||
return Err(TimeTravelError::TooManyVersions);
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"Built list for time travel with {} versions and deletions",
|
||||
@@ -1012,26 +948,24 @@ impl RemoteStorage for S3Bucket {
|
||||
let mut vds_for_key = HashMap::<_, Vec<_>>::new();
|
||||
|
||||
for vd in &versions_and_deletes {
|
||||
let Version { key, .. } = &vd;
|
||||
let version_id = vd.version_id().map(|v| v.0.as_str());
|
||||
if version_id == Some("null") {
|
||||
let VerOrDelete {
|
||||
version_id, key, ..
|
||||
} = &vd;
|
||||
if version_id == "null" {
|
||||
return Err(TimeTravelError::Other(anyhow!(
|
||||
"Received ListVersions response for key={key} with version_id='null', \
|
||||
indicating either disabled versioning, or legacy objects with null version id values"
|
||||
)));
|
||||
}
|
||||
tracing::trace!("Parsing version key={key} kind={:?}", vd.kind);
|
||||
tracing::trace!(
|
||||
"Parsing version key={key} version_id={version_id} kind={:?}",
|
||||
vd.kind
|
||||
);
|
||||
|
||||
vds_for_key.entry(key).or_default().push(vd);
|
||||
}
|
||||
|
||||
let warn_threshold = 3;
|
||||
let max_retries = 10;
|
||||
let is_permanent = |e: &_| matches!(e, TimeTravelError::Cancelled);
|
||||
|
||||
for (key, versions) in vds_for_key {
|
||||
let last_vd = versions.last().unwrap();
|
||||
let key = self.relative_path_to_s3_object(key);
|
||||
if last_vd.last_modified > done_if_after {
|
||||
tracing::trace!("Key {key} has version later than done_if_after, skipping");
|
||||
continue;
|
||||
@@ -1056,11 +990,11 @@ impl RemoteStorage for S3Bucket {
|
||||
do_delete = true;
|
||||
} else {
|
||||
match &versions[version_to_restore_to - 1] {
|
||||
Version {
|
||||
kind: VersionKind::Version(version_id),
|
||||
VerOrDelete {
|
||||
kind: VerOrDeleteKind::Version,
|
||||
version_id,
|
||||
..
|
||||
} => {
|
||||
let version_id = &version_id.0;
|
||||
tracing::trace!("Copying old version {version_id} for {key}...");
|
||||
// Restore the state to the last version by copying
|
||||
let source_id =
|
||||
@@ -1072,7 +1006,7 @@ impl RemoteStorage for S3Bucket {
|
||||
.client
|
||||
.copy_object()
|
||||
.bucket(self.bucket_name.clone())
|
||||
.key(&key)
|
||||
.key(key)
|
||||
.set_storage_class(self.upload_storage_class.clone())
|
||||
.copy_source(&source_id)
|
||||
.send();
|
||||
@@ -1093,8 +1027,8 @@ impl RemoteStorage for S3Bucket {
|
||||
.and_then(|x| x)?;
|
||||
tracing::info!(%version_id, %key, "Copied old version in S3");
|
||||
}
|
||||
Version {
|
||||
kind: VersionKind::DeletionMarker,
|
||||
VerOrDelete {
|
||||
kind: VerOrDeleteKind::DeleteMarker,
|
||||
..
|
||||
} => {
|
||||
do_delete = true;
|
||||
@@ -1102,7 +1036,7 @@ impl RemoteStorage for S3Bucket {
|
||||
}
|
||||
};
|
||||
if do_delete {
|
||||
if matches!(last_vd.kind, VersionKind::DeletionMarker) {
|
||||
if matches!(last_vd.kind, VerOrDeleteKind::DeleteMarker) {
|
||||
// Key has since been deleted (but there was some history), no need to do anything
|
||||
tracing::trace!("Key {key} already deleted, skipping.");
|
||||
} else {
|
||||
@@ -1130,6 +1064,62 @@ impl RemoteStorage for S3Bucket {
|
||||
}
|
||||
}
|
||||
|
||||
// Save RAM and only store the needed data instead of the entire ObjectVersion/DeleteMarkerEntry
|
||||
struct VerOrDelete {
|
||||
kind: VerOrDeleteKind,
|
||||
last_modified: DateTime,
|
||||
version_id: String,
|
||||
key: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum VerOrDeleteKind {
|
||||
Version,
|
||||
DeleteMarker,
|
||||
}
|
||||
|
||||
impl VerOrDelete {
|
||||
fn with_kind(
|
||||
kind: VerOrDeleteKind,
|
||||
last_modified: Option<DateTime>,
|
||||
version_id: Option<String>,
|
||||
key: Option<String>,
|
||||
) -> anyhow::Result<Self> {
|
||||
let lvk = (last_modified, version_id, key);
|
||||
let (Some(last_modified), Some(version_id), Some(key)) = lvk else {
|
||||
anyhow::bail!(
|
||||
"One (or more) of last_modified, key, and id is None. \
|
||||
Is versioning enabled in the bucket? last_modified={:?}, version_id={:?}, key={:?}",
|
||||
lvk.0,
|
||||
lvk.1,
|
||||
lvk.2,
|
||||
);
|
||||
};
|
||||
Ok(Self {
|
||||
kind,
|
||||
last_modified,
|
||||
version_id,
|
||||
key,
|
||||
})
|
||||
}
|
||||
fn from_version(v: ObjectVersion) -> anyhow::Result<Self> {
|
||||
Self::with_kind(
|
||||
VerOrDeleteKind::Version,
|
||||
v.last_modified,
|
||||
v.version_id,
|
||||
v.key,
|
||||
)
|
||||
}
|
||||
fn from_delete_marker(v: DeleteMarkerEntry) -> anyhow::Result<Self> {
|
||||
Self::with_kind(
|
||||
VerOrDeleteKind::DeleteMarker,
|
||||
v.last_modified,
|
||||
v.version_id,
|
||||
v.key,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::num::NonZeroUsize;
|
||||
|
||||
@@ -139,20 +139,6 @@ impl RemoteStorage for UnreliableWrapper {
        self.inner.list(prefix, mode, max_keys, cancel).await
    }

    async fn list_versions(
        &self,
        prefix: Option<&RemotePath>,
        mode: ListingMode,
        max_keys: Option<NonZeroU32>,
        cancel: &CancellationToken,
    ) -> Result<crate::VersionListing, DownloadError> {
        self.attempt(RemoteOp::ListPrefixes(prefix.cloned()))
            .map_err(DownloadError::Other)?;
        self.inner
            .list_versions(prefix, mode, max_keys, cancel)
            .await
    }

    async fn head_object(
        &self,
        key: &RemotePath,
@@ -29,7 +29,6 @@ futures = { workspace = true }
jsonwebtoken.workspace = true
nix = { workspace = true, features = ["ioctl"] }
once_cell.workspace = true
pem.workspace = true
pin-project-lite.workspace = true
regex.workspace = true
serde.workspace = true

@@ -11,8 +11,7 @@ use camino::Utf8Path;
use jsonwebtoken::{
    Algorithm, DecodingKey, EncodingKey, Header, TokenData, Validation, decode, encode,
};
use pem::Pem;
use serde::{Deserialize, Serialize, de::DeserializeOwned};
use serde::{Deserialize, Serialize};

use crate::id::TenantId;

@@ -74,10 +73,7 @@ impl SwappableJwtAuth {
    pub fn swap(&self, jwt_auth: JwtAuth) {
        self.0.swap(Arc::new(jwt_auth));
    }
    pub fn decode<D: DeserializeOwned>(
        &self,
        token: &str,
    ) -> std::result::Result<TokenData<D>, AuthError> {
    pub fn decode(&self, token: &str) -> std::result::Result<TokenData<Claims>, AuthError> {
        self.0.load().decode(token)
    }
}
@@ -152,10 +148,7 @@ impl JwtAuth {
    /// The function tries the stored decoding keys in succession,
    /// and returns the first yielding a successful result.
    /// If there is no working decoding key, it returns the last error.
    pub fn decode<D: DeserializeOwned>(
        &self,
        token: &str,
    ) -> std::result::Result<TokenData<D>, AuthError> {
    pub fn decode(&self, token: &str) -> std::result::Result<TokenData<Claims>, AuthError> {
        let mut res = None;
        for decoding_key in &self.decoding_keys {
            res = Some(decode(token, decoding_key, &self.validation));
@@ -180,8 +173,8 @@ impl std::fmt::Debug for JwtAuth {
}

// This function is used only for testing purposes in the CLI, e.g. to generate tokens during init.
pub fn encode_from_key_file<S: Serialize>(claims: &S, pem: &Pem) -> Result<String> {
    let key = EncodingKey::from_ed_der(pem.contents());
pub fn encode_from_key_file<S: Serialize>(claims: &S, key_data: &[u8]) -> Result<String> {
    let key = EncodingKey::from_ed_pem(key_data)?;
    Ok(encode(&Header::new(STORAGE_TOKEN_ALGORITHM), claims, &key)?)
}

@@ -195,13 +188,13 @@ mod tests {
    //
    // openssl genpkey -algorithm ed25519 -out ed25519-priv.pem
    // openssl pkey -in ed25519-priv.pem -pubout -out ed25519-pub.pem
    const TEST_PUB_KEY_ED25519: &str = r#"
    const TEST_PUB_KEY_ED25519: &[u8] = br#"
-----BEGIN PUBLIC KEY-----
MCowBQYDK2VwAyEARYwaNBayR+eGI0iXB4s3QxE3Nl2g1iWbr6KtLWeVD/w=
-----END PUBLIC KEY-----
"#;

    const TEST_PRIV_KEY_ED25519: &str = r#"
    const TEST_PRIV_KEY_ED25519: &[u8] = br#"
-----BEGIN PRIVATE KEY-----
MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
-----END PRIVATE KEY-----
@@ -229,9 +222,9 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH

        // Check it can be validated with the public key
        let auth = JwtAuth::new(vec![
            DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519.as_bytes()).unwrap(),
            DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519).unwrap(),
        ]);
        let claims_from_token: Claims = auth.decode(encoded_eddsa).unwrap().claims;
        let claims_from_token = auth.decode(encoded_eddsa).unwrap().claims;
        assert_eq!(claims_from_token, expected_claims);
    }

@@ -242,14 +235,13 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
            scope: Scope::Tenant,
        };

        let pem = pem::parse(TEST_PRIV_KEY_ED25519).unwrap();
        let encoded = encode_from_key_file(&claims, &pem).unwrap();
        let encoded = encode_from_key_file(&claims, TEST_PRIV_KEY_ED25519).unwrap();

        // decode it back
        let auth = JwtAuth::new(vec![
            DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519.as_bytes()).unwrap(),
            DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519).unwrap(),
        ]);
        let decoded: TokenData<Claims> = auth.decode(&encoded).unwrap();
        let decoded = auth.decode(&encoded).unwrap();

        assert_eq!(decoded.claims, claims);
    }

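The hunks above make JwtAuth::decode (and SwappableJwtAuth::decode) generic over the claim type instead of hard-coding Claims. A minimal sketch of how a caller could use the generic form follows; it assumes the utils::auth module this diff touches, and ExampleClaims / endpoint_from_token are hypothetical names used only for illustration.

// Illustrative only: decoding a caller-defined claim type with the generic decode::<D>().
use serde::Deserialize;
use utils::auth::JwtAuth; // assumed import path for the JwtAuth shown above

#[derive(Deserialize)]
struct ExampleClaims {
    endpoint_id: String,
    #[allow(dead_code)]
    exp: u64,
}

fn endpoint_from_token(auth: &JwtAuth, token: &str) -> Option<String> {
    // Before this change, decode always produced the fixed `Claims` struct;
    // with decode::<D: DeserializeOwned> the caller chooses the claim type.
    match auth.decode::<ExampleClaims>(token) {
        Ok(data) => Some(data.claims.endpoint_id),
        Err(_) => None,
    }
}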
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "endpoint_storage"
|
||||
name = "object_storage"
|
||||
version = "0.0.1"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
@@ -2,7 +2,7 @@ use anyhow::anyhow;
|
||||
use axum::body::{Body, Bytes};
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use axum::{Router, http::StatusCode};
|
||||
use endpoint_storage::{PrefixS3Path, S3Path, Storage, bad_request, internal_error, not_found, ok};
|
||||
use object_storage::{PrefixS3Path, S3Path, Storage, bad_request, internal_error, not_found, ok};
|
||||
use remote_storage::TimeoutOrCancel;
|
||||
use remote_storage::{DownloadError, DownloadOpts, GenericRemoteStorage, RemotePath};
|
||||
use std::{sync::Arc, time::SystemTime, time::UNIX_EPOCH};
|
||||
@@ -46,12 +46,12 @@ async fn metrics() -> Result {
|
||||
|
||||
async fn get(S3Path { path }: S3Path, state: State) -> Result {
|
||||
info!(%path, "downloading");
|
||||
let download_err = |err| {
|
||||
if let DownloadError::NotFound = err {
|
||||
info!(%path, %err, "downloading"); // 404 is not an issue of _this_ service
|
||||
let download_err = |e| {
|
||||
if let DownloadError::NotFound = e {
|
||||
info!(%path, %e, "downloading"); // 404 is not an issue of _this_ service
|
||||
return not_found(&path);
|
||||
}
|
||||
internal_error(err, &path, "downloading")
|
||||
internal_error(e, &path, "downloading")
|
||||
};
|
||||
let cancel = state.cancel.clone();
|
||||
let opts = &DownloadOpts::default();
|
||||
@@ -249,7 +249,7 @@ mod tests {
|
||||
};
|
||||
|
||||
let proxy = Storage {
|
||||
auth: endpoint_storage::JwtAuth::new(TEST_PUB_KEY_ED25519).unwrap(),
|
||||
auth: object_storage::JwtAuth::new(TEST_PUB_KEY_ED25519).unwrap(),
|
||||
storage,
|
||||
cancel: cancel.clone(),
|
||||
max_upload_file_limit: usize::MAX,
|
||||
@@ -343,14 +343,14 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
|
||||
TimelineId::from_array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 7]);
|
||||
const ENDPOINT_ID: &str = "ep-winter-frost-a662z3vg";
|
||||
fn token() -> String {
|
||||
let claims = endpoint_storage::Claims {
|
||||
let claims = object_storage::Claims {
|
||||
tenant_id: TENANT_ID,
|
||||
timeline_id: TIMELINE_ID,
|
||||
endpoint_id: ENDPOINT_ID.into(),
|
||||
exp: u64::MAX,
|
||||
};
|
||||
let key = jsonwebtoken::EncodingKey::from_ed_pem(TEST_PRIV_KEY_ED25519).unwrap();
|
||||
let header = jsonwebtoken::Header::new(endpoint_storage::VALIDATION_ALGO);
|
||||
let header = jsonwebtoken::Header::new(object_storage::VALIDATION_ALGO);
|
||||
jsonwebtoken::encode(&header, &claims, &key).unwrap()
|
||||
}
|
||||
|
||||
@@ -364,10 +364,7 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
|
||||
vec![TIMELINE_ID.to_string(), TimelineId::generate().to_string()],
|
||||
vec![ENDPOINT_ID, "ep-ololo"]
|
||||
)
|
||||
// first one is fully valid path, second path is valid for GET as
|
||||
// read paths may have different endpoint if tenant and timeline matches
|
||||
// (needed for prewarming RO->RW replica)
|
||||
.skip(2);
|
||||
.skip(1);
|
||||
|
||||
for ((uri, method), (tenant, timeline, endpoint)) in iproduct!(routes(), args) {
|
||||
info!(%uri, %method, %tenant, %timeline, %endpoint);
|
||||
@@ -478,16 +475,6 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
|
||||
requests_chain(chain.into_iter(), |_| token()).await;
|
||||
}
|
||||
|
||||
#[testlog(tokio::test)]
|
||||
async fn read_other_endpoint_data() {
|
||||
let uri = format!("/{TENANT_ID}/{TIMELINE_ID}/other_endpoint/key");
|
||||
let chain = vec![
|
||||
(uri.clone(), "GET", "", StatusCode::NOT_FOUND, false),
|
||||
(uri.clone(), "PUT", "", StatusCode::UNAUTHORIZED, false),
|
||||
];
|
||||
requests_chain(chain.into_iter(), |_| token()).await;
|
||||
}
|
||||
|
||||
fn delete_prefix_token(uri: &str) -> String {
|
||||
use serde::Serialize;
|
||||
let parts = uri.split("/").collect::<Vec<&str>>();
|
||||
@@ -495,7 +482,7 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
|
||||
struct PrefixClaims {
|
||||
tenant_id: TenantId,
|
||||
timeline_id: Option<TimelineId>,
|
||||
endpoint_id: Option<endpoint_storage::EndpointId>,
|
||||
endpoint_id: Option<object_storage::EndpointId>,
|
||||
exp: u64,
|
||||
}
|
||||
let claims = PrefixClaims {
|
||||
@@ -505,7 +492,7 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
|
||||
exp: u64::MAX,
|
||||
};
|
||||
let key = jsonwebtoken::EncodingKey::from_ed_pem(TEST_PRIV_KEY_ED25519).unwrap();
|
||||
let header = jsonwebtoken::Header::new(endpoint_storage::VALIDATION_ALGO);
|
||||
let header = jsonwebtoken::Header::new(object_storage::VALIDATION_ALGO);
|
||||
jsonwebtoken::encode(&header, &claims, &key).unwrap()
|
||||
}
|
||||
|
||||
@@ -169,19 +169,10 @@ impl FromRequestParts<Arc<Storage>> for S3Path {
|
||||
.auth
|
||||
.decode(bearer.token())
|
||||
.map_err(|e| bad_request(e, "decoding token"))?;
|
||||
|
||||
// Read paths may have different endpoint ids. For readonly -> readwrite replica
|
||||
// prewarming, endpoint must read other endpoint's data.
|
||||
let endpoint_id = if parts.method == axum::http::Method::GET {
|
||||
claims.endpoint_id.clone()
|
||||
} else {
|
||||
path.endpoint_id.clone()
|
||||
};
|
||||
|
||||
let route = Claims {
|
||||
tenant_id: path.tenant_id,
|
||||
timeline_id: path.timeline_id,
|
||||
endpoint_id,
|
||||
endpoint_id: path.endpoint_id.clone(),
|
||||
exp: claims.exp,
|
||||
};
|
||||
if route != claims {
|
||||
@@ -1,4 +1,4 @@
|
||||
//! `endpoint_storage` is a service which provides API for uploading and downloading
|
||||
//! `object_storage` is a service which provides API for uploading and downloading
|
||||
//! files. It is used by compute and control plane for accessing LFC prewarm data.
|
||||
//! This service is deployed either as a separate component or as part of compute image
|
||||
//! for large computes.
|
||||
@@ -33,7 +33,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
let config: String = std::env::args().skip(1).take(1).collect();
|
||||
if config.is_empty() {
|
||||
anyhow::bail!("Usage: endpoint_storage config.json")
|
||||
anyhow::bail!("Usage: object_storage config.json")
|
||||
}
|
||||
info!("Reading config from {config}");
|
||||
let config = std::fs::read_to_string(config.clone())?;
|
||||
@@ -41,7 +41,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
info!("Reading pemfile from {}", config.pemfile.clone());
|
||||
let pemfile = std::fs::read(config.pemfile.clone())?;
|
||||
info!("Loading public key from {}", config.pemfile.clone());
|
||||
let auth = endpoint_storage::JwtAuth::new(&pemfile)?;
|
||||
let auth = object_storage::JwtAuth::new(&pemfile)?;
|
||||
|
||||
let listener = tokio::net::TcpListener::bind(config.listen).await.unwrap();
|
||||
info!("listening on {}", listener.local_addr().unwrap());
|
||||
@@ -50,7 +50,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
let cancel = tokio_util::sync::CancellationToken::new();
|
||||
app::check_storage_permissions(&storage, cancel.clone()).await?;
|
||||
|
||||
let proxy = std::sync::Arc::new(endpoint_storage::Storage {
|
||||
let proxy = std::sync::Arc::new(object_storage::Storage {
|
||||
auth,
|
||||
storage,
|
||||
cancel: cancel.clone(),
|
||||
@@ -35,7 +35,6 @@ humantime.workspace = true
humantime-serde.workspace = true
hyper0.workspace = true
itertools.workspace = true
jsonwebtoken.workspace = true
md5.workspace = true
nix.workspace = true
# hack to get the number of worker threads tokio uses
@@ -78,7 +77,6 @@ metrics.workspace = true
pageserver_api.workspace = true
pageserver_client.workspace = true # for ResponseErrorMessageExt TODO refactor that
pageserver_compaction.workspace = true
pem.workspace = true
postgres_connection.workspace = true
postgres_ffi.workspace = true
pq_proto.workspace = true
@@ -106,7 +104,6 @@ hex-literal.workspace = true
tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time", "test-util"] }
indoc.workspace = true
uuid.workspace = true
rstest.workspace = true

[[bench]]
name = "bench_layer_map"
@@ -11,7 +11,6 @@ use pageserver::task_mgr::TaskKind;
|
||||
use pageserver::tenant::storage_layer::InMemoryLayer;
|
||||
use pageserver::{page_cache, virtual_file};
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::models::virtual_file::IoMode;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_api::value::Value;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
@@ -29,7 +28,6 @@ fn murmurhash32(mut h: u32) -> u32 {
|
||||
h
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize, Clone, Copy, Debug)]
|
||||
enum KeyLayout {
|
||||
/// Sequential unique keys
|
||||
Sequential,
|
||||
@@ -39,7 +37,6 @@ enum KeyLayout {
|
||||
RandomReuse(u32),
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize, Clone, Copy, Debug)]
|
||||
enum WriteDelta {
|
||||
Yes,
|
||||
No,
|
||||
@@ -141,15 +138,12 @@ async fn ingest(
|
||||
/// Wrapper to instantiate a tokio runtime
|
||||
fn ingest_main(
|
||||
conf: &'static PageServerConf,
|
||||
io_mode: IoMode,
|
||||
put_size: usize,
|
||||
put_count: usize,
|
||||
key_layout: KeyLayout,
|
||||
write_delta: WriteDelta,
|
||||
) {
|
||||
pageserver::virtual_file::set_io_mode(io_mode);
|
||||
|
||||
let runtime = tokio::runtime::Builder::new_multi_thread()
|
||||
let runtime = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
.unwrap();
|
||||
@@ -180,245 +174,93 @@ fn criterion_benchmark(c: &mut Criterion) {
|
||||
virtual_file::init(
|
||||
16384,
|
||||
virtual_file::io_engine_for_bench(),
|
||||
// immaterial, each `ingest_main` invocation below overrides this
|
||||
conf.virtual_file_io_mode,
|
||||
// without actually doing syncs, buffered writes have an unfair advantage over direct IO writes
|
||||
virtual_file::SyncMode::Sync,
|
||||
);
|
||||
page_cache::init(conf.page_cache_size);
|
||||
|
||||
#[derive(serde::Serialize)]
|
||||
struct ExplodedParameters {
|
||||
io_mode: IoMode,
|
||||
volume_mib: usize,
|
||||
key_size: usize,
|
||||
key_layout: KeyLayout,
|
||||
write_delta: WriteDelta,
|
||||
}
|
||||
#[derive(Clone)]
|
||||
struct HandPickedParameters {
|
||||
volume_mib: usize,
|
||||
key_size: usize,
|
||||
key_layout: KeyLayout,
|
||||
write_delta: WriteDelta,
|
||||
}
|
||||
let expect = vec![
|
||||
// Small values (100b) tests
|
||||
HandPickedParameters {
|
||||
volume_mib: 128,
|
||||
key_size: 100,
|
||||
key_layout: KeyLayout::Sequential,
|
||||
write_delta: WriteDelta::Yes,
|
||||
},
|
||||
HandPickedParameters {
|
||||
volume_mib: 128,
|
||||
key_size: 100,
|
||||
key_layout: KeyLayout::Random,
|
||||
write_delta: WriteDelta::Yes,
|
||||
},
|
||||
HandPickedParameters {
|
||||
volume_mib: 128,
|
||||
key_size: 100,
|
||||
key_layout: KeyLayout::RandomReuse(0x3ff),
|
||||
write_delta: WriteDelta::Yes,
|
||||
},
|
||||
HandPickedParameters {
|
||||
volume_mib: 128,
|
||||
key_size: 100,
|
||||
key_layout: KeyLayout::Sequential,
|
||||
write_delta: WriteDelta::No,
|
||||
},
|
||||
// Large values (8k) tests
|
||||
HandPickedParameters {
|
||||
volume_mib: 128,
|
||||
key_size: 8192,
|
||||
key_layout: KeyLayout::Sequential,
|
||||
write_delta: WriteDelta::Yes,
|
||||
},
|
||||
HandPickedParameters {
|
||||
volume_mib: 128,
|
||||
key_size: 8192,
|
||||
key_layout: KeyLayout::Sequential,
|
||||
write_delta: WriteDelta::No,
|
||||
},
|
||||
];
|
||||
let exploded_parameters = {
|
||||
let mut out = Vec::new();
|
||||
for io_mode in [
|
||||
IoMode::Buffered,
|
||||
#[cfg(target_os = "linux")]
|
||||
IoMode::Direct,
|
||||
#[cfg(target_os = "linux")]
|
||||
IoMode::DirectRw,
|
||||
] {
|
||||
for param in expect.clone() {
|
||||
let HandPickedParameters {
|
||||
volume_mib,
|
||||
key_size,
|
||||
key_layout,
|
||||
write_delta,
|
||||
} = param;
|
||||
out.push(ExplodedParameters {
|
||||
io_mode,
|
||||
volume_mib,
|
||||
key_size,
|
||||
key_layout,
|
||||
write_delta,
|
||||
});
|
||||
}
|
||||
}
|
||||
out
|
||||
};
|
||||
impl ExplodedParameters {
|
||||
fn benchmark_id(&self) -> String {
|
||||
let ExplodedParameters {
|
||||
io_mode,
|
||||
volume_mib,
|
||||
key_size,
|
||||
key_layout,
|
||||
write_delta,
|
||||
} = self;
|
||||
format!(
|
||||
"io_mode={io_mode:?} volume_mib={volume_mib:?} key_size_bytes={key_size:?} key_layout={key_layout:?} write_delta={write_delta:?}"
|
||||
)
|
||||
}
|
||||
}
|
||||
let mut group = c.benchmark_group("ingest");
|
||||
for params in exploded_parameters {
|
||||
let id = params.benchmark_id();
|
||||
let ExplodedParameters {
|
||||
io_mode,
|
||||
volume_mib,
|
||||
key_size,
|
||||
key_layout,
|
||||
write_delta,
|
||||
} = params;
|
||||
let put_count = volume_mib * 1024 * 1024 / key_size;
|
||||
group.throughput(criterion::Throughput::Bytes((key_size * put_count) as u64));
|
||||
{
|
||||
let mut group = c.benchmark_group("ingest-small-values");
|
||||
let put_size = 100usize;
|
||||
let put_count = 128 * 1024 * 1024 / put_size;
|
||||
group.throughput(criterion::Throughput::Bytes((put_size * put_count) as u64));
|
||||
group.sample_size(10);
|
||||
group.bench_function(id, |b| {
|
||||
b.iter(|| ingest_main(conf, io_mode, key_size, put_count, key_layout, write_delta))
|
||||
group.bench_function("ingest 128MB/100b seq", |b| {
|
||||
b.iter(|| {
|
||||
ingest_main(
|
||||
conf,
|
||||
put_size,
|
||||
put_count,
|
||||
KeyLayout::Sequential,
|
||||
WriteDelta::Yes,
|
||||
)
|
||||
})
|
||||
});
|
||||
group.bench_function("ingest 128MB/100b rand", |b| {
|
||||
b.iter(|| {
|
||||
ingest_main(
|
||||
conf,
|
||||
put_size,
|
||||
put_count,
|
||||
KeyLayout::Random,
|
||||
WriteDelta::Yes,
|
||||
)
|
||||
})
|
||||
});
|
||||
group.bench_function("ingest 128MB/100b rand-1024keys", |b| {
|
||||
b.iter(|| {
|
||||
ingest_main(
|
||||
conf,
|
||||
put_size,
|
||||
put_count,
|
||||
KeyLayout::RandomReuse(0x3ff),
|
||||
WriteDelta::Yes,
|
||||
)
|
||||
})
|
||||
});
|
||||
group.bench_function("ingest 128MB/100b seq, no delta", |b| {
|
||||
b.iter(|| {
|
||||
ingest_main(
|
||||
conf,
|
||||
put_size,
|
||||
put_count,
|
||||
KeyLayout::Sequential,
|
||||
WriteDelta::No,
|
||||
)
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
{
|
||||
let mut group = c.benchmark_group("ingest-big-values");
|
||||
let put_size = 8192usize;
|
||||
let put_count = 128 * 1024 * 1024 / put_size;
|
||||
group.throughput(criterion::Throughput::Bytes((put_size * put_count) as u64));
|
||||
group.sample_size(10);
|
||||
group.bench_function("ingest 128MB/8k seq", |b| {
|
||||
b.iter(|| {
|
||||
ingest_main(
|
||||
conf,
|
||||
put_size,
|
||||
put_count,
|
||||
KeyLayout::Sequential,
|
||||
WriteDelta::Yes,
|
||||
)
|
||||
})
|
||||
});
|
||||
group.bench_function("ingest 128MB/8k seq, no delta", |b| {
|
||||
b.iter(|| {
|
||||
ingest_main(
|
||||
conf,
|
||||
put_size,
|
||||
put_count,
|
||||
KeyLayout::Sequential,
|
||||
WriteDelta::No,
|
||||
)
|
||||
})
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
criterion_group!(benches, criterion_benchmark);
|
||||
criterion_main!(benches);
|
||||
|
||||
/*
|
||||
cargo bench --bench bench_ingest
|
||||
|
||||
im4gn.2xlarge:
|
||||
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes
|
||||
time: [1.2901 s 1.2943 s 1.2991 s]
|
||||
thrpt: [98.533 MiB/s 98.892 MiB/s 99.220 MiB/s]
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes
|
||||
time: [2.1387 s 2.1623 s 2.1845 s]
|
||||
thrpt: [58.595 MiB/s 59.197 MiB/s 59.851 MiB/s]
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Y...
|
||||
time: [1.2036 s 1.2074 s 1.2122 s]
|
||||
thrpt: [105.60 MiB/s 106.01 MiB/s 106.35 MiB/s]
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No
|
||||
time: [520.55 ms 521.46 ms 522.57 ms]
|
||||
thrpt: [244.94 MiB/s 245.47 MiB/s 245.89 MiB/s]
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes
|
||||
time: [440.33 ms 442.24 ms 444.10 ms]
|
||||
thrpt: [288.22 MiB/s 289.43 MiB/s 290.69 MiB/s]
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No
|
||||
time: [168.78 ms 169.42 ms 170.18 ms]
|
||||
thrpt: [752.16 MiB/s 755.52 MiB/s 758.40 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes
|
||||
time: [1.2978 s 1.3094 s 1.3227 s]
|
||||
thrpt: [96.775 MiB/s 97.758 MiB/s 98.632 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes
|
||||
time: [2.1976 s 2.2067 s 2.2154 s]
|
||||
thrpt: [57.777 MiB/s 58.006 MiB/s 58.245 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Yes
|
||||
time: [1.2103 s 1.2160 s 1.2233 s]
|
||||
thrpt: [104.64 MiB/s 105.26 MiB/s 105.76 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No
|
||||
time: [525.05 ms 526.37 ms 527.79 ms]
|
||||
thrpt: [242.52 MiB/s 243.17 MiB/s 243.79 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes
|
||||
time: [443.06 ms 444.88 ms 447.15 ms]
|
||||
thrpt: [286.26 MiB/s 287.72 MiB/s 288.90 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No
|
||||
time: [169.40 ms 169.80 ms 170.17 ms]
|
||||
thrpt: [752.21 MiB/s 753.81 MiB/s 755.60 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes
|
||||
time: [1.2844 s 1.2915 s 1.2990 s]
|
||||
thrpt: [98.536 MiB/s 99.112 MiB/s 99.657 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes
|
||||
time: [2.1431 s 2.1663 s 2.1900 s]
|
||||
thrpt: [58.446 MiB/s 59.087 MiB/s 59.726 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Y...
|
||||
time: [1.1906 s 1.1926 s 1.1947 s]
|
||||
thrpt: [107.14 MiB/s 107.33 MiB/s 107.51 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No
|
||||
time: [516.86 ms 518.25 ms 519.47 ms]
|
||||
thrpt: [246.40 MiB/s 246.98 MiB/s 247.65 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes
|
||||
time: [536.50 ms 536.53 ms 536.60 ms]
|
||||
thrpt: [238.54 MiB/s 238.57 MiB/s 238.59 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No
|
||||
time: [267.77 ms 267.90 ms 268.04 ms]
|
||||
thrpt: [477.53 MiB/s 477.79 MiB/s 478.02 MiB/s]
|
||||
|
||||
Hetzner AX102:
|
||||
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes
|
||||
time: [836.58 ms 861.93 ms 886.57 ms]
|
||||
thrpt: [144.38 MiB/s 148.50 MiB/s 153.00 MiB/s]
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes
|
||||
time: [1.2782 s 1.3191 s 1.3665 s]
|
||||
thrpt: [93.668 MiB/s 97.037 MiB/s 100.14 MiB/s]
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Y...
|
||||
time: [791.27 ms 807.08 ms 822.95 ms]
|
||||
thrpt: [155.54 MiB/s 158.60 MiB/s 161.77 MiB/s]
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No
|
||||
time: [310.78 ms 314.66 ms 318.47 ms]
|
||||
thrpt: [401.92 MiB/s 406.79 MiB/s 411.87 MiB/s]
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes
|
||||
time: [377.11 ms 387.77 ms 399.21 ms]
|
||||
thrpt: [320.63 MiB/s 330.10 MiB/s 339.42 MiB/s]
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No
|
||||
time: [128.37 ms 132.96 ms 138.55 ms]
|
||||
thrpt: [923.83 MiB/s 962.69 MiB/s 997.11 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes
|
||||
time: [900.38 ms 914.88 ms 928.86 ms]
|
||||
thrpt: [137.80 MiB/s 139.91 MiB/s 142.16 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes
|
||||
time: [1.2538 s 1.2936 s 1.3313 s]
|
||||
thrpt: [96.149 MiB/s 98.946 MiB/s 102.09 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Yes
|
||||
time: [787.17 ms 803.89 ms 820.63 ms]
|
||||
thrpt: [155.98 MiB/s 159.23 MiB/s 162.61 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No
|
||||
time: [318.78 ms 321.89 ms 324.74 ms]
|
||||
thrpt: [394.16 MiB/s 397.65 MiB/s 401.53 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes
|
||||
time: [374.01 ms 383.45 ms 393.20 ms]
|
||||
thrpt: [325.53 MiB/s 333.81 MiB/s 342.24 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No
|
||||
time: [137.98 ms 141.31 ms 143.57 ms]
|
||||
thrpt: [891.58 MiB/s 905.79 MiB/s 927.66 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes
|
||||
time: [613.69 ms 622.48 ms 630.97 ms]
|
||||
thrpt: [202.86 MiB/s 205.63 MiB/s 208.57 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes
|
||||
time: [1.0299 s 1.0766 s 1.1273 s]
|
||||
thrpt: [113.55 MiB/s 118.90 MiB/s 124.29 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Y...
|
||||
time: [637.80 ms 647.78 ms 658.01 ms]
|
||||
thrpt: [194.53 MiB/s 197.60 MiB/s 200.69 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No
|
||||
time: [266.09 ms 267.20 ms 268.31 ms]
|
||||
thrpt: [477.06 MiB/s 479.04 MiB/s 481.04 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes
|
||||
time: [269.34 ms 273.27 ms 277.69 ms]
|
||||
thrpt: [460.95 MiB/s 468.40 MiB/s 475.24 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No
|
||||
time: [123.18 ms 124.24 ms 125.15 ms]
|
||||
thrpt: [1022.8 MiB/s 1.0061 GiB/s 1.0148 GiB/s]
|
||||
*/
|
||||
|
||||
@@ -419,23 +419,6 @@ impl Client {
        }
    }

    pub async fn timeline_detail(
        &self,
        tenant_shard_id: TenantShardId,
        timeline_id: TimelineId,
    ) -> Result<TimelineInfo> {
        let uri = format!(
            "{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}",
            self.mgmt_api_endpoint
        );

        self.request(Method::GET, &uri, ())
            .await?
            .json()
            .await
            .map_err(Error::ReceiveBody)
    }

    pub async fn timeline_archival_config(
        &self,
        tenant_shard_id: TenantShardId,

@@ -68,13 +68,6 @@ pub(crate) struct Args {
|
||||
targets: Option<Vec<TenantTimelineId>>,
|
||||
}
|
||||
|
||||
/// State shared by all clients
|
||||
#[derive(Debug)]
|
||||
struct SharedState {
|
||||
start_work_barrier: tokio::sync::Barrier,
|
||||
live_stats: LiveStats,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct LiveStats {
|
||||
completed_requests: AtomicU64,
|
||||
@@ -247,26 +240,24 @@ async fn main_impl(
|
||||
all_ranges
|
||||
};
|
||||
|
||||
let live_stats = Arc::new(LiveStats::default());
|
||||
|
||||
let num_live_stats_dump = 1;
|
||||
let num_work_sender_tasks = args.num_clients.get() * timelines.len();
|
||||
let num_main_impl = 1;
|
||||
|
||||
let shared_state = Arc::new(SharedState {
|
||||
start_work_barrier: tokio::sync::Barrier::new(
|
||||
num_live_stats_dump + num_work_sender_tasks + num_main_impl,
|
||||
),
|
||||
live_stats: LiveStats::default(),
|
||||
});
|
||||
let cancel = CancellationToken::new();
|
||||
let start_work_barrier = Arc::new(tokio::sync::Barrier::new(
|
||||
num_live_stats_dump + num_work_sender_tasks + num_main_impl,
|
||||
));
|
||||
|
||||
let ss = shared_state.clone();
|
||||
tokio::spawn({
|
||||
let stats = Arc::clone(&live_stats);
|
||||
let start_work_barrier = Arc::clone(&start_work_barrier);
|
||||
async move {
|
||||
ss.start_work_barrier.wait().await;
|
||||
start_work_barrier.wait().await;
|
||||
loop {
|
||||
let start = std::time::Instant::now();
|
||||
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
|
||||
let stats = &ss.live_stats;
|
||||
let completed_requests = stats.completed_requests.swap(0, Ordering::Relaxed);
|
||||
let missed = stats.missed.swap(0, Ordering::Relaxed);
|
||||
let elapsed = start.elapsed();
|
||||
@@ -279,12 +270,14 @@ async fn main_impl(
|
||||
}
|
||||
});
|
||||
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
let rps_period = args
|
||||
.per_client_rate
|
||||
.map(|rps_limit| Duration::from_secs_f64(1.0 / (rps_limit as f64)));
|
||||
let make_worker: &dyn Fn(WorkerId) -> Pin<Box<dyn Send + Future<Output = ()>>> = &|worker_id| {
|
||||
let ss = shared_state.clone();
|
||||
let cancel = cancel.clone();
|
||||
let live_stats = live_stats.clone();
|
||||
let start_work_barrier = start_work_barrier.clone();
|
||||
let ranges: Vec<KeyRange> = all_ranges
|
||||
.iter()
|
||||
.filter(|r| r.timeline == worker_id.timeline)
|
||||
@@ -294,8 +287,85 @@ async fn main_impl(
|
||||
rand::distributions::weighted::WeightedIndex::new(ranges.iter().map(|v| v.len()))
|
||||
.unwrap();
|
||||
|
||||
let cancel = cancel.clone();
|
||||
Box::pin(async move {
|
||||
client_libpq(args, worker_id, ss, cancel, rps_period, ranges, weights).await
|
||||
let client =
|
||||
pageserver_client::page_service::Client::new(args.page_service_connstring.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
let mut client = client
|
||||
.pagestream(worker_id.timeline.tenant_id, worker_id.timeline.timeline_id)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
start_work_barrier.wait().await;
|
||||
let client_start = Instant::now();
|
||||
let mut ticks_processed = 0;
|
||||
let mut inflight = VecDeque::new();
|
||||
while !cancel.is_cancelled() {
|
||||
// Detect if a request took longer than the RPS rate
|
||||
if let Some(period) = &rps_period {
|
||||
let periods_passed_until_now =
|
||||
usize::try_from(client_start.elapsed().as_micros() / period.as_micros())
|
||||
.unwrap();
|
||||
|
||||
if periods_passed_until_now > ticks_processed {
|
||||
live_stats.missed((periods_passed_until_now - ticks_processed) as u64);
|
||||
}
|
||||
ticks_processed = periods_passed_until_now;
|
||||
}
|
||||
|
||||
while inflight.len() < args.queue_depth.get() {
|
||||
let start = Instant::now();
|
||||
let req = {
|
||||
let mut rng = rand::thread_rng();
|
||||
let r = &ranges[weights.sample(&mut rng)];
|
||||
let key: i128 = rng.gen_range(r.start..r.end);
|
||||
let key = Key::from_i128(key);
|
||||
assert!(key.is_rel_block_key());
|
||||
let (rel_tag, block_no) = key
|
||||
.to_rel_block()
|
||||
.expect("we filter non-rel-block keys out above");
|
||||
PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: if rng.gen_bool(args.req_latest_probability) {
|
||||
Lsn::MAX
|
||||
} else {
|
||||
r.timeline_lsn
|
||||
},
|
||||
not_modified_since: r.timeline_lsn,
|
||||
},
|
||||
rel: rel_tag,
|
||||
blkno: block_no,
|
||||
}
|
||||
};
|
||||
client.getpage_send(req).await.unwrap();
|
||||
inflight.push_back(start);
|
||||
}
|
||||
|
||||
let start = inflight.pop_front().unwrap();
|
||||
client.getpage_recv().await.unwrap();
|
||||
let end = Instant::now();
|
||||
live_stats.request_done();
|
||||
ticks_processed += 1;
|
||||
STATS.with(|stats| {
|
||||
stats
|
||||
.borrow()
|
||||
.lock()
|
||||
.unwrap()
|
||||
.observe(end.duration_since(start))
|
||||
.unwrap();
|
||||
});
|
||||
|
||||
if let Some(period) = &rps_period {
|
||||
let next_at = client_start
|
||||
+ Duration::from_micros(
|
||||
(ticks_processed) as u64 * u64::try_from(period.as_micros()).unwrap(),
|
||||
);
|
||||
tokio::time::sleep_until(next_at.into()).await;
|
||||
}
|
||||
}
|
||||
})
|
||||
};
|
||||
|
||||
@@ -317,7 +387,7 @@ async fn main_impl(
|
||||
};
|
||||
|
||||
info!("waiting for everything to become ready");
|
||||
shared_state.start_work_barrier.wait().await;
|
||||
start_work_barrier.wait().await;
|
||||
info!("work started");
|
||||
if let Some(runtime) = args.runtime {
|
||||
tokio::time::sleep(runtime.into()).await;
|
||||
@@ -346,91 +416,3 @@ async fn main_impl(
|
||||
|
||||
anyhow::Ok(())
|
||||
}
|
||||
|
||||
async fn client_libpq(
|
||||
args: &Args,
|
||||
worker_id: WorkerId,
|
||||
shared_state: Arc<SharedState>,
|
||||
cancel: CancellationToken,
|
||||
rps_period: Option<Duration>,
|
||||
ranges: Vec<KeyRange>,
|
||||
weights: rand::distributions::weighted::WeightedIndex<i128>,
|
||||
) {
|
||||
let client = pageserver_client::page_service::Client::new(args.page_service_connstring.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
let mut client = client
|
||||
.pagestream(worker_id.timeline.tenant_id, worker_id.timeline.timeline_id)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
shared_state.start_work_barrier.wait().await;
|
||||
let client_start = Instant::now();
|
||||
let mut ticks_processed = 0;
|
||||
let mut inflight = VecDeque::new();
|
||||
while !cancel.is_cancelled() {
|
||||
// Detect if a request took longer than the RPS rate
|
||||
if let Some(period) = &rps_period {
|
||||
let periods_passed_until_now =
|
||||
usize::try_from(client_start.elapsed().as_micros() / period.as_micros()).unwrap();
|
||||
|
||||
if periods_passed_until_now > ticks_processed {
|
||||
shared_state
|
||||
.live_stats
|
||||
.missed((periods_passed_until_now - ticks_processed) as u64);
|
||||
}
|
||||
ticks_processed = periods_passed_until_now;
|
||||
}
|
||||
|
||||
while inflight.len() < args.queue_depth.get() {
|
||||
let start = Instant::now();
|
||||
let req = {
|
||||
let mut rng = rand::thread_rng();
|
||||
let r = &ranges[weights.sample(&mut rng)];
|
||||
let key: i128 = rng.gen_range(r.start..r.end);
|
||||
let key = Key::from_i128(key);
|
||||
assert!(key.is_rel_block_key());
|
||||
let (rel_tag, block_no) = key
|
||||
.to_rel_block()
|
||||
.expect("we filter non-rel-block keys out above");
|
||||
PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: if rng.gen_bool(args.req_latest_probability) {
|
||||
Lsn::MAX
|
||||
} else {
|
||||
r.timeline_lsn
|
||||
},
|
||||
not_modified_since: r.timeline_lsn,
|
||||
},
|
||||
rel: rel_tag,
|
||||
blkno: block_no,
|
||||
}
|
||||
};
|
||||
client.getpage_send(req).await.unwrap();
|
||||
inflight.push_back(start);
|
||||
}
|
||||
|
||||
let start = inflight.pop_front().unwrap();
|
||||
client.getpage_recv().await.unwrap();
|
||||
let end = Instant::now();
|
||||
shared_state.live_stats.request_done();
|
||||
ticks_processed += 1;
|
||||
STATS.with(|stats| {
|
||||
stats
|
||||
.borrow()
|
||||
.lock()
|
||||
.unwrap()
|
||||
.observe(end.duration_since(start))
|
||||
.unwrap();
|
||||
});
|
||||
|
||||
if let Some(period) = &rps_period {
|
||||
let next_at = client_start
|
||||
+ Duration::from_micros(
|
||||
(ticks_processed) as u64 * u64::try_from(period.as_micros()).unwrap(),
|
||||
);
|
||||
tokio::time::sleep_until(next_at.into()).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -416,18 +416,8 @@ fn start_pageserver(
    // The storage_broker::connect call needs to happen inside a tokio runtime thread.
    let broker_client = WALRECEIVER_RUNTIME
        .block_on(async {
            let tls_config = storage_broker::ClientTlsConfig::new().ca_certificates(
                conf.ssl_ca_certs
                    .iter()
                    .map(pem::encode)
                    .map(storage_broker::Certificate::from_pem),
            );
            // Note: we do not attempt connecting here (but validate endpoints sanity).
            storage_broker::connect(
                conf.broker_endpoint.clone(),
                conf.broker_keepalive_interval,
                tls_config,
            )
            storage_broker::connect(conf.broker_endpoint.clone(), conf.broker_keepalive_interval)
        })
        .with_context(|| {
            format!(
@@ -17,10 +17,9 @@ use once_cell::sync::OnceCell;
|
||||
use pageserver_api::config::{DiskUsageEvictionTaskConfig, MaxVectoredReadBytes};
|
||||
use pageserver_api::models::ImageCompressionAlgorithm;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pem::Pem;
|
||||
use postgres_backend::AuthType;
|
||||
use remote_storage::{RemotePath, RemoteStorageConfig};
|
||||
use reqwest::Url;
|
||||
use reqwest::{Certificate, Url};
|
||||
use storage_broker::Uri;
|
||||
use utils::id::{NodeId, TimelineId};
|
||||
use utils::logging::{LogFormat, SecretString};
|
||||
@@ -68,8 +67,8 @@ pub struct PageServerConf {
|
||||
/// Period to reload certificate and private key from files.
|
||||
/// Default: 60s.
|
||||
pub ssl_cert_reload_period: Duration,
|
||||
/// Trusted root CA certificates to use in https APIs in PEM format.
|
||||
pub ssl_ca_certs: Vec<Pem>,
|
||||
/// Trusted root CA certificates to use in https APIs.
|
||||
pub ssl_ca_certs: Vec<Certificate>,
|
||||
|
||||
/// Current availability zone. Used for traffic metrics.
|
||||
pub availability_zone: Option<String>,
|
||||
@@ -119,13 +118,13 @@ pub struct PageServerConf {
|
||||
/// A lower value implicitly deprioritizes loading such tenants, vs. other work in the system.
|
||||
pub concurrent_tenant_warmup: ConfigurableSemaphore,
|
||||
|
||||
/// Number of concurrent [`TenantShard::gather_size_inputs`](crate::tenant::TenantShard::gather_size_inputs) allowed.
|
||||
/// Number of concurrent [`Tenant::gather_size_inputs`](crate::tenant::Tenant::gather_size_inputs) allowed.
|
||||
pub concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore,
|
||||
/// Limit of concurrent [`TenantShard::gather_size_inputs`] issued by module `eviction_task`.
|
||||
/// Limit of concurrent [`Tenant::gather_size_inputs`] issued by module `eviction_task`.
|
||||
/// The number of permits is the same as `concurrent_tenant_size_logical_size_queries`.
|
||||
/// See the comment in `eviction_task` for details.
|
||||
///
|
||||
/// [`TenantShard::gather_size_inputs`]: crate::tenant::TenantShard::gather_size_inputs
|
||||
/// [`Tenant::gather_size_inputs`]: crate::tenant::Tenant::gather_size_inputs
|
||||
pub eviction_task_immitated_concurrent_logical_size_queries: ConfigurableSemaphore,
|
||||
|
||||
// How often to collect metrics and send them to the metrics endpoint.
|
||||
@@ -225,11 +224,6 @@ pub struct PageServerConf {
|
||||
/// Does not force TLS: the client negotiates TLS usage during the handshake.
|
||||
/// Uses key and certificate from ssl_key_file/ssl_cert_file.
|
||||
pub enable_tls_page_service_api: bool,
|
||||
|
||||
/// Run in development mode, which disables certain safety checks
|
||||
/// such as authentication requirements for HTTP and PostgreSQL APIs.
|
||||
/// This is insecure and should only be used in development environments.
|
||||
pub dev_mode: bool,
|
||||
}
|
||||
|
||||
/// Token for authentication to safekeepers
|
||||
@@ -403,7 +397,6 @@ impl PageServerConf {
|
||||
generate_unarchival_heatmap,
|
||||
tracing,
|
||||
enable_tls_page_service_api,
|
||||
dev_mode,
|
||||
} = config_toml;
|
||||
|
||||
let mut conf = PageServerConf {
|
||||
@@ -455,7 +448,6 @@ impl PageServerConf {
|
||||
get_vectored_concurrent_io,
|
||||
tracing,
|
||||
enable_tls_page_service_api,
|
||||
dev_mode,
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// fields that require additional validation or custom handling
|
||||
@@ -505,10 +497,7 @@ impl PageServerConf {
|
||||
ssl_ca_certs: match ssl_ca_file {
|
||||
Some(ssl_ca_file) => {
|
||||
let buf = std::fs::read(ssl_ca_file)?;
|
||||
pem::parse_many(&buf)?
|
||||
.into_iter()
|
||||
.filter(|pem| pem.tag() == "CERTIFICATE")
|
||||
.collect()
|
||||
Certificate::from_pem_bundle(&buf)?
|
||||
}
|
||||
None => Vec::new(),
|
||||
},
|
||||
@@ -599,10 +588,10 @@ impl ConfigurableSemaphore {
    /// Initialize using a non-zero amount of permits.
    ///
    /// Requires a non-zero number of initial permits, because using permits == 0 is a crude way to disable a
    /// feature such as [`TenantShard::gather_size_inputs`]. Otherwise any semaphore using future will
    /// feature such as [`Tenant::gather_size_inputs`]. Otherwise any semaphore using future will
    /// behave like [`futures::future::pending`], just waiting until new permits are added.
    ///
    /// [`TenantShard::gather_size_inputs`]: crate::tenant::TenantShard::gather_size_inputs
    /// [`Tenant::gather_size_inputs`]: crate::tenant::Tenant::gather_size_inputs
    pub fn new(initial_permits: NonZeroUsize) -> Self {
        ConfigurableSemaphore {
            initial_permits,
@@ -24,7 +24,7 @@ use crate::task_mgr::{self, BACKGROUND_RUNTIME, TaskKind};
|
||||
use crate::tenant::mgr::TenantManager;
|
||||
use crate::tenant::size::CalculateSyntheticSizeError;
|
||||
use crate::tenant::tasks::BackgroundLoopKind;
|
||||
use crate::tenant::{LogicalSizeCalculationCause, TenantShard};
|
||||
use crate::tenant::{LogicalSizeCalculationCause, Tenant};
|
||||
|
||||
mod disk_cache;
|
||||
mod metrics;
|
||||
@@ -428,7 +428,7 @@ async fn calculate_synthetic_size_worker(
|
||||
}
|
||||
}
|
||||
|
||||
async fn calculate_and_log(tenant: &TenantShard, cancel: &CancellationToken, ctx: &RequestContext) {
|
||||
async fn calculate_and_log(tenant: &Tenant, cancel: &CancellationToken, ctx: &RequestContext) {
|
||||
const CAUSE: LogicalSizeCalculationCause =
|
||||
LogicalSizeCalculationCause::ConsumptionMetricsSyntheticSize;
|
||||
|
||||
|
||||
@@ -175,9 +175,9 @@ impl MetricsKey {
|
||||
.absolute_values()
|
||||
}
|
||||
|
||||
/// [`TenantShard::remote_size`]
|
||||
/// [`Tenant::remote_size`]
|
||||
///
|
||||
/// [`TenantShard::remote_size`]: crate::tenant::TenantShard::remote_size
|
||||
/// [`Tenant::remote_size`]: crate::tenant::Tenant::remote_size
|
||||
const fn remote_storage_size(tenant_id: TenantId) -> AbsoluteValueFactory {
|
||||
MetricsKey {
|
||||
tenant_id,
|
||||
@@ -199,9 +199,9 @@ impl MetricsKey {
|
||||
.absolute_values()
|
||||
}
|
||||
|
||||
/// [`TenantShard::cached_synthetic_size`] as refreshed by [`calculate_synthetic_size_worker`].
|
||||
/// [`Tenant::cached_synthetic_size`] as refreshed by [`calculate_synthetic_size_worker`].
|
||||
///
|
||||
/// [`TenantShard::cached_synthetic_size`]: crate::tenant::TenantShard::cached_synthetic_size
|
||||
/// [`Tenant::cached_synthetic_size`]: crate::tenant::Tenant::cached_synthetic_size
|
||||
/// [`calculate_synthetic_size_worker`]: super::calculate_synthetic_size_worker
|
||||
const fn synthetic_size(tenant_id: TenantId) -> AbsoluteValueFactory {
|
||||
MetricsKey {
|
||||
@@ -254,7 +254,7 @@ pub(super) async fn collect_all_metrics(
|
||||
|
||||
async fn collect<S>(tenants: S, cache: &Cache, ctx: &RequestContext) -> Vec<NewRawMetric>
|
||||
where
|
||||
S: futures::stream::Stream<Item = (TenantId, Arc<crate::tenant::TenantShard>)>,
|
||||
S: futures::stream::Stream<Item = (TenantId, Arc<crate::tenant::Tenant>)>,
|
||||
{
|
||||
let mut current_metrics: Vec<NewRawMetric> = Vec::new();
|
||||
|
||||
@@ -263,9 +263,7 @@ where
|
||||
while let Some((tenant_id, tenant)) = tenants.next().await {
|
||||
let mut tenant_resident_size = 0;
|
||||
|
||||
let timelines = tenant.list_timelines();
|
||||
let timelines_len = timelines.len();
|
||||
for timeline in timelines {
|
||||
for timeline in tenant.list_timelines() {
|
||||
let timeline_id = timeline.timeline_id;
|
||||
|
||||
match TimelineSnapshot::collect(&timeline, ctx) {
|
||||
@@ -291,11 +289,6 @@ where
|
||||
tenant_resident_size += timeline.resident_physical_size();
|
||||
}
|
||||
|
||||
if timelines_len == 0 {
|
||||
// Force set it to 1 byte to avoid not being reported -- all timelines are offloaded.
|
||||
tenant_resident_size = 1;
|
||||
}
|
||||
|
||||
let snap = TenantSnapshot::collect(&tenant, tenant_resident_size);
|
||||
snap.to_metrics(tenant_id, Utc::now(), cache, &mut current_metrics);
|
||||
}
|
||||
@@ -315,7 +308,7 @@ impl TenantSnapshot {
|
||||
///
|
||||
/// `resident_size` is calculated of the timelines we had access to for other metrics, so we
|
||||
/// cannot just list timelines here.
|
||||
fn collect(t: &Arc<crate::tenant::TenantShard>, resident_size: u64) -> Self {
|
||||
fn collect(t: &Arc<crate::tenant::Tenant>, resident_size: u64) -> Self {
|
||||
TenantSnapshot {
|
||||
resident_size,
|
||||
remote_size: t.remote_size(),
|
||||
|
||||
@@ -3,19 +3,17 @@ use std::collections::HashMap;
|
||||
use futures::Future;
|
||||
use pageserver_api::config::NodeMetadata;
|
||||
use pageserver_api::controller_api::{AvailabilityZone, NodeRegisterRequest};
|
||||
use pageserver_api::models::ShardImportStatus;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_api::upcall_api::{
|
||||
PutTimelineImportStatusRequest, ReAttachRequest, ReAttachResponse, ReAttachResponseTenant,
|
||||
ValidateRequest, ValidateRequestTenant, ValidateResponse,
|
||||
ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest,
|
||||
ValidateRequestTenant, ValidateResponse,
|
||||
};
|
||||
use reqwest::Certificate;
|
||||
use serde::Serialize;
|
||||
use serde::de::DeserializeOwned;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use url::Url;
|
||||
use utils::generation::Generation;
|
||||
use utils::id::{NodeId, TimelineId};
|
||||
use utils::id::NodeId;
|
||||
use utils::{backoff, failpoint_support};
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
@@ -47,12 +45,6 @@ pub trait StorageControllerUpcallApi {
|
||||
&self,
|
||||
tenants: Vec<(TenantShardId, Generation)>,
|
||||
) -> impl Future<Output = Result<HashMap<TenantShardId, bool>, RetryForeverError>> + Send;
|
||||
fn put_timeline_import_status(
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
status: ShardImportStatus,
|
||||
) -> impl Future<Output = Result<(), RetryForeverError>> + Send;
|
||||
}
|
||||
|
||||
impl StorageControllerUpcallClient {
|
||||
@@ -84,8 +76,8 @@ impl StorageControllerUpcallClient {
|
||||
client = client.default_headers(headers);
|
||||
}
|
||||
|
||||
for cert in &conf.ssl_ca_certs {
|
||||
client = client.add_root_certificate(Certificate::from_der(cert.contents())?);
|
||||
for ssl_ca_cert in &conf.ssl_ca_certs {
|
||||
client = client.add_root_certificate(ssl_ca_cert.clone());
|
||||
}
|
||||
|
||||
Ok(Some(Self {
|
||||
@@ -280,30 +272,4 @@ impl StorageControllerUpcallApi for StorageControllerUpcallClient {
|
||||
|
||||
Ok(result.into_iter().collect())
|
||||
}
|
||||
|
||||
/// Send a shard import status to the storage controller
|
||||
///
|
||||
/// The implementation must have at-least-once delivery semantics.
|
||||
/// To this end, we retry the request until it succeeds. If the pageserver
|
||||
/// restarts or crashes, the shard import will start again from the beginning.
|
||||
#[tracing::instrument(skip_all)] // so that warning logs from retry_http_forever have context
|
||||
async fn put_timeline_import_status(
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
status: ShardImportStatus,
|
||||
) -> Result<(), RetryForeverError> {
|
||||
let url = self
|
||||
.base_url
|
||||
.join("timeline_import_status")
|
||||
.expect("Failed to build path");
|
||||
|
||||
let request = PutTimelineImportStatusRequest {
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
status,
|
||||
};
|
||||
|
||||
self.retry_http_forever(&url, request).await
|
||||
}
|
||||
}
|
||||
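The at-least-once contract described in the doc comment above boils down to: retry the upcall until the storage controller acknowledges it, or stop on cancellation and let the restarted import re-send it. A minimal sketch of that loop, with illustrative names (`retry_forever`, the backoff constants) rather than the real `retry_http_forever` internals:

async fn retry_forever<F, Fut, T, E>(
    mut attempt: F,
    cancel: &tokio_util::sync::CancellationToken,
) -> Option<T>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<T, E>>,
    E: std::fmt::Display,
{
    let mut delay = std::time::Duration::from_millis(100);
    loop {
        if cancel.is_cancelled() {
            // Shutdown: the import restarts from scratch, so dropping the
            // request here still satisfies at-least-once delivery overall.
            return None;
        }
        match attempt().await {
            Ok(v) => return Some(v),
            Err(e) => tracing::warn!("upcall failed, will retry: {e}"),
        }
        tokio::time::sleep(delay).await;
        delay = (delay * 2).min(std::time::Duration::from_secs(5)); // capped backoff
    }
}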
|
||||
@@ -787,15 +787,6 @@ mod test {
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn put_timeline_import_status(
|
||||
&self,
|
||||
_tenant_shard_id: TenantShardId,
|
||||
_timeline_id: TimelineId,
|
||||
_status: pageserver_api::models::ShardImportStatus,
|
||||
) -> Result<(), RetryForeverError> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
async fn setup(test_name: &str) -> anyhow::Result<TestSetup> {
|
||||
|
||||
@@ -1873,7 +1873,7 @@ async fn update_tenant_config_handler(
|
||||
&ShardParameters::default(),
|
||||
);
|
||||
|
||||
crate::tenant::TenantShard::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
|
||||
crate::tenant::Tenant::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
|
||||
.await
|
||||
.map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;
|
||||
|
||||
@@ -1917,7 +1917,7 @@ async fn patch_tenant_config_handler(
|
||||
&ShardParameters::default(),
|
||||
);
|
||||
|
||||
crate::tenant::TenantShard::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
|
||||
crate::tenant::Tenant::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
|
||||
.await
|
||||
.map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@ use tracing::{info, info_span};
|
||||
/// backwards-compatible changes to the metadata format.
|
||||
pub const STORAGE_FORMAT_VERSION: u16 = 3;
|
||||
|
||||
pub const DEFAULT_PG_VERSION: u32 = 17;
|
||||
pub const DEFAULT_PG_VERSION: u32 = 16;
|
||||
|
||||
// Magic constants used to identify different kinds of files
|
||||
pub const IMAGE_FILE_MAGIC: u16 = 0x5A60;
|
||||
|
||||
@@ -1086,7 +1086,7 @@ pub(crate) static TIMELINE_EPHEMERAL_BYTES: Lazy<UIntGauge> = Lazy::new(|| {
|
||||
.expect("Failed to register metric")
|
||||
});
|
||||
|
||||
/// Metrics related to the lifecycle of a [`crate::tenant::TenantShard`] object: things
|
||||
/// Metrics related to the lifecycle of a [`crate::tenant::Tenant`] object: things
|
||||
/// like how long it took to load.
|
||||
///
|
||||
/// Note that these are process-global metrics, _not_ per-tenant metrics. Per-tenant
|
||||
@@ -1289,7 +1289,6 @@ pub(crate) enum StorageIoOperation {
|
||||
Seek,
|
||||
Fsync,
|
||||
Metadata,
|
||||
SetLen,
|
||||
}
|
||||
|
||||
impl StorageIoOperation {
|
||||
@@ -1304,7 +1303,6 @@ impl StorageIoOperation {
|
||||
StorageIoOperation::Seek => "seek",
|
||||
StorageIoOperation::Fsync => "fsync",
|
||||
StorageIoOperation::Metadata => "metadata",
|
||||
StorageIoOperation::SetLen => "set_len",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,13 +15,12 @@ use async_compression::tokio::write::GzipEncoder;
|
||||
use bytes::Buf;
|
||||
use futures::FutureExt;
|
||||
use itertools::Itertools;
|
||||
use jsonwebtoken::TokenData;
|
||||
use once_cell::sync::OnceCell;
|
||||
use pageserver_api::config::{
|
||||
PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
|
||||
PageServiceProtocolPipelinedBatchingStrategy, PageServiceProtocolPipelinedExecutionStrategy,
|
||||
};
|
||||
use pageserver_api::key::{Key, rel_block_to_key};
|
||||
use pageserver_api::key::rel_block_to_key;
|
||||
use pageserver_api::models::{
|
||||
self, PageTraceEvent, PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
|
||||
PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse,
|
||||
@@ -29,7 +28,7 @@ use pageserver_api::models::{
|
||||
PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest, PagestreamNblocksResponse,
|
||||
PagestreamProtocolVersion, PagestreamRequest, TenantState,
|
||||
};
|
||||
use pageserver_api::reltag::{RelTag, SlruKind};
|
||||
use pageserver_api::reltag::SlruKind;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use postgres_backend::{
|
||||
AuthType, PostgresBackend, PostgresBackendReader, QueryError, is_expected_io_error,
|
||||
@@ -76,7 +75,7 @@ use crate::tenant::timeline::{self, WaitLsnError};
|
||||
use crate::tenant::{GetTimelineError, PageReconstructError, Timeline};
|
||||
use crate::{basebackup, timed_after_cancellation};
|
||||
|
||||
/// How long we may wait for a [`crate::tenant::mgr::TenantSlot::InProgress`] and/or a [`crate::tenant::TenantShard`] which
|
||||
/// How long we may wait for a [`crate::tenant::mgr::TenantSlot::InProgress`] and/or a [`crate::tenant::Tenant`] which
|
||||
/// is not yet in state [`TenantState::Active`].
|
||||
///
|
||||
/// NB: this is a different value than [`crate::http::routes::ACTIVE_TENANT_TIMEOUT`].
|
||||
@@ -1035,10 +1034,10 @@ impl PageServerHandler {
|
||||
// avoid a somewhat costly Span::record() by constructing the entire span in one go.
|
||||
macro_rules! mkspan {
|
||||
(before shard routing) => {{
|
||||
tracing::info_span!(parent: &parent_span, "handle_get_page_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.hdr.request_lsn, not_modified_since_lsn = %req.hdr.not_modified_since)
|
||||
tracing::info_span!(parent: &parent_span, "handle_get_page_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.hdr.request_lsn)
|
||||
}};
|
||||
($shard_id:expr) => {{
|
||||
tracing::info_span!(parent: &parent_span, "handle_get_page_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.hdr.request_lsn, not_modified_since_lsn = %req.hdr.not_modified_since, shard_id = %$shard_id)
|
||||
tracing::info_span!(parent: &parent_span, "handle_get_page_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.hdr.request_lsn, shard_id = %$shard_id)
|
||||
}};
|
||||
}
|
||||
|
||||
@@ -1140,10 +1139,9 @@ impl PageServerHandler {
|
||||
.await?;
|
||||
|
||||
// We're holding the Handle
|
||||
let last_record_lsn = shard.get_last_record_lsn();
|
||||
let effective_request_lsn = match Self::effective_request_lsn(
|
||||
&shard,
|
||||
last_record_lsn,
|
||||
shard.get_last_record_lsn(),
|
||||
req.hdr.request_lsn,
|
||||
req.hdr.not_modified_since,
|
||||
&shard.get_applied_gc_cutoff_lsn(),
|
||||
@@ -1154,22 +1152,6 @@ impl PageServerHandler {
|
||||
}
|
||||
};
|
||||
|
||||
let trouble_key = Key::from_hex("000000067F000040000000400600FFFFFFFF").unwrap();
|
||||
let trouble_rel = RelTag {
|
||||
spcnode: trouble_key.field2,
|
||||
dbnode: trouble_key.field3,
|
||||
relnode: trouble_key.field4,
|
||||
forknum: trouble_key.field5,
|
||||
};
|
||||
if req.rel == trouble_rel {
|
||||
tracing::info!(
|
||||
request_lsn=%req.hdr.request_lsn,
|
||||
not_modified_since_lsn=%req.hdr.not_modified_since,
|
||||
%last_record_lsn,
|
||||
"effective_request_lsn for {} is {}", key, effective_request_lsn
|
||||
);
|
||||
}
|
||||
|
||||
BatchedFeMessage::GetPage {
|
||||
span,
|
||||
shard: shard.downgrade(),
|
||||
@@ -2855,7 +2837,7 @@ where
|
||||
) -> Result<(), QueryError> {
|
||||
// this unwrap is never triggered, because check_auth_jwt is only called when auth_type is NeonJWT
|
||||
// which requires auth to be present
|
||||
let data: TokenData<Claims> = self
|
||||
let data = self
|
||||
.auth
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
|
||||
@@ -185,7 +185,6 @@ impl Timeline {
|
||||
pending_directory_entries: Vec::new(),
|
||||
pending_metadata_bytes: 0,
|
||||
lsn,
|
||||
extra_log: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -266,14 +265,6 @@ impl Timeline {
|
||||
let mut req_keyspaces: HashMap<Lsn, KeySpaceRandomAccum> =
|
||||
HashMap::with_capacity(pages.len());
|
||||
|
||||
let trouble_key = Key::from_hex("000000067F000040000000400600FFFFFFFF").unwrap();
|
||||
let trouble_rel = RelTag {
|
||||
spcnode: trouble_key.field2,
|
||||
dbnode: trouble_key.field3,
|
||||
relnode: trouble_key.field4,
|
||||
forknum: trouble_key.field5,
|
||||
};
|
||||
|
||||
for (response_slot_idx, (tag, blknum, lsn, ctx)) in pages.enumerate() {
|
||||
if tag.relnode == 0 {
|
||||
result_slots[response_slot_idx].write(Err(PageReconstructError::Other(
|
||||
@@ -284,14 +275,6 @@ impl Timeline {
|
||||
continue;
|
||||
}
|
||||
|
||||
if *tag == trouble_rel {
|
||||
tracing::info!(
|
||||
"Getting rel size for {} at LSN {}",
|
||||
rel_block_to_key(*tag, *blknum),
|
||||
lsn
|
||||
);
|
||||
}
|
||||
|
||||
let nblocks = match self
|
||||
.get_rel_size(*tag, Version::Lsn(lsn), &ctx)
|
||||
.maybe_perf_instrument(&ctx, |crnt_perf_span| {
|
||||
@@ -1419,8 +1402,6 @@ pub struct DatadirModification<'a> {
|
||||
|
||||
/// An **approximation** of how many metadata bytes will be written to the EphemeralFile.
|
||||
pending_metadata_bytes: usize,
|
||||
|
||||
extra_log: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
@@ -1639,32 +1620,6 @@ impl DatadirModification<'_> {
|
||||
) -> Result<(), WalIngestError> {
|
||||
let mut gaps_at_lsns = Vec::default();
|
||||
|
||||
let trouble_key = Key::from_hex("000000067F000040000000400600FFFFFFFF").unwrap();
|
||||
let trouble_rel = RelTag {
|
||||
spcnode: trouble_key.field2,
|
||||
dbnode: trouble_key.field3,
|
||||
relnode: trouble_key.field4,
|
||||
forknum: trouble_key.field5,
|
||||
};
|
||||
|
||||
for meta in batch.metadata.iter().filter_map(|m| match m {
|
||||
ValueMeta::Serialized(serialized_value_meta) => Some(serialized_value_meta),
|
||||
ValueMeta::Observed(_) => None,
|
||||
}) {
|
||||
let key = Key::from_compact(meta.key);
|
||||
let rel = RelTag {
|
||||
spcnode: key.field2,
|
||||
dbnode: key.field3,
|
||||
relnode: key.field4,
|
||||
forknum: key.field5,
|
||||
};
|
||||
|
||||
if rel == trouble_rel {
|
||||
tracing::info!("Put for {key} at LSN {}", meta.lsn);
|
||||
self.extra_log = true;
|
||||
}
|
||||
}
|
||||
|
||||
for meta in batch.metadata.iter() {
|
||||
let key = Key::from_compact(meta.key());
|
||||
let (rel, blkno) = key
|
||||
@@ -1995,19 +1950,6 @@ impl DatadirModification<'_> {
|
||||
"invalid relnode"
|
||||
)))?;
|
||||
}
|
||||
|
||||
let trouble_key = Key::from_hex("000000067F000040000000400600FFFFFFFF").unwrap();
|
||||
let trouble_rel = RelTag {
|
||||
spcnode: trouble_key.field2,
|
||||
dbnode: trouble_key.field3,
|
||||
relnode: trouble_key.field4,
|
||||
forknum: trouble_key.field5,
|
||||
};
|
||||
|
||||
if rel == trouble_rel {
|
||||
self.extra_log = true;
|
||||
}
|
||||
|
||||
// It's possible that this is the first rel for this db in this
|
||||
// tablespace. Create the reldir entry for it if so.
|
||||
let mut dbdir = DbDirectory::des(&self.get(DBDIR_KEY, ctx).await?)?;
|
||||
@@ -2027,10 +1969,6 @@ impl DatadirModification<'_> {
|
||||
true
|
||||
};
|
||||
|
||||
if rel == trouble_rel {
|
||||
tracing::info!(%dbdir_exists, "Maybe created db dir for {} at LSN {}", trouble_key.to_compact(), self.lsn);
|
||||
}
|
||||
|
||||
let rel_dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);
|
||||
let mut rel_dir = if !dbdir_exists {
|
||||
// Create the RelDirectory
|
||||
@@ -2076,10 +2014,6 @@ impl DatadirModification<'_> {
|
||||
}
|
||||
self.pending_directory_entries
|
||||
.push((DirectoryKind::RelV2, MetricsUpdate::Add(1)));
|
||||
|
||||
if rel == trouble_rel {
|
||||
tracing::info!(%dbdir_exists, "Created v2 rel for {} at LSN {}", trouble_key.to_compact(), self.lsn);
|
||||
}
|
||||
} else {
|
||||
// Add the new relation to the rel directory entry, and write it back
|
||||
if !rel_dir.rels.insert((rel.relnode, rel.forknum)) {
|
||||
@@ -2095,10 +2029,6 @@ impl DatadirModification<'_> {
|
||||
rel_dir_key,
|
||||
Value::Image(Bytes::from(RelDirectory::ser(&rel_dir)?)),
|
||||
);
|
||||
|
||||
if rel == trouble_rel {
|
||||
tracing::info!(%dbdir_exists, "Created v1 rel for {} at LSN {}", trouble_key.to_compact(), self.lsn);
|
||||
}
|
||||
}
|
||||
|
||||
// Put size
|
||||
@@ -2533,19 +2463,11 @@ impl DatadirModification<'_> {
|
||||
};
|
||||
|
||||
if let Some(batch) = maybe_batch {
|
||||
if self.extra_log {
|
||||
tracing::info!(
|
||||
"Flushing batch with max_lsn={}. Last record LSN is {}",
|
||||
batch.max_lsn,
|
||||
self.tline.get_last_record_lsn()
|
||||
);
|
||||
} else {
|
||||
tracing::debug!(
|
||||
"Flushing batch with max_lsn={}. Last record LSN is {}",
|
||||
batch.max_lsn,
|
||||
self.tline.get_last_record_lsn()
|
||||
);
|
||||
}
|
||||
tracing::debug!(
|
||||
"Flushing batch with max_lsn={}. Last record LSN is {}",
|
||||
batch.max_lsn,
|
||||
self.tline.get_last_record_lsn()
|
||||
);
|
||||
|
||||
// This bails out on first error without modifying pending_updates.
|
||||
// That's Ok, cf this function's doc comment.
|
||||
@@ -2579,14 +2501,6 @@ impl DatadirModification<'_> {
|
||||
|
||||
self.pending_metadata_bytes = 0;
|
||||
|
||||
if self.extra_log {
|
||||
tracing::info!(
|
||||
"Flushed batch. Last record LSN is {}",
|
||||
self.tline.get_last_record_lsn()
|
||||
);
|
||||
self.extra_log = false;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -2677,26 +2591,6 @@ impl DatadirModification<'_> {
|
||||
.pending_data_batch
|
||||
.get_or_insert_with(SerializedValueBatch::default);
|
||||
batch.put(key, val, self.lsn);
|
||||
|
||||
let trouble_key = Key::from_hex("000000067F000040000000400600FFFFFFFF").unwrap();
|
||||
let trouble_rel = RelTag {
|
||||
spcnode: trouble_key.field2,
|
||||
dbnode: trouble_key.field3,
|
||||
relnode: trouble_key.field4,
|
||||
forknum: trouble_key.field5,
|
||||
};
|
||||
let key = Key::from_compact(key);
|
||||
let rel = RelTag {
|
||||
spcnode: key.field2,
|
||||
dbnode: key.field3,
|
||||
relnode: key.field4,
|
||||
forknum: key.field5,
|
||||
};
|
||||
|
||||
if rel == trouble_rel {
|
||||
tracing::info!("Put for {key} at LSN {}", self.lsn);
|
||||
self.extra_log = true;
|
||||
}
|
||||
}
|
||||
|
||||
fn put_metadata(&mut self, key: CompactKey, val: Value) {
|
||||
@@ -2723,14 +2617,6 @@ impl DatadirModification<'_> {
|
||||
if key == CHECKPOINT_KEY.to_compact() {
|
||||
tracing::debug!("Checkpoint key added to pending with size {val_serialized_size}");
|
||||
}
|
||||
|
||||
let trouble_key = Key::from_hex("000000067F000040000000400600FFFFFFFF")
|
||||
.unwrap()
|
||||
.to_compact();
|
||||
if key == trouble_key {
|
||||
tracing::info!("Put for {trouble_key} at LSN {}", self.lsn);
|
||||
self.extra_log = true;
|
||||
}
|
||||
}
|
||||
|
||||
fn delete(&mut self, key_range: Range<Key>) {
|
||||
|
||||
@@ -158,7 +158,7 @@ pub struct TenantSharedResources {
|
||||
pub l0_flush_global_state: L0FlushGlobalState,
|
||||
}
|
||||
|
||||
/// A [`TenantShard`] is really an _attached_ tenant. The configuration
|
||||
/// A [`Tenant`] is really an _attached_ tenant. The configuration
|
||||
/// for an attached tenant is a subset of the [`LocationConf`], represented
|
||||
/// in this struct.
|
||||
#[derive(Clone)]
|
||||
@@ -245,7 +245,7 @@ pub(crate) enum SpawnMode {
|
||||
///
|
||||
/// Tenant consists of multiple timelines. Keep them in a hash table.
|
||||
///
|
||||
pub struct TenantShard {
|
||||
pub struct Tenant {
|
||||
// Global pageserver config parameters
|
||||
pub conf: &'static PageServerConf,
|
||||
|
||||
@@ -267,7 +267,7 @@ pub struct TenantShard {
|
||||
shard_identity: ShardIdentity,
|
||||
|
||||
/// The remote storage generation, used to protect S3 objects from split-brain.
|
||||
/// Does not change over the lifetime of the [`TenantShard`] object.
|
||||
/// Does not change over the lifetime of the [`Tenant`] object.
|
||||
///
|
||||
/// This duplicates the generation stored in LocationConf, but that structure is mutable:
|
||||
/// this copy enforces the invariant that generation doesn't change during a Tenant's lifetime.
|
||||
@@ -309,7 +309,7 @@ pub struct TenantShard {
|
||||
// Access to global deletion queue for when this tenant wants to schedule a deletion
|
||||
deletion_queue_client: DeletionQueueClient,
|
||||
|
||||
/// Cached logical sizes updated on each [`TenantShard::gather_size_inputs`].
|
||||
/// Cached logical sizes updated on each [`Tenant::gather_size_inputs`].
|
||||
cached_logical_sizes: tokio::sync::Mutex<HashMap<(TimelineId, Lsn), u64>>,
|
||||
cached_synthetic_tenant_size: Arc<AtomicU64>,
|
||||
|
||||
@@ -337,12 +337,12 @@ pub struct TenantShard {
|
||||
// Timelines' cancellation token.
|
||||
pub(crate) cancel: CancellationToken,
|
||||
|
||||
// Users of the TenantShard such as the page service must take this Gate to avoid
|
||||
// trying to use a TenantShard which is shutting down.
|
||||
// Users of the Tenant such as the page service must take this Gate to avoid
|
||||
// trying to use a Tenant which is shutting down.
|
||||
pub(crate) gate: Gate,
|
||||
|
||||
/// Throttle applied at the top of [`Timeline::get`].
|
||||
/// All [`TenantShard::timelines`] of a given [`TenantShard`] instance share the same [`throttle::Throttle`] instance.
|
||||
/// All [`Tenant::timelines`] of a given [`Tenant`] instance share the same [`throttle::Throttle`] instance.
|
||||
pub(crate) pagestream_throttle: Arc<throttle::Throttle>,
|
||||
|
||||
pub(crate) pagestream_throttle_metrics: Arc<crate::metrics::tenant_throttling::Pagestream>,
|
||||
@@ -362,7 +362,7 @@ pub struct TenantShard {
|
||||
|
||||
l0_flush_global_state: L0FlushGlobalState,
|
||||
}
|
||||
impl std::fmt::Debug for TenantShard {
|
||||
impl std::fmt::Debug for Tenant {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{} ({})", self.tenant_shard_id, self.current_state())
|
||||
}
|
||||
@@ -841,7 +841,7 @@ impl Debug for SetStoppingError {
|
||||
}
|
||||
}
|
||||
|
||||
/// Arguments to [`TenantShard::create_timeline`].
|
||||
/// Arguments to [`Tenant::create_timeline`].
|
||||
///
|
||||
/// Not usable as an idempotency key for timeline creation because if [`CreateTimelineParamsBranch::ancestor_start_lsn`]
|
||||
/// is `None`, the result of the timeline create call is not deterministic.
|
||||
@@ -876,7 +876,7 @@ pub(crate) struct CreateTimelineParamsImportPgdata {
|
||||
pub(crate) idempotency_key: import_pgdata::index_part_format::IdempotencyKey,
|
||||
}
|
||||
|
||||
/// What is used to determine idempotency of a [`TenantShard::create_timeline`] call in [`TenantShard::start_creating_timeline`].
|
||||
/// What is used to determine idempotency of a [`Tenant::create_timeline`] call in [`Tenant::start_creating_timeline`].
|
||||
///
|
||||
/// Each [`Timeline`] object holds [`Self`] as an immutable property in [`Timeline::create_idempotency`].
|
||||
///
|
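As a rough illustration of why a request with `ancestor_start_lsn: None` cannot serve as an idempotency key: the stored idempotency value must compare equal to the incoming one, and a server-chosen LSN differs between attempts. The types below are simplified stand-ins, not the real creation-idempotency types:

type TimelineId = u128; // stand-in
type Lsn = u64;         // stand-in

#[derive(PartialEq, Eq, Clone, Debug)]
enum CreateIdempotency {
    Branch { ancestor: TimelineId, start_lsn: Lsn },
    Bootstrap { pg_version: u32 },
}

fn is_idempotent_retry(existing: &CreateIdempotency, incoming: &CreateIdempotency) -> bool {
    // With `ancestor_start_lsn: None` the pageserver resolves the LSN at call
    // time, so two otherwise identical requests carry different `start_lsn`
    // values and fail this comparison.
    existing == incoming
}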
||||
@@ -914,7 +914,7 @@ pub(crate) struct CreatingTimelineIdempotencyImportPgdata {
|
||||
idempotency_key: import_pgdata::index_part_format::IdempotencyKey,
|
||||
}
|
||||
|
||||
/// What is returned by [`TenantShard::start_creating_timeline`].
|
||||
/// What is returned by [`Tenant::start_creating_timeline`].
|
||||
#[must_use]
|
||||
enum StartCreatingTimelineResult {
|
||||
CreateGuard(TimelineCreateGuard),
|
||||
@@ -943,13 +943,13 @@ struct TimelineInitAndSyncNeedsSpawnImportPgdata {
|
||||
guard: TimelineCreateGuard,
|
||||
}
|
||||
|
||||
/// What is returned by [`TenantShard::create_timeline`].
|
||||
/// What is returned by [`Tenant::create_timeline`].
|
||||
enum CreateTimelineResult {
|
||||
Created(Arc<Timeline>),
|
||||
Idempotent(Arc<Timeline>),
|
||||
/// IMPORTANT: This [`Arc<Timeline>`] object is not in [`TenantShard::timelines`] when
|
||||
/// IMPORTANT: This [`Arc<Timeline>`] object is not in [`Tenant::timelines`] when
|
||||
/// we return this result, nor will this concrete object ever be added there.
|
||||
/// Cf method comment on [`TenantShard::create_timeline_import_pgdata`].
|
||||
/// Cf method comment on [`Tenant::create_timeline_import_pgdata`].
|
||||
ImportSpawned(Arc<Timeline>),
|
||||
}
|
||||
|
||||
@@ -1082,7 +1082,7 @@ pub(crate) enum LoadConfigError {
|
||||
NotFound(Utf8PathBuf),
|
||||
}
|
||||
|
||||
impl TenantShard {
|
||||
impl Tenant {
|
||||
/// Yet another helper for timeline initialization.
|
||||
///
|
||||
/// - Initializes the Timeline struct and inserts it into the tenant's hash map
|
||||
@@ -1303,7 +1303,7 @@ impl TenantShard {
|
||||
init_order: Option<InitializationOrder>,
|
||||
mode: SpawnMode,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Arc<TenantShard>, GlobalShutDown> {
|
||||
) -> Result<Arc<Tenant>, GlobalShutDown> {
|
||||
let wal_redo_manager =
|
||||
WalRedoManager::new(PostgresRedoManager::new(conf, tenant_shard_id))?;
|
||||
|
||||
@@ -1317,7 +1317,7 @@ impl TenantShard {
|
||||
let attach_mode = attached_conf.location.attach_mode;
|
||||
let generation = attached_conf.location.generation;
|
||||
|
||||
let tenant = Arc::new(TenantShard::new(
|
||||
let tenant = Arc::new(Tenant::new(
|
||||
TenantState::Attaching,
|
||||
conf,
|
||||
attached_conf,
|
||||
@@ -1334,7 +1334,7 @@ impl TenantShard {
|
||||
let attach_gate_guard = tenant
|
||||
.gate
|
||||
.enter()
|
||||
.expect("We just created the TenantShard: nothing else can have shut it down yet");
|
||||
.expect("We just created the Tenant: nothing else can have shut it down yet");
|
||||
|
||||
// Do all the hard work in the background
|
||||
let tenant_clone = Arc::clone(&tenant);
|
||||
@@ -1362,7 +1362,7 @@ impl TenantShard {
|
||||
}
|
||||
}
|
||||
|
||||
fn make_broken_or_stopping(t: &TenantShard, err: anyhow::Error) {
|
||||
fn make_broken_or_stopping(t: &Tenant, err: anyhow::Error) {
|
||||
t.state.send_modify(|state| match state {
|
||||
// TODO: the old code alluded to DeleteTenantFlow sometimes setting
|
||||
// TenantState::Stopping before we get here, but this may be outdated.
|
||||
@@ -1627,7 +1627,7 @@ impl TenantShard {
|
||||
/// No background tasks are started as part of this routine.
|
||||
///
|
||||
async fn attach(
|
||||
self: &Arc<TenantShard>,
|
||||
self: &Arc<Tenant>,
|
||||
preload: Option<TenantPreload>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
@@ -1957,7 +1957,7 @@ impl TenantShard {
|
||||
}
|
||||
|
||||
async fn load_timelines_metadata(
|
||||
self: &Arc<TenantShard>,
|
||||
self: &Arc<Tenant>,
|
||||
timeline_ids: HashSet<TimelineId>,
|
||||
remote_storage: &GenericRemoteStorage,
|
||||
heatmap: Option<(HeatMapTenant, std::time::Instant)>,
|
||||
@@ -2028,7 +2028,7 @@ impl TenantShard {
|
||||
}
|
||||
|
||||
fn load_timeline_metadata(
|
||||
self: &Arc<TenantShard>,
|
||||
self: &Arc<Tenant>,
|
||||
timeline_id: TimelineId,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
previous_heatmap: Option<PreviousHeatmap>,
|
||||
@@ -2429,14 +2429,14 @@ impl TenantShard {
|
||||
/// This is used by tests & import-from-basebackup.
|
||||
///
|
||||
/// The returned [`UninitializedTimeline`] contains no data nor metadata and it is in
|
||||
/// a state that will fail [`TenantShard::load_remote_timeline`] because `disk_consistent_lsn=Lsn(0)`.
|
||||
/// a state that will fail [`Tenant::load_remote_timeline`] because `disk_consistent_lsn=Lsn(0)`.
|
||||
///
|
||||
/// The caller is responsible for getting the timeline into a state that will be accepted
|
||||
/// by [`TenantShard::load_remote_timeline`] / [`TenantShard::attach`].
|
||||
/// by [`Tenant::load_remote_timeline`] / [`Tenant::attach`].
|
||||
/// Then they may call [`UninitializedTimeline::finish_creation`] to add the timeline
|
||||
/// to the [`TenantShard::timelines`].
|
||||
/// to the [`Tenant::timelines`].
|
||||
///
|
||||
/// Tests should use `TenantShard::create_test_timeline` to set up the minimum required metadata keys.
|
||||
/// Tests should use `Tenant::create_test_timeline` to set up the minimum required metadata keys.
|
||||
pub(crate) async fn create_empty_timeline(
|
||||
self: &Arc<Self>,
|
||||
new_timeline_id: TimelineId,
|
||||
@@ -2584,7 +2584,7 @@ impl TenantShard {
|
||||
/// the same timeline ID already exists, returns CreateTimelineError::AlreadyExists.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) async fn create_timeline(
|
||||
self: &Arc<TenantShard>,
|
||||
self: &Arc<Tenant>,
|
||||
params: CreateTimelineParams,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
ctx: &RequestContext,
|
||||
@@ -2751,13 +2751,13 @@ impl TenantShard {
|
||||
Ok(activated_timeline)
|
||||
}
|
||||
|
||||
/// The returned [`Arc<Timeline>`] is NOT in the [`TenantShard::timelines`] map until the import
|
||||
/// The returned [`Arc<Timeline>`] is NOT in the [`Tenant::timelines`] map until the import
|
||||
/// completes in the background. A DIFFERENT [`Arc<Timeline>`] will be inserted into the
|
||||
/// [`TenantShard::timelines`] map when the import completes.
|
||||
/// [`Tenant::timelines`] map when the import completes.
|
||||
/// We only return an [`Arc<Timeline>`] here so the API handler can create a [`pageserver_api::models::TimelineInfo`]
|
||||
/// for the response.
|
||||
async fn create_timeline_import_pgdata(
|
||||
self: &Arc<Self>,
|
||||
self: &Arc<Tenant>,
|
||||
params: CreateTimelineParamsImportPgdata,
|
||||
activate: ActivateTimelineArgs,
|
||||
ctx: &RequestContext,
|
||||
@@ -2854,7 +2854,7 @@ impl TenantShard {
|
||||
|
||||
#[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%timeline.timeline_id))]
|
||||
async fn create_timeline_import_pgdata_task(
|
||||
self: Arc<TenantShard>,
|
||||
self: Arc<Tenant>,
|
||||
timeline: Arc<Timeline>,
|
||||
index_part: import_pgdata::index_part_format::Root,
|
||||
activate: ActivateTimelineArgs,
|
||||
@@ -2882,7 +2882,7 @@ impl TenantShard {
|
||||
}
|
||||
|
||||
async fn create_timeline_import_pgdata_task_impl(
|
||||
self: Arc<TenantShard>,
|
||||
self: Arc<Tenant>,
|
||||
timeline: Arc<Timeline>,
|
||||
index_part: import_pgdata::index_part_format::Root,
|
||||
activate: ActivateTimelineArgs,
|
||||
@@ -2899,10 +2899,10 @@ impl TenantShard {
|
||||
// Reload timeline from remote.
|
||||
// This proves that the remote state is attachable, and it reuses the code.
|
||||
//
|
||||
// TODO: think about whether this is safe to do with concurrent TenantShard::shutdown.
|
||||
// TODO: think about whether this is safe to do with concurrent Tenant::shutdown.
|
||||
// timeline_create_guard holds the tenant gate open, so shutdown cannot _complete_ until we exit.
|
||||
// But our activate() call might launch new background tasks after TenantShard::shutdown
|
||||
// already went past shutting down the TenantShard::timelines, which this timeline here is no part of.
|
||||
// But our activate() call might launch new background tasks after Tenant::shutdown
|
||||
// already went past shutting down the Tenant::timelines, which this timeline here is no part of.
|
||||
// I think the same problem exists with the bootstrap & branch mgmt API tasks (tenant shutting
|
||||
// down while bootstrapping/branching + activating), but, the race condition is much more likely
|
||||
// to manifest because of the long runtime of this import task.
|
||||
@@ -2917,7 +2917,7 @@ impl TenantShard {
|
||||
// };
|
||||
let timeline_id = timeline.timeline_id;
|
||||
|
||||
// load from object storage like TenantShard::attach does
|
||||
// load from object storage like Tenant::attach does
|
||||
let resources = self.build_timeline_resources(timeline_id);
|
||||
let index_part = resources
|
||||
.remote_client
|
||||
@@ -3938,7 +3938,7 @@ enum ActivateTimelineArgs {
|
||||
No,
|
||||
}
|
||||
|
||||
impl TenantShard {
|
||||
impl Tenant {
|
||||
pub fn tenant_specific_overrides(&self) -> pageserver_api::models::TenantConfig {
|
||||
self.tenant_conf.load().tenant_conf.clone()
|
||||
}
|
||||
@@ -4096,7 +4096,7 @@ impl TenantShard {
|
||||
update: F,
|
||||
) -> anyhow::Result<pageserver_api::models::TenantConfig> {
|
||||
// Use read-copy-update in order to avoid overwriting the location config
|
||||
// state if this races with [`TenantShard::set_new_location_config`]. Note that
|
||||
// state if this races with [`Tenant::set_new_location_config`]. Note that
|
||||
// this race is not possible if both request types come from the storage
|
||||
// controller (as they should!) because an exclusive op lock is required
|
||||
// on the storage controller side.
|
||||
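A sketch of the read-copy-update shape referred to in the comment above, using a plain `RwLock<Arc<_>>` and a toy config type instead of the tenant's real location-config machinery:

use std::sync::{Arc, RwLock};

#[derive(Clone)]
struct Conf { gc_horizon: u64 }

fn update_conf(slot: &RwLock<Arc<Conf>>, update: impl FnOnce(&mut Conf)) -> Arc<Conf> {
    // Hold the writer lock across read+copy+store so a concurrent
    // set_new_location_config-style writer cannot interleave.
    let mut guard = slot.write().unwrap();
    let mut new = (**guard).clone();  // read + copy the current value
    update(&mut new);                 // apply the caller's patch to the copy
    let new = Arc::new(new);
    *guard = Arc::clone(&new);        // publish the updated value
    new
}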
@@ -4219,7 +4219,7 @@ impl TenantShard {
|
||||
Ok((timeline, timeline_ctx))
|
||||
}
|
||||
|
||||
/// [`TenantShard::shutdown`] must be called before dropping the returned [`TenantShard`] object
|
||||
/// [`Tenant::shutdown`] must be called before dropping the returned [`Tenant`] object
|
||||
/// to ensure proper cleanup of background tasks and metrics.
|
||||
//
|
||||
// Allow too_many_arguments because a constructor's argument list naturally grows with the
|
||||
@@ -4235,7 +4235,7 @@ impl TenantShard {
|
||||
remote_storage: GenericRemoteStorage,
|
||||
deletion_queue_client: DeletionQueueClient,
|
||||
l0_flush_global_state: L0FlushGlobalState,
|
||||
) -> TenantShard {
|
||||
) -> Tenant {
|
||||
debug_assert!(
|
||||
!attached_conf.location.generation.is_none() || conf.control_plane_api.is_none()
|
||||
);
|
||||
@@ -4295,7 +4295,7 @@ impl TenantShard {
|
||||
}
|
||||
});
|
||||
|
||||
TenantShard {
|
||||
Tenant {
|
||||
tenant_shard_id,
|
||||
shard_identity,
|
||||
generation: attached_conf.location.generation,
|
||||
@@ -4330,7 +4330,7 @@ impl TenantShard {
|
||||
cancel: CancellationToken::default(),
|
||||
gate: Gate::default(),
|
||||
pagestream_throttle: Arc::new(throttle::Throttle::new(
|
||||
TenantShard::get_pagestream_throttle_config(conf, &attached_conf.tenant_conf),
|
||||
Tenant::get_pagestream_throttle_config(conf, &attached_conf.tenant_conf),
|
||||
)),
|
||||
pagestream_throttle_metrics: Arc::new(
|
||||
crate::metrics::tenant_throttling::Pagestream::new(&tenant_shard_id),
|
||||
@@ -4466,11 +4466,11 @@ impl TenantShard {
|
||||
|
||||
// Perform GC for each timeline.
|
||||
//
|
||||
// Note that we don't hold the `TenantShard::gc_cs` lock here because we don't want to delay the
|
||||
// Note that we don't hold the `Tenant::gc_cs` lock here because we don't want to delay the
|
||||
// branch creation task, which requires the GC lock. A GC iteration can run concurrently
|
||||
// with branch creation.
|
||||
//
|
||||
// See comments in [`TenantShard::branch_timeline`] for more information about why branch
|
||||
// See comments in [`Tenant::branch_timeline`] for more information about why branch
|
||||
// creation task can run concurrently with timeline's GC iteration.
|
||||
for timeline in gc_timelines {
|
||||
if cancel.is_cancelled() {
|
||||
@@ -4500,7 +4500,7 @@ impl TenantShard {
|
||||
|
||||
/// Refreshes the Timeline::gc_info for all timelines, returning the
|
||||
/// vector of timelines which have [`Timeline::get_last_record_lsn`] past
|
||||
/// [`TenantShard::get_gc_horizon`].
|
||||
/// [`Tenant::get_gc_horizon`].
|
||||
///
|
||||
/// This is usually executed as part of periodic gc, but can now be triggered more often.
|
||||
pub(crate) async fn refresh_gc_info(
|
||||
@@ -5499,7 +5499,7 @@ impl TenantShard {
|
||||
}
|
||||
}
|
||||
|
||||
// The flushes we did above were just writes, but the TenantShard might have had
|
||||
// The flushes we did above were just writes, but the Tenant might have had
|
||||
// pending deletions as well from recent compaction/gc: we want to flush those
|
||||
// as well. This requires flushing the global delete queue. This is cheap
|
||||
// because it's typically a no-op.
|
||||
@@ -5517,7 +5517,7 @@ impl TenantShard {
|
||||
|
||||
/// How much local storage would this tenant like to have? It can cope with
|
||||
/// less than this (via eviction and on-demand downloads), but this function enables
|
||||
/// the TenantShard to advertise how much storage it would prefer to have to provide fast I/O
|
||||
/// the Tenant to advertise how much storage it would prefer to have to provide fast I/O
|
||||
/// by keeping important things on local disk.
|
||||
///
|
||||
/// This is a heuristic, not a guarantee: tenants that are long-idle will actually use less
|
||||
@@ -5540,11 +5540,11 @@ impl TenantShard {
|
||||
/// manifest in `Self::remote_tenant_manifest`.
|
||||
///
|
||||
/// TODO: instead of requiring callers to remember to call `maybe_upload_tenant_manifest` after
|
||||
/// changing any `TenantShard` state that's included in the manifest, consider making the manifest
|
||||
/// changing any `Tenant` state that's included in the manifest, consider making the manifest
|
||||
/// the authoritative source of data with an API that automatically uploads on changes. Revisit
|
||||
/// this when the manifest is more widely used and we have a better idea of the data model.
|
||||
pub(crate) async fn maybe_upload_tenant_manifest(&self) -> Result<(), TenantManifestError> {
|
||||
// Multiple tasks may call this function concurrently after mutating the TenantShard runtime
|
||||
// Multiple tasks may call this function concurrently after mutating the Tenant runtime
|
||||
// state, affecting the manifest generated by `build_tenant_manifest`. We use an async mutex
|
||||
// to serialize these callers. `eq_ignoring_version` acts as a slightly inefficient but
|
||||
// simple coalescing mechanism.
|
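A condensed sketch of the serialization-plus-coalescing idea from the comment above; the manifest type, `eq_ignoring_version`, and the upload closure are simplified stand-ins for the real tenant manifest code:

use tokio::sync::Mutex;

#[derive(Clone, PartialEq)]
struct Manifest { version: u32, offloaded_timelines: Vec<String> }

impl Manifest {
    fn eq_ignoring_version(&self, other: &Self) -> bool {
        self.offloaded_timelines == other.offloaded_timelines
    }
}

async fn maybe_upload<F, Fut>(
    last_uploaded: &Mutex<Option<Manifest>>,
    build: impl Fn() -> Manifest,
    upload: F,
) -> anyhow::Result<()>
where
    F: Fn(Manifest) -> Fut,
    Fut: std::future::Future<Output = anyhow::Result<()>>,
{
    let mut guard = last_uploaded.lock().await; // serialize concurrent callers
    let manifest = build();
    if matches!(guard.as_ref(), Some(prev) if prev.eq_ignoring_version(&manifest)) {
        return Ok(()); // nothing changed since the last upload: coalesce
    }
    upload(manifest.clone()).await?;
    *guard = Some(manifest);
    Ok(())
}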
||||
@@ -5812,7 +5812,7 @@ pub(crate) mod harness {
|
||||
info_span!("TenantHarness", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug())
|
||||
}
|
||||
|
||||
pub(crate) async fn load(&self) -> (Arc<TenantShard>, RequestContext) {
|
||||
pub(crate) async fn load(&self) -> (Arc<Tenant>, RequestContext) {
|
||||
let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error)
|
||||
.with_scope_unit_test();
|
||||
(
|
||||
@@ -5827,10 +5827,10 @@ pub(crate) mod harness {
|
||||
pub(crate) async fn do_try_load(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Arc<TenantShard>> {
|
||||
) -> anyhow::Result<Arc<Tenant>> {
|
||||
let walredo_mgr = Arc::new(WalRedoManager::from(TestRedoManager));
|
||||
|
||||
let tenant = Arc::new(TenantShard::new(
|
||||
let tenant = Arc::new(Tenant::new(
|
||||
TenantState::Attaching,
|
||||
self.conf,
|
||||
AttachedTenantConf::try_from(LocationConf::attached_single(
|
||||
@@ -6046,7 +6046,7 @@ mod tests {
|
||||
#[cfg(feature = "testing")]
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn randomize_timeline(
|
||||
tenant: &Arc<TenantShard>,
|
||||
tenant: &Arc<Tenant>,
|
||||
new_timeline_id: TimelineId,
|
||||
pg_version: u32,
|
||||
spec: TestTimelineSpecification,
|
||||
@@ -6936,7 +6936,7 @@ mod tests {
|
||||
}
|
||||
|
||||
async fn bulk_insert_compact_gc(
|
||||
tenant: &TenantShard,
|
||||
tenant: &Tenant,
|
||||
timeline: &Arc<Timeline>,
|
||||
ctx: &RequestContext,
|
||||
lsn: Lsn,
|
||||
@@ -6948,7 +6948,7 @@ mod tests {
|
||||
}
|
||||
|
||||
async fn bulk_insert_maybe_compact_gc(
|
||||
tenant: &TenantShard,
|
||||
tenant: &Tenant,
|
||||
timeline: &Arc<Timeline>,
|
||||
ctx: &RequestContext,
|
||||
mut lsn: Lsn,
|
||||
@@ -7858,7 +7858,7 @@ mod tests {
|
||||
let (tline, _ctx) = tenant
|
||||
.create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)
|
||||
.await?;
|
||||
// Leave the timeline ID in [`TenantShard::timelines_creating`] to exclude attempting to create it again
|
||||
// Leave the timeline ID in [`Tenant::timelines_creating`] to exclude attempting to create it again
|
||||
let raw_tline = tline.raw_timeline().unwrap();
|
||||
raw_tline
|
||||
.shutdown(super::timeline::ShutdownMode::Hard)
|
||||
|
||||
@@ -15,23 +15,21 @@
|
||||
//! len >= 128: 1CCCXXXX XXXXXXXX XXXXXXXX XXXXXXXX
|
||||
//!
|
||||
use std::cmp::min;
|
||||
use std::io::Error;
|
||||
|
||||
use anyhow::Context;
|
||||
use async_compression::Level;
|
||||
use bytes::{BufMut, BytesMut};
|
||||
use pageserver_api::models::ImageCompressionAlgorithm;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tokio_epoll_uring::IoBuf;
|
||||
use tokio_epoll_uring::{BoundedBuf, IoBuf, Slice};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::warn;
|
||||
|
||||
use crate::context::RequestContext;
|
||||
use crate::page_cache::PAGE_SZ;
|
||||
use crate::tenant::block_io::BlockCursor;
|
||||
use crate::virtual_file::IoBufferMut;
|
||||
use crate::virtual_file::VirtualFile;
|
||||
use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt};
|
||||
use crate::virtual_file::owned_buffers_io::write::{BufferedWriter, FlushTaskError};
|
||||
use crate::virtual_file::owned_buffers_io::write::{BufferedWriterShutdownMode, OwnedAsyncWriter};
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub struct CompressionInfo {
|
||||
@@ -39,67 +37,6 @@ pub struct CompressionInfo {
|
||||
pub compressed_size: Option<usize>,
|
||||
}
|
||||
|
||||
/// A blob header, with header+data length and compression info.
|
||||
///
|
||||
/// TODO: use this more widely, and add an encode() method too.
|
||||
/// TODO: document the header format.
|
||||
#[derive(Clone, Copy, Default)]
|
||||
pub struct Header {
|
||||
pub header_len: usize,
|
||||
pub data_len: usize,
|
||||
pub compression_bits: u8,
|
||||
}
|
||||
|
||||
impl Header {
|
||||
/// Decodes a header from a byte slice.
|
||||
pub fn decode(bytes: &[u8]) -> anyhow::Result<Self> {
|
||||
let Some(&first_header_byte) = bytes.first() else {
|
||||
anyhow::bail!("zero-length blob header");
|
||||
};
|
||||
|
||||
// If the first bit is 0, this is just a 1-byte length prefix for blobs shorter than 128 bytes.
|
||||
if first_header_byte < 0x80 {
|
||||
return Ok(Self {
|
||||
header_len: 1, // by definition
|
||||
data_len: first_header_byte as usize,
|
||||
compression_bits: BYTE_UNCOMPRESSED,
|
||||
});
|
||||
}
|
||||
|
||||
// Otherwise, this is a 4-byte header containing compression information and length.
|
||||
const HEADER_LEN: usize = 4;
|
||||
let mut header_buf: [u8; HEADER_LEN] = bytes[0..HEADER_LEN]
|
||||
.try_into()
|
||||
.map_err(|_| anyhow::anyhow!("blob header too short: {bytes:?}"))?;
|
||||
|
||||
// TODO: verify the compression bits and convert to an enum.
|
||||
let compression_bits = header_buf[0] & LEN_COMPRESSION_BIT_MASK;
|
||||
header_buf[0] &= !LEN_COMPRESSION_BIT_MASK;
|
||||
let data_len = u32::from_be_bytes(header_buf) as usize;
|
||||
|
||||
Ok(Self {
|
||||
header_len: HEADER_LEN,
|
||||
data_len,
|
||||
compression_bits,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the total header+data length.
|
||||
pub fn total_len(&self) -> usize {
|
||||
self.header_len + self.data_len
|
||||
}
|
||||
}
|
||||
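To make the header layout shown above concrete, here is a hypothetical `encode_len_header` (the file only ships a decoder); `0x80`/`0x90` correspond to `BYTE_UNCOMPRESSED` and `BYTE_ZSTD`:

fn encode_len_header(len: usize, zstd: bool) -> Vec<u8> {
    if len < 0x80 && !zstd {
        // Short blob: the length fits in one byte with the top bit clear.
        vec![len as u8]
    } else {
        // Long blob: 4-byte big-endian length with the compression bits
        // OR-ed into the top nibble of the first byte.
        assert!(len <= 0x0fff_ffff, "length must fit in 28 bits");
        let mut buf = (len as u32).to_be_bytes();
        buf[0] |= if zstd { 0x90 } else { 0x80 };
        buf.to_vec()
    }
}

// encode_len_header(3, false)   == [0x03]
// encode_len_header(300, false) == [0x80, 0x00, 0x01, 0x2C]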
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum WriteBlobError {
|
||||
#[error(transparent)]
|
||||
Flush(FlushTaskError),
|
||||
#[error("blob too large ({len} bytes)")]
|
||||
BlobTooLarge { len: usize },
|
||||
#[error(transparent)]
|
||||
WriteBlobRaw(anyhow::Error),
|
||||
}
|
||||
|
||||
impl BlockCursor<'_> {
|
||||
/// Read a blob into a new buffer.
|
||||
pub async fn read_blob(
|
||||
@@ -219,64 +156,143 @@ pub(super) const BYTE_UNCOMPRESSED: u8 = 0x80;
|
||||
pub(super) const BYTE_ZSTD: u8 = BYTE_UNCOMPRESSED | 0x10;
|
||||
|
||||
/// A wrapper of `VirtualFile` that allows users to write blobs.
|
||||
pub struct BlobWriter<W> {
|
||||
///
|
||||
/// If a `BlobWriter` is dropped, the internal buffer will be
|
||||
/// discarded. You need to call [`flush_buffer`](Self::flush_buffer)
|
||||
/// manually before dropping.
|
||||
pub struct BlobWriter<const BUFFERED: bool> {
|
||||
inner: VirtualFile,
|
||||
offset: u64,
|
||||
/// A buffer to save on write calls, only used if BUFFERED=true
|
||||
buf: Vec<u8>,
|
||||
/// We do tiny writes for the length headers; they need to be in an owned buffer;
|
||||
io_buf: Option<BytesMut>,
|
||||
writer: BufferedWriter<IoBufferMut, W>,
|
||||
offset: u64,
|
||||
}
|
||||
|
||||
impl<W> BlobWriter<W>
|
||||
where
|
||||
W: OwnedAsyncWriter + std::fmt::Debug + Send + Sync + 'static,
|
||||
{
|
||||
/// See [`BufferedWriter`] struct-level doc comment for semantics of `start_offset`.
|
||||
impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
|
||||
pub fn new(
|
||||
file: W,
|
||||
inner: VirtualFile,
|
||||
start_offset: u64,
|
||||
gate: &utils::sync::gate::Gate,
|
||||
cancel: CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
flush_task_span: tracing::Span,
|
||||
) -> anyhow::Result<Self> {
|
||||
Ok(Self {
|
||||
io_buf: Some(BytesMut::new()),
|
||||
writer: BufferedWriter::new(
|
||||
file,
|
||||
start_offset,
|
||||
|| IoBufferMut::with_capacity(Self::CAPACITY),
|
||||
gate.enter()?,
|
||||
cancel,
|
||||
ctx,
|
||||
flush_task_span,
|
||||
),
|
||||
_gate: &utils::sync::gate::Gate,
|
||||
_cancel: CancellationToken,
|
||||
_ctx: &RequestContext,
|
||||
) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
offset: start_offset,
|
||||
})
|
||||
buf: Vec::with_capacity(Self::CAPACITY),
|
||||
io_buf: Some(BytesMut::new()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn size(&self) -> u64 {
|
||||
self.offset
|
||||
}
|
||||
|
||||
const CAPACITY: usize = 64 * 1024;
|
||||
const CAPACITY: usize = if BUFFERED { 64 * 1024 } else { 0 };
|
||||
|
||||
/// Writes `src_buf` to the file at the current offset.
|
||||
/// Writes the given buffer directly to the underlying `VirtualFile`.
|
||||
/// You need to make sure that the internal buffer is empty, otherwise
|
||||
/// data will be written in the wrong order.
|
||||
#[inline(always)]
|
||||
async fn write_all_unbuffered<Buf: IoBuf + Send>(
|
||||
&mut self,
|
||||
src_buf: FullSlice<Buf>,
|
||||
ctx: &RequestContext,
|
||||
) -> (FullSlice<Buf>, Result<(), Error>) {
|
||||
let (src_buf, res) = self.inner.write_all(src_buf, ctx).await;
|
||||
let nbytes = match res {
|
||||
Ok(nbytes) => nbytes,
|
||||
Err(e) => return (src_buf, Err(e)),
|
||||
};
|
||||
self.offset += nbytes as u64;
|
||||
(src_buf, Ok(()))
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
/// Flushes the internal buffer to the underlying `VirtualFile`.
|
||||
pub async fn flush_buffer(&mut self, ctx: &RequestContext) -> Result<(), Error> {
|
||||
let buf = std::mem::take(&mut self.buf);
|
||||
let (slice, res) = self.inner.write_all(buf.slice_len(), ctx).await;
|
||||
res?;
|
||||
let mut buf = slice.into_raw_slice().into_inner();
|
||||
buf.clear();
|
||||
self.buf = buf;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
/// Writes as much of `src_buf` into the internal buffer as it fits
|
||||
fn write_into_buffer(&mut self, src_buf: &[u8]) -> usize {
|
||||
let remaining = Self::CAPACITY - self.buf.len();
|
||||
let to_copy = src_buf.len().min(remaining);
|
||||
self.buf.extend_from_slice(&src_buf[..to_copy]);
|
||||
self.offset += to_copy as u64;
|
||||
to_copy
|
||||
}
|
||||
|
||||
/// Internal, possibly buffered, write function
|
||||
async fn write_all<Buf: IoBuf + Send>(
|
||||
&mut self,
|
||||
src_buf: FullSlice<Buf>,
|
||||
ctx: &RequestContext,
|
||||
) -> (FullSlice<Buf>, Result<(), FlushTaskError>) {
|
||||
let res = self
|
||||
.writer
|
||||
// TODO: why are we taking a FullSlice if we're going to pass a borrow downstack?
|
||||
// Can remove all the complexity around owned buffers upstack
|
||||
.write_buffered_borrowed(&src_buf, ctx)
|
||||
.await
|
||||
.map(|len| {
|
||||
self.offset += len as u64;
|
||||
});
|
||||
) -> (FullSlice<Buf>, Result<(), Error>) {
|
||||
let src_buf = src_buf.into_raw_slice();
|
||||
let src_buf_bounds = src_buf.bounds();
|
||||
let restore = move |src_buf_slice: Slice<_>| {
|
||||
FullSlice::must_new(Slice::from_buf_bounds(
|
||||
src_buf_slice.into_inner(),
|
||||
src_buf_bounds,
|
||||
))
|
||||
};
|
||||
|
||||
(src_buf, res)
|
||||
if !BUFFERED {
|
||||
assert!(self.buf.is_empty());
|
||||
return self
|
||||
.write_all_unbuffered(FullSlice::must_new(src_buf), ctx)
|
||||
.await;
|
||||
}
|
||||
let remaining = Self::CAPACITY - self.buf.len();
|
||||
let src_buf_len = src_buf.bytes_init();
|
||||
if src_buf_len == 0 {
|
||||
return (restore(src_buf), Ok(()));
|
||||
}
|
||||
let mut src_buf = src_buf.slice(0..src_buf_len);
|
||||
// First try to copy as much as we can into the buffer
|
||||
if remaining > 0 {
|
||||
let copied = self.write_into_buffer(&src_buf);
|
||||
src_buf = src_buf.slice(copied..);
|
||||
}
|
||||
// Then, if the buffer is full, flush it out
|
||||
if self.buf.len() == Self::CAPACITY {
|
||||
if let Err(e) = self.flush_buffer(ctx).await {
|
||||
return (restore(src_buf), Err(e));
|
||||
}
|
||||
}
|
||||
// Finally, write the tail of src_buf:
|
||||
// If it wholly fits into the buffer without
|
||||
// completely filling it, then put it there.
|
||||
// If not, write it out directly.
|
||||
let src_buf = if !src_buf.is_empty() {
|
||||
assert_eq!(self.buf.len(), 0);
|
||||
if src_buf.len() < Self::CAPACITY {
|
||||
let copied = self.write_into_buffer(&src_buf);
|
||||
// We just verified above that src_buf fits into our internal buffer.
|
||||
assert_eq!(copied, src_buf.len());
|
||||
restore(src_buf)
|
||||
} else {
|
||||
let (src_buf, res) = self
|
||||
.write_all_unbuffered(FullSlice::must_new(src_buf), ctx)
|
||||
.await;
|
||||
if let Err(e) = res {
|
||||
return (src_buf, Err(e));
|
||||
}
|
||||
src_buf
|
||||
}
|
||||
} else {
|
||||
restore(src_buf)
|
||||
};
|
||||
(src_buf, Ok(()))
|
||||
}
|
||||
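The buffered `write_all` above follows a simple policy; a stripped-down sketch of just that decision logic (the buffer size and `write_direct` callback are stand-ins for the real fields and the unbuffered write path):

const CAPACITY: usize = 64 * 1024;

fn buffered_write(buf: &mut Vec<u8>, mut src: &[u8], write_direct: &mut impl FnMut(&[u8])) {
    // 1. Top up the internal buffer with as much of `src` as fits.
    let take = src.len().min(CAPACITY - buf.len());
    buf.extend_from_slice(&src[..take]);
    src = &src[take..];
    // 2. If the buffer is now full, flush it to the file.
    if buf.len() == CAPACITY {
        write_direct(buf);
        buf.clear();
    }
    // 3. The remainder either fits into the (now empty) buffer, or is large
    //    enough to be written out directly, bypassing the buffer.
    if !src.is_empty() {
        if src.len() < CAPACITY {
            buf.extend_from_slice(src);
        } else {
            write_direct(src);
        }
    }
}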
|
||||
/// Write a blob of data. Returns the offset that it was written to,
|
||||
@@ -285,7 +301,7 @@ where
|
||||
&mut self,
|
||||
srcbuf: FullSlice<Buf>,
|
||||
ctx: &RequestContext,
|
||||
) -> (FullSlice<Buf>, Result<u64, WriteBlobError>) {
|
||||
) -> (FullSlice<Buf>, Result<u64, Error>) {
|
||||
let (buf, res) = self
|
||||
.write_blob_maybe_compressed(srcbuf, ctx, ImageCompressionAlgorithm::Disabled)
|
||||
.await;
|
||||
@@ -299,10 +315,7 @@ where
|
||||
srcbuf: FullSlice<Buf>,
|
||||
ctx: &RequestContext,
|
||||
algorithm: ImageCompressionAlgorithm,
|
||||
) -> (
|
||||
FullSlice<Buf>,
|
||||
Result<(u64, CompressionInfo), WriteBlobError>,
|
||||
) {
|
||||
) -> (FullSlice<Buf>, Result<(u64, CompressionInfo), Error>) {
|
||||
let offset = self.offset;
|
||||
let mut compression_info = CompressionInfo {
|
||||
written_compressed: false,
|
||||
@@ -318,16 +331,14 @@ where
|
||||
if len < 128 {
|
||||
// Short blob. Write a 1-byte length header
|
||||
io_buf.put_u8(len as u8);
|
||||
let (slice, res) = self.write_all(io_buf.slice_len(), ctx).await;
|
||||
let res = res.map_err(WriteBlobError::Flush);
|
||||
((slice, res), srcbuf)
|
||||
(self.write_all(io_buf.slice_len(), ctx).await, srcbuf)
|
||||
} else {
|
||||
// Write a 4-byte length header
|
||||
if len > MAX_SUPPORTED_BLOB_LEN {
|
||||
return (
|
||||
(
|
||||
io_buf.slice_len(),
|
||||
Err(WriteBlobError::BlobTooLarge { len }),
|
||||
Err(Error::other(format!("blob too large ({len} bytes)"))),
|
||||
),
|
||||
srcbuf,
|
||||
);
|
||||
@@ -361,9 +372,7 @@ where
|
||||
assert_eq!(len_buf[0] & 0xf0, 0);
|
||||
len_buf[0] |= high_bit_mask;
|
||||
io_buf.extend_from_slice(&len_buf[..]);
|
||||
let (slice, res) = self.write_all(io_buf.slice_len(), ctx).await;
|
||||
let res = res.map_err(WriteBlobError::Flush);
|
||||
((slice, res), srcbuf)
|
||||
(self.write_all(io_buf.slice_len(), ctx).await, srcbuf)
|
||||
}
|
||||
}
|
||||
.await;
|
||||
@@ -378,49 +387,33 @@ where
|
||||
} else {
|
||||
self.write_all(srcbuf, ctx).await
|
||||
};
|
||||
let res = res.map_err(WriteBlobError::Flush);
|
||||
(srcbuf, res.map(|_| (offset, compression_info)))
|
||||
}
|
||||
}
|
||||
|
||||
/// Writes a raw blob containing both header and data, returning its offset.
|
||||
pub(crate) async fn write_blob_raw<Buf: IoBuf + Send>(
|
||||
&mut self,
|
||||
raw_with_header: FullSlice<Buf>,
|
||||
ctx: &RequestContext,
|
||||
) -> (FullSlice<Buf>, Result<u64, WriteBlobError>) {
|
||||
// Verify the header, to ensure we don't write invalid/corrupt data.
|
||||
let header = match Header::decode(&raw_with_header)
|
||||
.context("decoding blob header")
|
||||
.map_err(WriteBlobError::WriteBlobRaw)
|
||||
{
|
||||
Ok(header) => header,
|
||||
Err(err) => return (raw_with_header, Err(err)),
|
||||
};
|
||||
if raw_with_header.len() != header.total_len() {
|
||||
let header_total_len = header.total_len();
|
||||
let raw_len = raw_with_header.len();
|
||||
return (
|
||||
raw_with_header,
|
||||
Err(WriteBlobError::WriteBlobRaw(anyhow::anyhow!(
|
||||
"header length mismatch: {header_total_len} != {raw_len}"
|
||||
))),
|
||||
);
|
||||
}
|
||||
|
||||
let offset = self.offset;
|
||||
let (raw_with_header, result) = self.write_all(raw_with_header, ctx).await;
|
||||
let result = result.map_err(WriteBlobError::Flush);
|
||||
(raw_with_header, result.map(|_| offset))
|
||||
impl BlobWriter<true> {
|
||||
/// Access the underlying `VirtualFile`.
|
||||
///
|
||||
/// This function flushes the internal buffer before giving access
|
||||
/// to the underlying `VirtualFile`.
|
||||
pub async fn into_inner(mut self, ctx: &RequestContext) -> Result<VirtualFile, Error> {
|
||||
self.flush_buffer(ctx).await?;
|
||||
Ok(self.inner)
|
||||
}
|
||||
|
||||
/// Finish this blob writer and return the underlying `W`.
|
||||
pub async fn shutdown(
|
||||
self,
|
||||
mode: BufferedWriterShutdownMode,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<W, FlushTaskError> {
|
||||
let (_, file) = self.writer.shutdown(mode, ctx).await?;
|
||||
Ok(file)
|
||||
/// Access the underlying `VirtualFile`.
|
||||
///
|
||||
/// Unlike [`into_inner`](Self::into_inner), this doesn't flush
|
||||
/// the internal buffer before giving access.
|
||||
pub fn into_inner_no_flush(self) -> VirtualFile {
|
||||
self.inner
|
||||
}
|
||||
}
|
||||
|
||||
impl BlobWriter<false> {
|
||||
/// Access the underlying `VirtualFile`.
|
||||
pub fn into_inner(self) -> VirtualFile {
|
||||
self.inner
|
||||
}
|
||||
}
|
||||
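A usage sketch of the buffered variant (simplified error handling, assumed imports): the point of the doc comments above is that the internal buffer must be flushed explicitly, either via `flush_buffer` or by consuming the writer with `into_inner`; `into_inner_no_flush` would silently drop the buffered tail.

async fn write_two_blobs(
    mut wtr: BlobWriter<true>,
    ctx: &RequestContext,
) -> Result<(u64, u64, VirtualFile), std::io::Error> {
    let (_, res) = wtr.write_blob(b"first".to_vec().slice_len(), ctx).await;
    let off_first = res?;
    let (_, res) = wtr.write_blob(b"second".to_vec().slice_len(), ctx).await;
    let off_second = res?;
    // into_inner() flushes the internal buffer before handing back the file.
    let file = wtr.into_inner(ctx).await?;
    Ok((off_first, off_second, file))
}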
|
||||
@@ -429,25 +422,21 @@ pub(crate) mod tests {
|
||||
use camino::Utf8PathBuf;
|
||||
use camino_tempfile::Utf8TempDir;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use tracing::info_span;
|
||||
|
||||
use super::*;
|
||||
use crate::context::DownloadBehavior;
|
||||
use crate::task_mgr::TaskKind;
|
||||
use crate::tenant::block_io::BlockReaderRef;
|
||||
use crate::virtual_file;
|
||||
use crate::virtual_file::TempVirtualFile;
|
||||
use crate::virtual_file::VirtualFile;
|
||||
|
||||
async fn round_trip_test(blobs: &[Vec<u8>]) -> anyhow::Result<()> {
|
||||
round_trip_test_compressed(blobs, false).await
|
||||
async fn round_trip_test<const BUFFERED: bool>(blobs: &[Vec<u8>]) -> Result<(), Error> {
|
||||
round_trip_test_compressed::<BUFFERED>(blobs, false).await
|
||||
}
|
||||
|
||||
pub(crate) async fn write_maybe_compressed(
|
||||
pub(crate) async fn write_maybe_compressed<const BUFFERED: bool>(
|
||||
blobs: &[Vec<u8>],
|
||||
compression: bool,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<(Utf8TempDir, Utf8PathBuf, Vec<u64>)> {
|
||||
) -> Result<(Utf8TempDir, Utf8PathBuf, Vec<u64>), Error> {
|
||||
let temp_dir = camino_tempfile::tempdir()?;
|
||||
let pathbuf = temp_dir.path().join("file");
|
||||
let gate = utils::sync::gate::Gate::default();
|
||||
@@ -456,19 +445,8 @@ pub(crate) mod tests {
|
||||
// Write part (in block to drop the file)
|
||||
let mut offsets = Vec::new();
|
||||
{
|
||||
let file = TempVirtualFile::new(
|
||||
VirtualFile::open_with_options_v2(
|
||||
pathbuf.as_path(),
|
||||
virtual_file::OpenOptions::new()
|
||||
.create_new(true)
|
||||
.write(true),
|
||||
ctx,
|
||||
)
|
||||
.await?,
|
||||
gate.enter()?,
|
||||
);
|
||||
let mut wtr =
|
||||
BlobWriter::new(file, 0, &gate, cancel.clone(), ctx, info_span!("test")).unwrap();
|
||||
let file = VirtualFile::create(pathbuf.as_path(), ctx).await?;
|
||||
let mut wtr = BlobWriter::<BUFFERED>::new(file, 0, &gate, cancel.clone(), ctx);
|
||||
for blob in blobs.iter() {
|
||||
let (_, res) = if compression {
|
||||
let res = wtr
|
||||
@@ -485,28 +463,26 @@ pub(crate) mod tests {
            let offs = res?;
            offsets.push(offs);
        }
        let file = wtr
            .shutdown(
                BufferedWriterShutdownMode::ZeroPadToNextMultiple(PAGE_SZ),
                ctx,
            )
            .await?;
        file.disarm_into_inner()
    };
        // Write out one page worth of zeros so that we can
        // read again with read_blk
        let (_, res) = wtr.write_blob(vec![0; PAGE_SZ].slice_len(), ctx).await;
        let offs = res?;
        println!("Writing final blob at offs={offs}");
        wtr.flush_buffer(ctx).await?;
    }
        Ok((temp_dir, pathbuf, offsets))
    }

    async fn round_trip_test_compressed(
    async fn round_trip_test_compressed<const BUFFERED: bool>(
        blobs: &[Vec<u8>],
        compression: bool,
    ) -> anyhow::Result<()> {
    ) -> Result<(), Error> {
        let ctx =
            RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test();
        let (_temp_dir, pathbuf, offsets) =
            write_maybe_compressed(blobs, compression, &ctx).await?;
            write_maybe_compressed::<BUFFERED>(blobs, compression, &ctx).await?;

        println!("Done writing!");
        let file = VirtualFile::open_v2(pathbuf, &ctx).await?;
        let file = VirtualFile::open(pathbuf, &ctx).await?;
        let rdr = BlockReaderRef::VirtualFile(&file);
        let rdr = BlockCursor::new_with_compression(rdr, compression);
        for (idx, (blob, offset)) in blobs.iter().zip(offsets.iter()).enumerate() {
@@ -525,27 +501,30 @@ pub(crate) mod tests {
    }

    #[tokio::test]
    async fn test_one() -> anyhow::Result<()> {
    async fn test_one() -> Result<(), Error> {
        let blobs = &[vec![12, 21, 22]];
        round_trip_test(blobs).await?;
        round_trip_test::<false>(blobs).await?;
        round_trip_test::<true>(blobs).await?;
        Ok(())
    }

    #[tokio::test]
    async fn test_hello_simple() -> anyhow::Result<()> {
    async fn test_hello_simple() -> Result<(), Error> {
        let blobs = &[
            vec![0, 1, 2, 3],
            b"Hello, World!".to_vec(),
            Vec::new(),
            b"foobar".to_vec(),
        ];
        round_trip_test(blobs).await?;
        round_trip_test_compressed(blobs, true).await?;
        round_trip_test::<false>(blobs).await?;
        round_trip_test::<true>(blobs).await?;
        round_trip_test_compressed::<false>(blobs, true).await?;
        round_trip_test_compressed::<true>(blobs, true).await?;
        Ok(())
    }

    #[tokio::test]
    async fn test_really_big_array() -> anyhow::Result<()> {
    async fn test_really_big_array() -> Result<(), Error> {
        let blobs = &[
            b"test".to_vec(),
            random_array(10 * PAGE_SZ),
@@ -554,22 +533,25 @@ pub(crate) mod tests {
            vec![0xf3; 24 * PAGE_SZ],
            b"foobar".to_vec(),
        ];
        round_trip_test(blobs).await?;
        round_trip_test_compressed(blobs, true).await?;
        round_trip_test::<false>(blobs).await?;
        round_trip_test::<true>(blobs).await?;
        round_trip_test_compressed::<false>(blobs, true).await?;
        round_trip_test_compressed::<true>(blobs, true).await?;
        Ok(())
    }

    #[tokio::test]
    async fn test_arrays_inc() -> anyhow::Result<()> {
    async fn test_arrays_inc() -> Result<(), Error> {
        let blobs = (0..PAGE_SZ / 8)
            .map(|v| random_array(v * 16))
            .collect::<Vec<_>>();
        round_trip_test(&blobs).await?;
        round_trip_test::<false>(&blobs).await?;
        round_trip_test::<true>(&blobs).await?;
        Ok(())
    }

    #[tokio::test]
    async fn test_arrays_random_size() -> anyhow::Result<()> {
    async fn test_arrays_random_size() -> Result<(), Error> {
        let mut rng = rand::rngs::StdRng::seed_from_u64(42);
        let blobs = (0..1024)
            .map(|_| {
@@ -581,18 +563,20 @@ pub(crate) mod tests {
                random_array(sz.into())
            })
            .collect::<Vec<_>>();
        round_trip_test(&blobs).await?;
        round_trip_test::<false>(&blobs).await?;
        round_trip_test::<true>(&blobs).await?;
        Ok(())
    }

    #[tokio::test]
    async fn test_arrays_page_boundary() -> anyhow::Result<()> {
    async fn test_arrays_page_boundary() -> Result<(), Error> {
        let blobs = &[
            random_array(PAGE_SZ - 4),
            random_array(PAGE_SZ - 4),
            random_array(PAGE_SZ - 4),
        ];
        round_trip_test(blobs).await?;
        round_trip_test::<false>(blobs).await?;
        round_trip_test::<true>(blobs).await?;
        Ok(())
    }
}

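The tests above dispatch the round trip over a `const BUFFERED: bool` type parameter, so the same test body exercises both write paths. A minimal, self-contained sketch of that dispatch pattern follows; the names (`write_all`, `BUFFER_CAP`) and the toy buffering logic are illustrative assumptions, not the pageserver implementation.

```rust
// Illustrative sketch only: a const bool generic selects between two write
// strategies at compile time, the way the tests above select buffered vs
// unbuffered behavior.
fn write_all<const BUFFERED: bool>(data: &[u8]) -> Vec<u8> {
    // A pretend "device" that receives the bytes.
    let mut out = Vec::new();
    if BUFFERED {
        // Buffered path: copy through a fixed-size staging buffer.
        const BUFFER_CAP: usize = 8;
        let mut staging = Vec::with_capacity(BUFFER_CAP);
        for &b in data {
            staging.push(b);
            if staging.len() == BUFFER_CAP {
                out.extend_from_slice(&staging);
                staging.clear();
            }
        }
        out.extend_from_slice(&staging); // final flush
    } else {
        // Unbuffered path: write straight through.
        out.extend_from_slice(data);
    }
    out
}

fn main() {
    let payload = b"hello, const-generic dispatch";
    // Both instantiations produce the same bytes; only the write strategy differs.
    assert_eq!(write_all::<false>(payload), payload);
    assert_eq!(write_all::<true>(payload), payload);
    println!("both paths wrote {} bytes", payload.len());
}
```
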
@@ -4,12 +4,14 @@

use std::ops::Deref;

use bytes::Bytes;

use super::storage_layer::delta_layer::{Adapter, DeltaLayerInner};
use crate::context::RequestContext;
use crate::page_cache::{self, FileId, PAGE_SZ, PageReadGuard, PageWriteGuard, ReadBufResult};
#[cfg(test)]
use crate::virtual_file::IoBufferMut;
use crate::virtual_file::{IoBuffer, VirtualFile};
use crate::virtual_file::VirtualFile;

/// This is implemented by anything that can read 8 kB (PAGE_SZ)
/// blocks, using the page cache
@@ -245,17 +247,17 @@ pub trait BlockWriter {
    /// 'buf' must be of size PAGE_SZ. Returns the block number the page was
    /// written to.
    ///
    fn write_blk(&mut self, buf: IoBuffer) -> Result<u32, std::io::Error>;
    fn write_blk(&mut self, buf: Bytes) -> Result<u32, std::io::Error>;
}

///
/// A simple in-memory buffer of blocks.
///
pub struct BlockBuf {
    pub blocks: Vec<IoBuffer>,
    pub blocks: Vec<Bytes>,
}
impl BlockWriter for BlockBuf {
    fn write_blk(&mut self, buf: IoBuffer) -> Result<u32, std::io::Error> {
    fn write_blk(&mut self, buf: Bytes) -> Result<u32, std::io::Error> {
        assert!(buf.len() == PAGE_SZ);
        let blknum = self.blocks.len();
        self.blocks.push(buf);

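The hunk above switches the page payload type in `BlockWriter`/`BlockBuf` from `IoBuffer` to `Bytes`. For context, here is a standalone sketch of the same contract (fixed `PAGE_SZ` pages, `write_blk` returning the block number), assuming only the `bytes` crate; the trait and struct are re-declared for illustration and are not the crate's own definitions.

```rust
use bytes::Bytes;

const PAGE_SZ: usize = 8192;

/// Anything that accepts full PAGE_SZ pages and hands back their block number.
trait BlockWriter {
    fn write_blk(&mut self, buf: Bytes) -> Result<u32, std::io::Error>;
}

/// In-memory block buffer: pages are simply appended to a Vec.
#[derive(Default)]
struct BlockBuf {
    blocks: Vec<Bytes>,
}

impl BlockWriter for BlockBuf {
    fn write_blk(&mut self, buf: Bytes) -> Result<u32, std::io::Error> {
        assert!(buf.len() == PAGE_SZ, "callers must pass exactly one page");
        let blknum = self.blocks.len();
        self.blocks.push(buf);
        Ok(blknum as u32)
    }
}

fn main() -> Result<(), std::io::Error> {
    let mut buf = BlockBuf::default();
    let page = Bytes::from(vec![0u8; PAGE_SZ]);
    // The first page lands at block number 0, the next at 1, and so on.
    assert_eq!(buf.write_blk(page.clone())?, 0);
    assert_eq!(buf.write_blk(page)?, 1);
    println!("stored {} pages", buf.blocks.len());
    Ok(())
}
```
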
@@ -25,7 +25,7 @@ use std::{io, result};

use async_stream::try_stream;
use byteorder::{BE, ReadBytesExt};
use bytes::BufMut;
use bytes::{BufMut, Bytes, BytesMut};
use either::Either;
use futures::{Stream, StreamExt};
use hex;
@@ -34,7 +34,6 @@ use tracing::error;

use crate::context::RequestContext;
use crate::tenant::block_io::{BlockReader, BlockWriter};
use crate::virtual_file::{IoBuffer, IoBufferMut, owned_buffers_io::write::Buffer};

// The maximum size of a value stored in the B-tree. 5 bytes is enough currently.
pub const VALUE_SZ: usize = 5;
@@ -788,12 +787,12 @@ impl<const L: usize> BuildNode<L> {
    ///
    /// Serialize the node to on-disk format.
    ///
    fn pack(&self) -> IoBuffer {
    fn pack(&self) -> Bytes {
        assert!(self.keys.len() == self.num_children as usize * self.suffix_len);
        assert!(self.values.len() == self.num_children as usize * VALUE_SZ);
        assert!(self.num_children > 0);

        let mut buf = IoBufferMut::with_capacity(PAGE_SZ);
        let mut buf = BytesMut::new();

        buf.put_u16(self.num_children);
        buf.put_u8(self.level);
@@ -806,7 +805,7 @@ impl<const L: usize> BuildNode<L> {
        assert!(buf.len() == self.size);

        assert!(buf.len() <= PAGE_SZ);
        buf.extend_with(0, PAGE_SZ - buf.len());
        buf.resize(PAGE_SZ, 0);
        buf.freeze()
    }

@@ -840,7 +839,7 @@ pub(crate) mod tests {

    #[derive(Clone, Default)]
    pub(crate) struct TestDisk {
        blocks: Vec<IoBuffer>,
        blocks: Vec<Bytes>,
    }
    impl TestDisk {
        fn new() -> Self {
@@ -858,7 +857,7 @@ pub(crate) mod tests {
        }
    }
    impl BlockWriter for &mut TestDisk {
        fn write_blk(&mut self, buf: IoBuffer) -> io::Result<u32> {
        fn write_blk(&mut self, buf: Bytes) -> io::Result<u32> {
            let blknum = self.blocks.len();
            self.blocks.push(buf);
            Ok(blknum as u32)

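The `pack()` hunk above serializes a B-tree node into one fixed-size page: header fields first, then the key/value payload, then zero padding up to `PAGE_SZ` before freezing the buffer. A hedged, self-contained sketch of that layout follows; the field names and sizes are simplified assumptions, not the real node format.

```rust
use bytes::{BufMut, Bytes, BytesMut};

const PAGE_SZ: usize = 8192;

/// Pack a toy node (num_children, level, raw payload) into one zero-padded page.
fn pack_node(num_children: u16, level: u8, payload: &[u8]) -> Bytes {
    let mut buf = BytesMut::new();
    buf.put_u16(num_children); // 2-byte header field
    buf.put_u8(level);         // 1-byte header field
    buf.put_slice(payload);    // keys/values would go here
    assert!(buf.len() <= PAGE_SZ, "node must fit in one page");
    buf.resize(PAGE_SZ, 0);    // zero-pad to the page boundary
    buf.freeze()
}

fn main() {
    let page = pack_node(3, 1, b"abcdef");
    assert_eq!(page.len(), PAGE_SZ);
    // BufMut::put_u16 writes big-endian, so the header starts with 0x00 0x03.
    assert_eq!(&page[0..2], &3u16.to_be_bytes());
    println!("packed page of {} bytes", page.len());
}
```
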
@@ -12,7 +12,6 @@ use tokio_epoll_uring::{BoundedBuf, Slice};
use tokio_util::sync::CancellationToken;
use tracing::{error, info_span};
use utils::id::TimelineId;
use utils::sync::gate::GateGuard;

use crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64};
use crate::config::PageServerConf;
@@ -22,33 +21,16 @@ use crate::tenant::storage_layer::inmemory_layer::vectored_dio_read::File;
use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAlignedMut;
use crate::virtual_file::owned_buffers_io::slice::SliceMutExt;
use crate::virtual_file::owned_buffers_io::write::{Buffer, FlushTaskError};
use crate::virtual_file::{self, IoBufferMut, TempVirtualFile, VirtualFile, owned_buffers_io};

use self::owned_buffers_io::write::OwnedAsyncWriter;
use crate::virtual_file::{self, IoBufferMut, VirtualFile, owned_buffers_io};

pub struct EphemeralFile {
    _tenant_shard_id: TenantShardId,
    _timeline_id: TimelineId,
    page_cache_file_id: page_cache::FileId,
    bytes_written: u64,
    file: TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter,
    buffered_writer: BufferedWriter,
}

type BufferedWriter = owned_buffers_io::write::BufferedWriter<
    IoBufferMut,
    TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter,
>;

/// A TempVirtualFile that is co-owned by the [`EphemeralFile`] and [`BufferedWriter`].
///
/// (Actually [`BufferedWriter`] internally is just a client to a background flush task.
/// The co-ownership is between [`EphemeralFile`] and that flush task.)
///
/// Co-ownership allows us to serve reads for data that has already been flushed by the [`BufferedWriter`].
#[derive(Debug, Clone)]
struct TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter {
    inner: Arc<TempVirtualFile>,
    buffered_writer: owned_buffers_io::write::BufferedWriter<IoBufferMut, VirtualFile>,
    /// Gate guard is held on as long as we need to do operations in the path (delete on drop)
    _gate_guard: utils::sync::gate::GateGuard,
}

const TAIL_SZ: usize = 64 * 1024;
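The doc comment above describes co-ownership of the temp file between `EphemeralFile` and the writer's background flush task, so that data already flushed by the writer can still be read through the same handle. A minimal sketch of the idea using `Arc` and plain `std::fs` follows; the `SharedTempFile` name and the synchronous I/O are illustrative assumptions, not the pageserver types.

```rust
use std::fs::File;
use std::io::{Read, Seek, SeekFrom, Write};
use std::sync::Arc;

/// One file handle, co-owned by the "reader" side and the "writer" side.
struct SharedTempFile {
    file: File,
}

fn main() -> std::io::Result<()> {
    let path = std::env::temp_dir().join("co_ownership_sketch.tmp");
    let file = File::options()
        .create(true)
        .truncate(true)
        .read(true)
        .write(true)
        .open(&path)?;
    let shared = Arc::new(SharedTempFile { file });

    // "Flush task" clone: writes data to the file.
    let writer = Arc::clone(&shared);
    (&writer.file).write_all(b"flushed bytes")?;
    writer.file.sync_all()?;

    // "EphemeralFile" clone: can read what the writer already flushed,
    // because both sides share the same underlying handle.
    let mut rd = &shared.file;
    rd.seek(SeekFrom::Start(0))?;
    let mut buf = String::new();
    rd.read_to_string(&mut buf)?;
    assert_eq!(buf, "flushed bytes");

    std::fs::remove_file(&path)?;
    Ok(())
}
```
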
@@ -62,12 +44,9 @@ impl EphemeralFile {
        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<EphemeralFile> {
        // TempVirtualFile requires us to never reuse a filename while an old
        // instance of TempVirtualFile created with that filename is not done dropping yet.
        // So, we use a monotonic counter to disambiguate the filenames.
        static NEXT_TEMP_DISAMBIGUATOR: AtomicU64 = AtomicU64::new(1);
        static NEXT_FILENAME: AtomicU64 = AtomicU64::new(1);
        let filename_disambiguator =
            NEXT_TEMP_DISAMBIGUATOR.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
            NEXT_FILENAME.fetch_add(1, std::sync::atomic::Ordering::Relaxed);

        let filename = conf
            .timeline_path(&tenant_shard_id, &timeline_id)
@@ -75,17 +54,16 @@ impl EphemeralFile {
                "ephemeral-{filename_disambiguator}"
            )));

        let file = TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter::new(
        let file = Arc::new(
            VirtualFile::open_with_options_v2(
                &filename,
                virtual_file::OpenOptions::new()
                    .create_new(true)
                    .read(true)
                    .write(true),
                    .write(true)
                    .create(true),
                ctx,
            )
            .await?,
            gate.enter()?,
        );

        let page_cache_file_id = page_cache::next_file_id(); // XXX get rid, we're not page-caching anymore
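The comment in the hunk above explains why a process-wide monotonic counter is mixed into the temp filename: a name must never be reused while an older handle created with that name may still be pending deletion. A self-contained sketch of that disambiguation follows; the `ephemeral-` prefix mirrors the diff, while the directory handling is a simplified assumption.

```rust
use std::path::PathBuf;
use std::sync::atomic::{AtomicU64, Ordering};

/// Process-wide counter; every caller gets a distinct suffix, so a filename is
/// never handed out twice even if an older file with a smaller suffix is still
/// being dropped and deleted in the background.
static NEXT_TEMP_DISAMBIGUATOR: AtomicU64 = AtomicU64::new(1);

fn next_ephemeral_path(dir: &std::path::Path) -> PathBuf {
    let n = NEXT_TEMP_DISAMBIGUATOR.fetch_add(1, Ordering::Relaxed);
    dir.join(format!("ephemeral-{n}"))
}

fn main() {
    let dir = std::env::temp_dir();
    let a = next_ephemeral_path(&dir);
    let b = next_ephemeral_path(&dir);
    // Two successive calls never hand out the same name.
    assert_ne!(a, b);
    println!("{} / {}", a.display(), b.display());
}
```
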
@@ -95,60 +73,37 @@ impl EphemeralFile {
            _timeline_id: timeline_id,
            page_cache_file_id,
            bytes_written: 0,
            file: file.clone(),
            buffered_writer: BufferedWriter::new(
            buffered_writer: owned_buffers_io::write::BufferedWriter::new(
                file,
                0,
                || IoBufferMut::with_capacity(TAIL_SZ),
                gate.enter()?,
                cancel.child_token(),
                ctx,
                info_span!(parent: None, "ephemeral_file_buffered_writer", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), timeline_id=%timeline_id, path = %filename),
            ),
            _gate_guard: gate.enter()?,
        })
    }
}

impl TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter {
    fn new(file: VirtualFile, gate_guard: GateGuard) -> Self {
        Self {
            inner: Arc::new(TempVirtualFile::new(file, gate_guard)),
impl Drop for EphemeralFile {
    fn drop(&mut self) {
        // unlink the file
        // we are clear to do this, because we have entered a gate
        let path = self.buffered_writer.as_inner().path();
        let res = std::fs::remove_file(path);
        if let Err(e) = res {
            if e.kind() != std::io::ErrorKind::NotFound {
                // just never log the not found errors, we cannot do anything for them; on detach
                // the tenant directory is already gone.
                //
                // not found files might also be related to https://github.com/neondatabase/neon/issues/2442
                error!("could not remove ephemeral file '{path}': {e}");
            }
        }
    }
}

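The `Drop` impl above unlinks the file and deliberately stays quiet when it is already gone, for example because the whole tenant directory was removed on detach. A standalone sketch of that delete-on-drop pattern follows; the `TempPath` name is illustrative and the gate mechanics are omitted.

```rust
use std::path::PathBuf;

/// Deletes its path on drop; a missing file is not an error worth reporting.
struct TempPath {
    path: PathBuf,
}

impl Drop for TempPath {
    fn drop(&mut self) {
        if let Err(e) = std::fs::remove_file(&self.path) {
            if e.kind() != std::io::ErrorKind::NotFound {
                // Only unexpected errors are reported; NotFound means someone
                // else (or a directory-level cleanup) already removed the file.
                eprintln!("could not remove temp file '{}': {e}", self.path.display());
            }
        }
    }
}

fn main() -> std::io::Result<()> {
    let path = std::env::temp_dir().join("delete_on_drop_sketch.tmp");
    std::fs::write(&path, b"scratch data")?;
    {
        let _guard = TempPath { path: path.clone() };
        // ... use the file while the guard is alive ...
    } // dropped here: the file is unlinked
    assert!(!path.exists());
    Ok(())
}
```
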
impl OwnedAsyncWriter for TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter {
    fn write_all_at<Buf: owned_buffers_io::io_buf_aligned::IoBufAligned + Send>(
        &self,
        buf: owned_buffers_io::io_buf_ext::FullSlice<Buf>,
        offset: u64,
        ctx: &RequestContext,
    ) -> impl std::future::Future<
        Output = (
            owned_buffers_io::io_buf_ext::FullSlice<Buf>,
            std::io::Result<()>,
        ),
    > + Send {
        self.inner.write_all_at(buf, offset, ctx)
    }

    fn set_len(
        &self,
        len: u64,
        ctx: &RequestContext,
    ) -> impl Future<Output = std::io::Result<()>> + Send {
        self.inner.set_len(len, ctx)
    }
}

impl std::ops::Deref for TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter {
    type Target = VirtualFile;

    fn deref(&self) -> &Self::Target {
        &self.inner
    }
}

#[derive(Debug, thiserror::Error)]
pub(crate) enum EphemeralFileWriteError {
    #[error("{0}")]
@@ -307,9 +262,9 @@ impl super::storage_layer::inmemory_layer::vectored_dio_read::File for Ephemeral
        let mutable_range = Range(std::cmp::max(start, submitted_offset), end);

        let dst = if written_range.len() > 0 {
            let file: &VirtualFile = self.buffered_writer.as_inner();
            let bounds = dst.bounds();
            let slice = self
                .file
            let slice = file
                .read_exact_at(dst.slice(0..written_range.len().into_usize()), start, ctx)
                .await?;
            Slice::from_buf_bounds(Slice::into_inner(slice), bounds)
@@ -501,7 +456,7 @@ mod tests {
        assert_eq!(&buf, &content[range]);
    }

    let file_contents = std::fs::read(file.file.path()).unwrap();
    let file_contents = std::fs::read(file.buffered_writer.as_inner().path()).unwrap();
    assert!(file_contents == content[0..cap * 2]);

    let maybe_flushed_buffer_contents = file.buffered_writer.inspect_maybe_flushed().unwrap();
@@ -534,7 +489,7 @@ mod tests {
    // assert the state is as this test expects it to be
    let load_io_buf_res = file.load_to_io_buf(&ctx).await.unwrap();
    assert_eq!(&load_io_buf_res[..], &content[0..cap * 2 + cap / 2]);
    let md = file.file.path().metadata().unwrap();
    let md = file.buffered_writer.as_inner().path().metadata().unwrap();
    assert_eq!(
        md.len(),
        2 * cap.into_u64(),

Some files were not shown because too many files have changed in this diff.