mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-20 22:50:38 +00:00
Compare commits
1 Commits
erik/bytes
...
tristan957
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1ca4975c35 |
34
.github/actions/neon-project-create/action.yml
vendored
34
.github/actions/neon-project-create/action.yml
vendored
@@ -49,10 +49,6 @@ inputs:
|
||||
description: 'A JSON object with project settings'
|
||||
required: false
|
||||
default: '{}'
|
||||
default_endpoint_settings:
|
||||
description: 'A JSON object with the default endpoint settings'
|
||||
required: false
|
||||
default: '{}'
|
||||
|
||||
outputs:
|
||||
dsn:
|
||||
@@ -70,9 +66,9 @@ runs:
|
||||
# A shell without `set -x` to not to expose password/dsn in logs
|
||||
shell: bash -euo pipefail {0}
|
||||
run: |
|
||||
res=$(curl \
|
||||
project=$(curl \
|
||||
"https://${API_HOST}/api/v2/projects" \
|
||||
-w "%{http_code}" \
|
||||
--fail \
|
||||
--header "Accept: application/json" \
|
||||
--header "Content-Type: application/json" \
|
||||
--header "Authorization: Bearer ${API_KEY}" \
|
||||
@@ -87,15 +83,6 @@ runs:
|
||||
\"settings\": ${PROJECT_SETTINGS}
|
||||
}
|
||||
}")
|
||||
|
||||
code=${res: -3}
|
||||
if [[ ${code} -ge 400 ]]; then
|
||||
echo Request failed with error code ${code}
|
||||
echo ${res::-3}
|
||||
exit 1
|
||||
else
|
||||
project=${res::-3}
|
||||
fi
|
||||
|
||||
# Mask password
|
||||
echo "::add-mask::$(echo $project | jq --raw-output '.roles[] | select(.name != "web_access") | .password')"
|
||||
@@ -139,22 +126,6 @@ runs:
|
||||
-H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
|
||||
-d "{\"scheduling\": \"Essential\"}"
|
||||
fi
|
||||
# XXX
|
||||
# This is a workaround for the default endpoint settings, which currently do not allow some settings in the public API.
|
||||
# https://github.com/neondatabase/cloud/issues/27108
|
||||
if [[ -n ${DEFAULT_ENDPOINT_SETTINGS} && ${DEFAULT_ENDPOINT_SETTINGS} != "{}" ]] ; then
|
||||
PROJECT_DATA=$(curl -X GET \
|
||||
"https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/projects/${project_id}" \
|
||||
-H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
|
||||
-d "{\"scheduling\": \"Essential\"}"
|
||||
)
|
||||
NEW_DEFAULT_ENDPOINT_SETTINGS=$(echo ${PROJECT_DATA} | jq -rc ".project.default_endpoint_settings + ${DEFAULT_ENDPOINT_SETTINGS}")
|
||||
curl -X POST --fail \
|
||||
"https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/projects/${project_id}/default_endpoint_settings" \
|
||||
-H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
|
||||
--data "${NEW_DEFAULT_ENDPOINT_SETTINGS}"
|
||||
fi
|
||||
|
||||
|
||||
env:
|
||||
API_HOST: ${{ inputs.api_host }}
|
||||
@@ -171,4 +142,3 @@ runs:
|
||||
PSQL: ${{ inputs.psql_path }}
|
||||
LD_LIBRARY_PATH: ${{ inputs.libpq_lib_path }}
|
||||
PROJECT_SETTINGS: ${{ inputs.project_settings }}
|
||||
DEFAULT_ENDPOINT_SETTINGS: ${{ inputs.default_endpoint_settings }}
|
||||
|
||||
19
.github/workflows/_build-and-test-locally.yml
vendored
19
.github/workflows/_build-and-test-locally.yml
vendored
@@ -28,16 +28,6 @@ on:
|
||||
required: false
|
||||
default: 'disabled'
|
||||
type: string
|
||||
test-selection:
|
||||
description: 'specification of selected test(s) to run'
|
||||
required: false
|
||||
default: ''
|
||||
type: string
|
||||
test-run-count:
|
||||
description: 'number of runs to perform for selected tests'
|
||||
required: false
|
||||
default: 1
|
||||
type: number
|
||||
|
||||
defaults:
|
||||
run:
|
||||
@@ -285,7 +275,7 @@ jobs:
|
||||
for io_mode in buffered direct direct-rw ; do
|
||||
NEON_PAGESERVER_UNIT_TEST_GET_VECTORED_CONCURRENT_IO=$get_vectored_concurrent_io \
|
||||
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine \
|
||||
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IO_MODE=$io_mode \
|
||||
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOMODE=$io_mode \
|
||||
${cov_prefix} \
|
||||
cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
|
||||
done
|
||||
@@ -391,22 +381,21 @@ jobs:
|
||||
run_with_real_s3: true
|
||||
real_s3_bucket: neon-github-ci-tests
|
||||
real_s3_region: eu-central-1
|
||||
rerun_failed: ${{ inputs.test-run-count == 1 }}
|
||||
rerun_failed: true
|
||||
pg_version: ${{ matrix.pg_version }}
|
||||
sanitizers: ${{ inputs.sanitizers }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
# `--session-timeout` is equal to (timeout-minutes - 10 minutes) * 60 seconds.
|
||||
# Attempt to stop tests gracefully to generate test reports
|
||||
# until they are forcibly stopped by the stricter `timeout-minutes` limit.
|
||||
extra_params: --session-timeout=${{ inputs.sanitizers != 'enabled' && 3000 || 10200 }} --count=${{ inputs.test-run-count }}
|
||||
${{ inputs.test-selection != '' && format('-k "{0}"', inputs.test-selection) || '' }}
|
||||
extra_params: --session-timeout=${{ inputs.sanitizers != 'enabled' && 3000 || 10200 }}
|
||||
env:
|
||||
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
|
||||
BUILD_TAG: ${{ inputs.build-tag }}
|
||||
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
|
||||
PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
|
||||
PAGESERVER_VIRTUAL_FILE_IO_MODE: direct-rw
|
||||
PAGESERVER_VIRTUAL_FILE_IO_MODE: direct
|
||||
USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}
|
||||
|
||||
# Temporary disable this step until we figure out why it's so flaky
|
||||
|
||||
172
.github/workflows/build-macos.yml
vendored
172
.github/workflows/build-macos.yml
vendored
@@ -34,10 +34,11 @@ permissions:
|
||||
jobs:
|
||||
build-pgxn:
|
||||
if: |
|
||||
inputs.pg_versions != '[]' || inputs.rebuild_everything ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
(inputs.pg_versions != '[]' || inputs.rebuild_everything) && (
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
)
|
||||
timeout-minutes: 30
|
||||
runs-on: macos-15
|
||||
strategy:
|
||||
@@ -62,8 +63,13 @@ jobs:
|
||||
|
||||
- name: Cache postgres ${{ matrix.postgres-version }} build
|
||||
id: cache_pg
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/${{ matrix.postgres-version }}
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ matrix.postgres-version }}-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
@@ -99,21 +105,13 @@ jobs:
|
||||
run: |
|
||||
make postgres-headers-${{ matrix.postgres-version }} -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Upload "pg_install/${{ matrix.postgres-version }}" artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: pg_install--${{ matrix.postgres-version }}
|
||||
path: pg_install/${{ matrix.postgres-version }}
|
||||
# The artifact is supposed to be used by the next job in the same workflow,
|
||||
# so there’s no need to store it for too long.
|
||||
retention-days: 1
|
||||
|
||||
build-walproposer-lib:
|
||||
if: |
|
||||
inputs.pg_versions != '[]' || inputs.rebuild_everything ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
(inputs.pg_versions != '[]' || inputs.rebuild_everything) && (
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
)
|
||||
timeout-minutes: 30
|
||||
runs-on: macos-15
|
||||
needs: [build-pgxn]
|
||||
@@ -134,16 +132,27 @@ jobs:
|
||||
id: pg_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Download "pg_install/v17" artifact
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
- name: Cache postgres v17 build
|
||||
id: cache_pg
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
with:
|
||||
name: pg_install--v17
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v17
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Cache walproposer-lib
|
||||
id: cache_walproposer_lib
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/build/walproposer-lib
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
@@ -169,21 +178,13 @@ jobs:
|
||||
run:
|
||||
make walproposer-lib -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Upload "pg_install/build/walproposer-lib" artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: pg_install--build--walproposer-lib
|
||||
path: pg_install/build/walproposer-lib
|
||||
# The artifact is supposed to be used by the next job in the same workflow,
|
||||
# so there’s no need to store it for too long.
|
||||
retention-days: 1
|
||||
|
||||
cargo-build:
|
||||
if: |
|
||||
inputs.pg_versions != '[]' || inputs.rebuild_rust_code || inputs.rebuild_everything ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
(inputs.pg_versions != '[]' || inputs.rebuild_rust_code || inputs.rebuild_everything) && (
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
)
|
||||
timeout-minutes: 30
|
||||
runs-on: macos-15
|
||||
needs: [build-pgxn, build-walproposer-lib]
|
||||
@@ -202,45 +203,72 @@ jobs:
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Download "pg_install/v14" artifact
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
- name: Set pg v14 for caching
|
||||
id: pg_rev_v14
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) | tee -a "${GITHUB_OUTPUT}"
|
||||
- name: Set pg v15 for caching
|
||||
id: pg_rev_v15
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) | tee -a "${GITHUB_OUTPUT}"
|
||||
- name: Set pg v16 for caching
|
||||
id: pg_rev_v16
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) | tee -a "${GITHUB_OUTPUT}"
|
||||
- name: Set pg v17 for caching
|
||||
id: pg_rev_v17
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Cache postgres v14 build
|
||||
id: cache_pg
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
with:
|
||||
name: pg_install--v14
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v14
|
||||
|
||||
- name: Download "pg_install/v15" artifact
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v14-${{ steps.pg_rev_v14.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
- name: Cache postgres v15 build
|
||||
id: cache_pg_v15
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
with:
|
||||
name: pg_install--v15
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v15
|
||||
|
||||
- name: Download "pg_install/v16" artifact
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v15-${{ steps.pg_rev_v15.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
- name: Cache postgres v16 build
|
||||
id: cache_pg_v16
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
with:
|
||||
name: pg_install--v16
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v16
|
||||
|
||||
- name: Download "pg_install/v17" artifact
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v16-${{ steps.pg_rev_v16.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
- name: Cache postgres v17 build
|
||||
id: cache_pg_v17
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
with:
|
||||
name: pg_install--v17
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v17
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Download "pg_install/build/walproposer-lib" artifact
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: pg_install--build--walproposer-lib
|
||||
path: pg_install/build/walproposer-lib
|
||||
|
||||
# `actions/download-artifact` doesn't preserve permissions:
|
||||
# https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss
|
||||
- name: Make pg_install/v*/bin/* executable
|
||||
run: |
|
||||
chmod +x pg_install/v*/bin/*
|
||||
|
||||
- name: Cache cargo deps
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
- name: Cache cargo deps (only for v17)
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: |
|
||||
~/.cargo/registry
|
||||
!~/.cargo/registry/src
|
||||
@@ -248,6 +276,18 @@ jobs:
|
||||
target
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
|
||||
|
||||
- name: Cache walproposer-lib
|
||||
id: cache_walproposer_lib
|
||||
uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1 # v1.8.0
|
||||
with:
|
||||
endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
|
||||
bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
|
||||
accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/build/walproposer-lib
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Install build dependencies
|
||||
run: |
|
||||
brew install flex bison openssl protobuf icu4c
|
||||
@@ -257,8 +297,8 @@ jobs:
|
||||
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
|
||||
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
|
||||
|
||||
- name: Run cargo build
|
||||
- name: Run cargo build (only for v17)
|
||||
run: cargo build --all --release -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Check that no warnings are produced
|
||||
- name: Check that no warnings are produced (only for v17)
|
||||
run: ./run_clippy.sh
|
||||
|
||||
120
.github/workflows/build_and_run_selected_test.yml
vendored
120
.github/workflows/build_and_run_selected_test.yml
vendored
@@ -1,120 +0,0 @@
|
||||
name: Build and Run Selected Test
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
test-selection:
|
||||
description: 'Specification of selected test(s), as accepted by pytest -k'
|
||||
required: true
|
||||
type: string
|
||||
run-count:
|
||||
description: 'Number of test runs to perform'
|
||||
required: true
|
||||
type: number
|
||||
archs:
|
||||
description: 'Archs to run tests on, e. g.: ["x64", "arm64"]'
|
||||
default: '["x64"]'
|
||||
required: true
|
||||
type: string
|
||||
build-types:
|
||||
description: 'Build types to run tests on, e. g.: ["debug", "release"]'
|
||||
default: '["release"]'
|
||||
required: true
|
||||
type: string
|
||||
pg-versions:
|
||||
description: 'Postgres versions to use for testing, e.g,: [{"pg_version":"v16"}, {"pg_version":"v17"}])'
|
||||
default: '[{"pg_version":"v17"}]'
|
||||
required: true
|
||||
type: string
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
env:
|
||||
RUST_BACKTRACE: 1
|
||||
COPT: '-Werror'
|
||||
|
||||
jobs:
|
||||
meta:
|
||||
uses: ./.github/workflows/_meta.yml
|
||||
with:
|
||||
github-event-name: ${{ github.event_name }}
|
||||
github-event-json: ${{ toJSON(github.event) }}
|
||||
|
||||
build-and-test-locally:
|
||||
needs: [ meta ]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
arch: ${{ fromJson(inputs.archs) }}
|
||||
build-type: ${{ fromJson(inputs.build-types) }}
|
||||
uses: ./.github/workflows/_build-and-test-locally.yml
|
||||
with:
|
||||
arch: ${{ matrix.arch }}
|
||||
build-tools-image: ghcr.io/neondatabase/build-tools:pinned-bookworm
|
||||
build-tag: ${{ needs.meta.outputs.build-tag }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
test-cfg: ${{ inputs.pg-versions }}
|
||||
test-selection: ${{ inputs.test-selection }}
|
||||
test-run-count: ${{ fromJson(inputs.run-count) }}
|
||||
secrets: inherit
|
||||
|
||||
create-test-report:
|
||||
needs: [ build-and-test-locally ]
|
||||
if: ${{ !cancelled() }}
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: write
|
||||
pull-requests: write
|
||||
outputs:
|
||||
report-url: ${{ steps.create-allure-report.outputs.report-url }}
|
||||
|
||||
runs-on: [ self-hosted, small ]
|
||||
container:
|
||||
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Create Allure report
|
||||
if: ${{ !cancelled() }}
|
||||
id: create-allure-report
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
store-test-results-into-db: true
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_DEV }}
|
||||
|
||||
- uses: actions/github-script@v7
|
||||
if: ${{ !cancelled() }}
|
||||
with:
|
||||
# Retry script for 5XX server errors: https://github.com/actions/github-script#retries
|
||||
retries: 5
|
||||
script: |
|
||||
const report = {
|
||||
reportUrl: "${{ steps.create-allure-report.outputs.report-url }}",
|
||||
reportJsonUrl: "${{ steps.create-allure-report.outputs.report-json-url }}",
|
||||
}
|
||||
|
||||
const coverage = {}
|
||||
|
||||
const script = require("./scripts/comment-test-report.js")
|
||||
await script({
|
||||
github,
|
||||
context,
|
||||
fetch,
|
||||
report,
|
||||
coverage,
|
||||
})
|
||||
4
.github/workflows/build_and_test.yml
vendored
4
.github/workflows/build_and_test.yml
vendored
@@ -324,7 +324,7 @@ jobs:
|
||||
TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
|
||||
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
|
||||
PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
|
||||
PAGESERVER_VIRTUAL_FILE_IO_MODE: direct-rw
|
||||
PAGESERVER_VIRTUAL_FILE_IO_MODE: direct
|
||||
SYNC_BETWEEN_TESTS: true
|
||||
# XXX: no coverage data handling here, since benchmarks are run on release builds,
|
||||
# while coverage is currently collected for the debug ones
|
||||
@@ -1238,7 +1238,7 @@ jobs:
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
run: |
|
||||
TIMEOUT=5400 # 90 minutes, usually it takes ~2-3 minutes, but if runners are busy, it might take longer
|
||||
TIMEOUT=1800 # 30 minutes, usually it takes ~2-3 minutes, but if runners are busy, it might take longer
|
||||
INTERVAL=15 # try each N seconds
|
||||
|
||||
last_status="" # a variable to carry the last status of the "build-and-upload-extensions" context
|
||||
|
||||
2
.github/workflows/check-permissions.yml
vendored
2
.github/workflows/check-permissions.yml
vendored
@@ -19,7 +19,7 @@ jobs:
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
|
||||
uses: step-security/harden-runner@v2
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ jobs:
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
|
||||
uses: step-security/harden-runner@v2
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
|
||||
112
.github/workflows/cloud-extensions.yml
vendored
112
.github/workflows/cloud-extensions.yml
vendored
@@ -1,112 +0,0 @@
|
||||
name: Cloud Extensions Test
|
||||
on:
|
||||
schedule:
|
||||
# * is a special character in YAML so you have to quote this string
|
||||
# ┌───────────── minute (0 - 59)
|
||||
# │ ┌───────────── hour (0 - 23)
|
||||
# │ │ ┌───────────── day of the month (1 - 31)
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
- cron: '45 1 * * *' # run once a day, timezone is utc
|
||||
workflow_dispatch: # adds ability to run this manually
|
||||
inputs:
|
||||
region_id:
|
||||
description: 'Project region id. If not set, the default region will be used'
|
||||
required: false
|
||||
default: 'aws-us-east-2'
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: write
|
||||
|
||||
jobs:
|
||||
regress:
|
||||
env:
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: remote
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
pg-version: [16, 17]
|
||||
|
||||
runs-on: [ self-hosted, small ]
|
||||
container:
|
||||
# We use the neon-test-extensions image here as it contains the source code for the extensions.
|
||||
image: ghcr.io/neondatabase/neon-test-extensions-v${{ matrix.pg-version }}:latest
|
||||
credentials:
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Evaluate the settings
|
||||
id: project-settings
|
||||
run: |
|
||||
if [[ $((${{ matrix.pg-version }})) -lt 17 ]]; then
|
||||
ULID=ulid
|
||||
else
|
||||
ULID=pgx_ulid
|
||||
fi
|
||||
LIBS=timescaledb:rag_bge_small_en_v15,rag_jina_reranker_v1_tiny_en:$ULID
|
||||
settings=$(jq -c -n --arg libs $LIBS '{preload_libraries:{use_defaults:false,enabled_libraries:($libs| split(":"))}}')
|
||||
echo settings=$settings >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Create Neon Project
|
||||
id: create-neon-project
|
||||
uses: ./.github/actions/neon-project-create
|
||||
with:
|
||||
region_id: ${{ inputs.region_id }}
|
||||
postgres_version: ${{ matrix.pg-version }}
|
||||
project_settings: ${{ steps.project-settings.outputs.settings }}
|
||||
# We need these settings to get the expected output results.
|
||||
# We cannot use the environment variables e.g. PGTZ due to
|
||||
# https://github.com/neondatabase/neon/issues/1287
|
||||
default_endpoint_settings: >
|
||||
{
|
||||
"pg_settings": {
|
||||
"DateStyle": "Postgres,MDY",
|
||||
"TimeZone": "America/Los_Angeles",
|
||||
"compute_query_id": "off",
|
||||
"neon.allow_unstable_extensions": "on"
|
||||
}
|
||||
}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
admin_api_key: ${{ secrets.NEON_STAGING_ADMIN_API_KEY }}
|
||||
|
||||
- name: Run the regression tests
|
||||
run: /run-tests.sh -r /ext-src
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
|
||||
SKIP: "pg_hint_plan-src,pg_repack-src,pg_cron-src,plpgsql_check-src"
|
||||
|
||||
- name: Delete Neon Project
|
||||
if: ${{ always() }}
|
||||
uses: ./.github/actions/neon-project-delete
|
||||
with:
|
||||
project_id: ${{ steps.create-neon-project.outputs.project_id }}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1
|
||||
with:
|
||||
channel-id: ${{ vars.SLACK_ON_CALL_QA_STAGING_STREAM }}
|
||||
slack-message: |
|
||||
Periodic extensions test on staging: ${{ job.status }}
|
||||
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
2
.github/workflows/fast-forward.yml
vendored
2
.github/workflows/fast-forward.yml
vendored
@@ -14,7 +14,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
|
||||
uses: step-security/harden-runner@v2
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
|
||||
uses: step-security/harden-runner@v2
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
@@ -75,7 +75,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
|
||||
uses: step-security/harden-runner@v2
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
|
||||
4
.github/workflows/neon_extra_builds.yml
vendored
4
.github/workflows/neon_extra_builds.yml
vendored
@@ -69,6 +69,10 @@ jobs:
|
||||
|
||||
check-macos-build:
|
||||
needs: [ check-permissions, files-changed ]
|
||||
if: |
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
uses: ./.github/workflows/build-macos.yml
|
||||
with:
|
||||
pg_versions: ${{ needs.files-changed.outputs.postgres_changes }}
|
||||
|
||||
2
.github/workflows/pin-build-tools-image.yml
vendored
2
.github/workflows/pin-build-tools-image.yml
vendored
@@ -41,7 +41,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
|
||||
uses: step-security/harden-runner@v2
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
|
||||
4
.github/workflows/trigger-e2e-tests.yml
vendored
4
.github/workflows/trigger-e2e-tests.yml
vendored
@@ -35,7 +35,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
|
||||
uses: step-security/harden-runner@v2
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
@@ -73,7 +73,7 @@ jobs:
|
||||
}}
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
|
||||
uses: step-security/harden-runner@v2
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
|
||||
83
Cargo.lock
generated
83
Cargo.lock
generated
@@ -40,7 +40,7 @@ dependencies = [
|
||||
"getrandom 0.2.11",
|
||||
"once_cell",
|
||||
"version_check",
|
||||
"zerocopy 0.7.31",
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1323,6 +1323,7 @@ dependencies = [
|
||||
"serde_json",
|
||||
"serde_with",
|
||||
"signal-hook",
|
||||
"spki 0.7.3",
|
||||
"tar",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
@@ -4301,7 +4302,6 @@ dependencies = [
|
||||
"remote_storage",
|
||||
"reqwest",
|
||||
"rpds",
|
||||
"rstest",
|
||||
"rustls 0.23.18",
|
||||
"scopeguard",
|
||||
"send-future",
|
||||
@@ -4415,9 +4415,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "papaya"
|
||||
version = "0.2.1"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6827e3fc394523c21d4464d02c0bb1c19966ea4a58a9844ad6d746214179d2bc"
|
||||
checksum = "aab21828b6b5952fdadd6c377728ffae53ec3a21b2febc47319ab65741f7e2fd"
|
||||
dependencies = [
|
||||
"equivalent",
|
||||
"seize",
|
||||
@@ -5204,7 +5204,7 @@ dependencies = [
|
||||
"walkdir",
|
||||
"workspace_hack",
|
||||
"x509-cert",
|
||||
"zerocopy 0.8.24",
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -5594,7 +5594,7 @@ dependencies = [
|
||||
"wasm-bindgen-futures",
|
||||
"wasm-streams",
|
||||
"web-sys",
|
||||
"webpki-roots",
|
||||
"webpki-roots 0.26.1",
|
||||
"winreg",
|
||||
]
|
||||
|
||||
@@ -6195,13 +6195,13 @@ checksum = "224e328af6e080cddbab3c770b1cf50f0351ba0577091ef2410c3951d835ff87"
|
||||
|
||||
[[package]]
|
||||
name = "sentry"
|
||||
version = "0.37.0"
|
||||
version = "0.32.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "255914a8e53822abd946e2ce8baa41d4cded6b8e938913b7f7b9da5b7ab44335"
|
||||
checksum = "00421ed8fa0c995f07cde48ba6c89e80f2b312f74ff637326f392fbfd23abe02"
|
||||
dependencies = [
|
||||
"httpdate",
|
||||
"reqwest",
|
||||
"rustls 0.23.18",
|
||||
"rustls 0.21.12",
|
||||
"sentry-backtrace",
|
||||
"sentry-contexts",
|
||||
"sentry-core",
|
||||
@@ -6209,14 +6209,14 @@ dependencies = [
|
||||
"sentry-tracing",
|
||||
"tokio",
|
||||
"ureq",
|
||||
"webpki-roots",
|
||||
"webpki-roots 0.25.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sentry-backtrace"
|
||||
version = "0.37.0"
|
||||
version = "0.32.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "00293cd332a859961f24fd69258f7e92af736feaeb91020cff84dac4188a4302"
|
||||
checksum = "a79194074f34b0cbe5dd33896e5928bbc6ab63a889bd9df2264af5acb186921e"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"once_cell",
|
||||
@@ -6226,9 +6226,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sentry-contexts"
|
||||
version = "0.37.0"
|
||||
version = "0.32.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "961990f9caa76476c481de130ada05614cd7f5aa70fb57c2142f0e09ad3fb2aa"
|
||||
checksum = "eba8870c5dba2bfd9db25c75574a11429f6b95957b0a78ac02e2970dd7a5249a"
|
||||
dependencies = [
|
||||
"hostname",
|
||||
"libc",
|
||||
@@ -6240,9 +6240,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sentry-core"
|
||||
version = "0.37.0"
|
||||
version = "0.32.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a6409d845707d82415c800290a5d63be5e3df3c2e417b0997c60531dfbd35ef"
|
||||
checksum = "46a75011ea1c0d5c46e9e57df03ce81f5c7f0a9e199086334a1f9c0a541e0826"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"rand 0.8.5",
|
||||
@@ -6253,9 +6253,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sentry-panic"
|
||||
version = "0.37.0"
|
||||
version = "0.32.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "609b1a12340495ce17baeec9e08ff8ed423c337c1a84dffae36a178c783623f3"
|
||||
checksum = "2eaa3ecfa3c8750c78dcfd4637cfa2598b95b52897ed184b4dc77fcf7d95060d"
|
||||
dependencies = [
|
||||
"sentry-backtrace",
|
||||
"sentry-core",
|
||||
@@ -6263,9 +6263,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sentry-tracing"
|
||||
version = "0.37.0"
|
||||
version = "0.32.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49f4e86402d5c50239dc7d8fd3f6d5e048221d5fcb4e026d8d50ab57fe4644cb"
|
||||
checksum = "f715932bf369a61b7256687c6f0554141b7ce097287e30e3f7ed6e9de82498fe"
|
||||
dependencies = [
|
||||
"sentry-backtrace",
|
||||
"sentry-core",
|
||||
@@ -6275,9 +6275,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sentry-types"
|
||||
version = "0.37.0"
|
||||
version = "0.32.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3d3f117b8755dbede8260952de2aeb029e20f432e72634e8969af34324591631"
|
||||
checksum = "4519c900ce734f7a0eb7aba0869dfb225a7af8820634a7dd51449e3b093cfb7c"
|
||||
dependencies = [
|
||||
"debugid",
|
||||
"hex",
|
||||
@@ -6616,14 +6616,12 @@ dependencies = [
|
||||
"anyhow",
|
||||
"async-stream",
|
||||
"bytes",
|
||||
"camino",
|
||||
"clap",
|
||||
"const_format",
|
||||
"futures",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"http-body-util",
|
||||
"http-utils",
|
||||
"humantime",
|
||||
"hyper 1.4.1",
|
||||
"hyper-util",
|
||||
@@ -6633,7 +6631,6 @@ dependencies = [
|
||||
"prost 0.13.3",
|
||||
"rustls 0.23.18",
|
||||
"tokio",
|
||||
"tokio-rustls 0.26.0",
|
||||
"tonic",
|
||||
"tonic-build",
|
||||
"tracing",
|
||||
@@ -6714,6 +6711,8 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-stream",
|
||||
"aws-config",
|
||||
"aws-sdk-s3",
|
||||
"camino",
|
||||
"chrono",
|
||||
"clap",
|
||||
@@ -7802,7 +7801,7 @@ dependencies = [
|
||||
"rustls 0.23.18",
|
||||
"rustls-pki-types",
|
||||
"url",
|
||||
"webpki-roots",
|
||||
"webpki-roots 0.26.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -8170,6 +8169,12 @@ dependencies = [
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "webpki-roots"
|
||||
version = "0.25.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "14247bb57be4f377dfb94c72830b8ce8fc6beac03cf4bf7b9732eadd414123fc"
|
||||
|
||||
[[package]]
|
||||
name = "webpki-roots"
|
||||
version = "0.26.1"
|
||||
@@ -8477,8 +8482,6 @@ dependencies = [
|
||||
"regex-syntax 0.8.2",
|
||||
"reqwest",
|
||||
"rustls 0.23.18",
|
||||
"rustls-pki-types",
|
||||
"rustls-webpki 0.102.8",
|
||||
"scopeguard",
|
||||
"sec1 0.7.3",
|
||||
"serde",
|
||||
@@ -8507,6 +8510,7 @@ dependencies = [
|
||||
"tracing-log",
|
||||
"url",
|
||||
"uuid",
|
||||
"zerocopy",
|
||||
"zeroize",
|
||||
"zstd",
|
||||
"zstd-safe",
|
||||
@@ -8610,16 +8614,8 @@ version = "0.7.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1c4061bedbb353041c12f413700357bec76df2c7e2ca8e4df8bac24c6bf68e3d"
|
||||
dependencies = [
|
||||
"zerocopy-derive 0.7.31",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy"
|
||||
version = "0.8.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2586fea28e186957ef732a5f8b3be2da217d65c5969d4b1e17f973ebbe876879"
|
||||
dependencies = [
|
||||
"zerocopy-derive 0.8.24",
|
||||
"byteorder",
|
||||
"zerocopy-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -8633,17 +8629,6 @@ dependencies = [
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy-derive"
|
||||
version = "0.8.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a996a8f63c5c4448cd959ac1bab0aaa3306ccfd060472f85943ee0750f0169be"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerofrom"
|
||||
version = "0.1.5"
|
||||
|
||||
@@ -164,7 +164,7 @@ scopeguard = "1.1"
|
||||
sysinfo = "0.29.2"
|
||||
sd-notify = "0.4.1"
|
||||
send-future = "0.1.0"
|
||||
sentry = { version = "0.37", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
|
||||
sentry = { version = "0.32", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
serde_path_to_error = "0.1"
|
||||
@@ -220,7 +220,7 @@ uuid = { version = "1.6.1", features = ["v4", "v7", "serde"] }
|
||||
walkdir = "2.3.2"
|
||||
rustls-native-certs = "0.8"
|
||||
whoami = "1.5.1"
|
||||
zerocopy = { version = "0.8", features = ["derive", "simd"] }
|
||||
zerocopy = { version = "0.7", features = ["derive"] }
|
||||
json-structural-diff = { version = "0.2.0" }
|
||||
x509-cert = { version = "0.2.5" }
|
||||
|
||||
|
||||
@@ -173,7 +173,7 @@ RUN curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v$
|
||||
&& rm -rf protoc.zip protoc
|
||||
|
||||
# s5cmd
|
||||
ENV S5CMD_VERSION=2.3.0
|
||||
ENV S5CMD_VERSION=2.2.2
|
||||
RUN curl -sL "https://github.com/peak/s5cmd/releases/download/v${S5CMD_VERSION}/s5cmd_${S5CMD_VERSION}_Linux-$(uname -m | sed 's/x86_64/64bit/g' | sed 's/aarch64/arm64/g').tar.gz" | tar zxvf - s5cmd \
|
||||
&& chmod +x s5cmd \
|
||||
&& mv s5cmd /usr/local/bin/s5cmd
|
||||
@@ -206,7 +206,7 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "aws
|
||||
&& rm awscliv2.zip
|
||||
|
||||
# Mold: A Modern Linker
|
||||
ENV MOLD_VERSION=v2.37.1
|
||||
ENV MOLD_VERSION=v2.34.1
|
||||
RUN set -e \
|
||||
&& git clone https://github.com/rui314/mold.git \
|
||||
&& mkdir mold/build \
|
||||
@@ -268,7 +268,7 @@ WORKDIR /home/nonroot
|
||||
RUN echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /home/nonroot/.curlrc
|
||||
|
||||
# Python
|
||||
ENV PYTHON_VERSION=3.11.12 \
|
||||
ENV PYTHON_VERSION=3.11.10 \
|
||||
PYENV_ROOT=/home/nonroot/.pyenv \
|
||||
PATH=/home/nonroot/.pyenv/shims:/home/nonroot/.pyenv/bin:/home/nonroot/.poetry/bin:$PATH
|
||||
RUN set -e \
|
||||
@@ -296,12 +296,12 @@ ENV RUSTC_VERSION=1.86.0
|
||||
ENV RUSTUP_HOME="/home/nonroot/.rustup"
|
||||
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
|
||||
ARG RUSTFILT_VERSION=0.2.1
|
||||
ARG CARGO_HAKARI_VERSION=0.9.36
|
||||
ARG CARGO_DENY_VERSION=0.18.2
|
||||
ARG CARGO_HACK_VERSION=0.6.36
|
||||
ARG CARGO_NEXTEST_VERSION=0.9.94
|
||||
ARG CARGO_HAKARI_VERSION=0.9.33
|
||||
ARG CARGO_DENY_VERSION=0.16.2
|
||||
ARG CARGO_HACK_VERSION=0.6.33
|
||||
ARG CARGO_NEXTEST_VERSION=0.9.85
|
||||
ARG CARGO_CHEF_VERSION=0.1.71
|
||||
ARG CARGO_DIESEL_CLI_VERSION=2.2.9
|
||||
ARG CARGO_DIESEL_CLI_VERSION=2.2.6
|
||||
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
|
||||
chmod +x rustup-init && \
|
||||
./rustup-init -y --default-toolchain ${RUSTC_VERSION} && \
|
||||
|
||||
@@ -1800,8 +1800,8 @@ COPY compute/patches/pg_repack.patch /ext-src
|
||||
RUN cd /ext-src/pg_repack-src && patch -p1 </ext-src/pg_repack.patch && rm -f /ext-src/pg_repack.patch
|
||||
|
||||
COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
|
||||
RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl jq \
|
||||
&& apt clean && rm -rf /ext-src/*.tar.gz /ext-src/*.patch /var/lib/apt/lists/*
|
||||
RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl\
|
||||
&& apt clean && rm -rf /ext-src/*.tar.gz /var/lib/apt/lists/*
|
||||
ENV PATH=/usr/local/pgsql/bin:$PATH
|
||||
ENV PGHOST=compute
|
||||
ENV PGPORT=55433
|
||||
|
||||
@@ -44,6 +44,7 @@ serde.workspace = true
|
||||
serde_with.workspace = true
|
||||
serde_json.workspace = true
|
||||
signal-hook.workspace = true
|
||||
spki = { version = "0.7.3", features = ["std"] }
|
||||
tar.workspace = true
|
||||
tower.workspace = true
|
||||
tower-http.workspace = true
|
||||
|
||||
@@ -57,24 +57,13 @@ use tracing::{error, info};
|
||||
use url::Url;
|
||||
use utils::failpoint_support;
|
||||
|
||||
// Compatibility hack: if the control plane specified any remote-ext-config
|
||||
// use the default value for extension storage proxy gateway.
|
||||
// Remove this once the control plane is updated to pass the gateway URL
|
||||
fn parse_remote_ext_config(arg: &str) -> Result<String> {
|
||||
if arg.starts_with("http") {
|
||||
Ok(arg.trim_end_matches('/').to_string())
|
||||
} else {
|
||||
Ok("http://pg-ext-s3-gateway".to_string())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(rename_all = "kebab-case")]
|
||||
struct Cli {
|
||||
#[arg(short = 'b', long, default_value = "postgres", env = "POSTGRES_PATH")]
|
||||
pub pgbin: String,
|
||||
|
||||
#[arg(short = 'r', long, value_parser = parse_remote_ext_config)]
|
||||
#[arg(short = 'r', long)]
|
||||
pub remote_ext_config: Option<String>,
|
||||
|
||||
/// The port to bind the external listening HTTP server to. Clients running
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
use metrics::core::{AtomicF64, AtomicU64, Collector, GenericCounter, GenericGauge};
|
||||
use metrics::core::{AtomicF64, Collector, GenericGauge};
|
||||
use metrics::proto::MetricFamily;
|
||||
use metrics::{
|
||||
IntCounterVec, IntGaugeVec, UIntGaugeVec, register_gauge, register_int_counter,
|
||||
register_int_counter_vec, register_int_gauge_vec, register_uint_gauge_vec,
|
||||
IntCounterVec, IntGaugeVec, UIntGaugeVec, register_gauge, register_int_counter_vec,
|
||||
register_int_gauge_vec, register_uint_gauge_vec,
|
||||
};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
@@ -81,22 +81,6 @@ pub(crate) static COMPUTE_CTL_UP: Lazy<IntGaugeVec> = Lazy::new(|| {
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) static PG_CURR_DOWNTIME_MS: Lazy<GenericGauge<AtomicF64>> = Lazy::new(|| {
|
||||
register_gauge!(
|
||||
"compute_pg_current_downtime_ms",
|
||||
"Non-cumulative duration of Postgres downtime in ms; resets after successful check",
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) static PG_TOTAL_DOWNTIME_MS: Lazy<GenericCounter<AtomicU64>> = Lazy::new(|| {
|
||||
register_int_counter!(
|
||||
"compute_pg_downtime_ms_total",
|
||||
"Cumulative duration of Postgres downtime in ms",
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub fn collect() -> Vec<MetricFamily> {
|
||||
let mut metrics = COMPUTE_CTL_UP.collect();
|
||||
metrics.extend(INSTALLED_EXTENSIONS.collect());
|
||||
@@ -104,7 +88,5 @@ pub fn collect() -> Vec<MetricFamily> {
|
||||
metrics.extend(REMOTE_EXT_REQUESTS_TOTAL.collect());
|
||||
metrics.extend(DB_MIGRATION_FAILED.collect());
|
||||
metrics.extend(AUDIT_LOG_DIR_SIZE.collect());
|
||||
metrics.extend(PG_CURR_DOWNTIME_MS.collect());
|
||||
metrics.extend(PG_TOTAL_DOWNTIME_MS.collect());
|
||||
metrics
|
||||
}
|
||||
|
||||
@@ -6,294 +6,197 @@ use chrono::{DateTime, Utc};
|
||||
use compute_api::responses::ComputeStatus;
|
||||
use compute_api::spec::ComputeFeature;
|
||||
use postgres::{Client, NoTls};
|
||||
use tracing::{Level, error, info, instrument, span};
|
||||
use tracing::{debug, error, info, warn};
|
||||
|
||||
use crate::compute::ComputeNode;
|
||||
use crate::metrics::{PG_CURR_DOWNTIME_MS, PG_TOTAL_DOWNTIME_MS};
|
||||
|
||||
const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500);
|
||||
|
||||
struct ComputeMonitor {
|
||||
compute: Arc<ComputeNode>,
|
||||
// Spin in a loop and figure out the last activity time in the Postgres.
|
||||
// Then update it in the shared state. This function never errors out.
|
||||
// NB: the only expected panic is at `Mutex` unwrap(), all other errors
|
||||
// should be handled gracefully.
|
||||
fn watch_compute_activity(compute: &ComputeNode) {
|
||||
// Suppose that `connstr` doesn't change
|
||||
let connstr = compute.params.connstr.clone();
|
||||
let conf = compute.get_conn_conf(Some("compute_ctl:activity_monitor"));
|
||||
|
||||
/// The moment when Postgres had some activity,
|
||||
/// that should prevent compute from being suspended.
|
||||
last_active: Option<DateTime<Utc>>,
|
||||
// During startup and configuration we connect to every Postgres database,
|
||||
// but we don't want to count this as some user activity. So wait until
|
||||
// the compute fully started before monitoring activity.
|
||||
wait_for_postgres_start(compute);
|
||||
|
||||
/// The moment when we last tried to check Postgres.
|
||||
last_checked: DateTime<Utc>,
|
||||
/// The last moment we did a successful Postgres check.
|
||||
last_up: DateTime<Utc>,
|
||||
// Define `client` outside of the loop to reuse existing connection if it's active.
|
||||
let mut client = conf.connect(NoTls);
|
||||
|
||||
/// Only used for internal statistics change tracking
|
||||
/// between monitor runs and can be outdated.
|
||||
active_time: Option<f64>,
|
||||
/// Only used for internal statistics change tracking
|
||||
/// between monitor runs and can be outdated.
|
||||
sessions: Option<i64>,
|
||||
let mut sleep = false;
|
||||
let mut prev_active_time: Option<f64> = None;
|
||||
let mut prev_sessions: Option<i64> = None;
|
||||
|
||||
/// Use experimental statistics-based activity monitor. It's no longer
|
||||
/// 'experimental' per se, as it's enabled for everyone, but we still
|
||||
/// keep the flag as an option to turn it off in some cases if it will
|
||||
/// misbehave.
|
||||
experimental: bool,
|
||||
}
|
||||
|
||||
impl ComputeMonitor {
|
||||
fn report_down(&self) {
|
||||
let now = Utc::now();
|
||||
|
||||
// Calculate and report current downtime
|
||||
// (since the last time Postgres was up)
|
||||
let downtime = now.signed_duration_since(self.last_up);
|
||||
PG_CURR_DOWNTIME_MS.set(downtime.num_milliseconds() as f64);
|
||||
|
||||
// Calculate and update total downtime
|
||||
// (cumulative duration of Postgres downtime in ms)
|
||||
let inc = now
|
||||
.signed_duration_since(self.last_checked)
|
||||
.num_milliseconds();
|
||||
PG_TOTAL_DOWNTIME_MS.inc_by(inc as u64);
|
||||
if compute.has_feature(ComputeFeature::ActivityMonitorExperimental) {
|
||||
info!("starting experimental activity monitor for {}", connstr);
|
||||
} else {
|
||||
info!("starting activity monitor for {}", connstr);
|
||||
}
|
||||
|
||||
fn report_up(&mut self) {
|
||||
self.last_up = Utc::now();
|
||||
PG_CURR_DOWNTIME_MS.set(0.0);
|
||||
}
|
||||
|
||||
fn downtime_info(&self) -> String {
|
||||
format!(
|
||||
"total_ms: {}, current_ms: {}, last_up: {}",
|
||||
PG_TOTAL_DOWNTIME_MS.get(),
|
||||
PG_CURR_DOWNTIME_MS.get(),
|
||||
self.last_up
|
||||
)
|
||||
}
|
||||
|
||||
/// Spin in a loop and figure out the last activity time in the Postgres.
|
||||
/// Then update it in the shared state. This function never errors out.
|
||||
/// NB: the only expected panic is at `Mutex` unwrap(), all other errors
|
||||
/// should be handled gracefully.
|
||||
#[instrument(skip_all)]
|
||||
pub fn run(&mut self) {
|
||||
// Suppose that `connstr` doesn't change
|
||||
let connstr = self.compute.params.connstr.clone();
|
||||
let conf = self
|
||||
.compute
|
||||
.get_conn_conf(Some("compute_ctl:compute_monitor"));
|
||||
|
||||
// During startup and configuration we connect to every Postgres database,
|
||||
// but we don't want to count this as some user activity. So wait until
|
||||
// the compute fully started before monitoring activity.
|
||||
wait_for_postgres_start(&self.compute);
|
||||
|
||||
// Define `client` outside of the loop to reuse existing connection if it's active.
|
||||
let mut client = conf.connect(NoTls);
|
||||
|
||||
info!("starting compute monitor for {}", connstr);
|
||||
|
||||
loop {
|
||||
match &mut client {
|
||||
Ok(cli) => {
|
||||
if cli.is_closed() {
|
||||
info!(
|
||||
downtime_info = self.downtime_info(),
|
||||
"connection to Postgres is closed, trying to reconnect"
|
||||
);
|
||||
self.report_down();
|
||||
|
||||
// Connection is closed, reconnect and try again.
|
||||
client = conf.connect(NoTls);
|
||||
} else {
|
||||
match self.check(cli) {
|
||||
Ok(_) => {
|
||||
self.report_up();
|
||||
self.compute.update_last_active(self.last_active);
|
||||
}
|
||||
Err(e) => {
|
||||
// Although we have many places where we can return errors in `check()`,
|
||||
// normally it shouldn't happen. I.e., we will likely return error if
|
||||
// connection got broken, query timed out, Postgres returned invalid data, etc.
|
||||
// In all such cases it's suspicious, so let's report this as downtime.
|
||||
self.report_down();
|
||||
error!(
|
||||
downtime_info = self.downtime_info(),
|
||||
"could not check Postgres: {}", e
|
||||
);
|
||||
|
||||
// Reconnect to Postgres just in case. During tests, I noticed
|
||||
// that queries in `check()` can fail with `connection closed`,
|
||||
// but `cli.is_closed()` above doesn't detect it. Even if old
|
||||
// connection is still alive, it will be dropped when we reassign
|
||||
// `client` to a new connection.
|
||||
client = conf.connect(NoTls);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
info!(
|
||||
downtime_info = self.downtime_info(),
|
||||
"could not connect to Postgres: {}, retrying", e
|
||||
);
|
||||
self.report_down();
|
||||
|
||||
// Establish a new connection and try again.
|
||||
client = conf.connect(NoTls);
|
||||
}
|
||||
}
|
||||
|
||||
// Reset the `last_checked` timestamp and sleep before the next iteration.
|
||||
self.last_checked = Utc::now();
|
||||
loop {
|
||||
// We use `continue` a lot, so it's more convenient to sleep at the top of the loop.
|
||||
// But skip the first sleep, so we can connect to Postgres immediately.
|
||||
if sleep {
|
||||
// Should be outside of the mutex lock to allow others to read while we sleep.
|
||||
thread::sleep(MONITOR_CHECK_INTERVAL);
|
||||
} else {
|
||||
sleep = true;
|
||||
}
|
||||
}
|
||||
|
||||
#[instrument(skip_all)]
|
||||
fn check(&mut self, cli: &mut Client) -> anyhow::Result<()> {
|
||||
// This is new logic, only enable if the feature flag is set.
|
||||
// TODO: remove this once we are sure that it works OR drop it altogether.
|
||||
if self.experimental {
|
||||
// Check if the total active time or sessions across all databases has changed.
|
||||
// If it did, it means that user executed some queries. In theory, it can even go down if
|
||||
// some databases were dropped, but it's still user activity.
|
||||
match get_database_stats(cli) {
|
||||
Ok((active_time, sessions)) => {
|
||||
let mut detected_activity = false;
|
||||
match &mut client {
|
||||
Ok(cli) => {
|
||||
if cli.is_closed() {
|
||||
info!("connection to Postgres is closed, trying to reconnect");
|
||||
|
||||
if let Some(prev_active_time) = self.active_time {
|
||||
if active_time != prev_active_time {
|
||||
detected_activity = true;
|
||||
// Connection is closed, reconnect and try again.
|
||||
client = conf.connect(NoTls);
|
||||
continue;
|
||||
}
|
||||
|
||||
// This is a new logic, only enable if the feature flag is set.
|
||||
// TODO: remove this once we are sure that it works OR drop it altogether.
|
||||
if compute.has_feature(ComputeFeature::ActivityMonitorExperimental) {
|
||||
// First, check if the total active time or sessions across all databases has changed.
|
||||
// If it did, it means that user executed some queries. In theory, it can even go down if
|
||||
// some databases were dropped, but it's still a user activity.
|
||||
match get_database_stats(cli) {
|
||||
Ok((active_time, sessions)) => {
|
||||
let mut detected_activity = false;
|
||||
|
||||
prev_active_time = match prev_active_time {
|
||||
Some(prev_active_time) => {
|
||||
if active_time != prev_active_time {
|
||||
detected_activity = true;
|
||||
}
|
||||
Some(active_time)
|
||||
}
|
||||
None => Some(active_time),
|
||||
};
|
||||
prev_sessions = match prev_sessions {
|
||||
Some(prev_sessions) => {
|
||||
if sessions != prev_sessions {
|
||||
detected_activity = true;
|
||||
}
|
||||
Some(sessions)
|
||||
}
|
||||
None => Some(sessions),
|
||||
};
|
||||
|
||||
if detected_activity {
|
||||
// Update the last active time and continue, we don't need to
|
||||
// check backends state change.
|
||||
compute.update_last_active(Some(Utc::now()));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!("could not get database statistics: {}", e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
self.active_time = Some(active_time);
|
||||
}
|
||||
|
||||
if let Some(prev_sessions) = self.sessions {
|
||||
if sessions != prev_sessions {
|
||||
detected_activity = true;
|
||||
// Second, if database statistics is the same, check all backends state change,
|
||||
// maybe there is some with more recent activity. `get_backends_state_change()`
|
||||
// can return None or stale timestamp, so it's `compute.update_last_active()`
|
||||
// responsibility to check if the new timestamp is more recent than the current one.
|
||||
// This helps us to discover new sessions, that did nothing yet.
|
||||
match get_backends_state_change(cli) {
|
||||
Ok(last_active) => {
|
||||
compute.update_last_active(last_active);
|
||||
}
|
||||
Err(e) => {
|
||||
error!("could not get backends state change: {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
// Finally, if there are existing (logical) walsenders, do not suspend.
|
||||
//
|
||||
// walproposer doesn't currently show up in pg_stat_replication,
|
||||
// but protect if it will be
|
||||
let ws_count_query = "select count(*) from pg_stat_replication where application_name != 'walproposer';";
|
||||
match cli.query_one(ws_count_query, &[]) {
|
||||
Ok(r) => match r.try_get::<&str, i64>("count") {
|
||||
Ok(num_ws) => {
|
||||
if num_ws > 0 {
|
||||
compute.update_last_active(Some(Utc::now()));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
self.sessions = Some(sessions);
|
||||
|
||||
if detected_activity {
|
||||
// Update the last active time and continue, we don't need to
|
||||
// check backends state change.
|
||||
self.last_active = Some(Utc::now());
|
||||
return Ok(());
|
||||
Err(e) => {
|
||||
warn!("failed to parse walsenders count: {:?}", e);
|
||||
continue;
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
warn!("failed to get list of walsenders: {:?}", e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
return Err(anyhow::anyhow!("could not get database statistics: {}", e));
|
||||
//
|
||||
// Don't suspend compute if there is an active logical replication subscription
|
||||
//
|
||||
// `where pid is not null` – to filter out read only computes and subscription on branches
|
||||
//
|
||||
let logical_subscriptions_query =
|
||||
"select count(*) from pg_stat_subscription where pid is not null;";
|
||||
match cli.query_one(logical_subscriptions_query, &[]) {
|
||||
Ok(row) => match row.try_get::<&str, i64>("count") {
|
||||
Ok(num_subscribers) => {
|
||||
if num_subscribers > 0 {
|
||||
compute.update_last_active(Some(Utc::now()));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("failed to parse `pg_stat_subscription` count: {:?}", e);
|
||||
continue;
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"failed to get list of active logical replication subscriptions: {:?}",
|
||||
e
|
||||
);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
//
|
||||
// Do not suspend compute if autovacuum is running
|
||||
//
|
||||
let autovacuum_count_query = "select count(*) from pg_stat_activity where backend_type = 'autovacuum worker'";
|
||||
match cli.query_one(autovacuum_count_query, &[]) {
|
||||
Ok(r) => match r.try_get::<&str, i64>("count") {
|
||||
Ok(num_workers) => {
|
||||
if num_workers > 0 {
|
||||
compute.update_last_active(Some(Utc::now()));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("failed to parse autovacuum workers count: {:?}", e);
|
||||
continue;
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
warn!("failed to get list of autovacuum workers: {:?}", e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If database statistics are the same, check all backends for state changes.
|
||||
// Maybe there are some with more recent activity. `get_backends_state_change()`
|
||||
// can return None or stale timestamp, so it's `compute.update_last_active()`
|
||||
// responsibility to check if the new timestamp is more recent than the current one.
|
||||
// This helps us to discover new sessions that have not done anything yet.
|
||||
match get_backends_state_change(cli) {
|
||||
Ok(last_active) => match (last_active, self.last_active) {
|
||||
(Some(last_active), Some(prev_last_active)) => {
|
||||
if last_active > prev_last_active {
|
||||
self.last_active = Some(last_active);
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
(Some(last_active), None) => {
|
||||
self.last_active = Some(last_active);
|
||||
return Ok(());
|
||||
}
|
||||
_ => {}
|
||||
},
|
||||
Err(e) => {
|
||||
return Err(anyhow::anyhow!(
|
||||
"could not get backends state change: {}",
|
||||
e
|
||||
));
|
||||
debug!("could not connect to Postgres: {}, retrying", e);
|
||||
|
||||
// Establish a new connection and try again.
|
||||
client = conf.connect(NoTls);
|
||||
}
|
||||
}
|
||||
|
||||
// If there are existing (logical) walsenders, do not suspend.
|
||||
//
|
||||
// N.B. walproposer doesn't currently show up in pg_stat_replication,
|
||||
// but protect if it will.
|
||||
const WS_COUNT_QUERY: &str =
|
||||
"select count(*) from pg_stat_replication where application_name != 'walproposer';";
|
||||
match cli.query_one(WS_COUNT_QUERY, &[]) {
|
||||
Ok(r) => match r.try_get::<&str, i64>("count") {
|
||||
Ok(num_ws) => {
|
||||
if num_ws > 0 {
|
||||
self.last_active = Some(Utc::now());
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
let err: anyhow::Error = e.into();
|
||||
return Err(err.context("failed to parse walsenders count"));
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
return Err(anyhow::anyhow!("failed to get list of walsenders: {}", e));
|
||||
}
|
||||
}
|
||||
|
||||
// Don't suspend compute if there is an active logical replication subscription
|
||||
//
|
||||
// `where pid is not null` – to filter out read only computes and subscription on branches
|
||||
const LOGICAL_SUBSCRIPTIONS_QUERY: &str =
|
||||
"select count(*) from pg_stat_subscription where pid is not null;";
|
||||
match cli.query_one(LOGICAL_SUBSCRIPTIONS_QUERY, &[]) {
|
||||
Ok(row) => match row.try_get::<&str, i64>("count") {
|
||||
Ok(num_subscribers) => {
|
||||
if num_subscribers > 0 {
|
||||
self.last_active = Some(Utc::now());
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
return Err(anyhow::anyhow!(
|
||||
"failed to parse 'pg_stat_subscription' count: {}",
|
||||
e
|
||||
));
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
return Err(anyhow::anyhow!(
|
||||
"failed to get list of active logical replication subscriptions: {}",
|
||||
e
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Do not suspend compute if autovacuum is running
|
||||
const AUTOVACUUM_COUNT_QUERY: &str =
|
||||
"select count(*) from pg_stat_activity where backend_type = 'autovacuum worker'";
|
||||
match cli.query_one(AUTOVACUUM_COUNT_QUERY, &[]) {
|
||||
Ok(r) => match r.try_get::<&str, i64>("count") {
|
||||
Ok(num_workers) => {
|
||||
if num_workers > 0 {
|
||||
self.last_active = Some(Utc::now());
|
||||
return Ok(());
|
||||
};
|
||||
}
|
||||
Err(e) => {
|
||||
return Err(anyhow::anyhow!(
|
||||
"failed to parse autovacuum workers count: {}",
|
||||
e
|
||||
));
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
return Err(anyhow::anyhow!(
|
||||
"failed to get list of autovacuum workers: {}",
|
||||
e
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -412,24 +315,9 @@ fn get_backends_state_change(cli: &mut Client) -> anyhow::Result<Option<DateTime
|
||||
/// Launch a separate compute monitor thread and return its `JoinHandle`.
|
||||
pub fn launch_monitor(compute: &Arc<ComputeNode>) -> thread::JoinHandle<()> {
|
||||
let compute = Arc::clone(compute);
|
||||
let experimental = compute.has_feature(ComputeFeature::ActivityMonitorExperimental);
|
||||
let now = Utc::now();
|
||||
let mut monitor = ComputeMonitor {
|
||||
compute,
|
||||
last_active: None,
|
||||
last_checked: now,
|
||||
last_up: now,
|
||||
active_time: None,
|
||||
sessions: None,
|
||||
experimental,
|
||||
};
|
||||
|
||||
let span = span!(Level::INFO, "compute_monitor");
|
||||
thread::Builder::new()
|
||||
.name("compute-monitor".into())
|
||||
.spawn(move || {
|
||||
let _enter = span.enter();
|
||||
monitor.run();
|
||||
})
|
||||
.spawn(move || watch_compute_activity(&compute))
|
||||
.expect("cannot launch compute monitor thread")
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ use std::{io::Write, os::unix::fs::OpenOptionsExt, path::Path, time::Duration};
|
||||
use anyhow::{Context, Result, bail};
|
||||
use compute_api::responses::TlsConfig;
|
||||
use ring::digest;
|
||||
use spki::der::{Decode, PemReader};
|
||||
use x509_cert::Certificate;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
@@ -51,7 +52,7 @@ pub fn update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) {
|
||||
match try_update_key_path_blocking(pg_data, tls_config) {
|
||||
Ok(()) => break,
|
||||
Err(e) => {
|
||||
tracing::error!(error = ?e, "could not create key file");
|
||||
tracing::error!("could not create key file {e:?}");
|
||||
std::thread::sleep(Duration::from_secs(1))
|
||||
}
|
||||
}
|
||||
@@ -91,14 +92,8 @@ fn try_update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) -> Resul
|
||||
fn verify_key_cert(key: &str, cert: &str) -> Result<()> {
|
||||
use x509_cert::der::oid::db::rfc5912::ECDSA_WITH_SHA_256;
|
||||
|
||||
let certs = Certificate::load_pem_chain(cert.as_bytes())
|
||||
.context("decoding PEM encoded certificates")?;
|
||||
|
||||
// First certificate is our server-cert,
|
||||
// all the rest of the certs are the CA cert chain.
|
||||
let Some(cert) = certs.first() else {
|
||||
bail!("no certificates found");
|
||||
};
|
||||
let cert = Certificate::decode(&mut PemReader::new(cert.as_bytes()).context("pem reader")?)
|
||||
.context("decode cert")?;
|
||||
|
||||
match cert.signature_algorithm.oid {
|
||||
ECDSA_WITH_SHA_256 => {
|
||||
@@ -120,82 +115,3 @@ fn verify_key_cert(key: &str, cert: &str) -> Result<()> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::verify_key_cert;
|
||||
|
||||
/// Real certificate chain file, generated by cert-manager in dev.
|
||||
/// The server auth certificate has expired since 2025-04-24T15:41:35Z.
|
||||
const CERT: &str = "
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIICCDCCAa+gAwIBAgIQKhLomFcNULbZA/bPdGzaSzAKBggqhkjOPQQDAjBEMQsw
|
||||
CQYDVQQGEwJVUzESMBAGA1UEChMJTmVvbiBJbmMuMSEwHwYDVQQDExhOZW9uIEs4
|
||||
cyBJbnRlcm1lZGlhdGUgQ0EwHhcNMjUwNDIzMTU0MTM1WhcNMjUwNDI0MTU0MTM1
|
||||
WjBBMT8wPQYDVQQDEzZjb21wdXRlLXdpc3B5LWdyYXNzLXcwY21laWp3LmRlZmF1
|
||||
bHQuc3ZjLmNsdXN0ZXIubG9jYWwwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAATF
|
||||
QCcG2m/EVHAiZtSsYgVnHgoTjUL/Jtwfdrpvz2t0bVRZmBmSKhlo53uPV9Y5eKFG
|
||||
AmR54p9/gT2eO3xU7vAgo4GFMIGCMA4GA1UdDwEB/wQEAwIFoDAMBgNVHRMBAf8E
|
||||
AjAAMB8GA1UdIwQYMBaAFFR2JAhXkeiNQNEixTvAYIwxUu3QMEEGA1UdEQQ6MDiC
|
||||
NmNvbXB1dGUtd2lzcHktZ3Jhc3MtdzBjbWVpancuZGVmYXVsdC5zdmMuY2x1c3Rl
|
||||
ci5sb2NhbDAKBggqhkjOPQQDAgNHADBEAiBLG22wKG8XS9e9RxBT+kmUx/kIThcP
|
||||
DIpp7jx0PrFcdQIgEMTdnXpx5Cv/Z0NIEDxtMHUD7G0vuRPfztki36JuakM=
|
||||
-----END CERTIFICATE-----
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIICFzCCAb6gAwIBAgIUbbX98N2Ip6lWAONRk8dU9hSz+YIwCgYIKoZIzj0EAwIw
|
||||
RDELMAkGA1UEBhMCVVMxEjAQBgNVBAoTCU5lb24gSW5jLjEhMB8GA1UEAxMYTmVv
|
||||
biBBV1MgSW50ZXJtZWRpYXRlIENBMB4XDTI1MDQyMjE1MTAxMFoXDTI1MDcyMTE1
|
||||
MTAxMFowRDELMAkGA1UEBhMCVVMxEjAQBgNVBAoTCU5lb24gSW5jLjEhMB8GA1UE
|
||||
AxMYTmVvbiBLOHMgSW50ZXJtZWRpYXRlIENBMFkwEwYHKoZIzj0CAQYIKoZIzj0D
|
||||
AQcDQgAE5++m5owqNI4BPMTVNIUQH0qvU7pYhdpHGVGhdj/Lgars6ROvE6uSNQV4
|
||||
SAmJN5HBzj5/6kLQaTPWpXW7EHXjK6OBjTCBijAOBgNVHQ8BAf8EBAMCAQYwEgYD
|
||||
VR0TAQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQUVHYkCFeR6I1A0SLFO8BgjDFS7dAw
|
||||
HwYDVR0jBBgwFoAUgHfNXfyKtHO0V9qoLOWCjkNiaI8wJAYDVR0eAQH/BBowGKAW
|
||||
MBSCEi5zdmMuY2x1c3Rlci5sb2NhbDAKBggqhkjOPQQDAgNHADBEAiBObVFFdXaL
|
||||
QpOXmN60dYUNnQRwjKreFduEkQgOdOlssgIgVAdJJQFgvlrvEOBhY8j5WyeKRwUN
|
||||
k/ALs6KpgaFBCGY=
|
||||
-----END CERTIFICATE-----
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIB4jCCAYegAwIBAgIUFlxWFn/11yoGdmD+6gf+yQMToS0wCgYIKoZIzj0EAwIw
|
||||
ODELMAkGA1UEBhMCVVMxEjAQBgNVBAoTCU5lb24gSW5jLjEVMBMGA1UEAxMMTmVv
|
||||
biBSb290IENBMB4XDTI1MDQwMzA3MTUyMloXDTI2MDQwMzA3MTUyMlowRDELMAkG
|
||||
A1UEBhMCVVMxEjAQBgNVBAoTCU5lb24gSW5jLjEhMB8GA1UEAxMYTmVvbiBBV1Mg
|
||||
SW50ZXJtZWRpYXRlIENBMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEqonG/IQ6
|
||||
ZxtEtOUTkkoNopPieXDO5CBKUkNFTGeJEB7OxRlSpYJgsBpaYIaD6Vc4sVk3thIF
|
||||
p+pLw52idQOIN6NjMGEwDgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8w
|
||||
HQYDVR0OBBYEFIB3zV38irRztFfaqCzlgo5DYmiPMB8GA1UdIwQYMBaAFKh7M4/G
|
||||
FHvr/ORDQZt4bMLlJvHCMAoGCCqGSM49BAMCA0kAMEYCIQCbS4x7QPslONzBYbjC
|
||||
UQaQ0QLDW4CJHvQ4u4gbWFG87wIhAJMsHQHjP9qTT27Q65zQCR7O8QeLAfha1jrH
|
||||
Ag/LsxSr
|
||||
-----END CERTIFICATE-----
|
||||
";
|
||||
|
||||
/// The key corresponding to [`CERT`]
|
||||
const KEY: &str = "
|
||||
-----BEGIN EC PRIVATE KEY-----
|
||||
MHcCAQEEIDnAnrqmIJjndCLWP1iIO5X3X63Aia48TGpGuMXwvm6IoAoGCCqGSM49
|
||||
AwEHoUQDQgAExUAnBtpvxFRwImbUrGIFZx4KE41C/ybcH3a6b89rdG1UWZgZkioZ
|
||||
aOd7j1fWOXihRgJkeeKff4E9njt8VO7wIA==
|
||||
-----END EC PRIVATE KEY-----
|
||||
";
|
||||
|
||||
/// An incorrect key.
|
||||
const INCORRECT_KEY: &str = "
|
||||
-----BEGIN EC PRIVATE KEY-----
|
||||
MHcCAQEEIL6WqqBDyvM0HWz7Ir5M5+jhFWB7IzOClGn26OPrzHCXoAoGCCqGSM49
|
||||
AwEHoUQDQgAE7XVvdOy5lfwtNKb+gJEUtnG+DrnnXLY5LsHDeGQKV9PTRcEMeCrG
|
||||
YZzHyML4P6Sr4yi2ts+4B9i47uvAG8+XwQ==
|
||||
-----END EC PRIVATE KEY-----
|
||||
";
|
||||
|
||||
#[test]
|
||||
fn certificate_verification() {
|
||||
verify_key_cert(KEY, CERT).unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(expected = "private key file does not match certificate")]
|
||||
fn certificate_verification_fail() {
|
||||
verify_key_cert(INCORRECT_KEY, CERT).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,10 +17,8 @@ use std::time::Duration;
|
||||
use anyhow::{Context, Result, anyhow, bail};
|
||||
use clap::Parser;
|
||||
use compute_api::spec::ComputeMode;
|
||||
use control_plane::broker::StorageBroker;
|
||||
use control_plane::endpoint::ComputeControlPlane;
|
||||
use control_plane::endpoint_storage::{ENDPOINT_STORAGE_DEFAULT_PORT, EndpointStorage};
|
||||
use control_plane::local_env;
|
||||
use control_plane::local_env::{
|
||||
EndpointStorageConf, InitForceMode, LocalEnv, NeonBroker, NeonLocalInitConf,
|
||||
NeonLocalInitPageserverConf, SafekeeperConf,
|
||||
@@ -30,6 +28,7 @@ use control_plane::safekeeper::SafekeeperNode;
|
||||
use control_plane::storage_controller::{
|
||||
NeonStorageControllerStartArgs, NeonStorageControllerStopArgs, StorageController,
|
||||
};
|
||||
use control_plane::{broker, local_env};
|
||||
use nix::fcntl::{FlockArg, flock};
|
||||
use pageserver_api::config::{
|
||||
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
|
||||
@@ -989,8 +988,7 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
|
||||
NeonLocalInitConf {
|
||||
control_plane_api: Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap()),
|
||||
broker: NeonBroker {
|
||||
listen_addr: Some(DEFAULT_BROKER_ADDR.parse().unwrap()),
|
||||
listen_https_addr: None,
|
||||
listen_addr: DEFAULT_BROKER_ADDR.parse().unwrap(),
|
||||
},
|
||||
safekeepers: vec![SafekeeperConf {
|
||||
id: DEFAULT_SAFEKEEPER_ID,
|
||||
@@ -1779,8 +1777,7 @@ async fn handle_endpoint_storage(
|
||||
async fn handle_storage_broker(subcmd: &StorageBrokerCmd, env: &local_env::LocalEnv) -> Result<()> {
|
||||
match subcmd {
|
||||
StorageBrokerCmd::Start(args) => {
|
||||
let storage_broker = StorageBroker::from_env(env);
|
||||
if let Err(e) = storage_broker.start(&args.start_timeout).await {
|
||||
if let Err(e) = broker::start_broker_process(env, &args.start_timeout).await {
|
||||
eprintln!("broker start failed: {e}");
|
||||
exit(1);
|
||||
}
|
||||
@@ -1788,8 +1785,7 @@ async fn handle_storage_broker(subcmd: &StorageBrokerCmd, env: &local_env::Local
|
||||
|
||||
StorageBrokerCmd::Stop(_args) => {
|
||||
// FIXME: stop_mode unused
|
||||
let storage_broker = StorageBroker::from_env(env);
|
||||
if let Err(e) = storage_broker.stop() {
|
||||
if let Err(e) = broker::stop_broker_process(env) {
|
||||
eprintln!("broker stop failed: {e}");
|
||||
exit(1);
|
||||
}
|
||||
@@ -1839,11 +1835,8 @@ async fn handle_start_all_impl(
|
||||
#[allow(clippy::redundant_closure_call)]
|
||||
(|| {
|
||||
js.spawn(async move {
|
||||
let storage_broker = StorageBroker::from_env(env);
|
||||
storage_broker
|
||||
.start(&retry_timeout)
|
||||
.await
|
||||
.map_err(|e| e.context("start storage_broker"))
|
||||
let retry_timeout = retry_timeout;
|
||||
broker::start_broker_process(env, &retry_timeout).await
|
||||
});
|
||||
|
||||
js.spawn(async move {
|
||||
@@ -1998,8 +1991,7 @@ async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
|
||||
}
|
||||
}
|
||||
|
||||
let storage_broker = StorageBroker::from_env(env);
|
||||
if let Err(e) = storage_broker.stop() {
|
||||
if let Err(e) = broker::stop_broker_process(env) {
|
||||
eprintln!("neon broker stop failed: {e:#}");
|
||||
}
|
||||
|
||||
|
||||
@@ -3,86 +3,60 @@
|
||||
//! In the local test environment, the storage broker stores its data directly in
|
||||
//!
|
||||
//! ```text
|
||||
//! .neon/storage_broker
|
||||
//! .neon
|
||||
//! ```
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Context;
|
||||
use camino::Utf8PathBuf;
|
||||
|
||||
use crate::{background_process, local_env::LocalEnv};
|
||||
use crate::{background_process, local_env};
|
||||
|
||||
pub struct StorageBroker {
|
||||
env: LocalEnv,
|
||||
pub async fn start_broker_process(
|
||||
env: &local_env::LocalEnv,
|
||||
retry_timeout: &Duration,
|
||||
) -> anyhow::Result<()> {
|
||||
let broker = &env.broker;
|
||||
let listen_addr = &broker.listen_addr;
|
||||
|
||||
print!("Starting neon broker at {}", listen_addr);
|
||||
|
||||
let args = [format!("--listen-addr={listen_addr}")];
|
||||
|
||||
let client = reqwest::Client::new();
|
||||
background_process::start_process(
|
||||
"storage_broker",
|
||||
&env.base_data_dir,
|
||||
&env.storage_broker_bin(),
|
||||
args,
|
||||
[],
|
||||
background_process::InitialPidFile::Create(storage_broker_pid_file_path(env)),
|
||||
retry_timeout,
|
||||
|| async {
|
||||
let url = broker.client_url();
|
||||
let status_url = url.join("status").with_context(|| {
|
||||
format!("Failed to append /status path to broker endpoint {url}")
|
||||
})?;
|
||||
let request = client
|
||||
.get(status_url)
|
||||
.build()
|
||||
.with_context(|| format!("Failed to construct request to broker endpoint {url}"))?;
|
||||
match client.execute(request).await {
|
||||
Ok(resp) => Ok(resp.status().is_success()),
|
||||
Err(_) => Ok(false),
|
||||
}
|
||||
},
|
||||
)
|
||||
.await
|
||||
.context("Failed to spawn storage_broker subprocess")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
impl StorageBroker {
|
||||
/// Create a new `StorageBroker` instance from the environment.
|
||||
pub fn from_env(env: &LocalEnv) -> Self {
|
||||
Self { env: env.clone() }
|
||||
}
|
||||
|
||||
pub fn initialize(&self) -> anyhow::Result<()> {
|
||||
if self.env.generate_local_ssl_certs {
|
||||
self.env.generate_ssl_cert(
|
||||
&self.env.storage_broker_data_dir().join("server.crt"),
|
||||
&self.env.storage_broker_data_dir().join("server.key"),
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Start the storage broker process.
|
||||
pub async fn start(&self, retry_timeout: &Duration) -> anyhow::Result<()> {
|
||||
let broker = &self.env.broker;
|
||||
|
||||
print!("Starting neon broker at {}", broker.client_url());
|
||||
|
||||
let mut args = Vec::new();
|
||||
|
||||
if let Some(addr) = &broker.listen_addr {
|
||||
args.push(format!("--listen-addr={addr}"));
|
||||
}
|
||||
if let Some(addr) = &broker.listen_https_addr {
|
||||
args.push(format!("--listen-https-addr={addr}"));
|
||||
}
|
||||
|
||||
let client = self.env.create_http_client();
|
||||
background_process::start_process(
|
||||
"storage_broker",
|
||||
&self.env.storage_broker_data_dir(),
|
||||
&self.env.storage_broker_bin(),
|
||||
args,
|
||||
[],
|
||||
background_process::InitialPidFile::Create(self.pid_file_path()),
|
||||
retry_timeout,
|
||||
|| async {
|
||||
let url = broker.client_url();
|
||||
let status_url = url.join("status").with_context(|| {
|
||||
format!("Failed to append /status path to broker endpoint {url}")
|
||||
})?;
|
||||
let request = client.get(status_url).build().with_context(|| {
|
||||
format!("Failed to construct request to broker endpoint {url}")
|
||||
})?;
|
||||
match client.execute(request).await {
|
||||
Ok(resp) => Ok(resp.status().is_success()),
|
||||
Err(_) => Ok(false),
|
||||
}
|
||||
},
|
||||
)
|
||||
.await
|
||||
.context("Failed to spawn storage_broker subprocess")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Stop the storage broker process.
|
||||
pub fn stop(&self) -> anyhow::Result<()> {
|
||||
background_process::stop_process(true, "storage_broker", &self.pid_file_path())
|
||||
}
|
||||
|
||||
/// Get the path to the PID file for the storage broker.
|
||||
fn pid_file_path(&self) -> Utf8PathBuf {
|
||||
Utf8PathBuf::from_path_buf(self.env.base_data_dir.join("storage_broker.pid"))
|
||||
.expect("non-Unicode path")
|
||||
}
|
||||
pub fn stop_broker_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
|
||||
background_process::stop_process(true, "storage_broker", &storage_broker_pid_file_path(env))
|
||||
}
|
||||
|
||||
fn storage_broker_pid_file_path(env: &local_env::LocalEnv) -> Utf8PathBuf {
|
||||
Utf8PathBuf::from_path_buf(env.base_data_dir.join("storage_broker.pid"))
|
||||
.expect("non-Unicode path")
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
//! script which will use local paths.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::net::SocketAddr;
|
||||
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Command, Stdio};
|
||||
use std::time::Duration;
|
||||
@@ -14,12 +14,11 @@ use anyhow::{Context, bail};
|
||||
use clap::ValueEnum;
|
||||
use pem::Pem;
|
||||
use postgres_backend::AuthType;
|
||||
use reqwest::{Certificate, Url};
|
||||
use reqwest::Url;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utils::auth::encode_from_key_file;
|
||||
use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};
|
||||
|
||||
use crate::broker::StorageBroker;
|
||||
use crate::endpoint_storage::{ENDPOINT_STORAGE_REMOTE_STORAGE_DIR, EndpointStorage};
|
||||
use crate::pageserver::{PAGESERVER_REMOTE_STORAGE_DIR, PageServerNode};
|
||||
use crate::safekeeper::SafekeeperNode;
|
||||
@@ -158,16 +157,11 @@ pub struct EndpointStorageConf {
|
||||
}
|
||||
|
||||
/// Broker config for cluster internal communication.
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Default)]
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
#[serde(default)]
|
||||
pub struct NeonBroker {
|
||||
/// Broker listen HTTP address for storage nodes coordination, e.g. '127.0.0.1:50051'.
|
||||
/// At least one of listen_addr or listen_https_addr must be set.
|
||||
pub listen_addr: Option<SocketAddr>,
|
||||
/// Broker listen HTTPS address for storage nodes coordination, e.g. '127.0.0.1:50051'.
|
||||
/// At least one of listen_addr or listen_https_addr must be set.
|
||||
/// listen_https_addr is preferred over listen_addr in neon_local.
|
||||
pub listen_https_addr: Option<SocketAddr>,
|
||||
/// Broker listen address for storage nodes coordination, e.g. '127.0.0.1:50051'.
|
||||
pub listen_addr: SocketAddr,
|
||||
}
|
||||
|
||||
/// A part of storage controller's config the neon_local knows about.
|
||||
@@ -241,19 +235,18 @@ impl Default for NeonStorageControllerConf {
|
||||
}
|
||||
}
|
||||
|
||||
// Dummy Default impl to satisfy Deserialize derive.
|
||||
impl Default for NeonBroker {
|
||||
fn default() -> Self {
|
||||
NeonBroker {
|
||||
listen_addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 0),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl NeonBroker {
|
||||
pub fn client_url(&self) -> Url {
|
||||
let url = if let Some(addr) = self.listen_https_addr {
|
||||
format!("https://{}", addr)
|
||||
} else {
|
||||
format!(
|
||||
"http://{}",
|
||||
self.listen_addr
|
||||
.expect("at least one address should be set")
|
||||
)
|
||||
};
|
||||
|
||||
Url::parse(&url).expect("failed to construct url")
|
||||
Url::parse(&format!("http://{}", self.listen_addr)).expect("failed to construct url")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -448,10 +441,6 @@ impl LocalEnv {
|
||||
self.base_data_dir.join("endpoints")
|
||||
}
|
||||
|
||||
pub fn storage_broker_data_dir(&self) -> PathBuf {
|
||||
self.base_data_dir.join("storage_broker")
|
||||
}
|
||||
|
||||
pub fn pageserver_data_dir(&self, pageserver_id: NodeId) -> PathBuf {
|
||||
self.base_data_dir
|
||||
.join(format!("pageserver_{pageserver_id}"))
|
||||
@@ -514,23 +503,6 @@ impl LocalEnv {
|
||||
)
|
||||
}
|
||||
|
||||
/// Creates HTTP client with local SSL CA certificates.
|
||||
pub fn create_http_client(&self) -> reqwest::Client {
|
||||
let ssl_ca_certs = self.ssl_ca_cert_path().map(|ssl_ca_file| {
|
||||
let buf = std::fs::read(ssl_ca_file).expect("SSL CA file should exist");
|
||||
Certificate::from_pem_bundle(&buf).expect("SSL CA file should be valid")
|
||||
});
|
||||
|
||||
let mut http_client = reqwest::Client::builder();
|
||||
for ssl_ca_cert in ssl_ca_certs.unwrap_or_default() {
|
||||
http_client = http_client.add_root_certificate(ssl_ca_cert);
|
||||
}
|
||||
|
||||
http_client
|
||||
.build()
|
||||
.expect("HTTP client should construct with no error")
|
||||
}
|
||||
|
||||
/// Inspect the base data directory and extract the instance id and instance directory path
|
||||
/// for all storage controller instances
|
||||
pub async fn storage_controller_instances(&self) -> std::io::Result<Vec<(u8, PathBuf)>> {
|
||||
@@ -939,12 +911,6 @@ impl LocalEnv {
|
||||
// create endpoints dir
|
||||
fs::create_dir_all(env.endpoints_path())?;
|
||||
|
||||
// create storage broker dir
|
||||
fs::create_dir_all(env.storage_broker_data_dir())?;
|
||||
StorageBroker::from_env(&env)
|
||||
.initialize()
|
||||
.context("storage broker init failed")?;
|
||||
|
||||
// create safekeeper dirs
|
||||
for safekeeper in &env.safekeepers {
|
||||
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(&env, safekeeper.id))?;
|
||||
|
||||
@@ -21,6 +21,7 @@ use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_client::mgmt_api;
|
||||
use postgres_backend::AuthType;
|
||||
use postgres_connection::{PgConnectionConfig, parse_host_port};
|
||||
use reqwest::Certificate;
|
||||
use utils::auth::{Claims, Scope};
|
||||
use utils::id::{NodeId, TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
@@ -50,6 +51,19 @@ impl PageServerNode {
|
||||
parse_host_port(&conf.listen_pg_addr).expect("Unable to parse listen_pg_addr");
|
||||
let port = port.unwrap_or(5432);
|
||||
|
||||
let ssl_ca_certs = env.ssl_ca_cert_path().map(|ssl_ca_file| {
|
||||
let buf = std::fs::read(ssl_ca_file).expect("SSL root CA file should exist");
|
||||
Certificate::from_pem_bundle(&buf).expect("SSL CA file should be valid")
|
||||
});
|
||||
|
||||
let mut http_client = reqwest::Client::builder();
|
||||
for ssl_ca_cert in ssl_ca_certs.unwrap_or_default() {
|
||||
http_client = http_client.add_root_certificate(ssl_ca_cert);
|
||||
}
|
||||
let http_client = http_client
|
||||
.build()
|
||||
.expect("Client constructs with no errors");
|
||||
|
||||
let endpoint = if env.storage_controller.use_https_pageserver_api {
|
||||
format!(
|
||||
"https://{}",
|
||||
@@ -66,7 +80,7 @@ impl PageServerNode {
|
||||
conf: conf.clone(),
|
||||
env: env.clone(),
|
||||
http_client: mgmt_api::Client::new(
|
||||
env.create_http_client(),
|
||||
http_client,
|
||||
endpoint,
|
||||
{
|
||||
match conf.http_auth_type {
|
||||
|
||||
@@ -87,7 +87,7 @@ impl SafekeeperNode {
|
||||
conf: conf.clone(),
|
||||
pg_connection_config: Self::safekeeper_connection_config(&listen_addr, conf.pg_port),
|
||||
env: env.clone(),
|
||||
http_client: env.create_http_client(),
|
||||
http_client: reqwest::Client::new(),
|
||||
http_base_url: format!("http://{}:{}/v1", listen_addr, conf.http_port),
|
||||
listen_addr,
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_client::mgmt_api::ResponseErrorMessageExt;
|
||||
use pem::Pem;
|
||||
use postgres_backend::AuthType;
|
||||
use reqwest::Method;
|
||||
use reqwest::{Certificate, Method};
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::process::Command;
|
||||
@@ -153,11 +153,24 @@ impl StorageController {
|
||||
}
|
||||
};
|
||||
|
||||
let ssl_ca_certs = env.ssl_ca_cert_path().map(|ssl_ca_file| {
|
||||
let buf = std::fs::read(ssl_ca_file).expect("SSL CA file should exist");
|
||||
Certificate::from_pem_bundle(&buf).expect("SSL CA file should be valid")
|
||||
});
|
||||
|
||||
let mut http_client = reqwest::Client::builder();
|
||||
for ssl_ca_cert in ssl_ca_certs.unwrap_or_default() {
|
||||
http_client = http_client.add_root_certificate(ssl_ca_cert);
|
||||
}
|
||||
let http_client = http_client
|
||||
.build()
|
||||
.expect("HTTP client should construct with no error");
|
||||
|
||||
Self {
|
||||
env: env.clone(),
|
||||
private_key,
|
||||
public_key,
|
||||
client: env.create_http_client(),
|
||||
client: http_client,
|
||||
config: env.storage_controller.clone(),
|
||||
listen_port: OnceLock::default(),
|
||||
}
|
||||
|
||||
16
deny.toml
16
deny.toml
@@ -45,7 +45,9 @@ allow = [
|
||||
"ISC",
|
||||
"MIT",
|
||||
"MPL-2.0",
|
||||
"OpenSSL",
|
||||
"Unicode-3.0",
|
||||
"Zlib",
|
||||
]
|
||||
confidence-threshold = 0.8
|
||||
exceptions = [
|
||||
@@ -54,6 +56,14 @@ exceptions = [
|
||||
{ allow = ["Zlib"], name = "const_format", version = "*" },
|
||||
]
|
||||
|
||||
[[licenses.clarify]]
|
||||
name = "ring"
|
||||
version = "*"
|
||||
expression = "MIT AND ISC AND OpenSSL"
|
||||
license-files = [
|
||||
{ path = "LICENSE", hash = 0xbd0eed23 }
|
||||
]
|
||||
|
||||
[licenses.private]
|
||||
ignore = true
|
||||
registries = []
|
||||
@@ -106,11 +116,7 @@ name = "openssl"
|
||||
unknown-registry = "warn"
|
||||
unknown-git = "warn"
|
||||
allow-registry = ["https://github.com/rust-lang/crates.io-index"]
|
||||
allow-git = [
|
||||
# Crate pinned to commit in origin repo due to opentelemetry version.
|
||||
# TODO: Remove this once crate is fetched from crates.io again.
|
||||
"https://github.com/mattiapenati/tower-otel",
|
||||
]
|
||||
allow-git = []
|
||||
|
||||
[sources.allow-org]
|
||||
github = [
|
||||
|
||||
@@ -9,20 +9,21 @@
|
||||
# to verify custom image builds (e.g pre-published ones).
|
||||
#
|
||||
# A test script for postgres extensions
|
||||
# Currently supports only v16+
|
||||
# Currently supports only v16
|
||||
#
|
||||
set -eux -o pipefail
|
||||
|
||||
export COMPOSE_FILE='docker-compose.yml'
|
||||
export COMPOSE_PROFILES=test-extensions
|
||||
cd "$(dirname "${0}")"
|
||||
COMPOSE_FILE='docker-compose.yml'
|
||||
cd $(dirname $0)
|
||||
COMPUTE_CONTAINER_NAME=docker-compose-compute-1
|
||||
TEST_CONTAINER_NAME=docker-compose-neon-test-extensions-1
|
||||
PSQL_OPTION="-h localhost -U cloud_admin -p 55433 -d postgres"
|
||||
|
||||
function cleanup() {
|
||||
cleanup() {
|
||||
echo "show container information"
|
||||
docker ps
|
||||
echo "stop containers..."
|
||||
docker compose down
|
||||
docker compose --profile test-extensions -f $COMPOSE_FILE down
|
||||
}
|
||||
|
||||
for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
|
||||
@@ -30,55 +31,55 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
|
||||
echo "clean up containers if exists"
|
||||
cleanup
|
||||
PG_TEST_VERSION=$((pg_version < 16 ? 16 : pg_version))
|
||||
PG_VERSION=${pg_version} PG_TEST_VERSION=${PG_TEST_VERSION} docker compose up --quiet-pull --build -d
|
||||
PG_VERSION=$pg_version PG_TEST_VERSION=$PG_TEST_VERSION docker compose --profile test-extensions -f $COMPOSE_FILE up --quiet-pull --build -d
|
||||
|
||||
echo "wait until the compute is ready. timeout after 60s. "
|
||||
cnt=0
|
||||
while sleep 3; do
|
||||
# check timeout
|
||||
(( cnt += 3 ))
|
||||
if [[ ${cnt} -gt 60 ]]; then
|
||||
cnt=`expr $cnt + 3`
|
||||
if [ $cnt -gt 60 ]; then
|
||||
echo "timeout before the compute is ready."
|
||||
exit 1
|
||||
fi
|
||||
if docker compose logs "compute_is_ready" | grep -q "accepting connections"; then
|
||||
if docker compose --profile test-extensions -f $COMPOSE_FILE logs "compute_is_ready" | grep -q "accepting connections"; then
|
||||
echo "OK. The compute is ready to connect."
|
||||
echo "execute simple queries."
|
||||
docker compose exec compute /bin/bash -c "psql ${PSQL_OPTION} -c 'SELECT 1'"
|
||||
docker exec $COMPUTE_CONTAINER_NAME /bin/bash -c "psql $PSQL_OPTION"
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
if [[ ${pg_version} -ge 16 ]]; then
|
||||
if [ $pg_version -ge 16 ]; then
|
||||
# This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail
|
||||
# It cannot be moved to Dockerfile now because the database directory is created after the start of the container
|
||||
echo Adding dummy config
|
||||
docker compose exec compute touch /var/db/postgres/compute/compute_ctl_temp_override.conf
|
||||
docker exec $COMPUTE_CONTAINER_NAME touch /var/db/postgres/compute/compute_ctl_temp_override.conf
|
||||
# The following block copies the files for the pg_hintplan test to the compute node for the extension test in an isolated docker-compose environment
|
||||
TMPDIR=$(mktemp -d)
|
||||
docker compose cp neon-test-extensions:/ext-src/pg_hint_plan-src/data "${TMPDIR}/data"
|
||||
docker compose cp "${TMPDIR}/data" compute:/ext-src/pg_hint_plan-src/
|
||||
rm -rf "${TMPDIR}"
|
||||
docker cp $TEST_CONTAINER_NAME:/ext-src/pg_hint_plan-src/data $TMPDIR/data
|
||||
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/ext-src/pg_hint_plan-src/
|
||||
rm -rf $TMPDIR
|
||||
# The following block does the same for the contrib/file_fdw test
|
||||
TMPDIR=$(mktemp -d)
|
||||
docker compose cp neon-test-extensions:/postgres/contrib/file_fdw/data "${TMPDIR}/data"
|
||||
docker compose cp "${TMPDIR}/data" compute:/postgres/contrib/file_fdw/data
|
||||
rm -rf "${TMPDIR}"
|
||||
docker cp $TEST_CONTAINER_NAME:/postgres/contrib/file_fdw/data $TMPDIR/data
|
||||
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/postgres/contrib/file_fdw/data
|
||||
rm -rf $TMPDIR
|
||||
# Apply patches
|
||||
docker compose exec -T neon-test-extensions bash -c "(cd /postgres && patch -p1)" <"../compute/patches/contrib_pg${pg_version}.patch"
|
||||
cat ../compute/patches/contrib_pg${pg_version}.patch | docker exec -i $TEST_CONTAINER_NAME bash -c "(cd /postgres && patch -p1)"
|
||||
# We are running tests now
|
||||
rm -f testout.txt testout_contrib.txt
|
||||
docker compose exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \
|
||||
neon-test-extensions /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0
|
||||
docker compose exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \
|
||||
neon-test-extensions /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0
|
||||
if [[ ${EXT_SUCCESS} -eq 0 || ${CONTRIB_SUCCESS} -eq 0 ]]; then
|
||||
docker exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \
|
||||
$TEST_CONTAINER_NAME /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0
|
||||
docker exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \
|
||||
$TEST_CONTAINER_NAME /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0
|
||||
if [ $EXT_SUCCESS -eq 0 ] || [ $CONTRIB_SUCCESS -eq 0 ]; then
|
||||
CONTRIB_FAILED=
|
||||
FAILED=
|
||||
[[ ${EXT_SUCCESS} -eq 0 ]] && FAILED=$(tail -1 testout.txt | awk '{for(i=1;i<=NF;i++){print "/ext-src/"$i;}}')
|
||||
[[ ${CONTRIB_SUCCESS} -eq 0 ]] && CONTRIB_FAILED=$(tail -1 testout_contrib.txt | awk '{for(i=0;i<=NF;i++){print "/postgres/contrib/"$i;}}')
|
||||
for d in ${FAILED} ${CONTRIB_FAILED}; do
|
||||
docker compose exec neon-test-extensions bash -c 'for file in $(find '"${d}"' -name regression.diffs -o -name regression.out); do cat ${file}; done' || [[ ${?} -eq 1 ]]
|
||||
[ $EXT_SUCCESS -eq 0 ] && FAILED=$(tail -1 testout.txt | awk '{for(i=1;i<=NF;i++){print "/ext-src/"$i;}}')
|
||||
[ $CONTRIB_SUCCESS -eq 0 ] && CONTRIB_FAILED=$(tail -1 testout_contrib.txt | awk '{for(i=0;i<=NF;i++){print "/postgres/contrib/"$i;}}')
|
||||
for d in $FAILED $CONTRIB_FAILED; do
|
||||
docker exec $TEST_CONTAINER_NAME bash -c 'for file in $(find '"$d"' -name regression.diffs -o -name regression.out); do cat $file; done' || [ $? -eq 1 ]
|
||||
done
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -1,99 +0,0 @@
|
||||
# PostgreSQL Extensions for Testing
|
||||
|
||||
This directory contains PostgreSQL extensions used primarily for:
|
||||
1. Testing extension upgrades between different Compute versions
|
||||
2. Running regression tests with regular users (mostly for cloud instances)
|
||||
|
||||
## Directory Structure
|
||||
|
||||
Each extension directory follows a standard structure:
|
||||
|
||||
- `extension-name-src/` - Directory containing test files for the extension
|
||||
- `test-upgrade.sh` - Script for testing upgrade scenarios
|
||||
- `regular-test.sh` - Script for testing with regular users
|
||||
- Additional test files depending on the extension
|
||||
|
||||
## Available Extensions
|
||||
|
||||
This directory includes the following extensions:
|
||||
|
||||
- `hll-src` - HyperLogLog, a fixed-size data structure for approximating cardinality
|
||||
- `hypopg-src` - Extension to create hypothetical indexes
|
||||
- `ip4r-src` - IPv4/v6 and subnet data types
|
||||
- `pg_cron-src` - Run periodic jobs in PostgreSQL
|
||||
- `pg_graphql-src` - GraphQL support for PostgreSQL
|
||||
- `pg_hint_plan-src` - Execution plan hints
|
||||
- `pg_ivm-src` - Incremental view maintenance
|
||||
- `pg_jsonschema-src` - JSON Schema validation
|
||||
- `pg_repack-src` - Reorganize tables with minimal locks
|
||||
- `pg_roaringbitmap-src` - Roaring bitmap implementation
|
||||
- `pg_semver-src` - Semantic version data type
|
||||
- `pg_session_jwt-src` - JWT authentication for PostgreSQL
|
||||
- `pg_tiktoken-src` - OpenAI Tiktoken tokenizer
|
||||
- `pg_uuidv7-src` - UUIDv7 implementation for PostgreSQL
|
||||
- `pgjwt-src` - JWT tokens for PostgreSQL
|
||||
- `pgrag-src` - Retrieval Augmented Generation for PostgreSQL
|
||||
- `pgtap-src` - Unit testing framework for PostgreSQL
|
||||
- `pgvector-src` - Vector similarity search
|
||||
- `pgx_ulid-src` - ULID data type
|
||||
- `plv8-src` - JavaScript language for PostgreSQL stored procedures
|
||||
- `postgresql-unit-src` - SI units for PostgreSQL
|
||||
- `prefix-src` - Prefix matching for strings
|
||||
- `rag_bge_small_en_v15-src` - BGE embedding model for RAG
|
||||
- `rag_jina_reranker_v1_tiny_en-src` - Jina reranker model for RAG
|
||||
- `rum-src` - RUM access method for text search
|
||||
|
||||
## Usage
|
||||
|
||||
### Extension Upgrade Testing
|
||||
|
||||
The extensions in this directory are used by the `test-upgrade.sh` script to test upgrading extensions between different versions of Neon Compute nodes. The script:
|
||||
|
||||
1. Creates a database with extensions installed on an old Compute version
|
||||
2. Creates timelines for each extension
|
||||
3. Switches to a new Compute version and tests the upgrade process
|
||||
4. Verifies extension functionality after upgrade
|
||||
|
||||
### Regular User Testing
|
||||
|
||||
For testing with regular users (particularly for cloud instances), each extension directory typically contains a `regular-test.sh` script that:
|
||||
|
||||
1. Drops the database if it exists
|
||||
2. Creates a fresh test database
|
||||
3. Installs the extension
|
||||
4. Runs regression tests
|
||||
|
||||
A note about pg_regress: Since pg_regress attempts to set `lc_messages` for the database by default, which is forbidden for regular users, we create databases manually and use the `--use-existing` option to bypass this limitation.
|
||||
|
||||
### CI Workflows
|
||||
|
||||
Two main workflows use these extensions:
|
||||
|
||||
1. **Cloud Extensions Test** - Tests extensions on Neon cloud projects
|
||||
2. **Force Test Upgrading of Extension** - Tests upgrading extensions between different Compute versions
|
||||
|
||||
These workflows are integrated into the build-and-test pipeline through shell scripts:
|
||||
|
||||
- `docker_compose_test.sh` - Tests extensions in a Docker Compose environment
|
||||
|
||||
- `test_extensions_upgrade.sh` - Tests extension upgrades between different Compute versions
|
||||
|
||||
## Adding New Extensions
|
||||
|
||||
To add a new extension for testing:
|
||||
|
||||
1. Create a directory named `extension-name-src` in this directory
|
||||
2. Add at minimum:
|
||||
- `regular-test.sh` for testing with regular users
|
||||
- If `regular-test.sh` doesn't exist, the system will look for `neon-test.sh`
|
||||
- If neither exists, it will try to run `make installcheck`
|
||||
- `test-upgrade.sh` is only needed if you want to test upgrade scenarios
|
||||
3. Update the list of extensions in the `test_extensions_upgrade.sh` script if needed for upgrade testing
|
||||
|
||||
### Patching Extension Sources
|
||||
|
||||
If you need to patch the extension sources:
|
||||
|
||||
1. Place the patch file in the extension's directory
|
||||
2. Apply the patch in the appropriate script (`test-upgrade.sh`, `neon-test.sh`, `regular-test.sh`, or `Makefile`)
|
||||
3. The patch will be applied during the testing process
|
||||
@@ -1,7 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
dropdb --if-exists contrib_regression
|
||||
createdb contrib_regression
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression setup add_agg agg_oob auto_sparse card_op cast_shape copy_binary cumulative_add_cardinality_correction cumulative_add_comprehensive_promotion cumulative_add_sparse_edge cumulative_add_sparse_random cumulative_add_sparse_step cumulative_union_comprehensive cumulative_union_explicit_explicit cumulative_union_explicit_promotion cumulative_union_probabilistic_probabilistic cumulative_union_sparse_full_representation cumulative_union_sparse_promotion cumulative_union_sparse_sparse disable_hashagg equal explicit_thresh hash hash_any meta_func murmur_bigint murmur_bytea nosparse notequal scalar_oob storedproc transaction typmod typmod_insert union_op
|
||||
@@ -1,7 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
dropdb --if-exists contrib_regression
|
||||
createdb contrib_regression
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --inputdir=test --dbname=contrib_regression hypopg hypo_brin hypo_index_part hypo_include hypo_hash hypo_hide_index
|
||||
@@ -1,7 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
dropdb --if-exist contrib_regression
|
||||
createdb contrib_regression
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression ip4r ip4r-softerr ip4r-v11
|
||||
@@ -1,7 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
dropdb --if-exist contrib_regression
|
||||
createdb contrib_regression
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression pg_cron-test
|
||||
@@ -1,23 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
cd "$(dirname "${0}")"
|
||||
PGXS="$(dirname "$(pg_config --pgxs)" )"
|
||||
REGRESS="${PGXS}/../test/regress/pg_regress"
|
||||
TESTDIR="test"
|
||||
TESTS=$(ls "${TESTDIR}/sql" | sort )
|
||||
TESTS=${TESTS//\.sql/}
|
||||
TESTS=${TESTS/empty_mutations/}
|
||||
TESTS=${TESTS/function_return_row_is_selectable/}
|
||||
TESTS=${TESTS/issue_300/}
|
||||
TESTS=${TESTS/permissions_connection_column/}
|
||||
TESTS=${TESTS/permissions_functions/}
|
||||
TESTS=${TESTS/permissions_node_column/}
|
||||
TESTS=${TESTS/permissions_table_level/}
|
||||
TESTS=${TESTS/permissions_types/}
|
||||
TESTS=${TESTS/row_level_security/}
|
||||
TESTS=${TESTS/sqli_connection/}
|
||||
dropdb --if-exist contrib_regression
|
||||
createdb contrib_regression
|
||||
psql -v ON_ERROR_STOP=1 -f test/fixtures.sql -d contrib_regression
|
||||
${REGRESS} --use-existing --dbname=contrib_regression --inputdir=${TESTDIR} ${TESTS}
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
dropdb --if-exist contrib_regression
|
||||
createdb contrib_regression
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --encoding=UTF8 --dbname=contrib_regression init base_plan pg_hint_plan ut-init ut-A ut-S ut-J ut-L ut-G ut-R ut-fdw ut-W ut-T ut-fini hints_anywhere plpgsql oldextversions
|
||||
@@ -1,9 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
dropdb --if-exist contrib_regression
|
||||
createdb contrib_regression
|
||||
cd "$(dirname ${0})"
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
patch -p1 <regular.patch
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression pg_ivm create_immv refresh_immv
|
||||
patch -R -p1 <regular.patch
|
||||
@@ -1,309 +0,0 @@
|
||||
diff --git a/expected/pg_ivm.out b/expected/pg_ivm.out
|
||||
index e8798ee..4081680 100644
|
||||
--- a/expected/pg_ivm.out
|
||||
+++ b/expected/pg_ivm.out
|
||||
@@ -1363,61 +1363,6 @@ SELECT * FROM mv ORDER BY i;
|
||||
| 2 | 4 | 2 | 2 | 2
|
||||
(1 row)
|
||||
|
||||
-ROLLBACK;
|
||||
--- IMMV containing user defined type
|
||||
-BEGIN;
|
||||
-CREATE TYPE mytype;
|
||||
-CREATE FUNCTION mytype_in(cstring)
|
||||
- RETURNS mytype AS 'int4in'
|
||||
- LANGUAGE INTERNAL STRICT IMMUTABLE;
|
||||
-NOTICE: return type mytype is only a shell
|
||||
-CREATE FUNCTION mytype_out(mytype)
|
||||
- RETURNS cstring AS 'int4out'
|
||||
- LANGUAGE INTERNAL STRICT IMMUTABLE;
|
||||
-NOTICE: argument type mytype is only a shell
|
||||
-CREATE TYPE mytype (
|
||||
- LIKE = int4,
|
||||
- INPUT = mytype_in,
|
||||
- OUTPUT = mytype_out
|
||||
-);
|
||||
-CREATE FUNCTION mytype_eq(mytype, mytype)
|
||||
- RETURNS bool AS 'int4eq'
|
||||
- LANGUAGE INTERNAL STRICT IMMUTABLE;
|
||||
-CREATE FUNCTION mytype_lt(mytype, mytype)
|
||||
- RETURNS bool AS 'int4lt'
|
||||
- LANGUAGE INTERNAL STRICT IMMUTABLE;
|
||||
-CREATE FUNCTION mytype_cmp(mytype, mytype)
|
||||
- RETURNS integer AS 'btint4cmp'
|
||||
- LANGUAGE INTERNAL STRICT IMMUTABLE;
|
||||
-CREATE OPERATOR = (
|
||||
- leftarg = mytype, rightarg = mytype,
|
||||
- procedure = mytype_eq);
|
||||
-CREATE OPERATOR < (
|
||||
- leftarg = mytype, rightarg = mytype,
|
||||
- procedure = mytype_lt);
|
||||
-CREATE OPERATOR CLASS mytype_ops
|
||||
- DEFAULT FOR TYPE mytype USING btree AS
|
||||
- OPERATOR 1 <,
|
||||
- OPERATOR 3 = ,
|
||||
- FUNCTION 1 mytype_cmp(mytype,mytype);
|
||||
-CREATE TABLE t_mytype (x mytype);
|
||||
-SELECT create_immv('mv_mytype',
|
||||
- 'SELECT * FROM t_mytype');
|
||||
-NOTICE: could not create an index on immv "mv_mytype" automatically
|
||||
-DETAIL: This target list does not have all the primary key columns, or this view does not contain GROUP BY or DISTINCT clause.
|
||||
-HINT: Create an index on the immv for efficient incremental maintenance.
|
||||
- create_immv
|
||||
--------------
|
||||
- 0
|
||||
-(1 row)
|
||||
-
|
||||
-INSERT INTO t_mytype VALUES ('1'::mytype);
|
||||
-SELECT * FROM mv_mytype;
|
||||
- x
|
||||
----
|
||||
- 1
|
||||
-(1 row)
|
||||
-
|
||||
ROLLBACK;
|
||||
-- outer join is not supported
|
||||
SELECT create_immv('mv(a,b)',
|
||||
@@ -1510,112 +1455,6 @@ SELECT create_immv('mv_ivm_only_values1', 'values(1)');
|
||||
ERROR: VALUES is not supported on incrementally maintainable materialized view
|
||||
SELECT create_immv('mv_ivm_only_values2', 'SELECT * FROM (values(1)) AS tmp');
|
||||
ERROR: VALUES is not supported on incrementally maintainable materialized view
|
||||
--- views containing base tables with Row Level Security
|
||||
-DROP USER IF EXISTS ivm_admin;
|
||||
-NOTICE: role "ivm_admin" does not exist, skipping
|
||||
-DROP USER IF EXISTS ivm_user;
|
||||
-NOTICE: role "ivm_user" does not exist, skipping
|
||||
-CREATE USER ivm_admin;
|
||||
-CREATE USER ivm_user;
|
||||
---- create a table with RLS
|
||||
-SET SESSION AUTHORIZATION ivm_admin;
|
||||
-CREATE TABLE rls_tbl(id int, data text, owner name);
|
||||
-INSERT INTO rls_tbl VALUES
|
||||
- (1,'foo','ivm_user'),
|
||||
- (2,'bar','postgres');
|
||||
-CREATE TABLE num_tbl(id int, num text);
|
||||
-INSERT INTO num_tbl VALUES
|
||||
- (1,'one'),
|
||||
- (2,'two'),
|
||||
- (3,'three'),
|
||||
- (4,'four'),
|
||||
- (5,'five'),
|
||||
- (6,'six');
|
||||
---- Users can access only their own rows
|
||||
-CREATE POLICY rls_tbl_policy ON rls_tbl FOR SELECT TO PUBLIC USING(owner = current_user);
|
||||
-ALTER TABLE rls_tbl ENABLE ROW LEVEL SECURITY;
|
||||
-GRANT ALL on rls_tbl TO PUBLIC;
|
||||
-GRANT ALL on num_tbl TO PUBLIC;
|
||||
---- create a view owned by ivm_user
|
||||
-SET SESSION AUTHORIZATION ivm_user;
|
||||
-SELECT create_immv('ivm_rls', 'SELECT * FROM rls_tbl');
|
||||
-NOTICE: could not create an index on immv "ivm_rls" automatically
|
||||
-DETAIL: This target list does not have all the primary key columns, or this view does not contain GROUP BY or DISTINCT clause.
|
||||
-HINT: Create an index on the immv for efficient incremental maintenance.
|
||||
- create_immv
|
||||
--------------
|
||||
- 1
|
||||
-(1 row)
|
||||
-
|
||||
-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;
|
||||
- id | data | owner
|
||||
-----+------+----------
|
||||
- 1 | foo | ivm_user
|
||||
-(1 row)
|
||||
-
|
||||
-RESET SESSION AUTHORIZATION;
|
||||
---- inserts rows owned by different users
|
||||
-INSERT INTO rls_tbl VALUES
|
||||
- (3,'baz','ivm_user'),
|
||||
- (4,'qux','postgres');
|
||||
-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;
|
||||
- id | data | owner
|
||||
-----+------+----------
|
||||
- 1 | foo | ivm_user
|
||||
- 3 | baz | ivm_user
|
||||
-(2 rows)
|
||||
-
|
||||
---- combination of diffent kinds of commands
|
||||
-WITH
|
||||
- i AS (INSERT INTO rls_tbl VALUES(5,'quux','postgres'), (6,'corge','ivm_user')),
|
||||
- u AS (UPDATE rls_tbl SET owner = 'postgres' WHERE id = 1),
|
||||
- u2 AS (UPDATE rls_tbl SET owner = 'ivm_user' WHERE id = 2)
|
||||
-SELECT;
|
||||
---
|
||||
-(1 row)
|
||||
-
|
||||
-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;
|
||||
- id | data | owner
|
||||
-----+-------+----------
|
||||
- 2 | bar | ivm_user
|
||||
- 3 | baz | ivm_user
|
||||
- 6 | corge | ivm_user
|
||||
-(3 rows)
|
||||
-
|
||||
----
|
||||
-SET SESSION AUTHORIZATION ivm_user;
|
||||
-SELECT create_immv('ivm_rls2', 'SELECT * FROM rls_tbl JOIN num_tbl USING(id)');
|
||||
-NOTICE: could not create an index on immv "ivm_rls2" automatically
|
||||
-DETAIL: This target list does not have all the primary key columns, or this view does not contain GROUP BY or DISTINCT clause.
|
||||
-HINT: Create an index on the immv for efficient incremental maintenance.
|
||||
- create_immv
|
||||
--------------
|
||||
- 3
|
||||
-(1 row)
|
||||
-
|
||||
-RESET SESSION AUTHORIZATION;
|
||||
-WITH
|
||||
- x AS (UPDATE rls_tbl SET data = data || '_2' where id in (3,4)),
|
||||
- y AS (UPDATE num_tbl SET num = num || '_2' where id in (3,4))
|
||||
-SELECT;
|
||||
---
|
||||
-(1 row)
|
||||
-
|
||||
-SELECT * FROM ivm_rls2 ORDER BY 1,2,3;
|
||||
- id | data | owner | num
|
||||
-----+-------+----------+---------
|
||||
- 2 | bar | ivm_user | two
|
||||
- 3 | baz_2 | ivm_user | three_2
|
||||
- 6 | corge | ivm_user | six
|
||||
-(3 rows)
|
||||
-
|
||||
-DROP TABLE rls_tbl CASCADE;
|
||||
-NOTICE: drop cascades to 2 other objects
|
||||
-DETAIL: drop cascades to table ivm_rls
|
||||
-drop cascades to table ivm_rls2
|
||||
-DROP TABLE num_tbl CASCADE;
|
||||
-DROP USER ivm_user;
|
||||
-DROP USER ivm_admin;
|
||||
-- automatic index creation
|
||||
BEGIN;
|
||||
CREATE TABLE base_a (i int primary key, j int);
|
||||
diff --git a/sql/pg_ivm.sql b/sql/pg_ivm.sql
|
||||
index d3c1a01..203213d 100644
|
||||
--- a/sql/pg_ivm.sql
|
||||
+++ b/sql/pg_ivm.sql
|
||||
@@ -454,53 +454,6 @@ DELETE FROM base_t WHERE v = 5;
|
||||
SELECT * FROM mv ORDER BY i;
|
||||
ROLLBACK;
|
||||
|
||||
--- IMMV containing user defined type
|
||||
-BEGIN;
|
||||
-
|
||||
-CREATE TYPE mytype;
|
||||
-CREATE FUNCTION mytype_in(cstring)
|
||||
- RETURNS mytype AS 'int4in'
|
||||
- LANGUAGE INTERNAL STRICT IMMUTABLE;
|
||||
-CREATE FUNCTION mytype_out(mytype)
|
||||
- RETURNS cstring AS 'int4out'
|
||||
- LANGUAGE INTERNAL STRICT IMMUTABLE;
|
||||
-CREATE TYPE mytype (
|
||||
- LIKE = int4,
|
||||
- INPUT = mytype_in,
|
||||
- OUTPUT = mytype_out
|
||||
-);
|
||||
-
|
||||
-CREATE FUNCTION mytype_eq(mytype, mytype)
|
||||
- RETURNS bool AS 'int4eq'
|
||||
- LANGUAGE INTERNAL STRICT IMMUTABLE;
|
||||
-CREATE FUNCTION mytype_lt(mytype, mytype)
|
||||
- RETURNS bool AS 'int4lt'
|
||||
- LANGUAGE INTERNAL STRICT IMMUTABLE;
|
||||
-CREATE FUNCTION mytype_cmp(mytype, mytype)
|
||||
- RETURNS integer AS 'btint4cmp'
|
||||
- LANGUAGE INTERNAL STRICT IMMUTABLE;
|
||||
-
|
||||
-CREATE OPERATOR = (
|
||||
- leftarg = mytype, rightarg = mytype,
|
||||
- procedure = mytype_eq);
|
||||
-CREATE OPERATOR < (
|
||||
- leftarg = mytype, rightarg = mytype,
|
||||
- procedure = mytype_lt);
|
||||
-
|
||||
-CREATE OPERATOR CLASS mytype_ops
|
||||
- DEFAULT FOR TYPE mytype USING btree AS
|
||||
- OPERATOR 1 <,
|
||||
- OPERATOR 3 = ,
|
||||
- FUNCTION 1 mytype_cmp(mytype,mytype);
|
||||
-
|
||||
-CREATE TABLE t_mytype (x mytype);
|
||||
-SELECT create_immv('mv_mytype',
|
||||
- 'SELECT * FROM t_mytype');
|
||||
-INSERT INTO t_mytype VALUES ('1'::mytype);
|
||||
-SELECT * FROM mv_mytype;
|
||||
-
|
||||
-ROLLBACK;
|
||||
-
|
||||
-- outer join is not supported
|
||||
SELECT create_immv('mv(a,b)',
|
||||
'SELECT a.i, b.i FROM mv_base_a a LEFT JOIN mv_base_b b ON a.i=b.i');
|
||||
@@ -579,71 +532,6 @@ SELECT create_immv('mv_ivm31', 'SELECT sum(i)/sum(j) FROM mv_base_a');
|
||||
SELECT create_immv('mv_ivm_only_values1', 'values(1)');
|
||||
SELECT create_immv('mv_ivm_only_values2', 'SELECT * FROM (values(1)) AS tmp');
|
||||
|
||||
-
|
||||
--- views containing base tables with Row Level Security
|
||||
-DROP USER IF EXISTS ivm_admin;
|
||||
-DROP USER IF EXISTS ivm_user;
|
||||
-CREATE USER ivm_admin;
|
||||
-CREATE USER ivm_user;
|
||||
-
|
||||
---- create a table with RLS
|
||||
-SET SESSION AUTHORIZATION ivm_admin;
|
||||
-CREATE TABLE rls_tbl(id int, data text, owner name);
|
||||
-INSERT INTO rls_tbl VALUES
|
||||
- (1,'foo','ivm_user'),
|
||||
- (2,'bar','postgres');
|
||||
-CREATE TABLE num_tbl(id int, num text);
|
||||
-INSERT INTO num_tbl VALUES
|
||||
- (1,'one'),
|
||||
- (2,'two'),
|
||||
- (3,'three'),
|
||||
- (4,'four'),
|
||||
- (5,'five'),
|
||||
- (6,'six');
|
||||
-
|
||||
---- Users can access only their own rows
|
||||
-CREATE POLICY rls_tbl_policy ON rls_tbl FOR SELECT TO PUBLIC USING(owner = current_user);
|
||||
-ALTER TABLE rls_tbl ENABLE ROW LEVEL SECURITY;
|
||||
-GRANT ALL on rls_tbl TO PUBLIC;
|
||||
-GRANT ALL on num_tbl TO PUBLIC;
|
||||
-
|
||||
---- create a view owned by ivm_user
|
||||
-SET SESSION AUTHORIZATION ivm_user;
|
||||
-SELECT create_immv('ivm_rls', 'SELECT * FROM rls_tbl');
|
||||
-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;
|
||||
-RESET SESSION AUTHORIZATION;
|
||||
-
|
||||
---- inserts rows owned by different users
|
||||
-INSERT INTO rls_tbl VALUES
|
||||
- (3,'baz','ivm_user'),
|
||||
- (4,'qux','postgres');
|
||||
-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;
|
||||
-
|
||||
---- combination of diffent kinds of commands
|
||||
-WITH
|
||||
- i AS (INSERT INTO rls_tbl VALUES(5,'quux','postgres'), (6,'corge','ivm_user')),
|
||||
- u AS (UPDATE rls_tbl SET owner = 'postgres' WHERE id = 1),
|
||||
- u2 AS (UPDATE rls_tbl SET owner = 'ivm_user' WHERE id = 2)
|
||||
-SELECT;
|
||||
-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;
|
||||
-
|
||||
----
|
||||
-SET SESSION AUTHORIZATION ivm_user;
|
||||
-SELECT create_immv('ivm_rls2', 'SELECT * FROM rls_tbl JOIN num_tbl USING(id)');
|
||||
-RESET SESSION AUTHORIZATION;
|
||||
-
|
||||
-WITH
|
||||
- x AS (UPDATE rls_tbl SET data = data || '_2' where id in (3,4)),
|
||||
- y AS (UPDATE num_tbl SET num = num || '_2' where id in (3,4))
|
||||
-SELECT;
|
||||
-SELECT * FROM ivm_rls2 ORDER BY 1,2,3;
|
||||
-
|
||||
-DROP TABLE rls_tbl CASCADE;
|
||||
-DROP TABLE num_tbl CASCADE;
|
||||
-
|
||||
-DROP USER ivm_user;
|
||||
-DROP USER ivm_admin;
|
||||
-
|
||||
-- automatic index creation
|
||||
BEGIN;
|
||||
CREATE TABLE base_a (i int primary key, j int);
|
||||
@@ -1,13 +1,8 @@
|
||||
EXTENSION = pg_jsonschema
|
||||
DATA = pg_jsonschema--1.0.sql
|
||||
REGRESS = jsonschema_valid_api jsonschema_edge_cases
|
||||
REGRESS_OPTS = --load-extension=pg_jsonschema
|
||||
|
||||
PG_CONFIG ?= pg_config
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
PG_REGRESS := $(dir $(PGXS))../../src/test/regress/pg_regress
|
||||
.PHONY installcheck:
|
||||
installcheck:
|
||||
dropdb --if-exists contrib_regression
|
||||
createdb contrib_regression
|
||||
psql -d contrib_regression -c "CREATE EXTENSION $(EXTENSION)"
|
||||
$(PG_REGRESS) --use-existing --dbname=contrib_regression $(REGRESS)
|
||||
include $(PGXS)
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
dropdb --if-exist contrib_regression
|
||||
createdb contrib_regression
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression roaringbitmap
|
||||
@@ -1,12 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
# For v16 it's required to create a type which is impossible without superuser access
|
||||
# do not run this test so far
|
||||
if [[ "${PG_VERSION}" = v16 ]]; then
|
||||
exit 0
|
||||
fi
|
||||
cd "$(dirname ${0})"
|
||||
dropdb --if-exist contrib_regression
|
||||
createdb contrib_regression
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --dbname=contrib_regression base corpus
|
||||
@@ -6,10 +6,4 @@ export PGOPTIONS = -c pg_session_jwt.jwk={"crv":"Ed25519","kty":"OKP","x":"R_Abz
|
||||
|
||||
PG_CONFIG ?= pg_config
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
PG_REGRESS := $(dir $(PGXS))../../src/test/regress/pg_regress
|
||||
.PHONY installcheck:
|
||||
installcheck:
|
||||
dropdb --if-exists contrib_regression
|
||||
createdb contrib_regression
|
||||
psql -d contrib_regression -c "CREATE EXTENSION $(EXTENSION)"
|
||||
$(PG_REGRESS) --use-existing --dbname=contrib_regression $(REGRESS)
|
||||
include $(PGXS)
|
||||
@@ -5,6 +5,4 @@ REGRESS = pg_tiktoken
|
||||
installcheck: regression-test
|
||||
|
||||
regression-test:
|
||||
dropdb --if-exists contrib_regression
|
||||
createdb contrib_regression
|
||||
$(PG_REGRESS) --inputdir=. --outputdir=. --use-existing --dbname=contrib_regression $(REGRESS)
|
||||
$(PG_REGRESS) --inputdir=. --outputdir=. --dbname=contrib_regression $(REGRESS)
|
||||
@@ -1,7 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname "${0}")"
|
||||
dropdb --if-exist contrib_regression
|
||||
createdb contrib_regression
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --dbname=contrib_regression 001_setup 002_uuid_generate_v7 003_uuid_v7_to_timestamptz 004_uuid_timestamptz_to_v7 005_uuid_v7_to_timestamp 006_uuid_timestamp_to_v7
|
||||
@@ -1,6 +1,4 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
cd "$(dirname "${0}")"
|
||||
dropdb --if-exists contrib_regression
|
||||
createdb contrib_regression
|
||||
pg_prove -d contrib_regression test.sql
|
||||
pg_prove test.sql
|
||||
@@ -1,8 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname "${0}")"
|
||||
dropdb --if-exist contrib_regression
|
||||
createdb contrib_regression
|
||||
psql -d contrib_regression -c "CREATE EXTENSION vector" -c "CREATE EXTENSION rag"
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --load-extension=vector --load-extension=rag --dbname=contrib_regression basic_functions text_processing api_keys chunking_functions document_processing embedding_api_functions voyageai_functions
|
||||
@@ -1,10 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
make installcheck || true
|
||||
dropdb --if-exist contrib_regression
|
||||
createdb contrib_regression
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
sed -i '/hastap/d' test/build/run.sch
|
||||
sed -Ei 's/\b(aretap|enumtap|ownership|privs|usergroup)\b//g' test/build/run.sch
|
||||
${PG_REGRESS} --use-existing --dbname=contrib_regression --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --max-connections=879 --schedule test/schedule/main.sch --schedule test/build/run.sch
|
||||
@@ -1,8 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
dropdb --if-exist contrib_regression
|
||||
createdb contrib_regression
|
||||
psql -d contrib_regression -c "CREATE EXTENSION vector"
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --use-existing --dbname=contrib_regression bit btree cast copy halfvec hnsw_bit hnsw_halfvec hnsw_sparsevec hnsw_vector ivfflat_bit ivfflat_halfvec ivfflat_vector sparsevec vector_type
|
||||
@@ -4,21 +4,13 @@ PGFILEDESC = "pgx_ulid - ULID type for PostgreSQL"
|
||||
|
||||
PG_CONFIG ?= pg_config
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
PG_REGRESS = $(dir $(PGXS))/../../src/test/regress/pg_regress
|
||||
PG_MAJOR_VERSION := $(word 2, $(subst ., , $(shell $(PG_CONFIG) --version)))
|
||||
ifeq ($(shell test $(PG_MAJOR_VERSION) -lt 17; echo $$?),0)
|
||||
REGRESS_OPTS = --load-extension=ulid
|
||||
REGRESS = 00_ulid_generation 01_ulid_conversions 03_ulid_errors
|
||||
EXTNAME = ulid
|
||||
else
|
||||
REGRESS_OPTS = --load-extension=pgx_ulid
|
||||
REGRESS = 00_ulid_generation 01_ulid_conversions 02_ulid_conversions 03_ulid_errors
|
||||
EXTNAME = pgx_ulid
|
||||
endif
|
||||
|
||||
.PHONY: installcheck
|
||||
installcheck: regression-test
|
||||
|
||||
regression-test:
|
||||
dropdb --if-exists contrib_regression
|
||||
createdb contrib_regression
|
||||
psql -d contrib_regression -c "CREATE EXTENSION $(EXTNAME)"
|
||||
$(PG_REGRESS) --inputdir=. --outputdir=. --use-existing --dbname=contrib_regression $(REGRESS)
|
||||
include $(PGXS)
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
dropdb --if-exist contrib_regression
|
||||
createdb contrib_regression
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
REGRESS="$(make -n installcheck | awk '{print substr($0,index($0,"init-extension"));}')"
|
||||
REGRESS="${REGRESS/startup_perms/}"
|
||||
REGRESS="${REGRESS/startup /}"
|
||||
REGRESS="${REGRESS/find_function_perms/}"
|
||||
REGRESS="${REGRESS/guc/}"
|
||||
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression ${REGRESS}
|
||||
@@ -1,7 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
dropdb --if-exist contrib_regression
|
||||
createdb contrib_regression
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression extension tables unit binary unicode prefix units time temperature functions language_functions round derived compare aggregate iec custom crosstab convert
|
||||
@@ -1,7 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
dropdb --if-exist contrib_regression
|
||||
createdb contrib_regression
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression create_extension prefix falcon explain queries
|
||||
@@ -3,13 +3,8 @@ MODULE_big = rag_bge_small_en_v15
|
||||
OBJS = $(patsubst %.rs,%.o,$(wildcard src/*.rs))
|
||||
|
||||
REGRESS = basic_functions embedding_functions basic_functions_enhanced embedding_functions_enhanced
|
||||
REGRESS_OPTS = --load-extension=vector --load-extension=rag_bge_small_en_v15
|
||||
|
||||
PG_CONFIG = pg_config
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
PG_REGRESS := $(dir $(PGXS))../../src/test/regress/pg_regress
|
||||
.PHONY installcheck:
|
||||
installcheck:
|
||||
dropdb --if-exists contrib_regression
|
||||
createdb contrib_regression
|
||||
psql -d contrib_regression -c "CREATE EXTENSION vector" -c "CREATE EXTENSION rag_bge_small_en_v15"
|
||||
$(PG_REGRESS) --use-existing --dbname=contrib_regression $(REGRESS)
|
||||
include $(PGXS)
|
||||
|
||||
@@ -3,13 +3,8 @@ MODULE_big = rag_jina_reranker_v1_tiny_en
|
||||
OBJS = $(patsubst %.rs,%.o,$(wildcard src/*.rs))
|
||||
|
||||
REGRESS = reranking_functions reranking_functions_enhanced
|
||||
REGRESS_OPTS = --load-extension=vector --load-extension=rag_jina_reranker_v1_tiny_en
|
||||
|
||||
PG_CONFIG = pg_config
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
PG_REGRESS := $(dir $(PGXS))../../src/test/regress/pg_regress
|
||||
.PHONY installcheck:
|
||||
installcheck:
|
||||
dropdb --if-exists contrib_regression
|
||||
createdb contrib_regression
|
||||
psql -d contrib_regression -c "CREATE EXTENSION vector" -c "CREATE EXTENSION rag_jina_reranker_v1_tiny_en"
|
||||
$(PG_REGRESS) --use-existing --dbname=contrib_regression $(REGRESS)
|
||||
include $(PGXS)
|
||||
|
||||
@@ -1,27 +1,25 @@
|
||||
-- Reranking function tests
|
||||
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);
|
||||
round
|
||||
--------
|
||||
0.8989
|
||||
(1 row)
|
||||
|
||||
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',
|
||||
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);
|
||||
array
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon');
|
||||
rerank_distance
|
||||
-----------------
|
||||
{0.8989,1.3018}
|
||||
0.8989152
|
||||
(1 row)
|
||||
|
||||
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);
|
||||
round
|
||||
---------
|
||||
-0.8989
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
|
||||
rerank_distance
|
||||
-----------------------
|
||||
{0.8989152,1.3018152}
|
||||
(1 row)
|
||||
|
||||
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',
|
||||
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) as x);
|
||||
array
|
||||
-------------------
|
||||
{-0.8989,-1.3018}
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon');
|
||||
rerank_score
|
||||
--------------
|
||||
-0.8989152
|
||||
(1 row)
|
||||
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
|
||||
rerank_score
|
||||
-------------------------
|
||||
{-0.8989152,-1.3018152}
|
||||
(1 row)
|
||||
|
||||
|
||||
@@ -1,41 +1,41 @@
|
||||
-- Reranking function tests - single passage
|
||||
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);
|
||||
round
|
||||
--------
|
||||
0.8989
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon');
|
||||
rerank_distance
|
||||
-----------------
|
||||
0.8989152
|
||||
(1 row)
|
||||
|
||||
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the tanks fired at the buildings')::NUMERIC,4);
|
||||
round
|
||||
--------
|
||||
1.3018
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the tanks fired at the buildings');
|
||||
rerank_distance
|
||||
-----------------
|
||||
1.3018152
|
||||
(1 row)
|
||||
|
||||
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('query about cats', 'information about felines')::NUMERIC,4);
|
||||
round
|
||||
--------
|
||||
1.3133
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('query about cats', 'information about felines');
|
||||
rerank_distance
|
||||
-----------------
|
||||
1.3133051
|
||||
(1 row)
|
||||
|
||||
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('', 'empty query test')::NUMERIC,4);
|
||||
round
|
||||
--------
|
||||
0.7076
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('', 'empty query test');
|
||||
rerank_distance
|
||||
-----------------
|
||||
0.7075559
|
||||
(1 row)
|
||||
|
||||
-- Reranking function tests - array of passages
|
||||
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',
|
||||
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);
|
||||
array
|
||||
-----------------
|
||||
{0.8989,1.3018}
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',
|
||||
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
|
||||
rerank_distance
|
||||
-----------------------
|
||||
{0.8989152,1.3018152}
|
||||
(1 row)
|
||||
|
||||
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('query about programming',
|
||||
ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases'])) AS x);
|
||||
array
|
||||
------------------------
|
||||
{0.1659,0.3348,0.1013}
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('query about programming',
|
||||
ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases']);
|
||||
rerank_distance
|
||||
------------------------------------
|
||||
{0.16591403,0.33475375,0.10132827}
|
||||
(1 row)
|
||||
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('empty array test', ARRAY[]::text[]);
|
||||
@@ -45,43 +45,43 @@ SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('empty array test', ARRAY[]:
|
||||
(1 row)
|
||||
|
||||
-- Reranking score function tests - single passage
|
||||
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);
|
||||
round
|
||||
---------
|
||||
-0.8989
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon');
|
||||
rerank_score
|
||||
--------------
|
||||
-0.8989152
|
||||
(1 row)
|
||||
|
||||
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the tanks fired at the buildings')::NUMERIC,4);
|
||||
round
|
||||
---------
|
||||
-1.3018
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the tanks fired at the buildings');
|
||||
rerank_score
|
||||
--------------
|
||||
-1.3018152
|
||||
(1 row)
|
||||
|
||||
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('query about cats', 'information about felines')::NUMERIC,4);
|
||||
round
|
||||
---------
|
||||
-1.3133
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('query about cats', 'information about felines');
|
||||
rerank_score
|
||||
--------------
|
||||
-1.3133051
|
||||
(1 row)
|
||||
|
||||
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('', 'empty query test')::NUMERIC,4);
|
||||
round
|
||||
---------
|
||||
-0.7076
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('', 'empty query test');
|
||||
rerank_score
|
||||
--------------
|
||||
-0.7075559
|
||||
(1 row)
|
||||
|
||||
-- Reranking score function tests - array of passages
|
||||
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',
|
||||
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);
|
||||
array
|
||||
-------------------
|
||||
{-0.8989,-1.3018}
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',
|
||||
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
|
||||
rerank_score
|
||||
-------------------------
|
||||
{-0.8989152,-1.3018152}
|
||||
(1 row)
|
||||
|
||||
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('query about programming',
|
||||
ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases'])) AS x);
|
||||
array
|
||||
---------------------------
|
||||
{-0.1659,-0.3348,-0.1013}
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('query about programming',
|
||||
ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases']);
|
||||
rerank_score
|
||||
---------------------------------------
|
||||
{-0.16591403,-0.33475375,-0.10132827}
|
||||
(1 row)
|
||||
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('empty array test', ARRAY[]::text[]);
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
-- Reranking function tests
|
||||
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon');
|
||||
|
||||
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',
|
||||
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
|
||||
|
||||
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon');
|
||||
|
||||
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',
|
||||
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) as x);
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
|
||||
@@ -1,35 +1,35 @@
|
||||
-- Reranking function tests - single passage
|
||||
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon');
|
||||
|
||||
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the tanks fired at the buildings')::NUMERIC,4);
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the tanks fired at the buildings');
|
||||
|
||||
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('query about cats', 'information about felines')::NUMERIC,4);
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('query about cats', 'information about felines');
|
||||
|
||||
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('', 'empty query test')::NUMERIC,4);
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('', 'empty query test');
|
||||
|
||||
-- Reranking function tests - array of passages
|
||||
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',
|
||||
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',
|
||||
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
|
||||
|
||||
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('query about programming',
|
||||
ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases'])) AS x);
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('query about programming',
|
||||
ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases']);
|
||||
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('empty array test', ARRAY[]::text[]);
|
||||
|
||||
-- Reranking score function tests - single passage
|
||||
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon');
|
||||
|
||||
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the tanks fired at the buildings')::NUMERIC,4);
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the tanks fired at the buildings');
|
||||
|
||||
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('query about cats', 'information about felines')::NUMERIC,4);
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('query about cats', 'information about felines');
|
||||
|
||||
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('', 'empty query test')::NUMERIC,4);
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('', 'empty query test');
|
||||
|
||||
-- Reranking score function tests - array of passages
|
||||
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',
|
||||
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',
|
||||
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
|
||||
|
||||
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('query about programming',
|
||||
ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases'])) AS x);
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('query about programming',
|
||||
ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases']);
|
||||
|
||||
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('empty array test', ARRAY[]::text[]);
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
dropdb --if-exist contrib_regression
|
||||
createdb contrib_regression
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression rum rum_hash ruminv timestamp orderby orderby_hash altorder altorder_hash limits int2 int4 int8 float4 float8 money oid time timetz date interval macaddr inet cidr text varchar char bytea bit varbit numeric rum_weight expr array
|
||||
44
docker-compose/run-tests.sh
Executable file → Normal file
44
docker-compose/run-tests.sh
Executable file → Normal file
@@ -1,42 +1,6 @@
|
||||
#!/bin/bash
|
||||
set -x
|
||||
|
||||
if [[ -v BENCHMARK_CONNSTR ]]; then
|
||||
uri_no_proto="${BENCHMARK_CONNSTR#postgres://}"
|
||||
uri_no_proto="${uri_no_proto#postgresql://}"
|
||||
if [[ $uri_no_proto == *\?* ]]; then
|
||||
base="${uri_no_proto%%\?*}" # before '?'
|
||||
else
|
||||
base="$uri_no_proto"
|
||||
fi
|
||||
if [[ $base =~ ^([^:]+):([^@]+)@([^:/]+):?([0-9]*)/(.+)$ ]]; then
|
||||
export PGUSER="${BASH_REMATCH[1]}"
|
||||
export PGPASSWORD="${BASH_REMATCH[2]}"
|
||||
export PGHOST="${BASH_REMATCH[3]}"
|
||||
export PGPORT="${BASH_REMATCH[4]:-5432}"
|
||||
export PGDATABASE="${BASH_REMATCH[5]}"
|
||||
echo export PGUSER="${BASH_REMATCH[1]}"
|
||||
echo export PGPASSWORD="${BASH_REMATCH[2]}"
|
||||
echo export PGHOST="${BASH_REMATCH[3]}"
|
||||
echo export PGPORT="${BASH_REMATCH[4]:-5432}"
|
||||
echo export PGDATABASE="${BASH_REMATCH[5]}"
|
||||
else
|
||||
echo "Invalid PostgreSQL base URI"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
REGULAR_USER=false
|
||||
while getopts r arg; do
|
||||
case $arg in
|
||||
r)
|
||||
REGULAR_USER=true
|
||||
shift $((OPTIND-1))
|
||||
;;
|
||||
*) :
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
extdir=${1}
|
||||
|
||||
cd "${extdir}" || exit 2
|
||||
@@ -48,11 +12,6 @@ for d in ${LIST}; do
|
||||
FAILED="${d} ${FAILED}"
|
||||
break
|
||||
fi
|
||||
if [[ ${REGULAR_USER} = true ]] && [ -f "${d}"/regular-test.sh ]; then
|
||||
"${d}/regular-test.sh" || FAILED="${d} ${FAILED}"
|
||||
continue
|
||||
fi
|
||||
|
||||
if [ -f "${d}/neon-test.sh" ]; then
|
||||
"${d}/neon-test.sh" || FAILED="${d} ${FAILED}"
|
||||
else
|
||||
@@ -60,8 +19,5 @@ for d in ${LIST}; do
|
||||
fi
|
||||
done
|
||||
[ -z "${FAILED}" ] && exit 0
|
||||
for d in ${FAILED}; do
|
||||
cat "$(find $d -name regression.diffs)"
|
||||
done
|
||||
echo "${FAILED}"
|
||||
exit 1
|
||||
|
||||
@@ -13,7 +13,7 @@ For design details see [the RFC](./rfcs/021-metering.md) and [the discussion on
|
||||
batch format is
|
||||
```json
|
||||
|
||||
{ "events" : [metric1, metric2, ...] }
|
||||
{ "events" : [metric1, metric2, ...]]}
|
||||
|
||||
```
|
||||
See metric format examples below.
|
||||
@@ -49,13 +49,11 @@ Size of the remote storage (S3) directory.
|
||||
This is an absolute, per-tenant metric.
|
||||
|
||||
- `timeline_logical_size`
|
||||
|
||||
Logical size of the data in the timeline.
|
||||
Logical size of the data in the timeline
|
||||
This is an absolute, per-timeline metric.
|
||||
|
||||
- `synthetic_storage_size`
|
||||
|
||||
Size of all tenant's branches including WAL.
|
||||
Size of all tenant's branches including WAL
|
||||
This is the same metric that `tenant/{tenant_id}/size` endpoint returns.
|
||||
This is an absolute, per-tenant metric.
|
||||
|
||||
@@ -108,10 +106,10 @@ This is an incremental, per-endpoint metric.
|
||||
```
|
||||
|
||||
The metric is incremental, so the value is the difference between the current and the previous value.
|
||||
If there is no previous value, the value is the current value and the `start_time` equals `stop_time`.
|
||||
If there is no previous value, the value, the value is the current value and the `start_time` equals `stop_time`.
|
||||
|
||||
### TODO
|
||||
|
||||
- [ ] Handle errors better: currently if one tenant fails to gather metrics, the whole iteration fails and metrics are not sent for any tenant.
|
||||
- [ ] Add retries
|
||||
- [ ] Tune the interval
|
||||
- [ ] Tune the interval
|
||||
@@ -181,7 +181,6 @@ pub struct ConfigToml {
|
||||
pub generate_unarchival_heatmap: Option<bool>,
|
||||
pub tracing: Option<Tracing>,
|
||||
pub enable_tls_page_service_api: bool,
|
||||
pub dev_mode: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
@@ -658,7 +657,6 @@ impl Default for ConfigToml {
|
||||
generate_unarchival_heatmap: None,
|
||||
tracing: None,
|
||||
enable_tls_page_service_api: false,
|
||||
dev_mode: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -169,8 +169,6 @@ pub struct TenantDescribeResponseShard {
|
||||
pub is_pending_compute_notification: bool,
|
||||
/// A shard split is currently underway
|
||||
pub is_splitting: bool,
|
||||
/// A timeline is being imported into this tenant
|
||||
pub is_importing: bool,
|
||||
|
||||
pub scheduling_policy: ShardSchedulingPolicy,
|
||||
|
||||
|
||||
@@ -320,35 +320,6 @@ pub struct TimelineCreateRequest {
|
||||
pub mode: TimelineCreateRequestMode,
|
||||
}
|
||||
|
||||
impl TimelineCreateRequest {
|
||||
pub fn mode_tag(&self) -> &'static str {
|
||||
match &self.mode {
|
||||
TimelineCreateRequestMode::Branch { .. } => "branch",
|
||||
TimelineCreateRequestMode::ImportPgdata { .. } => "import",
|
||||
TimelineCreateRequestMode::Bootstrap { .. } => "bootstrap",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_import(&self) -> bool {
|
||||
matches!(self.mode, TimelineCreateRequestMode::ImportPgdata { .. })
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
pub enum ShardImportStatus {
|
||||
InProgress,
|
||||
Done,
|
||||
Error(String),
|
||||
}
|
||||
impl ShardImportStatus {
|
||||
pub fn is_terminal(&self) -> bool {
|
||||
match self {
|
||||
ShardImportStatus::InProgress => false,
|
||||
ShardImportStatus::Done | ShardImportStatus::Error(_) => true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Storage controller specific extensions to [`TimelineInfo`].
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct TimelineCreateResponseStorcon {
|
||||
@@ -1803,8 +1774,6 @@ pub struct TopTenantShardsResponse {
|
||||
}
|
||||
|
||||
pub mod virtual_file {
|
||||
use std::sync::LazyLock;
|
||||
|
||||
#[derive(
|
||||
Copy,
|
||||
Clone,
|
||||
@@ -1842,33 +1811,35 @@ pub mod virtual_file {
|
||||
pub enum IoMode {
|
||||
/// Uses buffered IO.
|
||||
Buffered,
|
||||
/// Uses direct IO for reads only.
|
||||
/// Uses direct IO, error out if the operation fails.
|
||||
#[cfg(target_os = "linux")]
|
||||
Direct,
|
||||
/// Use direct IO for reads and writes.
|
||||
#[cfg(target_os = "linux")]
|
||||
DirectRw,
|
||||
}
|
||||
|
||||
impl IoMode {
|
||||
pub fn preferred() -> Self {
|
||||
// The default behavior when running Rust unit tests without any further
|
||||
// flags is to use the newest behavior (DirectRw).
|
||||
// flags is to use the newest behavior if available on the platform (Direct).
|
||||
// The CI uses the following environment variable to unit tests for all
|
||||
// different modes.
|
||||
// NB: the Python regression & perf tests have their own defaults management
|
||||
// that writes pageserver.toml; they do not use this variable.
|
||||
if cfg!(test) {
|
||||
static CACHED: LazyLock<IoMode> = LazyLock::new(|| {
|
||||
use once_cell::sync::Lazy;
|
||||
static CACHED: Lazy<IoMode> = Lazy::new(|| {
|
||||
utils::env::var_serde_json_string(
|
||||
"NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IO_MODE",
|
||||
)
|
||||
.unwrap_or(
|
||||
.unwrap_or({
|
||||
#[cfg(target_os = "linux")]
|
||||
IoMode::DirectRw,
|
||||
{
|
||||
IoMode::Direct
|
||||
}
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
IoMode::Buffered,
|
||||
)
|
||||
{
|
||||
IoMode::Buffered
|
||||
}
|
||||
})
|
||||
});
|
||||
*CACHED
|
||||
} else {
|
||||
@@ -1885,8 +1856,6 @@ pub mod virtual_file {
|
||||
v if v == (IoMode::Buffered as u8) => IoMode::Buffered,
|
||||
#[cfg(target_os = "linux")]
|
||||
v if v == (IoMode::Direct as u8) => IoMode::Direct,
|
||||
#[cfg(target_os = "linux")]
|
||||
v if v == (IoMode::DirectRw as u8) => IoMode::DirectRw,
|
||||
x => return Err(x),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -4,10 +4,10 @@
|
||||
//! See docs/rfcs/025-generation-numbers.md
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utils::id::{NodeId, TimelineId};
|
||||
use utils::id::NodeId;
|
||||
|
||||
use crate::controller_api::NodeRegisterRequest;
|
||||
use crate::models::{LocationConfigMode, ShardImportStatus};
|
||||
use crate::models::LocationConfigMode;
|
||||
use crate::shard::TenantShardId;
|
||||
|
||||
/// Upcall message sent by the pageserver to the configured `control_plane_api` on
|
||||
@@ -62,10 +62,3 @@ pub struct ValidateResponseTenant {
|
||||
pub id: TenantShardId,
|
||||
pub valid: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct PutTimelineImportStatusRequest {
|
||||
pub tenant_shard_id: TenantShardId,
|
||||
pub timeline_id: TimelineId,
|
||||
pub status: ShardImportStatus,
|
||||
}
|
||||
|
||||
@@ -14,9 +14,8 @@ use anyhow::{Context, Result};
|
||||
use azure_core::request_options::{IfMatchCondition, MaxResults, Metadata, Range};
|
||||
use azure_core::{Continuable, HttpClient, RetryOptions, TransportOptions};
|
||||
use azure_storage::StorageCredentials;
|
||||
use azure_storage_blobs::blob::CopyStatus;
|
||||
use azure_storage_blobs::blob::operations::GetBlobBuilder;
|
||||
use azure_storage_blobs::blob::{Blob, CopyStatus};
|
||||
use azure_storage_blobs::container::operations::ListBlobsBuilder;
|
||||
use azure_storage_blobs::prelude::{ClientBuilder, ContainerClient};
|
||||
use bytes::Bytes;
|
||||
use futures::FutureExt;
|
||||
@@ -254,15 +253,53 @@ impl AzureBlobStorage {
|
||||
download
|
||||
}
|
||||
|
||||
fn list_streaming_for_fn<T: Default + ListingCollector>(
|
||||
async fn permit(
|
||||
&self,
|
||||
kind: RequestKind,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<tokio::sync::SemaphorePermit<'_>, Cancelled> {
|
||||
let acquire = self.concurrency_limiter.acquire(kind);
|
||||
|
||||
tokio::select! {
|
||||
permit = acquire => Ok(permit.expect("never closed")),
|
||||
_ = cancel.cancelled() => Err(Cancelled),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn container_name(&self) -> &str {
|
||||
&self.container_name
|
||||
}
|
||||
}
|
||||
|
||||
fn to_azure_metadata(metadata: StorageMetadata) -> Metadata {
|
||||
let mut res = Metadata::new();
|
||||
for (k, v) in metadata.0.into_iter() {
|
||||
res.insert(k, v);
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
fn to_download_error(error: azure_core::Error) -> DownloadError {
|
||||
if let Some(http_err) = error.as_http_error() {
|
||||
match http_err.status() {
|
||||
StatusCode::NotFound => DownloadError::NotFound,
|
||||
StatusCode::NotModified => DownloadError::Unmodified,
|
||||
StatusCode::BadRequest => DownloadError::BadInput(anyhow::Error::new(error)),
|
||||
_ => DownloadError::Other(anyhow::Error::new(error)),
|
||||
}
|
||||
} else {
|
||||
DownloadError::Other(error.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl RemoteStorage for AzureBlobStorage {
|
||||
fn list_streaming(
|
||||
&self,
|
||||
prefix: Option<&RemotePath>,
|
||||
mode: ListingMode,
|
||||
max_keys: Option<NonZeroU32>,
|
||||
cancel: &CancellationToken,
|
||||
request_kind: RequestKind,
|
||||
customize_builder: impl Fn(ListBlobsBuilder) -> ListBlobsBuilder,
|
||||
) -> impl Stream<Item = Result<T, DownloadError>> {
|
||||
) -> impl Stream<Item = Result<Listing, DownloadError>> {
|
||||
// get the passed prefix or if it is not set use prefix_in_bucket value
|
||||
let list_prefix = prefix.map(|p| self.relative_path_to_name(p)).or_else(|| {
|
||||
self.prefix_in_container.clone().map(|mut s| {
|
||||
@@ -274,7 +311,7 @@ impl AzureBlobStorage {
|
||||
});
|
||||
|
||||
async_stream::stream! {
|
||||
let _permit = self.permit(request_kind, cancel).await?;
|
||||
let _permit = self.permit(RequestKind::List, cancel).await?;
|
||||
|
||||
let mut builder = self.client.list_blobs();
|
||||
|
||||
@@ -290,8 +327,6 @@ impl AzureBlobStorage {
|
||||
builder = builder.max_results(MaxResults::new(limit));
|
||||
}
|
||||
|
||||
builder = customize_builder(builder);
|
||||
|
||||
let mut next_marker = None;
|
||||
|
||||
let mut timeout_try_cnt = 1;
|
||||
@@ -347,20 +382,26 @@ impl AzureBlobStorage {
|
||||
break;
|
||||
};
|
||||
|
||||
let mut res = T::default();
|
||||
let mut res = Listing::default();
|
||||
next_marker = entry.continuation();
|
||||
let prefix_iter = entry
|
||||
.blobs
|
||||
.prefixes()
|
||||
.map(|prefix| self.name_to_relative_path(&prefix.name));
|
||||
res.add_prefixes(self, prefix_iter);
|
||||
res.prefixes.extend(prefix_iter);
|
||||
|
||||
let blob_iter = entry
|
||||
.blobs
|
||||
.blobs();
|
||||
.blobs()
|
||||
.map(|k| ListingObject{
|
||||
key: self.name_to_relative_path(&k.name),
|
||||
last_modified: k.properties.last_modified.into(),
|
||||
size: k.properties.content_length,
|
||||
}
|
||||
);
|
||||
|
||||
for key in blob_iter {
|
||||
res.add_blob(self, key);
|
||||
res.keys.push(key);
|
||||
|
||||
if let Some(mut mk) = max_keys {
|
||||
assert!(mk > 0);
|
||||
@@ -382,128 +423,6 @@ impl AzureBlobStorage {
|
||||
}
|
||||
}
|
||||
|
||||
async fn permit(
|
||||
&self,
|
||||
kind: RequestKind,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<tokio::sync::SemaphorePermit<'_>, Cancelled> {
|
||||
let acquire = self.concurrency_limiter.acquire(kind);
|
||||
|
||||
tokio::select! {
|
||||
permit = acquire => Ok(permit.expect("never closed")),
|
||||
_ = cancel.cancelled() => Err(Cancelled),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn container_name(&self) -> &str {
|
||||
&self.container_name
|
||||
}
|
||||
}
|
||||
|
||||
trait ListingCollector {
|
||||
fn add_prefixes(&mut self, abs: &AzureBlobStorage, prefix_it: impl Iterator<Item = RemotePath>);
|
||||
fn add_blob(&mut self, abs: &AzureBlobStorage, blob: &Blob);
|
||||
}
|
||||
|
||||
impl ListingCollector for Listing {
|
||||
fn add_prefixes(
|
||||
&mut self,
|
||||
_abs: &AzureBlobStorage,
|
||||
prefix_it: impl Iterator<Item = RemotePath>,
|
||||
) {
|
||||
self.prefixes.extend(prefix_it);
|
||||
}
|
||||
fn add_blob(&mut self, abs: &AzureBlobStorage, blob: &Blob) {
|
||||
self.keys.push(ListingObject {
|
||||
key: abs.name_to_relative_path(&blob.name),
|
||||
last_modified: blob.properties.last_modified.into(),
|
||||
size: blob.properties.content_length,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
impl ListingCollector for crate::VersionListing {
|
||||
fn add_prefixes(
|
||||
&mut self,
|
||||
_abs: &AzureBlobStorage,
|
||||
_prefix_it: impl Iterator<Item = RemotePath>,
|
||||
) {
|
||||
// nothing
|
||||
}
|
||||
fn add_blob(&mut self, abs: &AzureBlobStorage, blob: &Blob) {
|
||||
let id = crate::VersionId(blob.version_id.clone().expect("didn't find version ID"));
|
||||
self.versions.push(crate::Version {
|
||||
key: abs.name_to_relative_path(&blob.name),
|
||||
last_modified: blob.properties.last_modified.into(),
|
||||
kind: crate::VersionKind::Version(id),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
fn to_azure_metadata(metadata: StorageMetadata) -> Metadata {
|
||||
let mut res = Metadata::new();
|
||||
for (k, v) in metadata.0.into_iter() {
|
||||
res.insert(k, v);
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
fn to_download_error(error: azure_core::Error) -> DownloadError {
|
||||
if let Some(http_err) = error.as_http_error() {
|
||||
match http_err.status() {
|
||||
StatusCode::NotFound => DownloadError::NotFound,
|
||||
StatusCode::NotModified => DownloadError::Unmodified,
|
||||
StatusCode::BadRequest => DownloadError::BadInput(anyhow::Error::new(error)),
|
||||
_ => DownloadError::Other(anyhow::Error::new(error)),
|
||||
}
|
||||
} else {
|
||||
DownloadError::Other(error.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl RemoteStorage for AzureBlobStorage {
|
||||
fn list_streaming(
|
||||
&self,
|
||||
prefix: Option<&RemotePath>,
|
||||
mode: ListingMode,
|
||||
max_keys: Option<NonZeroU32>,
|
||||
cancel: &CancellationToken,
|
||||
) -> impl Stream<Item = Result<Listing, DownloadError>> {
|
||||
let customize_builder = |builder| builder;
|
||||
let kind = RequestKind::ListVersions;
|
||||
self.list_streaming_for_fn(prefix, mode, max_keys, cancel, kind, customize_builder)
|
||||
}
|
||||
|
||||
async fn list_versions(
|
||||
&self,
|
||||
prefix: Option<&RemotePath>,
|
||||
mode: ListingMode,
|
||||
max_keys: Option<NonZeroU32>,
|
||||
cancel: &CancellationToken,
|
||||
) -> std::result::Result<crate::VersionListing, DownloadError> {
|
||||
let customize_builder = |mut builder: ListBlobsBuilder| {
|
||||
builder = builder.include_versions(true);
|
||||
builder
|
||||
};
|
||||
let kind = RequestKind::ListVersions;
|
||||
|
||||
let mut stream = std::pin::pin!(self.list_streaming_for_fn(
|
||||
prefix,
|
||||
mode,
|
||||
max_keys,
|
||||
cancel,
|
||||
kind,
|
||||
customize_builder
|
||||
));
|
||||
let mut combined: crate::VersionListing =
|
||||
stream.next().await.expect("At least one item required")?;
|
||||
while let Some(list) = stream.next().await {
|
||||
let list = list?;
|
||||
combined.versions.extend(list.versions.into_iter());
|
||||
}
|
||||
Ok(combined)
|
||||
}
|
||||
|
||||
async fn head_object(
|
||||
&self,
|
||||
key: &RemotePath,
|
||||
@@ -613,12 +532,7 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
let mut builder = blob_client.get();
|
||||
|
||||
if let Some(ref etag) = opts.etag {
|
||||
builder = builder.if_match(IfMatchCondition::NotMatch(etag.to_string()));
|
||||
}
|
||||
|
||||
if let Some(ref version_id) = opts.version_id {
|
||||
let version_id = azure_storage_blobs::prelude::VersionId::new(version_id.0.clone());
|
||||
builder = builder.blob_versioning(version_id);
|
||||
builder = builder.if_match(IfMatchCondition::NotMatch(etag.to_string()))
|
||||
}
|
||||
|
||||
if let Some((start, end)) = opts.byte_range() {
|
||||
|
||||
@@ -176,32 +176,6 @@ pub struct Listing {
|
||||
pub keys: Vec<ListingObject>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct VersionListing {
|
||||
pub versions: Vec<Version>,
|
||||
}
|
||||
|
||||
pub struct Version {
|
||||
pub key: RemotePath,
|
||||
pub last_modified: SystemTime,
|
||||
pub kind: VersionKind,
|
||||
}
|
||||
|
||||
impl Version {
|
||||
pub fn version_id(&self) -> Option<&VersionId> {
|
||||
match &self.kind {
|
||||
VersionKind::Version(id) => Some(id),
|
||||
VersionKind::DeletionMarker => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum VersionKind {
|
||||
DeletionMarker,
|
||||
Version(VersionId),
|
||||
}
|
||||
|
||||
/// Options for downloads. The default value is a plain GET.
|
||||
pub struct DownloadOpts {
|
||||
/// If given, returns [`DownloadError::Unmodified`] if the object still has
|
||||
@@ -212,8 +186,6 @@ pub struct DownloadOpts {
|
||||
/// The end of the byte range to download, or unbounded. Must be after the
|
||||
/// start bound.
|
||||
pub byte_end: Bound<u64>,
|
||||
/// Optionally request a specific version of a key
|
||||
pub version_id: Option<VersionId>,
|
||||
/// Indicate whether we're downloading something small or large: this indirectly controls
|
||||
/// timeouts: for something like an index/manifest/heatmap, we should time out faster than
|
||||
/// for layer files
|
||||
@@ -225,16 +197,12 @@ pub enum DownloadKind {
|
||||
Small,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct VersionId(pub String);
|
||||
|
||||
impl Default for DownloadOpts {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
etag: Default::default(),
|
||||
byte_start: Bound::Unbounded,
|
||||
byte_end: Bound::Unbounded,
|
||||
version_id: None,
|
||||
kind: DownloadKind::Large,
|
||||
}
|
||||
}
|
||||
@@ -327,14 +295,6 @@ pub trait RemoteStorage: Send + Sync + 'static {
|
||||
Ok(combined)
|
||||
}
|
||||
|
||||
async fn list_versions(
|
||||
&self,
|
||||
prefix: Option<&RemotePath>,
|
||||
mode: ListingMode,
|
||||
max_keys: Option<NonZeroU32>,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<VersionListing, DownloadError>;
|
||||
|
||||
/// Obtain metadata information about an object.
|
||||
async fn head_object(
|
||||
&self,
|
||||
@@ -515,22 +475,6 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
|
||||
}
|
||||
}
|
||||
|
||||
// See [`RemoteStorage::list_versions`].
|
||||
pub async fn list_versions<'a>(
|
||||
&'a self,
|
||||
prefix: Option<&'a RemotePath>,
|
||||
mode: ListingMode,
|
||||
max_keys: Option<NonZeroU32>,
|
||||
cancel: &'a CancellationToken,
|
||||
) -> Result<VersionListing, DownloadError> {
|
||||
match self {
|
||||
Self::LocalFs(s) => s.list_versions(prefix, mode, max_keys, cancel).await,
|
||||
Self::AwsS3(s) => s.list_versions(prefix, mode, max_keys, cancel).await,
|
||||
Self::AzureBlob(s) => s.list_versions(prefix, mode, max_keys, cancel).await,
|
||||
Self::Unreliable(s) => s.list_versions(prefix, mode, max_keys, cancel).await,
|
||||
}
|
||||
}
|
||||
|
||||
// See [`RemoteStorage::head_object`].
|
||||
pub async fn head_object(
|
||||
&self,
|
||||
@@ -783,7 +727,6 @@ impl ConcurrencyLimiter {
|
||||
RequestKind::Copy => &self.write,
|
||||
RequestKind::TimeTravel => &self.write,
|
||||
RequestKind::Head => &self.read,
|
||||
RequestKind::ListVersions => &self.read,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -445,16 +445,6 @@ impl RemoteStorage for LocalFs {
|
||||
}
|
||||
}
|
||||
|
||||
async fn list_versions(
|
||||
&self,
|
||||
_prefix: Option<&RemotePath>,
|
||||
_mode: ListingMode,
|
||||
_max_keys: Option<NonZeroU32>,
|
||||
_cancel: &CancellationToken,
|
||||
) -> Result<crate::VersionListing, DownloadError> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn head_object(
|
||||
&self,
|
||||
key: &RemotePath,
|
||||
|
||||
@@ -14,7 +14,6 @@ pub(crate) enum RequestKind {
|
||||
Copy = 4,
|
||||
TimeTravel = 5,
|
||||
Head = 6,
|
||||
ListVersions = 7,
|
||||
}
|
||||
|
||||
use RequestKind::*;
|
||||
@@ -30,7 +29,6 @@ impl RequestKind {
|
||||
Copy => "copy_object",
|
||||
TimeTravel => "time_travel_recover",
|
||||
Head => "head_object",
|
||||
ListVersions => "list_versions",
|
||||
}
|
||||
}
|
||||
const fn as_index(&self) -> usize {
|
||||
@@ -38,10 +36,7 @@ impl RequestKind {
|
||||
}
|
||||
}
|
||||
|
||||
const REQUEST_KIND_LIST: &[RequestKind] =
|
||||
&[Get, Put, Delete, List, Copy, TimeTravel, Head, ListVersions];
|
||||
|
||||
const REQUEST_KIND_COUNT: usize = REQUEST_KIND_LIST.len();
|
||||
const REQUEST_KIND_COUNT: usize = 7;
|
||||
pub(crate) struct RequestTyped<C>([C; REQUEST_KIND_COUNT]);
|
||||
|
||||
impl<C> RequestTyped<C> {
|
||||
@@ -50,11 +45,12 @@ impl<C> RequestTyped<C> {
|
||||
}
|
||||
|
||||
fn build_with(mut f: impl FnMut(RequestKind) -> C) -> Self {
|
||||
let mut it = REQUEST_KIND_LIST.iter();
|
||||
use RequestKind::*;
|
||||
let mut it = [Get, Put, Delete, List, Copy, TimeTravel, Head].into_iter();
|
||||
let arr = std::array::from_fn::<C, REQUEST_KIND_COUNT, _>(|index| {
|
||||
let next = it.next().unwrap();
|
||||
assert_eq!(index, next.as_index());
|
||||
f(*next)
|
||||
f(next)
|
||||
});
|
||||
|
||||
if let Some(next) = it.next() {
|
||||
|
||||
@@ -21,8 +21,9 @@ use aws_sdk_s3::config::{AsyncSleep, IdentityCache, Region, SharedAsyncSleep};
|
||||
use aws_sdk_s3::error::SdkError;
|
||||
use aws_sdk_s3::operation::get_object::GetObjectError;
|
||||
use aws_sdk_s3::operation::head_object::HeadObjectError;
|
||||
use aws_sdk_s3::types::{Delete, ObjectIdentifier, StorageClass};
|
||||
use aws_sdk_s3::types::{Delete, DeleteMarkerEntry, ObjectIdentifier, ObjectVersion, StorageClass};
|
||||
use aws_smithy_async::rt::sleep::TokioSleep;
|
||||
use aws_smithy_types::DateTime;
|
||||
use aws_smithy_types::body::SdkBody;
|
||||
use aws_smithy_types::byte_stream::ByteStream;
|
||||
use aws_smithy_types::date_time::ConversionError;
|
||||
@@ -45,7 +46,7 @@ use crate::support::PermitCarrying;
|
||||
use crate::{
|
||||
ConcurrencyLimiter, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject,
|
||||
MAX_KEYS_PER_DELETE_S3, REMOTE_STORAGE_PREFIX_SEPARATOR, RemotePath, RemoteStorage,
|
||||
TimeTravelError, TimeoutOrCancel, Version, VersionId, VersionKind, VersionListing,
|
||||
TimeTravelError, TimeoutOrCancel,
|
||||
};
|
||||
|
||||
/// AWS S3 storage.
|
||||
@@ -65,7 +66,6 @@ struct GetObjectRequest {
|
||||
key: String,
|
||||
etag: Option<String>,
|
||||
range: Option<String>,
|
||||
version_id: Option<String>,
|
||||
}
|
||||
impl S3Bucket {
|
||||
/// Creates the S3 storage, errors if incorrect AWS S3 configuration provided.
|
||||
@@ -251,7 +251,6 @@ impl S3Bucket {
|
||||
.get_object()
|
||||
.bucket(request.bucket)
|
||||
.key(request.key)
|
||||
.set_version_id(request.version_id)
|
||||
.set_range(request.range);
|
||||
|
||||
if let Some(etag) = request.etag {
|
||||
@@ -406,124 +405,6 @@ impl S3Bucket {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn list_versions_with_permit(
|
||||
&self,
|
||||
_permit: &tokio::sync::SemaphorePermit<'_>,
|
||||
prefix: Option<&RemotePath>,
|
||||
mode: ListingMode,
|
||||
max_keys: Option<NonZeroU32>,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<crate::VersionListing, DownloadError> {
|
||||
// get the passed prefix or if it is not set use prefix_in_bucket value
|
||||
let prefix = prefix
|
||||
.map(|p| self.relative_path_to_s3_object(p))
|
||||
.or_else(|| self.prefix_in_bucket.clone());
|
||||
|
||||
let warn_threshold = 3;
|
||||
let max_retries = 10;
|
||||
let is_permanent = |e: &_| matches!(e, DownloadError::Cancelled);
|
||||
|
||||
let mut key_marker = None;
|
||||
let mut version_id_marker = None;
|
||||
let mut versions_and_deletes = Vec::new();
|
||||
|
||||
loop {
|
||||
let response = backoff::retry(
|
||||
|| async {
|
||||
let mut request = self
|
||||
.client
|
||||
.list_object_versions()
|
||||
.bucket(self.bucket_name.clone())
|
||||
.set_prefix(prefix.clone())
|
||||
.set_key_marker(key_marker.clone())
|
||||
.set_version_id_marker(version_id_marker.clone());
|
||||
|
||||
if let ListingMode::WithDelimiter = mode {
|
||||
request = request.delimiter(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string());
|
||||
}
|
||||
|
||||
let op = request.send();
|
||||
|
||||
tokio::select! {
|
||||
res = op => res.map_err(|e| DownloadError::Other(e.into())),
|
||||
_ = cancel.cancelled() => Err(DownloadError::Cancelled),
|
||||
}
|
||||
},
|
||||
is_permanent,
|
||||
warn_threshold,
|
||||
max_retries,
|
||||
"listing object versions",
|
||||
cancel,
|
||||
)
|
||||
.await
|
||||
.ok_or_else(|| DownloadError::Cancelled)
|
||||
.and_then(|x| x)?;
|
||||
|
||||
tracing::trace!(
|
||||
" Got List response version_id_marker={:?}, key_marker={:?}",
|
||||
response.version_id_marker,
|
||||
response.key_marker
|
||||
);
|
||||
let versions = response
|
||||
.versions
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(|version| {
|
||||
let key = version.key.expect("response does not contain a key");
|
||||
let key = self.s3_object_to_relative_path(&key);
|
||||
let version_id = VersionId(version.version_id.expect("needing version id"));
|
||||
let last_modified =
|
||||
SystemTime::try_from(version.last_modified.expect("no last_modified"))?;
|
||||
Ok(Version {
|
||||
key,
|
||||
last_modified,
|
||||
kind: crate::VersionKind::Version(version_id),
|
||||
})
|
||||
});
|
||||
let deletes = response
|
||||
.delete_markers
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(|version| {
|
||||
let key = version.key.expect("response does not contain a key");
|
||||
let key = self.s3_object_to_relative_path(&key);
|
||||
let last_modified =
|
||||
SystemTime::try_from(version.last_modified.expect("no last_modified"))?;
|
||||
Ok(Version {
|
||||
key,
|
||||
last_modified,
|
||||
kind: crate::VersionKind::DeletionMarker,
|
||||
})
|
||||
});
|
||||
itertools::process_results(versions.chain(deletes), |n_vds| {
|
||||
versions_and_deletes.extend(n_vds)
|
||||
})
|
||||
.map_err(DownloadError::Other)?;
|
||||
fn none_if_empty(v: Option<String>) -> Option<String> {
|
||||
v.filter(|v| !v.is_empty())
|
||||
}
|
||||
version_id_marker = none_if_empty(response.next_version_id_marker);
|
||||
key_marker = none_if_empty(response.next_key_marker);
|
||||
if version_id_marker.is_none() {
|
||||
// The final response is not supposed to be truncated
|
||||
if response.is_truncated.unwrap_or_default() {
|
||||
return Err(DownloadError::Other(anyhow::anyhow!(
|
||||
"Received truncated ListObjectVersions response for prefix={prefix:?}"
|
||||
)));
|
||||
}
|
||||
break;
|
||||
}
|
||||
if let Some(max_keys) = max_keys {
|
||||
if versions_and_deletes.len() >= max_keys.get().try_into().unwrap() {
|
||||
return Err(DownloadError::Other(anyhow::anyhow!("too many versions")));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(VersionListing {
|
||||
versions: versions_and_deletes,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn bucket_name(&self) -> &str {
|
||||
&self.bucket_name
|
||||
}
|
||||
@@ -740,19 +621,6 @@ impl RemoteStorage for S3Bucket {
|
||||
}
|
||||
}
|
||||
|
||||
async fn list_versions(
|
||||
&self,
|
||||
prefix: Option<&RemotePath>,
|
||||
mode: ListingMode,
|
||||
max_keys: Option<NonZeroU32>,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<crate::VersionListing, DownloadError> {
|
||||
let kind = RequestKind::ListVersions;
|
||||
let permit = self.permit(kind, cancel).await?;
|
||||
self.list_versions_with_permit(&permit, prefix, mode, max_keys, cancel)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn head_object(
|
||||
&self,
|
||||
key: &RemotePath,
|
||||
@@ -933,7 +801,6 @@ impl RemoteStorage for S3Bucket {
|
||||
key: self.relative_path_to_s3_object(from),
|
||||
etag: opts.etag.as_ref().map(|e| e.to_string()),
|
||||
range: opts.byte_range_header(),
|
||||
version_id: opts.version_id.as_ref().map(|v| v.0.to_owned()),
|
||||
},
|
||||
cancel,
|
||||
)
|
||||
@@ -978,25 +845,94 @@ impl RemoteStorage for S3Bucket {
|
||||
let kind = RequestKind::TimeTravel;
|
||||
let permit = self.permit(kind, cancel).await?;
|
||||
|
||||
let timestamp = DateTime::from(timestamp);
|
||||
let done_if_after = DateTime::from(done_if_after);
|
||||
|
||||
tracing::trace!("Target time: {timestamp:?}, done_if_after {done_if_after:?}");
|
||||
|
||||
// Limit the number of versions deletions, mostly so that we don't
|
||||
// keep requesting forever if the list is too long, as we'd put the
|
||||
// list in RAM.
|
||||
// Building a list of 100k entries that reaches the limit roughly takes
|
||||
// 40 seconds, and roughly corresponds to tenants of 2 TiB physical size.
|
||||
const COMPLEXITY_LIMIT: Option<NonZeroU32> = NonZeroU32::new(100_000);
|
||||
// get the passed prefix or if it is not set use prefix_in_bucket value
|
||||
let prefix = prefix
|
||||
.map(|p| self.relative_path_to_s3_object(p))
|
||||
.or_else(|| self.prefix_in_bucket.clone());
|
||||
|
||||
let mode = ListingMode::NoDelimiter;
|
||||
let version_listing = self
|
||||
.list_versions_with_permit(&permit, prefix, mode, COMPLEXITY_LIMIT, cancel)
|
||||
let warn_threshold = 3;
|
||||
let max_retries = 10;
|
||||
let is_permanent = |e: &_| matches!(e, TimeTravelError::Cancelled);
|
||||
|
||||
let mut key_marker = None;
|
||||
let mut version_id_marker = None;
|
||||
let mut versions_and_deletes = Vec::new();
|
||||
|
||||
loop {
|
||||
let response = backoff::retry(
|
||||
|| async {
|
||||
let op = self
|
||||
.client
|
||||
.list_object_versions()
|
||||
.bucket(self.bucket_name.clone())
|
||||
.set_prefix(prefix.clone())
|
||||
.set_key_marker(key_marker.clone())
|
||||
.set_version_id_marker(version_id_marker.clone())
|
||||
.send();
|
||||
|
||||
tokio::select! {
|
||||
res = op => res.map_err(|e| TimeTravelError::Other(e.into())),
|
||||
_ = cancel.cancelled() => Err(TimeTravelError::Cancelled),
|
||||
}
|
||||
},
|
||||
is_permanent,
|
||||
warn_threshold,
|
||||
max_retries,
|
||||
"listing object versions for time_travel_recover",
|
||||
cancel,
|
||||
)
|
||||
.await
|
||||
.map_err(|err| match err {
|
||||
DownloadError::Other(e) => TimeTravelError::Other(e),
|
||||
DownloadError::Cancelled => TimeTravelError::Cancelled,
|
||||
other => TimeTravelError::Other(other.into()),
|
||||
})?;
|
||||
let versions_and_deletes = version_listing.versions;
|
||||
.ok_or_else(|| TimeTravelError::Cancelled)
|
||||
.and_then(|x| x)?;
|
||||
|
||||
tracing::trace!(
|
||||
" Got List response version_id_marker={:?}, key_marker={:?}",
|
||||
response.version_id_marker,
|
||||
response.key_marker
|
||||
);
|
||||
let versions = response
|
||||
.versions
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(VerOrDelete::from_version);
|
||||
let deletes = response
|
||||
.delete_markers
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(VerOrDelete::from_delete_marker);
|
||||
itertools::process_results(versions.chain(deletes), |n_vds| {
|
||||
versions_and_deletes.extend(n_vds)
|
||||
})
|
||||
.map_err(TimeTravelError::Other)?;
|
||||
fn none_if_empty(v: Option<String>) -> Option<String> {
|
||||
v.filter(|v| !v.is_empty())
|
||||
}
|
||||
version_id_marker = none_if_empty(response.next_version_id_marker);
|
||||
key_marker = none_if_empty(response.next_key_marker);
|
||||
if version_id_marker.is_none() {
|
||||
// The final response is not supposed to be truncated
|
||||
if response.is_truncated.unwrap_or_default() {
|
||||
return Err(TimeTravelError::Other(anyhow::anyhow!(
|
||||
"Received truncated ListObjectVersions response for prefix={prefix:?}"
|
||||
)));
|
||||
}
|
||||
break;
|
||||
}
|
||||
// Limit the number of versions deletions, mostly so that we don't
|
||||
// keep requesting forever if the list is too long, as we'd put the
|
||||
// list in RAM.
|
||||
// Building a list of 100k entries that reaches the limit roughly takes
|
||||
// 40 seconds, and roughly corresponds to tenants of 2 TiB physical size.
|
||||
const COMPLEXITY_LIMIT: usize = 100_000;
|
||||
if versions_and_deletes.len() >= COMPLEXITY_LIMIT {
|
||||
return Err(TimeTravelError::TooManyVersions);
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"Built list for time travel with {} versions and deletions",
|
||||
@@ -1012,26 +948,24 @@ impl RemoteStorage for S3Bucket {
|
||||
let mut vds_for_key = HashMap::<_, Vec<_>>::new();
|
||||
|
||||
for vd in &versions_and_deletes {
|
||||
let Version { key, .. } = &vd;
|
||||
let version_id = vd.version_id().map(|v| v.0.as_str());
|
||||
if version_id == Some("null") {
|
||||
let VerOrDelete {
|
||||
version_id, key, ..
|
||||
} = &vd;
|
||||
if version_id == "null" {
|
||||
return Err(TimeTravelError::Other(anyhow!(
|
||||
"Received ListVersions response for key={key} with version_id='null', \
|
||||
indicating either disabled versioning, or legacy objects with null version id values"
|
||||
)));
|
||||
}
|
||||
tracing::trace!("Parsing version key={key} kind={:?}", vd.kind);
|
||||
tracing::trace!(
|
||||
"Parsing version key={key} version_id={version_id} kind={:?}",
|
||||
vd.kind
|
||||
);
|
||||
|
||||
vds_for_key.entry(key).or_default().push(vd);
|
||||
}
|
||||
|
||||
let warn_threshold = 3;
|
||||
let max_retries = 10;
|
||||
let is_permanent = |e: &_| matches!(e, TimeTravelError::Cancelled);
|
||||
|
||||
for (key, versions) in vds_for_key {
|
||||
let last_vd = versions.last().unwrap();
|
||||
let key = self.relative_path_to_s3_object(key);
|
||||
if last_vd.last_modified > done_if_after {
|
||||
tracing::trace!("Key {key} has version later than done_if_after, skipping");
|
||||
continue;
|
||||
@@ -1056,11 +990,11 @@ impl RemoteStorage for S3Bucket {
|
||||
do_delete = true;
|
||||
} else {
|
||||
match &versions[version_to_restore_to - 1] {
|
||||
Version {
|
||||
kind: VersionKind::Version(version_id),
|
||||
VerOrDelete {
|
||||
kind: VerOrDeleteKind::Version,
|
||||
version_id,
|
||||
..
|
||||
} => {
|
||||
let version_id = &version_id.0;
|
||||
tracing::trace!("Copying old version {version_id} for {key}...");
|
||||
// Restore the state to the last version by copying
|
||||
let source_id =
|
||||
@@ -1072,7 +1006,7 @@ impl RemoteStorage for S3Bucket {
|
||||
.client
|
||||
.copy_object()
|
||||
.bucket(self.bucket_name.clone())
|
||||
.key(&key)
|
||||
.key(key)
|
||||
.set_storage_class(self.upload_storage_class.clone())
|
||||
.copy_source(&source_id)
|
||||
.send();
|
||||
@@ -1093,8 +1027,8 @@ impl RemoteStorage for S3Bucket {
|
||||
.and_then(|x| x)?;
|
||||
tracing::info!(%version_id, %key, "Copied old version in S3");
|
||||
}
|
||||
Version {
|
||||
kind: VersionKind::DeletionMarker,
|
||||
VerOrDelete {
|
||||
kind: VerOrDeleteKind::DeleteMarker,
|
||||
..
|
||||
} => {
|
||||
do_delete = true;
|
||||
@@ -1102,7 +1036,7 @@ impl RemoteStorage for S3Bucket {
|
||||
}
|
||||
};
|
||||
if do_delete {
|
||||
if matches!(last_vd.kind, VersionKind::DeletionMarker) {
|
||||
if matches!(last_vd.kind, VerOrDeleteKind::DeleteMarker) {
|
||||
// Key has since been deleted (but there was some history), no need to do anything
|
||||
tracing::trace!("Key {key} already deleted, skipping.");
|
||||
} else {
|
||||
@@ -1130,6 +1064,62 @@ impl RemoteStorage for S3Bucket {
|
||||
}
|
||||
}
|
||||
|
||||
// Save RAM and only store the needed data instead of the entire ObjectVersion/DeleteMarkerEntry
|
||||
struct VerOrDelete {
|
||||
kind: VerOrDeleteKind,
|
||||
last_modified: DateTime,
|
||||
version_id: String,
|
||||
key: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum VerOrDeleteKind {
|
||||
Version,
|
||||
DeleteMarker,
|
||||
}
|
||||
|
||||
impl VerOrDelete {
|
||||
fn with_kind(
|
||||
kind: VerOrDeleteKind,
|
||||
last_modified: Option<DateTime>,
|
||||
version_id: Option<String>,
|
||||
key: Option<String>,
|
||||
) -> anyhow::Result<Self> {
|
||||
let lvk = (last_modified, version_id, key);
|
||||
let (Some(last_modified), Some(version_id), Some(key)) = lvk else {
|
||||
anyhow::bail!(
|
||||
"One (or more) of last_modified, key, and id is None. \
|
||||
Is versioning enabled in the bucket? last_modified={:?}, version_id={:?}, key={:?}",
|
||||
lvk.0,
|
||||
lvk.1,
|
||||
lvk.2,
|
||||
);
|
||||
};
|
||||
Ok(Self {
|
||||
kind,
|
||||
last_modified,
|
||||
version_id,
|
||||
key,
|
||||
})
|
||||
}
|
||||
fn from_version(v: ObjectVersion) -> anyhow::Result<Self> {
|
||||
Self::with_kind(
|
||||
VerOrDeleteKind::Version,
|
||||
v.last_modified,
|
||||
v.version_id,
|
||||
v.key,
|
||||
)
|
||||
}
|
||||
fn from_delete_marker(v: DeleteMarkerEntry) -> anyhow::Result<Self> {
|
||||
Self::with_kind(
|
||||
VerOrDeleteKind::DeleteMarker,
|
||||
v.last_modified,
|
||||
v.version_id,
|
||||
v.key,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::num::NonZeroUsize;
|
||||
|
||||
@@ -139,20 +139,6 @@ impl RemoteStorage for UnreliableWrapper {
|
||||
self.inner.list(prefix, mode, max_keys, cancel).await
|
||||
}
|
||||
|
||||
async fn list_versions(
|
||||
&self,
|
||||
prefix: Option<&RemotePath>,
|
||||
mode: ListingMode,
|
||||
max_keys: Option<NonZeroU32>,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<crate::VersionListing, DownloadError> {
|
||||
self.attempt(RemoteOp::ListPrefixes(prefix.cloned()))
|
||||
.map_err(DownloadError::Other)?;
|
||||
self.inner
|
||||
.list_versions(prefix, mode, max_keys, cancel)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn head_object(
|
||||
&self,
|
||||
key: &RemotePath,
|
||||
|
||||
@@ -106,7 +106,6 @@ hex-literal.workspace = true
|
||||
tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time", "test-util"] }
|
||||
indoc.workspace = true
|
||||
uuid.workspace = true
|
||||
rstest.workspace = true
|
||||
|
||||
[[bench]]
|
||||
name = "bench_layer_map"
|
||||
|
||||
@@ -11,7 +11,6 @@ use pageserver::task_mgr::TaskKind;
|
||||
use pageserver::tenant::storage_layer::InMemoryLayer;
|
||||
use pageserver::{page_cache, virtual_file};
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::models::virtual_file::IoMode;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_api::value::Value;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
@@ -29,7 +28,6 @@ fn murmurhash32(mut h: u32) -> u32 {
|
||||
h
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize, Clone, Copy, Debug)]
|
||||
enum KeyLayout {
|
||||
/// Sequential unique keys
|
||||
Sequential,
|
||||
@@ -39,7 +37,6 @@ enum KeyLayout {
|
||||
RandomReuse(u32),
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize, Clone, Copy, Debug)]
|
||||
enum WriteDelta {
|
||||
Yes,
|
||||
No,
|
||||
@@ -141,15 +138,12 @@ async fn ingest(
|
||||
/// Wrapper to instantiate a tokio runtime
|
||||
fn ingest_main(
|
||||
conf: &'static PageServerConf,
|
||||
io_mode: IoMode,
|
||||
put_size: usize,
|
||||
put_count: usize,
|
||||
key_layout: KeyLayout,
|
||||
write_delta: WriteDelta,
|
||||
) {
|
||||
pageserver::virtual_file::set_io_mode(io_mode);
|
||||
|
||||
let runtime = tokio::runtime::Builder::new_multi_thread()
|
||||
let runtime = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
.unwrap();
|
||||
@@ -180,245 +174,93 @@ fn criterion_benchmark(c: &mut Criterion) {
|
||||
virtual_file::init(
|
||||
16384,
|
||||
virtual_file::io_engine_for_bench(),
|
||||
// immaterial, each `ingest_main` invocation below overrides this
|
||||
conf.virtual_file_io_mode,
|
||||
// without actually doing syncs, buffered writes have an unfair advantage over direct IO writes
|
||||
virtual_file::SyncMode::Sync,
|
||||
);
|
||||
page_cache::init(conf.page_cache_size);
|
||||
|
||||
#[derive(serde::Serialize)]
|
||||
struct ExplodedParameters {
|
||||
io_mode: IoMode,
|
||||
volume_mib: usize,
|
||||
key_size: usize,
|
||||
key_layout: KeyLayout,
|
||||
write_delta: WriteDelta,
|
||||
}
|
||||
#[derive(Clone)]
|
||||
struct HandPickedParameters {
|
||||
volume_mib: usize,
|
||||
key_size: usize,
|
||||
key_layout: KeyLayout,
|
||||
write_delta: WriteDelta,
|
||||
}
|
||||
let expect = vec![
|
||||
// Small values (100b) tests
|
||||
HandPickedParameters {
|
||||
volume_mib: 128,
|
||||
key_size: 100,
|
||||
key_layout: KeyLayout::Sequential,
|
||||
write_delta: WriteDelta::Yes,
|
||||
},
|
||||
HandPickedParameters {
|
||||
volume_mib: 128,
|
||||
key_size: 100,
|
||||
key_layout: KeyLayout::Random,
|
||||
write_delta: WriteDelta::Yes,
|
||||
},
|
||||
HandPickedParameters {
|
||||
volume_mib: 128,
|
||||
key_size: 100,
|
||||
key_layout: KeyLayout::RandomReuse(0x3ff),
|
||||
write_delta: WriteDelta::Yes,
|
||||
},
|
||||
HandPickedParameters {
|
||||
volume_mib: 128,
|
||||
key_size: 100,
|
||||
key_layout: KeyLayout::Sequential,
|
||||
write_delta: WriteDelta::No,
|
||||
},
|
||||
// Large values (8k) tests
|
||||
HandPickedParameters {
|
||||
volume_mib: 128,
|
||||
key_size: 8192,
|
||||
key_layout: KeyLayout::Sequential,
|
||||
write_delta: WriteDelta::Yes,
|
||||
},
|
||||
HandPickedParameters {
|
||||
volume_mib: 128,
|
||||
key_size: 8192,
|
||||
key_layout: KeyLayout::Sequential,
|
||||
write_delta: WriteDelta::No,
|
||||
},
|
||||
];
|
||||
let exploded_parameters = {
|
||||
let mut out = Vec::new();
|
||||
for io_mode in [
|
||||
IoMode::Buffered,
|
||||
#[cfg(target_os = "linux")]
|
||||
IoMode::Direct,
|
||||
#[cfg(target_os = "linux")]
|
||||
IoMode::DirectRw,
|
||||
] {
|
||||
for param in expect.clone() {
|
||||
let HandPickedParameters {
|
||||
volume_mib,
|
||||
key_size,
|
||||
key_layout,
|
||||
write_delta,
|
||||
} = param;
|
||||
out.push(ExplodedParameters {
|
||||
io_mode,
|
||||
volume_mib,
|
||||
key_size,
|
||||
key_layout,
|
||||
write_delta,
|
||||
});
|
||||
}
|
||||
}
|
||||
out
|
||||
};
|
||||
impl ExplodedParameters {
|
||||
fn benchmark_id(&self) -> String {
|
||||
let ExplodedParameters {
|
||||
io_mode,
|
||||
volume_mib,
|
||||
key_size,
|
||||
key_layout,
|
||||
write_delta,
|
||||
} = self;
|
||||
format!(
|
||||
"io_mode={io_mode:?} volume_mib={volume_mib:?} key_size_bytes={key_size:?} key_layout={key_layout:?} write_delta={write_delta:?}"
|
||||
)
|
||||
}
|
||||
}
|
||||
let mut group = c.benchmark_group("ingest");
|
||||
for params in exploded_parameters {
|
||||
let id = params.benchmark_id();
|
||||
let ExplodedParameters {
|
||||
io_mode,
|
||||
volume_mib,
|
||||
key_size,
|
||||
key_layout,
|
||||
write_delta,
|
||||
} = params;
|
||||
let put_count = volume_mib * 1024 * 1024 / key_size;
|
||||
group.throughput(criterion::Throughput::Bytes((key_size * put_count) as u64));
|
||||
{
|
||||
let mut group = c.benchmark_group("ingest-small-values");
|
||||
let put_size = 100usize;
|
||||
let put_count = 128 * 1024 * 1024 / put_size;
|
||||
group.throughput(criterion::Throughput::Bytes((put_size * put_count) as u64));
|
||||
group.sample_size(10);
|
||||
group.bench_function(id, |b| {
|
||||
b.iter(|| ingest_main(conf, io_mode, key_size, put_count, key_layout, write_delta))
|
||||
group.bench_function("ingest 128MB/100b seq", |b| {
|
||||
b.iter(|| {
|
||||
ingest_main(
|
||||
conf,
|
||||
put_size,
|
||||
put_count,
|
||||
KeyLayout::Sequential,
|
||||
WriteDelta::Yes,
|
||||
)
|
||||
})
|
||||
});
|
||||
group.bench_function("ingest 128MB/100b rand", |b| {
|
||||
b.iter(|| {
|
||||
ingest_main(
|
||||
conf,
|
||||
put_size,
|
||||
put_count,
|
||||
KeyLayout::Random,
|
||||
WriteDelta::Yes,
|
||||
)
|
||||
})
|
||||
});
|
||||
group.bench_function("ingest 128MB/100b rand-1024keys", |b| {
|
||||
b.iter(|| {
|
||||
ingest_main(
|
||||
conf,
|
||||
put_size,
|
||||
put_count,
|
||||
KeyLayout::RandomReuse(0x3ff),
|
||||
WriteDelta::Yes,
|
||||
)
|
||||
})
|
||||
});
|
||||
group.bench_function("ingest 128MB/100b seq, no delta", |b| {
|
||||
b.iter(|| {
|
||||
ingest_main(
|
||||
conf,
|
||||
put_size,
|
||||
put_count,
|
||||
KeyLayout::Sequential,
|
||||
WriteDelta::No,
|
||||
)
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
{
|
||||
let mut group = c.benchmark_group("ingest-big-values");
|
||||
let put_size = 8192usize;
|
||||
let put_count = 128 * 1024 * 1024 / put_size;
|
||||
group.throughput(criterion::Throughput::Bytes((put_size * put_count) as u64));
|
||||
group.sample_size(10);
|
||||
group.bench_function("ingest 128MB/8k seq", |b| {
|
||||
b.iter(|| {
|
||||
ingest_main(
|
||||
conf,
|
||||
put_size,
|
||||
put_count,
|
||||
KeyLayout::Sequential,
|
||||
WriteDelta::Yes,
|
||||
)
|
||||
})
|
||||
});
|
||||
group.bench_function("ingest 128MB/8k seq, no delta", |b| {
|
||||
b.iter(|| {
|
||||
ingest_main(
|
||||
conf,
|
||||
put_size,
|
||||
put_count,
|
||||
KeyLayout::Sequential,
|
||||
WriteDelta::No,
|
||||
)
|
||||
})
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
criterion_group!(benches, criterion_benchmark);
|
||||
criterion_main!(benches);
|
||||
|
||||
/*
|
||||
cargo bench --bench bench_ingest
|
||||
|
||||
im4gn.2xlarge:
|
||||
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes
|
||||
time: [1.2901 s 1.2943 s 1.2991 s]
|
||||
thrpt: [98.533 MiB/s 98.892 MiB/s 99.220 MiB/s]
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes
|
||||
time: [2.1387 s 2.1623 s 2.1845 s]
|
||||
thrpt: [58.595 MiB/s 59.197 MiB/s 59.851 MiB/s]
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Y...
|
||||
time: [1.2036 s 1.2074 s 1.2122 s]
|
||||
thrpt: [105.60 MiB/s 106.01 MiB/s 106.35 MiB/s]
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No
|
||||
time: [520.55 ms 521.46 ms 522.57 ms]
|
||||
thrpt: [244.94 MiB/s 245.47 MiB/s 245.89 MiB/s]
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes
|
||||
time: [440.33 ms 442.24 ms 444.10 ms]
|
||||
thrpt: [288.22 MiB/s 289.43 MiB/s 290.69 MiB/s]
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No
|
||||
time: [168.78 ms 169.42 ms 170.18 ms]
|
||||
thrpt: [752.16 MiB/s 755.52 MiB/s 758.40 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes
|
||||
time: [1.2978 s 1.3094 s 1.3227 s]
|
||||
thrpt: [96.775 MiB/s 97.758 MiB/s 98.632 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes
|
||||
time: [2.1976 s 2.2067 s 2.2154 s]
|
||||
thrpt: [57.777 MiB/s 58.006 MiB/s 58.245 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Yes
|
||||
time: [1.2103 s 1.2160 s 1.2233 s]
|
||||
thrpt: [104.64 MiB/s 105.26 MiB/s 105.76 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No
|
||||
time: [525.05 ms 526.37 ms 527.79 ms]
|
||||
thrpt: [242.52 MiB/s 243.17 MiB/s 243.79 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes
|
||||
time: [443.06 ms 444.88 ms 447.15 ms]
|
||||
thrpt: [286.26 MiB/s 287.72 MiB/s 288.90 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No
|
||||
time: [169.40 ms 169.80 ms 170.17 ms]
|
||||
thrpt: [752.21 MiB/s 753.81 MiB/s 755.60 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes
|
||||
time: [1.2844 s 1.2915 s 1.2990 s]
|
||||
thrpt: [98.536 MiB/s 99.112 MiB/s 99.657 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes
|
||||
time: [2.1431 s 2.1663 s 2.1900 s]
|
||||
thrpt: [58.446 MiB/s 59.087 MiB/s 59.726 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Y...
|
||||
time: [1.1906 s 1.1926 s 1.1947 s]
|
||||
thrpt: [107.14 MiB/s 107.33 MiB/s 107.51 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No
|
||||
time: [516.86 ms 518.25 ms 519.47 ms]
|
||||
thrpt: [246.40 MiB/s 246.98 MiB/s 247.65 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes
|
||||
time: [536.50 ms 536.53 ms 536.60 ms]
|
||||
thrpt: [238.54 MiB/s 238.57 MiB/s 238.59 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No
|
||||
time: [267.77 ms 267.90 ms 268.04 ms]
|
||||
thrpt: [477.53 MiB/s 477.79 MiB/s 478.02 MiB/s]
|
||||
|
||||
Hetzner AX102:
|
||||
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes
|
||||
time: [836.58 ms 861.93 ms 886.57 ms]
|
||||
thrpt: [144.38 MiB/s 148.50 MiB/s 153.00 MiB/s]
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes
|
||||
time: [1.2782 s 1.3191 s 1.3665 s]
|
||||
thrpt: [93.668 MiB/s 97.037 MiB/s 100.14 MiB/s]
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Y...
|
||||
time: [791.27 ms 807.08 ms 822.95 ms]
|
||||
thrpt: [155.54 MiB/s 158.60 MiB/s 161.77 MiB/s]
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No
|
||||
time: [310.78 ms 314.66 ms 318.47 ms]
|
||||
thrpt: [401.92 MiB/s 406.79 MiB/s 411.87 MiB/s]
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes
|
||||
time: [377.11 ms 387.77 ms 399.21 ms]
|
||||
thrpt: [320.63 MiB/s 330.10 MiB/s 339.42 MiB/s]
|
||||
ingest/io_mode=Buffered volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No
|
||||
time: [128.37 ms 132.96 ms 138.55 ms]
|
||||
thrpt: [923.83 MiB/s 962.69 MiB/s 997.11 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes
|
||||
time: [900.38 ms 914.88 ms 928.86 ms]
|
||||
thrpt: [137.80 MiB/s 139.91 MiB/s 142.16 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes
|
||||
time: [1.2538 s 1.2936 s 1.3313 s]
|
||||
thrpt: [96.149 MiB/s 98.946 MiB/s 102.09 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Yes
|
||||
time: [787.17 ms 803.89 ms 820.63 ms]
|
||||
thrpt: [155.98 MiB/s 159.23 MiB/s 162.61 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No
|
||||
time: [318.78 ms 321.89 ms 324.74 ms]
|
||||
thrpt: [394.16 MiB/s 397.65 MiB/s 401.53 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes
|
||||
time: [374.01 ms 383.45 ms 393.20 ms]
|
||||
thrpt: [325.53 MiB/s 333.81 MiB/s 342.24 MiB/s]
|
||||
ingest/io_mode=Direct volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No
|
||||
time: [137.98 ms 141.31 ms 143.57 ms]
|
||||
thrpt: [891.58 MiB/s 905.79 MiB/s 927.66 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=Yes
|
||||
time: [613.69 ms 622.48 ms 630.97 ms]
|
||||
thrpt: [202.86 MiB/s 205.63 MiB/s 208.57 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Random write_delta=Yes
|
||||
time: [1.0299 s 1.0766 s 1.1273 s]
|
||||
thrpt: [113.55 MiB/s 118.90 MiB/s 124.29 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=RandomReuse(1023) write_delta=Y...
|
||||
time: [637.80 ms 647.78 ms 658.01 ms]
|
||||
thrpt: [194.53 MiB/s 197.60 MiB/s 200.69 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=100 key_layout=Sequential write_delta=No
|
||||
time: [266.09 ms 267.20 ms 268.31 ms]
|
||||
thrpt: [477.06 MiB/s 479.04 MiB/s 481.04 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=Yes
|
||||
time: [269.34 ms 273.27 ms 277.69 ms]
|
||||
thrpt: [460.95 MiB/s 468.40 MiB/s 475.24 MiB/s]
|
||||
ingest/io_mode=DirectRw volume_mib=128 key_size_bytes=8192 key_layout=Sequential write_delta=No
|
||||
time: [123.18 ms 124.24 ms 125.15 ms]
|
||||
thrpt: [1022.8 MiB/s 1.0061 GiB/s 1.0148 GiB/s]
|
||||
*/
|
||||
|
||||
@@ -419,23 +419,6 @@ impl Client {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn timeline_detail(
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
) -> Result<TimelineInfo> {
|
||||
let uri = format!(
|
||||
"{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}",
|
||||
self.mgmt_api_endpoint
|
||||
);
|
||||
|
||||
self.request(Method::GET, &uri, ())
|
||||
.await?
|
||||
.json()
|
||||
.await
|
||||
.map_err(Error::ReceiveBody)
|
||||
}
|
||||
|
||||
pub async fn timeline_archival_config(
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
|
||||
@@ -225,11 +225,6 @@ pub struct PageServerConf {
|
||||
/// Does not force TLS: the client negotiates TLS usage during the handshake.
|
||||
/// Uses key and certificate from ssl_key_file/ssl_cert_file.
|
||||
pub enable_tls_page_service_api: bool,
|
||||
|
||||
/// Run in development mode, which disables certain safety checks
|
||||
/// such as authentication requirements for HTTP and PostgreSQL APIs.
|
||||
/// This is insecure and should only be used in development environments.
|
||||
pub dev_mode: bool,
|
||||
}
|
||||
|
||||
/// Token for authentication to safekeepers
|
||||
@@ -403,7 +398,6 @@ impl PageServerConf {
|
||||
generate_unarchival_heatmap,
|
||||
tracing,
|
||||
enable_tls_page_service_api,
|
||||
dev_mode,
|
||||
} = config_toml;
|
||||
|
||||
let mut conf = PageServerConf {
|
||||
@@ -455,7 +449,6 @@ impl PageServerConf {
|
||||
get_vectored_concurrent_io,
|
||||
tracing,
|
||||
enable_tls_page_service_api,
|
||||
dev_mode,
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// fields that require additional validation or custom handling
|
||||
|
||||
@@ -27,9 +27,6 @@ pub(super) enum Name {
|
||||
/// Timeline logical size
|
||||
#[serde(rename = "timeline_logical_size")]
|
||||
LogicalSize,
|
||||
/// Timeline delta from parent (WAL bytes clamped to logical size)
|
||||
#[serde(rename = "timeline_changed_bytes_from_parent")]
|
||||
ChangedBytesFromParent,
|
||||
/// Tenant remote size
|
||||
#[serde(rename = "remote_storage_size")]
|
||||
RemoteSize,
|
||||
@@ -178,24 +175,6 @@ impl MetricsKey {
|
||||
.absolute_values()
|
||||
}
|
||||
|
||||
/// [`Timeline::get_last_record_lsn`] - [`Timeline::get_ancestor_lsn`], clamped to
|
||||
/// [`Timeline::get_current_logical_size`].
|
||||
///
|
||||
/// [`Timeline::get_last_record_lsn`]: crate::tenant::Timeline::get_last_record_lsn
|
||||
/// [`Timeline::get_ancestor_lsn`]: crate::tenant::Timeline::get_ancestor_lsn
|
||||
/// [`Timeline::get_current_logical_size`]: crate::tenant::Timeline::get_current_logical_size
|
||||
const fn timeline_changed_bytes_from_parent(
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
) -> AbsoluteValueFactory {
|
||||
MetricsKey {
|
||||
tenant_id,
|
||||
timeline_id: Some(timeline_id),
|
||||
metric: Name::ChangedBytesFromParent,
|
||||
}
|
||||
.absolute_values()
|
||||
}
|
||||
|
||||
/// [`TenantShard::remote_size`]
|
||||
///
|
||||
/// [`TenantShard::remote_size`]: crate::tenant::TenantShard::remote_size
|
||||
@@ -392,7 +371,6 @@ struct TimelineSnapshot {
|
||||
loaded_at: (Lsn, SystemTime),
|
||||
last_record_lsn: Lsn,
|
||||
current_exact_logical_size: Option<u64>,
|
||||
changed_bytes_from_parent: Option<u64>,
|
||||
}
|
||||
|
||||
impl TimelineSnapshot {
|
||||
@@ -428,22 +406,10 @@ impl TimelineSnapshot {
|
||||
}
|
||||
};
|
||||
|
||||
// This is an approximation of how much data has changed on this branch vs. its
|
||||
// ancestor: the number of bytes written to the WAL, clamped to the size of the branch.
|
||||
let changed_bytes_from_parent = current_exact_logical_size.and_then(|size| {
|
||||
if t.get_ancestor_lsn() == Lsn::MAX {
|
||||
return None;
|
||||
}
|
||||
t.get_last_record_lsn()
|
||||
.checked_sub(t.get_ancestor_lsn())
|
||||
.map(|wal_bytes| wal_bytes.0.min(size))
|
||||
});
|
||||
|
||||
Ok(Some(TimelineSnapshot {
|
||||
loaded_at,
|
||||
last_record_lsn,
|
||||
current_exact_logical_size,
|
||||
changed_bytes_from_parent,
|
||||
}))
|
||||
}
|
||||
}
|
||||
@@ -521,17 +487,6 @@ impl TimelineSnapshot {
|
||||
metrics.push(factory.at(now, size));
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
let factory = MetricsKey::timeline_changed_bytes_from_parent(tenant_id, timeline_id);
|
||||
let current_or_previous = self
|
||||
.changed_bytes_from_parent
|
||||
.or_else(|| cache.get(factory.key()).map(|item| item.value));
|
||||
|
||||
if let Some(size) = current_or_previous {
|
||||
metrics.push(factory.at(now, size));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -18,7 +18,6 @@ fn startup_collected_timeline_metrics_before_advancing() {
|
||||
loaded_at: (disk_consistent_lsn, SystemTime::now()),
|
||||
last_record_lsn: disk_consistent_lsn,
|
||||
current_exact_logical_size: Some(0x42000),
|
||||
changed_bytes_from_parent: Some(0x1000),
|
||||
};
|
||||
|
||||
let now = DateTime::<Utc>::from(SystemTime::now());
|
||||
@@ -34,8 +33,7 @@ fn startup_collected_timeline_metrics_before_advancing() {
|
||||
0
|
||||
),
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
|
||||
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 0x42000),
|
||||
MetricsKey::timeline_changed_bytes_from_parent(tenant_id, timeline_id).at(now, 0x1000)
|
||||
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 0x42000)
|
||||
]
|
||||
);
|
||||
}
|
||||
@@ -62,7 +60,6 @@ fn startup_collected_timeline_metrics_second_round() {
|
||||
loaded_at: (disk_consistent_lsn, init),
|
||||
last_record_lsn: disk_consistent_lsn,
|
||||
current_exact_logical_size: Some(0x42000),
|
||||
changed_bytes_from_parent: Some(0x1000),
|
||||
};
|
||||
|
||||
snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);
|
||||
@@ -72,8 +69,7 @@ fn startup_collected_timeline_metrics_second_round() {
|
||||
&[
|
||||
MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(before, now, 0),
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
|
||||
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 0x42000),
|
||||
MetricsKey::timeline_changed_bytes_from_parent(tenant_id, timeline_id).at(now, 0x1000)
|
||||
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 0x42000)
|
||||
]
|
||||
);
|
||||
}
|
||||
@@ -108,7 +104,6 @@ fn startup_collected_timeline_metrics_nth_round_at_same_lsn() {
|
||||
loaded_at: (disk_consistent_lsn, init),
|
||||
last_record_lsn: disk_consistent_lsn,
|
||||
current_exact_logical_size: Some(0x42000),
|
||||
changed_bytes_from_parent: Some(0x1000),
|
||||
};
|
||||
|
||||
snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);
|
||||
@@ -118,8 +113,7 @@ fn startup_collected_timeline_metrics_nth_round_at_same_lsn() {
|
||||
&[
|
||||
MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(just_before, now, 0),
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at(now, disk_consistent_lsn.0),
|
||||
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 0x42000),
|
||||
MetricsKey::timeline_changed_bytes_from_parent(tenant_id, timeline_id).at(now, 0x1000)
|
||||
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 0x42000)
|
||||
]
|
||||
);
|
||||
}
|
||||
@@ -147,7 +141,6 @@ fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
|
||||
loaded_at: (Lsn(50), at_restart),
|
||||
last_record_lsn: Lsn(50),
|
||||
current_exact_logical_size: None,
|
||||
changed_bytes_from_parent: None,
|
||||
};
|
||||
|
||||
let mut cache = HashMap::from([
|
||||
@@ -209,7 +202,6 @@ fn post_restart_current_exact_logical_size_uses_cached() {
|
||||
loaded_at: (Lsn(50), at_restart),
|
||||
last_record_lsn: Lsn(50),
|
||||
current_exact_logical_size: None,
|
||||
changed_bytes_from_parent: Some(0x1000),
|
||||
};
|
||||
|
||||
let cache = HashMap::from([MetricsKey::timeline_logical_size(tenant_id, timeline_id)
|
||||
|
||||
@@ -3,11 +3,10 @@ use std::collections::HashMap;
|
||||
use futures::Future;
|
||||
use pageserver_api::config::NodeMetadata;
|
||||
use pageserver_api::controller_api::{AvailabilityZone, NodeRegisterRequest};
|
||||
use pageserver_api::models::ShardImportStatus;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_api::upcall_api::{
|
||||
PutTimelineImportStatusRequest, ReAttachRequest, ReAttachResponse, ReAttachResponseTenant,
|
||||
ValidateRequest, ValidateRequestTenant, ValidateResponse,
|
||||
ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest,
|
||||
ValidateRequestTenant, ValidateResponse,
|
||||
};
|
||||
use reqwest::Certificate;
|
||||
use serde::Serialize;
|
||||
@@ -15,7 +14,7 @@ use serde::de::DeserializeOwned;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use url::Url;
|
||||
use utils::generation::Generation;
|
||||
use utils::id::{NodeId, TimelineId};
|
||||
use utils::id::NodeId;
|
||||
use utils::{backoff, failpoint_support};
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
@@ -47,12 +46,6 @@ pub trait StorageControllerUpcallApi {
|
||||
&self,
|
||||
tenants: Vec<(TenantShardId, Generation)>,
|
||||
) -> impl Future<Output = Result<HashMap<TenantShardId, bool>, RetryForeverError>> + Send;
|
||||
fn put_timeline_import_status(
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
status: ShardImportStatus,
|
||||
) -> impl Future<Output = Result<(), RetryForeverError>> + Send;
|
||||
}
|
||||
|
||||
impl StorageControllerUpcallClient {
|
||||
@@ -280,30 +273,4 @@ impl StorageControllerUpcallApi for StorageControllerUpcallClient {
|
||||
|
||||
Ok(result.into_iter().collect())
|
||||
}
|
||||
|
||||
/// Send a shard import status to the storage controller
|
||||
///
|
||||
/// The implementation must have at-least-once delivery semantics.
|
||||
/// To this end, we retry the request until it succeeds. If the pageserver
|
||||
/// restarts or crashes, the shard import will start again from the beggining.
|
||||
#[tracing::instrument(skip_all)] // so that warning logs from retry_http_forever have context
|
||||
async fn put_timeline_import_status(
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
status: ShardImportStatus,
|
||||
) -> Result<(), RetryForeverError> {
|
||||
let url = self
|
||||
.base_url
|
||||
.join("timeline_import_status")
|
||||
.expect("Failed to build path");
|
||||
|
||||
let request = PutTimelineImportStatusRequest {
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
status,
|
||||
};
|
||||
|
||||
self.retry_http_forever(&url, request).await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -787,15 +787,6 @@ mod test {
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn put_timeline_import_status(
|
||||
&self,
|
||||
_tenant_shard_id: TenantShardId,
|
||||
_timeline_id: TimelineId,
|
||||
_status: pageserver_api::models::ShardImportStatus,
|
||||
) -> Result<(), RetryForeverError> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
async fn setup(test_name: &str) -> anyhow::Result<TestSetup> {
|
||||
|
||||
@@ -1289,7 +1289,6 @@ pub(crate) enum StorageIoOperation {
|
||||
Seek,
|
||||
Fsync,
|
||||
Metadata,
|
||||
SetLen,
|
||||
}
|
||||
|
||||
impl StorageIoOperation {
|
||||
@@ -1304,7 +1303,6 @@ impl StorageIoOperation {
|
||||
StorageIoOperation::Seek => "seek",
|
||||
StorageIoOperation::Fsync => "fsync",
|
||||
StorageIoOperation::Metadata => "metadata",
|
||||
StorageIoOperation::SetLen => "set_len",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3816,24 +3816,6 @@ impl TenantShard {
|
||||
MaybeDeletedIndexPart::IndexPart(p) => p,
|
||||
};
|
||||
|
||||
// A shard split may not take place while a timeline import is on-going
|
||||
// for the tenant. Timeline imports run as part of each tenant shard
|
||||
// and rely on the sharding scheme to split the work among pageservers.
|
||||
// If we were to split in the middle of this process, we would have to
|
||||
// either ensure that it's driven to completion on the old shard set
|
||||
// or transfer it to the new shard set. It's technically possible, but complex.
|
||||
match index_part.import_pgdata {
|
||||
Some(ref import) if !import.is_done() => {
|
||||
anyhow::bail!(
|
||||
"Cannot split due to import with idempotency key: {:?}",
|
||||
import.idempotency_key()
|
||||
);
|
||||
}
|
||||
Some(_) | None => {
|
||||
// fallthrough
|
||||
}
|
||||
}
|
||||
|
||||
for child_shard in child_shards {
|
||||
tracing::info!(%timeline_id, "Uploading index_part for child {}", child_shard.to_index());
|
||||
upload_index_part(
|
||||
|
||||
@@ -15,23 +15,21 @@
|
||||
//! len >= 128: 1CCCXXXX XXXXXXXX XXXXXXXX XXXXXXXX
|
||||
//!
|
||||
use std::cmp::min;
|
||||
use std::io::Error;
|
||||
|
||||
use anyhow::Context;
|
||||
use async_compression::Level;
|
||||
use bytes::{BufMut, BytesMut};
|
||||
use pageserver_api::models::ImageCompressionAlgorithm;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tokio_epoll_uring::IoBuf;
|
||||
use tokio_epoll_uring::{BoundedBuf, IoBuf, Slice};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::warn;
|
||||
|
||||
use crate::context::RequestContext;
|
||||
use crate::page_cache::PAGE_SZ;
|
||||
use crate::tenant::block_io::BlockCursor;
|
||||
use crate::virtual_file::IoBufferMut;
|
||||
use crate::virtual_file::VirtualFile;
|
||||
use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt};
|
||||
use crate::virtual_file::owned_buffers_io::write::{BufferedWriter, FlushTaskError};
|
||||
use crate::virtual_file::owned_buffers_io::write::{BufferedWriterShutdownMode, OwnedAsyncWriter};
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub struct CompressionInfo {
|
||||
@@ -52,9 +50,12 @@ pub struct Header {
|
||||
|
||||
impl Header {
|
||||
/// Decodes a header from a byte slice.
|
||||
pub fn decode(bytes: &[u8]) -> anyhow::Result<Self> {
|
||||
pub fn decode(bytes: &[u8]) -> Result<Self, std::io::Error> {
|
||||
let Some(&first_header_byte) = bytes.first() else {
|
||||
anyhow::bail!("zero-length blob header");
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidData,
|
||||
"zero-length blob header",
|
||||
));
|
||||
};
|
||||
|
||||
// If the first bit is 0, this is just a 1-byte length prefix up to 128 bytes.
|
||||
@@ -68,9 +69,12 @@ impl Header {
|
||||
|
||||
// Otherwise, this is a 4-byte header containing compression information and length.
|
||||
const HEADER_LEN: usize = 4;
|
||||
let mut header_buf: [u8; HEADER_LEN] = bytes[0..HEADER_LEN]
|
||||
.try_into()
|
||||
.map_err(|_| anyhow::anyhow!("blob header too short: {bytes:?}"))?;
|
||||
let mut header_buf: [u8; HEADER_LEN] = bytes[0..HEADER_LEN].try_into().map_err(|_| {
|
||||
std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidData,
|
||||
format!("blob header too short: {bytes:?}"),
|
||||
)
|
||||
})?;
|
||||
|
||||
// TODO: verify the compression bits and convert to an enum.
|
||||
let compression_bits = header_buf[0] & LEN_COMPRESSION_BIT_MASK;
|
||||
@@ -90,16 +94,6 @@ impl Header {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum WriteBlobError {
|
||||
#[error(transparent)]
|
||||
Flush(FlushTaskError),
|
||||
#[error("blob too large ({len} bytes)")]
|
||||
BlobTooLarge { len: usize },
|
||||
#[error(transparent)]
|
||||
WriteBlobRaw(anyhow::Error),
|
||||
}
|
||||
|
||||
impl BlockCursor<'_> {
|
||||
/// Read a blob into a new buffer.
|
||||
pub async fn read_blob(
|
||||
@@ -219,64 +213,143 @@ pub(super) const BYTE_UNCOMPRESSED: u8 = 0x80;
|
||||
pub(super) const BYTE_ZSTD: u8 = BYTE_UNCOMPRESSED | 0x10;
|
||||
|
||||
/// A wrapper of `VirtualFile` that allows users to write blobs.
|
||||
pub struct BlobWriter<W> {
|
||||
///
|
||||
/// If a `BlobWriter` is dropped, the internal buffer will be
|
||||
/// discarded. You need to call [`flush_buffer`](Self::flush_buffer)
|
||||
/// manually before dropping.
|
||||
pub struct BlobWriter<const BUFFERED: bool> {
|
||||
inner: VirtualFile,
|
||||
offset: u64,
|
||||
/// A buffer to save on write calls, only used if BUFFERED=true
|
||||
buf: Vec<u8>,
|
||||
/// We do tiny writes for the length headers; they need to be in an owned buffer;
|
||||
io_buf: Option<BytesMut>,
|
||||
writer: BufferedWriter<IoBufferMut, W>,
|
||||
offset: u64,
|
||||
}
|
||||
|
||||
impl<W> BlobWriter<W>
|
||||
where
|
||||
W: OwnedAsyncWriter + std::fmt::Debug + Send + Sync + 'static,
|
||||
{
|
||||
/// See [`BufferedWriter`] struct-level doc comment for semantics of `start_offset`.
|
||||
impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
|
||||
pub fn new(
|
||||
file: W,
|
||||
inner: VirtualFile,
|
||||
start_offset: u64,
|
||||
gate: &utils::sync::gate::Gate,
|
||||
cancel: CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
flush_task_span: tracing::Span,
|
||||
) -> anyhow::Result<Self> {
|
||||
Ok(Self {
|
||||
io_buf: Some(BytesMut::new()),
|
||||
writer: BufferedWriter::new(
|
||||
file,
|
||||
start_offset,
|
||||
|| IoBufferMut::with_capacity(Self::CAPACITY),
|
||||
gate.enter()?,
|
||||
cancel,
|
||||
ctx,
|
||||
flush_task_span,
|
||||
),
|
||||
_gate: &utils::sync::gate::Gate,
|
||||
_cancel: CancellationToken,
|
||||
_ctx: &RequestContext,
|
||||
) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
offset: start_offset,
|
||||
})
|
||||
buf: Vec::with_capacity(Self::CAPACITY),
|
||||
io_buf: Some(BytesMut::new()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn size(&self) -> u64 {
|
||||
self.offset
|
||||
}
|
||||
|
||||
const CAPACITY: usize = 64 * 1024;
|
||||
const CAPACITY: usize = if BUFFERED { 64 * 1024 } else { 0 };
|
||||
|
||||
/// Writes `src_buf` to the file at the current offset.
|
||||
/// Writes the given buffer directly to the underlying `VirtualFile`.
|
||||
/// You need to make sure that the internal buffer is empty, otherwise
|
||||
/// data will be written in wrong order.
|
||||
#[inline(always)]
|
||||
async fn write_all_unbuffered<Buf: IoBuf + Send>(
|
||||
&mut self,
|
||||
src_buf: FullSlice<Buf>,
|
||||
ctx: &RequestContext,
|
||||
) -> (FullSlice<Buf>, Result<(), Error>) {
|
||||
let (src_buf, res) = self.inner.write_all(src_buf, ctx).await;
|
||||
let nbytes = match res {
|
||||
Ok(nbytes) => nbytes,
|
||||
Err(e) => return (src_buf, Err(e)),
|
||||
};
|
||||
self.offset += nbytes as u64;
|
||||
(src_buf, Ok(()))
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
/// Flushes the internal buffer to the underlying `VirtualFile`.
|
||||
pub async fn flush_buffer(&mut self, ctx: &RequestContext) -> Result<(), Error> {
|
||||
let buf = std::mem::take(&mut self.buf);
|
||||
let (slice, res) = self.inner.write_all(buf.slice_len(), ctx).await;
|
||||
res?;
|
||||
let mut buf = slice.into_raw_slice().into_inner();
|
||||
buf.clear();
|
||||
self.buf = buf;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
/// Writes as much of `src_buf` into the internal buffer as it fits
|
||||
fn write_into_buffer(&mut self, src_buf: &[u8]) -> usize {
|
||||
let remaining = Self::CAPACITY - self.buf.len();
|
||||
let to_copy = src_buf.len().min(remaining);
|
||||
self.buf.extend_from_slice(&src_buf[..to_copy]);
|
||||
self.offset += to_copy as u64;
|
||||
to_copy
|
||||
}
|
||||
|
||||
/// Internal, possibly buffered, write function
|
||||
async fn write_all<Buf: IoBuf + Send>(
|
||||
&mut self,
|
||||
src_buf: FullSlice<Buf>,
|
||||
ctx: &RequestContext,
|
||||
) -> (FullSlice<Buf>, Result<(), FlushTaskError>) {
|
||||
let res = self
|
||||
.writer
|
||||
// TODO: why are we taking a FullSlice if we're going to pass a borrow downstack?
|
||||
// Can remove all the complexity around owned buffers upstack
|
||||
.write_buffered_borrowed(&src_buf, ctx)
|
||||
.await
|
||||
.map(|len| {
|
||||
self.offset += len as u64;
|
||||
});
|
||||
) -> (FullSlice<Buf>, Result<(), Error>) {
|
||||
let src_buf = src_buf.into_raw_slice();
|
||||
let src_buf_bounds = src_buf.bounds();
|
||||
let restore = move |src_buf_slice: Slice<_>| {
|
||||
FullSlice::must_new(Slice::from_buf_bounds(
|
||||
src_buf_slice.into_inner(),
|
||||
src_buf_bounds,
|
||||
))
|
||||
};
|
||||
|
||||
(src_buf, res)
|
||||
if !BUFFERED {
|
||||
assert!(self.buf.is_empty());
|
||||
return self
|
||||
.write_all_unbuffered(FullSlice::must_new(src_buf), ctx)
|
||||
.await;
|
||||
}
|
||||
let remaining = Self::CAPACITY - self.buf.len();
|
||||
let src_buf_len = src_buf.bytes_init();
|
||||
if src_buf_len == 0 {
|
||||
return (restore(src_buf), Ok(()));
|
||||
}
|
||||
let mut src_buf = src_buf.slice(0..src_buf_len);
|
||||
// First try to copy as much as we can into the buffer
|
||||
if remaining > 0 {
|
||||
let copied = self.write_into_buffer(&src_buf);
|
||||
src_buf = src_buf.slice(copied..);
|
||||
}
|
||||
// Then, if the buffer is full, flush it out
|
||||
if self.buf.len() == Self::CAPACITY {
|
||||
if let Err(e) = self.flush_buffer(ctx).await {
|
||||
return (restore(src_buf), Err(e));
|
||||
}
|
||||
}
|
||||
// Finally, write the tail of src_buf:
|
||||
// If it wholly fits into the buffer without
|
||||
// completely filling it, then put it there.
|
||||
// If not, write it out directly.
|
||||
let src_buf = if !src_buf.is_empty() {
|
||||
assert_eq!(self.buf.len(), 0);
|
||||
if src_buf.len() < Self::CAPACITY {
|
||||
let copied = self.write_into_buffer(&src_buf);
|
||||
// We just verified above that src_buf fits into our internal buffer.
|
||||
assert_eq!(copied, src_buf.len());
|
||||
restore(src_buf)
|
||||
} else {
|
||||
let (src_buf, res) = self
|
||||
.write_all_unbuffered(FullSlice::must_new(src_buf), ctx)
|
||||
.await;
|
||||
if let Err(e) = res {
|
||||
return (src_buf, Err(e));
|
||||
}
|
||||
src_buf
|
||||
}
|
||||
} else {
|
||||
restore(src_buf)
|
||||
};
|
||||
(src_buf, Ok(()))
|
||||
}
|
||||
|
||||
/// Write a blob of data. Returns the offset that it was written to,
|
||||
@@ -285,7 +358,7 @@ where
|
||||
&mut self,
|
||||
srcbuf: FullSlice<Buf>,
|
||||
ctx: &RequestContext,
|
||||
) -> (FullSlice<Buf>, Result<u64, WriteBlobError>) {
|
||||
) -> (FullSlice<Buf>, Result<u64, Error>) {
|
||||
let (buf, res) = self
|
||||
.write_blob_maybe_compressed(srcbuf, ctx, ImageCompressionAlgorithm::Disabled)
|
||||
.await;
|
||||
@@ -299,10 +372,7 @@ where
|
||||
srcbuf: FullSlice<Buf>,
|
||||
ctx: &RequestContext,
|
||||
algorithm: ImageCompressionAlgorithm,
|
||||
) -> (
|
||||
FullSlice<Buf>,
|
||||
Result<(u64, CompressionInfo), WriteBlobError>,
|
||||
) {
|
||||
) -> (FullSlice<Buf>, Result<(u64, CompressionInfo), Error>) {
|
||||
let offset = self.offset;
|
||||
let mut compression_info = CompressionInfo {
|
||||
written_compressed: false,
|
||||
@@ -318,16 +388,14 @@ where
|
||||
if len < 128 {
|
||||
// Short blob. Write a 1-byte length header
|
||||
io_buf.put_u8(len as u8);
|
||||
let (slice, res) = self.write_all(io_buf.slice_len(), ctx).await;
|
||||
let res = res.map_err(WriteBlobError::Flush);
|
||||
((slice, res), srcbuf)
|
||||
(self.write_all(io_buf.slice_len(), ctx).await, srcbuf)
|
||||
} else {
|
||||
// Write a 4-byte length header
|
||||
if len > MAX_SUPPORTED_BLOB_LEN {
|
||||
return (
|
||||
(
|
||||
io_buf.slice_len(),
|
||||
Err(WriteBlobError::BlobTooLarge { len }),
|
||||
Err(Error::other(format!("blob too large ({len} bytes)"))),
|
||||
),
|
||||
srcbuf,
|
||||
);
|
||||
@@ -361,9 +429,7 @@ where
|
||||
assert_eq!(len_buf[0] & 0xf0, 0);
|
||||
len_buf[0] |= high_bit_mask;
|
||||
io_buf.extend_from_slice(&len_buf[..]);
|
||||
let (slice, res) = self.write_all(io_buf.slice_len(), ctx).await;
|
||||
let res = res.map_err(WriteBlobError::Flush);
|
||||
((slice, res), srcbuf)
|
||||
(self.write_all(io_buf.slice_len(), ctx).await, srcbuf)
|
||||
}
|
||||
}
|
||||
.await;
|
||||
@@ -378,7 +444,6 @@ where
|
||||
} else {
|
||||
self.write_all(srcbuf, ctx).await
|
||||
};
|
||||
let res = res.map_err(WriteBlobError::Flush);
|
||||
(srcbuf, res.map(|_| (offset, compression_info)))
|
||||
}
|
||||
|
||||
@@ -387,12 +452,9 @@ where
|
||||
&mut self,
|
||||
raw_with_header: FullSlice<Buf>,
|
||||
ctx: &RequestContext,
|
||||
) -> (FullSlice<Buf>, Result<u64, WriteBlobError>) {
|
||||
) -> (FullSlice<Buf>, Result<u64, Error>) {
|
||||
// Verify the header, to ensure we don't write invalid/corrupt data.
|
||||
let header = match Header::decode(&raw_with_header)
|
||||
.context("decoding blob header")
|
||||
.map_err(WriteBlobError::WriteBlobRaw)
|
||||
{
|
||||
let header = match Header::decode(&raw_with_header) {
|
||||
Ok(header) => header,
|
||||
Err(err) => return (raw_with_header, Err(err)),
|
||||
};
|
||||
@@ -401,26 +463,42 @@ where
|
||||
let raw_len = raw_with_header.len();
|
||||
return (
|
||||
raw_with_header,
|
||||
Err(WriteBlobError::WriteBlobRaw(anyhow::anyhow!(
|
||||
"header length mismatch: {header_total_len} != {raw_len}"
|
||||
))),
|
||||
Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidData,
|
||||
format!("header length mismatch: {header_total_len} != {raw_len}"),
|
||||
)),
|
||||
);
|
||||
}
|
||||
|
||||
let offset = self.offset;
|
||||
let (raw_with_header, result) = self.write_all(raw_with_header, ctx).await;
|
||||
let result = result.map_err(WriteBlobError::Flush);
|
||||
(raw_with_header, result.map(|_| offset))
|
||||
}
|
||||
}
|
||||
|
||||
/// Finish this blob writer and return the underlying `W`.
|
||||
pub async fn shutdown(
|
||||
self,
|
||||
mode: BufferedWriterShutdownMode,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<W, FlushTaskError> {
|
||||
let (_, file) = self.writer.shutdown(mode, ctx).await?;
|
||||
Ok(file)
|
||||
impl BlobWriter<true> {
|
||||
/// Access the underlying `VirtualFile`.
|
||||
///
|
||||
/// This function flushes the internal buffer before giving access
|
||||
/// to the underlying `VirtualFile`.
|
||||
pub async fn into_inner(mut self, ctx: &RequestContext) -> Result<VirtualFile, Error> {
|
||||
self.flush_buffer(ctx).await?;
|
||||
Ok(self.inner)
|
||||
}
|
||||
|
||||
/// Access the underlying `VirtualFile`.
|
||||
///
|
||||
/// Unlike [`into_inner`](Self::into_inner), this doesn't flush
|
||||
/// the internal buffer before giving access.
|
||||
pub fn into_inner_no_flush(self) -> VirtualFile {
|
||||
self.inner
|
||||
}
|
||||
}
|
||||
|
||||
impl BlobWriter<false> {
|
||||
/// Access the underlying `VirtualFile`.
|
||||
pub fn into_inner(self) -> VirtualFile {
|
||||
self.inner
|
||||
}
|
||||
}
|
||||
|
||||
@@ -429,25 +507,21 @@ pub(crate) mod tests {
|
||||
use camino::Utf8PathBuf;
|
||||
use camino_tempfile::Utf8TempDir;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use tracing::info_span;
|
||||
|
||||
use super::*;
|
||||
use crate::context::DownloadBehavior;
|
||||
use crate::task_mgr::TaskKind;
|
||||
use crate::tenant::block_io::BlockReaderRef;
|
||||
use crate::virtual_file;
|
||||
use crate::virtual_file::TempVirtualFile;
|
||||
use crate::virtual_file::VirtualFile;
|
||||
|
||||
async fn round_trip_test(blobs: &[Vec<u8>]) -> anyhow::Result<()> {
|
||||
round_trip_test_compressed(blobs, false).await
|
||||
async fn round_trip_test<const BUFFERED: bool>(blobs: &[Vec<u8>]) -> Result<(), Error> {
|
||||
round_trip_test_compressed::<BUFFERED>(blobs, false).await
|
||||
}
|
||||
|
||||
pub(crate) async fn write_maybe_compressed(
|
||||
pub(crate) async fn write_maybe_compressed<const BUFFERED: bool>(
|
||||
blobs: &[Vec<u8>],
|
||||
compression: bool,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<(Utf8TempDir, Utf8PathBuf, Vec<u64>)> {
|
||||
) -> Result<(Utf8TempDir, Utf8PathBuf, Vec<u64>), Error> {
|
||||
let temp_dir = camino_tempfile::tempdir()?;
|
||||
let pathbuf = temp_dir.path().join("file");
|
||||
let gate = utils::sync::gate::Gate::default();
|
||||
@@ -456,19 +530,8 @@ pub(crate) mod tests {
|
||||
// Write part (in block to drop the file)
|
||||
let mut offsets = Vec::new();
|
||||
{
|
||||
let file = TempVirtualFile::new(
|
||||
VirtualFile::open_with_options_v2(
|
||||
pathbuf.as_path(),
|
||||
virtual_file::OpenOptions::new()
|
||||
.create_new(true)
|
||||
.write(true),
|
||||
ctx,
|
||||
)
|
||||
.await?,
|
||||
gate.enter()?,
|
||||
);
|
||||
let mut wtr =
|
||||
BlobWriter::new(file, 0, &gate, cancel.clone(), ctx, info_span!("test")).unwrap();
|
||||
let file = VirtualFile::create(pathbuf.as_path(), ctx).await?;
|
||||
let mut wtr = BlobWriter::<BUFFERED>::new(file, 0, &gate, cancel.clone(), ctx);
|
||||
for blob in blobs.iter() {
|
||||
let (_, res) = if compression {
|
||||
let res = wtr
|
||||
@@ -485,28 +548,26 @@ pub(crate) mod tests {
|
||||
let offs = res?;
|
||||
offsets.push(offs);
|
||||
}
|
||||
let file = wtr
|
||||
.shutdown(
|
||||
BufferedWriterShutdownMode::ZeroPadToNextMultiple(PAGE_SZ),
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
file.disarm_into_inner()
|
||||
};
|
||||
// Write out one page worth of zeros so that we can
|
||||
// read again with read_blk
|
||||
let (_, res) = wtr.write_blob(vec![0; PAGE_SZ].slice_len(), ctx).await;
|
||||
let offs = res?;
|
||||
println!("Writing final blob at offs={offs}");
|
||||
wtr.flush_buffer(ctx).await?;
|
||||
}
|
||||
Ok((temp_dir, pathbuf, offsets))
|
||||
}
|
||||
|
||||
async fn round_trip_test_compressed(
|
||||
async fn round_trip_test_compressed<const BUFFERED: bool>(
|
||||
blobs: &[Vec<u8>],
|
||||
compression: bool,
|
||||
) -> anyhow::Result<()> {
|
||||
) -> Result<(), Error> {
|
||||
let ctx =
|
||||
RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test();
|
||||
let (_temp_dir, pathbuf, offsets) =
|
||||
write_maybe_compressed(blobs, compression, &ctx).await?;
|
||||
write_maybe_compressed::<BUFFERED>(blobs, compression, &ctx).await?;
|
||||
|
||||
println!("Done writing!");
|
||||
let file = VirtualFile::open_v2(pathbuf, &ctx).await?;
|
||||
let file = VirtualFile::open(pathbuf, &ctx).await?;
|
||||
let rdr = BlockReaderRef::VirtualFile(&file);
|
||||
let rdr = BlockCursor::new_with_compression(rdr, compression);
|
||||
for (idx, (blob, offset)) in blobs.iter().zip(offsets.iter()).enumerate() {
|
||||
@@ -525,27 +586,30 @@ pub(crate) mod tests {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_one() -> anyhow::Result<()> {
|
||||
async fn test_one() -> Result<(), Error> {
|
||||
let blobs = &[vec![12, 21, 22]];
|
||||
round_trip_test(blobs).await?;
|
||||
round_trip_test::<false>(blobs).await?;
|
||||
round_trip_test::<true>(blobs).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_hello_simple() -> anyhow::Result<()> {
|
||||
async fn test_hello_simple() -> Result<(), Error> {
|
||||
let blobs = &[
|
||||
vec![0, 1, 2, 3],
|
||||
b"Hello, World!".to_vec(),
|
||||
Vec::new(),
|
||||
b"foobar".to_vec(),
|
||||
];
|
||||
round_trip_test(blobs).await?;
|
||||
round_trip_test_compressed(blobs, true).await?;
|
||||
round_trip_test::<false>(blobs).await?;
|
||||
round_trip_test::<true>(blobs).await?;
|
||||
round_trip_test_compressed::<false>(blobs, true).await?;
|
||||
round_trip_test_compressed::<true>(blobs, true).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_really_big_array() -> anyhow::Result<()> {
|
||||
async fn test_really_big_array() -> Result<(), Error> {
|
||||
let blobs = &[
|
||||
b"test".to_vec(),
|
||||
random_array(10 * PAGE_SZ),
|
||||
@@ -554,22 +618,25 @@ pub(crate) mod tests {
|
||||
vec![0xf3; 24 * PAGE_SZ],
|
||||
b"foobar".to_vec(),
|
||||
];
|
||||
round_trip_test(blobs).await?;
|
||||
round_trip_test_compressed(blobs, true).await?;
|
||||
round_trip_test::<false>(blobs).await?;
|
||||
round_trip_test::<true>(blobs).await?;
|
||||
round_trip_test_compressed::<false>(blobs, true).await?;
|
||||
round_trip_test_compressed::<true>(blobs, true).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_arrays_inc() -> anyhow::Result<()> {
|
||||
async fn test_arrays_inc() -> Result<(), Error> {
|
||||
let blobs = (0..PAGE_SZ / 8)
|
||||
.map(|v| random_array(v * 16))
|
||||
.collect::<Vec<_>>();
|
||||
round_trip_test(&blobs).await?;
|
||||
round_trip_test::<false>(&blobs).await?;
|
||||
round_trip_test::<true>(&blobs).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_arrays_random_size() -> anyhow::Result<()> {
|
||||
async fn test_arrays_random_size() -> Result<(), Error> {
|
||||
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
|
||||
let blobs = (0..1024)
|
||||
.map(|_| {
|
||||
@@ -581,18 +648,20 @@ pub(crate) mod tests {
|
||||
random_array(sz.into())
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
round_trip_test(&blobs).await?;
|
||||
round_trip_test::<false>(&blobs).await?;
|
||||
round_trip_test::<true>(&blobs).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_arrays_page_boundary() -> anyhow::Result<()> {
|
||||
async fn test_arrays_page_boundary() -> Result<(), Error> {
|
||||
let blobs = &[
|
||||
random_array(PAGE_SZ - 4),
|
||||
random_array(PAGE_SZ - 4),
|
||||
random_array(PAGE_SZ - 4),
|
||||
];
|
||||
round_trip_test(blobs).await?;
|
||||
round_trip_test::<false>(blobs).await?;
|
||||
round_trip_test::<true>(blobs).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,12 +4,14 @@
|
||||
|
||||
use std::ops::Deref;
|
||||
|
||||
use bytes::Bytes;
|
||||
|
||||
use super::storage_layer::delta_layer::{Adapter, DeltaLayerInner};
|
||||
use crate::context::RequestContext;
|
||||
use crate::page_cache::{self, FileId, PAGE_SZ, PageReadGuard, PageWriteGuard, ReadBufResult};
|
||||
#[cfg(test)]
|
||||
use crate::virtual_file::IoBufferMut;
|
||||
use crate::virtual_file::{IoBuffer, VirtualFile};
|
||||
use crate::virtual_file::VirtualFile;
|
||||
|
||||
/// This is implemented by anything that can read 8 kB (PAGE_SZ)
|
||||
/// blocks, using the page cache
|
||||
@@ -245,17 +247,17 @@ pub trait BlockWriter {
|
||||
/// 'buf' must be of size PAGE_SZ. Returns the block number the page was
|
||||
/// written to.
|
||||
///
|
||||
fn write_blk(&mut self, buf: IoBuffer) -> Result<u32, std::io::Error>;
|
||||
fn write_blk(&mut self, buf: Bytes) -> Result<u32, std::io::Error>;
|
||||
}
|
||||
|
||||
///
|
||||
/// A simple in-memory buffer of blocks.
|
||||
///
|
||||
pub struct BlockBuf {
|
||||
pub blocks: Vec<IoBuffer>,
|
||||
pub blocks: Vec<Bytes>,
|
||||
}
|
||||
impl BlockWriter for BlockBuf {
|
||||
fn write_blk(&mut self, buf: IoBuffer) -> Result<u32, std::io::Error> {
|
||||
fn write_blk(&mut self, buf: Bytes) -> Result<u32, std::io::Error> {
|
||||
assert!(buf.len() == PAGE_SZ);
|
||||
let blknum = self.blocks.len();
|
||||
self.blocks.push(buf);
|
||||
|
||||
@@ -25,7 +25,7 @@ use std::{io, result};
|
||||
|
||||
use async_stream::try_stream;
|
||||
use byteorder::{BE, ReadBytesExt};
|
||||
use bytes::BufMut;
|
||||
use bytes::{BufMut, Bytes, BytesMut};
|
||||
use either::Either;
|
||||
use futures::{Stream, StreamExt};
|
||||
use hex;
|
||||
@@ -34,7 +34,6 @@ use tracing::error;
|
||||
|
||||
use crate::context::RequestContext;
|
||||
use crate::tenant::block_io::{BlockReader, BlockWriter};
|
||||
use crate::virtual_file::{IoBuffer, IoBufferMut, owned_buffers_io::write::Buffer};
|
||||
|
||||
// The maximum size of a value stored in the B-tree. 5 bytes is enough currently.
|
||||
pub const VALUE_SZ: usize = 5;
|
||||
@@ -788,12 +787,12 @@ impl<const L: usize> BuildNode<L> {
|
||||
///
|
||||
/// Serialize the node to on-disk format.
|
||||
///
|
||||
fn pack(&self) -> IoBuffer {
|
||||
fn pack(&self) -> Bytes {
|
||||
assert!(self.keys.len() == self.num_children as usize * self.suffix_len);
|
||||
assert!(self.values.len() == self.num_children as usize * VALUE_SZ);
|
||||
assert!(self.num_children > 0);
|
||||
|
||||
let mut buf = IoBufferMut::with_capacity(PAGE_SZ);
|
||||
let mut buf = BytesMut::new();
|
||||
|
||||
buf.put_u16(self.num_children);
|
||||
buf.put_u8(self.level);
|
||||
@@ -806,7 +805,7 @@ impl<const L: usize> BuildNode<L> {
|
||||
assert!(buf.len() == self.size);
|
||||
|
||||
assert!(buf.len() <= PAGE_SZ);
|
||||
buf.extend_with(0, PAGE_SZ - buf.len());
|
||||
buf.resize(PAGE_SZ, 0);
|
||||
buf.freeze()
|
||||
}
|
||||
|
||||
@@ -840,7 +839,7 @@ pub(crate) mod tests {
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
pub(crate) struct TestDisk {
|
||||
blocks: Vec<IoBuffer>,
|
||||
blocks: Vec<Bytes>,
|
||||
}
|
||||
impl TestDisk {
|
||||
fn new() -> Self {
|
||||
@@ -858,7 +857,7 @@ pub(crate) mod tests {
|
||||
}
|
||||
}
|
||||
impl BlockWriter for &mut TestDisk {
|
||||
fn write_blk(&mut self, buf: IoBuffer) -> io::Result<u32> {
|
||||
fn write_blk(&mut self, buf: Bytes) -> io::Result<u32> {
|
||||
let blknum = self.blocks.len();
|
||||
self.blocks.push(buf);
|
||||
Ok(blknum as u32)
|
||||
|
||||
@@ -12,7 +12,6 @@ use tokio_epoll_uring::{BoundedBuf, Slice};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{error, info_span};
|
||||
use utils::id::TimelineId;
|
||||
use utils::sync::gate::GateGuard;
|
||||
|
||||
use crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64};
|
||||
use crate::config::PageServerConf;
|
||||
@@ -22,33 +21,16 @@ use crate::tenant::storage_layer::inmemory_layer::vectored_dio_read::File;
|
||||
use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAlignedMut;
|
||||
use crate::virtual_file::owned_buffers_io::slice::SliceMutExt;
|
||||
use crate::virtual_file::owned_buffers_io::write::{Buffer, FlushTaskError};
|
||||
use crate::virtual_file::{self, IoBufferMut, TempVirtualFile, VirtualFile, owned_buffers_io};
|
||||
|
||||
use self::owned_buffers_io::write::OwnedAsyncWriter;
|
||||
use crate::virtual_file::{self, IoBufferMut, VirtualFile, owned_buffers_io};
|
||||
|
||||
pub struct EphemeralFile {
|
||||
_tenant_shard_id: TenantShardId,
|
||||
_timeline_id: TimelineId,
|
||||
page_cache_file_id: page_cache::FileId,
|
||||
bytes_written: u64,
|
||||
file: TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter,
|
||||
buffered_writer: BufferedWriter,
|
||||
}
|
||||
|
||||
type BufferedWriter = owned_buffers_io::write::BufferedWriter<
|
||||
IoBufferMut,
|
||||
TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter,
|
||||
>;
|
||||
|
||||
/// A TempVirtualFile that is co-owned by the [`EphemeralFile`]` and [`BufferedWriter`].
|
||||
///
|
||||
/// (Actually [`BufferedWriter`] internally is just a client to a background flush task.
|
||||
/// The co-ownership is between [`EphemeralFile`] and that flush task.)
|
||||
///
|
||||
/// Co-ownership allows us to serve reads for data that has already been flushed by the [`BufferedWriter`].
|
||||
#[derive(Debug, Clone)]
|
||||
struct TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter {
|
||||
inner: Arc<TempVirtualFile>,
|
||||
buffered_writer: owned_buffers_io::write::BufferedWriter<IoBufferMut, VirtualFile>,
|
||||
/// Gate guard is held on as long as we need to do operations in the path (delete on drop)
|
||||
_gate_guard: utils::sync::gate::GateGuard,
|
||||
}
|
||||
|
||||
const TAIL_SZ: usize = 64 * 1024;
|
||||
@@ -62,12 +44,9 @@ impl EphemeralFile {
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<EphemeralFile> {
|
||||
// TempVirtualFile requires us to never reuse a filename while an old
|
||||
// instance of TempVirtualFile created with that filename is not done dropping yet.
|
||||
// So, we use a monotonic counter to disambiguate the filenames.
|
||||
static NEXT_TEMP_DISAMBIGUATOR: AtomicU64 = AtomicU64::new(1);
|
||||
static NEXT_FILENAME: AtomicU64 = AtomicU64::new(1);
|
||||
let filename_disambiguator =
|
||||
NEXT_TEMP_DISAMBIGUATOR.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
NEXT_FILENAME.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
|
||||
let filename = conf
|
||||
.timeline_path(&tenant_shard_id, &timeline_id)
|
||||
@@ -75,17 +54,16 @@ impl EphemeralFile {
|
||||
"ephemeral-{filename_disambiguator}"
|
||||
)));
|
||||
|
||||
let file = TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter::new(
|
||||
let file = Arc::new(
|
||||
VirtualFile::open_with_options_v2(
|
||||
&filename,
|
||||
virtual_file::OpenOptions::new()
|
||||
.create_new(true)
|
||||
.read(true)
|
||||
.write(true),
|
||||
.write(true)
|
||||
.create(true),
|
||||
ctx,
|
||||
)
|
||||
.await?,
|
||||
gate.enter()?,
|
||||
);
|
||||
|
||||
let page_cache_file_id = page_cache::next_file_id(); // XXX get rid, we're not page-caching anymore
|
||||
@@ -95,60 +73,37 @@ impl EphemeralFile {
|
||||
_timeline_id: timeline_id,
|
||||
page_cache_file_id,
|
||||
bytes_written: 0,
|
||||
file: file.clone(),
|
||||
buffered_writer: BufferedWriter::new(
|
||||
buffered_writer: owned_buffers_io::write::BufferedWriter::new(
|
||||
file,
|
||||
0,
|
||||
|| IoBufferMut::with_capacity(TAIL_SZ),
|
||||
gate.enter()?,
|
||||
cancel.child_token(),
|
||||
ctx,
|
||||
info_span!(parent: None, "ephemeral_file_buffered_writer", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), timeline_id=%timeline_id, path = %filename),
|
||||
),
|
||||
_gate_guard: gate.enter()?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter {
|
||||
fn new(file: VirtualFile, gate_guard: GateGuard) -> Self {
|
||||
Self {
|
||||
inner: Arc::new(TempVirtualFile::new(file, gate_guard)),
|
||||
impl Drop for EphemeralFile {
|
||||
fn drop(&mut self) {
|
||||
// unlink the file
|
||||
// we are clear to do this, because we have entered a gate
|
||||
let path = self.buffered_writer.as_inner().path();
|
||||
let res = std::fs::remove_file(path);
|
||||
if let Err(e) = res {
|
||||
if e.kind() != std::io::ErrorKind::NotFound {
|
||||
// just never log the not found errors, we cannot do anything for them; on detach
|
||||
// the tenant directory is already gone.
|
||||
//
|
||||
// not found files might also be related to https://github.com/neondatabase/neon/issues/2442
|
||||
error!("could not remove ephemeral file '{path}': {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl OwnedAsyncWriter for TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter {
|
||||
fn write_all_at<Buf: owned_buffers_io::io_buf_aligned::IoBufAligned + Send>(
|
||||
&self,
|
||||
buf: owned_buffers_io::io_buf_ext::FullSlice<Buf>,
|
||||
offset: u64,
|
||||
ctx: &RequestContext,
|
||||
) -> impl std::future::Future<
|
||||
Output = (
|
||||
owned_buffers_io::io_buf_ext::FullSlice<Buf>,
|
||||
std::io::Result<()>,
|
||||
),
|
||||
> + Send {
|
||||
self.inner.write_all_at(buf, offset, ctx)
|
||||
}
|
||||
|
||||
fn set_len(
|
||||
&self,
|
||||
len: u64,
|
||||
ctx: &RequestContext,
|
||||
) -> impl Future<Output = std::io::Result<()>> + Send {
|
||||
self.inner.set_len(len, ctx)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Deref for TempVirtualFileCoOwnedByEphemeralFileAndBufferedWriter {
|
||||
type Target = VirtualFile;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.inner
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub(crate) enum EphemeralFileWriteError {
|
||||
#[error("{0}")]
|
||||
@@ -307,9 +262,9 @@ impl super::storage_layer::inmemory_layer::vectored_dio_read::File for Ephemeral
|
||||
let mutable_range = Range(std::cmp::max(start, submitted_offset), end);
|
||||
|
||||
let dst = if written_range.len() > 0 {
|
||||
let file: &VirtualFile = self.buffered_writer.as_inner();
|
||||
let bounds = dst.bounds();
|
||||
let slice = self
|
||||
.file
|
||||
let slice = file
|
||||
.read_exact_at(dst.slice(0..written_range.len().into_usize()), start, ctx)
|
||||
.await?;
|
||||
Slice::from_buf_bounds(Slice::into_inner(slice), bounds)
|
||||
@@ -501,7 +456,7 @@ mod tests {
|
||||
assert_eq!(&buf, &content[range]);
|
||||
}
|
||||
|
||||
let file_contents = std::fs::read(file.file.path()).unwrap();
|
||||
let file_contents = std::fs::read(file.buffered_writer.as_inner().path()).unwrap();
|
||||
assert!(file_contents == content[0..cap * 2]);
|
||||
|
||||
let maybe_flushed_buffer_contents = file.buffered_writer.inspect_maybe_flushed().unwrap();
|
||||
@@ -534,7 +489,7 @@ mod tests {
|
||||
// assert the state is as this test expects it to be
|
||||
let load_io_buf_res = file.load_to_io_buf(&ctx).await.unwrap();
|
||||
assert_eq!(&load_io_buf_res[..], &content[0..cap * 2 + cap / 2]);
|
||||
let md = file.file.path().metadata().unwrap();
|
||||
let md = file.buffered_writer.as_inner().path().metadata().unwrap();
|
||||
assert_eq!(
|
||||
md.len(),
|
||||
2 * cap.into_u64(),
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
use std::collections::HashSet;
|
||||
use std::future::Future;
|
||||
use std::str::FromStr;
|
||||
use std::sync::atomic::AtomicU64;
|
||||
use std::time::SystemTime;
|
||||
|
||||
use anyhow::{Context, anyhow};
|
||||
@@ -16,7 +15,7 @@ use remote_storage::{
|
||||
DownloadError, DownloadKind, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath,
|
||||
};
|
||||
use tokio::fs::{self, File, OpenOptions};
|
||||
use tokio::io::AsyncSeekExt;
|
||||
use tokio::io::{AsyncSeekExt, AsyncWriteExt};
|
||||
use tokio_util::io::StreamReader;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::warn;
|
||||
@@ -41,10 +40,7 @@ use crate::span::{
|
||||
use crate::tenant::Generation;
|
||||
use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_path};
|
||||
use crate::tenant::storage_layer::LayerName;
|
||||
use crate::virtual_file;
|
||||
use crate::virtual_file::owned_buffers_io::write::FlushTaskError;
|
||||
use crate::virtual_file::{IoBufferMut, MaybeFatalIo, VirtualFile};
|
||||
use crate::virtual_file::{TempVirtualFile, owned_buffers_io};
|
||||
use crate::virtual_file::{MaybeFatalIo, VirtualFile, on_fatal_io_error};
|
||||
|
||||
///
|
||||
/// If 'metadata' is given, we will validate that the downloaded file's size matches that
|
||||
@@ -76,34 +72,21 @@ pub async fn download_layer_file<'a>(
|
||||
layer_metadata.generation,
|
||||
);
|
||||
|
||||
let (bytes_amount, temp_file) = download_retry(
|
||||
// Perform a rename inspired by durable_rename from file_utils.c.
|
||||
// The sequence:
|
||||
// write(tmp)
|
||||
// fsync(tmp)
|
||||
// rename(tmp, new)
|
||||
// fsync(new)
|
||||
// fsync(parent)
|
||||
// For more context about durable_rename check this email from postgres mailing list:
|
||||
// https://www.postgresql.org/message-id/56583BDD.9060302@2ndquadrant.com
|
||||
// If pageserver crashes the temp file will be deleted on startup and re-downloaded.
|
||||
let temp_file_path = path_with_suffix_extension(local_path, TEMP_DOWNLOAD_EXTENSION);
|
||||
|
||||
let bytes_amount = download_retry(
|
||||
|| async {
|
||||
// TempVirtualFile requires us to never reuse a filename while an old
|
||||
// instance of TempVirtualFile created with that filename is not done dropping yet.
|
||||
// So, we use a monotonic counter to disambiguate the filenames.
|
||||
static NEXT_TEMP_DISAMBIGUATOR: AtomicU64 = AtomicU64::new(1);
|
||||
let filename_disambiguator =
|
||||
NEXT_TEMP_DISAMBIGUATOR.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
|
||||
let temp_file_path = path_with_suffix_extension(
|
||||
local_path,
|
||||
&format!("{filename_disambiguator:x}.{TEMP_DOWNLOAD_EXTENSION}"),
|
||||
);
|
||||
|
||||
let temp_file = TempVirtualFile::new(
|
||||
VirtualFile::open_with_options_v2(
|
||||
&temp_file_path,
|
||||
virtual_file::OpenOptions::new()
|
||||
.create_new(true)
|
||||
.write(true),
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("create a temp file for layer download: {temp_file_path}"))
|
||||
.map_err(DownloadError::Other)?,
|
||||
gate.enter().map_err(|_| DownloadError::Cancelled)?,
|
||||
);
|
||||
download_object(storage, &remote_path, temp_file, gate, cancel, ctx).await
|
||||
download_object(storage, &remote_path, &temp_file_path, gate, cancel, ctx).await
|
||||
},
|
||||
&format!("download {remote_path:?}"),
|
||||
cancel,
|
||||
@@ -113,8 +96,7 @@ pub async fn download_layer_file<'a>(
|
||||
let expected = layer_metadata.file_size;
|
||||
if expected != bytes_amount {
|
||||
return Err(DownloadError::Other(anyhow!(
|
||||
"According to layer file metadata should have downloaded {expected} bytes but downloaded {bytes_amount} bytes into file {:?}",
|
||||
temp_file.path()
|
||||
"According to layer file metadata should have downloaded {expected} bytes but downloaded {bytes_amount} bytes into file {temp_file_path:?}",
|
||||
)));
|
||||
}
|
||||
|
||||
@@ -124,28 +106,11 @@ pub async fn download_layer_file<'a>(
|
||||
)))
|
||||
});
|
||||
|
||||
// Try rename before disarming the temp file.
|
||||
// That way, if rename fails for whatever reason, we clean up the temp file on the return path.
|
||||
|
||||
fs::rename(temp_file.path(), &local_path)
|
||||
fs::rename(&temp_file_path, &local_path)
|
||||
.await
|
||||
.with_context(|| format!("rename download layer file to {local_path}"))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
// The temp file's VirtualFile points to the temp_file_path which we moved above.
|
||||
// Drop it immediately, it's invalid.
|
||||
// This will get better in https://github.com/neondatabase/neon/issues/11692
|
||||
let _: VirtualFile = temp_file.disarm_into_inner();
|
||||
// NB: The gate guard that was stored in `temp_file` is dropped but we continue
|
||||
// to operate on it and on the parent timeline directory.
|
||||
// Those operations are safe to do because higher-level code is holding another gate guard:
|
||||
// - attached mode: the download task spawned by struct Layer is holding the gate guard
|
||||
// - secondary mode: The TenantDownloader::download holds the gate open
|
||||
|
||||
// The rename above is not durable yet.
|
||||
// It doesn't matter for crash consistency because pageserver startup deletes temp
|
||||
// files and we'll re-download on demand if necessary.
|
||||
|
||||
// We use fatal_err() below because the after the rename above,
|
||||
// the in-memory state of the filesystem already has the layer file in its final place,
|
||||
// and subsequent pageserver code could think it's durable while it really isn't.
|
||||
@@ -181,64 +146,147 @@ pub async fn download_layer_file<'a>(
|
||||
async fn download_object(
|
||||
storage: &GenericRemoteStorage,
|
||||
src_path: &RemotePath,
|
||||
destination_file: TempVirtualFile,
|
||||
gate: &utils::sync::gate::Gate,
|
||||
dst_path: &Utf8PathBuf,
|
||||
#[cfg_attr(target_os = "macos", allow(unused_variables))] gate: &utils::sync::gate::Gate,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(u64, TempVirtualFile), DownloadError> {
|
||||
let mut download = storage
|
||||
.download(src_path, &DownloadOpts::default(), cancel)
|
||||
.await?;
|
||||
#[cfg_attr(target_os = "macos", allow(unused_variables))] ctx: &RequestContext,
|
||||
) -> Result<u64, DownloadError> {
|
||||
let res = match crate::virtual_file::io_engine::get() {
|
||||
crate::virtual_file::io_engine::IoEngine::NotSet => panic!("unset"),
|
||||
crate::virtual_file::io_engine::IoEngine::StdFs => {
|
||||
async {
|
||||
let destination_file = tokio::fs::File::create(dst_path)
|
||||
.await
|
||||
.with_context(|| format!("create a destination file for layer '{dst_path}'"))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
pausable_failpoint!("before-downloading-layer-stream-pausable");
|
||||
let download = storage
|
||||
.download(src_path, &DownloadOpts::default(), cancel)
|
||||
.await?;
|
||||
|
||||
let dst_path = destination_file.path().to_owned();
|
||||
let mut buffered = owned_buffers_io::write::BufferedWriter::<IoBufferMut, _>::new(
|
||||
destination_file,
|
||||
0,
|
||||
|| IoBufferMut::with_capacity(super::BUFFER_SIZE),
|
||||
gate.enter().map_err(|_| DownloadError::Cancelled)?,
|
||||
cancel.child_token(),
|
||||
ctx,
|
||||
tracing::info_span!(parent: None, "download_object_buffered_writer", %dst_path),
|
||||
);
|
||||
pausable_failpoint!("before-downloading-layer-stream-pausable");
|
||||
|
||||
// TODO: use vectored write (writev) once supported by tokio-epoll-uring.
|
||||
// There's chunks_vectored() on the stream.
|
||||
let (bytes_amount, destination_file) = async {
|
||||
while let Some(res) = futures::StreamExt::next(&mut download.download_stream).await {
|
||||
let chunk = match res {
|
||||
Ok(chunk) => chunk,
|
||||
Err(e) => return Err(DownloadError::from(e)),
|
||||
};
|
||||
buffered
|
||||
.write_buffered_borrowed(&chunk, ctx)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
FlushTaskError::Cancelled => DownloadError::Cancelled,
|
||||
})?;
|
||||
}
|
||||
buffered
|
||||
.shutdown(
|
||||
owned_buffers_io::write::BufferedWriterShutdownMode::PadThenTruncate,
|
||||
ctx,
|
||||
)
|
||||
let mut buf_writer =
|
||||
tokio::io::BufWriter::with_capacity(super::BUFFER_SIZE, destination_file);
|
||||
|
||||
let mut reader = tokio_util::io::StreamReader::new(download.download_stream);
|
||||
|
||||
let bytes_amount = tokio::io::copy_buf(&mut reader, &mut buf_writer).await?;
|
||||
buf_writer.flush().await?;
|
||||
|
||||
let mut destination_file = buf_writer.into_inner();
|
||||
|
||||
// Tokio doc here: https://docs.rs/tokio/1.17.0/tokio/fs/struct.File.html states that:
|
||||
// A file will not be closed immediately when it goes out of scope if there are any IO operations
|
||||
// that have not yet completed. To ensure that a file is closed immediately when it is dropped,
|
||||
// you should call flush before dropping it.
|
||||
//
|
||||
// From the tokio code I see that it waits for pending operations to complete. There shouldt be any because
|
||||
// we assume that `destination_file` file is fully written. I e there is no pending .write(...).await operations.
|
||||
// But for additional safety lets check/wait for any pending operations.
|
||||
destination_file
|
||||
.flush()
|
||||
.await
|
||||
.maybe_fatal_err("download_object sync_all")
|
||||
.with_context(|| format!("flush source file at {dst_path}"))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
// not using sync_data because it can lose file size update
|
||||
destination_file
|
||||
.sync_all()
|
||||
.await
|
||||
.maybe_fatal_err("download_object sync_all")
|
||||
.with_context(|| format!("failed to fsync source file at {dst_path}"))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
Ok(bytes_amount)
|
||||
}
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
FlushTaskError::Cancelled => DownloadError::Cancelled,
|
||||
})
|
||||
}
|
||||
#[cfg(target_os = "linux")]
|
||||
crate::virtual_file::io_engine::IoEngine::TokioEpollUring => {
|
||||
use crate::virtual_file::owned_buffers_io::write::FlushTaskError;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::virtual_file::{IoBufferMut, owned_buffers_io};
|
||||
async {
|
||||
let destination_file = Arc::new(
|
||||
VirtualFile::create(dst_path, ctx)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("create a destination file for layer '{dst_path}'")
|
||||
})
|
||||
.map_err(DownloadError::Other)?,
|
||||
);
|
||||
|
||||
let mut download = storage
|
||||
.download(src_path, &DownloadOpts::default(), cancel)
|
||||
.await?;
|
||||
|
||||
pausable_failpoint!("before-downloading-layer-stream-pausable");
|
||||
|
||||
let mut buffered = owned_buffers_io::write::BufferedWriter::<IoBufferMut, _>::new(
|
||||
destination_file,
|
||||
|| IoBufferMut::with_capacity(super::BUFFER_SIZE),
|
||||
gate.enter().map_err(|_| DownloadError::Cancelled)?,
|
||||
cancel.child_token(),
|
||||
ctx,
|
||||
tracing::info_span!(parent: None, "download_object_buffered_writer", %dst_path),
|
||||
);
|
||||
|
||||
// TODO: use vectored write (writev) once supported by tokio-epoll-uring.
|
||||
// There's chunks_vectored() on the stream.
|
||||
let (bytes_amount, destination_file) = async {
|
||||
while let Some(res) =
|
||||
futures::StreamExt::next(&mut download.download_stream).await
|
||||
{
|
||||
let chunk = match res {
|
||||
Ok(chunk) => chunk,
|
||||
Err(e) => return Err(DownloadError::from(e)),
|
||||
};
|
||||
buffered
|
||||
.write_buffered_borrowed(&chunk, ctx)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
FlushTaskError::Cancelled => DownloadError::Cancelled,
|
||||
})?;
|
||||
}
|
||||
let inner = buffered
|
||||
.flush_and_into_inner(ctx)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
FlushTaskError::Cancelled => DownloadError::Cancelled,
|
||||
})?;
|
||||
Ok(inner)
|
||||
}
|
||||
.await?;
|
||||
|
||||
// not using sync_data because it can lose file size update
|
||||
destination_file
|
||||
.sync_all()
|
||||
.await
|
||||
.maybe_fatal_err("download_object sync_all")
|
||||
.with_context(|| format!("failed to fsync source file at {dst_path}"))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
Ok(bytes_amount)
|
||||
}
|
||||
.await
|
||||
}
|
||||
};
|
||||
|
||||
// in case the download failed, clean up
|
||||
match res {
|
||||
Ok(bytes_amount) => Ok(bytes_amount),
|
||||
Err(e) => {
|
||||
if let Err(e) = tokio::fs::remove_file(dst_path).await {
|
||||
if e.kind() != std::io::ErrorKind::NotFound {
|
||||
on_fatal_io_error(&e, &format!("Removing temporary file {dst_path}"));
|
||||
}
|
||||
}
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
.await?;
|
||||
|
||||
// not using sync_data because it can lose file size update
|
||||
destination_file
|
||||
.sync_all()
|
||||
.await
|
||||
.maybe_fatal_err("download_object sync_all")
|
||||
.with_context(|| format!("failed to fsync source file at {dst_path}"))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
Ok((bytes_amount, destination_file))
|
||||
}
|
||||
|
||||
const TEMP_DOWNLOAD_EXTENSION: &str = "temp_download";
|
||||
|
||||
@@ -646,7 +646,7 @@ enum UpdateError {
|
||||
NoData,
|
||||
#[error("Insufficient local storage space")]
|
||||
NoSpace,
|
||||
#[error("Failed to download: {0}")]
|
||||
#[error("Failed to download")]
|
||||
DownloadError(DownloadError),
|
||||
#[error(transparent)]
|
||||
Deserialize(#[from] serde_json::Error),
|
||||
@@ -1521,11 +1521,12 @@ async fn load_heatmap(
|
||||
path: &Utf8PathBuf,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Option<HeatMapTenant>, anyhow::Error> {
|
||||
let st = match VirtualFile::read_to_string(path, ctx).await {
|
||||
Ok(st) => st,
|
||||
let mut file = match VirtualFile::open(path, ctx).await {
|
||||
Ok(file) => file,
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
|
||||
Err(e) => Err(e)?,
|
||||
};
|
||||
let st = file.read_to_string(ctx).await?;
|
||||
let htm = serde_json::from_str(&st)?;
|
||||
Ok(Some(htm))
|
||||
}
|
||||
|
||||
@@ -29,11 +29,11 @@
|
||||
//!
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::fs::File;
|
||||
use std::io::SeekFrom;
|
||||
use std::ops::Range;
|
||||
use std::os::unix::fs::FileExt;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::AtomicU64;
|
||||
|
||||
use anyhow::{Context, Result, bail, ensure};
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
@@ -45,13 +45,14 @@ use pageserver_api::keyspace::KeySpace;
|
||||
use pageserver_api::models::ImageCompressionAlgorithm;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_api::value::Value;
|
||||
use rand::Rng;
|
||||
use rand::distributions::Alphanumeric;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::sync::OnceCell;
|
||||
use tokio_epoll_uring::IoBuf;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
use utils::bin_ser::BeSer;
|
||||
use utils::bin_ser::SerializeError;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
@@ -73,10 +74,8 @@ use crate::tenant::vectored_blob_io::{
|
||||
BlobFlag, BufView, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,
|
||||
VectoredReadPlanner,
|
||||
};
|
||||
use crate::virtual_file::TempVirtualFile;
|
||||
use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt};
|
||||
use crate::virtual_file::owned_buffers_io::write::{Buffer, BufferedWriterShutdownMode};
|
||||
use crate::virtual_file::{self, IoBuffer, IoBufferMut, MaybeFatalIo, VirtualFile};
|
||||
use crate::virtual_file::{self, IoBufferMut, MaybeFatalIo, VirtualFile};
|
||||
use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};
|
||||
|
||||
///
|
||||
@@ -114,15 +113,6 @@ impl From<&DeltaLayer> for Summary {
|
||||
}
|
||||
|
||||
impl Summary {
|
||||
/// Serializes the summary header into an aligned buffer of lenth `PAGE_SZ`.
|
||||
pub fn ser_into_page(&self) -> Result<IoBuffer, SerializeError> {
|
||||
let mut buf = IoBufferMut::with_capacity(PAGE_SZ);
|
||||
Self::ser_into(self, &mut buf)?;
|
||||
// Pad zeroes to the buffer so the length is a multiple of the alignment.
|
||||
buf.extend_with(0, buf.capacity() - buf.len());
|
||||
Ok(buf.freeze())
|
||||
}
|
||||
|
||||
pub(super) fn expected(
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
@@ -298,20 +288,19 @@ impl DeltaLayer {
|
||||
key_start: Key,
|
||||
lsn_range: &Range<Lsn>,
|
||||
) -> Utf8PathBuf {
|
||||
// TempVirtualFile requires us to never reuse a filename while an old
|
||||
// instance of TempVirtualFile created with that filename is not done dropping yet.
|
||||
// So, we use a monotonic counter to disambiguate the filenames.
|
||||
static NEXT_TEMP_DISAMBIGUATOR: AtomicU64 = AtomicU64::new(1);
|
||||
let filename_disambiguator =
|
||||
NEXT_TEMP_DISAMBIGUATOR.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
let rand_string: String = rand::thread_rng()
|
||||
.sample_iter(&Alphanumeric)
|
||||
.take(8)
|
||||
.map(char::from)
|
||||
.collect();
|
||||
|
||||
conf.timeline_path(tenant_shard_id, timeline_id)
|
||||
.join(format!(
|
||||
"{}-XXX__{:016X}-{:016X}.{:x}.{}",
|
||||
"{}-XXX__{:016X}-{:016X}.{}.{}",
|
||||
key_start,
|
||||
u64::from(lsn_range.start),
|
||||
u64::from(lsn_range.end),
|
||||
filename_disambiguator,
|
||||
rand_string,
|
||||
TEMP_FILE_SUFFIX,
|
||||
))
|
||||
}
|
||||
@@ -402,7 +391,7 @@ struct DeltaLayerWriterInner {
|
||||
|
||||
tree: DiskBtreeBuilder<BlockBuf, DELTA_KEY_SIZE>,
|
||||
|
||||
blob_writer: BlobWriter<TempVirtualFile>,
|
||||
blob_writer: BlobWriter<true>,
|
||||
|
||||
// Number of key-lsns in the layer.
|
||||
num_keys: usize,
|
||||
@@ -426,29 +415,16 @@ impl DeltaLayerWriterInner {
|
||||
// Create the file initially with a temporary filename. We don't know
|
||||
// the end key yet, so we cannot form the final filename yet. We will
|
||||
// rename it when we're done.
|
||||
//
|
||||
// Note: This overwrites any existing file. There shouldn't be any.
|
||||
// FIXME: throw an error instead?
|
||||
let path =
|
||||
DeltaLayer::temp_path_for(conf, &tenant_shard_id, &timeline_id, key_start, &lsn_range);
|
||||
let file = TempVirtualFile::new(
|
||||
VirtualFile::open_with_options_v2(
|
||||
&path,
|
||||
virtual_file::OpenOptions::new()
|
||||
.create_new(true)
|
||||
.write(true),
|
||||
ctx,
|
||||
)
|
||||
.await?,
|
||||
gate.enter()?,
|
||||
);
|
||||
|
||||
// Start at PAGE_SZ, make room for the header block
|
||||
let blob_writer = BlobWriter::new(
|
||||
file,
|
||||
PAGE_SZ as u64,
|
||||
gate,
|
||||
cancel,
|
||||
ctx,
|
||||
info_span!(parent: None, "delta_layer_writer_flush_task", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), timeline_id=%timeline_id, path = %path),
|
||||
)?;
|
||||
let mut file = VirtualFile::create(&path, ctx).await?;
|
||||
// make room for the header block
|
||||
file.seek(SeekFrom::Start(PAGE_SZ as u64)).await?;
|
||||
let blob_writer = BlobWriter::new(file, PAGE_SZ as u64, gate, cancel, ctx);
|
||||
|
||||
// Initialize the b-tree index builder
|
||||
let block_buf = BlockBuf::new();
|
||||
@@ -539,27 +515,34 @@ impl DeltaLayerWriterInner {
|
||||
self,
|
||||
key_end: Key,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> {
|
||||
let temp_path = self.path.clone();
|
||||
let result = self.finish0(key_end, ctx).await;
|
||||
if let Err(ref e) = result {
|
||||
tracing::info!(%temp_path, "cleaning up temporary file after error during writing: {e}");
|
||||
if let Err(e) = std::fs::remove_file(&temp_path) {
|
||||
tracing::warn!(error=%e, %temp_path, "error cleaning up temporary layer file after error during writing");
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
async fn finish0(
|
||||
self,
|
||||
key_end: Key,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> {
|
||||
let index_start_blk = self.blob_writer.size().div_ceil(PAGE_SZ as u64) as u32;
|
||||
|
||||
let file = self
|
||||
.blob_writer
|
||||
.shutdown(
|
||||
BufferedWriterShutdownMode::ZeroPadToNextMultiple(PAGE_SZ),
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
let mut file = self.blob_writer.into_inner(ctx).await?;
|
||||
|
||||
// Write out the index
|
||||
let (index_root_blk, block_buf) = self.tree.finish()?;
|
||||
let mut offset = index_start_blk as u64 * PAGE_SZ as u64;
|
||||
|
||||
// TODO(yuchen): https://github.com/neondatabase/neon/issues/10092
|
||||
// Should we just replace BlockBuf::blocks with one big buffer
|
||||
file.seek(SeekFrom::Start(index_start_blk as u64 * PAGE_SZ as u64))
|
||||
.await?;
|
||||
for buf in block_buf.blocks {
|
||||
let (_buf, res) = file.write_all_at(buf.slice_len(), offset, ctx).await;
|
||||
let (_buf, res) = file.write_all(buf.slice_len(), ctx).await;
|
||||
res?;
|
||||
offset += PAGE_SZ as u64;
|
||||
}
|
||||
assert!(self.lsn_range.start < self.lsn_range.end);
|
||||
// Fill in the summary on blk 0
|
||||
@@ -574,9 +557,11 @@ impl DeltaLayerWriterInner {
|
||||
index_root_blk,
|
||||
};
|
||||
|
||||
// Writes summary at the first block (offset 0).
|
||||
let buf = summary.ser_into_page()?;
|
||||
let (_buf, res) = file.write_all_at(buf.slice_len(), 0, ctx).await;
|
||||
let mut buf = Vec::with_capacity(PAGE_SZ);
|
||||
// TODO: could use smallvec here but it's a pain with Slice<T>
|
||||
Summary::ser_into(&summary, &mut buf)?;
|
||||
file.seek(SeekFrom::Start(0)).await?;
|
||||
let (_buf, res) = file.write_all(buf.slice_len(), ctx).await;
|
||||
res?;
|
||||
|
||||
let metadata = file
|
||||
@@ -613,10 +598,6 @@ impl DeltaLayerWriterInner {
|
||||
|
||||
trace!("created delta layer {}", self.path);
|
||||
|
||||
// The gate guard stored in `destination_file` is dropped. Callers (e.g.. flush loop or compaction)
|
||||
// keep the gate open also, so that it's safe for them to rename the file to its final destination.
|
||||
file.disarm_into_inner();
|
||||
|
||||
Ok((desc, self.path))
|
||||
}
|
||||
}
|
||||
@@ -745,6 +726,17 @@ impl DeltaLayerWriter {
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for DeltaLayerWriter {
|
||||
fn drop(&mut self) {
|
||||
if let Some(inner) = self.inner.take() {
|
||||
// We want to remove the virtual file here, so it's fine to not
|
||||
// having completely flushed unwritten data.
|
||||
let vfile = inner.blob_writer.into_inner_no_flush();
|
||||
vfile.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum RewriteSummaryError {
|
||||
#[error("magic mismatch")]
|
||||
@@ -768,7 +760,7 @@ impl DeltaLayer {
|
||||
where
|
||||
F: Fn(Summary) -> Summary,
|
||||
{
|
||||
let file = VirtualFile::open_with_options_v2(
|
||||
let mut file = VirtualFile::open_with_options(
|
||||
path,
|
||||
virtual_file::OpenOptions::new().read(true).write(true),
|
||||
ctx,
|
||||
@@ -785,8 +777,11 @@ impl DeltaLayer {
|
||||
|
||||
let new_summary = rewrite(actual_summary);
|
||||
|
||||
let buf = new_summary.ser_into_page().context("serialize")?;
|
||||
let (_buf, res) = file.write_all_at(buf.slice_len(), 0, ctx).await;
|
||||
let mut buf = Vec::with_capacity(PAGE_SZ);
|
||||
// TODO: could use smallvec here, but it's a pain with Slice<T>
|
||||
Summary::ser_into(&new_summary, &mut buf).context("serialize")?;
|
||||
file.seek(SeekFrom::Start(0)).await?;
|
||||
let (_buf, res) = file.write_all(buf.slice_len(), ctx).await;
|
||||
res?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -1442,19 +1437,6 @@ impl DeltaLayerInner {
|
||||
}
|
||||
|
||||
pub fn iter<'a>(&'a self, ctx: &'a RequestContext) -> DeltaLayerIterator<'a> {
|
||||
self.iter_with_options(
|
||||
ctx,
|
||||
1024 * 8192, // The default value. Unit tests might use a different value. 1024 * 8K = 8MB buffer.
|
||||
1024, // The default value. Unit tests might use a different value
|
||||
)
|
||||
}
|
||||
|
||||
pub fn iter_with_options<'a>(
|
||||
&'a self,
|
||||
ctx: &'a RequestContext,
|
||||
max_read_size: u64,
|
||||
max_batch_size: usize,
|
||||
) -> DeltaLayerIterator<'a> {
|
||||
let block_reader = FileBlockReader::new(&self.file, self.file_id);
|
||||
let tree_reader =
|
||||
DiskBtreeReader::new(self.index_start_blk, self.index_root_blk, block_reader);
|
||||
@@ -1464,7 +1446,10 @@ impl DeltaLayerInner {
|
||||
index_iter: tree_reader.iter(&[0; DELTA_KEY_SIZE], ctx),
|
||||
key_values_batch: std::collections::VecDeque::new(),
|
||||
is_end: false,
|
||||
planner: StreamingVectoredReadPlanner::new(max_read_size, max_batch_size),
|
||||
planner: StreamingVectoredReadPlanner::new(
|
||||
1024 * 8192, // The default value. Unit tests might use a different value. 1024 * 8K = 8MB buffer.
|
||||
1024, // The default value. Unit tests might use a different value
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1624,8 +1609,8 @@ pub(crate) mod test {
|
||||
use bytes::Bytes;
|
||||
use itertools::MinMaxResult;
|
||||
use pageserver_api::value::Value;
|
||||
use rand::RngCore;
|
||||
use rand::prelude::{SeedableRng, SliceRandom, StdRng};
|
||||
use rand::{Rng, RngCore};
|
||||
|
||||
use super::*;
|
||||
use crate::DEFAULT_PG_VERSION;
|
||||
|
||||
@@ -27,11 +27,11 @@
|
||||
//! actual page images are stored in the "values" part.
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::fs::File;
|
||||
use std::io::SeekFrom;
|
||||
use std::ops::Range;
|
||||
use std::os::unix::prelude::FileExt;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::AtomicU64;
|
||||
|
||||
use anyhow::{Context, Result, bail, ensure};
|
||||
use bytes::Bytes;
|
||||
@@ -43,13 +43,14 @@ use pageserver_api::key::{DBDIR_KEY, KEY_SIZE, Key};
|
||||
use pageserver_api::keyspace::KeySpace;
|
||||
use pageserver_api::shard::{ShardIdentity, TenantShardId};
|
||||
use pageserver_api::value::Value;
|
||||
use rand::Rng;
|
||||
use rand::distributions::Alphanumeric;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::sync::OnceCell;
|
||||
use tokio_stream::StreamExt;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
use utils::bin_ser::BeSer;
|
||||
use utils::bin_ser::SerializeError;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
@@ -71,10 +72,8 @@ use crate::tenant::vectored_blob_io::{
|
||||
BlobFlag, BufView, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,
|
||||
VectoredReadPlanner,
|
||||
};
|
||||
use crate::virtual_file::TempVirtualFile;
|
||||
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
|
||||
use crate::virtual_file::owned_buffers_io::write::{Buffer, BufferedWriterShutdownMode};
|
||||
use crate::virtual_file::{self, IoBuffer, IoBufferMut, MaybeFatalIo, VirtualFile};
|
||||
use crate::virtual_file::{self, IoBufferMut, MaybeFatalIo, VirtualFile};
|
||||
use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};
|
||||
|
||||
///
|
||||
@@ -113,15 +112,6 @@ impl From<&ImageLayer> for Summary {
|
||||
}
|
||||
|
||||
impl Summary {
|
||||
/// Serializes the summary header into an aligned buffer of lenth `PAGE_SZ`.
|
||||
pub fn ser_into_page(&self) -> Result<IoBuffer, SerializeError> {
|
||||
let mut buf = IoBufferMut::with_capacity(PAGE_SZ);
|
||||
Self::ser_into(self, &mut buf)?;
|
||||
// Pad zeroes to the buffer so the length is a multiple of the alignment.
|
||||
buf.extend_with(0, buf.capacity() - buf.len());
|
||||
Ok(buf.freeze())
|
||||
}
|
||||
|
||||
pub(super) fn expected(
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
@@ -262,18 +252,14 @@ impl ImageLayer {
|
||||
tenant_shard_id: TenantShardId,
|
||||
fname: &ImageLayerName,
|
||||
) -> Utf8PathBuf {
|
||||
// TempVirtualFile requires us to never reuse a filename while an old
|
||||
// instance of TempVirtualFile created with that filename is not done dropping yet.
|
||||
// So, we use a monotonic counter to disambiguate the filenames.
|
||||
static NEXT_TEMP_DISAMBIGUATOR: AtomicU64 = AtomicU64::new(1);
|
||||
let filename_disambiguator =
|
||||
NEXT_TEMP_DISAMBIGUATOR.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
let rand_string: String = rand::thread_rng()
|
||||
.sample_iter(&Alphanumeric)
|
||||
.take(8)
|
||||
.map(char::from)
|
||||
.collect();
|
||||
|
||||
conf.timeline_path(&tenant_shard_id, &timeline_id)
|
||||
.join(format!(
|
||||
"{fname}.{:x}.{TEMP_FILE_SUFFIX}",
|
||||
filename_disambiguator
|
||||
))
|
||||
.join(format!("{fname}.{rand_string}.{TEMP_FILE_SUFFIX}"))
|
||||
}
|
||||
|
||||
///
|
||||
@@ -363,7 +349,7 @@ impl ImageLayer {
|
||||
where
|
||||
F: Fn(Summary) -> Summary,
|
||||
{
|
||||
let file = VirtualFile::open_with_options_v2(
|
||||
let mut file = VirtualFile::open_with_options(
|
||||
path,
|
||||
virtual_file::OpenOptions::new().read(true).write(true),
|
||||
ctx,
|
||||
@@ -380,8 +366,11 @@ impl ImageLayer {
|
||||
|
||||
let new_summary = rewrite(actual_summary);
|
||||
|
||||
let buf = new_summary.ser_into_page().context("serialize")?;
|
||||
let (_buf, res) = file.write_all_at(buf.slice_len(), 0, ctx).await;
|
||||
let mut buf = Vec::with_capacity(PAGE_SZ);
|
||||
// TODO: could use smallvec here but it's a pain with Slice<T>
|
||||
Summary::ser_into(&new_summary, &mut buf).context("serialize")?;
|
||||
file.seek(SeekFrom::Start(0)).await?;
|
||||
let (_buf, res) = file.write_all(buf.slice_len(), ctx).await;
|
||||
res?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -685,19 +674,6 @@ impl ImageLayerInner {
|
||||
}
|
||||
|
||||
pub(crate) fn iter<'a>(&'a self, ctx: &'a RequestContext) -> ImageLayerIterator<'a> {
|
||||
self.iter_with_options(
|
||||
ctx,
|
||||
1024 * 8192, // The default value. Unit tests might use a different value. 1024 * 8K = 8MB buffer.
|
||||
1024, // The default value. Unit tests might use a different value
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn iter_with_options<'a>(
|
||||
&'a self,
|
||||
ctx: &'a RequestContext,
|
||||
max_read_size: u64,
|
||||
max_batch_size: usize,
|
||||
) -> ImageLayerIterator<'a> {
|
||||
let block_reader = FileBlockReader::new(&self.file, self.file_id);
|
||||
let tree_reader =
|
||||
DiskBtreeReader::new(self.index_start_blk, self.index_root_blk, block_reader);
|
||||
@@ -707,7 +683,10 @@ impl ImageLayerInner {
|
||||
index_iter: tree_reader.iter(&[0; KEY_SIZE], ctx),
|
||||
key_values_batch: VecDeque::new(),
|
||||
is_end: false,
|
||||
planner: StreamingVectoredReadPlanner::new(max_read_size, max_batch_size),
|
||||
planner: StreamingVectoredReadPlanner::new(
|
||||
1024 * 8192, // The default value. Unit tests might use a different value. 1024 * 8K = 8MB buffer.
|
||||
1024, // The default value. Unit tests might use a different value
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -760,7 +739,7 @@ struct ImageLayerWriterInner {
|
||||
// Number of keys in the layer.
|
||||
num_keys: usize,
|
||||
|
||||
blob_writer: BlobWriter<TempVirtualFile>,
|
||||
blob_writer: BlobWriter<false>,
|
||||
tree: DiskBtreeBuilder<BlockBuf, KEY_SIZE>,
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
@@ -794,27 +773,19 @@ impl ImageLayerWriterInner {
|
||||
},
|
||||
);
|
||||
trace!("creating image layer {}", path);
|
||||
let file = TempVirtualFile::new(
|
||||
VirtualFile::open_with_options_v2(
|
||||
let mut file = {
|
||||
VirtualFile::open_with_options(
|
||||
&path,
|
||||
virtual_file::OpenOptions::new()
|
||||
.create_new(true)
|
||||
.write(true),
|
||||
.write(true)
|
||||
.create_new(true),
|
||||
ctx,
|
||||
)
|
||||
.await?,
|
||||
gate.enter()?,
|
||||
);
|
||||
|
||||
// Start at `PAGE_SZ` to make room for the header block.
|
||||
let blob_writer = BlobWriter::new(
|
||||
file,
|
||||
PAGE_SZ as u64,
|
||||
gate,
|
||||
cancel,
|
||||
ctx,
|
||||
info_span!(parent: None, "image_layer_writer_flush_task", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), timeline_id=%timeline_id, path = %path),
|
||||
)?;
|
||||
.await?
|
||||
};
|
||||
// make room for the header block
|
||||
file.seek(SeekFrom::Start(PAGE_SZ as u64)).await?;
|
||||
let blob_writer = BlobWriter::new(file, PAGE_SZ as u64, gate, cancel, ctx);
|
||||
|
||||
// Initialize the b-tree index builder
|
||||
let block_buf = BlockBuf::new();
|
||||
@@ -925,6 +896,25 @@ impl ImageLayerWriterInner {
|
||||
self,
|
||||
ctx: &RequestContext,
|
||||
end_key: Option<Key>,
|
||||
) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> {
|
||||
let temp_path = self.path.clone();
|
||||
let result = self.finish0(ctx, end_key).await;
|
||||
if let Err(ref e) = result {
|
||||
tracing::info!(%temp_path, "cleaning up temporary file after error during writing: {e}");
|
||||
if let Err(e) = std::fs::remove_file(&temp_path) {
|
||||
tracing::warn!(error=%e, %temp_path, "error cleaning up temporary layer file after error during writing");
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
///
|
||||
/// Finish writing the image layer.
|
||||
///
|
||||
async fn finish0(
|
||||
self,
|
||||
ctx: &RequestContext,
|
||||
end_key: Option<Key>,
|
||||
) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> {
|
||||
let index_start_blk = self.blob_writer.size().div_ceil(PAGE_SZ as u64) as u32;
|
||||
|
||||
@@ -942,24 +932,15 @@ impl ImageLayerWriterInner {
|
||||
crate::metrics::COMPRESSION_IMAGE_OUTPUT_BYTES.inc_by(compressed_size);
|
||||
};
|
||||
|
||||
let file = self
|
||||
.blob_writer
|
||||
.shutdown(
|
||||
BufferedWriterShutdownMode::ZeroPadToNextMultiple(PAGE_SZ),
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
let mut file = self.blob_writer.into_inner();
|
||||
|
||||
// Write out the index
|
||||
let mut offset = index_start_blk as u64 * PAGE_SZ as u64;
|
||||
file.seek(SeekFrom::Start(index_start_blk as u64 * PAGE_SZ as u64))
|
||||
.await?;
|
||||
let (index_root_blk, block_buf) = self.tree.finish()?;
|
||||
|
||||
// TODO(yuchen): https://github.com/neondatabase/neon/issues/10092
|
||||
// Should we just replace BlockBuf::blocks with one big buffer?
|
||||
for buf in block_buf.blocks {
|
||||
let (_buf, res) = file.write_all_at(buf.slice_len(), offset, ctx).await;
|
||||
let (_buf, res) = file.write_all(buf.slice_len(), ctx).await;
|
||||
res?;
|
||||
offset += PAGE_SZ as u64;
|
||||
}
|
||||
|
||||
let final_key_range = if let Some(end_key) = end_key {
|
||||
@@ -980,9 +961,11 @@ impl ImageLayerWriterInner {
|
||||
index_root_blk,
|
||||
};
|
||||
|
||||
// Writes summary at the first block (offset 0).
|
||||
let buf = summary.ser_into_page()?;
|
||||
let (_buf, res) = file.write_all_at(buf.slice_len(), 0, ctx).await;
|
||||
let mut buf = Vec::with_capacity(PAGE_SZ);
|
||||
// TODO: could use smallvec here but it's a pain with Slice<T>
|
||||
Summary::ser_into(&summary, &mut buf)?;
|
||||
file.seek(SeekFrom::Start(0)).await?;
|
||||
let (_buf, res) = file.write_all(buf.slice_len(), ctx).await;
|
||||
res?;
|
||||
|
||||
let metadata = file
|
||||
@@ -1017,10 +1000,6 @@ impl ImageLayerWriterInner {
|
||||
|
||||
trace!("created image layer {}", self.path);
|
||||
|
||||
// The gate guard stored in `destination_file` is dropped. Callers (e.g.. flush loop or compaction)
|
||||
// keep the gate open also, so that it's safe for them to rename the file to its final destination.
|
||||
file.disarm_into_inner();
|
||||
|
||||
Ok((desc, self.path))
|
||||
}
|
||||
}
|
||||
@@ -1146,6 +1125,14 @@ impl ImageLayerWriter {
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for ImageLayerWriter {
|
||||
fn drop(&mut self) {
|
||||
if let Some(inner) = self.inner.take() {
|
||||
inner.blob_writer.into_inner().remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ImageLayerIterator<'a> {
|
||||
image_layer: &'a ImageLayerInner,
|
||||
ctx: &'a RequestContext,
|
||||
|
||||
@@ -19,7 +19,6 @@ pub(crate) enum LayerRef<'a> {
|
||||
}
|
||||
|
||||
impl<'a> LayerRef<'a> {
|
||||
#[allow(dead_code)]
|
||||
fn iter(self, ctx: &'a RequestContext) -> LayerIterRef<'a> {
|
||||
match self {
|
||||
Self::Image(x) => LayerIterRef::Image(x.iter(ctx)),
|
||||
@@ -27,22 +26,6 @@ impl<'a> LayerRef<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn iter_with_options(
|
||||
self,
|
||||
ctx: &'a RequestContext,
|
||||
max_read_size: u64,
|
||||
max_batch_size: usize,
|
||||
) -> LayerIterRef<'a> {
|
||||
match self {
|
||||
Self::Image(x) => {
|
||||
LayerIterRef::Image(x.iter_with_options(ctx, max_read_size, max_batch_size))
|
||||
}
|
||||
Self::Delta(x) => {
|
||||
LayerIterRef::Delta(x.iter_with_options(ctx, max_read_size, max_batch_size))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn layer_dbg_info(&self) -> String {
|
||||
match self {
|
||||
Self::Image(x) => x.layer_dbg_info(),
|
||||
@@ -83,8 +66,6 @@ pub(crate) enum IteratorWrapper<'a> {
|
||||
first_key_lower_bound: (Key, Lsn),
|
||||
layer: LayerRef<'a>,
|
||||
source_desc: Arc<PersistentLayerKey>,
|
||||
max_read_size: u64,
|
||||
max_batch_size: usize,
|
||||
},
|
||||
Loaded {
|
||||
iter: PeekableLayerIterRef<'a>,
|
||||
@@ -165,8 +146,6 @@ impl<'a> IteratorWrapper<'a> {
|
||||
pub fn create_from_image_layer(
|
||||
image_layer: &'a ImageLayerInner,
|
||||
ctx: &'a RequestContext,
|
||||
max_read_size: u64,
|
||||
max_batch_size: usize,
|
||||
) -> Self {
|
||||
Self::NotLoaded {
|
||||
layer: LayerRef::Image(image_layer),
|
||||
@@ -178,16 +157,12 @@ impl<'a> IteratorWrapper<'a> {
|
||||
is_delta: false,
|
||||
}
|
||||
.into(),
|
||||
max_read_size,
|
||||
max_batch_size,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create_from_delta_layer(
|
||||
delta_layer: &'a DeltaLayerInner,
|
||||
ctx: &'a RequestContext,
|
||||
max_read_size: u64,
|
||||
max_batch_size: usize,
|
||||
) -> Self {
|
||||
Self::NotLoaded {
|
||||
layer: LayerRef::Delta(delta_layer),
|
||||
@@ -199,8 +174,6 @@ impl<'a> IteratorWrapper<'a> {
|
||||
is_delta: true,
|
||||
}
|
||||
.into(),
|
||||
max_read_size,
|
||||
max_batch_size,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -231,13 +204,11 @@ impl<'a> IteratorWrapper<'a> {
|
||||
first_key_lower_bound,
|
||||
layer,
|
||||
source_desc,
|
||||
max_read_size,
|
||||
max_batch_size,
|
||||
} = self
|
||||
else {
|
||||
unreachable!()
|
||||
};
|
||||
let iter = layer.iter_with_options(ctx, *max_read_size, *max_batch_size);
|
||||
let iter = layer.iter(ctx);
|
||||
let iter = PeekableLayerIterRef::create(iter).await?;
|
||||
if let Some((k1, l1, _)) = iter.peek() {
|
||||
let (k2, l2) = first_key_lower_bound;
|
||||
@@ -322,41 +293,21 @@ impl MergeIteratorItem for ((Key, Lsn, Value), Arc<PersistentLayerKey>) {
|
||||
}
|
||||
|
||||
impl<'a> MergeIterator<'a> {
|
||||
pub fn create_with_options(
|
||||
deltas: &[&'a DeltaLayerInner],
|
||||
images: &[&'a ImageLayerInner],
|
||||
ctx: &'a RequestContext,
|
||||
max_read_size: u64,
|
||||
max_batch_size: usize,
|
||||
) -> Self {
|
||||
let mut heap = Vec::with_capacity(images.len() + deltas.len());
|
||||
for image in images {
|
||||
heap.push(IteratorWrapper::create_from_image_layer(
|
||||
image,
|
||||
ctx,
|
||||
max_read_size,
|
||||
max_batch_size,
|
||||
));
|
||||
}
|
||||
for delta in deltas {
|
||||
heap.push(IteratorWrapper::create_from_delta_layer(
|
||||
delta,
|
||||
ctx,
|
||||
max_read_size,
|
||||
max_batch_size,
|
||||
));
|
||||
}
|
||||
Self {
|
||||
heap: BinaryHeap::from(heap),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create(
|
||||
deltas: &[&'a DeltaLayerInner],
|
||||
images: &[&'a ImageLayerInner],
|
||||
ctx: &'a RequestContext,
|
||||
) -> Self {
|
||||
Self::create_with_options(deltas, images, ctx, 1024 * 8192, 1024)
|
||||
let mut heap = Vec::with_capacity(images.len() + deltas.len());
|
||||
for image in images {
|
||||
heap.push(IteratorWrapper::create_from_image_layer(image, ctx));
|
||||
}
|
||||
for delta in deltas {
|
||||
heap.push(IteratorWrapper::create_from_delta_layer(delta, ctx));
|
||||
}
|
||||
Self {
|
||||
heap: BinaryHeap::from(heap),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn next_inner<R: MergeIteratorItem>(&mut self) -> anyhow::Result<Option<R>> {
|
||||
|
||||
@@ -2828,41 +2828,6 @@ impl Timeline {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check if the memory usage is within the limit.
|
||||
async fn check_memory_usage(
|
||||
self: &Arc<Self>,
|
||||
layer_selection: &[Layer],
|
||||
) -> Result<(), CompactionError> {
|
||||
let mut estimated_memory_usage_mb = 0.0;
|
||||
let mut num_image_layers = 0;
|
||||
let mut num_delta_layers = 0;
|
||||
let target_layer_size_bytes = 256 * 1024 * 1024;
|
||||
for layer in layer_selection {
|
||||
let layer_desc = layer.layer_desc();
|
||||
if layer_desc.is_delta() {
|
||||
// Delta layers at most have 1MB buffer; 3x to make it safe (there're deltas as large as 16KB).
|
||||
// Multiply the layer size so that tests can pass.
|
||||
estimated_memory_usage_mb +=
|
||||
3.0 * (layer_desc.file_size / target_layer_size_bytes) as f64;
|
||||
num_delta_layers += 1;
|
||||
} else {
|
||||
// Image layers at most have 1MB buffer but it might be compressed; assume 5x compression ratio.
|
||||
estimated_memory_usage_mb +=
|
||||
5.0 * (layer_desc.file_size / target_layer_size_bytes) as f64;
|
||||
num_image_layers += 1;
|
||||
}
|
||||
}
|
||||
if estimated_memory_usage_mb > 1024.0 {
|
||||
return Err(CompactionError::Other(anyhow!(
|
||||
"estimated memory usage is too high: {}MB, giving up compaction; num_image_layers={}, num_delta_layers={}",
|
||||
estimated_memory_usage_mb,
|
||||
num_image_layers,
|
||||
num_delta_layers
|
||||
)));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get a watermark for gc-compaction, that is the lowest LSN that we can use as the `gc_horizon` for
|
||||
/// the compaction algorithm. It is min(space_cutoff, time_cutoff, latest_gc_cutoff, standby_horizon).
|
||||
/// Leases and retain_lsns are considered in the gc-compaction job itself so we don't need to account for them
|
||||
@@ -3299,17 +3264,6 @@ impl Timeline {
|
||||
self.check_compaction_space(&job_desc.selected_layers)
|
||||
.await?;
|
||||
|
||||
self.check_memory_usage(&job_desc.selected_layers).await?;
|
||||
if job_desc.selected_layers.len() > 100
|
||||
&& job_desc.rewrite_layers.len() as f64 >= job_desc.selected_layers.len() as f64 * 0.7
|
||||
{
|
||||
return Err(CompactionError::Other(anyhow!(
|
||||
"too many layers to rewrite: {} / {}, giving up compaction",
|
||||
job_desc.rewrite_layers.len(),
|
||||
job_desc.selected_layers.len()
|
||||
)));
|
||||
}
|
||||
|
||||
// Generate statistics for the compaction
|
||||
for layer in &job_desc.selected_layers {
|
||||
let desc = layer.layer_desc();
|
||||
@@ -3405,13 +3359,7 @@ impl Timeline {
|
||||
.context("failed to collect gc compaction keyspace")
|
||||
.map_err(CompactionError::Other)?;
|
||||
let mut merge_iter = FilterIterator::create(
|
||||
MergeIterator::create_with_options(
|
||||
&delta_layers,
|
||||
&image_layers,
|
||||
ctx,
|
||||
128 * 8192, /* 1MB buffer for each of the inner iterators */
|
||||
128,
|
||||
),
|
||||
MergeIterator::create(&delta_layers, &image_layers, ctx),
|
||||
dense_ks,
|
||||
sparse_ks,
|
||||
)
|
||||
|
||||
@@ -1,21 +1,20 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{Context, bail};
|
||||
use pageserver_api::models::ShardImportStatus;
|
||||
use remote_storage::RemotePath;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::info;
|
||||
use tracing::{Instrument, info, info_span};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use super::Timeline;
|
||||
use crate::context::RequestContext;
|
||||
use crate::controller_upcall_client::{StorageControllerUpcallApi, StorageControllerUpcallClient};
|
||||
use crate::tenant::metadata::TimelineMetadata;
|
||||
|
||||
mod flow;
|
||||
mod importbucket_client;
|
||||
mod importbucket_format;
|
||||
pub(crate) mod index_part_format;
|
||||
pub(crate) mod upcall_api;
|
||||
|
||||
pub async fn doit(
|
||||
timeline: &Arc<Timeline>,
|
||||
@@ -35,6 +34,23 @@ pub async fn doit(
|
||||
|
||||
let storage = importbucket_client::new(timeline.conf, &location, cancel.clone()).await?;
|
||||
|
||||
info!("get spec early so we know we'll be able to upcall when done");
|
||||
let Some(spec) = storage.get_spec().await? else {
|
||||
bail!("spec not found")
|
||||
};
|
||||
|
||||
let upcall_client =
|
||||
upcall_api::Client::new(timeline.conf, cancel.clone()).context("create upcall client")?;
|
||||
|
||||
//
|
||||
// send an early progress update to clean up k8s job early and generate potentially useful logs
|
||||
//
|
||||
info!("send early progress update");
|
||||
upcall_client
|
||||
.send_progress_until_success(&spec)
|
||||
.instrument(info_span!("early_progress_update"))
|
||||
.await?;
|
||||
|
||||
let status_prefix = RemotePath::from_string("status").unwrap();
|
||||
|
||||
//
|
||||
@@ -160,21 +176,7 @@ pub async fn doit(
|
||||
|
||||
//
|
||||
// Communicate that shard is done.
|
||||
// Ensure at-least-once delivery of the upcall to storage controller
|
||||
// before we mark the task as done and never come here again.
|
||||
//
|
||||
let storcon_client = StorageControllerUpcallClient::new(timeline.conf, &cancel)?
|
||||
.expect("storcon configured");
|
||||
storcon_client
|
||||
.put_timeline_import_status(
|
||||
timeline.tenant_shard_id,
|
||||
timeline.timeline_id,
|
||||
// TODO(vlad): What about import errors?
|
||||
ShardImportStatus::Done,
|
||||
)
|
||||
.await
|
||||
.map_err(|_err| anyhow::anyhow!("Shut down while putting timeline import status"))?;
|
||||
|
||||
storage
|
||||
.put_json(
|
||||
&shard_status_key,
|
||||
@@ -184,6 +186,16 @@ pub async fn doit(
|
||||
.context("put shard status")?;
|
||||
}
|
||||
|
||||
//
|
||||
// Ensure at-least-once deliver of the upcall to cplane
|
||||
// before we mark the task as done and never come here again.
|
||||
//
|
||||
info!("send final progress update");
|
||||
upcall_client
|
||||
.send_progress_until_success(&spec)
|
||||
.instrument(info_span!("final_progress_update"))
|
||||
.await?;
|
||||
|
||||
//
|
||||
// Mark as done in index_part.
|
||||
// This makes subsequent timeline loads enter the normal load code path
|
||||
|
||||
@@ -13,7 +13,7 @@ use tokio_util::sync::CancellationToken;
|
||||
use tracing::{debug, info, instrument};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use super::index_part_format;
|
||||
use super::{importbucket_format, index_part_format};
|
||||
use crate::assert_u64_eq_usize::U64IsUsize;
|
||||
use crate::config::PageServerConf;
|
||||
|
||||
@@ -173,6 +173,12 @@ impl RemoteStorageWrapper {
|
||||
res
|
||||
}
|
||||
|
||||
pub async fn get_spec(&self) -> Result<Option<importbucket_format::Spec>, anyhow::Error> {
|
||||
self.get_json(&RemotePath::from_string("spec.json").unwrap())
|
||||
.await
|
||||
.context("get spec")
|
||||
}
|
||||
|
||||
#[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]
|
||||
pub async fn get_json<T: DeserializeOwned>(
|
||||
&self,
|
||||
@@ -238,8 +244,7 @@ impl RemoteStorageWrapper {
|
||||
kind: DownloadKind::Large,
|
||||
etag: None,
|
||||
byte_start: Bound::Included(start_inclusive),
|
||||
byte_end: Bound::Excluded(end_exclusive),
|
||||
version_id: None,
|
||||
byte_end: Bound::Excluded(end_exclusive)
|
||||
},
|
||||
&self.cancel)
|
||||
.await?;
|
||||
|
||||
@@ -11,3 +11,10 @@ pub struct ShardStatus {
|
||||
pub done: bool,
|
||||
// TODO: remaining fields
|
||||
}
|
||||
|
||||
// TODO: dedupe with fast_import code
|
||||
#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Spec {
|
||||
pub project_id: String,
|
||||
pub branch_id: String,
|
||||
}
|
||||
|
||||
124
pageserver/src/tenant/timeline/import_pgdata/upcall_api.rs
Normal file
124
pageserver/src/tenant/timeline/import_pgdata/upcall_api.rs
Normal file
@@ -0,0 +1,124 @@
|
||||
//! FIXME: most of this is copy-paste from mgmt_api.rs ; dedupe into a `reqwest_utils::Client` crate.
|
||||
use pageserver_client::mgmt_api::{Error, ResponseErrorMessageExt};
|
||||
use reqwest::{Certificate, Method};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::error;
|
||||
|
||||
use super::importbucket_format::Spec;
|
||||
use crate::config::PageServerConf;
|
||||
|
||||
pub struct Client {
|
||||
base_url: String,
|
||||
authorization_header: Option<String>,
|
||||
client: reqwest::Client,
|
||||
cancel: CancellationToken,
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
struct ImportProgressRequest {
|
||||
// no fields yet, not sure if there every will be any
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
struct ImportProgressResponse {
|
||||
// we don't care
|
||||
}
|
||||
|
||||
impl Client {
|
||||
pub fn new(conf: &PageServerConf, cancel: CancellationToken) -> anyhow::Result<Self> {
|
||||
let Some(ref base_url) = conf.import_pgdata_upcall_api else {
|
||||
anyhow::bail!("import_pgdata_upcall_api is not configured")
|
||||
};
|
||||
let mut http_client = reqwest::Client::builder();
|
||||
for cert in &conf.ssl_ca_certs {
|
||||
http_client = http_client.add_root_certificate(Certificate::from_der(cert.contents())?);
|
||||
}
|
||||
let http_client = http_client.build()?;
|
||||
|
||||
Ok(Self {
|
||||
base_url: base_url.to_string(),
|
||||
client: http_client,
|
||||
cancel,
|
||||
authorization_header: conf
|
||||
.import_pgdata_upcall_api_token
|
||||
.as_ref()
|
||||
.map(|secret_string| secret_string.get_contents())
|
||||
.map(|jwt| format!("Bearer {jwt}")),
|
||||
})
|
||||
}
|
||||
|
||||
fn start_request<U: reqwest::IntoUrl>(
|
||||
&self,
|
||||
method: Method,
|
||||
uri: U,
|
||||
) -> reqwest::RequestBuilder {
|
||||
let req = self.client.request(method, uri);
|
||||
if let Some(value) = &self.authorization_header {
|
||||
req.header(reqwest::header::AUTHORIZATION, value)
|
||||
} else {
|
||||
req
|
||||
}
|
||||
}
|
||||
|
||||
async fn request_noerror<B: serde::Serialize, U: reqwest::IntoUrl>(
|
||||
&self,
|
||||
method: Method,
|
||||
uri: U,
|
||||
body: B,
|
||||
) -> Result<reqwest::Response> {
|
||||
self.start_request(method, uri)
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.map_err(Error::ReceiveBody)
|
||||
}
|
||||
|
||||
async fn request<B: serde::Serialize, U: reqwest::IntoUrl>(
|
||||
&self,
|
||||
method: Method,
|
||||
uri: U,
|
||||
body: B,
|
||||
) -> Result<reqwest::Response> {
|
||||
let res = self.request_noerror(method, uri, body).await?;
|
||||
let response = res.error_from_body().await?;
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
pub async fn send_progress_once(&self, spec: &Spec) -> Result<()> {
|
||||
let url = format!(
|
||||
"{}/projects/{}/branches/{}/import_progress",
|
||||
self.base_url, spec.project_id, spec.branch_id
|
||||
);
|
||||
let ImportProgressResponse {} = self
|
||||
.request(Method::POST, url, &ImportProgressRequest {})
|
||||
.await?
|
||||
.json()
|
||||
.await
|
||||
.map_err(Error::ReceiveBody)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn send_progress_until_success(&self, spec: &Spec) -> anyhow::Result<()> {
|
||||
loop {
|
||||
match self.send_progress_once(spec).await {
|
||||
Ok(()) => return Ok(()),
|
||||
Err(Error::Cancelled) => return Err(anyhow::anyhow!("cancelled")),
|
||||
Err(err) => {
|
||||
error!(?err, "error sending progress, retrying");
|
||||
if tokio::time::timeout(
|
||||
std::time::Duration::from_secs(10),
|
||||
self.cancel.cancelled(),
|
||||
)
|
||||
.await
|
||||
.is_ok()
|
||||
{
|
||||
anyhow::bail!("cancelled while sending early progress update");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -507,9 +507,7 @@ impl<'a> VectoredBlobReader<'a> {
|
||||
|
||||
for (blob_start, meta) in blobs_at.iter().copied() {
|
||||
let header_start = (blob_start - read.start) as usize;
|
||||
let header = Header::decode(&buf[header_start..]).map_err(|anyhow_err| {
|
||||
std::io::Error::new(std::io::ErrorKind::InvalidData, anyhow_err)
|
||||
})?;
|
||||
let header = Header::decode(&buf[header_start..])?;
|
||||
let data_start = header_start + header.header_len;
|
||||
let end = data_start + header.data_len;
|
||||
let compression_bits = header.compression_bits;
|
||||
@@ -664,6 +662,7 @@ impl StreamingVectoredReadPlanner {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use anyhow::Error;
|
||||
|
||||
use super::super::blob_io::tests::{random_array, write_maybe_compressed};
|
||||
use super::*;
|
||||
@@ -946,16 +945,13 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
async fn round_trip_test_compressed(
|
||||
blobs: &[Vec<u8>],
|
||||
compression: bool,
|
||||
) -> anyhow::Result<()> {
|
||||
async fn round_trip_test_compressed(blobs: &[Vec<u8>], compression: bool) -> Result<(), Error> {
|
||||
let ctx =
|
||||
RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test();
|
||||
let (_temp_dir, pathbuf, offsets) =
|
||||
write_maybe_compressed(blobs, compression, &ctx).await?;
|
||||
write_maybe_compressed::<true>(blobs, compression, &ctx).await?;
|
||||
|
||||
let file = VirtualFile::open_v2(&pathbuf, &ctx).await?;
|
||||
let file = VirtualFile::open(&pathbuf, &ctx).await?;
|
||||
let file_len = std::fs::metadata(&pathbuf)?.len();
|
||||
|
||||
// Multiply by two (compressed data might need more space), and add a few bytes for the header
|
||||
@@ -1001,7 +997,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_really_big_array() -> anyhow::Result<()> {
|
||||
async fn test_really_big_array() -> Result<(), Error> {
|
||||
let blobs = &[
|
||||
b"test".to_vec(),
|
||||
random_array(10 * PAGE_SZ),
|
||||
@@ -1016,7 +1012,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_arrays_inc() -> anyhow::Result<()> {
|
||||
async fn test_arrays_inc() -> Result<(), Error> {
|
||||
let blobs = (0..PAGE_SZ / 8)
|
||||
.map(|v| random_array(v * 16))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
@@ -12,11 +12,10 @@
|
||||
//! src/backend/storage/file/fd.c
|
||||
//!
|
||||
use std::fs::File;
|
||||
use std::io::{Error, ErrorKind};
|
||||
use std::io::{Error, ErrorKind, Seek, SeekFrom};
|
||||
use std::os::fd::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd};
|
||||
#[cfg(target_os = "linux")]
|
||||
use std::os::unix::fs::OpenOptionsExt;
|
||||
use std::sync::LazyLock;
|
||||
use std::sync::atomic::{AtomicBool, AtomicU8, AtomicUsize, Ordering};
|
||||
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
@@ -26,31 +25,29 @@ use owned_buffers_io::aligned_buffer::{AlignedBufferMut, AlignedSlice, ConstAlig
|
||||
use owned_buffers_io::io_buf_aligned::{IoBufAligned, IoBufAlignedMut};
|
||||
use owned_buffers_io::io_buf_ext::FullSlice;
|
||||
use pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT;
|
||||
pub use pageserver_api::models::virtual_file as api;
|
||||
use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
|
||||
use tokio::time::Instant;
|
||||
use tokio_epoll_uring::{BoundedBuf, IoBuf, IoBufMut, Slice};
|
||||
|
||||
use self::owned_buffers_io::write::OwnedAsyncWriter;
|
||||
use crate::assert_u64_eq_usize::UsizeIsU64;
|
||||
use crate::context::RequestContext;
|
||||
use crate::metrics::{STORAGE_IO_TIME_METRIC, StorageIoOperation};
|
||||
use crate::page_cache::{PAGE_SZ, PageWriteGuard};
|
||||
|
||||
pub(crate) use api::IoMode;
|
||||
pub(crate) use io_engine::IoEngineKind;
|
||||
pub(crate) mod io_engine;
|
||||
pub use io_engine::{
|
||||
FeatureTestResult as IoEngineFeatureTestResult, feature_test as io_engine_feature_test,
|
||||
io_engine_for_bench,
|
||||
};
|
||||
pub(crate) use metadata::Metadata;
|
||||
pub(crate) use open_options::*;
|
||||
pub use pageserver_api::models::virtual_file as api;
|
||||
pub use temporary::TempVirtualFile;
|
||||
|
||||
pub(crate) mod io_engine;
|
||||
mod metadata;
|
||||
mod open_options;
|
||||
mod temporary;
|
||||
pub(crate) use api::IoMode;
|
||||
pub(crate) use io_engine::IoEngineKind;
|
||||
pub(crate) use metadata::Metadata;
|
||||
pub(crate) use open_options::*;
|
||||
|
||||
use self::owned_buffers_io::write::OwnedAsyncWriter;
|
||||
|
||||
pub(crate) mod owned_buffers_io {
|
||||
//! Abstractions for IO with owned buffers.
|
||||
//!
|
||||
@@ -97,38 +94,69 @@ impl VirtualFile {
|
||||
Self::open_with_options_v2(path.as_ref(), OpenOptions::new().read(true), ctx).await
|
||||
}
|
||||
|
||||
pub async fn create<P: AsRef<Utf8Path>>(
|
||||
path: P,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Self, std::io::Error> {
|
||||
let inner = VirtualFileInner::create(path, ctx).await?;
|
||||
Ok(VirtualFile {
|
||||
inner,
|
||||
_mode: IoMode::Buffered,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn create_v2<P: AsRef<Utf8Path>>(
|
||||
path: P,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Self, std::io::Error> {
|
||||
VirtualFile::open_with_options_v2(
|
||||
path.as_ref(),
|
||||
OpenOptions::new().write(true).create(true).truncate(true),
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn open_with_options<P: AsRef<Utf8Path>>(
|
||||
path: P,
|
||||
open_options: &OpenOptions,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Self, std::io::Error> {
|
||||
let inner = VirtualFileInner::open_with_options(path, open_options, ctx).await?;
|
||||
Ok(VirtualFile {
|
||||
inner,
|
||||
_mode: IoMode::Buffered,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn open_with_options_v2<P: AsRef<Utf8Path>>(
|
||||
path: P,
|
||||
open_options: &OpenOptions,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Self, std::io::Error> {
|
||||
let mode = get_io_mode();
|
||||
let set_o_direct = match (mode, open_options.is_write()) {
|
||||
(IoMode::Buffered, _) => false,
|
||||
#[cfg(target_os = "linux")]
|
||||
(IoMode::Direct, false) => true,
|
||||
#[cfg(target_os = "linux")]
|
||||
(IoMode::Direct, true) => false,
|
||||
#[cfg(target_os = "linux")]
|
||||
(IoMode::DirectRw, _) => true,
|
||||
};
|
||||
let open_options = open_options.clone();
|
||||
let open_options = if set_o_direct {
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
let mut open_options = open_options;
|
||||
open_options.custom_flags(nix::libc::O_DIRECT);
|
||||
open_options
|
||||
let file = match get_io_mode() {
|
||||
IoMode::Buffered => {
|
||||
let inner = VirtualFileInner::open_with_options(path, open_options, ctx).await?;
|
||||
VirtualFile {
|
||||
inner,
|
||||
_mode: IoMode::Buffered,
|
||||
}
|
||||
}
|
||||
#[cfg(target_os = "linux")]
|
||||
IoMode::Direct => {
|
||||
let inner = VirtualFileInner::open_with_options(
|
||||
path,
|
||||
open_options.clone().custom_flags(nix::libc::O_DIRECT),
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
VirtualFile {
|
||||
inner,
|
||||
_mode: IoMode::Direct,
|
||||
}
|
||||
}
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
unreachable!(
|
||||
"O_DIRECT is not supported on this platform, IoMode's that result in set_o_direct=true shouldn't even be defined"
|
||||
);
|
||||
} else {
|
||||
open_options
|
||||
};
|
||||
let inner = VirtualFileInner::open_with_options(path, open_options, ctx).await?;
|
||||
Ok(VirtualFile { inner, _mode: mode })
|
||||
Ok(file)
|
||||
}
|
||||
|
||||
pub fn path(&self) -> &Utf8Path {
|
||||
@@ -157,14 +185,18 @@ impl VirtualFile {
|
||||
self.inner.sync_data().await
|
||||
}
|
||||
|
||||
pub async fn set_len(&self, len: u64, ctx: &RequestContext) -> Result<(), Error> {
|
||||
self.inner.set_len(len, ctx).await
|
||||
}
|
||||
|
||||
pub async fn metadata(&self) -> Result<Metadata, Error> {
|
||||
self.inner.metadata().await
|
||||
}
|
||||
|
||||
pub fn remove(self) {
|
||||
self.inner.remove();
|
||||
}
|
||||
|
||||
pub async fn seek(&mut self, pos: SeekFrom) -> Result<u64, Error> {
|
||||
self.inner.seek(pos).await
|
||||
}
|
||||
|
||||
pub async fn read_exact_at<Buf>(
|
||||
&self,
|
||||
slice: Slice<Buf>,
|
||||
@@ -195,31 +227,25 @@ impl VirtualFile {
|
||||
self.inner.write_all_at(buf, offset, ctx).await
|
||||
}
|
||||
|
||||
pub(crate) async fn read_to_string<P: AsRef<Utf8Path>>(
|
||||
path: P,
|
||||
pub async fn write_all<Buf: IoBuf + Send>(
|
||||
&mut self,
|
||||
buf: FullSlice<Buf>,
|
||||
ctx: &RequestContext,
|
||||
) -> std::io::Result<String> {
|
||||
let file = VirtualFile::open(path, ctx).await?; // TODO: open_v2
|
||||
) -> (FullSlice<Buf>, Result<usize, Error>) {
|
||||
self.inner.write_all(buf, ctx).await
|
||||
}
|
||||
|
||||
async fn read_to_end(&mut self, buf: &mut Vec<u8>, ctx: &RequestContext) -> Result<(), Error> {
|
||||
self.inner.read_to_end(buf, ctx).await
|
||||
}
|
||||
|
||||
pub(crate) async fn read_to_string(
|
||||
&mut self,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<String, anyhow::Error> {
|
||||
let mut buf = Vec::new();
|
||||
let mut tmp = vec![0; 128];
|
||||
let mut pos: u64 = 0;
|
||||
loop {
|
||||
let slice = tmp.slice(..128);
|
||||
let (slice, res) = file.inner.read_at(slice, pos, ctx).await;
|
||||
match res {
|
||||
Ok(0) => break,
|
||||
Ok(n) => {
|
||||
pos += n as u64;
|
||||
buf.extend_from_slice(&slice[..n]);
|
||||
}
|
||||
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => {}
|
||||
Err(e) => return Err(e),
|
||||
}
|
||||
tmp = slice.into_inner();
|
||||
}
|
||||
String::from_utf8(buf).map_err(|_| {
|
||||
std::io::Error::new(ErrorKind::InvalidData, "file contents are not valid UTF-8")
|
||||
})
|
||||
self.read_to_end(&mut buf, ctx).await?;
|
||||
Ok(String::from_utf8(buf)?)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -266,6 +292,9 @@ pub struct VirtualFileInner {
|
||||
/// belongs to a different VirtualFile.
|
||||
handle: RwLock<SlotHandle>,
|
||||
|
||||
/// Current file position
|
||||
pos: u64,
|
||||
|
||||
/// File path and options to use to open it.
|
||||
///
|
||||
/// Note: this only contains the options needed to re-open it. For example,
|
||||
@@ -530,7 +559,21 @@ impl VirtualFileInner {
|
||||
path: P,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<VirtualFileInner, std::io::Error> {
|
||||
Self::open_with_options(path.as_ref(), OpenOptions::new().read(true).clone(), ctx).await
|
||||
Self::open_with_options(path.as_ref(), OpenOptions::new().read(true), ctx).await
|
||||
}
|
||||
|
||||
/// Create a new file for writing. If the file exists, it will be truncated.
|
||||
/// Like File::create.
|
||||
pub async fn create<P: AsRef<Utf8Path>>(
|
||||
path: P,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<VirtualFileInner, std::io::Error> {
|
||||
Self::open_with_options(
|
||||
path.as_ref(),
|
||||
OpenOptions::new().write(true).create(true).truncate(true),
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Open a file with given options.
|
||||
@@ -540,7 +583,7 @@ impl VirtualFileInner {
|
||||
/// on the first time. Make sure that's sane!
|
||||
pub async fn open_with_options<P: AsRef<Utf8Path>>(
|
||||
path: P,
|
||||
open_options: OpenOptions,
|
||||
open_options: &OpenOptions,
|
||||
_ctx: &RequestContext,
|
||||
) -> Result<VirtualFileInner, std::io::Error> {
|
||||
let path = path.as_ref();
|
||||
@@ -565,6 +608,7 @@ impl VirtualFileInner {
|
||||
|
||||
let vfile = VirtualFileInner {
|
||||
handle: RwLock::new(handle),
|
||||
pos: 0,
|
||||
path: path.to_owned(),
|
||||
open_options: reopen_options,
|
||||
};
|
||||
@@ -631,13 +675,6 @@ impl VirtualFileInner {
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn set_len(&self, len: u64, _ctx: &RequestContext) -> Result<(), Error> {
|
||||
with_file!(self, StorageIoOperation::SetLen, |file_guard| {
|
||||
let (_file_guard, res) = io_engine::get().set_len(file_guard, len).await;
|
||||
res.maybe_fatal_err("set_len")
|
||||
})
|
||||
}
|
||||
|
||||
/// Helper function internal to `VirtualFile` that looks up the underlying File,
|
||||
/// opens it and evicts some other File if necessary. The passed parameter is
|
||||
/// assumed to be a function available for the physical `File`.
|
||||
@@ -705,6 +742,38 @@ impl VirtualFileInner {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn remove(self) {
|
||||
let path = self.path.clone();
|
||||
drop(self);
|
||||
std::fs::remove_file(path).expect("failed to remove the virtual file");
|
||||
}
|
||||
|
||||
pub async fn seek(&mut self, pos: SeekFrom) -> Result<u64, Error> {
|
||||
match pos {
|
||||
SeekFrom::Start(offset) => {
|
||||
self.pos = offset;
|
||||
}
|
||||
SeekFrom::End(offset) => {
|
||||
self.pos = with_file!(self, StorageIoOperation::Seek, |mut file_guard| file_guard
|
||||
.with_std_file_mut(|std_file| std_file.seek(SeekFrom::End(offset))))?
|
||||
}
|
||||
SeekFrom::Current(offset) => {
|
||||
let pos = self.pos as i128 + offset as i128;
|
||||
if pos < 0 {
|
||||
return Err(Error::new(
|
||||
ErrorKind::InvalidInput,
|
||||
"offset would be negative",
|
||||
));
|
||||
}
|
||||
if pos > u64::MAX as i128 {
|
||||
return Err(Error::new(ErrorKind::InvalidInput, "offset overflow"));
|
||||
}
|
||||
self.pos = pos as u64;
|
||||
}
|
||||
}
|
||||
Ok(self.pos)
|
||||
}
|
||||
|
||||
/// Read the file contents in range `offset..(offset + slice.bytes_total())` into `slice[0..slice.bytes_total()]`.
|
||||
///
|
||||
/// The returned `Slice<Buf>` is equivalent to the input `slice`, i.e., it's the same view into the same buffer.
|
||||
@@ -788,7 +857,59 @@ impl VirtualFileInner {
|
||||
(restore(buf), Ok(()))
|
||||
}
|
||||
|
||||
pub(super) async fn read_at<Buf>(
|
||||
/// Writes `buf` to the file at the current offset.
|
||||
///
|
||||
/// Panics if there is an uninitialized range in `buf`, as that is most likely a bug in the caller.
|
||||
pub async fn write_all<Buf: IoBuf + Send>(
|
||||
&mut self,
|
||||
buf: FullSlice<Buf>,
|
||||
ctx: &RequestContext,
|
||||
) -> (FullSlice<Buf>, Result<usize, Error>) {
|
||||
let buf = buf.into_raw_slice();
|
||||
let bounds = buf.bounds();
|
||||
let restore =
|
||||
|buf: Slice<_>| FullSlice::must_new(Slice::from_buf_bounds(buf.into_inner(), bounds));
|
||||
let nbytes = buf.len();
|
||||
let mut buf = buf;
|
||||
while !buf.is_empty() {
|
||||
let (tmp, res) = self.write(FullSlice::must_new(buf), ctx).await;
|
||||
buf = tmp.into_raw_slice();
|
||||
match res {
|
||||
Ok(0) => {
|
||||
return (
|
||||
restore(buf),
|
||||
Err(Error::new(
|
||||
std::io::ErrorKind::WriteZero,
|
||||
"failed to write whole buffer",
|
||||
)),
|
||||
);
|
||||
}
|
||||
Ok(n) => {
|
||||
buf = buf.slice(n..);
|
||||
}
|
||||
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => {}
|
||||
Err(e) => return (restore(buf), Err(e)),
|
||||
}
|
||||
}
|
||||
(restore(buf), Ok(nbytes))
|
||||
}
|
||||
|
||||
async fn write<B: IoBuf + Send>(
|
||||
&mut self,
|
||||
buf: FullSlice<B>,
|
||||
ctx: &RequestContext,
|
||||
) -> (FullSlice<B>, Result<usize, std::io::Error>) {
|
||||
let pos = self.pos;
|
||||
let (buf, res) = self.write_at(buf, pos, ctx).await;
|
||||
let n = match res {
|
||||
Ok(n) => n,
|
||||
Err(e) => return (buf, Err(e)),
|
||||
};
|
||||
self.pos += n as u64;
|
||||
(buf, Ok(n))
|
||||
}
|
||||
|
||||
pub(crate) async fn read_at<Buf>(
|
||||
&self,
|
||||
buf: tokio_epoll_uring::Slice<Buf>,
|
||||
offset: u64,
|
||||
@@ -816,11 +937,23 @@ impl VirtualFileInner {
|
||||
})
|
||||
}
|
||||
|
||||
/// The function aborts the process if the error is fatal.
|
||||
async fn write_at<B: IoBuf + Send>(
|
||||
&self,
|
||||
buf: FullSlice<B>,
|
||||
offset: u64,
|
||||
ctx: &RequestContext,
|
||||
) -> (FullSlice<B>, Result<usize, Error>) {
|
||||
let (slice, result) = self.write_at_inner(buf, offset, ctx).await;
|
||||
let result = result.maybe_fatal_err("write_at");
|
||||
(slice, result)
|
||||
}
|
||||
|
||||
async fn write_at_inner<B: IoBuf + Send>(
|
||||
&self,
|
||||
buf: FullSlice<B>,
|
||||
offset: u64,
|
||||
ctx: &RequestContext,
|
||||
) -> (FullSlice<B>, Result<usize, Error>) {
|
||||
let file_guard = match self.lock_file().await {
|
||||
Ok(file_guard) => file_guard,
|
||||
@@ -829,13 +962,30 @@ impl VirtualFileInner {
|
||||
observe_duration!(StorageIoOperation::Write, {
|
||||
let ((_file_guard, buf), result) =
|
||||
io_engine::get().write_at(file_guard, offset, buf).await;
|
||||
let result = result.maybe_fatal_err("write_at");
|
||||
if let Ok(size) = result {
|
||||
ctx.io_size_metrics().write.add(size.into_u64());
|
||||
}
|
||||
(buf, result)
|
||||
})
|
||||
}
|
||||
|
||||
async fn read_to_end(&mut self, buf: &mut Vec<u8>, ctx: &RequestContext) -> Result<(), Error> {
|
||||
let mut tmp = vec![0; 128];
|
||||
loop {
|
||||
let slice = tmp.slice(..128);
|
||||
let (slice, res) = self.read_at(slice, self.pos, ctx).await;
|
||||
match res {
|
||||
Ok(0) => return Ok(()),
|
||||
Ok(n) => {
|
||||
self.pos += n as u64;
|
||||
buf.extend_from_slice(&slice[..n]);
|
||||
}
|
||||
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => {}
|
||||
Err(e) => return Err(e),
|
||||
}
|
||||
tmp = slice.into_inner();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Adapted from https://doc.rust-lang.org/1.72.0/src/std/os/unix/fs.rs.html#117-135
|
||||
@@ -1050,6 +1200,19 @@ impl FileGuard {
|
||||
let _ = file.into_raw_fd();
|
||||
res
|
||||
}
|
||||
/// Soft deprecation: we'll move VirtualFile to async APIs and remove this function eventually.
|
||||
fn with_std_file_mut<F, R>(&mut self, with: F) -> R
|
||||
where
|
||||
F: FnOnce(&mut File) -> R,
|
||||
{
|
||||
// SAFETY:
|
||||
// - lifetime of the fd: `file` doesn't outlive the OwnedFd stored in `self`.
|
||||
// - &mut usage below: `self` is `&mut`, hence this call is the only task/thread that has control over the underlying fd
|
||||
let mut file = unsafe { File::from_raw_fd(self.as_ref().as_raw_fd()) };
|
||||
let res = with(&mut file);
|
||||
let _ = file.into_raw_fd();
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
impl tokio_epoll_uring::IoFd for FileGuard {
|
||||
@@ -1139,9 +1302,6 @@ impl OwnedAsyncWriter for VirtualFile {
|
||||
) -> (FullSlice<Buf>, std::io::Result<()>) {
|
||||
VirtualFile::write_all_at(self, buf, offset, ctx).await
|
||||
}
|
||||
async fn set_len(&self, len: u64, ctx: &RequestContext) -> std::io::Result<()> {
|
||||
VirtualFile::set_len(self, len, ctx).await
|
||||
}
|
||||
}
|
||||
|
||||
impl OpenFiles {
|
||||
@@ -1206,9 +1366,10 @@ pub(crate) type IoBuffer = AlignedBuffer<ConstAlign<{ get_io_buffer_alignment()
|
||||
pub(crate) type IoPageSlice<'a> =
|
||||
AlignedSlice<'a, PAGE_SZ, ConstAlign<{ get_io_buffer_alignment() }>>;
|
||||
|
||||
static IO_MODE: LazyLock<AtomicU8> = LazyLock::new(|| AtomicU8::new(IoMode::preferred() as u8));
|
||||
static IO_MODE: once_cell::sync::Lazy<AtomicU8> =
|
||||
once_cell::sync::Lazy::new(|| AtomicU8::new(IoMode::preferred() as u8));
|
||||
|
||||
pub fn set_io_mode(mode: IoMode) {
|
||||
pub(crate) fn set_io_mode(mode: IoMode) {
|
||||
IO_MODE.store(mode as u8, std::sync::atomic::Ordering::Relaxed);
|
||||
}
|
||||
|
||||
@@ -1220,6 +1381,7 @@ static SYNC_MODE: AtomicU8 = AtomicU8::new(SyncMode::Sync as u8);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::io::Write;
|
||||
use std::os::unix::fs::FileExt;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -1272,6 +1434,43 @@ mod tests {
|
||||
MaybeVirtualFile::File(file) => file.write_all_at(&buf[..], offset),
|
||||
}
|
||||
}
|
||||
async fn seek(&mut self, pos: SeekFrom) -> Result<u64, Error> {
|
||||
match self {
|
||||
MaybeVirtualFile::VirtualFile(file) => file.seek(pos).await,
|
||||
MaybeVirtualFile::File(file) => file.seek(pos),
|
||||
}
|
||||
}
|
||||
async fn write_all<Buf: IoBuf + Send>(
|
||||
&mut self,
|
||||
buf: FullSlice<Buf>,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), Error> {
|
||||
match self {
|
||||
MaybeVirtualFile::VirtualFile(file) => {
|
||||
let (_buf, res) = file.write_all(buf, ctx).await;
|
||||
res.map(|_| ())
|
||||
}
|
||||
MaybeVirtualFile::File(file) => file.write_all(&buf[..]),
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function to slurp contents of a file, starting at the current position,
|
||||
// into a string
|
||||
async fn read_string(&mut self, ctx: &RequestContext) -> Result<String, Error> {
|
||||
use std::io::Read;
|
||||
let mut buf = String::new();
|
||||
match self {
|
||||
MaybeVirtualFile::VirtualFile(file) => {
|
||||
let mut buf = Vec::new();
|
||||
file.read_to_end(&mut buf, ctx).await?;
|
||||
return Ok(String::from_utf8(buf).unwrap());
|
||||
}
|
||||
MaybeVirtualFile::File(file) => {
|
||||
file.read_to_string(&mut buf)?;
|
||||
}
|
||||
}
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
// Helper function to slurp a portion of a file into a string
|
||||
async fn read_string_at(
|
||||
@@ -1307,7 +1506,7 @@ mod tests {
|
||||
opts: OpenOptions,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<MaybeVirtualFile, anyhow::Error> {
|
||||
let vf = VirtualFile::open_with_options_v2(&path, &opts, ctx).await?;
|
||||
let vf = VirtualFile::open_with_options(&path, &opts, ctx).await?;
|
||||
Ok(MaybeVirtualFile::VirtualFile(vf))
|
||||
}
|
||||
}
|
||||
@@ -1367,23 +1566,48 @@ mod tests {
|
||||
.await?;
|
||||
|
||||
file_a
|
||||
.write_all_at(IoBuffer::from(b"foobar").slice_len(), 0, &ctx)
|
||||
.write_all(b"foobar".to_vec().slice_len(), &ctx)
|
||||
.await?;
|
||||
|
||||
// cannot read from a file opened in write-only mode
|
||||
let _ = file_a.read_string_at(0, 1, &ctx).await.unwrap_err();
|
||||
let _ = file_a.read_string(&ctx).await.unwrap_err();
|
||||
|
||||
// Close the file and re-open for reading
|
||||
let mut file_a = A::open(path_a, OpenOptions::new().read(true).to_owned(), &ctx).await?;
|
||||
|
||||
// cannot write to a file opened in read-only mode
|
||||
let _ = file_a
|
||||
.write_all_at(IoBuffer::from(b"bar").slice_len(), 0, &ctx)
|
||||
.write_all(b"bar".to_vec().slice_len(), &ctx)
|
||||
.await
|
||||
.unwrap_err();
|
||||
|
||||
// Try simple read
|
||||
assert_eq!("foobar", file_a.read_string_at(0, 6, &ctx).await?);
|
||||
assert_eq!("foobar", file_a.read_string(&ctx).await?);
|
||||
|
||||
// It's positioned at the EOF now.
|
||||
assert_eq!("", file_a.read_string(&ctx).await?);
|
||||
|
||||
// Test seeks.
|
||||
assert_eq!(file_a.seek(SeekFrom::Start(1)).await?, 1);
|
||||
assert_eq!("oobar", file_a.read_string(&ctx).await?);
|
||||
|
||||
assert_eq!(file_a.seek(SeekFrom::End(-2)).await?, 4);
|
||||
assert_eq!("ar", file_a.read_string(&ctx).await?);
|
||||
|
||||
assert_eq!(file_a.seek(SeekFrom::Start(1)).await?, 1);
|
||||
assert_eq!(file_a.seek(SeekFrom::Current(2)).await?, 3);
|
||||
assert_eq!("bar", file_a.read_string(&ctx).await?);
|
||||
|
||||
assert_eq!(file_a.seek(SeekFrom::Current(-5)).await?, 1);
|
||||
assert_eq!("oobar", file_a.read_string(&ctx).await?);
|
||||
|
||||
// Test erroneous seeks to before byte 0
|
||||
file_a.seek(SeekFrom::End(-7)).await.unwrap_err();
|
||||
assert_eq!(file_a.seek(SeekFrom::Start(1)).await?, 1);
|
||||
file_a.seek(SeekFrom::Current(-2)).await.unwrap_err();
|
||||
|
||||
// the erroneous seek should have left the position unchanged
|
||||
assert_eq!("oobar", file_a.read_string(&ctx).await?);
|
||||
|
||||
// Create another test file, and try FileExt functions on it.
|
||||
let path_b = testdir.join("file_b");
|
||||
@@ -1409,6 +1633,9 @@ mod tests {
|
||||
|
||||
// Open a lot of files, enough to cause some evictions. (Or to be precise,
|
||||
// open the same file many times. The effect is the same.)
|
||||
//
|
||||
// leave file_a positioned at offset 1 before we start
|
||||
assert_eq!(file_a.seek(SeekFrom::Start(1)).await?, 1);
|
||||
|
||||
let mut vfiles = Vec::new();
|
||||
for _ in 0..100 {
|
||||
@@ -1418,7 +1645,7 @@ mod tests {
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
assert_eq!("FOOBAR", vfile.read_string_at(0, 6, &ctx).await?);
|
||||
assert_eq!("FOOBAR", vfile.read_string(&ctx).await?);
|
||||
vfiles.push(vfile);
|
||||
}
|
||||
|
||||
@@ -1426,8 +1653,8 @@ mod tests {
|
||||
assert!(vfiles.len() > TEST_MAX_FILE_DESCRIPTORS * 2);
|
||||
|
||||
// The underlying file descriptor for 'file_a' should be closed now. Try to read
|
||||
// from it again.
|
||||
assert_eq!("foobar", file_a.read_string_at(0, 6, &ctx).await?);
|
||||
// from it again. We left the file positioned at offset 1 above.
|
||||
assert_eq!("oobar", file_a.read_string(&ctx).await?);
|
||||
|
||||
// Check that all the other FDs still work too. Use them in random order for
|
||||
// good measure.
|
||||
@@ -1466,7 +1693,7 @@ mod tests {
|
||||
for _ in 0..VIRTUAL_FILES {
|
||||
let f = VirtualFileInner::open_with_options(
|
||||
&test_file_path,
|
||||
OpenOptions::new().read(true).clone(),
|
||||
OpenOptions::new().read(true),
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
@@ -1521,7 +1748,7 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
let mut file = MaybeVirtualFile::from(VirtualFile::open(&path, &ctx).await.unwrap());
|
||||
let post = file.read_string_at(0, 3, &ctx).await.unwrap();
|
||||
let post = file.read_string(&ctx).await.unwrap();
|
||||
assert_eq!(post, "foo");
|
||||
assert!(!tmp_path.exists());
|
||||
drop(file);
|
||||
@@ -1530,7 +1757,7 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
let mut file = MaybeVirtualFile::from(VirtualFile::open(&path, &ctx).await.unwrap());
|
||||
let post = file.read_string_at(0, 3, &ctx).await.unwrap();
|
||||
let post = file.read_string(&ctx).await.unwrap();
|
||||
assert_eq!(post, "bar");
|
||||
assert!(!tmp_path.exists());
|
||||
drop(file);
|
||||
@@ -1555,7 +1782,7 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
let mut file = MaybeVirtualFile::from(VirtualFile::open(&path, &ctx).await.unwrap());
|
||||
let post = file.read_string_at(0, 3, &ctx).await.unwrap();
|
||||
let post = file.read_string(&ctx).await.unwrap();
|
||||
assert_eq!(post, "foo");
|
||||
assert!(!tmp_path.exists());
|
||||
drop(file);
|
||||
|
||||
@@ -209,27 +209,6 @@ impl IoEngine {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) async fn set_len(
|
||||
&self,
|
||||
file_guard: FileGuard,
|
||||
len: u64,
|
||||
) -> (FileGuard, std::io::Result<()>) {
|
||||
match self {
|
||||
IoEngine::NotSet => panic!("not initialized"),
|
||||
IoEngine::StdFs => {
|
||||
let res = file_guard.with_std_file(|std_file| std_file.set_len(len));
|
||||
(file_guard, res)
|
||||
}
|
||||
#[cfg(target_os = "linux")]
|
||||
IoEngine::TokioEpollUring => {
|
||||
// TODO: ftruncate op for tokio-epoll-uring
|
||||
let res = file_guard.with_std_file(|std_file| std_file.set_len(len));
|
||||
(file_guard, res)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) async fn write_at<B: IoBuf + Send>(
|
||||
&self,
|
||||
file_guard: FileGuard,
|
||||
|
||||
@@ -6,12 +6,7 @@ use std::path::Path;
|
||||
use super::io_engine::IoEngine;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct OpenOptions {
|
||||
write: bool,
|
||||
inner: Inner,
|
||||
}
|
||||
#[derive(Debug, Clone)]
|
||||
enum Inner {
|
||||
pub enum OpenOptions {
|
||||
StdFs(std::fs::OpenOptions),
|
||||
#[cfg(target_os = "linux")]
|
||||
TokioEpollUring(tokio_epoll_uring::ops::open_at::OpenOptions),
|
||||
@@ -19,17 +14,13 @@ enum Inner {
|
||||
|
||||
impl Default for OpenOptions {
|
||||
fn default() -> Self {
|
||||
let inner = match super::io_engine::get() {
|
||||
match super::io_engine::get() {
|
||||
IoEngine::NotSet => panic!("io engine not set"),
|
||||
IoEngine::StdFs => Inner::StdFs(std::fs::OpenOptions::new()),
|
||||
IoEngine::StdFs => Self::StdFs(std::fs::OpenOptions::new()),
|
||||
#[cfg(target_os = "linux")]
|
||||
IoEngine::TokioEpollUring => {
|
||||
Inner::TokioEpollUring(tokio_epoll_uring::ops::open_at::OpenOptions::new())
|
||||
Self::TokioEpollUring(tokio_epoll_uring::ops::open_at::OpenOptions::new())
|
||||
}
|
||||
};
|
||||
Self {
|
||||
write: false,
|
||||
inner,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -39,17 +30,13 @@ impl OpenOptions {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
pub(super) fn is_write(&self) -> bool {
|
||||
self.write
|
||||
}
|
||||
|
||||
pub fn read(&mut self, read: bool) -> &mut OpenOptions {
|
||||
match &mut self.inner {
|
||||
Inner::StdFs(x) => {
|
||||
match self {
|
||||
OpenOptions::StdFs(x) => {
|
||||
let _ = x.read(read);
|
||||
}
|
||||
#[cfg(target_os = "linux")]
|
||||
Inner::TokioEpollUring(x) => {
|
||||
OpenOptions::TokioEpollUring(x) => {
|
||||
let _ = x.read(read);
|
||||
}
|
||||
}
|
||||
@@ -57,13 +44,12 @@ impl OpenOptions {
|
||||
}
|
||||
|
||||
pub fn write(&mut self, write: bool) -> &mut OpenOptions {
|
||||
self.write = write;
|
||||
match &mut self.inner {
|
||||
Inner::StdFs(x) => {
|
||||
match self {
|
||||
OpenOptions::StdFs(x) => {
|
||||
let _ = x.write(write);
|
||||
}
|
||||
#[cfg(target_os = "linux")]
|
||||
Inner::TokioEpollUring(x) => {
|
||||
OpenOptions::TokioEpollUring(x) => {
|
||||
let _ = x.write(write);
|
||||
}
|
||||
}
|
||||
@@ -71,12 +57,12 @@ impl OpenOptions {
|
||||
}
|
||||
|
||||
pub fn create(&mut self, create: bool) -> &mut OpenOptions {
|
||||
match &mut self.inner {
|
||||
Inner::StdFs(x) => {
|
||||
match self {
|
||||
OpenOptions::StdFs(x) => {
|
||||
let _ = x.create(create);
|
||||
}
|
||||
#[cfg(target_os = "linux")]
|
||||
Inner::TokioEpollUring(x) => {
|
||||
OpenOptions::TokioEpollUring(x) => {
|
||||
let _ = x.create(create);
|
||||
}
|
||||
}
|
||||
@@ -84,12 +70,12 @@ impl OpenOptions {
|
||||
}
|
||||
|
||||
pub fn create_new(&mut self, create_new: bool) -> &mut OpenOptions {
|
||||
match &mut self.inner {
|
||||
Inner::StdFs(x) => {
|
||||
match self {
|
||||
OpenOptions::StdFs(x) => {
|
||||
let _ = x.create_new(create_new);
|
||||
}
|
||||
#[cfg(target_os = "linux")]
|
||||
Inner::TokioEpollUring(x) => {
|
||||
OpenOptions::TokioEpollUring(x) => {
|
||||
let _ = x.create_new(create_new);
|
||||
}
|
||||
}
|
||||
@@ -97,12 +83,12 @@ impl OpenOptions {
|
||||
}
|
||||
|
||||
pub fn truncate(&mut self, truncate: bool) -> &mut OpenOptions {
|
||||
match &mut self.inner {
|
||||
Inner::StdFs(x) => {
|
||||
match self {
|
||||
OpenOptions::StdFs(x) => {
|
||||
let _ = x.truncate(truncate);
|
||||
}
|
||||
#[cfg(target_os = "linux")]
|
||||
Inner::TokioEpollUring(x) => {
|
||||
OpenOptions::TokioEpollUring(x) => {
|
||||
let _ = x.truncate(truncate);
|
||||
}
|
||||
}
|
||||
@@ -110,10 +96,10 @@ impl OpenOptions {
|
||||
}
|
||||
|
||||
pub(in crate::virtual_file) async fn open(&self, path: &Path) -> std::io::Result<OwnedFd> {
|
||||
match &self.inner {
|
||||
Inner::StdFs(x) => x.open(path).map(|file| file.into()),
|
||||
match self {
|
||||
OpenOptions::StdFs(x) => x.open(path).map(|file| file.into()),
|
||||
#[cfg(target_os = "linux")]
|
||||
Inner::TokioEpollUring(x) => {
|
||||
OpenOptions::TokioEpollUring(x) => {
|
||||
let system = super::io_engine::tokio_epoll_uring_ext::thread_local_system().await;
|
||||
system.open(path, x).await.map_err(|e| match e {
|
||||
tokio_epoll_uring::Error::Op(e) => e,
|
||||
@@ -128,12 +114,12 @@ impl OpenOptions {
|
||||
|
||||
impl std::os::unix::prelude::OpenOptionsExt for OpenOptions {
|
||||
fn mode(&mut self, mode: u32) -> &mut OpenOptions {
|
||||
match &mut self.inner {
|
||||
Inner::StdFs(x) => {
|
||||
match self {
|
||||
OpenOptions::StdFs(x) => {
|
||||
let _ = x.mode(mode);
|
||||
}
|
||||
#[cfg(target_os = "linux")]
|
||||
Inner::TokioEpollUring(x) => {
|
||||
OpenOptions::TokioEpollUring(x) => {
|
||||
let _ = x.mode(mode);
|
||||
}
|
||||
}
|
||||
@@ -141,12 +127,12 @@ impl std::os::unix::prelude::OpenOptionsExt for OpenOptions {
|
||||
}
|
||||
|
||||
fn custom_flags(&mut self, flags: i32) -> &mut OpenOptions {
|
||||
match &mut self.inner {
|
||||
Inner::StdFs(x) => {
|
||||
match self {
|
||||
OpenOptions::StdFs(x) => {
|
||||
let _ = x.custom_flags(flags);
|
||||
}
|
||||
#[cfg(target_os = "linux")]
|
||||
Inner::TokioEpollUring(x) => {
|
||||
OpenOptions::TokioEpollUring(x) => {
|
||||
let _ = x.custom_flags(flags);
|
||||
}
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user