Compare commits


4 Commits

Author               SHA1        Message                                                                      Date
Konstantin Knizhnik  d9788d7cbd  Check relation size for getpage only in case of error                       2025-05-10 09:06:37 +03:00
Konstantin Knizhnik  374495c041  Check if page is not out of relation size in getpage for VM/FSM forks       2025-05-09 18:15:18 +03:00
Konstantin Knizhnik  ca4d758504  Make clippy happy                                                            2025-05-09 17:17:43 +03:00
Konstantin Knizhnik  d5a7ecade5  Do not check relsize cache in getpage and do not update it in get_rel_size  2025-05-09 16:53:44 +03:00
254 changed files with 4108 additions and 14664 deletions

View File

@@ -49,6 +49,10 @@ inputs:
description: 'A JSON object with project settings'
required: false
default: '{}'
default_endpoint_settings:
description: 'A JSON object with the default endpoint settings'
required: false
default: '{}'
outputs:
dsn:
@@ -135,6 +139,21 @@ runs:
-H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
-d "{\"scheduling\": \"Essential\"}"
fi
# XXX
# This is a workaround for the default endpoint settings, which currently do not allow some settings in the public API.
# https://github.com/neondatabase/cloud/issues/27108
if [[ -n ${DEFAULT_ENDPOINT_SETTINGS} && ${DEFAULT_ENDPOINT_SETTINGS} != "{}" ]] ; then
PROJECT_DATA=$(curl -X GET \
"https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/projects/${project_id}" \
-H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
-d "{\"scheduling\": \"Essential\"}"
)
NEW_DEFAULT_ENDPOINT_SETTINGS=$(echo ${PROJECT_DATA} | jq -rc ".project.default_endpoint_settings + ${DEFAULT_ENDPOINT_SETTINGS}")
curl -X POST --fail \
"https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/projects/${project_id}/default_endpoint_settings" \
-H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
--data "${NEW_DEFAULT_ENDPOINT_SETTINGS}"
fi
env:
@@ -152,3 +171,4 @@ runs:
PSQL: ${{ inputs.psql_path }}
LD_LIBRARY_PATH: ${{ inputs.libpq_lib_path }}
PROJECT_SETTINGS: ${{ inputs.project_settings }}
DEFAULT_ENDPOINT_SETTINGS: ${{ inputs.default_endpoint_settings }}
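
The jq expression above relies on object addition, which is a shallow merge where keys from the right-hand object win; a pg_settings object passed in DEFAULT_ENDPOINT_SETTINGS therefore replaces the project's existing pg_settings wholesale rather than merging key by key. A minimal sketch of the semantics, with hypothetical settings values:

    # jq's `+` on objects is a shallow merge; right-hand keys override left-hand ones.
    PROJECT_DATA='{"project": {"default_endpoint_settings": {"pg_settings": {"TimeZone": "UTC"}}}}'
    DEFAULT_ENDPOINT_SETTINGS='{"pg_settings": {"TimeZone": "America/Los_Angeles"}}'
    echo "${PROJECT_DATA}" | jq -rc ".project.default_endpoint_settings + ${DEFAULT_ENDPOINT_SETTINGS}"
    # -> {"pg_settings":{"TimeZone":"America/Los_Angeles"}}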

View File

@@ -279,14 +279,18 @@ jobs:
# run all non-pageserver tests
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E '!package(pageserver)'
# run pageserver tests
# (When developing new pageserver features gated by config fields, we commonly make the rust
# unit tests sensitive to an environment variable NEON_PAGESERVER_UNIT_TEST_FEATURENAME.
# Then run the nextest invocation below for all relevant combinations. Singling out the
# pageserver tests from non-pageserver tests cuts down the time it takes for this CI step.)
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=tokio-epoll-uring \
${cov_prefix} \
cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
# run pageserver tests with different settings
for get_vectored_concurrent_io in sequential sidecar-task; do
for io_engine in std-fs tokio-epoll-uring ; do
for io_mode in buffered direct direct-rw ; do
NEON_PAGESERVER_UNIT_TEST_GET_VECTORED_CONCURRENT_IO=$get_vectored_concurrent_io \
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine \
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IO_MODE=$io_mode \
${cov_prefix} \
cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
done
done
done
# Run separate tests for real S3
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
@@ -401,6 +405,8 @@ jobs:
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
BUILD_TAG: ${{ inputs.build-tag }}
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
PAGESERVER_VIRTUAL_FILE_IO_MODE: direct-rw
USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}
# Temporarily disable this step until we figure out why it's so flaky
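
The comment in the first hunk describes the NEON_PAGESERVER_UNIT_TEST_* gating pattern, and the triple loop then exercises 2 x 2 x 3 = 12 combinations. To reproduce a single cell of that matrix locally, export the same variables and run the same filter (a sketch, assuming cargo-nextest is installed and CARGO_FLAGS/CARGO_FEATURES are set to the values CI uses):

    # Run only the pageserver unit tests for one combination of I/O settings.
    NEON_PAGESERVER_UNIT_TEST_GET_VECTORED_CONCURRENT_IO=sidecar-task \
    NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=tokio-epoll-uring \
    NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IO_MODE=direct-rw \
      cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'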

View File

@@ -314,8 +314,7 @@ jobs:
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ github.ref_name == 'main' }}
# test_pageserver_max_throughput_getpage_at_latest_lsn is run in the separate workflow periodic_pagebench.yml because it needs snapshots
extra_params: --splits 5 --group ${{ matrix.pytest_split_group }} --ignore=test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py
extra_params: --splits 5 --group ${{ matrix.pytest_split_group }}
benchmark_durations: ${{ needs.get-benchmarks-durations.outputs.json }}
pg_version: v16
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
@@ -324,6 +323,8 @@ jobs:
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
PAGESERVER_VIRTUAL_FILE_IO_MODE: direct-rw
SYNC_BETWEEN_TESTS: true
# XXX: no coverage data handling here, since benchmarks are run on release builds,
# while coverage is currently collected for the debug ones
@@ -964,7 +965,7 @@ jobs:
fi
- name: Verify docker-compose example and test extensions
timeout-minutes: 60
timeout-minutes: 20
env:
TAG: >-
${{

View File

@@ -35,7 +35,7 @@ jobs:
matrix:
pg-version: [16, 17]
runs-on: us-east-2
runs-on: [ self-hosted, small ]
container:
# We use the neon-test-extensions image here as it contains the source code for the extensions.
image: ghcr.io/neondatabase/neon-test-extensions-v${{ matrix.pg-version }}:latest
@@ -71,7 +71,20 @@ jobs:
region_id: ${{ inputs.region_id || 'aws-us-east-2' }}
postgres_version: ${{ matrix.pg-version }}
project_settings: ${{ steps.project-settings.outputs.settings }}
# We need these settings to get the expected output results.
# We cannot use environment variables, e.g. PGTZ, due to
# https://github.com/neondatabase/neon/issues/1287
default_endpoint_settings: >
{
"pg_settings": {
"DateStyle": "Postgres,MDY",
"TimeZone": "America/Los_Angeles",
"compute_query_id": "off",
"neon.allow_unstable_extensions": "on"
}
}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
admin_api_key: ${{ secrets.NEON_STAGING_ADMIN_API_KEY }}
- name: Run the regression tests
run: /run-tests.sh -r /ext-src

View File

@@ -63,10 +63,8 @@ jobs:
- name: Filter out only v-string for build matrix
id: postgres_changes
env:
CHANGES: ${{ steps.files_changed.outputs.changes }}
run: |
v_strings_only_as_json_array=$(echo ${CHANGES} | jq '.[]|select(test("v\\d+"))' | jq --slurp -c)
v_strings_only_as_json_array=$(echo ${{ steps.files_changed.outputs.chnages }} | jq '.[]|select(test("v\\d+"))' | jq --slurp -c)
echo "changes=${v_strings_only_as_json_array}" | tee -a "${GITHUB_OUTPUT}"
check-macos-build:
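
The jq filter in the step above keeps only entries matching a v<digits> pattern and re-slurps them into a compact JSON array; for example, with a hypothetical changed-components list:

    # Input: JSON array of changed components; output: only the v-strings.
    echo '["v16", "v17", "compute", "build-tools"]' \
      | jq '.[]|select(test("v\\d+"))' | jq --slurp -c
    # -> ["v16","v17"]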

View File

@@ -1,4 +1,4 @@
name: Periodic pagebench performance test on unit-perf hetzner runner
name: Periodic pagebench performance test on dedicated EC2 machine in eu-central-1 region
on:
schedule:
@@ -8,7 +8,7 @@ on:
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '0 */4 * * *' # Runs every 4 hours
- cron: '0 */3 * * *' # Runs every 3 hours
workflow_dispatch: # Allows manual triggering of the workflow
inputs:
commit_hash:
@@ -16,11 +16,6 @@ on:
description: 'The long neon repo commit hash for the system under test (pageserver) to be tested.'
required: false
default: ''
recreate_snapshots:
type: boolean
description: 'Recreate snapshots - !!!WARNING!!! We should only recreate snapshots if the previous ones are no longer compatible. Otherwise benchmarking results are not comparable across runs.'
required: false
default: false
defaults:
run:
@@ -34,13 +29,13 @@ permissions:
contents: read
jobs:
run_periodic_pagebench_test:
trigger_bench_on_ec2_machine_in_eu_central_1:
permissions:
id-token: write # aws-actions/configure-aws-credentials
statuses: write
contents: write
pull-requests: write
runs-on: [ self-hosted, unit-perf ]
runs-on: [ self-hosted, small ]
container:
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
credentials:
@@ -49,13 +44,10 @@ jobs:
options: --init
timeout-minutes: 360 # Set the timeout to 6 hours
env:
API_KEY: ${{ secrets.PERIODIC_PAGEBENCH_EC2_RUNNER_API_KEY }}
RUN_ID: ${{ github.run_id }}
DEFAULT_PG_VERSION: 16
BUILD_TYPE: release
RUST_BACKTRACE: 1
# NEON_ENV_BUILDER_USE_OVERLAYFS_FOR_SNAPSHOTS: 1 - doesn't work without root in container
S3_BUCKET: neon-github-public-dev
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
AWS_DEFAULT_REGION : "eu-central-1"
AWS_INSTANCE_ID : "i-02a59a3bf86bc7e74"
steps:
# we don't need the neon source code because we run everything remotely
# however we still need the local github actions to run the allure step below
@@ -64,194 +56,99 @@ jobs:
with:
egress-policy: audit
- name: Set up the environment which depends on $RUNNER_TEMP on the NVMe drive
id: set-env
shell: bash -euxo pipefail {0}
run: |
{
echo "NEON_DIR=${RUNNER_TEMP}/neon"
echo "NEON_BIN=${RUNNER_TEMP}/neon/bin"
echo "POSTGRES_DISTRIB_DIR=${RUNNER_TEMP}/neon/pg_install"
echo "LD_LIBRARY_PATH=${RUNNER_TEMP}/neon/pg_install/v${DEFAULT_PG_VERSION}/lib"
echo "BACKUP_DIR=${RUNNER_TEMP}/instance_store/saved_snapshots"
echo "TEST_OUTPUT=${RUNNER_TEMP}/neon/test_output"
echo "PERF_REPORT_DIR=${RUNNER_TEMP}/neon/test_output/perf-report-local"
echo "ALLURE_DIR=${RUNNER_TEMP}/neon/test_output/allure-results"
echo "ALLURE_RESULTS_DIR=${RUNNER_TEMP}/neon/test_output/allure-results/results"
} >> "$GITHUB_ENV"
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
echo "allure_results_dir=${RUNNER_TEMP}/neon/test_output/allure-results/results" >> "$GITHUB_OUTPUT"
- name: Show my own (github runner) external IP address - useful for IP allowlisting
run: curl https://ifconfig.me
- uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
- name: Assume AWS OIDC role that allows managing (start/stop/describe/...) the EC2 machine
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # max 5 hours (needed in case commit hash is still being built)
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN }}
role-duration-seconds: 3600
- name: Start EC2 instance and wait for the instance to boot up
run: |
aws ec2 start-instances --instance-ids $AWS_INSTANCE_ID
aws ec2 wait instance-running --instance-ids $AWS_INSTANCE_ID
sleep 60 # sleep some time to allow cloudinit and our API server to start up
- name: Determine public IP of the EC2 instance and set env variable EC2_MACHINE_URL_US
run: |
public_ip=$(aws ec2 describe-instances --instance-ids $AWS_INSTANCE_ID --query 'Reservations[*].Instances[*].PublicIpAddress' --output text)
echo "Public IP of the EC2 instance: $public_ip"
echo "EC2_MACHINE_URL_US=https://${public_ip}:8443" >> $GITHUB_ENV
- name: Determine commit hash
id: commit_hash
shell: bash -euxo pipefail {0}
env:
INPUT_COMMIT_HASH: ${{ github.event.inputs.commit_hash }}
run: |
if [[ -z "${INPUT_COMMIT_HASH}" ]]; then
COMMIT_HASH=$(curl -s https://api.github.com/repos/neondatabase/neon/commits/main | jq -r '.sha')
echo "COMMIT_HASH=$COMMIT_HASH" >> $GITHUB_ENV
echo "commit_hash=$COMMIT_HASH" >> "$GITHUB_OUTPUT"
if [ -z "$INPUT_COMMIT_HASH" ]; then
echo "COMMIT_HASH=$(curl -s https://api.github.com/repos/neondatabase/neon/commits/main | jq -r '.sha')" >> $GITHUB_ENV
echo "COMMIT_HASH_TYPE=latest" >> $GITHUB_ENV
else
COMMIT_HASH="${INPUT_COMMIT_HASH}"
echo "COMMIT_HASH=$COMMIT_HASH" >> $GITHUB_ENV
echo "commit_hash=$COMMIT_HASH" >> "$GITHUB_OUTPUT"
echo "COMMIT_HASH=$INPUT_COMMIT_HASH" >> $GITHUB_ENV
echo "COMMIT_HASH_TYPE=manual" >> $GITHUB_ENV
fi
- name: Checkout the neon repository at given commit hash
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ steps.commit_hash.outputs.commit_hash }}
# does not reuse ./.github/actions/download because we need to download the artifact for the given commit hash
# example artifact
# s3://neon-github-public-dev/artifacts/48b870bc078bd2c450eb7b468e743b9c118549bf/15036827400/1/neon-Linux-X64-release-artifact.tar.zst /instance_store/artifacts/neon-Linux-release-artifact.tar.zst
- name: Determine artifact S3_KEY for given commit hash and download and extract artifact
id: artifact_prefix
shell: bash -euxo pipefail {0}
env:
ARCHIVE: ${{ runner.temp }}/downloads/neon-${{ runner.os }}-${{ runner.arch }}-release-artifact.tar.zst
COMMIT_HASH: ${{ env.COMMIT_HASH }}
COMMIT_HASH_TYPE: ${{ env.COMMIT_HASH_TYPE }}
- name: Start Bench with run_id
run: |
attempt=0
max_attempts=24 # 5 minutes * 24 = 2 hours
curl -k -X 'POST' \
"${EC2_MACHINE_URL_US}/start_test/${GITHUB_RUN_ID}" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-H "Authorization: Bearer $API_KEY" \
-d "{\"neonRepoCommitHash\": \"${COMMIT_HASH}\", \"neonRepoCommitHashType\": \"${COMMIT_HASH_TYPE}\"}"
while [[ $attempt -lt $max_attempts ]]; do
# the following command will fail until the artifacts are available ...
S3_KEY=$(aws s3api list-objects-v2 --bucket "$S3_BUCKET" --prefix "artifacts/$COMMIT_HASH/" \
| jq -r '.Contents[]?.Key' \
| grep "neon-${{ runner.os }}-${{ runner.arch }}-release-artifact.tar.zst" \
| sort --version-sort \
| tail -1) || true # ... thus ignore errors from the command
if [[ -n "${S3_KEY}" ]]; then
echo "Artifact found: $S3_KEY"
echo "S3_KEY=$S3_KEY" >> $GITHUB_ENV
- name: Poll Test Status
id: poll_step
run: |
status=""
while [[ "$status" != "failure" && "$status" != "success" ]]; do
response=$(curl -k -X 'GET' \
"${EC2_MACHINE_URL_US}/test_status/${GITHUB_RUN_ID}" \
-H 'accept: application/json' \
-H "Authorization: Bearer $API_KEY")
echo "Response: $response"
set +x
status=$(echo $response | jq -r '.status')
echo "Test status: $status"
if [[ "$status" == "failure" ]]; then
echo "Test failed"
exit 1 # Fail the job step if status is failure
elif [[ "$status" == "success" || "$status" == "null" ]]; then
break
elif [[ "$status" == "too_many_runs" ]]; then
echo "Too many runs already running"
echo "too_many_runs=true" >> "$GITHUB_OUTPUT"
exit 1
fi
# Increment attempt counter and sleep for 5 minutes
attempt=$((attempt + 1))
echo "Attempt $attempt of $max_attempts to find artifacts in S3 bucket s3://$S3_BUCKET/artifacts/$COMMIT_HASH failed. Retrying in 5 minutes..."
sleep 300 # Sleep for 5 minutes
sleep 60 # Poll every 60 seconds
done
if [[ -z "${S3_KEY}" ]]; then
echo "Error: artifact not found in S3 bucket s3://$S3_BUCKET/artifacts/$COMMIT_HASH" after 2 hours
else
mkdir -p $(dirname $ARCHIVE)
time aws s3 cp --only-show-errors s3://$S3_BUCKET/${S3_KEY} ${ARCHIVE}
mkdir -p ${NEON_DIR}
time tar -xf ${ARCHIVE} -C ${NEON_DIR}
rm -f ${ARCHIVE}
fi
- name: Download snapshots from S3
if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.recreate_snapshots == 'false' || github.event.inputs.recreate_snapshots == '' }}
id: download_snapshots
shell: bash -euxo pipefail {0}
- name: Retrieve Test Logs
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
run: |
# Download the snapshots from S3
mkdir -p ${TEST_OUTPUT}
mkdir -p $BACKUP_DIR
cd $BACKUP_DIR
mkdir parts
cd parts
PART=$(aws s3api list-objects-v2 --bucket $S3_BUCKET --prefix performance/pagebench/ \
| jq -r '.Contents[]?.Key' \
| grep -E 'shared-snapshots-[0-9]{4}-[0-9]{2}-[0-9]{2}' \
| sort \
| tail -1)
echo "Latest PART: $PART"
if [[ -z "$PART" ]]; then
echo "ERROR: No matching S3 key found" >&2
exit 1
fi
S3_KEY=$(dirname $PART)
time aws s3 cp --only-show-errors --recursive s3://${S3_BUCKET}/$S3_KEY/ .
cd $TEST_OUTPUT
time cat $BACKUP_DIR/parts/* | zstdcat | tar --extract --preserve-permissions
rm -rf ${BACKUP_DIR}
curl -k -X 'GET' \
"${EC2_MACHINE_URL_US}/test_log/${GITHUB_RUN_ID}" \
-H 'accept: application/gzip' \
-H "Authorization: Bearer $API_KEY" \
--output "test_log_${GITHUB_RUN_ID}.gz"
- name: Cache poetry deps
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry/virtualenvs
key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-bookworm-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
shell: bash -euxo pipefail {0}
run: ./scripts/pysync
# we need a high number of open files for pagebench
- name: show ulimits
shell: bash -euxo pipefail {0}
- name: Unzip Test Log and Print it into this job's log
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
run: |
ulimit -a
- name: Run pagebench testcase
shell: bash -euxo pipefail {0}
env:
CI: false # need to override this env variable set by github to enforce using snapshots
run: |
export PLATFORM=hetzner-unit-perf-${COMMIT_HASH_TYPE}
# report the commit hash of the neon repository as the revision of the test results
export GITHUB_SHA=${COMMIT_HASH}
rm -rf ${PERF_REPORT_DIR}
rm -rf ${ALLURE_RESULTS_DIR}
mkdir -p ${PERF_REPORT_DIR}
mkdir -p ${ALLURE_RESULTS_DIR}
PARAMS="--alluredir=${ALLURE_RESULTS_DIR} --tb=short --verbose -rA"
EXTRA_PARAMS="--out-dir ${PERF_REPORT_DIR} --durations-path $TEST_OUTPUT/benchmark_durations.json"
# run only two selected tests
# environment set by parent:
# RUST_BACKTRACE=1 DEFAULT_PG_VERSION=16 BUILD_TYPE=release
./scripts/pytest ${PARAMS} test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py::test_pageserver_characterize_throughput_with_n_tenants ${EXTRA_PARAMS}
./scripts/pytest ${PARAMS} test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py::test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant ${EXTRA_PARAMS}
- name: Upload the performance metrics to the Neon performance database used by Grafana dashboards to display the results
shell: bash -euxo pipefail {0}
run: |
export REPORT_FROM="$PERF_REPORT_DIR"
export GITHUB_SHA=${COMMIT_HASH}
time ./scripts/generate_and_push_perf_report.sh
- name: Upload test results
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-store
with:
report-dir: ${{ steps.set-env.outputs.allure_results_dir }}
unique-key: ${{ env.BUILD_TYPE }}-${{ env.DEFAULT_PG_VERSION }}-${{ runner.arch }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
gzip -d "test_log_${GITHUB_RUN_ID}.gz"
cat "test_log_${GITHUB_RUN_ID}"
- name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Upload snapshots
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.recreate_snapshots != 'false' && github.event.inputs.recreate_snapshots != '' }}
id: upload_snapshots
shell: bash -euxo pipefail {0}
run: |
mkdir -p $BACKUP_DIR
cd $TEST_OUTPUT
tar --create --preserve-permissions --file - shared-snapshots | zstd -o $BACKUP_DIR/shared_snapshots.tar.zst
cd $BACKUP_DIR
mkdir parts
split -b 1G shared_snapshots.tar.zst ./parts/shared_snapshots.tar.zst.part.
SNAPSHOT_DATE=$(date +%F) # YYYY-MM-DD
cd parts
time aws s3 cp --recursive . s3://${S3_BUCKET}/performance/pagebench/shared-snapshots-${SNAPSHOT_DATE}/
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1
@@ -260,22 +157,26 @@ jobs:
slack-message: "Periodic pagebench testing on dedicated hardware: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
- name: Cleanup Test Resources
if: always()
shell: bash -euxo pipefail {0}
env:
ARCHIVE: ${{ runner.temp }}/downloads/neon-${{ runner.os }}-${{ runner.arch }}-release-artifact.tar.zst
run: |
# Cleanup the test resources
if [[ -d "${BACKUP_DIR}" ]]; then
rm -rf ${BACKUP_DIR}
fi
if [[ -d "${TEST_OUTPUT}" ]]; then
rm -rf ${TEST_OUTPUT}
fi
if [[ -d "${NEON_DIR}" ]]; then
rm -rf ${NEON_DIR}
fi
rm -rf $(dirname $ARCHIVE)
curl -k -X 'POST' \
"${EC2_MACHINE_URL_US}/cleanup_test/${GITHUB_RUN_ID}" \
-H 'accept: application/json' \
-H "Authorization: Bearer $API_KEY" \
-d ''
- name: Assume AWS OIDC role that allows managing (start/stop/describe/...) the EC2 machine
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN }}
role-duration-seconds: 3600
- name: Stop EC2 instance and wait for the instance to be stopped
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
run: |
aws ec2 stop-instances --instance-ids $AWS_INSTANCE_ID
aws ec2 wait instance-stopped --instance-ids $AWS_INSTANCE_ID
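
The snapshot steps shown above pair a split-based upload with a cat-based restore. In isolation the round trip looks like this (a sketch with hypothetical local paths; in the workflow the parts travel through s3://neon-github-public-dev/performance/pagebench/):

    # Pack a directory, split it into 1 GiB parts (S3-friendly), then restore it.
    tar --create --preserve-permissions --file - shared-snapshots | zstd -o shared_snapshots.tar.zst
    mkdir -p parts
    split -b 1G shared_snapshots.tar.zst ./parts/shared_snapshots.tar.zst.part.
    # ...upload the parts, then later download them on another machine...
    cat parts/* | zstdcat | tar --extract --preserve-permissions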

Cargo.lock (generated; 304 changed lines)
View File

@@ -1112,12 +1112,6 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cfg_aliases"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
[[package]]
name = "cgroups-rs"
version = "0.3.3"
@@ -1276,7 +1270,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"indexmap 2.9.0",
"indexmap 2.0.1",
"jsonwebtoken",
"regex",
"remote_storage",
@@ -1308,11 +1302,11 @@ dependencies = [
"flate2",
"futures",
"http 1.1.0",
"indexmap 2.9.0",
"indexmap 2.0.1",
"itertools 0.10.5",
"jsonwebtoken",
"metrics",
"nix 0.30.1",
"nix 0.27.1",
"notify",
"num_cpus",
"once_cell",
@@ -1435,7 +1429,7 @@ dependencies = [
"humantime-serde",
"hyper 0.14.30",
"jsonwebtoken",
"nix 0.30.1",
"nix 0.27.1",
"once_cell",
"pageserver_api",
"pageserver_client",
@@ -2597,7 +2591,7 @@ dependencies = [
"futures-sink",
"futures-util",
"http 0.2.9",
"indexmap 2.9.0",
"indexmap 2.0.1",
"slab",
"tokio",
"tokio-util",
@@ -2616,7 +2610,7 @@ dependencies = [
"futures-sink",
"futures-util",
"http 1.1.0",
"indexmap 2.9.0",
"indexmap 2.0.1",
"slab",
"tokio",
"tokio-util",
@@ -2863,14 +2857,14 @@ dependencies = [
"pprof",
"regex",
"routerify",
"rustls 0.23.27",
"rustls 0.23.18",
"rustls-pemfile 2.1.1",
"serde",
"serde_json",
"serde_path_to_error",
"thiserror 1.0.69",
"tokio",
"tokio-rustls 0.26.2",
"tokio-rustls 0.26.0",
"tokio-stream",
"tokio-util",
"tracing",
@@ -3200,12 +3194,12 @@ dependencies = [
[[package]]
name = "indexmap"
version = "2.9.0"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e"
checksum = "ad227c3af19d4914570ad36d30409928b75967c298feb9ea1969db3a610bb14e"
dependencies = [
"equivalent",
"hashbrown 0.15.2",
"hashbrown 0.14.5",
"serde",
]
@@ -3228,7 +3222,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88"
dependencies = [
"ahash",
"indexmap 2.9.0",
"indexmap 2.0.1",
"is-terminal",
"itoa",
"log",
@@ -3251,7 +3245,7 @@ dependencies = [
"crossbeam-utils",
"dashmap 6.1.0",
"env_logger",
"indexmap 2.9.0",
"indexmap 2.0.1",
"itoa",
"log",
"num-format",
@@ -3518,9 +3512,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.172"
version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
[[package]]
name = "libloading"
@@ -3794,16 +3788,6 @@ version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
[[package]]
name = "neon-shmem"
version = "0.1.0"
dependencies = [
"nix 0.30.1",
"tempfile",
"thiserror 1.0.69",
"workspace_hack",
]
[[package]]
name = "never-say-never"
version = "6.6.666"
@@ -3837,13 +3821,12 @@ dependencies = [
[[package]]
name = "nix"
version = "0.30.1"
version = "0.27.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6"
checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053"
dependencies = [
"bitflags 2.8.0",
"cfg-if",
"cfg_aliases",
"libc",
"memoffset 0.9.0",
]
@@ -3898,16 +3881,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
dependencies = [
"overload",
"winapi",
]
[[package]]
name = "num"
version = "0.4.1"
@@ -4112,7 +4085,7 @@ dependencies = [
"opentelemetry-http",
"opentelemetry-proto",
"opentelemetry_sdk",
"prost 0.13.5",
"prost 0.13.3",
"reqwest",
"thiserror 1.0.69",
]
@@ -4125,8 +4098,8 @@ checksum = "a6e05acbfada5ec79023c85368af14abd0b307c015e9064d249b2a950ef459a6"
dependencies = [
"opentelemetry",
"opentelemetry_sdk",
"prost 0.13.5",
"tonic 0.12.3",
"prost 0.13.3",
"tonic",
]
[[package]]
@@ -4192,12 +4165,6 @@ version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a"
[[package]]
name = "overload"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "p256"
version = "0.11.1"
@@ -4302,7 +4269,6 @@ dependencies = [
"enumset",
"fail",
"futures",
"hashlink",
"hex",
"hex-literal",
"http-utils",
@@ -4314,14 +4280,13 @@ dependencies = [
"jsonwebtoken",
"md5",
"metrics",
"nix 0.30.1",
"nix 0.27.1",
"num-traits",
"num_cpus",
"once_cell",
"pageserver_api",
"pageserver_client",
"pageserver_compaction",
"pageserver_page_api",
"pem",
"pin-project-lite",
"postgres-protocol",
@@ -4330,7 +4295,6 @@ dependencies = [
"postgres_connection",
"postgres_ffi",
"postgres_initdb",
"posthog_client_lite",
"pprof",
"pq_proto",
"procfs",
@@ -4341,7 +4305,7 @@ dependencies = [
"reqwest",
"rpds",
"rstest",
"rustls 0.23.27",
"rustls 0.23.18",
"scopeguard",
"send-future",
"serde",
@@ -4360,16 +4324,13 @@ dependencies = [
"tokio-epoll-uring",
"tokio-io-timeout",
"tokio-postgres",
"tokio-rustls 0.26.2",
"tokio-rustls 0.26.0",
"tokio-stream",
"tokio-tar",
"tokio-util",
"toml_edit",
"tonic 0.13.1",
"tonic-reflection",
"tracing",
"tracing-utils",
"twox-hash",
"url",
"utils",
"uuid",
@@ -4394,7 +4355,7 @@ dependencies = [
"humantime",
"humantime-serde",
"itertools 0.10.5",
"nix 0.30.1",
"nix 0.27.1",
"once_cell",
"postgres_backend",
"postgres_ffi",
@@ -4455,22 +4416,6 @@ dependencies = [
"workspace_hack",
]
[[package]]
name = "pageserver_page_api"
version = "0.1.0"
dependencies = [
"bytes",
"pageserver_api",
"postgres_ffi",
"prost 0.13.5",
"smallvec",
"thiserror 1.0.69",
"tonic 0.13.1",
"tonic-build",
"utils",
"workspace_hack",
]
[[package]]
name = "papaya"
version = "0.2.1"
@@ -4847,14 +4792,14 @@ dependencies = [
"bytes",
"once_cell",
"pq_proto",
"rustls 0.23.27",
"rustls 0.23.18",
"rustls-pemfile 2.1.1",
"serde",
"thiserror 1.0.69",
"tokio",
"tokio-postgres",
"tokio-postgres-rustls",
"tokio-rustls 0.26.2",
"tokio-rustls 0.26.0",
"tokio-util",
"tracing",
]
@@ -4903,24 +4848,6 @@ dependencies = [
"workspace_hack",
]
[[package]]
name = "posthog_client_lite"
version = "0.1.0"
dependencies = [
"anyhow",
"arc-swap",
"reqwest",
"serde",
"serde_json",
"sha2",
"thiserror 1.0.69",
"tokio",
"tokio-util",
"tracing",
"tracing-utils",
"workspace_hack",
]
[[package]]
name = "powerfmt"
version = "0.2.0"
@@ -4966,7 +4893,7 @@ dependencies = [
"inferno 0.12.0",
"num",
"paste",
"prost 0.13.5",
"prost 0.13.3",
]
[[package]]
@@ -5071,12 +4998,12 @@ dependencies = [
[[package]]
name = "prost"
version = "0.13.5"
version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5"
checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f"
dependencies = [
"bytes",
"prost-derive 0.13.5",
"prost-derive 0.13.3",
]
[[package]]
@@ -5114,7 +5041,7 @@ dependencies = [
"once_cell",
"petgraph",
"prettyplease",
"prost 0.13.5",
"prost 0.13.3",
"prost-types 0.13.3",
"regex",
"syn 2.0.100",
@@ -5136,9 +5063,9 @@ dependencies = [
[[package]]
name = "prost-derive"
version = "0.13.5"
version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5"
dependencies = [
"anyhow",
"itertools 0.12.1",
@@ -5162,7 +5089,7 @@ version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670"
dependencies = [
"prost 0.13.5",
"prost 0.13.3",
]
[[package]]
@@ -5210,7 +5137,7 @@ dependencies = [
"hyper 0.14.30",
"hyper 1.4.1",
"hyper-util",
"indexmap 2.9.0",
"indexmap 2.0.1",
"ipnet",
"itertools 0.10.5",
"itoa",
@@ -5244,7 +5171,7 @@ dependencies = [
"rsa",
"rstest",
"rustc-hash 1.1.0",
"rustls 0.23.27",
"rustls 0.23.18",
"rustls-native-certs 0.8.0",
"rustls-pemfile 2.1.1",
"scopeguard",
@@ -5263,14 +5190,13 @@ dependencies = [
"tokio",
"tokio-postgres",
"tokio-postgres2",
"tokio-rustls 0.26.2",
"tokio-rustls 0.26.0",
"tokio-tungstenite 0.21.0",
"tokio-util",
"tracing",
"tracing-log",
"tracing-opentelemetry",
"tracing-subscriber",
"tracing-test",
"tracing-utils",
"try-lock",
"typed-json",
@@ -5487,13 +5413,13 @@ dependencies = [
"num-bigint",
"percent-encoding",
"pin-project-lite",
"rustls 0.23.27",
"rustls 0.23.18",
"rustls-native-certs 0.8.0",
"ryu",
"sha1_smol",
"socket2",
"tokio",
"tokio-rustls 0.26.2",
"tokio-rustls 0.26.0",
"tokio-util",
"url",
]
@@ -5941,15 +5867,15 @@ dependencies = [
[[package]]
name = "rustls"
version = "0.23.27"
version = "0.23.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "730944ca083c1c233a75c09f199e973ca499344a2b7ba9e755c457e86fb4a321"
checksum = "9c9cc1d47e243d655ace55ed38201c19ae02c148ae56412ab8750e8f0166ab7f"
dependencies = [
"log",
"once_cell",
"ring",
"rustls-pki-types",
"rustls-webpki 0.103.3",
"rustls-webpki 0.102.8",
"subtle",
"zeroize",
]
@@ -6038,17 +5964,6 @@ dependencies = [
"untrusted",
]
[[package]]
name = "rustls-webpki"
version = "0.103.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4a72fe2bcf7a6ac6fd7d0b9e5cb68aeb7d4c0a0271730218b3e92d43b4eb435"
dependencies = [
"ring",
"rustls-pki-types",
"untrusted",
]
[[package]]
name = "rustversion"
version = "1.0.12"
@@ -6100,7 +6015,7 @@ dependencies = [
"regex",
"remote_storage",
"reqwest",
"rustls 0.23.27",
"rustls 0.23.18",
"safekeeper_api",
"safekeeper_client",
"scopeguard",
@@ -6117,7 +6032,7 @@ dependencies = [
"tokio",
"tokio-io-timeout",
"tokio-postgres",
"tokio-rustls 0.26.2",
"tokio-rustls 0.26.0",
"tokio-stream",
"tokio-tar",
"tokio-util",
@@ -6289,7 +6204,7 @@ checksum = "255914a8e53822abd946e2ce8baa41d4cded6b8e938913b7f7b9da5b7ab44335"
dependencies = [
"httpdate",
"reqwest",
"rustls 0.23.27",
"rustls 0.23.18",
"sentry-backtrace",
"sentry-contexts",
"sentry-core",
@@ -6718,11 +6633,11 @@ dependencies = [
"metrics",
"once_cell",
"parking_lot 0.12.1",
"prost 0.13.5",
"rustls 0.23.27",
"prost 0.13.3",
"rustls 0.23.18",
"tokio",
"tokio-rustls 0.26.2",
"tonic 0.13.1",
"tokio-rustls 0.26.0",
"tonic",
"tonic-build",
"tracing",
"utils",
@@ -6764,7 +6679,7 @@ dependencies = [
"regex",
"reqwest",
"routerify",
"rustls 0.23.27",
"rustls 0.23.18",
"rustls-native-certs 0.8.0",
"safekeeper_api",
"safekeeper_client",
@@ -6779,7 +6694,7 @@ dependencies = [
"tokio",
"tokio-postgres",
"tokio-postgres-rustls",
"tokio-rustls 0.26.2",
"tokio-rustls 0.26.0",
"tokio-util",
"tracing",
"utils",
@@ -6817,7 +6732,7 @@ dependencies = [
"postgres_ffi",
"remote_storage",
"reqwest",
"rustls 0.23.27",
"rustls 0.23.18",
"rustls-native-certs 0.8.0",
"serde",
"serde_json",
@@ -7351,10 +7266,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04fb792ccd6bbcd4bba408eb8a292f70fc4a3589e5d793626f45190e6454b6ab"
dependencies = [
"ring",
"rustls 0.23.27",
"rustls 0.23.18",
"tokio",
"tokio-postgres",
"tokio-rustls 0.26.2",
"tokio-rustls 0.26.0",
"x509-certificate",
]
@@ -7398,11 +7313,12 @@ dependencies = [
[[package]]
name = "tokio-rustls"
version = "0.26.2"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b"
checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4"
dependencies = [
"rustls 0.23.27",
"rustls 0.23.18",
"rustls-pki-types",
"tokio",
]
@@ -7500,7 +7416,7 @@ version = "0.22.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f21c7aaf97f1bd9ca9d4f9e73b0a6c74bd5afef56f2bc931943a6e1c37e04e38"
dependencies = [
"indexmap 2.9.0",
"indexmap 2.0.1",
"serde",
"serde_spanned",
"toml_datetime",
@@ -7519,41 +7435,18 @@ dependencies = [
"http 1.1.0",
"http-body 1.0.0",
"http-body-util",
"percent-encoding",
"pin-project",
"prost 0.13.5",
"tokio-stream",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "tonic"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9"
dependencies = [
"async-trait",
"axum",
"base64 0.22.1",
"bytes",
"h2 0.4.4",
"http 1.1.0",
"http-body 1.0.0",
"http-body-util",
"hyper 1.4.1",
"hyper-timeout",
"hyper-util",
"percent-encoding",
"pin-project",
"prost 0.13.5",
"prost 0.13.3",
"rustls-native-certs 0.8.0",
"socket2",
"rustls-pemfile 2.1.1",
"tokio",
"tokio-rustls 0.26.2",
"tokio-rustls 0.26.0",
"tokio-stream",
"tower 0.5.2",
"tower 0.4.13",
"tower-layer",
"tower-service",
"tracing",
@@ -7561,9 +7454,9 @@ dependencies = [
[[package]]
name = "tonic-build"
version = "0.13.1"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eac6f67be712d12f0b41328db3137e0d0757645d8904b4cb7d51cd9c2279e847"
checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11"
dependencies = [
"prettyplease",
"proc-macro2",
@@ -7573,19 +7466,6 @@ dependencies = [
"syn 2.0.100",
]
[[package]]
name = "tonic-reflection"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9687bd5bfeafebdded2356950f278bba8226f0b32109537c4253406e09aafe1"
dependencies = [
"prost 0.13.5",
"prost-types 0.13.3",
"tokio",
"tokio-stream",
"tonic 0.13.1",
]
[[package]]
name = "tower"
version = "0.4.13"
@@ -7594,11 +7474,16 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
dependencies = [
"futures-core",
"futures-util",
"indexmap 1.9.3",
"pin-project",
"pin-project-lite",
"rand 0.8.5",
"slab",
"tokio",
"tokio-util",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
@@ -7609,12 +7494,9 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
dependencies = [
"futures-core",
"futures-util",
"indexmap 2.9.0",
"pin-project-lite",
"slab",
"sync_wrapper 1.0.1",
"tokio",
"tokio-util",
"tower-layer",
"tower-service",
"tracing",
@@ -7765,7 +7647,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008"
dependencies = [
"matchers",
"nu-ansi-term",
"once_cell",
"regex",
"serde",
@@ -7779,27 +7660,6 @@ dependencies = [
"tracing-serde",
]
[[package]]
name = "tracing-test"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "557b891436fe0d5e0e363427fc7f217abf9ccd510d5136549847bdcbcd011d68"
dependencies = [
"tracing-core",
"tracing-subscriber",
"tracing-test-macro",
]
[[package]]
name = "tracing-test-macro"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04659ddb06c87d233c566112c1c9c5b9e98256d9af50ec3bc9c8327f873a7568"
dependencies = [
"quote",
"syn 2.0.100",
]
[[package]]
name = "tracing-utils"
version = "0.1.0"
@@ -7942,7 +7802,7 @@ dependencies = [
"base64 0.22.1",
"log",
"once_cell",
"rustls 0.23.27",
"rustls 0.23.18",
"rustls-pki-types",
"url",
"webpki-roots",
@@ -8025,7 +7885,7 @@ dependencies = [
"humantime",
"jsonwebtoken",
"metrics",
"nix 0.30.1",
"nix 0.27.1",
"once_cell",
"pem",
"pin-project-lite",
@@ -8137,7 +7997,7 @@ dependencies = [
"pageserver_api",
"postgres_ffi",
"pprof",
"prost 0.13.5",
"prost 0.13.3",
"remote_storage",
"serde",
"serde_json",
@@ -8557,8 +8417,6 @@ dependencies = [
"ahash",
"anstream",
"anyhow",
"axum",
"axum-core",
"base64 0.13.1",
"base64 0.21.7",
"base64ct",
@@ -8593,14 +8451,14 @@ dependencies = [
"hyper 0.14.30",
"hyper 1.4.1",
"hyper-util",
"indexmap 2.9.0",
"indexmap 1.9.3",
"indexmap 2.0.1",
"itertools 0.12.1",
"lazy_static",
"libc",
"log",
"memchr",
"nix 0.26.4",
"nix 0.30.1",
"nom",
"num",
"num-bigint",
@@ -8614,16 +8472,16 @@ dependencies = [
"parquet",
"prettyplease",
"proc-macro2",
"prost 0.13.5",
"prost 0.13.3",
"quote",
"rand 0.8.5",
"regex",
"regex-automata 0.4.3",
"regex-syntax 0.8.2",
"reqwest",
"rustls 0.23.27",
"rustls 0.23.18",
"rustls-pki-types",
"rustls-webpki 0.103.3",
"rustls-webpki 0.102.8",
"scopeguard",
"sec1 0.7.3",
"serde",
@@ -8641,15 +8499,15 @@ dependencies = [
"time",
"time-macros",
"tokio",
"tokio-rustls 0.26.2",
"tokio-rustls 0.26.0",
"tokio-stream",
"tokio-util",
"toml_edit",
"tower 0.5.2",
"tonic",
"tower 0.4.13",
"tracing",
"tracing-core",
"tracing-log",
"tracing-subscriber",
"url",
"uuid",
"zeroize",

View File

@@ -9,7 +9,6 @@ members = [
"pageserver/ctl",
"pageserver/client",
"pageserver/pagebench",
"pageserver/page_api",
"proxy",
"safekeeper",
"safekeeper/client",
@@ -24,11 +23,9 @@ members = [
"libs/postgres_ffi",
"libs/safekeeper_api",
"libs/desim",
"libs/neon-shmem",
"libs/utils",
"libs/consumption_metrics",
"libs/postgres_backend",
"libs/posthog_client_lite",
"libs/pq_proto",
"libs/tenant_size_model",
"libs/metrics",
@@ -129,7 +126,7 @@ md5 = "0.7.0"
measured = { version = "0.0.22", features=["lasso"] }
measured-process = { version = "0.0.22" }
memoffset = "0.9"
nix = { version = "0.30.1", features = ["dir", "fs", "mman", "process", "socket", "signal", "poll"] }
nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] }
# Do not update to >= 7.0.0, at least. The update will have a significant impact
# on compute startup metrics (start_postgres_ms), >= 25% degradation.
notify = "6.0.0"
@@ -149,7 +146,7 @@ pin-project-lite = "0.2"
pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "prost-codec"] }
procfs = "0.16"
prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
prost = "0.13.5"
prost = "0.13"
rand = "0.8"
redis = { version = "0.29.2", features = ["tokio-rustls-comp", "keep-alive"] }
regex = "1.10.2"
@@ -199,8 +196,7 @@ tokio-tar = "0.3"
tokio-util = { version = "0.7.10", features = ["io", "rt"] }
toml = "0.8"
toml_edit = "0.22"
tonic = { version = "0.13.1", default-features = false, features = ["channel", "codegen", "prost", "router", "server", "tls-ring", "tls-native-roots"] }
tonic-reflection = { version = "0.13.1", features = ["server"] }
tonic = {version = "0.12.3", default-features = false, features = ["channel", "tls", "tls-roots"]}
tower = { version = "0.5.2", default-features = false }
tower-http = { version = "0.6.2", features = ["auth", "request-id", "trace"] }
@@ -247,7 +243,6 @@ azure_storage_blobs = { git = "https://github.com/neondatabase/azure-sdk-for-rus
## Local libraries
compute_api = { version = "0.1", path = "./libs/compute_api/" }
consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
desim = { version = "0.1", path = "./libs/desim" }
endpoint_storage = { version = "0.0.1", path = "./endpoint_storage/" }
http-utils = { version = "0.1", path = "./libs/http-utils/" }
metrics = { version = "0.1", path = "./libs/metrics/" }
@@ -255,24 +250,23 @@ pageserver = { path = "./pageserver" }
pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
pageserver_client = { path = "./pageserver/client" }
pageserver_compaction = { version = "0.1", path = "./pageserver/compaction/" }
pageserver_page_api = { path = "./pageserver/page_api" }
postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
postgres_initdb = { path = "./libs/postgres_initdb" }
posthog_client_lite = { version = "0.1", path = "./libs/posthog_client_lite" }
pq_proto = { version = "0.1", path = "./libs/pq_proto/" }
remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
safekeeper_client = { path = "./safekeeper/client" }
desim = { version = "0.1", path = "./libs/desim" }
storage_broker = { version = "0.1", path = "./storage_broker/" } # Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy.
storage_controller_client = { path = "./storage_controller/client" }
tenant_size_model = { version = "0.1", path = "./libs/tenant_size_model/" }
tracing-utils = { version = "0.1", path = "./libs/tracing-utils/" }
utils = { version = "0.1", path = "./libs/utils/" }
vm_monitor = { version = "0.1", path = "./libs/vm_monitor/" }
wal_decoder = { version = "0.1", path = "./libs/wal_decoder" }
walproposer = { version = "0.1", path = "./libs/walproposer/" }
wal_decoder = { version = "0.1", path = "./libs/wal_decoder" }
## Common library dependency
workspace_hack = { version = "0.1", path = "./workspace_hack/" }
@@ -282,7 +276,7 @@ criterion = "0.5.1"
rcgen = "0.13"
rstest = "0.18"
camino-tempfile = "1.0.2"
tonic-build = "0.13.1"
tonic-build = "0.12"
[patch.crates-io]

View File

@@ -155,7 +155,7 @@ RUN set -e \
# Keep the version the same as in compute/compute-node.Dockerfile and
# test_runner/regress/test_compute_metrics.py.
ENV SQL_EXPORTER_VERSION=0.17.3
ENV SQL_EXPORTER_VERSION=0.17.0
RUN curl -fsSL \
"https://github.com/burningalchemist/sql_exporter/releases/download/${SQL_EXPORTER_VERSION}/sql_exporter-${SQL_EXPORTER_VERSION}.linux-$(case "$(uname -m)" in x86_64) echo amd64;; aarch64) echo arm64;; esac).tar.gz" \
--output sql_exporter.tar.gz \
@@ -292,7 +292,7 @@ WORKDIR /home/nonroot
# Rust
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
ENV RUSTC_VERSION=1.87.0
ENV RUSTC_VERSION=1.86.0
ENV RUSTUP_HOME="/home/nonroot/.rustup"
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
ARG RUSTFILT_VERSION=0.2.1
@@ -310,13 +310,13 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
. "$HOME/.cargo/env" && \
cargo --version && rustup --version && \
rustup component add llvm-tools rustfmt clippy && \
cargo install rustfilt --version ${RUSTFILT_VERSION} --locked && \
cargo install cargo-hakari --version ${CARGO_HAKARI_VERSION} --locked && \
cargo install cargo-deny --version ${CARGO_DENY_VERSION} --locked && \
cargo install cargo-hack --version ${CARGO_HACK_VERSION} --locked && \
cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} --locked && \
cargo install cargo-chef --version ${CARGO_CHEF_VERSION} --locked && \
cargo install diesel_cli --version ${CARGO_DIESEL_CLI_VERSION} --locked \
cargo install rustfilt --version ${RUSTFILT_VERSION} && \
cargo install cargo-hakari --version ${CARGO_HAKARI_VERSION} && \
cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
cargo install cargo-hack --version ${CARGO_HACK_VERSION} && \
cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} && \
cargo install cargo-chef --locked --version ${CARGO_CHEF_VERSION} && \
cargo install diesel_cli --version ${CARGO_DIESEL_CLI_VERSION} \
--features postgres-bundled --no-default-features && \
rm -rf /home/nonroot/.cargo/registry && \
rm -rf /home/nonroot/.cargo/git

View File

@@ -582,38 +582,6 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/hypopg.control
#########################################################################################
#
# Layer "online_advisor-build"
# compile online_advisor extension
#
#########################################################################################
FROM build-deps AS online_advisor-src
ARG PG_VERSION
# online_advisor supports all Postgres versions starting from PG14, but prior to PG17 it has to be included in shared_preload_libraries
# last release 1.0 - May 15, 2025
WORKDIR /ext-src
RUN case "${PG_VERSION:?}" in \
"v17") \
;; \
*) \
echo "skipping the version of online_advistor for $PG_VERSION" && exit 0 \
;; \
esac && \
wget https://github.com/knizhnik/online_advisor/archive/refs/tags/1.0.tar.gz -O online_advisor.tar.gz && \
echo "059b7d9e5a90013a58bdd22e9505b88406ce05790675eb2d8434e5b215652d54 online_advisor.tar.gz" | sha256sum --check && \
mkdir online_advisor-src && cd online_advisor-src && tar xzf ../online_advisor.tar.gz --strip-components=1 -C .
FROM pg-build AS online_advisor-build
COPY --from=online_advisor-src /ext-src/ /ext-src/
WORKDIR /ext-src/
RUN if [ -d online_advisor-src ]; then \
cd online_advisor-src && \
make -j install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/online_advisor.control; \
fi
#########################################################################################
#
# Layer "pg_hashids-build"
@@ -1149,8 +1117,8 @@ RUN wget https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.18.1.tar.
mkdir onnxruntime-src && cd onnxruntime-src && tar xzf ../onnxruntime.tar.gz --strip-components=1 -C . && \
echo "#nothing to test here" > neon-test.sh
RUN wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.1.2.tar.gz -O pgrag.tar.gz && \
echo "7361654ea24f08cbb9db13c2ee1c0fe008f6114076401bb871619690dafc5225 pgrag.tar.gz" | sha256sum --check && \
RUN wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.1.1.tar.gz -O pgrag.tar.gz && \
echo "087b2ecd11ba307dc968042ef2e9e43dc04d9ba60e8306e882c407bbe1350a50 pgrag.tar.gz" | sha256sum --check && \
mkdir pgrag-src && cd pgrag-src && tar xzf ../pgrag.tar.gz --strip-components=1 -C .
FROM rust-extensions-build-pgrx14 AS pgrag-build
@@ -1680,7 +1648,6 @@ COPY --from=pg_jsonschema-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_graphql-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_tiktoken-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=hypopg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=online_advisor-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_hashids-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=rum-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pgtap-build /usr/local/pgsql/ /usr/local/pgsql/
@@ -1784,17 +1751,17 @@ ARG TARGETARCH
RUN if [ "$TARGETARCH" = "amd64" ]; then\
postgres_exporter_sha256='59aa4a7bb0f7d361f5e05732f5ed8c03cc08f78449cef5856eadec33a627694b';\
pgbouncer_exporter_sha256='c9f7cf8dcff44f0472057e9bf52613d93f3ffbc381ad7547a959daa63c5e84ac';\
sql_exporter_sha256='9a41127a493e8bfebfe692bf78c7ed2872a58a3f961ee534d1b0da9ae584aaab';\
sql_exporter_sha256='38e439732bbf6e28ca4a94d7bc3686d3fa1abdb0050773d5617a9efdb9e64d08';\
else\
postgres_exporter_sha256='d1dedea97f56c6d965837bfd1fbb3e35a3b4a4556f8cccee8bd513d8ee086124';\
pgbouncer_exporter_sha256='217c4afd7e6492ae904055bc14fe603552cf9bac458c063407e991d68c519da3';\
sql_exporter_sha256='530e6afc77c043497ed965532c4c9dfa873bc2a4f0b3047fad367715c0081d6a';\
sql_exporter_sha256='11918b00be6e2c3a67564adfdb2414fdcbb15a5db76ea17d1d1a944237a893c6';\
fi\
&& curl -sL https://github.com/prometheus-community/postgres_exporter/releases/download/v0.17.1/postgres_exporter-0.17.1.linux-${TARGETARCH}.tar.gz\
| tar xzf - --strip-components=1 -C.\
&& curl -sL https://github.com/prometheus-community/pgbouncer_exporter/releases/download/v0.10.2/pgbouncer_exporter-0.10.2.linux-${TARGETARCH}.tar.gz\
| tar xzf - --strip-components=1 -C.\
&& curl -sL https://github.com/burningalchemist/sql_exporter/releases/download/0.17.3/sql_exporter-0.17.3.linux-${TARGETARCH}.tar.gz\
&& curl -sL https://github.com/burningalchemist/sql_exporter/releases/download/0.17.0/sql_exporter-0.17.0.linux-${TARGETARCH}.tar.gz\
| tar xzf - --strip-components=1 -C.\
&& echo "${postgres_exporter_sha256} postgres_exporter" | sha256sum -c -\
&& echo "${pgbouncer_exporter_sha256} pgbouncer_exporter" | sha256sum -c -\
@@ -1847,7 +1814,7 @@ COPY docker-compose/ext-src/ /ext-src/
COPY --from=pg-build /postgres /postgres
#COPY --from=postgis-src /ext-src/ /ext-src/
COPY --from=plv8-src /ext-src/ /ext-src/
COPY --from=h3-pg-src /ext-src/h3-pg-src /ext-src/h3-pg-src
#COPY --from=h3-pg-src /ext-src/ /ext-src/
COPY --from=postgresql-unit-src /ext-src/ /ext-src/
COPY --from=pgvector-src /ext-src/ /ext-src/
COPY --from=pgjwt-src /ext-src/ /ext-src/
@@ -1856,7 +1823,6 @@ COPY --from=pgjwt-src /ext-src/ /ext-src/
COPY --from=pg_graphql-src /ext-src/ /ext-src/
#COPY --from=pg_tiktoken-src /ext-src/ /ext-src/
COPY --from=hypopg-src /ext-src/ /ext-src/
COPY --from=online_advisor-src /ext-src/ /ext-src/
COPY --from=pg_hashids-src /ext-src/ /ext-src/
COPY --from=rum-src /ext-src/ /ext-src/
COPY --from=pgtap-src /ext-src/ /ext-src/
@@ -2005,8 +1971,7 @@ COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/sql
COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/neon_collector_autoscaling.yml /etc/neon_collector_autoscaling.yml
# Make the libraries we built available
COPY --chmod=0666 compute/etc/ld.so.conf.d/00-neon.conf /etc/ld.so.conf.d/00-neon.conf
RUN /sbin/ldconfig
RUN echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig
# rsyslog config permissions
# directory for rsyslogd pid file

View File

@@ -1 +0,0 @@
/usr/local/lib

View File

@@ -7,7 +7,7 @@ index 255e616..1c6edb7 100644
RelationGetRelationName(index));
+#ifdef NEON_SMGR
+ smgr_start_unlogged_build(RelationGetSmgr(index));
+ smgr_start_unlogged_build(index->rd_smgr);
+#endif
+
initRumState(&buildstate.rumstate, index);
@@ -18,7 +18,7 @@ index 255e616..1c6edb7 100644
rumUpdateStats(index, &buildstate.buildStats, buildstate.rumstate.isBuild);
+#ifdef NEON_SMGR
+ smgr_finish_unlogged_build_phase_1(RelationGetSmgr(index));
+ smgr_finish_unlogged_build_phase_1(index->rd_smgr);
+#endif
+
/*
@@ -29,7 +29,7 @@ index 255e616..1c6edb7 100644
}
+#ifdef NEON_SMGR
+ smgr_end_unlogged_build(RelationGetSmgr(index));
+ smgr_end_unlogged_build(index->rd_smgr);
+#endif
+
/*

View File

@@ -57,6 +57,21 @@ use tracing::{error, info};
use url::Url;
use utils::failpoint_support;
// Compatibility hack: if the control plane specified a remote-ext-config
// value that is not an HTTP(S) URL, use the default extension storage proxy
// gateway URL instead. Remove this once the control plane is updated to pass
// the gateway URL explicitly.
fn parse_remote_ext_base_url(arg: &str) -> Result<String> {
const FALLBACK_PG_EXT_GATEWAY_BASE_URL: &str =
"http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local";
Ok(if arg.starts_with("http") {
arg
} else {
FALLBACK_PG_EXT_GATEWAY_BASE_URL
}
.to_owned())
}
#[derive(Parser)]
#[command(rename_all = "kebab-case")]
struct Cli {
@@ -64,8 +79,9 @@ struct Cli {
pub pgbin: String,
/// The base URL for the remote extension storage proxy gateway.
#[arg(short = 'r', long)]
pub remote_ext_base_url: Option<Url>,
/// Should be in the form of `http(s)://<gateway-hostname>[:<port>]`.
#[arg(short = 'r', long, value_parser = parse_remote_ext_base_url, alias = "remote-ext-config")]
pub remote_ext_base_url: Option<String>,
/// The port to bind the external listening HTTP server to. Clients running
/// outside the compute will talk to the compute through this port. Keep
@@ -120,10 +136,6 @@ struct Cli {
requires = "compute-id"
)]
pub control_plane_uri: Option<String>,
/// Interval in seconds for collecting installed extensions statistics
#[arg(long, default_value = "3600")]
pub installed_extensions_collection_interval: u64,
}
fn main() -> Result<()> {
@@ -167,7 +179,6 @@ fn main() -> Result<()> {
cgroup: cli.cgroup,
#[cfg(target_os = "linux")]
vm_monitor_addr: cli.vm_monitor_addr,
installed_extensions_collection_interval: cli.installed_extensions_collection_interval,
},
config,
)?;
@@ -260,4 +271,18 @@ mod test {
fn verify_cli() {
Cli::command().debug_assert()
}
#[test]
fn parse_pg_ext_gateway_base_url() {
let arg = "http://pg-ext-s3-gateway2";
let result = super::parse_remote_ext_base_url(arg).unwrap();
assert_eq!(result, arg);
let arg = "pg-ext-s3-gateway";
let result = super::parse_remote_ext_base_url(arg).unwrap();
assert_eq!(
result,
"http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local"
);
}
}

View File

@@ -339,8 +339,6 @@ async fn run_dump_restore(
destination_connstring: String,
) -> Result<(), anyhow::Error> {
let dumpdir = workdir.join("dumpdir");
let num_jobs = num_cpus::get().to_string();
info!("using {num_jobs} jobs for dump/restore");
let common_args = [
// schema mapping (prob suffices to specify them on one side)
@@ -356,7 +354,7 @@ async fn run_dump_restore(
"directory".to_string(),
// concurrency
"--jobs".to_string(),
num_jobs,
num_cpus::get().to_string(),
// progress updates
"--verbose".to_string(),
];

View File

@@ -31,7 +31,6 @@ use std::time::{Duration, Instant};
use std::{env, fs};
use tokio::spawn;
use tracing::{Instrument, debug, error, info, instrument, warn};
use url::Url;
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
use utils::measured_stream::MeasuredReader;
@@ -97,10 +96,7 @@ pub struct ComputeNodeParams {
pub internal_http_port: u16,
/// the address of extension storage proxy gateway
pub remote_ext_base_url: Option<Url>,
/// Interval for installed extensions collection
pub installed_extensions_collection_interval: u64,
pub remote_ext_base_url: Option<String>,
}
/// Compute node info shared across several `compute_ctl` threads.
@@ -699,18 +695,25 @@ impl ComputeNode {
let log_directory_path = Path::new(&self.params.pgdata).join("log");
let log_directory_path = log_directory_path.to_string_lossy().to_string();
// Add project_id,endpoint_id to identify the logs.
// Add project_id,endpoint_id tag to identify the logs.
//
// These ids are passed from cplane,
let endpoint_id = pspec.spec.endpoint_id.as_deref().unwrap_or("");
let project_id = pspec.spec.project_id.as_deref().unwrap_or("");
// for backwards compatibility (old computes that don't have them),
// we set them to None.
// TODO: Clean up this code when all computes have them.
let tag: Option<String> = match (
pspec.spec.project_id.as_deref(),
pspec.spec.endpoint_id.as_deref(),
) {
(Some(project_id), Some(endpoint_id)) => {
Some(format!("{project_id}/{endpoint_id}"))
}
(Some(project_id), None) => Some(format!("{project_id}/None")),
(None, Some(endpoint_id)) => Some(format!("None/{endpoint_id}")),
(None, None) => None,
};
configure_audit_rsyslog(
log_directory_path.clone(),
endpoint_id,
project_id,
&remote_endpoint,
)?;
configure_audit_rsyslog(log_directory_path.clone(), tag, &remote_endpoint)?;
// Launch a background task to clean up the audit logs
launch_pgaudit_gc(log_directory_path);
@@ -746,7 +749,17 @@ impl ComputeNode {
let conf = self.get_tokio_conn_conf(None);
tokio::task::spawn(async {
let _ = installed_extensions(conf).await;
let res = get_installed_extensions(conf).await;
match res {
Ok(extensions) => {
info!(
"[NEON_EXT_STAT] {}",
serde_json::to_string(&extensions)
.expect("failed to serialize extensions list")
);
}
Err(err) => error!("could not get installed extensions: {err:?}"),
}
});
}
@@ -776,9 +789,6 @@ impl ComputeNode {
// Log metrics so that we can search for slow operations in logs
info!(?metrics, postmaster_pid = %postmaster_pid, "compute start finished");
// Spawn the extension stats background task
self.spawn_extension_stats_task();
if pspec.spec.prewarm_lfc_on_startup {
self.prewarm_lfc();
}
@@ -2189,41 +2199,6 @@ LIMIT 100",
info!("Pageserver config changed");
}
}
pub fn spawn_extension_stats_task(&self) {
let conf = self.tokio_conn_conf.clone();
let installed_extensions_collection_interval =
self.params.installed_extensions_collection_interval;
tokio::spawn(async move {
// An initial sleep is added to ensure that two collections don't happen at the same time.
// The first collection happens during compute startup.
tokio::time::sleep(tokio::time::Duration::from_secs(
installed_extensions_collection_interval,
))
.await;
let mut interval = tokio::time::interval(tokio::time::Duration::from_secs(
installed_extensions_collection_interval,
));
loop {
interval.tick().await;
let _ = installed_extensions(conf.clone()).await;
}
});
}
}
pub async fn installed_extensions(conf: tokio_postgres::Config) -> Result<()> {
let res = get_installed_extensions(conf).await;
match res {
Ok(extensions) => {
info!(
"[NEON_EXT_STAT] {}",
serde_json::to_string(&extensions).expect("failed to serialize extensions list")
);
}
Err(err) => error!("could not get installed extensions: {err:?}"),
}
Ok(())
}
pub fn forward_termination_signal() {

View File

@@ -224,10 +224,7 @@ pub fn write_postgres_conf(
writeln!(file, "pgaudit.log_rotation_age=5")?;
// Enable audit logs for pg_session_jwt extension
// TODO: Consider a good approach for shipping pg_session_jwt logs to the same sink as
// pgAudit - additional context in https://github.com/neondatabase/cloud/issues/28863
//
// writeln!(file, "pg_session_jwt.audit_log=on")?;
writeln!(file, "pg_session_jwt.audit_log=on")?;
// Add audit shared_preload_libraries, if they are not present.
//

View File

@@ -2,24 +2,10 @@
module(load="imfile")
# Input configuration for log files in the specified directory
# The messages can be multiline. The start of the message is a timestamp
# in "%Y-%m-%d %H:%M:%S.%3N GMT" (so timezone hardcoded).
# Replace log_directory with the directory containing the log files
input(type="imfile" File="{log_directory}/*.log"
Tag="pgaudit_log" Severity="info" Facility="local5"
startmsg.regex="^[[:digit:]]{{4}}-[[:digit:]]{{2}}-[[:digit:]]{{2}} [[:digit:]]{{2}}:[[:digit:]]{{2}}:[[:digit:]]{{2}}.[[:digit:]]{{3}} GMT,")
# Replace {log_directory} with the directory containing the log files
input(type="imfile" File="{log_directory}/*.log" Tag="{tag}" Severity="info" Facility="local0")
# the directory to store rsyslog state files
global(workDirectory="/var/log/rsyslog")
# Construct json, endpoint_id and project_id as additional metadata
set $.json_log!endpoint_id = "{endpoint_id}";
set $.json_log!project_id = "{project_id}";
set $.json_log!msg = $msg;
# Template suitable for rfc5424 syslog format
template(name="PgAuditLog" type="string"
string="<%PRI%>1 %TIMESTAMP:::date-rfc3339% %HOSTNAME% - - - - %$.json_log%")
# Forward to remote syslog receiver (@@<hostname>:<port>;format
local5.info @@{remote_endpoint};PgAuditLog
# Forward logs to remote syslog server
*.* @@{remote_endpoint}

View File

@@ -83,7 +83,6 @@ use reqwest::StatusCode;
use tar::Archive;
use tracing::info;
use tracing::log::warn;
use url::Url;
use zstd::stream::read::Decoder;
use crate::metrics::{REMOTE_EXT_REQUESTS_TOTAL, UNKNOWN_HTTP_STATUS};
@@ -159,14 +158,14 @@ fn parse_pg_version(human_version: &str) -> PostgresMajorVersion {
pub async fn download_extension(
ext_name: &str,
ext_path: &RemotePath,
remote_ext_base_url: &Url,
remote_ext_base_url: &str,
pgbin: &str,
) -> Result<u64> {
info!("Download extension {:?} from {:?}", ext_name, ext_path);
// TODO add retry logic
let download_buffer =
match download_extension_tar(remote_ext_base_url.as_str(), &ext_path.to_string()).await {
match download_extension_tar(remote_ext_base_url, &ext_path.to_string()).await {
Ok(buffer) => buffer,
Err(error_message) => {
return Err(anyhow::anyhow!(

View File

@@ -213,10 +213,8 @@ impl Escaping for PgIdent {
// Find the first suitable tag that is not present in the string.
// Postgres' max role/DB name length is 63 bytes, so even in the
// worst case it won't take long. Outer tag is always `tag + "x"`,
// so if `tag` is not present in the string, `outer_tag` is not
// present in the string either.
while self.contains(&tag.to_string()) {
// worst case it won't take long.
while self.contains(&format!("${tag}$")) || self.contains(&format!("${outer_tag}$")) {
tag += "x";
outer_tag = tag.clone() + "x";
}

View File

@@ -27,40 +27,6 @@ fn get_rsyslog_pid() -> Option<String> {
}
}
fn wait_for_rsyslog_pid() -> Result<String, anyhow::Error> {
const MAX_WAIT: Duration = Duration::from_secs(5);
const INITIAL_SLEEP: Duration = Duration::from_millis(2);
let mut sleep_duration = INITIAL_SLEEP;
let start = std::time::Instant::now();
let mut attempts = 1;
for attempt in 1.. {
attempts = attempt;
match get_rsyslog_pid() {
Some(pid) => return Ok(pid),
None => {
if start.elapsed() >= MAX_WAIT {
break;
}
info!(
"rsyslogd is not running, attempt {}. Sleeping for {} ms",
attempt,
sleep_duration.as_millis()
);
std::thread::sleep(sleep_duration);
sleep_duration *= 2;
}
}
}
Err(anyhow::anyhow!(
"rsyslogd is not running after waiting for {} seconds and {} attempts",
attempts,
start.elapsed().as_secs()
))
}
// Restart rsyslogd to apply the new configuration.
// This is necessary, because there is no other way to reload the rsyslog configuration.
//
@@ -70,29 +36,27 @@ fn wait_for_rsyslog_pid() -> Result<String, anyhow::Error> {
// TODO: test it properly
//
fn restart_rsyslog() -> Result<()> {
let old_pid = get_rsyslog_pid().context("rsyslogd is not running")?;
info!("rsyslogd is running with pid: {}, restart it", old_pid);
// kill it to restart
let _ = Command::new("pkill")
.arg("rsyslogd")
.output()
.context("Failed to restart rsyslogd")?;
// ensure rsyslogd is running
wait_for_rsyslog_pid()?;
.context("Failed to stop rsyslogd")?;
Ok(())
}
pub fn configure_audit_rsyslog(
log_directory: String,
endpoint_id: &str,
project_id: &str,
tag: Option<String>,
remote_endpoint: &str,
) -> Result<()> {
let config_content: String = format!(
include_str!("config_template/compute_audit_rsyslog_template.conf"),
log_directory = log_directory,
endpoint_id = endpoint_id,
project_id = project_id,
tag = tag.unwrap_or("".to_string()),
remote_endpoint = remote_endpoint
);
@@ -167,11 +131,15 @@ pub fn configure_postgres_logs_export(conf: PostgresLogsRsyslogConfig) -> Result
return Ok(());
}
// Nothing to configure
// When the new config is empty, we can simply remove the configuration file.
if new_config.is_empty() {
// When the configuration is removed, PostgreSQL will stop sending data
// to the files watched by rsyslog, so restarting rsyslog is more effort
// than just ignoring this change.
info!("removing rsyslog config file: {}", POSTGRES_LOGS_CONF_PATH);
match std::fs::remove_file(POSTGRES_LOGS_CONF_PATH) {
Ok(_) => {}
Err(err) if err.kind() == ErrorKind::NotFound => {}
Err(err) => return Err(err.into()),
}
restart_rsyslog()?;
return Ok(());
}

View File

@@ -71,14 +71,6 @@ test.escaping = 'here''s a backslash \\ and a quote '' and a double-quote " hoor
("name$$$", ("$x$name$$$$x$", "xx")),
("name$$$$", ("$x$name$$$$$x$", "xx")),
("name$x$", ("$xx$name$x$$xx$", "xxx")),
("x", ("$xx$x$xx$", "xxx")),
("xx", ("$xxx$xx$xxx$", "xxxx")),
("$x", ("$xx$$x$xx$", "xxx")),
("x$", ("$xx$x$$xx$", "xxx")),
("$x$", ("$xx$$x$$xx$", "xxx")),
("xx$", ("$xxx$xx$$xxx$", "xxxx")),
("$xx", ("$xxx$$xx$xxx$", "xxxx")),
("$xx$", ("$xxx$$xx$$xxx$", "xxxx")),
];
for (input, expected) in test_cases {

View File

@@ -2,10 +2,8 @@
[pageserver]
listen_pg_addr = '127.0.0.1:64000'
listen_http_addr = '127.0.0.1:9898'
listen_grpc_addr = '127.0.0.1:51051'
pg_auth_type = 'Trust'
http_auth_type = 'Trust'
grpc_auth_type = 'Trust'
[[safekeepers]]
id = 1

View File

@@ -4,10 +4,8 @@
id=1
listen_pg_addr = '127.0.0.1:64000'
listen_http_addr = '127.0.0.1:9898'
listen_grpc_addr = '127.0.0.1:51051'
pg_auth_type = 'Trust'
http_auth_type = 'Trust'
grpc_auth_type = 'Trust'
[[safekeepers]]
id = 1

View File

@@ -14,7 +14,7 @@
use std::ffi::OsStr;
use std::io::Write;
use std::os::fd::AsFd;
use std::os::unix::prelude::AsRawFd;
use std::os::unix::process::CommandExt;
use std::path::Path;
use std::process::Command;
@@ -356,7 +356,7 @@ where
let file = pid_file::claim_for_current_process(&path).expect("claim pid file");
// Remove the FD_CLOEXEC flag on the pidfile descriptor so that the pidfile
// remains locked after exec.
nix::fcntl::fcntl(file.as_fd(), FcntlArg::F_SETFD(FdFlag::empty()))
nix::fcntl::fcntl(file.as_raw_fd(), FcntlArg::F_SETFD(FdFlag::empty()))
.expect("remove FD_CLOEXEC");
// Don't run drop(file), it would close the file before we actually exec.
std::mem::forget(file);

View File

@@ -8,6 +8,7 @@
use std::borrow::Cow;
use std::collections::{BTreeSet, HashMap};
use std::fs::File;
use std::os::fd::AsRawFd;
use std::path::PathBuf;
use std::process::exit;
use std::str::FromStr;
@@ -30,9 +31,8 @@ use control_plane::safekeeper::SafekeeperNode;
use control_plane::storage_controller::{
NeonStorageControllerStartArgs, NeonStorageControllerStopArgs, StorageController,
};
use nix::fcntl::{Flock, FlockArg};
use nix::fcntl::{FlockArg, flock};
use pageserver_api::config::{
DEFAULT_GRPC_LISTEN_PORT as DEFAULT_PAGESERVER_GRPC_PORT,
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
};
@@ -749,16 +749,16 @@ struct TimelineTreeEl {
/// A flock-based guard over the neon_local repository directory
struct RepoLock {
_file: Flock<File>,
_file: File,
}
impl RepoLock {
fn new() -> Result<Self> {
let repo_dir = File::open(local_env::base_path())?;
match Flock::lock(repo_dir, FlockArg::LockExclusive) {
Ok(f) => Ok(Self { _file: f }),
Err((_, e)) => Err(e).context("flock error"),
}
let repo_dir_fd = repo_dir.as_raw_fd();
flock(repo_dir_fd, FlockArg::LockExclusive)?;
Ok(Self { _file: repo_dir })
}
}
@@ -1008,16 +1008,13 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
let pageserver_id = NodeId(DEFAULT_PAGESERVER_ID.0 + i as u64);
let pg_port = DEFAULT_PAGESERVER_PG_PORT + i;
let http_port = DEFAULT_PAGESERVER_HTTP_PORT + i;
let grpc_port = DEFAULT_PAGESERVER_GRPC_PORT + i;
NeonLocalInitPageserverConf {
id: pageserver_id,
listen_pg_addr: format!("127.0.0.1:{pg_port}"),
listen_http_addr: format!("127.0.0.1:{http_port}"),
listen_https_addr: None,
listen_grpc_addr: Some(format!("127.0.0.1:{grpc_port}")),
pg_auth_type: AuthType::Trust,
http_auth_type: AuthType::Trust,
grpc_auth_type: AuthType::Trust,
other: Default::default(),
// Typical developer machines use disks with slow fsync, and we don't care
// about data integrity: disable disk syncs.
@@ -1279,7 +1276,6 @@ async fn handle_timeline(cmd: &TimelineCmd, env: &mut local_env::LocalEnv) -> Re
mode: pageserver_api::models::TimelineCreateRequestMode::Branch {
ancestor_timeline_id,
ancestor_start_lsn: start_lsn,
read_only: false,
pg_version: None,
},
};

View File

@@ -278,10 +278,8 @@ pub struct PageServerConf {
pub listen_pg_addr: String,
pub listen_http_addr: String,
pub listen_https_addr: Option<String>,
pub listen_grpc_addr: Option<String>,
pub pg_auth_type: AuthType,
pub http_auth_type: AuthType,
pub grpc_auth_type: AuthType,
pub no_sync: bool,
}
@@ -292,10 +290,8 @@ impl Default for PageServerConf {
listen_pg_addr: String::new(),
listen_http_addr: String::new(),
listen_https_addr: None,
listen_grpc_addr: None,
pg_auth_type: AuthType::Trust,
http_auth_type: AuthType::Trust,
grpc_auth_type: AuthType::Trust,
no_sync: false,
}
}
@@ -310,10 +306,8 @@ pub struct NeonLocalInitPageserverConf {
pub listen_pg_addr: String,
pub listen_http_addr: String,
pub listen_https_addr: Option<String>,
pub listen_grpc_addr: Option<String>,
pub pg_auth_type: AuthType,
pub http_auth_type: AuthType,
pub grpc_auth_type: AuthType,
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
pub no_sync: bool,
#[serde(flatten)]
@@ -327,10 +321,8 @@ impl From<&NeonLocalInitPageserverConf> for PageServerConf {
listen_pg_addr,
listen_http_addr,
listen_https_addr,
listen_grpc_addr,
pg_auth_type,
http_auth_type,
grpc_auth_type,
no_sync,
other: _,
} = conf;
@@ -339,9 +331,7 @@ impl From<&NeonLocalInitPageserverConf> for PageServerConf {
listen_pg_addr: listen_pg_addr.clone(),
listen_http_addr: listen_http_addr.clone(),
listen_https_addr: listen_https_addr.clone(),
listen_grpc_addr: listen_grpc_addr.clone(),
pg_auth_type: *pg_auth_type,
grpc_auth_type: *grpc_auth_type,
http_auth_type: *http_auth_type,
no_sync: *no_sync,
}
@@ -717,10 +707,8 @@ impl LocalEnv {
listen_pg_addr: String,
listen_http_addr: String,
listen_https_addr: Option<String>,
listen_grpc_addr: Option<String>,
pg_auth_type: AuthType,
http_auth_type: AuthType,
grpc_auth_type: AuthType,
#[serde(default)]
no_sync: bool,
}
@@ -744,10 +732,8 @@ impl LocalEnv {
listen_pg_addr,
listen_http_addr,
listen_https_addr,
listen_grpc_addr,
pg_auth_type,
http_auth_type,
grpc_auth_type,
no_sync,
} = config_toml;
let IdentityTomlSubset {
@@ -764,10 +750,8 @@ impl LocalEnv {
listen_pg_addr,
listen_http_addr,
listen_https_addr,
listen_grpc_addr,
pg_auth_type,
http_auth_type,
grpc_auth_type,
no_sync,
};
pageservers.push(conf);

View File

@@ -129,9 +129,7 @@ impl PageServerNode {
));
}
if [conf.http_auth_type, conf.pg_auth_type, conf.grpc_auth_type]
.contains(&AuthType::NeonJWT)
{
if conf.http_auth_type != AuthType::Trust || conf.pg_auth_type != AuthType::Trust {
// Keys are generated in the toplevel repo dir, pageservers' workdirs
// are one level below that, so refer to keys with ../
overrides.push("auth_validation_public_key_path='../auth_public_key.pem'".to_owned());
@@ -548,16 +546,6 @@ impl PageServerNode {
.map(serde_json::from_str)
.transpose()
.context("Falied to parse 'sampling_ratio'")?,
relsize_snapshot_cache_capacity: settings
.remove("relsize snapshot cache capacity")
.map(|x| x.parse::<usize>())
.transpose()
.context("Falied to parse 'relsize_snapshot_cache_capacity' as integer")?,
basebackup_cache_enabled: settings
.remove("basebackup_cache_enabled")
.map(|x| x.parse::<bool>())
.transpose()
.context("Failed to parse 'basebackup_cache_enabled' as bool")?,
};
if !settings.is_empty() {
bail!("Unrecognized tenant settings: {settings:?}")

View File

@@ -14,14 +14,6 @@ PG_VERSION=${PG_VERSION:-14}
CONFIG_FILE_ORG=/var/db/postgres/configs/config.json
CONFIG_FILE=/tmp/config.json
# Test that the first library path that the dynamic loader looks in is the path
# that we use for custom compiled software
first_path="$(ldconfig --verbose 2>/dev/null \
| grep --invert-match ^$'\t' \
| cut --delimiter=: --fields=1 \
| head --lines=1)"
test "$first_path" == '/usr/local/lib'
echo "Waiting pageserver become ready."
while ! nc -z pageserver 6400; do
sleep 1;

View File

@@ -1,8 +0,0 @@
#!/bin/bash
# We need these settings to get the expected output results.
# We cannot use the environment variables e.g. PGTZ due to
# https://github.com/neondatabase/neon/issues/1287
export DATABASE=${1:-contrib_regression}
psql -c "ALTER DATABASE ${DATABASE} SET neon.allow_unstable_extensions='on'" \
-c "ALTER DATABASE ${DATABASE} SET DateStyle='Postgres,MDY'" \
-c "ALTER DATABASE ${DATABASE} SET TimeZone='America/Los_Angeles'" \

View File

@@ -1,16 +0,0 @@
#!/usr/bin/env bash
set -ex
cd "$(dirname "${0}")"
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
dropdb --if-exists contrib_regression
createdb contrib_regression
cd h3_postgis/test
psql -d contrib_regression -c "CREATE EXTENSION postgis" -c "CREATE EXTENSION postgis_raster" -c "CREATE EXTENSION h3" -c "CREATE EXTENSION h3_postgis"
TESTS=$(echo sql/* | sed 's|sql/||g; s|\.sql||g')
${PG_REGRESS} --use-existing --dbname contrib_regression ${TESTS}
cd ../../h3/test
TESTS=$(echo sql/* | sed 's|sql/||g; s|\.sql||g')
dropdb --if-exists contrib_regression
createdb contrib_regression
psql -d contrib_regression -c "CREATE EXTENSION h3"
${PG_REGRESS} --use-existing --dbname contrib_regression ${TESTS}

View File

@@ -1,7 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
cd h3/test
TESTS=$(echo sql/* | sed 's|sql/||g; s|\.sql||g')
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression ${TESTS}

View File

@@ -1,6 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname "${0}")"
if [ -f Makefile ]; then
make installcheck
fi

View File

@@ -1,9 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
[ -f Makefile ] || exit 0
dropdb --if-exist contrib_regression
createdb contrib_regression
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
TESTS=$(echo sql/* | sed 's|sql/||g; s|\.sql||g')
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression ${TESTS}

View File

@@ -18,7 +18,6 @@ TESTS=${TESTS/row_level_security/}
TESTS=${TESTS/sqli_connection/}
dropdb --if-exist contrib_regression
createdb contrib_regression
. ../alter_db.sh
psql -v ON_ERROR_STOP=1 -f test/fixtures.sql -d contrib_regression
${REGRESS} --use-existing --dbname=contrib_regression --inputdir=${TESTDIR} ${TESTS}

View File

@@ -3,7 +3,6 @@ set -ex
cd "$(dirname "${0}")"
dropdb --if-exist contrib_regression
createdb contrib_regression
. ../alter_db.sh
psql -d contrib_regression -c "CREATE EXTENSION vector" -c "CREATE EXTENSION rag"
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --load-extension=vector --load-extension=rag --dbname=contrib_regression basic_functions text_processing api_keys chunking_functions document_processing embedding_api_functions voyageai_functions

View File

@@ -20,6 +20,5 @@ installcheck: regression-test
regression-test:
dropdb --if-exists contrib_regression
createdb contrib_regression
../alter_db.sh
psql -d contrib_regression -c "CREATE EXTENSION $(EXTNAME)"
$(PG_REGRESS) --inputdir=. --outputdir=. --use-existing --dbname=contrib_regression $(REGRESS)

View File

@@ -3,7 +3,6 @@ set -ex
cd "$(dirname ${0})"
dropdb --if-exist contrib_regression
createdb contrib_regression
. ../alter_db.sh
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
REGRESS="$(make -n installcheck | awk '{print substr($0,index($0,"init-extension"));}')"
REGRESS="${REGRESS/startup_perms/}"

View File

@@ -11,6 +11,5 @@ PG_REGRESS := $(dir $(PGXS))../../src/test/regress/pg_regress
installcheck:
dropdb --if-exists contrib_regression
createdb contrib_regression
../alter_db.sh
psql -d contrib_regression -c "CREATE EXTENSION vector" -c "CREATE EXTENSION rag_bge_small_en_v15"
$(PG_REGRESS) --use-existing --dbname=contrib_regression $(REGRESS)

View File

@@ -11,6 +11,5 @@ PG_REGRESS := $(dir $(PGXS))../../src/test/regress/pg_regress
installcheck:
dropdb --if-exists contrib_regression
createdb contrib_regression
../alter_db.sh
psql -d contrib_regression -c "CREATE EXTENSION vector" -c "CREATE EXTENSION rag_jina_reranker_v1_tiny_en"
$(PG_REGRESS) --use-existing --dbname=contrib_regression $(REGRESS)

View File

@@ -3,6 +3,5 @@ set -ex
cd "$(dirname ${0})"
dropdb --if-exist contrib_regression
createdb contrib_regression
. ../alter_db.sh
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression rum rum_hash ruminv timestamp orderby orderby_hash altorder altorder_hash limits int2 int4 int8 float4 float8 money oid time timetz date interval macaddr inet cidr text varchar char bytea bit varbit numeric rum_weight expr array

View File

@@ -5,4 +5,3 @@ listen_http_addr='0.0.0.0:9898'
remote_storage={ endpoint='http://minio:9000', bucket_name='neon', bucket_region='eu-north-1', prefix_in_bucket='/pageserver' }
control_plane_api='http://0.0.0.0:6666' # No storage controller in docker compose, specify a junk address
control_plane_emergency_mode=true
virtual_file_io_mode="buffered" # the CI runners where we run the docker compose tests have slow disks

View File

@@ -82,8 +82,7 @@ EXTENSIONS='[
{"extname": "pg_ivm", "extdir": "pg_ivm-src"},
{"extname": "pgjwt", "extdir": "pgjwt-src"},
{"extname": "pgtap", "extdir": "pgtap-src"},
{"extname": "pg_repack", "extdir": "pg_repack-src"},
{"extname": "h3", "extdir": "h3-pg-src"}
{"extname": "pg_repack", "extdir": "pg_repack-src"}
]'
EXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -)
COMPUTE_TAG=${NEW_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d

View File

@@ -7,8 +7,6 @@ Author: Christian Schwarz
A brief RFC / GitHub Epic describing a vectored version of the `Timeline::get` method that is at the heart of Pageserver.
**EDIT**: the implementation of this feature is described in [Vlad's (internal) tech talk](https://drive.google.com/file/d/1vfY24S869UP8lEUUDHRWKF1AJn8fpWoJ/view?usp=drive_link).
# Motivation
During basebackup, we issue many `Timeline::get` calls for SLRU pages that are *adjacent* in key space.

View File

@@ -1,194 +0,0 @@
# Bottommost Garbage-Collection Compaction
## Summary
The goal of this doc is to propose a way to reliably collect garbage below the GC horizon. This process is called bottom-most garbage-collection compaction (gc-compaction), and is part of the broader legacy-enhanced compaction that we plan to implement in the future.
## Motivation
The current GC algorithm waits until a key region is covered by image layers before collecting its garbage. Relying on image layer generation to produce this coverage is not reliable. There is prior art that feeds information back from the GC algorithm to the image generation process to accelerate garbage collection, but it slows down the system and creates write amplification.
# Basic Idea
![](images/036-bottom-most-gc-compaction/01-basic-idea.svg)
The idea of bottom-most compaction is simple: we rewrite all layers that are below or intersect with the GC horizon to produce a flat level of image layers at the GC horizon and deltas above the GC horizon. In this process,
- All images and deltas ≤ GC horizon LSN will be dropped. This step collects the garbage.
- We produce images for all keys involved in the compaction process at the GC horizon.
Therefore, it can precisely collect all garbage below the horizon and reduce space amplification, e.g., in the staircase pattern (test_gc_feedback).
![The staircase pattern in test_gc_feedback in the original compaction algorithm. The goal is to collect garbage below the red horizontal line.](images/036-bottom-most-gc-compaction/12-staircase-test-gc-feedback.png)
The staircase pattern in test_gc_feedback in the original compaction algorithm. The goal is to collect garbage below the red horizontal line.
# Branches
With branches, the bottom-most compaction should retain a snapshot of the keyspace at the `retain_lsn` so that the child branch can access data at the branch point. This requires some modifications to the basic bottom-most compaction algorithm that we sketched above.
![](images/036-bottom-most-gc-compaction/03-retain-lsn.svg)
## Single Timeline w/ Snapshots: handle `retain_lsn`
First let’s look at the case where we create branches off the main branch but don’t write any data to them (aka “snapshots”).
The bottom-most compaction algorithm collects all deltas and images of a key and can make decisions on what data to retain. Given a single key’s history as below:
```
LSN 0x10 -> A
LSN 0x20 -> append B
retain_lsn: 0x20
LSN 0x30 -> append C
LSN 0x40 -> append D
retain_lsn: 0x40
LSN 0x50 -> append E
GC horizon: 0x50
LSN 0x60 -> append F
```
The algorithm will produce:
```
LSN 0x20 -> AB
(drop all history below the earliest retain_lsn)
LSN 0x40 -> ABCD
(assume the cost of replaying 2 deltas is higher than storing the full image, we generate an image here)
LSN 0x50 -> append E
(replay one delta is cheap)
LSN 0x60 -> append F
(keep everything as-is above the GC horizon)
```
![](images/036-bottom-most-gc-compaction/05-btmgc-parent.svg)
What happens is that we balance the space taken by each retain_lsn and the cost of replaying deltas during the bottom-most compaction process. This is controlled by a threshold. If `count(deltas) < $threshold`, the deltas will be retained. Otherwise, an image will be generated and the deltas will be dropped.
In the example above, the `$threshold` is 2.
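For illustration, here is a minimal sketch of that per-key decision, with hypothetical types and names (the actual logic lives in `generate_key_retention`, described later in this doc):
```
// Hypothetical sketch of the per-key retention decision between two
// adjacent retain_lsns; not the actual generate_key_retention code.
enum Record {
    Image(Vec<u8>), // full page image at this LSN
    Delta(Vec<u8>), // WAL record to replay on top of the previous state
}

// Keep the deltas if replaying them is cheap; otherwise reconstruct the
// page once and keep only the image at the last LSN.
fn retain_segment(
    history: Vec<(u64, Record)>, // (lsn, record), ascending by lsn
    threshold: usize,
    reconstruct: impl Fn(&[(u64, Record)]) -> Vec<u8>,
) -> Vec<(u64, Record)> {
    if history.len() < threshold {
        history
    } else {
        let last_lsn = history.last().map(|(lsn, _)| *lsn).unwrap_or(0);
        let image = reconstruct(&history);
        vec![(last_lsn, Record::Image(image))]
    }
}
```
With `$threshold = 2` as above, the two records between 0x20 and 0x40 are replaced by the image ABCD@0x40, while the single delta at 0x50 is kept as-is.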
## Child Branches with data: pull + partial images
In the previous section we showed how bottom-most compaction respects `retain_lsn` so that all data that was readable at branch creation remains readable. But branches can have data of their own, and that data can fall out of the branch’s PITR window. This section explains how we deal with that.
We will run the same bottom-most compaction for these branches, to ensure the space amplification on the child branch is reasonable.
```
branch_lsn: 0x20
LSN 0x30 -> append P
LSN 0x40 -> append Q
LSN 0x50 -> append R
GC horizon: 0x50
LSN 0x60 -> append S
```
Note that bottom-most compaction happens on a per-timeline basis. When it processes this key, it only reads the history from LSN 0x30 without a base image. Therefore, on child branches, the bottom-most compaction process will make image creation decisions based on the same `count(deltas) < $threshold` criterion, and if it decides to create an image, the base image will be retrieved from the ancestor branch.
```
branch_lsn: 0x20
LSN 0x50 -> ABPQR
(we pull the image at LSN 0x20 from the ancestor branch to get AB, then apply the appends P, Q, and R to the page; the deltas at 0x30/0x40/0x50 are dropped and replaced with this image)
GC horizon: 0x50
LSN 0x60 -> append S
```
![](images/036-bottom-most-gc-compaction/06-btmgc-child.svg)
Note that for child branches, we do not create image layers when bottom-most compaction runs. Instead, we drop the 0x30/0x40/0x50 delta records and directly place the image ABPQR@0x50 into the delta layer, which serves as a sparse image layer. If we created image layers for child branches, we would need to put all keys in the range into the image layer, which causes space bloat and slow compactions. In this proposal, the compaction process only compacts and processes keys modified inside the child branch.
# Result
Bottom-most compaction ensures all garbage under the GC horizon gets collected right away (compared with “eventually” in the current algorithm). Meanwhile, it generates images at each retain_lsn to ensure branch reads are fast. As we make per-key decisions on whether to generate an image, the theoretical lower bound of the storage space we need to retain for a branch is lower than before.
Before: for each partition, min(sum(logs for each key), sum(images for each key)), because we always generate image layers over an entire key range.
After: sum over all keys of min(logs for the key, image for the key).
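For illustration, suppose a partition has two keys: key1 with 10MB of logs and an 8MB image, and key2 with 2MB of logs and an 8MB image. Before: min(10+2, 8+8) = 12MB. After: min(10, 8) + min(2, 8) = 10MB.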
# Compaction Trigger
The bottom-most compaction can be triggered automatically. The goal of the trigger is to ensure a constant factor of write amplification: if the user writes 1GB of WAL into the system, we should write 1GB x C of data to S3. The legacy compaction algorithm has no such constant factor C; the data it writes to S3 is quadratic in the logical size of the database (see [A Theoretical View of Neon Storage](https://www.notion.so/A-Theoretical-View-of-Neon-Storage-8d7ad7555b0c41b2a3597fa780911194?pvs=21)).
We propose the following compaction trigger that yields a constant write amplification factor, where write amplification = total writes to S3 / total user writes. We only analyze the write amplification caused by the bottom-most GC-compaction process, ignoring the amplification from the legacy image layer creation.
Assume we have ***X*** bytes of delta layers above the GC horizon, ***A*** bytes of delta layers intersecting the GC horizon, ***B*** bytes of delta layers below the GC horizon, and ***C*** bytes of image layers below the GC horizon.
The legacy GC + compaction loop always keeps ***A*** unchanged and reduces ***B*** and ***C*** once image layers cover the key range. This yields 0 write amplification (only file deletions) but ***B*** extra bytes of space.
![](images/036-bottom-most-gc-compaction/09-btmgc-analysis-2.svg)
The bottom-most compaction proposed here will split ***A*** into deltas above the GC horizon and below the GC horizon. Everything below the GC horizon will be image layers after the compaction (not considering branches). Therefore, this yields ***A+C*** extra write traffic each iteration, plus 0 extra space.
![](images/036-bottom-most-gc-compaction/07-btmgc-analysis-1.svg)
Consider also read amplification below the GC horizon. When a read request reaches the GC horizon, the read amplification is (A+B+C)/C = 1+(A+B)/C, so reducing ***A*** and ***B*** helps reduce read amplification below the GC horizon.
The metrics-based trigger waits until both space amplification and write amplification are acceptably low before triggering the compaction. The trigger is defined as **(A+B)/C ≥ 1 (or some other ratio)**.
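For example (illustrative numbers): with A = 2GB, B = 3GB, and C = 4GB, (A+B)/C = 1.25 ≥ 1, so a gc-compaction would be scheduled.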
To reason about this trigger, consider the two cases:
**Data Ingestion**
The user keeps ingesting data into the database, so the WAL size roughly equals the database’s logical size. Compaction is triggered only when the newly written WAL roughly equals the current bottom-most image size (call it X). Therefore, it’s triggered when the database size has doubled, which is a reasonable amount of work. Write amplification is 2X/X = 2 for the X amount of data written.
![](images/036-bottom-most-gc-compaction/10-btmgc-analysis-3.svg)
**Updates/Deletion**
In this case, WAL size will be larger than the database logical size ***D***. The compaction gets triggered for every ***D*** bytes of WAL written. Therefore, for every ***D*** bytes of WAL, we rewrite the bottom-most layer, which produces an extra ***D*** bytes of write amplification. This incurs exactly 2x write amplification (by the write of D), 1.5x write amplification (if we count from the start of the process) and no space amplification.
![](images/036-bottom-most-gc-compaction/11-btmgc-analysis-4.svg)
Note that here I try to show that write amplification is a constant (i.e., the data we write to S3 is proportional to the data the user writes). The main problem with the current legacy compaction algorithm is that write amplification is proportional to the database size.
The next step is to optimize the write amplification above the GC horizon (i.e., change the image creation criteria, top-most compaction, or introduce tiered compaction), to ensure the write amplification of the whole system is a constant factor.
Note on temporary space: rewriting 20GB of layers first writes +20GB of new layers and only then deletes the old 20GB, so the process needs 40GB of temporary space.
# Sub-Compactions
The gc-compaction algorithm may take a long time and we need to split the job into multiple sub-compaction jobs.
![](images/036-bottom-most-gc-compaction/13-job-split.svg)
As in the figure, the auto-trigger schedules a compaction job covering the full keyspace below a specific LSN. If we cannot finish compacting it in one run in a reasonable amount of time, the algorithm vertically splits it into multiple jobs (in this case, 5).
Each gc-compaction job creates one level of delta layers and one flat level of image layers for each LSN. Those layers are automatically split based on size, which means that if a sub-compaction job produces 1GB of deltas, it produces 4 * 256MB delta layers. Layers that are not fully contained within a sub-compaction job's rectangle are rewritten to contain only the keys outside of the compacted key range.
# Implementation
The main implementation of gc-compaction is in `compaction.rs`.
* `compact_with_gc`: The main loop of gc-compaction. It takes a rectangular range of the layer map and compacts that specific range. It selects the layers intersecting the rectangle, downloads them, creates the k-merge iterator to read those layers in key-lsn order, and decides for each key which records to keep and where to insert a reconstructed page. The process is the basic unit of a gc-compaction and is not interruptible. If the process gets preempted by L0 compaction, it has to be restarted from scratch. For layers that overlap the rectangle but are not fully inside it, the main loop also rewrites them so that the new layer (or two layers, if both the left and right ends are outside of the rectangle) has the same LSN range as the original one but contains only the keys outside of the compaction range.
* `gc_compaction_split_jobs`: Splits a big gc-compaction job into sub-compactions based on heuristics in the layer map. The function looks at the layer map and splits the compaction job based on the size of the layers so that each compaction job only pulls ~4GB of layer files.
* `generate_key_retention` and `KeyHistoryRetention`: Implements the algorithm described in the "basic idea" and "branch" chapter of this RFC. It takes a vector of history of a key (key-lsn-value) and decides which LSNs of the key to retain. If there are too many deltas between two retain_lsns, it will reconstruct the page and insert an image into the compaction result. Also, we implement `KeyHistoryRetention::verify` to ensure the generated result is not corrupted -- all retain_lsns and all LSNs above the gc-horizon should be accessible.
* `GcCompactionQueue`: the automatic trigger implementation for gc-compaction. `GcCompactionQueue::iteration` is called at the end of the tenant compaction loop. It will then call `trigger_auto_compaction` to decide whether to trigger a gc-compaction job for this tenant. If yes, the compaction-job will be added to the compaction queue, and the queue will be slowly drained once there are no other compaction jobs running. gc-compaction has the lowest priority. If a sub-compaction job is not successful or gets preempted by L0 compaction (see limitations for reasons why a compaction job would fail), it will _not_ be retried.
* Changes to `index_part.json`: we added a `last_completed_lsn` field to the index part for the auto-trigger to decide when to trigger a compaction.
* Changes to the read path: when gc-compaction updates the layer map, all reads need to wait. See `gc_compaction_layer_update_lock` and comments in the code path for more information.
Gc-compaction can also be scheduled over the HTTP API. Example:
```
curl 'localhost:9898/v1/tenant/:tenant_id/timeline/:timeline_id/compact?enhanced_gc_bottom_most_compaction=true&dry_run=true' -X PUT -H "Content-Type: application/json" -d '{"scheduled": true, "compact_key_range": { "start": "000000067F0000A0000002A1CF0100000000", "end": "000000067F0000A0000002A1D70100000000" } }'
```
The `dry_run` mode can be specified in the query string so that the compaction will go through all layers to estimate how much space can be saved without writing the compaction result into the layer map.
The auto-trigger is controlled by tenant-level flag `gc_compaction_enabled`. If this is set to false, no gc-compaction will be automatically scheduled on this tenant (but manual trigger still works).
# Next Steps
There are still some limitations of gc-compaction itself that need to be resolved and tested:
- gc-compaction is currently only automatically triggered on root branches. We have not tested gc-compaction on child branches in staging.
- gc-compaction will skip aux key regions because of the possible conflict with the assumption of aux file tombstones.
- gc-compaction does not consider keyspaces at retain_lsns and only looks at keys in the layers. This also causes us to give up some sub-compaction jobs, because a key might have only part of its history available after traditional GC removed the rest.
- We limit gc-compaction to run over shards <= 150GB to avoid gc-compaction taking too much time and blocking other compaction jobs. The sub-compaction split algorithm needs to be improved to split both vertically and horizontally. Also, we need to move the layer download process out of the compaction loop so that we don't block other compaction jobs for too long.
- The compaction trigger always schedules gc-compaction from the lowest LSN to the gc-horizon. Currently we do not schedule compaction jobs that select only layers in the middle. Allowing this could potentially reduce the number of layers read/written throughout the process.
- gc-compaction will give up if there are too many layers to rewrite or if there is not enough disk space for the compaction.
- gc-compaction sometimes fails with "no key produced during compaction", which means that all existing keys within the compaction range can be collected; but we don't have a way to write this information back to the layer map -- we cannot generate an empty image layer.
- We limit the maximum size of deltas for a single key to 512MB. Above this size, gc-compaction will give up. This can be resolved by changing `generate_key_retention` to operate on a stream instead of requiring all of the key history to be collected first.
In the future,
- Top-most compaction: ensure we always have an image coverage for the latest data (or near the latest data), so that reads will be fast at the latest LSN.
- Tiered compaction on deltas: ensure read from any LSN is fast.
- Per-timeline compaction → tenant-wide compaction?

View File

@@ -1,362 +0,0 @@
# Direct IO For Pageserver
Date: Apr 30, 2025
## Summary
This document is a retroactive RFC. It
- provides some background on what direct IO is,
- motivates why Pageserver should be using it for its IO, and
- describes how we changed Pageserver to use it.
The [initial proposal](https://github.com/neondatabase/neon/pull/8240) that kicked off the work can be found in this closed GitHub PR.
People primarily involved in this project were:
- Yuchen Liang <yuchen@neon.tech>
- Vlad Lazar <vlad@neon.tech>
- Christian Schwarz <christian@neon.tech>
## Timeline
For posterity, here is the rough timeline of the development work that got us to where we are today.
- Jan 2024: [integrate `tokio-epoll-uring`](https://github.com/neondatabase/neon/pull/5824) along with owned buffers API
- March 2024: `tokio-epoll-uring` enabled in all regions in buffered IO mode
- Feb 2024 to June 2024: PS PageCache Bypass For Data Blocks
- Feb 2024: [Vectored Get Implementation](https://github.com/neondatabase/neon/pull/6576) bypasses delta & image layer blocks for page requests
- Apr to June 2024: [Epic: bypass PageCache for use data blocks](https://github.com/neondatabase/neon/issues/7386) addresses remaining users
- Aug to Nov 2024: direct IO: first code; preliminaries; read path coding; BufferedWriter; benchmarks show perf regressions too high, no-go.
- Nov 2024 to Jan 2025: address perf regressions by developing page_service pipelining (aka batching) and concurrent IO ([Epic](https://github.com/neondatabase/neon/issues/9376))
- Feb to March 2025: rollout batching, then concurrent+direct IO => read path and InMemoryLayer are now direct IO
- Apr 2025: develop & roll out direct IO for the write path
## Background: Terminology & Glossary
**kernel page cache**: the Linux kernel's page cache is a write-back cache for filesystem contents.
The cached unit is memory-page-sized & aligned chunks of the files that are being cached (typically 4k).
The cache lives in kernel memory and is not directly accessible through userspace.
**Buffered IO**: an application's read/write system calls go through the kernel page cache.
For example, a 10 byte sized read or write to offset 5000 in a file will load the file contents
at offset `[4096,8192)` into a free page in the kernel page cache. If necessary, it will evict
a page to make room (cf eviction). Then, the kernel performs a memory-to-memory copy of 10 bytes
from/to the offset `4` (`5000 = 4096 + 4`) within the cached page. If it's a write, the kernel keeps
track of the fact that the page is now "dirty" in some ancillary structure.
**Writeback**: a buffered read/write syscall returns after the memory-to-memory copy. The modifications
made by e.g. write system calls are not even *issued* to disk, let alone durable. Instead, the kernel
asynchronously writes back dirtied pages based on a variety of conditions. For us, the most relevant
ones are a) explicit request by userspace (`fsync`) and b) memory pressure.
**Memory pressure**: the kernel page cache is a best effort service and a user of spare memory capacity.
If there is no free memory, the kernel page allocator will take pages used by page cache to satisfy allocations.
Before reusing a page like that, the page has to be written back (writeback, see above).
The far-reaching consequence of this is that **any allocation of anonymous memory can do IO** if the only
way to get that memory is by eviction & re-using a dirty page cache page.
Notably, this includes a simple `malloc` in userspace, because eventually that boils down to `mmap(..., MAP_ANON, ...)`.
I refer to this effect as the "malloc latency backscatter" caused by buffered IO.
**Direct IO** allows application's read/write system calls to bypass the kernel page cache. The filesystem
is still involved because it is ultimately in charge of mapping the concept of files & offsets within them
to sectors on block devices. Typically, the filesystem poses size and alignment requirements for memory buffers
and file offsets (statx `Dio_mem_align` / `Dio_offset_align`), see [this gist](https://gist.github.com/problame/1c35cac41b7cd617779f8aae50f97155).
The IO operations will fail at runtime with EINVAL if the alignment requirements are not met.
**"buffered" vs "direct"**: the central distinction between buffered and direct IO is about who allocates and
fills the IO buffers, and who controls when exactly the IOs are issued. In buffered IO, it's the syscall handlers,
kernel page cache, and memory management subsystems (cf "writeback"). In direct IO, all of it is done by
the application.
It takes more effort by the application to program with direct instead of buffered IO.
The return is precise control over and a clear distinction between consumption/modification of memory vs disk.
**Pageserver PageCache**: Pageserver has an additional `PageCache` (referred to as PS PageCache from here on, as opposed to "kernel page cache").
Its caching unit is 8KiB blocks of the layer files written by Pageserver.
A miss in PageCache is filled by reading from the filesystem, through the `VirtualFile` abstraction layer.
The default size is tiny (64MiB), very much like Postgres's `shared_buffers`.
We ran production at 128MiB for a long time but gradually moved it up to 2GiB over the past ~year.
**VirtualFile** is Pageserver's abstraction for file IO, very similar to the facility in Postgres that bears the same name.
Its historical purpose appears to be working around open file descriptor limitations, which is practically irrelevant on Linux.
However, the facility in Pageserver is useful as an intermediary layer for metrics and abstracts over the different kinds of
IO engines that Pageserver supports (`std-fs` vs `tokio-epoll-uring`).
## Background: History Of Caching In Pageserver
For multiple years, Pageserver's `PageCache` was on the path of all read _and write_ IO.
It performed write-back to the kernel using buffered IO.
We converted it into a read-only cache of immutable data in [PR 4994](https://github.com/neondatabase/neon/pull/4994).
The introduction of `tokio-epoll-uring` required converting the code base to use owned IO buffers.
The `PageCache` pages are usable as owned IO buffers.
We then started bypassing PageCache for user data blocks.
Data blocks are the 8k blocks of data in layer files that hold the multiple `Value`s, as opposed to the disk btree index blocks that tell us which values exist in a file at what offsets.
The disk btree embedded in delta & image layers remains `PageCache`'d.
Epics for that work were:
- Vectored `Timeline::get` (cf RFC 30) skipped delta and image layer data block `PageCache`ing outright.
- Epic https://github.com/neondatabase/neon/issues/7386 took care of the remaining users for data blocks:
- Materialized page cache (cached materialized pages; shown to be ~0% hit rate in practice)
- InMemoryLayer
- Compaction
The outcome of the above:
1. All data blocks are always read through the `VirtualFile` APIs, hitting the kernel buffered read path (=> kernel page cache).
2. Indirect blocks (=disk btree blocks) would be cached in the PS `PageCache`.
In production we size the PS `PageCache` to be 2GiB.
This drives the hit rate up to ~99.95% and the eviction/replacement rate down to less than 200/second on a 1-minute average on the busiest machines.
High baseline replacement rates are treated as a signal of resource exhaustion (page cache insufficient to host working set of the PS).
The response to this is to migrate tenants away, or increase PS `PageCache` size.
It is currently manual but could be automated, e.g., in Storage Controller.
In the future, we may eliminate the `PageCache` even for indirect blocks.
For example with an LRU cache that has as unit the entire disk btree content
instead of individual blocks.
## High-Level Design
So, before work on this project started, all data block reads and the entire write path of Pageserver were using kernel-buffered IO, i.e., the kernel page cache.
We now want to get the kernel page cache out of the picture by using direct IO for all interaction with the filesystem.
This achieves the following system properties:
**Predictable VirtualFile latencies**
* With buffered IO, reads are sometimes fast, sometimes slow, depending on kernel page cache hit/miss.
* With buffered IO, appends when writing out new layer files during ingest or compaction are sometimes fast, sometimes slow because of write-back backpressure.
* With buffered IO, the "malloc backscatter" phenomenon pointed out in the Glossary section is not something we actively observe.
But we do have occasional spikes in Dirty memory amount and Memory PSI graphs, so it may already be affecting us to some degree.
* By switching to direct IO, above operations will have the (predictable) device latency -- always.
Reads and appends always go to disk.
And malloc will not have to write back dirty data.
**Explicitness & Tangibility of resource usage**
* In a multi-tenant system, it is generally desirable and valuable to be *explicit* about the main resources we use for each tenant.
* By using direct IO, we become explicit about the resources *disk IOPs* and *memory capacity*, which were previously conflated by the kernel page cache, outside our immediate control.
* We will be able to build per-tenant observability of resource usage ("what tenant is causing the actual IOs that are sent to the disk?").
* We will be able to build accounting & QoS by implementing an IO scheduler that is tenant aware. The kernel is not tenant-aware and can't do that.
**CPU Efficiency**
* The involvement of the kernel page cache means one additional memory-to-memory copy on read and write path.
* Direct IO will eliminate that memory-to-memory copy, if we can make the userspace buffers used for the IO calls satisfy direct IO alignment requirements.
The **trade-off** is that we no longer get the theoretical benefits of the kernel page cache. These are:
- read latency improvements for repeat reads of the same data ("locality of reference")
- asterisk: only if that state is still cache-resident by time of next access
- write throughput by having kernel page cache batch small VFS writes into bigger disk writes
- asterisk: only if memory pressure is low enough that the kernel can afford to delay writeback
We are **happy to make this trade-off**:
- Because of the advantages listed above.
- Because we empirically have enough DRAM on Pageservers to serve metadata (=index blocks) from PS PageCache.
(At just 2GiB PS PageCache size, we average a 99.95% hit rate).
So, the latency of going to disk is only for data block reads, not the index traversal.
- Because **the kernel page cache is ineffective** at high tenant density anyway (#tenants/pageserver instance).
And because dense packing of tenants will always be desirable to drive COGS down, we should design the system for it.
(See the appendix for a more detailed explanation why this is).
- So, we accept that some reads that used to be fast by circumstance will have higher but **predictable** latency than before.
### Desired End State
The desired end state of the project is as follows, and with some asterisks, we have achieved it.
All IOs of the Pageserver data path use direct IO, thereby bypassing the kernel page cache.
In particular, the "data path" includes
- the wal ingest path
- compaction
- anything on the `Timeline::get` / `Timeline::get_vectored` path.
The production Pageserver config is tuned such that virtually all non-data blocks are cached in the PS PageCache.
Hit rate target is 99.95%.
There are no regressions to ingest latency.
The total "wait-for-disk time" contribution to random getpage request latency is `O(1 read IOP latency)`.
We accomplish that by having a near-100% PS PageCache hit rate so that layer index traversal effectively never needs to wait for IO.
Thereby, it can issue all the data block reads as it traverses the index and only wait at the end (concurrent IO).
The amortized "wait-for-disk time" contribution of this direct IO proposal to a series of sequential getpage requests is `1/32 * read IOP latency` for each getpage request.
We accomplish this by server-side batching of up to 32 reads into a single `Timeline::get_vectored` call.
(This is an ideal world where our batches are full - that's not the case in prod today because of lack of queue depth).
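(For illustration, assuming a ~100µs NVMe read latency and full 32-read batches, that amortizes to roughly 3µs of wait-for-disk time per getpage request.)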
## Design & Implementation
### Prerequisites
A lot of prerequisite work had to happen to enable use of direct IO.
To meet the "wait-for-disk time" requirements from the DoD, we implement for the read path:
- page_service level server-side batching (config field `page_service_pipelining`)
- concurrent IO (config field `get_vectored_concurrent_io`)
The work for both of these was tracked [in the epic](https://github.com/neondatabase/neon/issues/9376).
Server-side batching will likely be obsoleted by the [#proj-compute-communicator](https://github.com/neondatabase/neon/pull/10799).
The Concurrent IO work is described in retroactive RFC `2025-04-30-pageserver-concurrent-io-on-read-path.md`.
The implementation is relatively brittle and needs further investment, see the `Future Work` section in that RFC.
For the write path, and especially WAL ingest, we need to hide write latency.
We accomplish this by implementing a (`BufferedWriter`) type that does double-buffering: flushes of the filled
buffer happen in a sidecar tokio task while new writes fill a new buffer.
We refactor InMemoryLayer as well as BlobWriter (=> delta and image layer writers) to use this new `BufferedWriter`.
The most comprehensive write-up of this work is in [the PR description](https://github.com/neondatabase/neon/pull/11558).
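To illustrate the double-buffering idea, here is a minimal, hypothetical sketch using a thread and a rendezvous channel in place of the sidecar tokio task (the real `BufferedWriter` is async and integrated with `VirtualFile`):
```
use std::io::Write;
use std::sync::mpsc;
use std::thread;

// Hypothetical sketch: while the sidecar thread flushes one full buffer,
// the caller keeps filling a fresh one, hiding the write latency.
struct DoubleBufferedWriter {
    buf: Vec<u8>,
    capacity: usize,
    tx: mpsc::SyncSender<Vec<u8>>,
    _flusher: thread::JoinHandle<()>,
}

impl DoubleBufferedWriter {
    fn new<W: Write + Send + 'static>(mut dst: W, capacity: usize) -> Self {
        // Rendezvous channel: hand-off blocks until the flusher is ready,
        // so at most one buffer is being flushed while we fill the next.
        let (tx, rx) = mpsc::sync_channel::<Vec<u8>>(0);
        let _flusher = thread::spawn(move || {
            for full in rx {
                dst.write_all(&full).expect("flush failed");
            }
        });
        DoubleBufferedWriter { buf: Vec::with_capacity(capacity), capacity, tx, _flusher }
    }

    fn write(&mut self, mut data: &[u8]) {
        while !data.is_empty() {
            let room = self.capacity - self.buf.len();
            let (now, rest) = data.split_at(room.min(data.len()));
            self.buf.extend_from_slice(now);
            data = rest;
            if self.buf.len() == self.capacity {
                // Hand the full buffer to the flusher; keep filling a new one.
                // (A real implementation would also flush the final partial
                // buffer on shutdown, padded to the direct IO size multiple.)
                let full = std::mem::replace(&mut self.buf, Vec::with_capacity(self.capacity));
                self.tx.send(full).expect("flusher exited");
            }
        }
    }
}
```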
### Ensuring Adherence to Alignment Requirements
Direct IO puts requirements on
- memory buffer alignment
- io size (=memory buffer size)
- file offset alignment
The requirements are specific to a combination of filesystem/block-device/architecture (hardware page size!).
In Neon production environments we currently use ext4 with Linux 6.1.X on AWS and Azure storage-optimized instances (locally attached NVMe).
Instead of dynamic discovery using `statx`, we statically hard-code 512 bytes as the buffer/offset alignment and size-multiple.
We made this decision because:
- a) it is compatible with all the environments we need to run in
- b) our primary workload can be small-random-read-heavy (we do merge adjacent reads if possible, but the worst case is that all `Value`s that need to be read are far apart)
- c) 512-byte tail latency on the production instance types is much better than 4k (p99.9: 3x lower, p99.99 5x lower).
- d) hard-coding at compile-time allows us to use the Rust type system to enforce the use of only aligned IO buffers, eliminating a source of runtime errors typically associated with direct IO.
This was [discussed here](https://neondb.slack.com/archives/C07BZ38E6SD/p1725036790965549?thread_ts=1725026845.455259&cid=C07BZ38E6SD).
The new `IoBufAligned` / `IoBufAlignedMut` marker traits indicate that a given buffer meets memory alignment requirements.
All `VirtualFile` APIs and several software layers built on top of them only accept buffers that implement those traits.
Implementors of the marker traits are:
- `IoBuffer` / `IoBufferMut`: used for most reads and writes
- `PageWriteGuardBuf`: for filling PS PageCache pages (index blocks!)
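For illustration, here is a minimal, hypothetical standalone sketch of what meeting these requirements looks like at the syscall level (assuming the `libc` crate for the `O_DIRECT` constant; the file path is made up; this is not the `IoBuffer` implementation):
```
use std::alloc::{alloc, dealloc, Layout};
use std::fs::OpenOptions;
use std::io::Read;
use std::os::unix::fs::OpenOptionsExt;

const ALIGN: usize = 512; // the hard-coded alignment/size multiple from above

fn main() -> std::io::Result<()> {
    // O_DIRECT makes reads/writes bypass the kernel page cache.
    let mut file = OpenOptions::new()
        .read(true)
        .custom_flags(libc::O_DIRECT)
        .open("/tmp/some_layer_file")?;

    // Allocate a 512-byte-aligned buffer; an ordinary unaligned Vec<u8>
    // would make the read fail with EINVAL under O_DIRECT.
    let layout = Layout::from_size_align(8192, ALIGN).unwrap();
    unsafe {
        let ptr = alloc(layout);
        assert!(!ptr.is_null());
        let buf = std::slice::from_raw_parts_mut(ptr, 8192);
        // Both the io size (8192) and the file offset (0) are multiples of 512.
        let res = file.read_exact(buf);
        dealloc(ptr, layout);
        res?;
    }
    Ok(())
}
```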
The alignment requirement is infectious; it permeates bottom-up throughout the code base.
We stop the infection at roughly the same layers in the code base where we stopped permeating the
use of owned-buffers-style APIs for tokio-epoll-uring. The stopping works by introducing
a memory-to-memory copy from/to some unaligned memory location on the stack or heap.
The places where we currently stop permeating are sort of arbitrary. For example, it would probably
make sense to replace more usage of `Bytes` that we know holds 8k pages with 8k-sized `IoBuffer`s.
The `IoBufAligned` / `IoBufAlignedMut` types do not protect us from the following types of runtime errors:
- non-adherence to file offset alignment requirements
- non-adherence to io size requirements
The following higher-level constructs ensure we meet the requirements:
- read path: the `ChunkedVectoredReadBuilder` and `mod vectored_dio_read` ensure reads happen at aligned offsets and in appropriate size multiples.
- write path: `BufferedWriter` only writes in multiples of the capacity, at offsets that are `start_offset+N*capacity`; see its doc comment.
Note that these types are always used, regardless of whether direct IO is enabled or not.
There are some cases where this adds unnecessary overhead to buffered IO (e.g. all memcpy's inflated to multiples of 512).
But we could not identify meaningful impact in practice when we shipped these changes while we were still using buffered IO.
### Configuration / Feature Flagging
In the previous section we described how all users of VirtualFile were changed to always adhere to direct IO alignment and size-multiple requirements.
To actually enable direct IO, all we need to do is set the `O_DIRECT` flag in `open` syscalls / io_uring operations.
We set `O_DIRECT` based on:
- the VirtualFile API used to create/open the VirtualFile instance
- the `virtual_file_io_mode` configuration flag
- the OpenOptions `read` and/or `write` flags.
The VirtualFile APIs suffixed with `_v2` are the only ones that _may_ open with `O_DIRECT`, depending on the other two factors in the above list.
Other APIs never use `O_DIRECT`.
(The name is bad and should really be `_maybe_direct_io`.)
The reason for having new APIs is that all code used VirtualFile, but implementation and rollout happened in consecutive phases (read path, InMemoryLayer, write path).
At the VirtualFile level, context on whether an instance of VirtualFile is on read path, InMemoryLayer, or write path is not available.
The `_v2` APIs then make the decision to set `O_DIRECT` based on the `virtual_file_io_mode` flag and the OpenOptions `read`/`write` flags.
The result is the following runtime behavior:
|what|OpenOptions|`v_f_io_mode`<br/>=`buffered`|`v_f_io_mode`<br/>=`direct`|`v_f_io_mode`<br/>=`direct-rw`|
|-|-|-|-|-|
|`DeltaLayerInner`|read|()|O_DIRECT|O_DIRECT|
|`ImageLayerInner`|read|()|O_DIRECT|O_DIRECT|
|`InMemoryLayer`|read + write|()|()*|O_DIRECT|
|`DeltaLayerWriter`| write | () | () | O_DIRECT |
|`ImageLayerWriter`| write | () | () | O_DIRECT |
|`download_layer_file`|write |()|()|O_DIRECT|
The `InMemoryLayer` is marked with `*` because there was a period when it *did* use O_DIRECT under `=direct`.
That period was when we implemented and shipped the first version of `BufferedWriter`.
We used it in `InMemoryLayer` and `download_layer_file` but it was only sensitive to `v_f_io_mode` in `InMemoryLayer`.
The introduction of `=direct-rw`, and the switch of the remaining write path to `BufferedWriter`, happened later,
in https://github.com/neondatabase/neon/pull/11558.
Note that this way of feature flagging inside VirtualFile makes it less and less a general purpose POSIX file access abstraction.
For example, with `=direct-rw` enabled, it is no longer possible to open a `VirtualFile` without `O_DIRECT`. It'll always be set.
## Correctness Validation
The correctness risks with this project were:
- Memory safety issues in the `IoBuffer` / `IoBufferMut` implementation.
These types expose an API that is largely identical to that of the `bytes` crate and/or Vec.
- Runtime errors (=> downtime / unavailability) because of non-adherence to alignment/size-multiple requirements, resulting in EINVAL on the read path.
We sadly do not have infrastructure to run pageserver under `cargo miri`.
So for memory safety issues, we relied on careful peer review.
We do assert the production-like alignment requirements in testing builds.
However, these asserts were added retroactively.
The actual validation before rollout happened in staging and pre-prod.
We eventually enabled `=direct`/`=direct-rw` for Rust unit tests and the regression test suite.
I cannot recall a single instance of staging/pre-prod/production errors caused by non-adherence to alignment/size-multiple requirements.
Evidently developer testing was good enough.
## Performance Validation
The read path went through a lot of iterations of benchmarking in staging and pre-prod.
The benchmarks in those environments demonstrated performance regressions early in the implementation.
It was actually this performance testing that made us implement batching and concurrent IO to avoid unacceptable regressions.
The write path was much quicker to validate because `bench_ingest` covered all of the (less numerous) access patterns.
## Future Work
There is minor and major follow-up work that can be considered in the future.
Check the (soon-to-be-closed) Epic https://github.com/neondatabase/neon/issues/8130's "Follow-Ups" section for a current list.
Read Path:
- PS PageCache hit rate is crucial to unlock concurrent IO and reasonable latency for random reads generally.
Instead of reactively sizing PS PageCache, we should estimate the required PS PageCache size
and potentially also use that to drive placement decisions of shards from StorageController
https://github.com/neondatabase/neon/issues/9288
- ... unless we get rid of PS PageCache entirely and cache the index block in a more specialized cache.
But even then, an estimation of the working set would be helpful to figure out caching strategy.
Write Path:
- BlobWriter and its users could switch back to a borrowed API https://github.com/neondatabase/neon/issues/10129
- ... unless we want to implement bypass mode for large writes https://github.com/neondatabase/neon/issues/10101
- The `TempVirtualFile` introduced as part of this project could internalize more of the common usage pattern: https://github.com/neondatabase/neon/issues/11692
- Reduce conditional compilation around `virtual_file_io_mode`: https://github.com/neondatabase/neon/issues/11676
Both:
- A performance simulation mode that pads VirtualFile op latencies to typical NVMe latencies, even if the underlying storage is faster.
This would avoid misleadingly good performance on developer systems and in benchmarks on systems that are less busy than production hosts.
However, padding latencies at microsecond scale is non-trivial.
Misc:
- We should finish trimming VirtualFile's scope to be truly limited to core data path read & write.
Abstractions for reading & writing pageserver config, location config, heatmaps, etc., should use
APIs in a different package (`VirtualFile::crashsafe_overwrite` and `VirtualFile::read_to_string`
are good entrypoints for cleanup): https://github.com/neondatabase/neon/issues/11809
# Appendix
## Why Kernel Page Cache Is Ineffective At Tenant High Density
In the Motivation section, we stated:
> - **The kernel page cache is ineffective** at high tenant density anyway (#tenants/pageserver instance).
The reason is that the workload Computes send to Pageserver is whatever misses the Computes' caches.
That's either sequential scans or random reads.
A random read workload simply causes cache thrashing, because a packed Pageserver NVMe drive (`im4gn.2xlarge`) has ~100x more capacity than the DRAM available.
Having the kernel page cache hold data blocks is a complete waste in this case.
Sequential read workloads *can* benefit, iff those pages were updated recently (= no image layer yet) and close together in time/LSN space.
In such cases, the WAL records of those updates likely sit on the same delta layer block.
When Compute does a sequential scan, it sends a series of single-page requests for these individual pages.
When Pageserver processes the second request in such a series, it goes to the same delta layer block and gets a kernel page cache hit.
This dependence on kernel page cache for sequential scan performance is significant, but the solution is at a higher level than generic data block caching.
We could either add a small per-connection LRU cache for such delta layer blocks,
or merge those sequential requests into a larger vectored get request, which is designed to never read a block twice.
The latter amortizes the read latency for the delta layer block across the vectored get batch size (currently up to 32): with a full batch, a single block read is shared by 32 page requests.
There are Pageserver-internal workloads that do sequential access (compaction, image layer generation), but these
1. are not latency-critical and can do batched access outside of the `page_service` protocol constraints (image layer generation)
2. don't actually need to reconstruct images and therefore can use totally different access methods (=> compaction can use k-way merge iterators with their own internal buffering / prefetching).

View File

@@ -1,251 +0,0 @@
# Concurrent IO for Pageserver Read Path
Date: May 6, 2025
## Summary
This document is a retroactive RFC on the Pageserver Concurrent IO work that happened in late 2024 / early 2025.
The gist of it is that Pageserver's `Timeline::get_vectored` now _issues_ the data block read operations against layer files
_as it traverses the layer map_ and only _wait_ once, for all of them, after traversal is complete.
Assuming good PS PageCache hit rates on the index blocks during traversal, this drives the "wait-for-disk" time
contribution down from `random_read_io_latency * O(number_of_values)` to `random_read_io_latency * O(1 + traversal)`.
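To make the difference concrete with assumed numbers: at ~100µs per random NVMe read and 8 values needed for a page, the sequential scheme waits roughly `8 * 100µs = 800µs`, whereas issuing all reads during traversal and waiting once costs roughly one read latency, ~100µs, plus whatever IO the traversal itself cannot avoid.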
The motivation for why this work had to happen when it happened was the switch of Pageserver to
- not cache user data blocks in PS PageCache and
- switch to use direct IO.
More context on this is given in the complementary RFC `./rfcs/2025-04-30-direct-io-for-pageserver.md`.
### Refs
- Epic: https://github.com/neondatabase/neon/issues/9378
- Prototyping happened during the Lisbon 2024 Offsite hackathon: https://github.com/neondatabase/neon/pull/9002
- Main implementation PR with good description: https://github.com/neondatabase/neon/issues/9378
Design and implementation by:
- Vlad Lazar <vlad@neon.tech>
- Christian Schwarz <christian@neon.tech>
## Background & Motivation
The Pageserver read path (`Timeline::get_vectored`) consists of two high-level steps:
- Retrieve the delta and image `Value`s required to reconstruct the requested Page@LSN (`Timeline::get_values_reconstruct_data`).
- Pass these values to walredo to reconstruct the page images.
The read path used to be single-key but was made multi-key some time ago.
([Internal tech talk by Vlad](https://drive.google.com/file/d/1vfY24S869UP8lEUUDHRWKF1AJn8fpWoJ/view?usp=drive_link))
However, for simplicity, most of this doc will explain things in terms of a single key being requested.
The `Value` retrieval step above can be broken down into the following functions:
- **Traversal** of the layer map to figure out which `Value`s from which layer files are required for the page reconstruction.
- **Read IO Planning**: planning of the read IOs that need to be issued to the layer files / filesystem / disk.
The main job here is to coalesce the small value reads into larger filesystem-level read operations.
  This layer also takes care of direct IO alignment and size-multiple requirements (cf. the direct IO RFC for details).
Check `struct VectoredReadPlanner` and `mod vectored_dio_read` for how it's done.
- **Perform the read IO** using `tokio-epoll-uring`.
Before this project, above functions were sequentially interleaved, meaning:
1. we would advance traversal, ...
2. discover, that we need to read a value, ...
3. read it from disk using `tokio-epoll-uring`, ...
4. goto 1 unless we're done.
This meant that if N `Value`s need to be read to reconstruct a page,
the time we spend waiting for disk will be `random_read_io_latency * O(number_of_values)`.
## Design
The **traversal** and **read IO Planning** jobs still happen sequentially, layer by layer, as before.
But instead of performing the read IOs inline, we submit the IOs to a concurrent tokio task for execution.
After the last read from the last layer is submitted, we wait for the IOs to complete.
Assuming the filesystem / disk is able to actually process the submitted IOs without queuing,
we arrive at _time spent waiting for disk_ ~ `random_read_io_latency * O(1 + traversal)`.
Note this whole RFC is concerned with the steady state where all layer files required for reconstruction are resident on local NVMe.
Traversal will stall on an on-demand layer download if a layer is not yet resident.
It cannot proceed without the layer being resident because its next step depends on the contents of the layer index.
### Avoiding Waiting For IO During Traversal
The `traversal` component in above time-spent-waiting-for-disk estimation is dominant and needs to be minimized.
Before this project, traversal needed to perform IOs for the following:
1. The time we are waiting on PS PageCache to page in the visited layers' disk btree index blocks.
2. When visiting a delta layer, reading the data block that contains a `Value` for a requested key,
to determine whether the `Value::will_init` the page and therefore traversal can stop for this key.
The solution for (1) is to raise the PS PageCache size such that the hit rate is practically 100%.
(Check out the `Background: History Of Caching In Pageserver` section in the RFC on Direct IO for more details.)
The solution for (2) is to source `will_init` from the disk btree index keys, which fortunately
already encode this bit of information since the introduction of the current storage/layer format.
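Conceptually, the index entry then carries everything traversal needs in order to stop early (a sketch; field names are illustrative, the real index packs this information into a compact on-disk representation):

```rust
// Sketch: what traversal reads from the disk btree index entry.
struct IndexValue {
    blob_offset: u64, // where the Value's bytes live in the layer file
    will_init: bool,  // true => this Value fully initializes the page
}

/// Traversal can stop for a key as soon as it sees a will_init Value,
/// without reading the data block itself.
fn traversal_can_stop(v: &IndexValue) -> bool {
    v.will_init
}
```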
### Concurrent IOs, Submission & Completion
To separate IO submission from waiting for its completion,
we introduce the notion of an `IoConcurrency` struct through which IOs are issued.
An IO is an opaque future that
- captures the `tx` side of a `oneshot` channel
- performs the read IO by calling `VirtualFile::read_exact_at().await`
- sends the result into the `tx`
Issuing an IO means `Box`ing the future above and handing that `Box` over to the `IoConcurrency` struct.
The traversal code that submits the IO stores the corresponding `oneshot::Receiver`
in the `VectoredValueReconstructState`, in the place where we previously stored
the sequentially read `img` and `records` fields.
When we're done with traversal, we wait for all submitted IOs:
for each key, there is a future that awaits all the `oneshot::Receiver`s
for that key, and then calls into walredo to reconstruct the page image.
Walredo is now invoked concurrently for each value instead of sequentially.
Walredo itself remains unchanged.
The spawned IO futures are driven to completion by a sidecar tokio task that
is separate from the task that performs all the layer visiting and spawning of IOs.
That task receives the IO futures via an unbounded mpsc channel and
drives them to completion inside a `FuturesUnordered`.
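A minimal sketch of this submission/completion scheme, leaving out the gate guard, metrics, and error plumbing of the real implementation (`read_exact_at_stub` stands in for `VirtualFile::read_exact_at`):

```rust
use std::{future::Future, pin::Pin};

use futures::stream::{FuturesUnordered, StreamExt};
use tokio::sync::{mpsc, oneshot};

type IoFuture = Pin<Box<dyn Future<Output = ()> + Send>>;

// Sidecar task: receives boxed IO futures and drives them to completion.
fn spawn_sidecar() -> mpsc::UnboundedSender<IoFuture> {
    let (tx, mut rx) = mpsc::unbounded_channel::<IoFuture>();
    tokio::spawn(async move {
        let mut inflight = FuturesUnordered::new();
        loop {
            tokio::select! {
                maybe_io = rx.recv() => match maybe_io {
                    Some(fut) => inflight.push(fut),
                    None => break, // IoConcurrency dropped: no new IOs
                },
                Some(()) = inflight.next(), if !inflight.is_empty() => {}
            }
        }
        while inflight.next().await.is_some() {} // drain already-submitted IOs
    });
    tx
}

// Submission: the IO future owns the oneshot tx; traversal keeps the rx
// (stored in VectoredValueReconstructState) and awaits it after traversal.
fn submit_read(
    submit: &mpsc::UnboundedSender<IoFuture>,
) -> oneshot::Receiver<std::io::Result<Vec<u8>>> {
    let (tx, rx) = oneshot::channel();
    let _ = submit.send(Box::pin(async move {
        let res = read_exact_at_stub().await;
        let _ = tx.send(res);
    }));
    rx
}

async fn read_exact_at_stub() -> std::io::Result<Vec<u8>> {
    Ok(vec![0u8; 8192]) // placeholder for the actual disk read
}
```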
### Error handling, Panics, Cancellation-Safety
There are two error classes during reconstruct data retrieval:
* traversal errors: index lookup, move to next layer, and the like
* value read IO errors
A traversal error fails the entire `get_vectored` request, as it did before this project.
A value read error only fails reconstruction of that value.
Panics and dropping of the `get_vectored` future before it completes
leaves the sidecar task running and does not cancel submitted IOs
(see next section for details on sidecar task lifecycle).
All of this is safe, but today's preference in the team is to close out
all resource usage explicitly if possible, rather than cancelling and forgetting
about it on drop. So, there is a warning if we drop a
`VectoredValueReconstructState`/`ValuesReconstructState` that still has uncompleted IOs.
### Sidecar Task Lifecycle
The sidecar tokio task is spawned as part of `IoConcurrency::spawn_from_conf`.
The `IoConcurrency` object acts as a handle through which IO futures are submitted.
The spawned tokio task holds the `Timeline::gate` open.
It is _not_ sensitive to `Timeline::cancel`, but instead to the `IoConcurrency` object being dropped.
Once the `IoConcurrency` struct is dropped, no new IO futures can come in
but already submitted IO futures will be driven to completion regardless.
We _could_ safely stop polling these futures because `tokio-epoll-uring` op futures are cancel-safe.
But the underlying kernel and hardware resources are not magically freed up by that.
So, again, in the interest of closing out all outstanding resource usage, we make timeline shutdown wait for sidecar tasks and their IOs to complete.
Under normal conditions, this should be in the low hundreds of microseconds.
It is advisable to make the `IoConcurrency` as long-lived as possible to minimize the amount of
tokio task churn (=> lower pressure on tokio). Generally this means creating it "high up" in the call stack.
The pain with this is that the `IoConcurrency` reference needs to be propagated "down" to
the (short-lived) functions/scope where we issue the IOs.
We would like to use `RequestContext` for this propagation in the future (issue [here](https://github.com/neondatabase/neon/issues/10460)).
For now, we just add another argument to the relevant code paths.
### Feature Gating
The `IoConcurrency` is an `enum` with two variants: `Sequential` and `SidecarTask`.
The behavior from before this project is available through `IoConcurrency::Sequential`,
which awaits the IO futures in place, without "spawning" or "submitting" them anywhere.
The `get_vectored_concurrent_io` pageserver config variable determines the runtime value,
**except** for the places that use `IoConcurrency::sequential` to get an `IoConcurrency` object.
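In shape, the gate looks roughly like this (simplified; the real enum carries more state):

```rust
use std::{future::Future, pin::Pin};

type IoFuture = Pin<Box<dyn Future<Output = ()> + Send>>;

// Simplified shape of the feature gate.
enum IoConcurrency {
    Sequential,
    SidecarTask {
        submit: tokio::sync::mpsc::UnboundedSender<IoFuture>,
    },
}

impl IoConcurrency {
    async fn spawn_io(&mut self, fut: IoFuture) {
        match self {
            // pre-project behavior: await the IO right where it was issued
            IoConcurrency::Sequential => fut.await,
            // concurrent behavior: hand the future to the sidecar task
            IoConcurrency::SidecarTask { submit } => {
                let _ = submit.send(fut);
            }
        }
    }
}
```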
### Alternatives Explored & Caveats Encountered
A few words on the rationale behind having a sidecar *task* and what
alternatives were considered but abandoned.
#### Why We Need A Sidecar *Task* / Why Just `FuturesUnordered` Doesn't Work
We explored not having a sidecar task, and instead having a `FuturesUnordered` per
`Timeline::get_vectored`. We would queue all IO futures in it and poll it for the
first time after traversal is complete (i.e., at `collect_pending_ios`).
The obvious disadvantage, though not a showstopper, is that we wouldn't be submitting
IOs until traversal is complete.
The showstopper, however, is that deadlocks happen if we don't drive the
IO futures to completion independently of the traversal task.
The reason is that both the IO futures and the traversal task may hold _some_,
_and_ try to acquire _more_, shared limited resources.
For example, both the traversal task and an IO future may try to acquire
* a `VirtualFile` file descriptor cache slot async mutex (observed during impl)
* a `tokio-epoll-uring` submission slot (observed during impl)
* a `PageCache` slot (currently this is not the case but we may move more code into the IO futures in the future)
#### Why We Don't Do `tokio::task`-per-IO-future
Another option is to spawn a short-lived `tokio::task` for each IO future.
We implemented and benchmarked it during development, but found little
throughput improvement and moderate mean & tail latency degradation.
Concerns about pressure on the tokio scheduler led us to abandon this variant.
## Future Work
In addition to what is listed here, also check the "Punted" list in the epic:
https://github.com/neondatabase/neon/issues/9378
### Enable `Timeline::get`
The only major code path that still uses `IoConcurrency::sequential` is `Timeline::get`.
The impact is that roughly the following parts of pageserver do not benefit yet:
- parts of basebackup
- reads performed by the ingest path
- most internal operations that read metadata keys (e.g. `collect_keyspace`!)
The solution is to propagate `IoConcurrency` via `RequestContext`: https://github.com/neondatabase/neon/issues/10460
The tricky part is to figure out at which level of the code the `IoConcurrency` is spawned (and added to the RequestContext).
Also, propagation via `RequestContext` makes it harder to tell during development whether a given
piece of code uses concurrent vs sequential mode: one has to recursively walk up the call tree to find the
place that puts the `IoConcurrency` into the `RequestContext`.
We'd have to use `::Sequential` as the conservative default value in a fresh `RequestContext`, and add some
observability to weed out places that fail to enrich it with a properly spawned `IoConcurrency::spawn_from_conf`.
### Concurrent On-Demand Downloads enabled by Detached Indices
As stated earlier, traversal stalls on on-demand download because its next step depends on the contents of the layer index.
Once we have separated indices from data blocks (=> https://github.com/neondatabase/neon/issues/11695)
we will only need to stall if the index is not resident. The download of the data blocks can happen concurrently or in the background. For example:
- Move the `Layer::get_or_maybe_download().await` inside the IO futures.
This goes in the opposite direction of the next "future work" item below, but it's easy to do.
- Serve the IO future directly from object storage and dispatch the layer download
to some other actor, e.g., an actor that is responsible for both downloads & eviction.
### New `tokio-epoll-uring` API That Separates Submission & Wait-For-Completion
Instead of the `$op().await` style API, it would be useful to have a different `tokio-epoll-uring` API
that separates enqueueing (without necessarily `io_uring_enter`ing the kernel each time), submission,
and waiting for completion.
The `$op().await` API is too opaque, so we _have_ to stuff it into a `FuturesUnordered`.
A split API as sketched above would allow traversal to ensure an IO operation is enqueued to the kernel/disk (and to get back-pressure iff the io_uring squeue is full),
while avoiding spending CPU cycles on processing completions while we're still traversing.
The idea gets muddied by the fact that we may self-deadlock if we submit too much without completing.
So, the submission part of the split API needs to process completions if squeue is full.
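A hypothetical surface for such a split API (all names invented for illustration; `tokio-epoll-uring` offers nothing like this today):

```rust
use std::{future::Future, io};

// Hypothetical split submission/completion API.
trait SplitRing {
    /// Future resolving to the result of one enqueued operation.
    type Handle: Future<Output = io::Result<usize>>;

    /// Enqueue without necessarily entering the kernel. To avoid the
    /// self-deadlock described above, this may process completions
    /// internally when the submission queue is full.
    fn enqueue_read(&mut self, fd: i32, offset: u64, len: usize) -> Self::Handle;

    /// Enter the kernel once to submit everything enqueued so far.
    fn submit(&mut self) -> io::Result<()>;
}
```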
Either way, this split API is a precondition for addressing the bigger issue with the design presented here,
which we discuss in the next section.
### Opaque Futures Are Brittle
The use of opaque futures to represent submitted IOs is a clever hack to minimize changes & allow for near-perfect feature-gating.
However, we take on **brittleness** because callers must guarantee that the submitted futures are independent.
In our experience, it is non-trivial to identify or rule out the interdependencies.
See the lengthy doc comment on the `IoConcurrency::spawn_io` method for more details.
The better interface and proper subsystem boundary is a _descriptive_ struct of what needs to be done ("read this range from this VirtualFile into this buffer"),
with the caller getting back a means to wait for completion.
The subsystem can thereby reason on its own about how operations may be related,
unlike today, where the submitted opaque future can do just about anything.
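For illustration, such a descriptive interface could look as follows (hypothetical names, not an agreed-upon design):

```rust
use tokio::sync::oneshot;

// Hypothetical descriptive IO request.
struct ReadDescriptor {
    file_id: u64, // stands in for a VirtualFile handle
    offset: u64,  // must satisfy direct IO alignment requirements
    len: usize,   // must be a multiple of the logical block size
}

// A means to wait for completion, returned to the caller.
struct Completion(oneshot::Receiver<std::io::Result<Vec<u8>>>);

trait IoSubsystem {
    /// The subsystem sees *what* is being read and can coalesce, reorder,
    /// or rate-limit on its own; callers just await the returned completion.
    fn submit_read(&self, desc: ReadDescriptor) -> Completion;
}
```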

View File

@@ -1,135 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xl="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" version="1.1" viewBox="82 284 863 375" width="863" height="375">
<defs/>
<g id="01-basic-idea" stroke-opacity="1" stroke-dasharray="none" stroke="none" fill="none" fill-opacity="1">
<title>01-basic-idea</title>
<rect fill="white" x="82" y="284" width="863" height="375"/>
<g id="01-basic-idea_Layer_1">
<title>Layer 1</title>
<g id="Graphic_2">
<rect x="234" y="379.5" width="203.5" height="17.5" fill="white"/>
<rect x="234" y="379.5" width="203.5" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_3">
<rect x="453.5" y="379.5" width="203.5" height="17.5" fill="white"/>
<rect x="453.5" y="379.5" width="203.5" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_4">
<rect x="672.5" y="379.5" width="203.5" height="17.5" fill="white"/>
<rect x="672.5" y="379.5" width="203.5" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_5">
<rect x="234" y="288.5" width="127" height="77.5" fill="white"/>
<rect x="234" y="288.5" width="127" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_6">
<rect x="375" y="288.5" width="127" height="77.5" fill="white"/>
<rect x="375" y="288.5" width="127" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_7">
<rect x="516" y="288.5" width="127" height="77.5" fill="white"/>
<rect x="516" y="288.5" width="127" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_8">
<rect x="657" y="288.5" width="127" height="77.5" fill="white"/>
<rect x="657" y="288.5" width="127" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_9">
<rect x="798" y="288.5" width="78" height="77.5" fill="white"/>
<rect x="798" y="288.5" width="78" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_11">
<line x1="185.5" y1="326.75" x2="943.7734" y2="326.75" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_12">
<text transform="translate(87 318.026)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">GC Horizon</tspan>
</text>
</g>
<g id="Graphic_13">
<text transform="translate(106.41 372.886)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="8.39" y="15" xml:space="preserve">Images </tspan>
<tspan font-family="Helvetica Neue" font-size="10" fill="black" x="29132252e-19" y="28.447998" xml:space="preserve">at earlier LSN</tspan>
</text>
</g>
<g id="Graphic_14">
<text transform="translate(121.92 289.578)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="8739676e-19" y="15" xml:space="preserve">Deltas</tspan>
</text>
</g>
<g id="Graphic_15">
<path d="M 517.125 423.5 L 553.375 423.5 L 553.375 482 L 571.5 482 L 535.25 512 L 499 482 L 517.125 482 Z" fill="white"/>
<path d="M 517.125 423.5 L 553.375 423.5 L 553.375 482 L 571.5 482 L 535.25 512 L 499 482 L 517.125 482 Z" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_26">
<rect x="234" y="599.474" width="203.5" height="17.5" fill="white"/>
<rect x="234" y="599.474" width="203.5" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_25">
<rect x="453.5" y="599.474" width="203.5" height="17.5" fill="white"/>
<rect x="453.5" y="599.474" width="203.5" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_24">
<rect x="672.5" y="599.474" width="203.5" height="17.5" fill="white"/>
<rect x="672.5" y="599.474" width="203.5" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_23">
<rect x="234" y="533" width="127" height="52.974" fill="white"/>
<rect x="234" y="533" width="127" height="52.974" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_22">
<rect x="375" y="533" width="310.5" height="52.974" fill="white"/>
<rect x="375" y="533" width="310.5" height="52.974" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_21">
<rect x="702.5" y="533" width="173.5" height="52.974" fill="white"/>
<rect x="702.5" y="533" width="173.5" height="52.974" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_18">
<line x1="185.5" y1="607.724" x2="943.7734" y2="607.724" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_16">
<text transform="translate(121.92 538)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="8739676e-19" y="15" xml:space="preserve">Deltas</tspan>
</text>
</g>
<g id="Graphic_27">
<text transform="translate(114.8 592.86)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="3488765e-18" y="15" xml:space="preserve">Images </tspan>
<tspan font-family="Helvetica Neue" font-size="10" fill="black" x="4.01" y="28.447998" xml:space="preserve">at GC LSN</tspan>
</text>
</g>
<g id="Graphic_28">
<rect x="243.06836" y="300" width="624.3633" height="17.5" fill="#c0ffc0"/>
<text transform="translate(248.06836 301.068)" fill="black">
<tspan font-family="Helvetica Neue" font-size="13" fill="black" x="233.52364" y="12" xml:space="preserve">Deltas above GC Horizon</tspan>
</text>
</g>
<g id="Graphic_30">
<rect x="243.06836" y="335.5" width="624.3633" height="17.5" fill="#c0ffff"/>
<text transform="translate(248.06836 336.568)" fill="black">
<tspan font-family="Helvetica Neue" font-size="13" fill="black" x="233.89414" y="12" xml:space="preserve">Deltas below GC Horizon</tspan>
</text>
</g>
<g id="Graphic_32">
<rect x="243.06836" y="550.737" width="624.3633" height="17.5" fill="#c0ffc0"/>
<text transform="translate(248.06836 551.805)" fill="black">
<tspan font-family="Helvetica Neue" font-size="13" fill="black" x="233.52364" y="12" xml:space="preserve">Deltas above GC Horizon</tspan>
</text>
</g>
<g id="Graphic_33">
<rect x="304" y="630.474" width="485.5" height="28.447998" fill="#c0ffff"/>
<text transform="translate(309 637.016)" fill="black">
<tspan font-family="Helvetica Neue" font-size="13" fill="black" x="63.095" y="12" xml:space="preserve">Deltas and image below GC Horizon gets garbage-collected</tspan>
</text>
</g>
<g id="Graphic_34">
<text transform="translate(576.5 444.0325)" fill="black">
<tspan font-family="Helvetica Neue" font-size="12" fill="black" x="0" y="11" xml:space="preserve">WAL replay of deltas+image below GC Horizon</tspan>
<tspan font-family="Helvetica Neue" font-size="12" fill="black" x="0" y="25.336" xml:space="preserve">Reshuffle deltas</tspan>
</text>
</g>
</g>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 8.1 KiB

View File

@@ -1,141 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xl="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" version="1.1" viewBox="-104 215 863 335" width="863" height="335">
<defs>
<marker orient="auto" overflow="visible" markerUnits="strokeWidth" id="FilledArrow_Marker" stroke-linejoin="miter" stroke-miterlimit="10" viewBox="-1 -4 10 8" markerWidth="10" markerHeight="8" color="#7f8080">
<g>
<path d="M 8 0 L 0 -3 L 0 3 Z" fill="currentColor" stroke="currentColor" stroke-width="1"/>
</g>
</marker>
</defs>
<g id="03-retain-lsn" stroke-opacity="1" stroke-dasharray="none" stroke="none" fill="none" fill-opacity="1">
<title>03-retain-lsn</title>
<rect fill="white" x="-104" y="215" width="863" height="335"/>
<g id="03-retain-lsn_Layer_1">
<title>Layer 1</title>
<g id="Graphic_28">
<rect x="48" y="477" width="203.5" height="9.990005" fill="white"/>
<rect x="48" y="477" width="203.5" height="9.990005" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_27">
<rect x="267.5" y="477" width="203.5" height="9.990005" fill="white"/>
<rect x="267.5" y="477" width="203.5" height="9.990005" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_26">
<rect x="486.5" y="477" width="203.5" height="9.990005" fill="white"/>
<rect x="486.5" y="477" width="203.5" height="9.990005" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_20">
<line x1="-.5" y1="387.172" x2="757.7734" y2="387.172" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_19">
<text transform="translate(-99 378.448)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">GC Horizon</tspan>
</text>
</g>
<g id="Graphic_31">
<rect x="48.25" y="410" width="203.5" height="9.990005" fill="white"/>
<rect x="48.25" y="410" width="203.5" height="9.990005" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_30">
<rect x="267.75" y="410" width="203.5" height="9.990005" fill="white"/>
<rect x="267.75" y="410" width="203.5" height="9.990005" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_29">
<rect x="486.75" y="410" width="203.5" height="9.990005" fill="white"/>
<rect x="486.75" y="410" width="203.5" height="9.990005" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_34">
<rect x="48.25" y="431.495" width="113.75" height="34" fill="white"/>
<rect x="48.25" y="431.495" width="113.75" height="34" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_33">
<rect x="172.5" y="431.495" width="203.5" height="34" fill="white"/>
<rect x="172.5" y="431.495" width="203.5" height="34" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_32">
<rect x="386.5" y="431.495" width="303.5" height="34" fill="white"/>
<rect x="386.5" y="431.495" width="303.5" height="34" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_37">
<rect x="48" y="498.495" width="203.5" height="9.990005" fill="white"/>
<rect x="48" y="498.495" width="203.5" height="9.990005" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_36">
<rect x="267.5" y="498.495" width="203.5" height="9.990005" fill="white"/>
<rect x="267.5" y="498.495" width="203.5" height="9.990005" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_35">
<rect x="486.5" y="498.495" width="203.5" height="9.990005" fill="white"/>
<rect x="486.5" y="498.495" width="203.5" height="9.990005" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_38">
<line x1="-10.48" y1="535.5395" x2="39.318294" y2="508.24794" marker-end="url(#FilledArrow_Marker)" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_39">
<text transform="translate(-96.984 526.3155)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="40500936e-20" y="15" xml:space="preserve">retain_lsn 1</tspan>
</text>
</g>
<g id="Line_41">
<line x1="-10.48" y1="507.0915" x2="38.90236" y2="485.8992" marker-end="url(#FilledArrow_Marker)" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_40">
<text transform="translate(-96.984 497.8675)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="40500936e-20" y="15" xml:space="preserve">retain_lsn 2</tspan>
</text>
</g>
<g id="Line_43">
<line x1="-10.48" y1="478.6435" x2="39.44267" y2="453.01616" marker-end="url(#FilledArrow_Marker)" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_42">
<text transform="translate(-96.984 469.4195)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="40500936e-20" y="15" xml:space="preserve">retain_lsn 3</tspan>
</text>
</g>
<g id="Line_45">
<line x1="-10.48" y1="448.495" x2="39.65061" y2="419.90015" marker-end="url(#FilledArrow_Marker)" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_44">
<text transform="translate(-96.984 439.271)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="40500936e-20" y="15" xml:space="preserve">retain_lsn 4</tspan>
</text>
</g>
<g id="Graphic_46">
<rect x="335.46477" y="215.5" width="353.4299" height="125.495" fill="white"/>
<rect x="335.46477" y="215.5" width="353.4299" height="125.495" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_48">
<text transform="translate(549.3766 317.547)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="6536993e-19" y="15" xml:space="preserve">Dependent Branch</tspan>
</text>
</g>
<g id="Graphic_50">
<text transform="translate(340.43824 317.547)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="40500936e-20" y="15" xml:space="preserve">retain_lsn 3</tspan>
</text>
</g>
<g id="Line_57">
<line x1="323.90685" y1="248.8045" x2="714.9232" y2="248.8045" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_56">
<text transform="translate(165.91346 240.0805)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="35811354e-19" y="15" xml:space="preserve">Branch GC Horizon</tspan>
</text>
</g>
<g id="Graphic_58">
<rect x="493.9232" y="301.6405" width="107.45294" height="9.990005" fill="white"/>
<rect x="493.9232" y="301.6405" width="107.45294" height="9.990005" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_59">
<text transform="translate(358.9232 277.276)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">Partial Image Coverage</tspan>
</text>
</g>
<g id="Graphic_60">
<rect x="354.1732" y="301.6405" width="107.45294" height="9.990005" fill="white"/>
<rect x="354.1732" y="301.6405" width="107.45294" height="9.990005" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
</g>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 8.4 KiB

View File

@@ -1,187 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xl="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" version="1.1" viewBox="-235 426 864 366" width="864" height="366">
<defs/>
<g id="05-btmgc-parent" stroke-opacity="1" stroke-dasharray="none" stroke="none" fill="none" fill-opacity="1">
<title>05-btmgc-parent</title>
<rect fill="white" x="-235" y="426" width="864" height="366"/>
<g id="05-btmgc-parent_Layer_1">
<title>Layer 1</title>
<g id="Graphic_23">
<rect x="-83" y="510.15" width="203.5" height="26.391998" fill="white"/>
<rect x="-83" y="510.15" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-78 516.178)" fill="black">
<tspan font-family="Helvetica Neue" font-size="12" fill="black" x="51.714" y="11" xml:space="preserve">Append C@0x30</tspan>
</text>
</g>
<g id="Graphic_22">
<rect x="136.5" y="510.15" width="203.5" height="26.391998" fill="white"/>
<rect x="136.5" y="510.15" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_21">
<rect x="355.5" y="510.15" width="203.5" height="26.391998" fill="white"/>
<rect x="355.5" y="510.15" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_20">
<line x1="-100.448" y1="459.224" x2="626.77344" y2="459.224" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_19">
<text transform="translate(-230 450.5)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">GC Horizon</tspan>
</text>
</g>
<g id="Graphic_18">
<rect x="-82.75" y="426.748" width="203.5" height="26.391998" fill="white"/>
<rect x="-82.75" y="426.748" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-77.75 432.776)" fill="black">
<tspan font-family="Helvetica Neue" font-size="12" fill="black" x="52.602" y="11" xml:space="preserve">Append F@0x60</tspan>
</text>
</g>
<g id="Graphic_17">
<rect x="136.75" y="426.748" width="203.5" height="26.391998" fill="white"/>
<rect x="136.75" y="426.748" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_16">
<rect x="355.75" y="426.748" width="203.5" height="26.391998" fill="white"/>
<rect x="355.75" y="426.748" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_15">
<rect x="-82.75" y="464.645" width="113.75" height="34" fill="white"/>
<rect x="-82.75" y="464.645" width="113.75" height="34" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-77.75 467.309)" fill="black">
<tspan font-family="Helvetica Neue" font-size="12" fill="black" x="7.505" y="11" xml:space="preserve">Append E@0x50</tspan>
<tspan font-family="Helvetica Neue" font-size="12" fill="black" x="6.947" y="25.336" xml:space="preserve">Append D@0x40</tspan>
</text>
</g>
<g id="Graphic_14">
<rect x="41.5" y="464.645" width="203.5" height="34" fill="white"/>
<rect x="41.5" y="464.645" width="203.5" height="34" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_13">
<rect x="255.5" y="464.645" width="303.5" height="34" fill="white"/>
<rect x="255.5" y="464.645" width="303.5" height="34" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_12">
<rect x="-83" y="548.047" width="203.5" height="26.391998" fill="white"/>
<rect x="-83" y="548.047" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-78 554.075)" fill="black">
<tspan font-family="Helvetica Neue" font-size="12" fill="black" x="26.796" y="11" xml:space="preserve">A@0x10, Append B@0x20</tspan>
</text>
</g>
<g id="Graphic_11">
<rect x="136.5" y="548.047" width="203.5" height="26.391998" fill="white"/>
<rect x="136.5" y="548.047" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_10">
<rect x="355.5" y="548.047" width="203.5" height="26.391998" fill="white"/>
<rect x="355.5" y="548.047" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_24">
<line x1="-104" y1="542" x2="610.5" y2="542" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="4.0,4.0" stroke-width="1"/>
</g>
<g id="Graphic_25">
<text transform="translate(-139.604 534.5)" fill="black">
<tspan font-family="Helvetica Neue" font-size="14" fill="black" x="0" y="13" xml:space="preserve">0x20</tspan>
</text>
</g>
<g id="Graphic_28">
<text transform="translate(-139.604 452.556)" fill="black">
<tspan font-family="Helvetica Neue" font-size="14" fill="black" x="0" y="13" xml:space="preserve">0x50</tspan>
</text>
</g>
<g id="Line_30">
<line x1="-100.448" y1="481.145" x2="614.052" y2="481.145" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="4.0,4.0" stroke-width="1"/>
</g>
<g id="Graphic_29">
<text transform="translate(-139.604 473.449)" fill="black">
<tspan font-family="Helvetica Neue" font-size="14" fill="black" x="0" y="13" xml:space="preserve">0x40</tspan>
</text>
</g>
<g id="Line_48">
<line x1="-99.448" y1="701.513" x2="627.77344" y2="701.513" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_47">
<text transform="translate(-229 692.789)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">GC Horizon</tspan>
</text>
</g>
<g id="Graphic_46">
<rect x="-81.75" y="670.496" width="113.75" height="26.391998" fill="white"/>
<rect x="-81.75" y="670.496" width="113.75" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-76.75 676.524)" fill="black">
<tspan font-family="Helvetica Neue" font-size="12" fill="black" x="7.727" y="11" xml:space="preserve">Append F@0x60</tspan>
</text>
</g>
<g id="Graphic_43">
<rect x="-81.75" y="708.393" width="113.75" height="34" fill="white"/>
<rect x="-81.75" y="708.393" width="113.75" height="34" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-76.75 718.225)" fill="black">
<tspan font-family="Helvetica Neue" font-size="12" fill="black" x="7.505" y="11" xml:space="preserve">Append E@0x50</tspan>
</text>
</g>
<g id="Line_37">
<line x1="-101" y1="777.2665" x2="613.5" y2="777.2665" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="4.0,4.0" stroke-width="1"/>
</g>
<g id="Graphic_36">
<text transform="translate(-138.604 769.7665)" fill="black">
<tspan font-family="Helvetica Neue" font-size="14" fill="black" x="0" y="13" xml:space="preserve">0x20</tspan>
</text>
</g>
<g id="Graphic_33">
<text transform="translate(-138.604 694.845)" fill="black">
<tspan font-family="Helvetica Neue" font-size="14" fill="black" x="0" y="13" xml:space="preserve">0x50</tspan>
</text>
</g>
<g id="Line_32">
<line x1="-99.448" y1="755.089" x2="615.052" y2="755.089" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="4.0,4.0" stroke-width="1"/>
</g>
<g id="Graphic_31">
<text transform="translate(-138.604 747.393)" fill="black">
<tspan font-family="Helvetica Neue" font-size="14" fill="black" x="0" y="13" xml:space="preserve">0x40</tspan>
</text>
</g>
<g id="Graphic_40">
<rect x="-82" y="770.909" width="203.5" height="14.107002" fill="white"/>
<rect x="-82" y="770.909" width="203.5" height="14.107002" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-77 770.7945)" fill="black">
<tspan font-family="Helvetica Neue" font-size="12" fill="black" x="70.836" y="11" xml:space="preserve">AB@0x20</tspan>
</text>
</g>
<g id="Graphic_39">
<rect x="137.5" y="770.909" width="203.5" height="14.107002" fill="white"/>
<rect x="137.5" y="770.909" width="203.5" height="14.107002" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_38">
<rect x="356.5" y="770.909" width="203.5" height="14.107002" fill="white"/>
<rect x="356.5" y="770.909" width="203.5" height="14.107002" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_54">
<rect x="-81.75" y="748.5355" width="203.5" height="14.107002" fill="white"/>
<rect x="-81.75" y="748.5355" width="203.5" height="14.107002" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-76.75 748.421)" fill="black">
<tspan font-family="Helvetica Neue" font-size="12" fill="black" x="62.28" y="11" xml:space="preserve">ABCD@0x40</tspan>
</text>
</g>
<g id="Graphic_53">
<rect x="137.75" y="748.5355" width="203.5" height="14.107002" fill="white"/>
<rect x="137.75" y="748.5355" width="203.5" height="14.107002" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_52">
<rect x="356.75" y="748.5355" width="203.5" height="14.107002" fill="white"/>
<rect x="356.75" y="748.5355" width="203.5" height="14.107002" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_57">
<path d="M 211.32422 585 L 265.17578 585 L 265.17578 611.332 L 287.84375 611.332 L 238.25 633.117 L 188.65625 611.332 L 211.32422 611.332 Z" fill="white"/>
<path d="M 211.32422 585 L 265.17578 585 L 265.17578 611.332 L 287.84375 611.332 L 238.25 633.117 L 188.65625 611.332 L 211.32422 611.332 Z" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_60">
<rect x="359" y="692.858" width="203.5" height="14.107002" fill="white"/>
<rect x="359" y="692.858" width="203.5" height="14.107002" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_59">
<rect x="41.5" y="693.858" width="303" height="14.107002" fill="white"/>
<rect x="41.5" y="693.858" width="303" height="14.107002" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
</g>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 12 KiB

View File

@@ -1,184 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xl="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" version="1.1" viewBox="-413 471 931 354" width="931" height="354">
<defs/>
<g id="06-btmgc-child" stroke-opacity="1" stroke-dasharray="none" stroke="none" fill="none" fill-opacity="1">
<title>06-btmgc-child</title>
<rect fill="white" x="-413" y="471" width="931" height="354"/>
<g id="06-btmgc-child_Layer_1">
<title>Layer 1</title>
<g id="Graphic_47">
<rect x="-412" y="594.402" width="928" height="28.447998" fill="white"/>
<rect x="-412" y="594.402" width="928" height="28.447998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_46">
<rect x="-205" y="555.552" width="203.5" height="26.391998" fill="white"/>
<rect x="-205" y="555.552" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-200 561.58)" fill="black">
<tspan font-family="Helvetica Neue" font-size="12" fill="black" x="52.158" y="11" xml:space="preserve">Append P@0x30</tspan>
</text>
</g>
<g id="Graphic_45">
<rect x="14.5" y="555.552" width="203.5" height="26.391998" fill="white"/>
<rect x="14.5" y="555.552" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_44">
<rect x="233.5" y="555.552" width="203.5" height="26.391998" fill="white"/>
<rect x="233.5" y="555.552" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_43">
<line x1="-222.448" y1="504.724" x2="504.77344" y2="504.724" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_42">
<text transform="translate(-352 496)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">GC Horizon</tspan>
</text>
</g>
<g id="Graphic_41">
<rect x="-204.75" y="472.15" width="203.5" height="26.391998" fill="white"/>
<rect x="-204.75" y="472.15" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-199.75 478.178)" fill="black">
<tspan font-family="Helvetica Neue" font-size="12" fill="black" x="52.158" y="11" xml:space="preserve">Append S@0x60</tspan>
</text>
</g>
<g id="Graphic_40">
<rect x="14.75" y="472.15" width="203.5" height="26.391998" fill="white"/>
<rect x="14.75" y="472.15" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_39">
<rect x="233.75" y="472.15" width="203.5" height="26.391998" fill="white"/>
<rect x="233.75" y="472.15" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_38">
<rect x="-204.75" y="510.047" width="113.75" height="34" fill="white"/>
<rect x="-204.75" y="510.047" width="113.75" height="34" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-199.75 512.711)" fill="black">
<tspan font-family="Helvetica Neue" font-size="12" fill="black" x="7.061" y="11" xml:space="preserve">Append R@0x50</tspan>
<tspan font-family="Helvetica Neue" font-size="12" fill="black" x="6.611" y="25.336" xml:space="preserve">Append Q@0x40</tspan>
</text>
</g>
<g id="Graphic_37">
<rect x="-80.5" y="510.047" width="203.5" height="34" fill="white"/>
<rect x="-80.5" y="510.047" width="203.5" height="34" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_36">
<rect x="133.5" y="510.047" width="303.5" height="34" fill="white"/>
<rect x="133.5" y="510.047" width="303.5" height="34" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_33">
<text transform="translate(-261.604 498.056)" fill="black">
<tspan font-family="Helvetica Neue" font-size="14" fill="black" x="0" y="13" xml:space="preserve">0x50</tspan>
</text>
</g>
<g id="Line_30">
<line x1="-224" y1="607.9115" x2="490.5" y2="607.9115" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="4.0,4.0" stroke-width="1"/>
</g>
<g id="Graphic_29">
<text transform="translate(-261.604 600.4115)" fill="black">
<tspan font-family="Helvetica Neue" font-size="14" fill="black" x="0" y="13" xml:space="preserve">0x20</tspan>
</text>
</g>
<g id="Graphic_28">
<rect x="-205" y="601.554" width="203.5" height="14.107002" fill="white"/>
<rect x="-205" y="601.554" width="203.5" height="14.107002" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-200 601.4395)" fill="black">
<tspan font-family="Helvetica Neue" font-size="12" fill="black" x="70.836" y="11" xml:space="preserve">AB@0x20</tspan>
</text>
</g>
<g id="Graphic_27">
<rect x="14.5" y="601.554" width="203.5" height="14.107002" fill="white"/>
<rect x="14.5" y="601.554" width="203.5" height="14.107002" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_26">
<rect x="233.5" y="601.554" width="203.5" height="14.107002" fill="white"/>
<rect x="233.5" y="601.554" width="203.5" height="14.107002" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_25">
<text transform="translate(-407 599.1875)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">Ancestor Branch</tspan>
</text>
</g>
<g id="Graphic_24">
<rect x="-411" y="795.46" width="928" height="28.447998" fill="white"/>
<rect x="-411" y="795.46" width="928" height="28.447998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_20">
<line x1="-221.448" y1="755.528" x2="505.77344" y2="755.528" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_19">
<text transform="translate(-351 746.804)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">GC Horizon</tspan>
</text>
</g>
<g id="Graphic_18">
<rect x="-203.75" y="723.579" width="203.25" height="26.391998" fill="white"/>
<rect x="-203.75" y="723.579" width="203.25" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-198.75 729.607)" fill="black">
<tspan font-family="Helvetica Neue" font-size="12" fill="black" x="52.033" y="11" xml:space="preserve">Append S@0x60</tspan>
</text>
</g>
<g id="Graphic_10">
<text transform="translate(-260.604 748.86)" fill="black">
<tspan font-family="Helvetica Neue" font-size="14" fill="black" x="0" y="13" xml:space="preserve">0x50</tspan>
</text>
</g>
<g id="Line_7">
<line x1="-223" y1="808.9695" x2="491.5" y2="808.9695" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="4.0,4.0" stroke-width="1"/>
</g>
<g id="Graphic_6">
<text transform="translate(-260.604 801.4695)" fill="black">
<tspan font-family="Helvetica Neue" font-size="14" fill="black" x="0" y="13" xml:space="preserve">0x20</tspan>
</text>
</g>
<g id="Graphic_5">
<rect x="-204" y="802.612" width="203.5" height="14.107002" fill="white"/>
<rect x="-204" y="802.612" width="203.5" height="14.107002" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-199 802.4975)" fill="#b1001c">
<tspan font-family="Helvetica Neue" font-size="12" fill="#b1001c" x="70.836" y="11" xml:space="preserve">AB</tspan>
<tspan font-family="Helvetica Neue" font-size="12" fill="black" y="11" xml:space="preserve">@0x20</tspan>
</text>
</g>
<g id="Graphic_4">
<rect x="15.5" y="802.612" width="203.5" height="14.107002" fill="white"/>
<rect x="15.5" y="802.612" width="203.5" height="14.107002" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_3">
<rect x="234.5" y="802.612" width="203.5" height="14.107002" fill="white"/>
<rect x="234.5" y="802.612" width="203.5" height="14.107002" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_2">
<text transform="translate(-406 800.2455)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">Ancestor Branch</tspan>
</text>
</g>
<g id="Graphic_48">
<path d="M 89.32422 639.081 L 143.17578 639.081 L 143.17578 665.413 L 165.84375 665.413 L 116.25 687.198 L 66.65625 665.413 L 89.32422 665.413 Z" fill="white"/>
<path d="M 89.32422 639.081 L 143.17578 639.081 L 143.17578 665.413 L 165.84375 665.413 L 116.25 687.198 L 66.65625 665.413 L 89.32422 665.413 Z" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_49">
<rect x="-204" y="762.428" width="203.5" height="26.391998" fill="white"/>
<rect x="-204" y="762.428" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-199 768.456)" fill="#b1001c">
<tspan font-family="Helvetica Neue" font-size="12" fill="#b1001c" x="58.278" y="11" xml:space="preserve">AB</tspan>
<tspan font-family="Helvetica Neue" font-size="12" fill="black" y="11" xml:space="preserve">PQR@0x50</tspan>
</text>
</g>
<g id="Graphic_59">
<rect x="14.5" y="723.579" width="203.5" height="26.391998" fill="white"/>
<rect x="14.5" y="723.579" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_58">
<rect x="233.5" y="723.579" width="203.5" height="26.391998" fill="white"/>
<rect x="233.5" y="723.579" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_63">
<rect x="9" y="762.085" width="203.5" height="26.391998" fill="white"/>
<rect x="9" y="762.085" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_62">
<rect x="225" y="762.085" width="213" height="26.391998" fill="white"/>
<rect x="225" y="762.085" width="213" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
</g>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 12 KiB

View File

@@ -1,180 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xl="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" version="1.1" viewBox="-556 476 923 411" width="923" height="411">
<defs/>
<g id="07-btmgc-analysis-1" stroke-opacity="1" stroke-dasharray="none" stroke="none" fill="none" fill-opacity="1">
<title>07-btmgc-analysis-1</title>
<rect fill="white" x="-556" y="476" width="923" height="411"/>
<g id="07-btmgc-analysis-1_Layer_1">
<title>Layer 1</title>
<g id="Graphic_85">
<rect x="-404" y="609.062" width="203.5" height="17.5" fill="white"/>
<rect x="-404" y="609.062" width="203.5" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_84">
<rect x="-184.5" y="609.062" width="203.5" height="17.5" fill="white"/>
<rect x="-184.5" y="609.062" width="203.5" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_83">
<rect x="34.5" y="609.062" width="203.5" height="17.5" fill="white"/>
<rect x="34.5" y="609.062" width="203.5" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_82">
<rect x="-404" y="479.922" width="127" height="77.5" fill="white"/>
<rect x="-404" y="479.922" width="127" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_81">
<rect x="-263" y="479.922" width="127" height="77.5" fill="white"/>
<rect x="-263" y="479.922" width="127" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_80">
<rect x="-122" y="479.922" width="127" height="77.5" fill="white"/>
<rect x="-122" y="479.922" width="127" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_79">
<rect x="19" y="479.922" width="127" height="77.5" fill="white"/>
<rect x="19" y="479.922" width="127" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_78">
<rect x="160" y="479.922" width="78" height="77.5" fill="white"/>
<rect x="160" y="479.922" width="78" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_77">
<line x1="-452.5" y1="518.172" x2="251" y2="518.172" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_76">
<text transform="translate(-551 509.448)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">GC Horizon</tspan>
</text>
</g>
<g id="Graphic_75">
<text transform="translate(-531.59 602.448)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="8.39" y="15" xml:space="preserve">Images </tspan>
<tspan font-family="Helvetica Neue" font-size="10" fill="black" x="29132252e-19" y="28.447998" xml:space="preserve">at earlier LSN</tspan>
</text>
</g>
<g id="Graphic_74">
<text transform="translate(-516.08 481)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="8739676e-19" y="15" xml:space="preserve">Deltas</tspan>
</text>
</g>
<g id="Graphic_73">
<path d="M -120.675 651.5 L -84.425 651.5 L -84.425 710 L -66.3 710 L -102.55 740 L -138.8 710 L -120.675 710 Z" fill="white"/>
<path d="M -120.675 651.5 L -84.425 651.5 L -84.425 710 L -66.3 710 L -102.55 740 L -138.8 710 L -120.675 710 Z" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_72">
<rect x="-403.8" y="827.474" width="203.5" height="17.5" fill="white"/>
<rect x="-403.8" y="827.474" width="203.5" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_71">
<rect x="-184.3" y="827.474" width="203.5" height="17.5" fill="white"/>
<rect x="-184.3" y="827.474" width="203.5" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_70">
<rect x="34.7" y="827.474" width="203.5" height="17.5" fill="white"/>
<rect x="34.7" y="827.474" width="203.5" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_69">
<rect x="-403.8" y="761" width="127" height="52.974" fill="white"/>
<rect x="-403.8" y="761" width="127" height="52.974" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_68">
<rect x="-262.8" y="761" width="310.5" height="52.974" fill="white"/>
<rect x="-262.8" y="761" width="310.5" height="52.974" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_67">
<rect x="64.7" y="761" width="173.5" height="52.974" fill="white"/>
<rect x="64.7" y="761" width="173.5" height="52.974" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_66">
<line x1="-452.3" y1="835.724" x2="251.2" y2="835.724" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_65">
<text transform="translate(-515.88 766)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="8739676e-19" y="15" xml:space="preserve">Deltas</tspan>
</text>
</g>
<g id="Graphic_64">
<text transform="translate(-523 820.86)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="3488765e-18" y="15" xml:space="preserve">Images </tspan>
<tspan font-family="Helvetica Neue" font-size="10" fill="black" x="4.01" y="28.447998" xml:space="preserve">at GC LSN</tspan>
</text>
</g>
<g id="Graphic_63">
<rect x="-394.93164" y="491.422" width="624.3633" height="17.5" fill="#c0ffc0"/>
<text transform="translate(-389.93164 492.49)" fill="black">
<tspan font-family="Helvetica Neue" font-size="13" fill="black" x="233.52364" y="12" xml:space="preserve">Deltas above GC Horizon</tspan>
</text>
</g>
<g id="Graphic_62">
<rect x="-394.93164" y="526.922" width="624.3633" height="17.5" fill="#c0ffff"/>
<text transform="translate(-389.93164 527.99)" fill="black">
<tspan font-family="Helvetica Neue" font-size="13" fill="black" x="233.89414" y="12" xml:space="preserve">Deltas below GC Horizon</tspan>
</text>
</g>
<g id="Graphic_61">
<rect x="-394.73164" y="778.737" width="624.3633" height="17.5" fill="#c0ffc0"/>
<text transform="translate(-389.73164 779.805)" fill="black">
<tspan font-family="Helvetica Neue" font-size="13" fill="black" x="233.52364" y="12" xml:space="preserve">Deltas above GC Horizon</tspan>
</text>
</g>
<g id="Graphic_60">
<rect x="-333.8" y="858.474" width="485.5" height="28.447998" fill="#c0ffff"/>
<text transform="translate(-328.8 865.016)" fill="black">
<tspan font-family="Helvetica Neue" font-size="13" fill="black" x="63.095" y="12" xml:space="preserve">Deltas and image below GC Horizon gets garbage-collected</tspan>
</text>
</g>
<g id="Graphic_86">
<text transform="translate(263 499.724)" fill="black">
<tspan font-family="Helvetica Neue" font-size="32" fill="black" x="0" y="30" xml:space="preserve">size=A</tspan>
</text>
</g>
<g id="Line_87">
<line x1="260.87012" y1="479.068" x2="360.71387" y2="479.068" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_88">
<line x1="260.87012" y1="561" x2="360.71387" y2="561" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_89">
<rect x="-403.8" y="569" width="161.8" height="28.447998" fill="white"/>
<rect x="-403.8" y="569" width="161.8" height="28.447998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_90">
<rect x="-229.5" y="569.018" width="277.2" height="28.447998" fill="white"/>
<rect x="-229.5" y="569.018" width="277.2" height="28.447998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_91">
<rect x="64.7" y="569.018" width="173.5" height="28.447998" fill="white"/>
<rect x="64.7" y="569.018" width="173.5" height="28.447998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_92">
<line x1="262" y1="602" x2="361.84375" y2="602" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_93">
<line x1="263" y1="625.562" x2="362.84375" y2="625.562" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_94">
<text transform="translate(264.53787 562.276)" fill="black">
<tspan font-family="Helvetica Neue" font-size="32" fill="black" x="14210855e-21" y="30" xml:space="preserve">size=B</tspan>
</text>
</g>
<g id="Graphic_95">
<text transform="translate(285.12 599.5)" fill="black">
<tspan font-family="Helvetica Neue" font-size="24" fill="black" x="0" y="23" xml:space="preserve">size=C</tspan>
</text>
</g>
<g id="Graphic_98">
<text transform="translate(264.53787 773.772)" fill="black">
<tspan font-family="Helvetica Neue" font-size="26" fill="black" x="8881784e-19" y="25" xml:space="preserve">A</tspan>
<tspan font-family="Lucida Grande" font-size="26" fill="black" y="25" xml:space="preserve"></tspan>
</text>
</g>
<g id="Graphic_97">
<text transform="translate(265.87013 815.5)" fill="black">
<tspan font-family="Helvetica Neue" font-size="26" fill="black" x="6536993e-19" y="25" xml:space="preserve">B</tspan>
<tspan font-family="Lucida Grande" font-size="26" fill="black" y="25" xml:space="preserve"></tspan>
</text>
</g>
</g>
</g>
</svg>

View File

@@ -1,158 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xl="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" version="1.1" viewBox="-235 406 586 424" width="586" height="424">
<defs/>
<g id="08-optimization" stroke-opacity="1" stroke-dasharray="none" stroke="none" fill="none" fill-opacity="1">
<title>08-optimization</title>
<rect fill="white" x="-235" y="406" width="586" height="424"/>
<g id="08-optimization_Layer_1">
<title>Layer 1</title>
<g id="Graphic_22">
<rect x="-100.448" y="509.902" width="203.5" height="26.391998" fill="white"/>
<rect x="-100.448" y="509.902" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_21">
<rect x="118.552" y="509.902" width="203.5" height="26.391998" fill="white"/>
<rect x="118.552" y="509.902" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_20">
<line x1="-101.79572" y1="420.322" x2="349.5" y2="420.322" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_19">
<text transform="translate(-230 411.598)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">GC Horizon</tspan>
</text>
</g>
<g id="Graphic_17">
<rect x="-100.198" y="426.5" width="203.5" height="26.391998" fill="white"/>
<rect x="-100.198" y="426.5" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_16">
<rect x="118.802" y="426.5" width="203.5" height="26.391998" fill="white"/>
<rect x="118.802" y="426.5" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_14">
<rect x="-100.198" y="464.397" width="108.25" height="34" fill="white"/>
<rect x="-100.198" y="464.397" width="108.25" height="34" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_13">
<rect x="18.552" y="464.397" width="303.5" height="34" fill="white"/>
<rect x="18.552" y="464.397" width="303.5" height="34" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_11">
<rect x="-100.448" y="547.799" width="203.5" height="26.391998" fill="white"/>
<rect x="-100.448" y="547.799" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_10">
<rect x="118.552" y="547.799" width="203.5" height="26.391998" fill="white"/>
<rect x="118.552" y="547.799" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_24">
<line x1="-104" y1="542" x2="339.4011" y2="542" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="4.0,4.0" stroke-width="1"/>
</g>
<g id="Graphic_25">
<text transform="translate(-139.604 534.5)" fill="black">
<tspan font-family="Helvetica Neue" font-size="14" fill="black" x="0" y="13" xml:space="preserve">0x20</tspan>
</text>
</g>
<g id="Line_27">
<line x1="-101.79572" y1="459.098" x2="341.6054" y2="459.098" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="4.0,4.0" stroke-width="1"/>
</g>
<g id="Graphic_26">
<text transform="translate(-139.604 451.402)" fill="black">
<tspan font-family="Helvetica Neue" font-size="14" fill="black" x="0" y="13" xml:space="preserve">0x50</tspan>
</text>
</g>
<g id="Graphic_28">
<text transform="translate(-139.604 413.654)" fill="black">
<tspan font-family="Helvetica Neue" font-size="14" fill="black" x="0" y="13" xml:space="preserve">0x60</tspan>
</text>
</g>
<g id="Line_30">
<line x1="-101.79572" y1="481.145" x2="341.6054" y2="481.145" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="4.0,4.0" stroke-width="1"/>
</g>
<g id="Graphic_29">
<text transform="translate(-139.604 473.449)" fill="black">
<tspan font-family="Helvetica Neue" font-size="14" fill="black" x="0" y="13" xml:space="preserve">0x40</tspan>
</text>
</g>
<g id="Graphic_77">
<rect x="-100.448" y="765.19595" width="203.5" height="26.391998" fill="white"/>
<rect x="-100.448" y="765.19595" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_76">
<rect x="118.552" y="765.19595" width="203.5" height="26.391998" fill="white"/>
<rect x="118.552" y="765.19595" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_75">
<line x1="-101.79572" y1="637.317" x2="349.5" y2="637.317" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_74">
<text transform="translate(-230 628.593)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">GC Horizon</tspan>
</text>
</g>
<g id="Graphic_73">
<rect x="-100.198" y="681.794" width="203.5" height="26.391998" fill="white"/>
<rect x="-100.198" y="681.794" width="203.5" height="26.391998" stroke="#b1001c" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_72">
<rect x="118.802" y="681.794" width="203.5" height="26.391998" fill="white"/>
<rect x="118.802" y="681.794" width="203.5" height="26.391998" stroke="#b1001c" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_71">
<rect x="-100.198" y="719.69096" width="108.25" height="34" fill="white"/>
<rect x="-100.198" y="719.69096" width="108.25" height="34" stroke="#b1001c" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_70">
<rect x="18.552" y="719.69096" width="303.5" height="34" fill="white"/>
<rect x="18.552" y="719.69096" width="303.5" height="34" stroke="#b1001c" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_69">
<rect x="-100.448" y="803.09295" width="203.5" height="26.391998" fill="white"/>
<rect x="-100.448" y="803.09295" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_68">
<rect x="118.552" y="803.09295" width="203.5" height="26.391998" fill="white"/>
<rect x="118.552" y="803.09295" width="203.5" height="26.391998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_67">
<line x1="-104" y1="797.294" x2="339.4011" y2="797.294" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="4.0,4.0" stroke-width="1"/>
</g>
<g id="Graphic_66">
<text transform="translate(-139.604 789.794)" fill="black">
<tspan font-family="Helvetica Neue" font-size="14" fill="black" x="0" y="13" xml:space="preserve">0x20</tspan>
</text>
</g>
<g id="Graphic_63">
<text transform="translate(-139.604 630.649)" fill="black">
<tspan font-family="Helvetica Neue" font-size="14" fill="black" x="0" y="13" xml:space="preserve">0x70</tspan>
</text>
</g>
<g id="Line_62">
<line x1="-101.79572" y1="736.439" x2="341.6054" y2="736.439" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="4.0,4.0" stroke-width="1"/>
</g>
<g id="Graphic_61">
<text transform="translate(-139.604 728.743)" fill="black">
<tspan font-family="Helvetica Neue" font-size="14" fill="black" x="0" y="13" xml:space="preserve">0x40</tspan>
</text>
</g>
<g id="Graphic_79">
<rect x="-100.198" y="644.393" width="168.198" height="26.391998" fill="white"/>
<rect x="-100.198" y="644.393" width="168.198" height="26.391998" stroke="#b1001c" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_78">
<rect x="80" y="644.393" width="242.302" height="26.391998" fill="white"/>
<rect x="80" y="644.393" width="242.302" height="26.391998" stroke="#b1001c" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_81">
<line x1="-101.79572" y1="714.139" x2="341.6054" y2="714.139" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-dasharray="1.0,4.0" stroke-width="1"/>
</g>
<g id="Graphic_80">
<text transform="translate(-139.604 706.443)" fill="#a5a5a5">
<tspan font-family="Helvetica Neue" font-size="14" fill="#a5a5a5" x="0" y="13" xml:space="preserve">0x50</tspan>
</text>
</g>
</g>
</g>
</svg>

View File

@@ -1,184 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xl="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" version="1.1" viewBox="-562 479 876 429" width="876" height="429">
<defs/>
<g id="09-btmgc-analysis-2" stroke-opacity="1" stroke-dasharray="none" stroke="none" fill="none" fill-opacity="1">
<title>09-btmgc-analysis-2</title>
<rect fill="white" x="-562" y="479" width="876" height="429"/>
<g id="09-btmgc-analysis-2_Layer_1">
<title>Layer 1</title>
<g id="Graphic_85">
<rect x="-404" y="622.386" width="203.5" height="17.5" fill="white"/>
<rect x="-404" y="622.386" width="203.5" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-399 621.912)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="90.974" y="15" xml:space="preserve">C</tspan>
</text>
</g>
<g id="Graphic_84">
<rect x="-184.5" y="622.386" width="203.5" height="17.5" fill="white"/>
<rect x="-184.5" y="622.386" width="203.5" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-179.5 621.912)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="90.974" y="15" xml:space="preserve">C</tspan>
</text>
</g>
<g id="Graphic_83">
<rect x="34.5" y="622.386" width="203.5" height="17.5" fill="white"/>
<rect x="34.5" y="622.386" width="203.5" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(39.5 621.912)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="90.974" y="15" xml:space="preserve">C</tspan>
</text>
</g>
<g id="Graphic_82">
<rect x="-404" y="479.922" width="127" height="77.5" fill="white"/>
<rect x="-404" y="479.922" width="127" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-399 509.448)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="53.316" y="15" xml:space="preserve">A</tspan>
</text>
</g>
<g id="Graphic_81">
<rect x="-263" y="479.922" width="127" height="77.5" fill="white"/>
<rect x="-263" y="479.922" width="127" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-258 509.448)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="53.316" y="15" xml:space="preserve">A</tspan>
</text>
</g>
<g id="Graphic_80">
<rect x="-122" y="479.922" width="127" height="77.5" fill="white"/>
<rect x="-122" y="479.922" width="127" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-117 509.448)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="53.316" y="15" xml:space="preserve">A</tspan>
</text>
</g>
<g id="Graphic_79">
<rect x="19" y="479.922" width="127" height="77.5" fill="white"/>
<rect x="19" y="479.922" width="127" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(24 509.448)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="53.316" y="15" xml:space="preserve">A</tspan>
</text>
</g>
<g id="Graphic_78">
<rect x="160" y="479.922" width="78" height="77.5" fill="white"/>
<rect x="160" y="479.922" width="78" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(165 509.448)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="28.816" y="15" xml:space="preserve">A</tspan>
</text>
</g>
<g id="Line_77">
<line x1="-452.5" y1="518.172" x2="251" y2="518.172" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_76">
<text transform="translate(-551 509.448)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">GC Horizon</tspan>
</text>
</g>
<g id="Graphic_73">
<path d="M -120.675 651.5 L -84.425 651.5 L -84.425 710 L -66.3 710 L -102.55 740 L -138.8 710 L -120.675 710 Z" fill="white"/>
<path d="M -120.675 651.5 L -84.425 651.5 L -84.425 710 L -66.3 710 L -102.55 740 L -138.8 710 L -120.675 710 Z" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_89">
<rect x="-403.8" y="582.324" width="161.8" height="28.447998" fill="white"/>
<rect x="-403.8" y="582.324" width="161.8" height="28.447998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-398.8 587.324)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="70.42" y="15" xml:space="preserve">B</tspan>
</text>
</g>
<g id="Graphic_90">
<rect x="-229.5" y="582.342" width="277.2" height="28.447998" fill="white"/>
<rect x="-229.5" y="582.342" width="277.2" height="28.447998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-224.5 587.342)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="128.12" y="15" xml:space="preserve">B</tspan>
</text>
</g>
<g id="Graphic_91">
<rect x="64.7" y="582.342" width="173.5" height="28.447998" fill="white"/>
<rect x="64.7" y="582.342" width="173.5" height="28.447998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(69.7 587.342)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="76.27" y="15" xml:space="preserve">B</tspan>
</text>
</g>
<g id="Graphic_97">
<rect x="-403.8" y="564.842" width="490.8" height="12.157997" fill="white"/>
<rect x="-403.8" y="564.842" width="490.8" height="12.157997" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-398.8 561.697)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="234.624" y="15" xml:space="preserve">C</tspan>
</text>
</g>
<g id="Graphic_109">
<rect x="28.6" y="889.964" width="203.5" height="17.5" fill="white"/>
<rect x="28.6" y="889.964" width="203.5" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(33.6 889.49)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="90.974" y="15" xml:space="preserve">C</tspan>
</text>
</g>
<g id="Graphic_108">
<rect x="-409.9" y="747.5" width="127" height="77.5" fill="white"/>
<rect x="-409.9" y="747.5" width="127" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-404.9 777.026)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="53.316" y="15" xml:space="preserve">A</tspan>
</text>
</g>
<g id="Graphic_107">
<rect x="-268.9" y="747.5" width="127" height="77.5" fill="white"/>
<rect x="-268.9" y="747.5" width="127" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-263.9 777.026)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="53.316" y="15" xml:space="preserve">A</tspan>
</text>
</g>
<g id="Graphic_106">
<rect x="-127.9" y="747.5" width="127" height="77.5" fill="white"/>
<rect x="-127.9" y="747.5" width="127" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-122.9 777.026)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="53.316" y="15" xml:space="preserve">A</tspan>
</text>
</g>
<g id="Graphic_105">
<rect x="13.1" y="747.5" width="127" height="77.5" fill="white"/>
<rect x="13.1" y="747.5" width="127" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(18.1 777.026)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="53.316" y="15" xml:space="preserve">A</tspan>
</text>
</g>
<g id="Graphic_104">
<rect x="154.1" y="747.5" width="78" height="77.5" fill="white"/>
<rect x="154.1" y="747.5" width="78" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(159.1 777.026)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="28.816" y="15" xml:space="preserve">A</tspan>
</text>
</g>
<g id="Line_103">
<line x1="-458.4" y1="785.75" x2="245.1" y2="785.75" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_102">
<text transform="translate(-556.9 777.026)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">GC Horizon</tspan>
</text>
</g>
<g id="Graphic_99">
<rect x="58.8" y="849.92" width="173.5" height="28.447998" fill="white"/>
<rect x="58.8" y="849.92" width="173.5" height="28.447998" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(63.8 854.92)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="76.27" y="15" xml:space="preserve">B</tspan>
</text>
</g>
<g id="Graphic_98">
<rect x="-409.7" y="832.42" width="490.8" height="12.157997" fill="white"/>
<rect x="-409.7" y="832.42" width="490.8" height="12.157997" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(-404.7 829.275)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="234.624" y="15" xml:space="preserve">C</tspan>
</text>
</g>
<g id="Graphic_112">
<text transform="translate(273 797.5)" fill="black">
<tspan font-family="Helvetica Neue" font-size="26" fill="black" x="6536993e-19" y="25" xml:space="preserve">B</tspan>
<tspan font-family="Lucida Grande" font-size="26" fill="black" y="25" xml:space="preserve"></tspan>
</text>
</g>
<g id="Graphic_113">
<text transform="translate(273 833.974)" fill="black">
<tspan font-family="Helvetica Neue" font-size="26" fill="black" x="42277293e-20" y="25" xml:space="preserve">C</tspan>
<tspan font-family="Lucida Grande" font-size="26" fill="black" y="25" xml:space="preserve"></tspan>
</text>
</g>
</g>
</g>
</svg>

View File

@@ -1,81 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xl="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" version="1.1" viewBox="-12 920 809 269" width="809" height="269">
<defs/>
<g id="10-btmgc-analysis-3" stroke-opacity="1" stroke-dasharray="none" stroke="none" fill="none" fill-opacity="1">
<title>10-btmgc-analysis-3</title>
<rect fill="white" x="-12" y="920" width="809" height="269"/>
<g id="10-btmgc-analysis-3_Layer_1">
<title>Layer 1</title>
<g id="Graphic_13">
<rect x="433.7" y="949" width="63.559346" height="77.5" fill="white"/>
<rect x="433.7" y="949" width="63.559346" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(438.7 978.526)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="8.107673" y="15" xml:space="preserve">1/5 X</tspan>
</text>
</g>
<g id="Graphic_12">
<rect x="503.7654" y="949" width="63.559346" height="77.5" fill="white"/>
<rect x="503.7654" y="949" width="63.559346" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(508.7654 978.526)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="8.107673" y="15" xml:space="preserve">1/5 X</tspan>
</text>
</g>
<g id="Graphic_11">
<rect x="574.8318" y="949" width="63.559346" height="77.5" fill="white"/>
<rect x="574.8318" y="949" width="63.559346" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(579.8318 978.526)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="8.107673" y="15" xml:space="preserve">1/5 X</tspan>
</text>
</g>
<g id="Graphic_10">
<rect x="645.3977" y="949" width="63.559346" height="77.5" fill="white"/>
<rect x="645.3977" y="949" width="63.559346" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(650.3977 978.526)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="8.107673" y="15" xml:space="preserve">1/5 X</tspan>
</text>
</g>
<g id="Line_8">
<line x1="92" y1="934.276" x2="795.5" y2="934.276" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_7">
<text transform="translate(-6.500003 925.552)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">GC Horizon</tspan>
</text>
</g>
<g id="Graphic_2">
<rect x="113.2" y="1033.92" width="321.3" height="12.157997" fill="white"/>
<rect x="113.2" y="1033.92" width="321.3" height="12.157997" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(118.2 1030.775)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="150.762" y="15" xml:space="preserve">X</tspan>
</text>
</g>
<g id="Graphic_17">
<path d="M 420.125 1062 L 456.375 1062 L 456.375 1120.5 L 474.5 1120.5 L 438.25 1150.5 L 402 1120.5 L 420.125 1120.5 Z" fill="white"/>
<path d="M 420.125 1062 L 456.375 1062 L 456.375 1120.5 L 474.5 1120.5 L 438.25 1150.5 L 402 1120.5 L 420.125 1120.5 Z" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_27">
<line x1="93" y1="1164.224" x2="796.5" y2="1164.224" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_26">
<text transform="translate(-5.5000034 1155.5)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">GC Horizon</tspan>
</text>
</g>
<g id="Graphic_25">
<rect x="114" y="1173.5" width="641.8" height="12.157997" fill="white"/>
<rect x="114" y="1173.5" width="641.8" height="12.157997" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(119 1170.355)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="306.564" y="15" xml:space="preserve">2X</tspan>
</text>
</g>
<g id="Graphic_33">
<rect x="715.96355" y="949" width="63.559346" height="77.5" fill="white"/>
<rect x="715.96355" y="949" width="63.559346" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(720.96355 978.526)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="8.107673" y="15" xml:space="preserve">1/5 X</tspan>
</text>
</g>
</g>
</g>
</svg>

View File

@@ -1,81 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xl="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" version="1.1" viewBox="-12 920 809 269" width="809" height="269">
<defs/>
<g id="11-btmgc-analysis-4" stroke-opacity="1" stroke-dasharray="none" stroke="none" fill="none" fill-opacity="1">
<title>11-btmgc-analysis-4</title>
<rect fill="white" x="-12" y="920" width="809" height="269"/>
<g id="11-btmgc-analysis-4_Layer_1">
<title>Layer 1</title>
<g id="Graphic_13">
<rect x="113" y="949" width="127" height="77.5" fill="white"/>
<rect x="113" y="949" width="127" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(118 978.526)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="39.084" y="15" xml:space="preserve">1/5 D</tspan>
</text>
</g>
<g id="Graphic_12">
<rect x="253" y="949" width="127" height="77.5" fill="white"/>
<rect x="253" y="949" width="127" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(258 978.526)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="39.084" y="15" xml:space="preserve">1/5 D</tspan>
</text>
</g>
<g id="Graphic_11">
<rect x="395" y="949" width="127" height="77.5" fill="white"/>
<rect x="395" y="949" width="127" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(400 978.526)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="39.084" y="15" xml:space="preserve">1/5 D</tspan>
</text>
</g>
<g id="Graphic_10">
<rect x="536" y="949" width="127" height="77.5" fill="white"/>
<rect x="536" y="949" width="127" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(541 978.526)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="39.084" y="15" xml:space="preserve">1/5 D</tspan>
</text>
</g>
<g id="Graphic_9">
<rect x="677" y="949" width="78" height="77.5" fill="white"/>
<rect x="677" y="949" width="78" height="77.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(682 978.526)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="14.584" y="15" xml:space="preserve">1/5 D</tspan>
</text>
</g>
<g id="Line_8">
<line x1="92" y1="934.276" x2="795.5" y2="934.276" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_7">
<text transform="translate(-6.500003 925.552)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">GC Horizon</tspan>
</text>
</g>
<g id="Graphic_2">
<rect x="113.2" y="1033.92" width="641.8" height="12.157997" fill="white"/>
<rect x="113.2" y="1033.92" width="641.8" height="12.157997" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(118.2 1030.775)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="310.268" y="15" xml:space="preserve">D</tspan>
</text>
</g>
<g id="Graphic_17">
<path d="M 420.125 1062 L 456.375 1062 L 456.375 1120.5 L 474.5 1120.5 L 438.25 1150.5 L 402 1120.5 L 420.125 1120.5 Z" fill="white"/>
<path d="M 420.125 1062 L 456.375 1062 L 456.375 1120.5 L 474.5 1120.5 L 438.25 1150.5 L 402 1120.5 L 420.125 1120.5 Z" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_27">
<line x1="93" y1="1164.224" x2="796.5" y2="1164.224" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_26">
<text transform="translate(-5.5000034 1155.5)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">GC Horizon</tspan>
</text>
</g>
<g id="Graphic_25">
<rect x="114" y="1173.5" width="641.8" height="12.157997" fill="white"/>
<rect x="114" y="1173.5" width="641.8" height="12.157997" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(119 1170.355)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="310.268" y="15" xml:space="preserve">D</tspan>
</text>
</g>
</g>
</g>
</svg>

Binary file not shown.

View File

@@ -1,176 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg version="1.1" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns="http://www.w3.org/2000/svg" xmlns:xl="http://www.w3.org/1999/xlink" viewBox="210 271 870 514" width="870" height="514">
<defs/>
<g id="gc-compaction-split" stroke-dasharray="none" fill-opacity="1" stroke="none" fill="none" stroke-opacity="1">
<title>gc-compaction-split</title>
<rect fill="white" x="210" y="271" width="870" height="514"/>
<g id="gc-compaction-split_Layer_1">
<title>Layer 1</title>
<g id="Graphic_12">
<rect x="241" y="272" width="213" height="50.5" fill="white"/>
<rect x="241" y="272" width="213" height="50.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_11">
<rect x="468.72266" y="272" width="213" height="50.5" fill="white"/>
<rect x="468.72266" y="272" width="213" height="50.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_10">
<rect x="695.72266" y="272" width="213" height="50.5" fill="white"/>
<rect x="695.72266" y="272" width="213" height="50.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_9">
<rect x="241" y="337.3711" width="303.5" height="50.5" fill="white"/>
<rect x="241" y="337.3711" width="303.5" height="50.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_8">
<rect x="556.2617" y="337.3711" width="352.46094" height="50.5" fill="white"/>
<rect x="556.2617" y="337.3711" width="352.46094" height="50.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_7">
<rect x="241" y="402.7422" width="667.72266" height="50.5" fill="white"/>
<rect x="241" y="402.7422" width="667.72266" height="50.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_6">
<line x1="211" y1="355.5" x2="947.4961" y2="355.5" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_5">
<text transform="translate(952.4961 346.776)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">branch point</tspan>
</text>
</g>
<g id="Line_4">
<line x1="212" y1="438.5182" x2="948.4961" y2="438.5182" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_3">
<text transform="translate(953.4961 429.7942)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">last branch point</tspan>
</text>
</g>
<g id="Graphic_13">
<rect x="241" y="272" width="127.99101" height="181.24219" fill="#3a8eed" fill-opacity=".5"/>
<text transform="translate(246 353.3971)" fill="white">
<tspan font-family="Helvetica Neue" font-size="16" fill="white" x="38.835502" y="15" xml:space="preserve">Job 1</tspan>
</text>
</g>
<g id="Graphic_57">
<rect x="359" y="647.96484" width="551.72266" height="50.5" fill="white"/>
<rect x="359" y="647.96484" width="551.72266" height="50.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_54">
<rect x="359" y="517.22266" width="96" height="50.5" fill="white"/>
<rect x="359" y="517.22266" width="96" height="50.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_53">
<rect x="469.72266" y="517.22266" width="213" height="50.5" fill="white"/>
<rect x="469.72266" y="517.22266" width="213" height="50.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_52">
<rect x="696.72266" y="517.22266" width="213" height="50.5" fill="white"/>
<rect x="696.72266" y="517.22266" width="213" height="50.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_51">
<rect x="359" y="582.59375" width="186.5" height="50.5" fill="white"/>
<rect x="359" y="582.59375" width="186.5" height="50.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_50">
<rect x="557.2617" y="582.59375" width="352.46094" height="50.5" fill="white"/>
<rect x="557.2617" y="582.59375" width="352.46094" height="50.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Line_49">
<line x1="212" y1="600.72266" x2="948.4961" y2="600.72266" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_48">
<text transform="translate(953.4961 591.99866)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">branch point</tspan>
</text>
</g>
<g id="Line_47">
<line x1="213" y1="683.74084" x2="949.4961" y2="683.74084" stroke="#7f8080" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_46">
<text transform="translate(954.4961 675.01685)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="0" y="15" xml:space="preserve">last branch point</tspan>
</text>
</g>
<g id="Graphic_63">
<rect x="376.72525" y="272" width="127.99101" height="181.24219" fill="#3a8eed" fill-opacity=".5"/>
<text transform="translate(381.72525 353.3971)" fill="white">
<tspan font-family="Helvetica Neue" font-size="16" fill="white" x="38.835502" y="15" xml:space="preserve">Job 2</tspan>
</text>
</g>
<g id="Graphic_64">
<rect x="511.39405" y="272" width="127.99101" height="181.24219" fill="#3a8eed" fill-opacity=".5"/>
<text transform="translate(516.39405 353.3971)" fill="white">
<tspan font-family="Helvetica Neue" font-size="16" fill="white" x="38.835502" y="15" xml:space="preserve">Job 3</tspan>
</text>
</g>
<g id="Graphic_65">
<rect x="646.06285" y="272" width="127.99101" height="181.24219" fill="#3a8eed" fill-opacity=".5"/>
<text transform="translate(651.06285 353.3971)" fill="white">
<tspan font-family="Helvetica Neue" font-size="16" fill="white" x="38.835502" y="15" xml:space="preserve">Job 4</tspan>
</text>
</g>
<g id="Graphic_66">
<rect x="780.73165" y="272" width="127.99101" height="181.24219" fill="#3a8eed" fill-opacity=".5"/>
<text transform="translate(785.73165 353.3971)" fill="white">
<tspan font-family="Helvetica Neue" font-size="16" fill="white" x="38.835502" y="15" xml:space="preserve">Job 5</tspan>
</text>
</g>
<g id="Graphic_56">
<rect x="243.5" y="517.22266" width="125.49101" height="181.24219" fill="#ccc"/>
<rect x="243.5" y="517.22266" width="125.49101" height="181.24219" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_55">
<rect x="243.5" y="673.46484" width="125.49101" height="17.5" fill="#6b7ca5"/>
<rect x="243.5" y="673.46484" width="125.49101" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_68">
<rect x="379.22525" y="517.22266" width="125.49101" height="181.24219" fill="#ccc"/>
<rect x="379.22525" y="517.22266" width="125.49101" height="181.24219" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_67">
<rect x="379.22525" y="673.46484" width="125.49101" height="17.5" fill="#6b7ca5"/>
<rect x="379.22525" y="673.46484" width="125.49101" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_70">
<rect x="514.22525" y="517.22266" width="125.49101" height="181.24219" fill="#ccc"/>
<rect x="514.22525" y="517.22266" width="125.49101" height="181.24219" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_69">
<rect x="514.22525" y="673.46484" width="125.49101" height="17.5" fill="#6b7ca5"/>
<rect x="514.22525" y="673.46484" width="125.49101" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_72">
<rect x="649.22525" y="517.22266" width="125.49101" height="181.24219" fill="#ccc"/>
<rect x="649.22525" y="517.22266" width="125.49101" height="181.24219" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_71">
<rect x="649.22525" y="673.46484" width="125.49101" height="17.5" fill="#6b7ca5"/>
<rect x="649.22525" y="673.46484" width="125.49101" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_74">
<rect x="785.23165" y="517.22266" width="125.49101" height="181.24219" fill="#ccc"/>
<rect x="785.23165" y="517.22266" width="125.49101" height="181.24219" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_73">
<rect x="785.23165" y="673.46484" width="125.49101" height="17.5" fill="#6b7ca5"/>
<rect x="785.23165" y="673.46484" width="125.49101" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
</g>
<g id="Graphic_78">
<rect x="241" y="731.3359" width="125.49101" height="27.26953" fill="#ccc"/>
<rect x="241" y="731.3359" width="125.49101" height="27.26953" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(246 735.7467)" fill="black">
<tspan font-family="Helvetica Neue" font-size="16" fill="black" x="17.297502" y="15" xml:space="preserve">Delta Layer</tspan>
</text>
</g>
<g id="Graphic_79">
<rect x="241" y="766.759" width="125.49101" height="17.5" fill="#6b7ca5"/>
<rect x="241" y="766.759" width="125.49101" height="17.5" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
<text transform="translate(246 766.285)" fill="white">
<tspan font-family="Helvetica Neue" font-size="16" fill="white" x="13.737502" y="15" xml:space="preserve">Image Layer</tspan>
</text>
</g>
</g>
</g>
</svg>

View File

@@ -462,8 +462,6 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
if var(REAL_S3_ENV).is_ok() {
assert!(body.contains("remote_storage_s3_deleted_objects_total"));
}
#[cfg(target_os = "linux")]
assert!(body.contains("process_threads"));
}

View File

@@ -27,7 +27,6 @@ pub use prometheus::{
pub mod launch_timestamp;
mod wrappers;
pub use prometheus;
pub use wrappers::{CountedReader, CountedWriter};
mod hll;
pub use hll::{HyperLogLog, HyperLogLogState, HyperLogLogVec};

View File

@@ -1,13 +0,0 @@
[package]
name = "neon-shmem"
version = "0.1.0"
edition.workspace = true
license.workspace = true

[dependencies]
thiserror.workspace = true
nix.workspace = true
workspace_hack = { version = "0.1", path = "../../workspace_hack" }

[target.'cfg(target_os = "macos")'.dependencies]
tempfile = "3.14.0"

View File

@@ -1,418 +0,0 @@
//! Shared memory utilities for neon communicator
use std::num::NonZeroUsize;
use std::os::fd::{AsFd, BorrowedFd, OwnedFd};
use std::ptr::NonNull;
use std::sync::atomic::{AtomicUsize, Ordering};
use nix::errno::Errno;
use nix::sys::mman::MapFlags;
use nix::sys::mman::ProtFlags;
use nix::sys::mman::mmap as nix_mmap;
use nix::sys::mman::munmap as nix_munmap;
use nix::unistd::ftruncate as nix_ftruncate;
/// ShmemHandle represents a shared memory area that can be shared by processes over fork().
/// Unlike shared memory allocated by Postgres, this area is resizable, up to 'max_size' that's
/// specified at creation.
///
/// The area is backed by an anonymous file created with memfd_create(). The full address space for
/// 'max_size' is reserved up-front with mmap(), but whenever you call [`ShmemHandle::set_size`],
/// the underlying file is resized. Do not access the area beyond the current size. Currently, that
/// will cause the file to be expanded, but we might use mprotect() etc. to enforce that in the
/// future.
pub struct ShmemHandle {
/// memfd file descriptor
fd: OwnedFd,
max_size: usize,
// Pointer to the beginning of the shared memory area. The header is stored there.
shared_ptr: NonNull<SharedStruct>,
// Pointer to the beginning of the user data
pub data_ptr: NonNull<u8>,
}
/// This is stored at the beginning in the shared memory area.
struct SharedStruct {
max_size: usize,
/// Current size of the backing file. The high-order bit is used for the RESIZE_IN_PROGRESS flag
current_size: AtomicUsize,
}
const RESIZE_IN_PROGRESS: usize = 1 << 63;
const HEADER_SIZE: usize = std::mem::size_of::<SharedStruct>();
/// Error type returned by the ShmemHandle functions.
#[derive(thiserror::Error, Debug)]
#[error("{msg}: {errno}")]
pub struct Error {
pub msg: String,
pub errno: Errno,
}
impl Error {
fn new(msg: &str, errno: Errno) -> Error {
Error {
msg: msg.to_string(),
errno,
}
}
}
impl ShmemHandle {
/// Create a new shared memory area. To communicate between processes, the processes need to be
/// fork()'d after calling this, so that the ShmemHandle is inherited by all processes.
///
/// If the ShmemHandle is dropped, the memory is unmapped from the current process. Other
/// processes can continue using it, however.
pub fn new(name: &str, initial_size: usize, max_size: usize) -> Result<ShmemHandle, Error> {
// create the backing anonymous file.
let fd = create_backing_file(name)?;
Self::new_with_fd(fd, initial_size, max_size)
}
fn new_with_fd(
fd: OwnedFd,
initial_size: usize,
max_size: usize,
) -> Result<ShmemHandle, Error> {
// We reserve the high-order bit of 'current_size' for the RESIZE_IN_PROGRESS flag, and
// the actual size is a little larger than the requested size because of the SharedStruct
// header. Make the upper limit somewhat smaller still; with anything close to it, you'd
// run out of memory anyway.
if max_size >= 1 << 48 {
panic!("max size {} too large", max_size);
}
if initial_size > max_size {
panic!("initial size {initial_size} larger than max size {max_size}");
}
// The actual initial / max size is the one given by the caller, plus the size of
// 'SharedStruct'.
let initial_size = HEADER_SIZE + initial_size;
let max_size = NonZeroUsize::new(HEADER_SIZE + max_size).unwrap();
// Reserve address space for it with mmap
//
// TODO: Use MAP_HUGETLB if possible
let start_ptr = unsafe {
nix_mmap(
None,
max_size,
ProtFlags::PROT_READ | ProtFlags::PROT_WRITE,
MapFlags::MAP_SHARED,
&fd,
0,
)
}
.map_err(|e| Error::new("mmap failed: {e}", e))?;
// Reserve space for the initial size
enlarge_file(fd.as_fd(), initial_size as u64)?;
// Initialize the header
let shared: NonNull<SharedStruct> = start_ptr.cast();
unsafe {
shared.write(SharedStruct {
max_size: max_size.into(),
current_size: AtomicUsize::new(initial_size),
})
};
// The user data begins after the header
let data_ptr = unsafe { start_ptr.cast().add(HEADER_SIZE) };
Ok(ShmemHandle {
fd,
max_size: max_size.into(),
shared_ptr: shared,
data_ptr,
})
}
// return reference to the header
fn shared(&self) -> &SharedStruct {
unsafe { self.shared_ptr.as_ref() }
}
/// Resize the shared memory area. 'new_size' must not be larger than the 'max_size' specified
/// when creating the area.
///
/// This may only be called from one process/thread concurrently. We detect that case
/// and return an Error.
pub fn set_size(&self, new_size: usize) -> Result<(), Error> {
let new_size = new_size + HEADER_SIZE;
let shared = self.shared();
if new_size > self.max_size {
panic!(
"new size ({} is greater than max size ({})",
new_size, self.max_size
);
}
assert_eq!(self.max_size, shared.max_size);
// Lock the area by setting the bit in 'current_size'
//
// Ordering::Relaxed would probably be sufficient here, as we don't access any other memory
// and the posix_fallocate/ftruncate call is surely a synchronization point anyway. But
// since this is not performance-critical, better safe than sorry.
let mut old_size = shared.current_size.load(Ordering::Acquire);
loop {
if (old_size & RESIZE_IN_PROGRESS) != 0 {
return Err(Error::new(
"concurrent resize detected",
Errno::UnknownErrno,
));
}
match shared.current_size.compare_exchange(
old_size,
new_size,
Ordering::Acquire,
Ordering::Relaxed,
) {
Ok(_) => break,
Err(x) => old_size = x,
}
}
// Ok, we got the lock.
//
// NB: If anything goes wrong, we *must* clear the bit!
let result = {
use std::cmp::Ordering::{Equal, Greater, Less};
match new_size.cmp(&old_size) {
Less => nix_ftruncate(&self.fd, new_size as i64).map_err(|e| {
Error::new("could not shrink shmem segment, ftruncate failed: {e}", e)
}),
Equal => Ok(()),
Greater => enlarge_file(self.fd.as_fd(), new_size as u64),
}
};
// Unlock
shared.current_size.store(
if result.is_ok() { new_size } else { old_size },
Ordering::Release,
);
result
}
/// Returns the current user-visible size of the shared memory segment.
///
/// NOTE: a concurrent set_size() call can change the size at any time. It is the caller's
/// responsibility not to access the area beyond the current size.
pub fn current_size(&self) -> usize {
let total_current_size =
self.shared().current_size.load(Ordering::Relaxed) & !RESIZE_IN_PROGRESS;
total_current_size - HEADER_SIZE
}
}
impl Drop for ShmemHandle {
fn drop(&mut self) {
// SAFETY: The pointer was obtained from mmap() with the given size.
// We unmap the entire region.
let _ = unsafe { nix_munmap(self.shared_ptr.cast(), self.max_size) };
// The fd is dropped automatically by OwnedFd.
}
}
/// Create a "backing file" for the shared memory area. On Linux, use memfd_create(), to create an
/// anonymous in-memory file. One macos, fall back to a regular file. That's good enough for
/// development and testing, but in production we want the file to stay in memory.
///
/// disable 'unused_variables' warnings, because in the macos path, 'name' is unused.
#[allow(unused_variables)]
fn create_backing_file(name: &str) -> Result<OwnedFd, Error> {
#[cfg(not(target_os = "macos"))]
{
nix::sys::memfd::memfd_create(name, nix::sys::memfd::MFdFlags::empty())
.map_err(|e| Error::new("memfd_create failed: {e}", e))
}
#[cfg(target_os = "macos")]
{
let file = tempfile::tempfile().map_err(|e| {
Error::new(
"could not create temporary file to back shmem area: {e}",
nix::errno::Errno::from_raw(e.raw_os_error().unwrap_or(0)),
)
})?;
Ok(OwnedFd::from(file))
}
}
fn enlarge_file(fd: BorrowedFd, size: u64) -> Result<(), Error> {
// Use posix_fallocate() to enlarge the file. It reserves the space correctly, so that
// we don't get a segfault later when trying to actually use it.
#[cfg(not(target_os = "macos"))]
{
nix::fcntl::posix_fallocate(fd, 0, size as i64).map_err(|e| {
Error::new(
"could not grow shmem segment, posix_fallocate failed: {e}",
e,
)
})
}
// As a fallback on macOS, which doesn't have posix_fallocate, use plain 'ftruncate'.
#[cfg(target_os = "macos")]
{
nix::unistd::ftruncate(fd, size as i64)
.map_err(|e| Error::new("could not grow shmem segment, ftruncate failed: {e}", e))
}
}
#[cfg(test)]
mod tests {
use super::*;
use nix::unistd::ForkResult;
use std::ops::Range;
/// check that all bytes in given range have the expected value.
fn assert_range(ptr: *const u8, expected: u8, range: Range<usize>) {
for i in range {
let b = unsafe { *(ptr.add(i)) };
assert_eq!(expected, b, "unexpected byte at offset {}", i);
}
}
/// Write 'b' to all bytes in the given range
fn write_range(ptr: *mut u8, b: u8, range: Range<usize>) {
unsafe { std::ptr::write_bytes(ptr.add(range.start), b, range.end - range.start) };
}
// simple single-process test of growing and shrinking
#[test]
fn test_shmem_resize() -> Result<(), Error> {
let max_size = 1024 * 1024;
let init_struct = ShmemHandle::new("test_shmem_resize", 0, max_size)?;
assert_eq!(init_struct.current_size(), 0);
// Initial grow
let size1 = 10000;
init_struct.set_size(size1).unwrap();
assert_eq!(init_struct.current_size(), size1);
// Write some data
let data_ptr = init_struct.data_ptr.as_ptr();
write_range(data_ptr, 0xAA, 0..size1);
assert_range(data_ptr, 0xAA, 0..size1);
// Shrink
let size2 = 5000;
init_struct.set_size(size2).unwrap();
assert_eq!(init_struct.current_size(), size2);
// Grow again
let size3 = 20000;
init_struct.set_size(size3).unwrap();
assert_eq!(init_struct.current_size(), size3);
// Try to read it. The area that was shrunk and grown again should read as all zeros now
assert_range(data_ptr, 0xAA, 0..5000);
assert_range(data_ptr, 0, 5000..size1);
// Try to grow beyond max_size
//let size4 = max_size + 1;
//assert!(init_struct.set_size(size4).is_err());
// Dropping init_struct should unmap the memory
drop(init_struct);
Ok(())
}
/// This is used in tests to coordinate between test processes. It's like std::sync::Barrier,
/// but is stored in the shared memory area and works across processes. It's implemented by
/// polling, because e.g. standard rust mutexes are not guaranteed to work across processes.
struct SimpleBarrier {
num_procs: usize,
count: AtomicUsize,
}
impl SimpleBarrier {
unsafe fn init(ptr: *mut SimpleBarrier, num_procs: usize) {
unsafe {
*ptr = SimpleBarrier {
num_procs,
count: AtomicUsize::new(0),
}
}
}
pub fn wait(&self) {
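// Each arrival bumps the shared counter once. `old / num_procs` identifies the
// "generation" (round of the barrier) this arrival belongs to; we then poll until
// the counter shows that all `num_procs` participants of this generation have
// arrived, i.e. until it reaches (generation + 1) * num_procs.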
let old = self.count.fetch_add(1, Ordering::Relaxed);
let generation = old / self.num_procs;
let mut current = old + 1;
while current < (generation + 1) * self.num_procs {
std::thread::sleep(std::time::Duration::from_millis(10));
current = self.count.load(Ordering::Relaxed);
}
}
}
#[test]
fn test_multi_process() {
// Initialize
let max_size = 1_000_000_000_000;
let init_struct = ShmemHandle::new("test_multi_process", 0, max_size).unwrap();
let ptr = init_struct.data_ptr.as_ptr();
// Store the SimpleBarrier in the first 1k of the area.
init_struct.set_size(10000).unwrap();
let barrier_ptr: *mut SimpleBarrier = unsafe {
ptr.add(ptr.align_offset(std::mem::align_of::<SimpleBarrier>()))
.cast()
};
unsafe { SimpleBarrier::init(barrier_ptr, 2) };
let barrier = unsafe { barrier_ptr.as_ref().unwrap() };
// Fork another test process. The code after this runs in both processes concurrently.
let fork_result = unsafe { nix::unistd::fork().unwrap() };
// In the parent, fill bytes between 1000..2000. In the child, between 2000..3000
if fork_result.is_parent() {
write_range(ptr, 0xAA, 1000..2000);
} else {
write_range(ptr, 0xBB, 2000..3000);
}
barrier.wait();
// Verify the contents. (in both processes)
assert_range(ptr, 0xAA, 1000..2000);
assert_range(ptr, 0xBB, 2000..3000);
// Grow, from the child this time
let size = 10_000_000;
if !fork_result.is_parent() {
init_struct.set_size(size).unwrap();
}
barrier.wait();
// make some writes at the end
if fork_result.is_parent() {
write_range(ptr, 0xAA, (size - 10)..size);
} else {
write_range(ptr, 0xBB, (size - 20)..(size - 10));
}
barrier.wait();
// Verify the contents. (This runs in both processes)
assert_range(ptr, 0, (size - 1000)..(size - 20));
assert_range(ptr, 0xBB, (size - 20)..(size - 10));
assert_range(ptr, 0xAA, (size - 10)..size);
if let ForkResult::Parent { child } = fork_result {
nix::sys::wait::waitpid(child, None).unwrap();
}
}
}

View File

@@ -8,8 +8,6 @@ pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
// TODO: gRPC is disabled by default for now, but the port is used in neon_local.
pub const DEFAULT_GRPC_LISTEN_PORT: u16 = 51051; // storage-broker already uses 50051
use std::collections::HashMap;
use std::num::{NonZeroU64, NonZeroUsize};
@@ -45,21 +43,6 @@ pub struct NodeMetadata {
pub other: HashMap<String, serde_json::Value>,
}
/// PostHog integration config.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct PostHogConfig {
/// PostHog project ID
pub project_id: String,
/// Server-side (private) API key
pub server_api_key: String,
/// Client-side (public) API key
pub client_api_key: String,
/// Private API URL
pub private_api_url: String,
/// Public API URL
pub public_api_url: String,
}
/// `pageserver.toml`
///
/// We use serde derive with `#[serde(default)]` to generate a deserializer
@@ -121,7 +104,6 @@ pub struct ConfigToml {
pub listen_pg_addr: String,
pub listen_http_addr: String,
pub listen_https_addr: Option<String>,
pub listen_grpc_addr: Option<String>,
pub ssl_key_file: Utf8PathBuf,
pub ssl_cert_file: Utf8PathBuf,
#[serde(with = "humantime_serde")]
@@ -141,7 +123,6 @@ pub struct ConfigToml {
pub http_auth_type: AuthType,
#[serde_as(as = "serde_with::DisplayFromStr")]
pub pg_auth_type: AuthType,
pub grpc_auth_type: AuthType,
pub auth_validation_public_key_path: Option<Utf8PathBuf>,
pub remote_storage: Option<RemoteStorageConfig>,
pub tenant_config: TenantConfigToml,
@@ -201,11 +182,7 @@ pub struct ConfigToml {
pub tracing: Option<Tracing>,
pub enable_tls_page_service_api: bool,
pub dev_mode: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub posthog_config: Option<PostHogConfig>,
pub timeline_import_config: TimelineImportConfig,
#[serde(skip_serializing_if = "Option::is_none")]
pub basebackup_cache_config: Option<BasebackupCacheConfig>,
}
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
@@ -258,7 +235,7 @@ pub enum PageServiceProtocolPipelinedBatchingStrategy {
ScatteredLsn,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(tag = "mode", rename_all = "kebab-case")]
pub enum GetVectoredConcurrentIo {
/// The read path is fully sequential: layers are visited
@@ -328,27 +305,6 @@ impl From<OtelExporterProtocol> for tracing_utils::Protocol {
pub struct TimelineImportConfig {
pub import_job_concurrency: NonZeroUsize,
pub import_job_soft_size_limit: NonZeroUsize,
pub import_job_checkpoint_threshold: NonZeroUsize,
}
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct BasebackupCacheConfig {
#[serde(with = "humantime_serde")]
pub cleanup_period: Duration,
// FIXME: Support max_size_bytes.
// pub max_size_bytes: usize,
pub max_size_entries: i64,
}
impl Default for BasebackupCacheConfig {
fn default() -> Self {
Self {
cleanup_period: Duration::from_secs(60),
// max_size_bytes: 1024 * 1024 * 1024, // 1 GiB
max_size_entries: 1000,
}
}
}
pub mod statvfs {
@@ -534,14 +490,6 @@ pub struct TenantConfigToml {
/// Tenant level performance sampling ratio override. Controls the ratio of get page requests
/// that will get perf sampling for the tenant.
pub sampling_ratio: Option<Ratio>,
/// Capacity of relsize snapshot cache (used by replicas).
pub relsize_snapshot_cache_capacity: usize,
/// Enable preparing basebackup on XLOG_CHECKPOINT_SHUTDOWN and using it in basebackup requests.
// FIXME: Remove skip_serializing_if when the feature is stable.
#[serde(skip_serializing_if = "std::ops::Not::not")]
pub basebackup_cache_enabled: bool,
}
pub mod defaults {
@@ -609,7 +557,6 @@ impl Default for ConfigToml {
listen_pg_addr: (DEFAULT_PG_LISTEN_ADDR.to_string()),
listen_http_addr: (DEFAULT_HTTP_LISTEN_ADDR.to_string()),
listen_https_addr: (None),
listen_grpc_addr: None, // TODO: default to 127.0.0.1:51051
ssl_key_file: Utf8PathBuf::from(DEFAULT_SSL_KEY_FILE),
ssl_cert_file: Utf8PathBuf::from(DEFAULT_SSL_CERT_FILE),
ssl_cert_reload_period: Duration::from_secs(60),
@@ -626,7 +573,6 @@ impl Default for ConfigToml {
pg_distrib_dir: None, // Utf8PathBuf::from("./pg_install"), // TODO: formerly, this was std::env::current_dir()
http_auth_type: (AuthType::Trust),
pg_auth_type: (AuthType::Trust),
grpc_auth_type: (AuthType::Trust),
auth_validation_public_key_path: (None),
remote_storage: None,
broker_endpoint: (storage_broker::DEFAULT_ENDPOINT
@@ -693,15 +639,23 @@ impl Default for ConfigToml {
tenant_config: TenantConfigToml::default(),
no_sync: None,
wal_receiver_protocol: DEFAULT_WAL_RECEIVER_PROTOCOL,
page_service_pipelining: PageServicePipeliningConfig::Pipelined(
PageServicePipeliningConfigPipelined {
page_service_pipelining: if !cfg!(test) {
PageServicePipeliningConfig::Serial
} else {
// Do not turn this into the default until scattered reads have been
// validated and rolled-out fully.
PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
max_batch_size: NonZeroUsize::new(32).unwrap(),
execution: PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures,
batching: PageServiceProtocolPipelinedBatchingStrategy::ScatteredLsn,
},
),
get_vectored_concurrent_io: GetVectoredConcurrentIo::SidecarTask,
enable_read_path_debugging: if cfg!(feature = "testing") {
})
},
get_vectored_concurrent_io: if !cfg!(test) {
GetVectoredConcurrentIo::Sequential
} else {
GetVectoredConcurrentIo::SidecarTask
},
enable_read_path_debugging: if cfg!(test) || cfg!(feature = "testing") {
Some(true)
} else {
None
@@ -715,10 +669,7 @@ impl Default for ConfigToml {
timeline_import_config: TimelineImportConfig {
import_job_concurrency: NonZeroUsize::new(128).unwrap(),
import_job_soft_size_limit: NonZeroUsize::new(1024 * 1024 * 1024).unwrap(),
import_job_checkpoint_threshold: NonZeroUsize::new(128).unwrap(),
},
basebackup_cache_config: None,
posthog_config: None,
}
}
}
@@ -785,7 +736,6 @@ pub mod tenant_conf_defaults {
pub const DEFAULT_GC_COMPACTION_VERIFICATION: bool = true;
pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 5 * 1024 * 1024; // 5GB
pub const DEFAULT_GC_COMPACTION_RATIO_PERCENT: u64 = 100;
pub const DEFAULT_RELSIZE_SNAPSHOT_CACHE_CAPACITY: usize = 1000;
}
impl Default for TenantConfigToml {
@@ -843,8 +793,6 @@ impl Default for TenantConfigToml {
gc_compaction_initial_threshold_kb: DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB,
gc_compaction_ratio_percent: DEFAULT_GC_COMPACTION_RATIO_PERCENT,
sampling_ratio: None,
relsize_snapshot_cache_capacity: DEFAULT_RELSIZE_SNAPSHOT_CACHE_CAPACITY,
basebackup_cache_enabled: false,
}
}
}

View File

@@ -561,6 +561,21 @@ pub fn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {
}
}
#[inline(always)]
pub fn key_to_rel_tag(key: Key) -> RelTag {
RelTag {
spcnode: key.field2,
dbnode: key.field3,
relnode: key.field4,
forknum: key.field5,
}
}
#[inline(always)]
pub fn key_to_blknum(key: Key) -> BlockNumber {
key.field6
}
#[inline(always)]
pub fn rel_size_to_key(rel: RelTag) -> Key {
Key {
@@ -910,11 +925,6 @@ impl Key {
self.field1 == 0x00 && self.field4 != 0 && self.field6 != 0xffffffff
}
#[inline(always)]
pub fn is_rel_block_of_rel(&self, rel: Oid) -> bool {
self.is_rel_block_key() && self.field4 == rel
}
#[inline(always)]
pub fn is_rel_dir_key(&self) -> bool {
self.field1 == 0x00

View File

@@ -336,33 +336,14 @@ impl TimelineCreateRequest {
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub enum ShardImportStatus {
InProgress(Option<ShardImportProgress>),
InProgress,
Done,
Error(String),
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub enum ShardImportProgress {
V1(ShardImportProgressV1),
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct ShardImportProgressV1 {
/// Total number of jobs in the import plan
pub jobs: usize,
/// Number of jobs completed
pub completed: usize,
/// Hash of the plan
pub import_plan_hash: u64,
/// Soft limit for the job size
/// This needs to remain constant throughout the import
pub job_soft_size_limit: usize,
}
impl ShardImportStatus {
pub fn is_terminal(&self) -> bool {
match self {
ShardImportStatus::InProgress(_) => false,
ShardImportStatus::InProgress => false,
ShardImportStatus::Done | ShardImportStatus::Error(_) => true,
}
}
@@ -405,8 +386,6 @@ pub enum TimelineCreateRequestMode {
// using a flattened enum, so, it was an accepted field, and
// we continue to accept it by having it here.
pg_version: Option<u32>,
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
read_only: bool,
},
ImportPgdata {
import_pgdata: TimelineCreateRequestModeImportPgdata,
@@ -635,10 +614,6 @@ pub struct TenantConfigPatch {
pub gc_compaction_ratio_percent: FieldPatch<u64>,
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
pub sampling_ratio: FieldPatch<Option<Ratio>>,
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
pub relsize_snapshot_cache_capacity: FieldPatch<usize>,
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
pub basebackup_cache_enabled: FieldPatch<bool>,
}
/// Like [`crate::config::TenantConfigToml`], but preserves the information
@@ -768,12 +743,6 @@ pub struct TenantConfig {
#[serde(skip_serializing_if = "Option::is_none")]
pub sampling_ratio: Option<Option<Ratio>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub relsize_snapshot_cache_capacity: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub basebackup_cache_enabled: Option<bool>,
}
impl TenantConfig {
@@ -819,8 +788,6 @@ impl TenantConfig {
mut gc_compaction_initial_threshold_kb,
mut gc_compaction_ratio_percent,
mut sampling_ratio,
mut relsize_snapshot_cache_capacity,
mut basebackup_cache_enabled,
} = self;
patch.checkpoint_distance.apply(&mut checkpoint_distance);
@@ -922,12 +889,6 @@ impl TenantConfig {
.gc_compaction_ratio_percent
.apply(&mut gc_compaction_ratio_percent);
patch.sampling_ratio.apply(&mut sampling_ratio);
patch
.relsize_snapshot_cache_capacity
.apply(&mut relsize_snapshot_cache_capacity);
patch
.basebackup_cache_enabled
.apply(&mut basebackup_cache_enabled);
Ok(Self {
checkpoint_distance,
@@ -967,8 +928,6 @@ impl TenantConfig {
gc_compaction_initial_threshold_kb,
gc_compaction_ratio_percent,
sampling_ratio,
relsize_snapshot_cache_capacity,
basebackup_cache_enabled,
})
}
@@ -1077,12 +1036,6 @@ impl TenantConfig {
.gc_compaction_ratio_percent
.unwrap_or(global_conf.gc_compaction_ratio_percent),
sampling_ratio: self.sampling_ratio.unwrap_or(global_conf.sampling_ratio),
relsize_snapshot_cache_capacity: self
.relsize_snapshot_cache_capacity
.unwrap_or(global_conf.relsize_snapshot_cache_capacity),
basebackup_cache_enabled: self
.basebackup_cache_enabled
.unwrap_or(global_conf.basebackup_cache_enabled),
}
}
}
@@ -1850,6 +1803,7 @@ pub struct TopTenantShardsResponse {
}
pub mod virtual_file {
use std::sync::LazyLock;
#[derive(
Copy,
@@ -1878,7 +1832,6 @@ pub mod virtual_file {
Eq,
Hash,
strum_macros::EnumString,
strum_macros::EnumIter,
strum_macros::Display,
serde_with::DeserializeFromStr,
serde_with::SerializeDisplay,
@@ -1890,14 +1843,37 @@ pub mod virtual_file {
/// Uses buffered IO.
Buffered,
/// Uses direct IO for reads only.
#[cfg(target_os = "linux")]
Direct,
/// Use direct IO for reads and writes.
#[cfg(target_os = "linux")]
DirectRw,
}
impl IoMode {
pub fn preferred() -> Self {
IoMode::DirectRw
// The default behavior when running Rust unit tests without any further
// flags is to use the newest behavior (DirectRw).
// The CI uses the following environment variable to run the unit tests in all
// the different modes.
// NB: the Python regression & perf tests have their own defaults management
// that writes pageserver.toml; they do not use this variable.
if cfg!(test) {
static CACHED: LazyLock<IoMode> = LazyLock::new(|| {
utils::env::var_serde_json_string(
"NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IO_MODE",
)
.unwrap_or(
#[cfg(target_os = "linux")]
IoMode::DirectRw,
#[cfg(not(target_os = "linux"))]
IoMode::Buffered,
)
});
*CACHED
} else {
IoMode::Buffered
}
}
}
@@ -1907,7 +1883,9 @@ pub mod virtual_file {
fn try_from(value: u8) -> Result<Self, Self::Error> {
Ok(match value {
v if v == (IoMode::Buffered as u8) => IoMode::Buffered,
#[cfg(target_os = "linux")]
v if v == (IoMode::Direct as u8) => IoMode::Direct,
#[cfg(target_os = "linux")]
v if v == (IoMode::DirectRw as u8) => IoMode::DirectRw,
x => return Err(x),
})

View File

@@ -4,7 +4,6 @@
//! See docs/rfcs/025-generation-numbers.md
use serde::{Deserialize, Serialize};
use utils::generation::Generation;
use utils::id::{NodeId, TimelineId};
use crate::controller_api::NodeRegisterRequest;
@@ -64,17 +63,9 @@ pub struct ValidateResponseTenant {
pub valid: bool,
}
#[derive(Serialize, Deserialize)]
pub struct TimelineImportStatusRequest {
pub tenant_shard_id: TenantShardId,
pub timeline_id: TimelineId,
pub generation: Generation,
}
#[derive(Serialize, Deserialize)]
pub struct PutTimelineImportStatusRequest {
pub tenant_shard_id: TenantShardId,
pub timeline_id: TimelineId,
pub status: ShardImportStatus,
pub generation: Generation,
}

View File

@@ -36,24 +36,6 @@ impl Value {
Value::WalRecord(rec) => rec.will_init(),
}
}
#[inline(always)]
pub fn estimated_size(&self) -> usize {
match self {
Value::Image(image) => image.len(),
Value::WalRecord(NeonWalRecord::AuxFile {
content: Some(content),
..
}) => content.len(),
Value::WalRecord(NeonWalRecord::Postgres { rec, .. }) => rec.len(),
Value::WalRecord(NeonWalRecord::ClogSetAborted { xids }) => xids.len() * 4,
Value::WalRecord(NeonWalRecord::ClogSetCommitted { xids, .. }) => xids.len() * 4,
Value::WalRecord(NeonWalRecord::MultixactMembersCreate { members, .. }) => {
members.len() * 8
}
_ => 8192, /* use image size as the estimation */
}
}
}
#[derive(Debug, PartialEq)]

View File

@@ -1,19 +0,0 @@
[package]
name = "posthog_client_lite"
version = "0.1.0"
edition = "2024"
license.workspace = true
[dependencies]
anyhow.workspace = true
arc-swap.workspace = true
reqwest.workspace = true
serde_json.workspace = true
serde.workspace = true
sha2.workspace = true
thiserror.workspace = true
tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] }
tokio-util.workspace = true
tracing-utils.workspace = true
tracing.workspace = true
workspace_hack.workspace = true

View File

@@ -1,64 +0,0 @@
//! A background loop that fetches feature flags from PostHog and updates the feature store.
use std::{sync::Arc, time::Duration};
use arc_swap::ArcSwap;
use tokio_util::sync::CancellationToken;
use tracing::{Instrument, info_span};
use crate::{FeatureStore, PostHogClient, PostHogClientConfig};
/// A background loop that fetches feature flags from PostHog and updates the feature store.
pub struct FeatureResolverBackgroundLoop {
posthog_client: PostHogClient,
feature_store: ArcSwap<FeatureStore>,
cancel: CancellationToken,
}
impl FeatureResolverBackgroundLoop {
pub fn new(config: PostHogClientConfig, shutdown_pageserver: CancellationToken) -> Self {
Self {
posthog_client: PostHogClient::new(config),
feature_store: ArcSwap::new(Arc::new(FeatureStore::new())),
cancel: shutdown_pageserver,
}
}
pub fn spawn(self: Arc<Self>, handle: &tokio::runtime::Handle, refresh_period: Duration) {
let this = self.clone();
let cancel = self.cancel.clone();
handle.spawn(
async move {
tracing::info!("Starting PostHog feature resolver");
let mut ticker = tokio::time::interval(refresh_period);
ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
loop {
tokio::select! {
_ = ticker.tick() => {}
_ = cancel.cancelled() => break
}
let resp = match this
.posthog_client
.get_feature_flags_local_evaluation()
.await
{
Ok(resp) => resp,
Err(e) => {
tracing::warn!("Cannot get feature flags: {}", e);
continue;
}
};
let feature_store = FeatureStore::new_with_flags(resp.flags);
this.feature_store.store(Arc::new(feature_store));
tracing::info!("Feature flag updated");
}
tracing::info!("PostHog feature resolver stopped");
}
.instrument(info_span!("posthog_feature_resolver")),
);
}
pub fn feature_store(&self) -> Arc<FeatureStore> {
self.feature_store.load_full()
}
}
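// Illustrative wiring sketch (not part of the original source; `config`, `cancel`, and
// the 60-second refresh period are assumptions for the example):
//
// let resolver = Arc::new(FeatureResolverBackgroundLoop::new(config, cancel));
// resolver.clone().spawn(&tokio::runtime::Handle::current(), Duration::from_secs(60));
// let snapshot = resolver.feature_store(); // cheap Arc load, safe to evaluate against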

View File

@@ -1,940 +0,0 @@
//! A lite version of the PostHog client that only supports local evaluation of feature flags.
mod background_loop;
pub use background_loop::FeatureResolverBackgroundLoop;
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use serde_json::json;
use sha2::Digest;
#[derive(Debug, thiserror::Error)]
pub enum PostHogEvaluationError {
/// The feature flag is not available, for example, because the local evaluation data is not populated yet.
#[error("Feature flag not available: {0}")]
NotAvailable(String),
#[error("No condition group is matched")]
NoConditionGroupMatched,
/// Real errors, e.g., the rollout percentage does not add up to 100.
#[error("Failed to evaluate feature flag: {0}")]
Internal(String),
}
#[derive(Deserialize)]
pub struct LocalEvaluationResponse {
pub flags: Vec<LocalEvaluationFlag>,
}
#[derive(Deserialize)]
pub struct LocalEvaluationFlag {
key: String,
filters: LocalEvaluationFlagFilters,
active: bool,
}
#[derive(Deserialize)]
pub struct LocalEvaluationFlagFilters {
groups: Vec<LocalEvaluationFlagFilterGroup>,
multivariate: Option<LocalEvaluationFlagMultivariate>,
}
#[derive(Deserialize)]
pub struct LocalEvaluationFlagFilterGroup {
variant: Option<String>,
properties: Option<Vec<LocalEvaluationFlagFilterProperty>>,
rollout_percentage: i64,
}
#[derive(Deserialize)]
pub struct LocalEvaluationFlagFilterProperty {
key: String,
value: PostHogFlagFilterPropertyValue,
operator: String,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum PostHogFlagFilterPropertyValue {
String(String),
Number(f64),
Boolean(bool),
List(Vec<String>),
}
#[derive(Deserialize)]
pub struct LocalEvaluationFlagMultivariate {
variants: Vec<LocalEvaluationFlagMultivariateVariant>,
}
#[derive(Deserialize)]
pub struct LocalEvaluationFlagMultivariateVariant {
key: String,
rollout_percentage: i64,
}
pub struct FeatureStore {
flags: HashMap<String, LocalEvaluationFlag>,
}
impl Default for FeatureStore {
fn default() -> Self {
Self::new()
}
}
enum GroupEvaluationResult {
MatchedAndOverride(String),
MatchedAndEvaluate,
Unmatched,
}
impl FeatureStore {
pub fn new() -> Self {
Self {
flags: HashMap::new(),
}
}
pub fn new_with_flags(flags: Vec<LocalEvaluationFlag>) -> Self {
let mut store = Self::new();
store.set_flags(flags);
store
}
pub fn set_flags(&mut self, flags: Vec<LocalEvaluationFlag>) {
self.flags.clear();
for flag in flags {
self.flags.insert(flag.key.clone(), flag);
}
}
/// Generate a consistent hash for a user ID (e.g., tenant ID).
///
/// The implementation is different from the PostHog SDK's. In the PostHog SDK, it is the sha1 of
/// `user_id.distinct_id.salt`. However, as we do not upload all of our tenant IDs to PostHog, we do not
/// have the PostHog distinct_id for a tenant. Therefore, we compute the sha256 of `user_id.flag_key.salt`.
fn consistent_hash(user_id: &str, flag_key: &str, salt: &str) -> f64 {
let mut hasher = sha2::Sha256::new();
hasher.update(user_id);
hasher.update(".");
hasher.update(flag_key);
hasher.update(".");
hasher.update(salt);
let hash = hasher.finalize();
let hash_int = u64::from_le_bytes(hash[..8].try_into().unwrap());
hash_int as f64 / u64::MAX as f64
}
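// A minimal sketch (not in the original source) of the property the hash provides:
// the mapping is deterministic, so a given (user_id, flag_key, salt) triple always
// lands on the same point in [0, 1], and a rollout percentage of N therefore admits
// a stable slice of roughly N% of user IDs.
#[cfg(test)]
#[allow(dead_code)]
fn consistent_hash_is_stable(user_id: &str, flag_key: &str) -> bool {
let a = Self::consistent_hash(user_id, flag_key, "multivariate");
let b = Self::consistent_hash(user_id, flag_key, "multivariate");
a == b && (0.0..=1.0).contains(&a)
}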
/// Evaluate a condition. Returns an error if the condition cannot be evaluated due to parsing error or missing
/// property.
fn evaluate_condition(
&self,
operator: &str,
provided: &PostHogFlagFilterPropertyValue,
requested: &PostHogFlagFilterPropertyValue,
) -> Result<bool, PostHogEvaluationError> {
match operator {
"exact" => {
let PostHogFlagFilterPropertyValue::String(provided) = provided else {
// Left should be a string
return Err(PostHogEvaluationError::Internal(format!(
"The left side of the condition is not a string: {:?}",
provided
)));
};
let PostHogFlagFilterPropertyValue::List(requested) = requested else {
// Right should be a list of string
return Err(PostHogEvaluationError::Internal(format!(
"The right side of the condition is not a list: {:?}",
requested
)));
};
Ok(requested.contains(provided))
}
"lt" | "gt" => {
let PostHogFlagFilterPropertyValue::String(requested) = requested else {
// Right should be a string
return Err(PostHogEvaluationError::Internal(format!(
"The right side of the condition is not a string: {:?}",
requested
)));
};
let Ok(requested) = requested.parse::<f64>() else {
return Err(PostHogEvaluationError::Internal(format!(
"Can not parse the right side of the condition as a number: {:?}",
requested
)));
};
// Left can either be a number or a string
let provided = match provided {
PostHogFlagFilterPropertyValue::Number(provided) => *provided,
PostHogFlagFilterPropertyValue::String(provided) => {
let Ok(provided) = provided.parse::<f64>() else {
return Err(PostHogEvaluationError::Internal(format!(
"Can not parse the left side of the condition as a number: {:?}",
provided
)));
};
provided
}
_ => {
return Err(PostHogEvaluationError::Internal(format!(
"The left side of the condition is not a number or a string: {:?}",
provided
)));
}
};
match operator {
"lt" => Ok(provided < requested),
"gt" => Ok(provided > requested),
op => Err(PostHogEvaluationError::Internal(format!(
"Unsupported operator: {}",
op
))),
}
}
_ => Err(PostHogEvaluationError::Internal(format!(
"Unsupported operator: {}",
operator
))),
}
}
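// Worked example (illustrative): with provided plan_type = "free" and a condition
// { key: "plan_type", operator: "exact", value: ["free", "trial"] }, the match
// succeeds because the provided string is contained in the requested list. "lt"/"gt"
// compare numerically, accepting the provided side as either a number or a numeric
// string, as in pageserver_remote_size = 1000.0 against value "10000000".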
/// Evaluate a percentage.
fn evaluate_percentage(&self, mapped_user_id: f64, percentage: i64) -> bool {
mapped_user_id <= percentage as f64 / 100.0
}
/// Evaluate a filter group for a feature flag. Returns an error if there are errors during the evaluation.
///
/// Return values:
/// Ok(GroupEvaluationResult::MatchedAndOverride(variant)): matched and evaluated to this value
/// Ok(GroupEvaluationResult::MatchedAndEvaluate): condition matched but no variant override, use the global rollout percentage
/// Ok(GroupEvaluationResult::Unmatched): condition unmatched
fn evaluate_group(
&self,
group: &LocalEvaluationFlagFilterGroup,
hash_on_group_rollout_percentage: f64,
provided_properties: &HashMap<String, PostHogFlagFilterPropertyValue>,
) -> Result<GroupEvaluationResult, PostHogEvaluationError> {
if let Some(ref properties) = group.properties {
for property in properties {
if let Some(value) = provided_properties.get(&property.key) {
// The user provided the property value
if !self.evaluate_condition(
property.operator.as_ref(),
value,
&property.value,
)? {
return Ok(GroupEvaluationResult::Unmatched);
}
} else {
// We cannot evaluate, the property is not available
return Err(PostHogEvaluationError::NotAvailable(format!(
"The required property in the condition is not available: {}",
property.key
)));
}
}
}
// The group has no condition matchers or we matched the properties
if self.evaluate_percentage(hash_on_group_rollout_percentage, group.rollout_percentage) {
if let Some(ref variant_override) = group.variant {
Ok(GroupEvaluationResult::MatchedAndOverride(
variant_override.clone(),
))
} else {
Ok(GroupEvaluationResult::MatchedAndEvaluate)
}
} else {
Ok(GroupEvaluationResult::Unmatched)
}
}
/// Evaluate a multivariate feature flag. Returns an error if the flag is not available or if there are errors
/// during the evaluation.
///
/// The parsing logic is as follows:
///
/// * Match each filter group.
/// - If a group is matched, it will first determine whether the user is in the range of the group's rollout
/// percentage. We will generate a consistent hash for the user ID on the group rollout percentage. This hash
/// is shared across all groups.
/// - If the hash falls within the group's rollout percentage, return the variant if it's overridden, or
/// - Evaluate the variant using the global config and the global rollout percentage.
/// * Otherwise, continue with the next group until all groups are evaluated and no group is within the
/// rollout percentage.
/// * If there are no matching groups, return an error.
///
/// Example: we have a multivariate flag with 3 variants at the configured global rollout percentages: A (10%), B (20%), C (70%).
/// There is a single condition group with a rollout percentage of 10% and no variant override.
/// Then, we will have 1% of the users evaluated to A, 2% to B, and 7% to C.
///
/// Error handling: the caller should inspect the error and decide the behavior when a feature flag
/// cannot be evaluated (i.e., default to false if it cannot be resolved). The error should *not* be
/// propagated beyond where the feature flag gets resolved.
pub fn evaluate_multivariate(
&self,
flag_key: &str,
user_id: &str,
properties: &HashMap<String, PostHogFlagFilterPropertyValue>,
) -> Result<String, PostHogEvaluationError> {
let hash_on_global_rollout_percentage =
Self::consistent_hash(user_id, flag_key, "multivariate");
let hash_on_group_rollout_percentage =
Self::consistent_hash(user_id, flag_key, "within_group");
self.evaluate_multivariate_inner(
flag_key,
hash_on_global_rollout_percentage,
hash_on_group_rollout_percentage,
properties,
)
}
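/// Illustrative fallback helper (a sketch, not part of the original source): per the
/// error-handling note above, a caller would typically map any evaluation error to a
/// safe default variant instead of propagating it.
#[cfg(test)]
#[allow(dead_code)]
pub(crate) fn evaluate_multivariate_or(
&self,
flag_key: &str,
user_id: &str,
properties: &HashMap<String, PostHogFlagFilterPropertyValue>,
default: &str,
) -> String {
self.evaluate_multivariate(flag_key, user_id, properties)
.unwrap_or_else(|_| default.to_string())
}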
/// Evaluate a boolean feature flag. Returns an error if the flag is not available or if there are errors
/// during the evaluation.
///
/// The parsing logic is as follows:
///
/// * Generate a consistent hash for the tenant-feature.
/// * Match each filter group.
/// - If a group is matched, it will first determine whether the user is in the range of the rollout
/// percentage.
/// - If the hash falls within the group's rollout percentage, return true.
/// * Otherwise, continue with the next group until all groups are evaluated and no group is within the
/// rollout percentage.
/// * If there are no matching groups, return an error.
///
/// Returns `Ok(())` if the feature flag evaluates to true. In the future, it will return a payload.
///
/// Error handling: the caller should inspect the error and decide the behavior when a feature flag
/// cannot be evaluated (i.e., default to false if it cannot be resolved). The error should *not* be
/// propagated beyond where the feature flag gets resolved.
pub fn evaluate_boolean(
&self,
flag_key: &str,
user_id: &str,
properties: &HashMap<String, PostHogFlagFilterPropertyValue>,
) -> Result<(), PostHogEvaluationError> {
let hash_on_global_rollout_percentage = Self::consistent_hash(user_id, flag_key, "boolean");
self.evaluate_boolean_inner(flag_key, hash_on_global_rollout_percentage, properties)
}
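/// Illustrative helper (a sketch, not part of the original source): treat any
/// evaluation error as "flag off", matching the error-handling guidance above.
#[cfg(test)]
#[allow(dead_code)]
pub(crate) fn is_flag_enabled(
&self,
flag_key: &str,
user_id: &str,
properties: &HashMap<String, PostHogFlagFilterPropertyValue>,
) -> bool {
self.evaluate_boolean(flag_key, user_id, properties).is_ok()
}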
/// Evaluate a multivariate feature flag. Note that we directly take the mapped user ID
/// (a consistent hash ranging from 0 to 1) so that it is easier to use it in the tests
/// and avoid duplicate computations.
///
/// Use a different consistent hash for evaluating the group rollout percentage.
/// The behavior: if the condition is set to rolling out to 10% of the users, and
/// we set the variant A to 20% in the global config, then 2% of the total users will
/// be evaluated to variant A.
///
/// Note that the hash to determine group rollout percentage is shared across all groups. So if we have two
/// exactly-the-same conditions with 10% and 20% rollout percentage respectively, a total of 20% of the users
/// will be evaluated (versus 30% if group evaluation is done independently).
pub(crate) fn evaluate_multivariate_inner(
&self,
flag_key: &str,
hash_on_global_rollout_percentage: f64,
hash_on_group_rollout_percentage: f64,
properties: &HashMap<String, PostHogFlagFilterPropertyValue>,
) -> Result<String, PostHogEvaluationError> {
if let Some(flag_config) = self.flags.get(flag_key) {
if !flag_config.active {
return Err(PostHogEvaluationError::NotAvailable(format!(
"The feature flag is not active: {}",
flag_key
)));
}
let Some(ref multivariate) = flag_config.filters.multivariate else {
return Err(PostHogEvaluationError::Internal(format!(
"No multivariate available, should use evaluate_boolean?: {flag_key}"
)));
};
// TODO: sort the groups so that variant overrides always get evaluated first and it follows the PostHog
// Python SDK behavior; for now we do not configure conditions without variant overrides in Neon so it
// does not matter.
for group in &flag_config.filters.groups {
match self.evaluate_group(group, hash_on_group_rollout_percentage, properties)? {
GroupEvaluationResult::MatchedAndOverride(variant) => return Ok(variant),
GroupEvaluationResult::MatchedAndEvaluate => {
let mut percentage = 0;
for variant in &multivariate.variants {
percentage += variant.rollout_percentage;
if self
.evaluate_percentage(hash_on_global_rollout_percentage, percentage)
{
return Ok(variant.key.clone());
}
}
// This should not happen because the rollout percentage always adds up to 100, but just in case that PostHog
// returned invalid spec, we return an error.
return Err(PostHogEvaluationError::Internal(format!(
"Rollout percentage does not add up to 100: {}",
flag_key
)));
}
GroupEvaluationResult::Unmatched => continue,
}
}
// If no group is matched, the feature is not available, and up to the caller to decide what to do.
Err(PostHogEvaluationError::NoConditionGroupMatched)
} else {
// The feature flag is not available yet
Err(PostHogEvaluationError::NotAvailable(format!(
"Not found in the local evaluation spec: {}",
flag_key
)))
}
}
/// Evaluate a boolean feature flag. Note that we directly take the mapped user ID
/// (a consistent hash ranging from 0 to 1) so that it is easier to use it in the tests
/// and avoid duplicate computations.
///
/// Unlike the multivariate case, a single hash serves both purposes: it is matched
/// against the rollout percentage of each group, and the flag evaluates to true if it
/// falls within the rollout percentage of a matching group.
///
/// Note that the hash is shared across all groups. So if we have two exactly-the-same
/// conditions with 10% and 20% rollout percentage respectively, a total of 20% of the
/// users will be evaluated (versus 30% if group evaluation is done independently).
pub(crate) fn evaluate_boolean_inner(
&self,
flag_key: &str,
hash_on_global_rollout_percentage: f64,
properties: &HashMap<String, PostHogFlagFilterPropertyValue>,
) -> Result<(), PostHogEvaluationError> {
if let Some(flag_config) = self.flags.get(flag_key) {
if !flag_config.active {
return Err(PostHogEvaluationError::NotAvailable(format!(
"The feature flag is not active: {}",
flag_key
)));
}
if flag_config.filters.multivariate.is_some() {
return Err(PostHogEvaluationError::Internal(format!(
"This looks like a multivariate flag, should use evaluate_multivariate?: {flag_key}"
)));
};
// TODO: sort the groups so that variant overrides always get evaluated first and it follows the PostHog
// Python SDK behavior; for now we do not configure conditions without variant overrides in Neon so it
// does not matter.
for group in &flag_config.filters.groups {
match self.evaluate_group(group, hash_on_global_rollout_percentage, properties)? {
GroupEvaluationResult::MatchedAndOverride(_) => {
return Err(PostHogEvaluationError::Internal(format!(
"Boolean flag cannot have overrides: {}",
flag_key
)));
}
GroupEvaluationResult::MatchedAndEvaluate => {
return Ok(());
}
GroupEvaluationResult::Unmatched => continue,
}
}
// If no group is matched, the feature is not available, and up to the caller to decide what to do.
Err(PostHogEvaluationError::NoConditionGroupMatched)
} else {
// The feature flag is not available yet
Err(PostHogEvaluationError::NotAvailable(format!(
"Not found in the local evaluation spec: {}",
flag_key
)))
}
}
/// Infer whether a feature flag is a boolean flag by checking if it has a multivariate filter.
pub fn is_feature_flag_boolean(&self, flag_key: &str) -> Result<bool, PostHogEvaluationError> {
if let Some(flag_config) = self.flags.get(flag_key) {
Ok(flag_config.filters.multivariate.is_none())
} else {
Err(PostHogEvaluationError::NotAvailable(format!(
"Not found in the local evaluation spec: {}",
flag_key
)))
}
}
}
pub struct PostHogClientConfig {
/// The server API key.
pub server_api_key: String,
/// The client API key.
pub client_api_key: String,
/// The project ID.
pub project_id: String,
/// The private API URL.
pub private_api_url: String,
/// The public API URL.
pub public_api_url: String,
}
/// A lite PostHog client.
///
/// At the point of writing this code, PostHog does not have a functional Rust client with feature flag support.
/// This is a lite version that only supports local evaluation of feature flags and only supports those JSON specs
/// that will be used within Neon.
///
/// PostHog is designed as a browser-server system: the browser (client) side uses the client key and is exposed
/// to the end users; the server side uses a server key and is not exposed to the end users. The client and the
/// server have different API keys and provide different sets of APIs. In Neon, we only have the server (that is,
/// the pageserver), and it will use both the client API and the server API. So we need to store two API keys within
/// our PostHog client.
///
/// The server API is used to fetch the feature flag specs. The client API is used to capture events in case we
/// want to report the feature flag usage back to PostHog. The current plan is to use PostHog only as a UI to
/// configure feature flags, so it is very likely that the client API will not be used.
pub struct PostHogClient {
/// The config.
config: PostHogClientConfig,
/// The HTTP client.
client: reqwest::Client,
}
impl PostHogClient {
pub fn new(config: PostHogClientConfig) -> Self {
let client = reqwest::Client::new();
Self { config, client }
}
pub fn new_with_us_region(
server_api_key: String,
client_api_key: String,
project_id: String,
) -> Self {
Self::new(PostHogClientConfig {
server_api_key,
client_api_key,
project_id,
private_api_url: "https://us.posthog.com".to_string(),
public_api_url: "https://us.i.posthog.com".to_string(),
})
}
/// Fetch the feature flag specs from the server.
///
/// This is unfortunately an undocumented API at:
/// - <https://posthog.com/docs/api/feature-flags#get-api-projects-project_id-feature_flags-local_evaluation>
/// - <https://posthog.com/docs/feature-flags/local-evaluation>
///
/// The handling logic in [`FeatureStore`] mostly follows the Python API implementation.
/// See `_compute_flag_locally` in <https://github.com/PostHog/posthog-python/blob/master/posthog/client.py>
pub async fn get_feature_flags_local_evaluation(
&self,
) -> anyhow::Result<LocalEvaluationResponse> {
// BASE_URL/api/projects/:project_id/feature_flags/local_evaluation
// with bearer token of self.server_api_key
let url = format!(
"{}/api/projects/{}/feature_flags/local_evaluation",
self.config.private_api_url, self.config.project_id
);
let response = self
.client
.get(url)
.bearer_auth(&self.config.server_api_key)
.send()
.await?;
let status = response.status();
let body = response.text().await?;
if !status.is_success() {
return Err(anyhow::anyhow!(
"Failed to get feature flags: {}, {}",
status,
body
));
}
Ok(serde_json::from_str(&body)?)
}
/// Capture an event. This will only be used to report the feature flag usage back to PostHog, though
/// it also supports many other functionalities.
///
/// <https://posthog.com/docs/api/capture>
pub async fn capture_event(
&self,
event: &str,
distinct_id: &str,
properties: &HashMap<String, PostHogFlagFilterPropertyValue>,
) -> anyhow::Result<()> {
// PUBLIC_URL/capture/
// with bearer token of self.client_api_key
let url = format!("{}/capture/", self.config.public_api_url);
self.client
.post(url)
.body(serde_json::to_string(&json!({
"api_key": self.config.client_api_key,
"distinct_id": distinct_id,
"event": event,
"properties": properties,
}))?)
.send()
.await?;
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
fn data() -> &'static str {
r#"{
"flags": [
{
"id": 141807,
"team_id": 152860,
"name": "",
"key": "image-compaction-boundary",
"filters": {
"groups": [
{
"variant": null,
"properties": [
{
"key": "plan_type",
"type": "person",
"value": [
"free"
],
"operator": "exact"
}
],
"rollout_percentage": 40
},
{
"variant": null,
"properties": [],
"rollout_percentage": 10
}
],
"payloads": {},
"multivariate": null
},
"deleted": false,
"active": true,
"ensure_experience_continuity": false,
"has_encrypted_payloads": false,
"version": 1
},
{
"id": 135586,
"team_id": 152860,
"name": "",
"key": "boolean-flag",
"filters": {
"groups": [
{
"variant": null,
"properties": [
{
"key": "plan_type",
"type": "person",
"value": [
"free"
],
"operator": "exact"
}
],
"rollout_percentage": 47
}
],
"payloads": {},
"multivariate": null
},
"deleted": false,
"active": true,
"ensure_experience_continuity": false,
"has_encrypted_payloads": false,
"version": 1
},
{
"id": 132794,
"team_id": 152860,
"name": "",
"key": "gc-compaction",
"filters": {
"groups": [
{
"variant": "enabled-stage-2",
"properties": [
{
"key": "plan_type",
"type": "person",
"value": [
"free"
],
"operator": "exact"
},
{
"key": "pageserver_remote_size",
"type": "person",
"value": "10000000",
"operator": "lt"
}
],
"rollout_percentage": 50
},
{
"properties": [
{
"key": "plan_type",
"type": "person",
"value": [
"free"
],
"operator": "exact"
},
{
"key": "pageserver_remote_size",
"type": "person",
"value": "10000000",
"operator": "lt"
}
],
"rollout_percentage": 80
}
],
"payloads": {},
"multivariate": {
"variants": [
{
"key": "disabled",
"name": "",
"rollout_percentage": 90
},
{
"key": "enabled-stage-1",
"name": "",
"rollout_percentage": 10
},
{
"key": "enabled-stage-2",
"name": "",
"rollout_percentage": 0
},
{
"key": "enabled-stage-3",
"name": "",
"rollout_percentage": 0
},
{
"key": "enabled",
"name": "",
"rollout_percentage": 0
}
]
}
},
"deleted": false,
"active": true,
"ensure_experience_continuity": false,
"has_encrypted_payloads": false,
"version": 7
}
],
"group_type_mapping": {},
"cohorts": {}
}"#
}
#[test]
fn parse_local_evaluation() {
let data = data();
let _: LocalEvaluationResponse = serde_json::from_str(data).unwrap();
}
#[test]
fn evaluate_multivariate() {
let mut store = FeatureStore::new();
let response: LocalEvaluationResponse = serde_json::from_str(data()).unwrap();
store.set_flags(response.flags);
// This lacks the required properties and cannot be evaluated.
let variant =
store.evaluate_multivariate_inner("gc-compaction", 1.00, 0.40, &HashMap::new());
assert!(matches!(
variant,
Err(PostHogEvaluationError::NotAvailable(_))
),);
let properties_unmatched = HashMap::from([
(
"plan_type".to_string(),
PostHogFlagFilterPropertyValue::String("paid".to_string()),
),
(
"pageserver_remote_size".to_string(),
PostHogFlagFilterPropertyValue::Number(1000.0),
),
]);
// This does not match any group so there will be an error.
let variant =
store.evaluate_multivariate_inner("gc-compaction", 1.00, 0.40, &properties_unmatched);
assert!(matches!(
variant,
Err(PostHogEvaluationError::NoConditionGroupMatched)
),);
let variant =
store.evaluate_multivariate_inner("gc-compaction", 0.80, 0.80, &properties_unmatched);
assert!(matches!(
variant,
Err(PostHogEvaluationError::NoConditionGroupMatched)
),);
let properties = HashMap::from([
(
"plan_type".to_string(),
PostHogFlagFilterPropertyValue::String("free".to_string()),
),
(
"pageserver_remote_size".to_string(),
PostHogFlagFilterPropertyValue::Number(1000.0),
),
]);
// It matches the first group as 0.10 <= 0.50 and the properties are matched. Then it gets evaluated to the variant override.
let variant = store.evaluate_multivariate_inner("gc-compaction", 0.10, 0.10, &properties);
assert_eq!(variant.unwrap(), "enabled-stage-2".to_string());
// It matches the second group as 0.50 <= 0.60 <= 0.80 and the properties are matched. Then it gets evaluated using the global percentage.
let variant = store.evaluate_multivariate_inner("gc-compaction", 0.99, 0.60, &properties);
assert_eq!(variant.unwrap(), "enabled-stage-1".to_string());
let variant = store.evaluate_multivariate_inner("gc-compaction", 0.80, 0.60, &properties);
assert_eq!(variant.unwrap(), "disabled".to_string());
// It matches the group conditions but not the group rollout percentage.
let variant = store.evaluate_multivariate_inner("gc-compaction", 1.00, 0.90, &properties);
assert!(matches!(
variant,
Err(PostHogEvaluationError::NoConditionGroupMatched)
),);
}
#[test]
fn evaluate_boolean_1() {
// The `boolean-flag` feature flag only has one group that matches on the free user.
let mut store = FeatureStore::new();
let response: LocalEvaluationResponse = serde_json::from_str(data()).unwrap();
store.set_flags(response.flags);
// This lacks the required properties and cannot be evaluated.
let variant = store.evaluate_boolean_inner("boolean-flag", 1.00, &HashMap::new());
assert!(matches!(
variant,
Err(PostHogEvaluationError::NotAvailable(_))
),);
let properties_unmatched = HashMap::from([
(
"plan_type".to_string(),
PostHogFlagFilterPropertyValue::String("paid".to_string()),
),
(
"pageserver_remote_size".to_string(),
PostHogFlagFilterPropertyValue::Number(1000.0),
),
]);
// This does not match any group so there will be an error.
let variant = store.evaluate_boolean_inner("boolean-flag", 1.00, &properties_unmatched);
assert!(matches!(
variant,
Err(PostHogEvaluationError::NoConditionGroupMatched)
),);
let properties = HashMap::from([
(
"plan_type".to_string(),
PostHogFlagFilterPropertyValue::String("free".to_string()),
),
(
"pageserver_remote_size".to_string(),
PostHogFlagFilterPropertyValue::Number(1000.0),
),
]);
// It matches the group as 0.10 <= 0.47 and the properties are matched, so the flag evaluates to true.
let variant = store.evaluate_boolean_inner("boolean-flag", 0.10, &properties);
assert!(variant.is_ok());
// It matches the group conditions but not the group rollout percentage.
let variant = store.evaluate_boolean_inner("boolean-flag", 1.00, &properties);
assert!(matches!(
variant,
Err(PostHogEvaluationError::NoConditionGroupMatched)
),);
}
#[test]
fn evaluate_boolean_2() {
// The `image-compaction-boundary` feature flag has one group that matches on the free user and a group that matches on all users.
let mut store = FeatureStore::new();
let response: LocalEvaluationResponse = serde_json::from_str(data()).unwrap();
store.set_flags(response.flags);
// This lacks the required properties and cannot be evaluated.
let variant =
store.evaluate_boolean_inner("image-compaction-boundary", 1.00, &HashMap::new());
assert!(matches!(
variant,
Err(PostHogEvaluationError::NotAvailable(_))
),);
let properties_unmatched = HashMap::from([
(
"plan_type".to_string(),
PostHogFlagFilterPropertyValue::String("paid".to_string()),
),
(
"pageserver_remote_size".to_string(),
PostHogFlagFilterPropertyValue::Number(1000.0),
),
]);
// This does not match the filtered group, but it can match the catch-all group, subject to its 10% rollout.
let variant =
store.evaluate_boolean_inner("image-compaction-boundary", 1.00, &properties_unmatched);
assert!(matches!(
variant,
Err(PostHogEvaluationError::NoConditionGroupMatched)
),);
let variant =
store.evaluate_boolean_inner("image-compaction-boundary", 0.05, &properties_unmatched);
assert!(variant.is_ok());
let properties = HashMap::from([
(
"plan_type".to_string(),
PostHogFlagFilterPropertyValue::String("free".to_string()),
),
(
"pageserver_remote_size".to_string(),
PostHogFlagFilterPropertyValue::Number(1000.0),
),
]);
// It matches the first group as 0.30 <= 0.40 and the properties are matched, so the flag evaluates to true.
let variant = store.evaluate_boolean_inner("image-compaction-boundary", 0.30, &properties);
assert!(variant.is_ok());
// It matches the group conditions but not the group rollout percentage.
let variant = store.evaluate_boolean_inner("image-compaction-boundary", 1.00, &properties);
assert!(matches!(
variant,
Err(PostHogEvaluationError::NoConditionGroupMatched)
),);
// It matches the second "all" group conditions.
let variant = store.evaluate_boolean_inner("image-compaction-boundary", 0.09, &properties);
assert!(variant.is_ok());
}
}

View File

@@ -25,7 +25,6 @@ where
Ok(())
}
#[derive(Debug)]
pub enum BindError {
Conversion(Box<dyn Error + marker::Sync + Send>),
Serialization(io::Error),
@@ -289,12 +288,6 @@ pub fn sync(buf: &mut BytesMut) {
write_body(buf, |_| Ok::<(), io::Error>(())).unwrap();
}
#[inline]
pub fn flush(buf: &mut BytesMut) {
buf.put_u8(b'H');
write_body(buf, |_| Ok::<(), io::Error>(())).unwrap();
}
#[inline]
pub fn terminate(buf: &mut BytesMut) {
buf.put_u8(b'X');

View File

@@ -9,6 +9,7 @@ use std::error::Error;
use std::fmt;
use std::sync::Arc;
use bytes::BytesMut;
use fallible_iterator::FallibleIterator;
#[doc(inline)]
pub use postgres_protocol2::Oid;
@@ -26,6 +27,41 @@ macro_rules! accepts {
)
}
/// Generates an implementation of `ToSql::to_sql_checked`.
///
/// All `ToSql` implementations should use this macro.
macro_rules! to_sql_checked {
() => {
fn to_sql_checked(
&self,
ty: &$crate::Type,
out: &mut $crate::private::BytesMut,
) -> ::std::result::Result<
$crate::IsNull,
Box<dyn ::std::error::Error + ::std::marker::Sync + ::std::marker::Send>,
> {
$crate::__to_sql_checked(self, ty, out)
}
};
}
// WARNING: this function is not considered part of this crate's public API.
// It is subject to change at any time.
#[doc(hidden)]
pub fn __to_sql_checked<T>(
v: &T,
ty: &Type,
out: &mut BytesMut,
) -> Result<IsNull, Box<dyn Error + Sync + Send>>
where
T: ToSql,
{
if !T::accepts(ty) {
return Err(Box::new(WrongType::new::<T>(ty.clone())));
}
v.to_sql(ty, out)
}
// mod pg_lsn;
#[doc(hidden)]
pub mod private;
@@ -106,7 +142,7 @@ pub enum Kind {
/// An array type along with the type of its elements.
Array(Type),
/// A range type along with the type of its elements.
Range(Oid),
Range(Type),
/// A multirange type along with the type of its elements.
Multirange(Type),
/// A domain type along with its underlying type.
@@ -341,6 +377,43 @@ pub enum IsNull {
No,
}
/// A trait for types that can be converted into Postgres values.
pub trait ToSql: fmt::Debug {
/// Converts the value of `self` into the binary format of the specified
/// Postgres `Type`, appending it to `out`.
///
/// The caller of this method is responsible for ensuring that this type
/// is compatible with the Postgres `Type`.
///
/// The return value indicates if this value should be represented as
/// `NULL`. If this is the case, implementations **must not** write
/// anything to `out`.
fn to_sql(&self, ty: &Type, out: &mut BytesMut) -> Result<IsNull, Box<dyn Error + Sync + Send>>
where
Self: Sized;
/// Determines if a value of this type can be converted to the specified
/// Postgres `Type`.
fn accepts(ty: &Type) -> bool
where
Self: Sized;
/// An adaptor method used internally by Rust-Postgres.
///
/// *All* implementations of this method should be generated by the
/// `to_sql_checked!()` macro.
fn to_sql_checked(
&self,
ty: &Type,
out: &mut BytesMut,
) -> Result<IsNull, Box<dyn Error + Sync + Send>>;
/// Specify the encode format
fn encode_format(&self, _ty: &Type) -> Format {
Format::Binary
}
}
/// Supported Postgres message format types
///
/// Using Text format in a message assumes a Postgres `SERVER_ENCODING` of `UTF8`
@@ -351,3 +424,52 @@ pub enum Format {
/// Compact, typed binary format
Binary,
}
impl ToSql for &str {
fn to_sql(&self, ty: &Type, w: &mut BytesMut) -> Result<IsNull, Box<dyn Error + Sync + Send>> {
match *ty {
ref ty if ty.name() == "ltree" => types::ltree_to_sql(self, w),
ref ty if ty.name() == "lquery" => types::lquery_to_sql(self, w),
ref ty if ty.name() == "ltxtquery" => types::ltxtquery_to_sql(self, w),
_ => types::text_to_sql(self, w),
}
Ok(IsNull::No)
}
fn accepts(ty: &Type) -> bool {
match *ty {
Type::VARCHAR | Type::TEXT | Type::BPCHAR | Type::NAME | Type::UNKNOWN => true,
ref ty
if (ty.name() == "citext"
|| ty.name() == "ltree"
|| ty.name() == "lquery"
|| ty.name() == "ltxtquery") =>
{
true
}
_ => false,
}
}
to_sql_checked!();
}
macro_rules! simple_to {
($t:ty, $f:ident, $($expected:ident),+) => {
impl ToSql for $t {
fn to_sql(&self,
_: &Type,
w: &mut BytesMut)
-> Result<IsNull, Box<dyn Error + Sync + Send>> {
types::$f(*self, w);
Ok(IsNull::No)
}
accepts!($($expected),+);
to_sql_checked!();
}
}
}
simple_to!(u32, oid_to_sql, OID);

View File

@@ -393,7 +393,7 @@ impl Inner {
}
}
pub const fn const_oid(&self) -> Oid {
pub fn oid(&self) -> Oid {
match *self {
Inner::Bool => 16,
Inner::Bytea => 17,
@@ -580,14 +580,7 @@ impl Inner {
Inner::TstzmultiRangeArray => 6153,
Inner::DatemultiRangeArray => 6155,
Inner::Int8multiRangeArray => 6157,
Inner::Other(_) => u32::MAX,
}
}
pub fn oid(&self) -> Oid {
match *self {
Inner::Other(ref u) => u.oid,
_ => self.const_oid(),
}
}
@@ -734,17 +727,17 @@ impl Inner {
Inner::JsonbArray => &Kind::Array(Type(Inner::Jsonb)),
Inner::AnyRange => &Kind::Pseudo,
Inner::EventTrigger => &Kind::Pseudo,
Inner::Int4Range => &const { Kind::Range(Inner::Int4.const_oid()) },
Inner::Int4Range => &Kind::Range(Type(Inner::Int4)),
Inner::Int4RangeArray => &Kind::Array(Type(Inner::Int4Range)),
Inner::NumRange => &const { Kind::Range(Inner::Numeric.const_oid()) },
Inner::NumRange => &Kind::Range(Type(Inner::Numeric)),
Inner::NumRangeArray => &Kind::Array(Type(Inner::NumRange)),
Inner::TsRange => &const { Kind::Range(Inner::Timestamp.const_oid()) },
Inner::TsRange => &Kind::Range(Type(Inner::Timestamp)),
Inner::TsRangeArray => &Kind::Array(Type(Inner::TsRange)),
Inner::TstzRange => &const { Kind::Range(Inner::Timestamptz.const_oid()) },
Inner::TstzRange => &Kind::Range(Type(Inner::Timestamptz)),
Inner::TstzRangeArray => &Kind::Array(Type(Inner::TstzRange)),
Inner::DateRange => &const { Kind::Range(Inner::Date.const_oid()) },
Inner::DateRange => &Kind::Range(Type(Inner::Date)),
Inner::DateRangeArray => &Kind::Array(Type(Inner::DateRange)),
Inner::Int8Range => &const { Kind::Range(Inner::Int8.const_oid()) },
Inner::Int8Range => &Kind::Range(Type(Inner::Int8)),
Inner::Int8RangeArray => &Kind::Array(Type(Inner::Int8Range)),
Inner::Jsonpath => &Kind::Simple,
Inner::JsonpathArray => &Kind::Array(Type(Inner::Jsonpath)),

View File

@@ -1,12 +1,14 @@
use std::collections::HashMap;
use std::fmt;
use std::net::IpAddr;
use std::sync::Arc;
use std::task::{Context, Poll};
use std::time::Duration;
use bytes::BytesMut;
use fallible_iterator::FallibleIterator;
use futures_util::{TryStreamExt, future, ready};
use parking_lot::Mutex;
use postgres_protocol2::message::backend::Message;
use postgres_protocol2::message::frontend;
use serde::{Deserialize, Serialize};
@@ -14,52 +16,29 @@ use tokio::sync::mpsc;
use crate::codec::{BackendMessages, FrontendMessage};
use crate::config::{Host, SslMode};
use crate::connection::{Request, RequestMessages};
use crate::query::RowStream;
use crate::simple_query::SimpleQueryStream;
use crate::types::{Oid, Type};
use crate::{
CancelToken, Error, ReadyForQueryStatus, SimpleQueryMessage, Transaction, TransactionBuilder,
query, simple_query,
CancelToken, Error, ReadyForQueryStatus, SimpleQueryMessage, Statement, Transaction,
TransactionBuilder, query, simple_query,
};
pub struct Responses {
/// new messages from conn
receiver: mpsc::Receiver<BackendMessages>,
/// current batch of messages
cur: BackendMessages,
/// number of total queries sent.
waiting: usize,
/// number of ReadyForQuery messages received.
received: usize,
}
impl Responses {
pub fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll<Result<Message, Error>> {
loop {
// get the next saved message
if let Some(message) = self.cur.next().map_err(Error::parse)? {
let received = self.received;
// increase the query head if this is the last message.
if let Message::ReadyForQuery(_) = message {
self.received += 1;
}
// check if the client has skipped this query.
if received + 1 < self.waiting {
// grab the next message.
continue;
}
// convenience: turn the error message into a proper error.
let res = match message {
Message::ErrorResponse(body) => Err(Error::db(body)),
message => Ok(message),
};
return Poll::Ready(res);
match self.cur.next().map_err(Error::parse)? {
Some(Message::ErrorResponse(body)) => return Poll::Ready(Err(Error::db(body))),
Some(message) => return Poll::Ready(Ok(message)),
None => {}
}
// get the next batch of messages.
match ready!(self.receiver.poll_recv(cx)) {
Some(messages) => self.cur = messages,
None => return Poll::Ready(Err(Error::closed())),
@@ -76,87 +55,44 @@ impl Responses {
/// (corresponding to the queries in the [crate::prepare] module).
#[derive(Default)]
pub(crate) struct CachedTypeInfo {
/// A statement for basic information for a type from its
/// OID. Corresponds to [TYPEINFO_QUERY](crate::prepare::TYPEINFO_QUERY) (or its
/// fallback).
pub(crate) typeinfo: Option<Statement>,
/// Cache of types already looked up.
pub(crate) types: HashMap<Oid, Type>,
}
pub struct InnerClient {
sender: mpsc::UnboundedSender<FrontendMessage>,
responses: Responses,
sender: mpsc::UnboundedSender<Request>,
/// A buffer to use when writing out postgres commands.
buffer: BytesMut,
buffer: Mutex<BytesMut>,
}
impl InnerClient {
pub fn start(&mut self) -> Result<PartialQuery, Error> {
self.responses.waiting += 1;
Ok(PartialQuery(Some(self)))
pub fn send(&self, messages: RequestMessages) -> Result<Responses, Error> {
let (sender, receiver) = mpsc::channel(1);
let request = Request { messages, sender };
self.sender.send(request).map_err(|_| Error::closed())?;
Ok(Responses {
receiver,
cur: BackendMessages::empty(),
})
}
// pub fn send_with_sync<F>(&mut self, f: F) -> Result<&mut Responses, Error>
// where
// F: FnOnce(&mut BytesMut) -> Result<(), Error>,
// {
// self.start()?.send_with_sync(f)
// }
pub fn send_simple_query(&mut self, query: &str) -> Result<&mut Responses, Error> {
self.responses.waiting += 1;
self.buffer.clear();
// simple queries do not need sync.
frontend::query(query, &mut self.buffer).map_err(Error::encode)?;
let buf = self.buffer.split().freeze();
self.send_message(FrontendMessage::Raw(buf))
}
fn send_message(&mut self, messages: FrontendMessage) -> Result<&mut Responses, Error> {
self.sender.send(messages).map_err(|_| Error::closed())?;
Ok(&mut self.responses)
}
}
pub struct PartialQuery<'a>(Option<&'a mut InnerClient>);
impl Drop for PartialQuery<'_> {
fn drop(&mut self) {
if let Some(client) = self.0.take() {
client.buffer.clear();
frontend::sync(&mut client.buffer);
let buf = client.buffer.split().freeze();
let _ = client.send_message(FrontendMessage::Raw(buf));
}
}
}
impl<'a> PartialQuery<'a> {
pub fn send_with_flush<F>(&mut self, f: F) -> Result<&mut Responses, Error>
/// Call the given function with a buffer to be used when writing out
/// postgres commands.
pub fn with_buf<F, R>(&self, f: F) -> R
where
F: FnOnce(&mut BytesMut) -> Result<(), Error>,
F: FnOnce(&mut BytesMut) -> R,
{
let client = self.0.as_deref_mut().unwrap();
client.buffer.clear();
f(&mut client.buffer)?;
frontend::flush(&mut client.buffer);
let buf = client.buffer.split().freeze();
client.send_message(FrontendMessage::Raw(buf))
}
pub fn send_with_sync<F>(mut self, f: F) -> Result<&'a mut Responses, Error>
where
F: FnOnce(&mut BytesMut) -> Result<(), Error>,
{
let client = self.0.as_deref_mut().unwrap();
client.buffer.clear();
f(&mut client.buffer)?;
frontend::sync(&mut client.buffer);
let buf = client.buffer.split().freeze();
let _ = client.send_message(FrontendMessage::Raw(buf));
Ok(&mut self.0.take().unwrap().responses)
let mut buffer = self.buffer.lock();
let r = f(&mut buffer);
buffer.clear();
r
}
}
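
A note on the `with_buf` pattern above: the closure frames messages into the shared, mutex-guarded buffer and must freeze the bytes it wants to keep, because the buffer is cleared as soon as the closure returns. A minimal sketch of a caller, assuming the crate-internal `InnerClient` and `Error` types are in scope (the helper name `frame_query` is hypothetical):

```
use bytes::Bytes;
use postgres_protocol2::message::frontend;

// Hypothetical helper: frame a single simple-query message. The buffer is
// cleared by with_buf after the closure returns, so the closure splits off
// (freezes) the bytes it wants to keep.
fn frame_query(client: &InnerClient, sql: &str) -> Result<Bytes, Error> {
    client.with_buf(|buf| {
        frontend::query(sql, buf).map_err(Error::encode)?;
        Ok(buf.split().freeze())
    })
}
```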
@@ -173,7 +109,7 @@ pub struct SocketConfig {
/// The client is one half of what is returned when a connection is established. Users interact with the database
/// through this client object.
pub struct Client {
inner: InnerClient,
inner: Arc<InnerClient>,
cached_typeinfo: CachedTypeInfo,
socket_config: SocketConfig,
@@ -184,24 +120,17 @@ pub struct Client {
impl Client {
pub(crate) fn new(
sender: mpsc::UnboundedSender<FrontendMessage>,
receiver: mpsc::Receiver<BackendMessages>,
sender: mpsc::UnboundedSender<Request>,
socket_config: SocketConfig,
ssl_mode: SslMode,
process_id: i32,
secret_key: i32,
) -> Client {
Client {
inner: InnerClient {
inner: Arc::new(InnerClient {
sender,
responses: Responses {
receiver,
cur: BackendMessages::empty(),
waiting: 0,
received: 0,
},
buffer: Default::default(),
},
}),
cached_typeinfo: Default::default(),
socket_config,
@@ -216,29 +145,19 @@ impl Client {
self.process_id
}
pub(crate) fn inner_mut(&mut self) -> &mut InnerClient {
&mut self.inner
pub(crate) fn inner(&self) -> &Arc<InnerClient> {
&self.inner
}
/// Pass text directly to the Postgres backend to allow it to sort out typing itself and
/// to save a roundtrip
pub async fn query_raw_txt<S, I>(
&mut self,
statement: &str,
params: I,
) -> Result<RowStream, Error>
pub async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
where
S: AsRef<str>,
I: IntoIterator<Item = Option<S>>,
I::IntoIter: ExactSizeIterator,
{
query::query_txt(
&mut self.inner,
&mut self.cached_typeinfo,
statement,
params,
)
.await
query::query_txt(&self.inner, statement, params).await
}
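
With `query_raw_txt` taking `&self`, a shared `Client` can issue queries without an exclusive borrow. A hedged usage sketch (the function name `ping` is hypothetical; `Client` and `Error` are the crate's own types; parameters travel as text, with `None` standing for SQL NULL):

```
use futures_util::{TryStreamExt, pin_mut};

async fn ping(client: &Client) -> Result<u64, Error> {
    // Parameters are sent as text; the server infers their types.
    let rows = client
        .query_raw_txt("SELECT $1::int4 + 1", [Some("41")])
        .await?;
    pin_mut!(rows);
    // Count the rows by draining the stream.
    let mut n: u64 = 0;
    while rows.try_next().await?.is_some() {
        n += 1;
    }
    Ok(n)
}
```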
/// Executes a sequence of SQL statements using the simple query protocol, returning the resulting rows.
@@ -254,15 +173,12 @@ impl Client {
    /// Prepared statements should be used for any query which contains user-specified data, as they provide the
    /// functionality to safely embed that data in the request. Do not form statements via string concatenation and pass
    /// them to this method!
pub async fn simple_query(&mut self, query: &str) -> Result<Vec<SimpleQueryMessage>, Error> {
pub async fn simple_query(&self, query: &str) -> Result<Vec<SimpleQueryMessage>, Error> {
self.simple_query_raw(query).await?.try_collect().await
}
pub(crate) async fn simple_query_raw(
&mut self,
query: &str,
) -> Result<SimpleQueryStream, Error> {
simple_query::simple_query(self.inner_mut(), query).await
pub(crate) async fn simple_query_raw(&self, query: &str) -> Result<SimpleQueryStream, Error> {
simple_query::simple_query(self.inner(), query).await
}
/// Executes a sequence of SQL statements using the simple query protocol.
@@ -275,11 +191,15 @@ impl Client {
    /// Prepared statements should be used for any query which contains user-specified data, as they provide the
    /// functionality to safely embed that data in the request. Do not form statements via string concatenation and pass
    /// them to this method!
pub async fn batch_execute(&mut self, query: &str) -> Result<ReadyForQueryStatus, Error> {
simple_query::batch_execute(self.inner_mut(), query).await
pub async fn batch_execute(&self, query: &str) -> Result<ReadyForQueryStatus, Error> {
simple_query::batch_execute(self.inner(), query).await
}
pub async fn discard_all(&mut self) -> Result<ReadyForQueryStatus, Error> {
// clear the prepared statements that are about to be nuked from the postgres session
self.cached_typeinfo.typeinfo = None;
self.batch_execute("discard all").await
}
@@ -288,7 +208,7 @@ impl Client {
/// The transaction will roll back by default - use the `commit` method to commit it.
pub async fn transaction(&mut self) -> Result<Transaction<'_>, Error> {
struct RollbackIfNotDone<'me> {
client: &'me mut Client,
client: &'me Client,
done: bool,
}
@@ -298,7 +218,14 @@ impl Client {
return;
}
let _ = self.client.inner.send_simple_query("ROLLBACK");
let buf = self.client.inner().with_buf(|buf| {
frontend::query("ROLLBACK", buf).unwrap();
buf.split().freeze()
});
let _ = self
.client
.inner()
.send(RequestMessages::Single(FrontendMessage::Raw(buf)));
}
}
@@ -312,7 +239,7 @@ impl Client {
client: self,
done: false,
};
cleaner.client.batch_execute("BEGIN").await?;
self.batch_execute("BEGIN").await?;
cleaner.done = true;
}
@@ -338,6 +265,11 @@ impl Client {
}
}
/// Query for type information
pub(crate) async fn get_type_inner(&mut self, oid: Oid) -> Result<Type, Error> {
crate::prepare::get_type(&self.inner, &mut self.cached_typeinfo, oid).await
}
/// Determines if the connection to the server has already closed.
///
/// In that case, all future queries will fail.

View File

@@ -1,16 +1,21 @@
use std::io;
use bytes::{Bytes, BytesMut};
use bytes::{Buf, Bytes, BytesMut};
use fallible_iterator::FallibleIterator;
use postgres_protocol2::message::backend;
use postgres_protocol2::message::frontend::CopyData;
use tokio_util::codec::{Decoder, Encoder};
pub enum FrontendMessage {
Raw(Bytes),
CopyData(CopyData<Box<dyn Buf + Send>>),
}
pub enum BackendMessage {
Normal { messages: BackendMessages },
Normal {
messages: BackendMessages,
request_complete: bool,
},
Async(backend::Message),
}
@@ -39,6 +44,7 @@ impl Encoder<FrontendMessage> for PostgresCodec {
fn encode(&mut self, item: FrontendMessage, dst: &mut BytesMut) -> io::Result<()> {
match item {
FrontendMessage::Raw(buf) => dst.extend_from_slice(&buf),
FrontendMessage::CopyData(data) => data.write(dst),
}
Ok(())
@@ -51,6 +57,7 @@ impl Decoder for PostgresCodec {
fn decode(&mut self, src: &mut BytesMut) -> Result<Option<BackendMessage>, io::Error> {
let mut idx = 0;
let mut request_complete = false;
while let Some(header) = backend::Header::parse(&src[idx..])? {
let len = header.len() as usize + 1;
@@ -75,6 +82,7 @@ impl Decoder for PostgresCodec {
idx += len;
if header.tag() == backend::READY_FOR_QUERY_TAG {
request_complete = true;
break;
}
}
@@ -84,6 +92,7 @@ impl Decoder for PostgresCodec {
} else {
Ok(Some(BackendMessage::Normal {
messages: BackendMessages(src.split_to(idx)),
request_complete,
}))
}
}
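
The decoder change above batches backend messages per request and flags the batch that contains ReadyForQuery. A standalone sketch of the framing rule (assumptions: a plain byte slice instead of the real `BytesMut`/`Header` machinery; each backend message is a 1-byte tag plus a 4-byte big-endian length that counts itself but not the tag, hence the `+ 1`; tag `b'Z'` is ReadyForQuery):

```
// Returns the number of bytes forming complete messages and whether the
// span ends a request (i.e. it contains a ReadyForQuery message).
fn split_request(src: &[u8]) -> Option<(usize, bool)> {
    let mut idx = 0;
    let mut request_complete = false;
    while src.len() - idx >= 5 {
        let tag = src[idx];
        let len = u32::from_be_bytes(src[idx + 1..idx + 5].try_into().unwrap()) as usize + 1;
        if src.len() - idx < len {
            break; // incomplete message, wait for more bytes
        }
        idx += len;
        if tag == b'Z' {
            request_complete = true;
            break;
        }
    }
    (idx > 0).then_some((idx, request_complete))
}
```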

View File

@@ -59,11 +59,9 @@ where
connect_timeout: config.connect_timeout,
};
let (client_tx, conn_rx) = mpsc::unbounded_channel();
let (conn_tx, client_rx) = mpsc::channel(4);
let (sender, receiver) = mpsc::unbounded_channel();
let client = Client::new(
client_tx,
client_rx,
sender,
socket_config,
config.ssl_mode,
process_id,
@@ -76,7 +74,7 @@ where
.map(|m| BackendMessage::Async(Message::NoticeResponse(m)))
.collect();
let connection = Connection::new(stream, delayed, parameters, conn_tx, conn_rx);
let connection = Connection::new(stream, delayed, parameters, receiver);
Ok((client, connection))
}

View File

@@ -4,6 +4,7 @@ use std::pin::Pin;
use std::task::{Context, Poll};
use bytes::BytesMut;
use fallible_iterator::FallibleIterator;
use futures_util::{Sink, Stream, ready};
use postgres_protocol2::message::backend::Message;
use postgres_protocol2::message::frontend;
@@ -18,12 +19,30 @@ use crate::error::DbError;
use crate::maybe_tls_stream::MaybeTlsStream;
use crate::{AsyncMessage, Error, Notification};
pub enum RequestMessages {
Single(FrontendMessage),
}
pub struct Request {
pub messages: RequestMessages,
pub sender: mpsc::Sender<BackendMessages>,
}
pub struct Response {
sender: PollSender<BackendMessages>,
}
#[derive(PartialEq, Debug)]
enum State {
Active,
Closing,
}
enum WriteReady {
Terminating,
WaitingOnRead,
}
/// A connection to a PostgreSQL database.
///
/// This is one half of what is returned when a new connection is established. It performs the actual IO with the
@@ -37,11 +56,9 @@ pub struct Connection<S, T> {
pub stream: Framed<MaybeTlsStream<S, T>, PostgresCodec>,
/// HACK: we need this in the Neon Proxy to forward params.
pub parameters: HashMap<String, String>,
sender: PollSender<BackendMessages>,
receiver: mpsc::UnboundedReceiver<FrontendMessage>,
receiver: mpsc::UnboundedReceiver<Request>,
pending_responses: VecDeque<BackendMessage>,
responses: VecDeque<Response>,
state: State,
}
@@ -54,15 +71,14 @@ where
stream: Framed<MaybeTlsStream<S, T>, PostgresCodec>,
pending_responses: VecDeque<BackendMessage>,
parameters: HashMap<String, String>,
sender: mpsc::Sender<BackendMessages>,
receiver: mpsc::UnboundedReceiver<FrontendMessage>,
receiver: mpsc::UnboundedReceiver<Request>,
) -> Connection<S, T> {
Connection {
stream,
parameters,
sender: PollSender::new(sender),
receiver,
pending_responses,
responses: VecDeque::new(),
state: State::Active,
}
}
@@ -94,7 +110,7 @@ where
}
};
let messages = match message {
let (mut messages, request_complete) = match message {
BackendMessage::Async(Message::NoticeResponse(body)) => {
let error = DbError::parse(&mut body.fields()).map_err(Error::parse)?;
return Poll::Ready(Ok(AsyncMessage::Notice(error)));
@@ -115,19 +131,41 @@ where
continue;
}
BackendMessage::Async(_) => unreachable!(),
BackendMessage::Normal { messages } => messages,
BackendMessage::Normal {
messages,
request_complete,
} => (messages, request_complete),
};
match self.sender.poll_reserve(cx) {
let mut response = match self.responses.pop_front() {
Some(response) => response,
None => match messages.next().map_err(Error::parse)? {
Some(Message::ErrorResponse(error)) => {
return Poll::Ready(Err(Error::db(error)));
}
_ => return Poll::Ready(Err(Error::unexpected_message())),
},
};
match response.sender.poll_reserve(cx) {
Poll::Ready(Ok(())) => {
let _ = self.sender.send_item(messages);
let _ = response.sender.send_item(messages);
if !request_complete {
self.responses.push_front(response);
}
}
Poll::Ready(Err(_)) => {
return Poll::Ready(Err(Error::closed()));
// we need to keep paging through the rest of the messages even if the receiver's hung up
if !request_complete {
self.responses.push_front(response);
}
}
Poll::Pending => {
self.pending_responses
.push_back(BackendMessage::Normal { messages });
self.responses.push_front(response);
self.pending_responses.push_back(BackendMessage::Normal {
messages,
request_complete,
});
trace!("poll_read: waiting on sender");
return Poll::Pending;
}
@@ -136,7 +174,7 @@ where
}
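
The routing above can be modeled without tokio: the connection keeps a FIFO of per-request senders and forwards each decoded batch to the front one, retiring it when the batch carrying ReadyForQuery arrives. A standalone sketch under those simplifications (plain `std::sync::mpsc` in place of `PollSender`, owned byte vectors in place of `BackendMessages`):

```
use std::collections::VecDeque;
use std::sync::mpsc::Sender;

struct Batch {
    messages: Vec<u8>,
    request_complete: bool,
}

// Forward each batch to the sender at the front of the queue; a batch flagged
// request_complete finishes that request, so its sender is retired.
fn route(batches: Vec<Batch>, senders: &mut VecDeque<Sender<Vec<u8>>>) {
    for batch in batches {
        if let Some(tx) = senders.front() {
            // The receiver may already be gone; keep draining regardless so
            // later requests still line up with their senders.
            let _ = tx.send(batch.messages);
            if batch.request_complete {
                senders.pop_front();
            }
        }
    }
}
```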
/// Fetch the next client request and enqueue the response sender.
fn poll_request(&mut self, cx: &mut Context<'_>) -> Poll<Option<FrontendMessage>> {
fn poll_request(&mut self, cx: &mut Context<'_>) -> Poll<Option<RequestMessages>> {
if self.receiver.is_closed() {
return Poll::Ready(None);
}
@@ -144,7 +182,10 @@ where
match self.receiver.poll_recv(cx) {
Poll::Ready(Some(request)) => {
trace!("polled new request");
Poll::Ready(Some(request))
self.responses.push_back(Response {
sender: PollSender::new(request.sender),
});
Poll::Ready(Some(request.messages))
}
Poll::Ready(None) => Poll::Ready(None),
Poll::Pending => Poll::Pending,
@@ -153,7 +194,7 @@ where
/// Process client requests and write them to the postgres connection, flushing if necessary.
/// client -> postgres
fn poll_write(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
fn poll_write(&mut self, cx: &mut Context<'_>) -> Poll<Result<WriteReady, Error>> {
loop {
if Pin::new(&mut self.stream)
.poll_ready(cx)
@@ -168,14 +209,14 @@ where
match self.poll_request(cx) {
// send the message to postgres
Poll::Ready(Some(request)) => {
Poll::Ready(Some(RequestMessages::Single(request))) => {
Pin::new(&mut self.stream)
.start_send(request)
.map_err(Error::io)?;
}
// No more messages from the client, and no more responses to wait for.
// Send a terminate message to postgres
Poll::Ready(None) => {
Poll::Ready(None) if self.responses.is_empty() => {
trace!("poll_write: at eof, terminating");
let mut request = BytesMut::new();
frontend::terminate(&mut request);
@@ -187,7 +228,16 @@ where
trace!("poll_write: sent eof, closing");
trace!("poll_write: done");
return Poll::Ready(Ok(()));
return Poll::Ready(Ok(WriteReady::Terminating));
}
// No more messages from the client, but there are still some responses to wait for.
Poll::Ready(None) => {
trace!(
"poll_write: at eof, pending responses {}",
self.responses.len()
);
ready!(self.poll_flush(cx))?;
return Poll::Ready(Ok(WriteReady::WaitingOnRead));
}
// Still waiting for a message from the client.
Poll::Pending => {
@@ -248,7 +298,7 @@ where
// if the state is still active, try read from and write to postgres.
let message = self.poll_read(cx)?;
let closing = self.poll_write(cx)?;
if let Poll::Ready(()) = closing {
if let Poll::Ready(WriteReady::Terminating) = closing {
self.state = State::Closing;
}

View File

@@ -86,27 +86,6 @@ pub struct DbError {
}
impl DbError {
pub fn new_test_error(code: SqlState, message: String) -> Self {
DbError {
severity: "ERROR".to_string(),
parsed_severity: Some(Severity::Error),
code,
message,
detail: None,
hint: None,
position: None,
where_: None,
schema: None,
table: None,
column: None,
datatype: None,
constraint: None,
file: None,
line: None,
routine: None,
}
}
pub(crate) fn parse(fields: &mut ErrorFields<'_>) -> io::Result<DbError> {
let mut severity = None;
let mut parsed_severity = None;

View File

@@ -1,6 +1,9 @@
#![allow(async_fn_in_trait)]
use postgres_protocol2::Oid;
use crate::query::RowStream;
use crate::types::Type;
use crate::{Client, Error, Transaction};
mod private {
@@ -12,17 +15,20 @@ mod private {
/// This trait is "sealed", and cannot be implemented outside of this crate.
pub trait GenericClient: private::Sealed {
/// Like `Client::query_raw_txt`.
async fn query_raw_txt<S, I>(&mut self, statement: &str, params: I) -> Result<RowStream, Error>
async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
where
S: AsRef<str> + Sync + Send,
I: IntoIterator<Item = Option<S>> + Sync + Send,
I::IntoIter: ExactSizeIterator + Sync + Send;
/// Query for type information
async fn get_type(&mut self, oid: Oid) -> Result<Type, Error>;
}
impl private::Sealed for Client {}
impl GenericClient for Client {
async fn query_raw_txt<S, I>(&mut self, statement: &str, params: I) -> Result<RowStream, Error>
async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
where
S: AsRef<str> + Sync + Send,
I: IntoIterator<Item = Option<S>> + Sync + Send,
@@ -30,12 +36,17 @@ impl GenericClient for Client {
{
self.query_raw_txt(statement, params).await
}
/// Query for type information
async fn get_type(&mut self, oid: Oid) -> Result<Type, Error> {
self.get_type_inner(oid).await
}
}
impl private::Sealed for Transaction<'_> {}
impl GenericClient for Transaction<'_> {
async fn query_raw_txt<S, I>(&mut self, statement: &str, params: I) -> Result<RowStream, Error>
async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
where
S: AsRef<str> + Sync + Send,
I: IntoIterator<Item = Option<S>> + Sync + Send,
@@ -43,4 +54,9 @@ impl GenericClient for Transaction<'_> {
{
self.query_raw_txt(statement, params).await
}
/// Query for type information
async fn get_type(&mut self, oid: Oid) -> Result<Type, Error> {
self.client_mut().get_type(oid).await
}
}

View File

@@ -18,6 +18,7 @@ pub use crate::statement::{Column, Statement};
pub use crate::tls::NoTls;
pub use crate::transaction::Transaction;
pub use crate::transaction_builder::{IsolationLevel, TransactionBuilder};
use crate::types::ToSql;
/// After executing a query, the connection will be in one of these states
#[derive(Clone, Copy, Debug, PartialEq)]
@@ -119,3 +120,9 @@ pub enum SimpleQueryMessage {
/// The number of rows modified or selected is returned.
CommandComplete(u64),
}
fn slice_iter<'a>(
s: &'a [&'a (dyn ToSql + Sync)],
) -> impl ExactSizeIterator<Item = &'a (dyn ToSql + Sync)> + 'a {
s.iter().map(|s| *s as _)
}
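
A short note on `slice_iter`: it adapts a borrowed parameter slice into the `ExactSizeIterator` of trait objects that `query` and `encode_bind` expect. A hypothetical call shape, usable only inside this crate since `slice_iter` is private:

```
use crate::types::ToSql;

// One borrowed slice, reused as an exact-size iterator of trait objects.
fn demo(oid: u32) {
    let params: &[&(dyn ToSql + Sync)] = &[&oid];
    let it = slice_iter(params);
    assert_eq!(it.len(), 1);
}
```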

View File

@@ -1,14 +1,19 @@
use bytes::BytesMut;
use fallible_iterator::FallibleIterator;
use postgres_protocol2::IsNull;
use postgres_protocol2::message::backend::{Message, RowDescriptionBody};
use postgres_protocol2::message::frontend;
use postgres_protocol2::types::oid_to_sql;
use postgres_types2::Format;
use std::future::Future;
use std::pin::Pin;
use std::sync::Arc;
use crate::client::{CachedTypeInfo, PartialQuery, Responses};
use bytes::Bytes;
use fallible_iterator::FallibleIterator;
use futures_util::{TryStreamExt, pin_mut};
use postgres_protocol2::message::backend::Message;
use postgres_protocol2::message::frontend;
use tracing::debug;
use crate::client::{CachedTypeInfo, InnerClient};
use crate::codec::FrontendMessage;
use crate::connection::RequestMessages;
use crate::types::{Kind, Oid, Type};
use crate::{Column, Error, Row, Statement};
use crate::{Column, Error, Statement, query, slice_iter};
pub(crate) const TYPEINFO_QUERY: &str = "\
SELECT t.typname, t.typtype, t.typelem, r.rngsubtype, t.typbasetype, n.nspname, t.typrelid
@@ -18,51 +23,22 @@ INNER JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid
WHERE t.oid = $1
";
/// we need to make sure we close this prepared statement.
struct CloseStmt<'a, 'b> {
client: Option<&'a mut PartialQuery<'b>>,
name: &'static str,
}
impl<'a> CloseStmt<'a, '_> {
fn close(mut self) -> Result<&'a mut Responses, Error> {
let client = self.client.take().unwrap();
client.send_with_flush(|buf| {
frontend::close(b'S', self.name, buf).map_err(Error::encode)?;
Ok(())
})
}
}
impl Drop for CloseStmt<'_, '_> {
fn drop(&mut self) {
if let Some(client) = self.client.take() {
let _ = client.send_with_flush(|buf| {
frontend::close(b'S', self.name, buf).map_err(Error::encode)?;
Ok(())
});
}
}
}
async fn prepare_typecheck(
client: &mut PartialQuery<'_>,
client: &Arc<InnerClient>,
name: &'static str,
query: &str,
types: &[Type],
) -> Result<Statement, Error> {
let responses = client.send_with_flush(|buf| {
frontend::parse(name, query, [], buf).map_err(Error::encode)?;
frontend::describe(b'S', name, buf).map_err(Error::encode)?;
Ok(())
})?;
let buf = encode(client, name, query, types)?;
let mut responses = client.send(RequestMessages::Single(FrontendMessage::Raw(buf)))?;
match responses.next().await? {
Message::ParseComplete => {}
_ => return Err(Error::unexpected_message()),
}
match responses.next().await? {
Message::ParameterDescription(_) => {}
let parameter_description = match responses.next().await? {
Message::ParameterDescription(body) => body,
_ => return Err(Error::unexpected_message()),
};
@@ -72,6 +48,13 @@ async fn prepare_typecheck(
_ => return Err(Error::unexpected_message()),
};
let mut parameters = vec![];
let mut it = parameter_description.parameters();
while let Some(oid) = it.next().map_err(Error::parse)? {
let type_ = Type::from_oid(oid).ok_or_else(Error::unexpected_message)?;
parameters.push(type_);
}
let mut columns = vec![];
if let Some(row_description) = row_description {
let mut it = row_description.fields();
@@ -82,168 +65,98 @@ async fn prepare_typecheck(
}
}
Ok(Statement::new(name, columns))
Ok(Statement::new(client, name, parameters, columns))
}
fn try_from_cache(typecache: &CachedTypeInfo, oid: Oid) -> Option<Type> {
fn encode(client: &InnerClient, name: &str, query: &str, types: &[Type]) -> Result<Bytes, Error> {
if types.is_empty() {
debug!("preparing query {}: {}", name, query);
} else {
debug!("preparing query {} with types {:?}: {}", name, types, query);
}
client.with_buf(|buf| {
frontend::parse(name, query, types.iter().map(Type::oid), buf).map_err(Error::encode)?;
frontend::describe(b'S', name, buf).map_err(Error::encode)?;
frontend::sync(buf);
Ok(buf.split().freeze())
})
}
pub async fn get_type(
client: &Arc<InnerClient>,
typecache: &mut CachedTypeInfo,
oid: Oid,
) -> Result<Type, Error> {
if let Some(type_) = Type::from_oid(oid) {
return Some(type_);
return Ok(type_);
}
if let Some(type_) = typecache.types.get(&oid) {
return Some(type_.clone());
return Ok(type_.clone());
};
None
}
let stmt = typeinfo_statement(client, typecache).await?;
pub async fn parse_row_description(
client: &mut PartialQuery<'_>,
typecache: &mut CachedTypeInfo,
row_description: Option<RowDescriptionBody>,
) -> Result<Vec<Column>, Error> {
let mut columns = vec![];
let rows = query::query(client, stmt, slice_iter(&[&oid])).await?;
pin_mut!(rows);
if let Some(row_description) = row_description {
let mut it = row_description.fields();
while let Some(field) = it.next().map_err(Error::parse)? {
let type_ = try_from_cache(typecache, field.type_oid()).unwrap_or(Type::UNKNOWN);
let column = Column::new(field.name().to_string(), type_, field);
columns.push(column);
}
}
let all_known = columns.iter().all(|c| c.type_ != Type::UNKNOWN);
if all_known {
// all known, return early.
return Ok(columns);
}
let typeinfo = "neon_proxy_typeinfo";
// make sure to close the typeinfo statement before exiting.
let mut guard = CloseStmt {
name: typeinfo,
client: None,
};
let client = guard.client.insert(client);
// get the typeinfo statement.
let stmt = prepare_typecheck(client, typeinfo, TYPEINFO_QUERY).await?;
for column in &mut columns {
column.type_ = get_type(client, typecache, &stmt, column.type_oid()).await?;
}
// cancel the close guard.
let responses = guard.close()?;
match responses.next().await? {
Message::CloseComplete => {}
_ => return Err(Error::unexpected_message()),
}
Ok(columns)
}
async fn get_type(
client: &mut PartialQuery<'_>,
typecache: &mut CachedTypeInfo,
stmt: &Statement,
mut oid: Oid,
) -> Result<Type, Error> {
let mut stack = vec![];
let mut type_ = loop {
if let Some(type_) = try_from_cache(typecache, oid) {
break type_;
}
let row = exec(client, stmt, oid).await?;
if stack.len() > 8 {
return Err(Error::unexpected_message());
}
let name: String = row.try_get(0)?;
let type_: i8 = row.try_get(1)?;
let elem_oid: Oid = row.try_get(2)?;
let rngsubtype: Option<Oid> = row.try_get(3)?;
let basetype: Oid = row.try_get(4)?;
let schema: String = row.try_get(5)?;
let relid: Oid = row.try_get(6)?;
let kind = if type_ == b'e' as i8 {
Kind::Enum
} else if type_ == b'p' as i8 {
Kind::Pseudo
} else if basetype != 0 {
Kind::Domain(basetype)
} else if elem_oid != 0 {
stack.push((name, oid, schema));
oid = elem_oid;
continue;
} else if relid != 0 {
Kind::Composite(relid)
} else if let Some(rngsubtype) = rngsubtype {
Kind::Range(rngsubtype)
} else {
Kind::Simple
};
let type_ = Type::new(name, oid, kind, schema);
typecache.types.insert(oid, type_.clone());
break type_;
let row = match rows.try_next().await? {
Some(row) => row,
None => return Err(Error::unexpected_message()),
};
while let Some((name, oid, schema)) = stack.pop() {
type_ = Type::new(name, oid, Kind::Array(type_), schema);
typecache.types.insert(oid, type_.clone());
}
let name: String = row.try_get(0)?;
let type_: i8 = row.try_get(1)?;
let elem_oid: Oid = row.try_get(2)?;
let rngsubtype: Option<Oid> = row.try_get(3)?;
let basetype: Oid = row.try_get(4)?;
let schema: String = row.try_get(5)?;
let relid: Oid = row.try_get(6)?;
let kind = if type_ == b'e' as i8 {
Kind::Enum
} else if type_ == b'p' as i8 {
Kind::Pseudo
} else if basetype != 0 {
Kind::Domain(basetype)
} else if elem_oid != 0 {
let type_ = get_type_rec(client, typecache, elem_oid).await?;
Kind::Array(type_)
} else if relid != 0 {
Kind::Composite(relid)
} else if let Some(rngsubtype) = rngsubtype {
let type_ = get_type_rec(client, typecache, rngsubtype).await?;
Kind::Range(type_)
} else {
Kind::Simple
};
let type_ = Type::new(name, oid, kind, schema);
typecache.types.insert(oid, type_.clone());
Ok(type_)
}
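
The branch order above encodes `pg_type` semantics: `typtype` takes precedence, then domain base type, then array element, then composite relation, then range subtype. A standalone sketch of the same decision table (the `SimpleKind` enum is a simplified stand-in for the crate's `Kind`; recursion into element and subtype lookups is replaced by raw OIDs):

```
// Simplified stand-in for the crate's Kind.
enum SimpleKind {
    Enum,
    Pseudo,
    Domain(u32),
    Array(u32),
    Composite(u32),
    Range(u32),
    Simple,
}

fn classify(
    typtype: i8,
    typelem: u32,
    rngsubtype: Option<u32>,
    typbasetype: u32,
    typrelid: u32,
) -> SimpleKind {
    if typtype == b'e' as i8 {
        SimpleKind::Enum
    } else if typtype == b'p' as i8 {
        SimpleKind::Pseudo
    } else if typbasetype != 0 {
        SimpleKind::Domain(typbasetype)
    } else if typelem != 0 {
        SimpleKind::Array(typelem)
    } else if typrelid != 0 {
        SimpleKind::Composite(typrelid)
    } else if let Some(sub) = rngsubtype {
        SimpleKind::Range(sub)
    } else {
        SimpleKind::Simple
    }
}
```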
/// exec the typeinfo statement returning one row.
async fn exec(
client: &mut PartialQuery<'_>,
statement: &Statement,
param: Oid,
) -> Result<Row, Error> {
let responses = client.send_with_flush(|buf| {
encode_bind(statement, param, "", buf);
frontend::execute("", 0, buf).map_err(Error::encode)?;
Ok(())
})?;
fn get_type_rec<'a>(
client: &'a Arc<InnerClient>,
typecache: &'a mut CachedTypeInfo,
oid: Oid,
) -> Pin<Box<dyn Future<Output = Result<Type, Error>> + Send + 'a>> {
Box::pin(get_type(client, typecache, oid))
}
match responses.next().await? {
Message::BindComplete => {}
_ => return Err(Error::unexpected_message()),
async fn typeinfo_statement(
client: &Arc<InnerClient>,
typecache: &mut CachedTypeInfo,
) -> Result<Statement, Error> {
if let Some(stmt) = &typecache.typeinfo {
return Ok(stmt.clone());
}
let row = match responses.next().await? {
Message::DataRow(body) => Row::new(statement.clone(), body, Format::Binary)?,
_ => return Err(Error::unexpected_message()),
};
let typeinfo = "neon_proxy_typeinfo";
let stmt = prepare_typecheck(client, typeinfo, TYPEINFO_QUERY, &[]).await?;
match responses.next().await? {
Message::CommandComplete(_) => {}
_ => return Err(Error::unexpected_message()),
};
Ok(row)
}
fn encode_bind(statement: &Statement, param: Oid, portal: &str, buf: &mut BytesMut) {
frontend::bind(
portal,
statement.name(),
[Format::Binary as i16],
[param],
|param, buf| {
oid_to_sql(param, buf);
Ok(IsNull::No)
},
[Format::Binary as i16],
buf,
)
.unwrap();
typecache.typeinfo = Some(stmt.clone());
Ok(stmt)
}

View File

@@ -1,43 +1,76 @@
use std::fmt;
use std::marker::PhantomPinned;
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use bytes::BufMut;
use bytes::{BufMut, Bytes, BytesMut};
use fallible_iterator::FallibleIterator;
use futures_util::{Stream, ready};
use pin_project_lite::pin_project;
use postgres_protocol2::message::backend::Message;
use postgres_protocol2::message::frontend;
use postgres_types2::Format;
use postgres_types2::{Format, ToSql, Type};
use tracing::debug;
use crate::client::{CachedTypeInfo, InnerClient, Responses};
use crate::{Error, ReadyForQueryStatus, Row, Statement};
use crate::client::{InnerClient, Responses};
use crate::codec::FrontendMessage;
use crate::connection::RequestMessages;
use crate::types::IsNull;
use crate::{Column, Error, ReadyForQueryStatus, Row, Statement};
pub async fn query_txt<'a, S, I>(
client: &'a mut InnerClient,
typecache: &mut CachedTypeInfo,
struct BorrowToSqlParamsDebug<'a>(&'a [&'a (dyn ToSql + Sync)]);
impl fmt::Debug for BorrowToSqlParamsDebug<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_list().entries(self.0.iter()).finish()
}
}
pub async fn query<'a, I>(
client: &InnerClient,
statement: Statement,
params: I,
) -> Result<RowStream, Error>
where
I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
I::IntoIter: ExactSizeIterator,
{
let buf = if tracing::enabled!(tracing::Level::DEBUG) {
let params = params.into_iter().collect::<Vec<_>>();
debug!(
"executing statement {} with parameters: {:?}",
statement.name(),
BorrowToSqlParamsDebug(params.as_slice()),
);
encode(client, &statement, params)?
} else {
encode(client, &statement, params)?
};
let responses = start(client, buf).await?;
Ok(RowStream {
statement,
responses,
command_tag: None,
status: ReadyForQueryStatus::Unknown,
output_format: Format::Binary,
_p: PhantomPinned,
})
}
pub async fn query_txt<S, I>(
client: &Arc<InnerClient>,
query: &str,
params: I,
) -> Result<RowStream<'a>, Error>
) -> Result<RowStream, Error>
where
S: AsRef<str>,
I: IntoIterator<Item = Option<S>>,
I::IntoIter: ExactSizeIterator,
{
let params = params.into_iter();
let mut client = client.start()?;
// Flow:
// 1. Parse the query
// 2. Inspect the row description for OIDs
    // 3. If there are any OIDs we don't already know about, perform the typeinfo routine
// 4. Execute the query
// 5. Sync.
//
// The typeinfo routine:
// 1. Parse the typeinfo query
// 2. Execute the query on each OID
// 3. If the result does not match an OID we know, repeat 2.
// parse the query and get type info
let responses = client.send_with_flush(|buf| {
let buf = client.with_buf(|buf| {
frontend::parse(
"", // unnamed prepared statement
query, // query to parse
@@ -46,30 +79,7 @@ where
)
.map_err(Error::encode)?;
frontend::describe(b'S', "", buf).map_err(Error::encode)?;
Ok(())
})?;
match responses.next().await? {
Message::ParseComplete => {}
_ => return Err(Error::unexpected_message()),
}
match responses.next().await? {
Message::ParameterDescription(_) => {}
_ => return Err(Error::unexpected_message()),
};
let row_description = match responses.next().await? {
Message::RowDescription(body) => Some(body),
Message::NoData => None,
_ => return Err(Error::unexpected_message()),
};
let columns =
crate::prepare::parse_row_description(&mut client, typecache, row_description).await?;
let responses = client.send_with_sync(|buf| {
// Bind, pass params as text, retrieve as text
// Bind, pass params as text, retrieve as binary
match frontend::bind(
"", // empty string selects the unnamed portal
"", // unnamed prepared statement
@@ -92,55 +102,173 @@ where
// Execute
frontend::execute("", 0, buf).map_err(Error::encode)?;
// Sync
frontend::sync(buf);
Ok(())
Ok(buf.split().freeze())
})?;
// now read the responses
let mut responses = client.send(RequestMessages::Single(FrontendMessage::Raw(buf)))?;
match responses.next().await? {
Message::ParseComplete => {}
_ => return Err(Error::unexpected_message()),
}
let parameter_description = match responses.next().await? {
Message::ParameterDescription(body) => body,
_ => return Err(Error::unexpected_message()),
};
let row_description = match responses.next().await? {
Message::RowDescription(body) => Some(body),
Message::NoData => None,
_ => return Err(Error::unexpected_message()),
};
match responses.next().await? {
Message::BindComplete => {}
_ => return Err(Error::unexpected_message()),
}
let mut parameters = vec![];
let mut it = parameter_description.parameters();
while let Some(oid) = it.next().map_err(Error::parse)? {
let type_ = Type::from_oid(oid).unwrap_or(Type::UNKNOWN);
parameters.push(type_);
}
let mut columns = vec![];
if let Some(row_description) = row_description {
let mut it = row_description.fields();
while let Some(field) = it.next().map_err(Error::parse)? {
let type_ = Type::from_oid(field.type_oid()).unwrap_or(Type::UNKNOWN);
let column = Column::new(field.name().to_string(), type_, field);
columns.push(column);
}
}
Ok(RowStream {
statement: Statement::new_anonymous(parameters, columns),
responses,
statement: Statement::new("", columns),
command_tag: None,
status: ReadyForQueryStatus::Unknown,
output_format: Format::Text,
_p: PhantomPinned,
})
}
/// A stream of table rows.
pub struct RowStream<'a> {
responses: &'a mut Responses,
output_format: Format,
pub statement: Statement,
pub command_tag: Option<String>,
pub status: ReadyForQueryStatus,
async fn start(client: &InnerClient, buf: Bytes) -> Result<Responses, Error> {
let mut responses = client.send(RequestMessages::Single(FrontendMessage::Raw(buf)))?;
match responses.next().await? {
Message::BindComplete => {}
_ => return Err(Error::unexpected_message()),
}
Ok(responses)
}
impl Stream for RowStream<'_> {
pub fn encode<'a, I>(client: &InnerClient, statement: &Statement, params: I) -> Result<Bytes, Error>
where
I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
I::IntoIter: ExactSizeIterator,
{
client.with_buf(|buf| {
encode_bind(statement, params, "", buf)?;
frontend::execute("", 0, buf).map_err(Error::encode)?;
frontend::sync(buf);
Ok(buf.split().freeze())
})
}
pub fn encode_bind<'a, I>(
statement: &Statement,
params: I,
portal: &str,
buf: &mut BytesMut,
) -> Result<(), Error>
where
I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
I::IntoIter: ExactSizeIterator,
{
let param_types = statement.params();
let params = params.into_iter();
assert!(
param_types.len() == params.len(),
"expected {} parameters but got {}",
param_types.len(),
params.len()
);
let (param_formats, params): (Vec<_>, Vec<_>) = params
.zip(param_types.iter())
.map(|(p, ty)| (p.encode_format(ty) as i16, p))
.unzip();
let params = params.into_iter();
let mut error_idx = 0;
let r = frontend::bind(
portal,
statement.name(),
param_formats,
params.zip(param_types).enumerate(),
|(idx, (param, ty)), buf| match param.to_sql_checked(ty, buf) {
Ok(IsNull::No) => Ok(postgres_protocol2::IsNull::No),
Ok(IsNull::Yes) => Ok(postgres_protocol2::IsNull::Yes),
Err(e) => {
error_idx = idx;
Err(e)
}
},
Some(1),
buf,
);
match r {
Ok(()) => Ok(()),
Err(frontend::BindError::Conversion(e)) => Err(Error::to_sql(e, error_idx)),
Err(frontend::BindError::Serialization(e)) => Err(Error::encode(e)),
}
}
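
One detail worth calling out in `encode_bind`: `frontend::bind` surfaces a single conversion error, so the closure records the index of the failing parameter in `error_idx` for a precise `Error::to_sql`. A standalone sketch of that error-index pattern under a simplified fallible serializer (the function and error shape are hypothetical):

```
// Serialize all parameters, remembering which one failed so the error can
// point at it.
fn serialize_all(params: &[&str]) -> Result<Vec<u8>, (usize, String)> {
    let mut out = Vec::new();
    let mut error_idx = 0;
    let r = params.iter().enumerate().try_for_each(|(idx, p)| {
        if p.is_empty() {
            error_idx = idx;
            return Err("empty parameter".to_string());
        }
        out.extend_from_slice(p.as_bytes());
        Ok(())
    });
    match r {
        Ok(()) => Ok(out),
        Err(e) => Err((error_idx, e)),
    }
}
```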
pin_project! {
/// A stream of table rows.
pub struct RowStream {
statement: Statement,
responses: Responses,
command_tag: Option<String>,
output_format: Format,
status: ReadyForQueryStatus,
#[pin]
_p: PhantomPinned,
}
}
impl Stream for RowStream {
type Item = Result<Row, Error>;
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
let this = self.get_mut();
let this = self.project();
loop {
match ready!(this.responses.poll_next(cx)?) {
Message::DataRow(body) => {
return Poll::Ready(Some(Ok(Row::new(
this.statement.clone(),
body,
this.output_format,
*this.output_format,
)?)));
}
Message::EmptyQueryResponse | Message::PortalSuspended => {}
Message::CommandComplete(body) => {
if let Ok(tag) = body.tag() {
this.command_tag = Some(tag.to_string());
*this.command_tag = Some(tag.to_string());
}
}
Message::ReadyForQuery(status) => {
this.status = status.into();
*this.status = status.into();
return Poll::Ready(None);
}
_ => return Poll::Ready(Some(Err(Error::unexpected_message()))),
@@ -148,3 +276,24 @@ impl Stream for RowStream<'_> {
}
}
}
impl RowStream {
/// Returns information about the columns of data in the row.
pub fn columns(&self) -> &[Column] {
self.statement.columns()
}
/// Returns the command tag of this query.
///
/// This is only available after the stream has been exhausted.
pub fn command_tag(&self) -> Option<String> {
self.command_tag.clone()
}
/// Returns if the connection is ready for querying, with the status of the connection.
///
/// This might be available only after the stream has been exhausted.
pub fn ready_status(&self) -> ReadyForQueryStatus {
self.status
}
}
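
The accessors above only carry final values once the stream has been fully drained, since ReadyForQuery is what sets them. A hedged usage sketch (assumes a `RowStream` obtained from one of the query functions; `Error` is the crate's own type):

```
use futures_util::{TryStreamExt, pin_mut};

async fn drain(rows: RowStream) -> Result<Option<String>, Error> {
    pin_mut!(rows);
    // Drive the stream to completion; ReadyForQuery sets the final status.
    while rows.try_next().await?.is_some() {}
    // Only now is the command tag (e.g. "SELECT 1") populated.
    Ok(rows.command_tag())
}
```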

View File

@@ -1,14 +1,19 @@
use std::marker::PhantomPinned;
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use bytes::Bytes;
use fallible_iterator::FallibleIterator;
use futures_util::{Stream, ready};
use pin_project_lite::pin_project;
use postgres_protocol2::message::backend::Message;
use postgres_protocol2::message::frontend;
use tracing::debug;
use crate::client::{InnerClient, Responses};
use crate::codec::FrontendMessage;
use crate::connection::RequestMessages;
use crate::{Error, ReadyForQueryStatus, SimpleQueryMessage, SimpleQueryRow};
/// Information about a column of a single query row.
@@ -28,28 +33,28 @@ impl SimpleColumn {
}
}
pub async fn simple_query<'a>(
client: &'a mut InnerClient,
query: &str,
) -> Result<SimpleQueryStream<'a>, Error> {
pub async fn simple_query(client: &InnerClient, query: &str) -> Result<SimpleQueryStream, Error> {
debug!("executing simple query: {}", query);
let responses = client.send_simple_query(query)?;
let buf = encode(client, query)?;
let responses = client.send(RequestMessages::Single(FrontendMessage::Raw(buf)))?;
Ok(SimpleQueryStream {
responses,
columns: None,
status: ReadyForQueryStatus::Unknown,
_p: PhantomPinned,
})
}
pub async fn batch_execute(
client: &mut InnerClient,
client: &InnerClient,
query: &str,
) -> Result<ReadyForQueryStatus, Error> {
debug!("executing statement batch: {}", query);
let responses = client.send_simple_query(query)?;
let buf = encode(client, query)?;
let mut responses = client.send(RequestMessages::Single(FrontendMessage::Raw(buf)))?;
loop {
match responses.next().await? {
@@ -63,16 +68,25 @@ pub async fn batch_execute(
}
}
pub(crate) fn encode(client: &InnerClient, query: &str) -> Result<Bytes, Error> {
client.with_buf(|buf| {
frontend::query(query, buf).map_err(Error::encode)?;
Ok(buf.split().freeze())
})
}
pin_project! {
/// A stream of simple query results.
pub struct SimpleQueryStream<'a> {
responses: &'a mut Responses,
pub struct SimpleQueryStream {
responses: Responses,
columns: Option<Arc<[SimpleColumn]>>,
status: ReadyForQueryStatus,
#[pin]
_p: PhantomPinned,
}
}
impl SimpleQueryStream<'_> {
impl SimpleQueryStream {
/// Returns if the connection is ready for querying, with the status of the connection.
///
/// This might be available only after the stream has been exhausted.
@@ -81,7 +95,7 @@ impl SimpleQueryStream<'_> {
}
}
impl Stream for SimpleQueryStream<'_> {
impl Stream for SimpleQueryStream {
type Item = Result<SimpleQueryMessage, Error>;
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {

View File

@@ -1,15 +1,35 @@
use std::fmt;
use std::sync::Arc;
use std::sync::{Arc, Weak};
use crate::types::Type;
use postgres_protocol2::Oid;
use postgres_protocol2::message::backend::Field;
use postgres_protocol2::message::frontend;
use crate::client::InnerClient;
use crate::codec::FrontendMessage;
use crate::connection::RequestMessages;
use crate::types::Type;
struct StatementInner {
client: Weak<InnerClient>,
name: &'static str,
params: Vec<Type>,
columns: Vec<Column>,
}
impl Drop for StatementInner {
fn drop(&mut self) {
if let Some(client) = self.client.upgrade() {
let buf = client.with_buf(|buf| {
frontend::close(b'S', self.name, buf).unwrap();
frontend::sync(buf);
buf.split().freeze()
});
let _ = client.send(RequestMessages::Single(FrontendMessage::Raw(buf)));
}
}
}
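
The `Drop` impl above is a best-effort cleanup: the statement holds only a `Weak` back-reference, so dropping it sends a Close message if the client still exists, without keeping the connection alive on its own. A generic, runnable sketch of the same pattern with simplified stand-in types:

```
use std::sync::{Arc, Weak};

struct Owner;
impl Owner {
    fn send_close(&self, name: &str) {
        println!("close {name}");
    }
}

struct Handle {
    owner: Weak<Owner>,
    name: &'static str,
}

impl Drop for Handle {
    fn drop(&mut self) {
        // If the owner is already gone, there is nothing to clean up.
        if let Some(owner) = self.owner.upgrade() {
            owner.send_close(self.name);
        }
    }
}

fn main() {
    let owner = Arc::new(Owner);
    let h = Handle { owner: Arc::downgrade(&owner), name: "stmt" };
    drop(h); // prints "close stmt"
}
```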
/// A prepared statement.
///
/// Prepared statements can only be used with the connection that created them.
@@ -17,14 +37,38 @@ struct StatementInner {
pub struct Statement(Arc<StatementInner>);
impl Statement {
pub(crate) fn new(name: &'static str, columns: Vec<Column>) -> Statement {
Statement(Arc::new(StatementInner { name, columns }))
pub(crate) fn new(
inner: &Arc<InnerClient>,
name: &'static str,
params: Vec<Type>,
columns: Vec<Column>,
) -> Statement {
Statement(Arc::new(StatementInner {
client: Arc::downgrade(inner),
name,
params,
columns,
}))
}
pub(crate) fn new_anonymous(params: Vec<Type>, columns: Vec<Column>) -> Statement {
Statement(Arc::new(StatementInner {
client: Weak::new(),
name: "<anonymous>",
params,
columns,
}))
}
pub(crate) fn name(&self) -> &str {
self.0.name
}
/// Returns the expected types of the statement's parameters.
pub fn params(&self) -> &[Type] {
&self.0.params
}
/// Returns information about the columns returned when the statement is queried.
pub fn columns(&self) -> &[Column] {
&self.0.columns
@@ -34,7 +78,7 @@ impl Statement {
/// Information about a column of a query.
pub struct Column {
name: String,
pub(crate) type_: Type,
type_: Type,
// raw fields from RowDescription
table_oid: Oid,

View File

@@ -1,3 +1,7 @@
use postgres_protocol2::message::frontend;
use crate::codec::FrontendMessage;
use crate::connection::RequestMessages;
use crate::query::RowStream;
use crate::{CancelToken, Client, Error, ReadyForQueryStatus};
@@ -16,7 +20,14 @@ impl Drop for Transaction<'_> {
return;
}
let _ = self.client.inner_mut().send_simple_query("ROLLBACK");
let buf = self.client.inner().with_buf(|buf| {
frontend::query("ROLLBACK", buf).unwrap();
buf.split().freeze()
});
let _ = self
.client
.inner()
.send(RequestMessages::Single(FrontendMessage::Raw(buf)));
}
}
@@ -43,11 +54,7 @@ impl<'a> Transaction<'a> {
}
/// Like `Client::query_raw_txt`.
pub async fn query_raw_txt<S, I>(
&mut self,
statement: &str,
params: I,
) -> Result<RowStream, Error>
pub async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
where
S: AsRef<str>,
I: IntoIterator<Item = Option<S>>,

View File

@@ -330,18 +330,11 @@ impl AzureBlobStorage {
if let Err(DownloadError::Timeout) = &next_item {
timeout_try_cnt += 1;
if timeout_try_cnt <= 5 {
continue 'outer;
continue;
}
}
let next_item = match next_item {
Ok(next_item) => next_item,
Err(e) => {
// The error is potentially retryable, so we must rewind the loop after yielding.
yield Err(e);
continue 'outer;
},
};
let next_item = next_item?;
// Log a warning if we saw two timeouts in a row before a successful request
if timeout_try_cnt > 2 {

View File

@@ -657,14 +657,7 @@ impl RemoteStorage for S3Bucket {
res = request => Ok(res),
_ = tokio::time::sleep(self.timeout) => Err(DownloadError::Timeout),
_ = cancel.cancelled() => Err(DownloadError::Cancelled),
};
if let Err(DownloadError::Timeout) = &response {
yield Err(DownloadError::Timeout);
continue 'outer;
}
let response = response?; // always yield cancellation errors and stop the stream
}?;
let response = response
.context("Failed to list S3 prefixes")

View File

@@ -299,7 +299,6 @@ pub struct PullTimelineRequest {
pub tenant_id: TenantId,
pub timeline_id: TimelineId,
pub http_hosts: Vec<String>,
pub ignore_tombstone: Option<bool>,
}
#[derive(Debug, Serialize, Deserialize)]

View File

@@ -1,7 +1,7 @@
use std::borrow::Cow;
use std::fs::{self, File};
use std::io::{self, Write};
use std::os::fd::AsFd;
use std::os::fd::AsRawFd;
use camino::{Utf8Path, Utf8PathBuf};
@@ -210,13 +210,13 @@ pub fn overwrite(
/// Syncs the filesystem for the given file descriptor.
#[cfg_attr(target_os = "macos", allow(unused_variables))]
pub fn syncfs(fd: impl AsFd) -> anyhow::Result<()> {
pub fn syncfs(fd: impl AsRawFd) -> anyhow::Result<()> {
// Linux guarantees durability for syncfs.
// POSIX doesn't have syncfs, and further does not actually guarantee durability of sync().
#[cfg(target_os = "linux")]
{
use anyhow::Context;
nix::unistd::syncfs(fd).context("syncfs")?;
nix::unistd::syncfs(fd.as_raw_fd()).context("syncfs")?;
}
#[cfg(target_os = "macos")]
{

View File

@@ -11,9 +11,9 @@ pub fn rename_noreplace<P1: ?Sized + NixPath, P2: ?Sized + NixPath>(
#[cfg(all(target_os = "linux", target_env = "gnu"))]
{
nix::fcntl::renameat2(
nix::fcntl::AT_FDCWD,
None,
src,
nix::fcntl::AT_FDCWD,
None,
dst,
nix::fcntl::RenameFlags::RENAME_NOREPLACE,
)

View File

@@ -1,6 +1,6 @@
//! A module to create and read lock files.
//!
//! File locking is done using [`nix::fcntl::Flock`] exclusive locks.
//! File locking is done using [`fcntl::flock`] exclusive locks.
//! The only consumer of this module is currently
//! [`pid_file`](crate::pid_file). See the module-level comment
//! there for potential pitfalls with lock files that are used
@@ -9,25 +9,26 @@
use std::fs;
use std::io::{Read, Write};
use std::ops::Deref;
use std::os::unix::prelude::AsRawFd;
use anyhow::Context;
use camino::{Utf8Path, Utf8PathBuf};
use nix::errno::Errno::EAGAIN;
use nix::fcntl::{Flock, FlockArg};
use nix::fcntl;
use crate::crashsafe;
/// A handle to an open and flocked, but not-yet-written lock file.
/// A handle to an open and unlocked, but not-yet-written lock file.
/// Returned by [`create_exclusive`].
#[must_use]
pub struct UnwrittenLockFile {
path: Utf8PathBuf,
file: Flock<fs::File>,
file: fs::File,
}
/// Returned by [`UnwrittenLockFile::write_content`].
#[must_use]
pub struct LockFileGuard(Flock<fs::File>);
pub struct LockFileGuard(fs::File);
impl Deref for LockFileGuard {
type Target = fs::File;
@@ -66,14 +67,17 @@ pub fn create_exclusive(lock_file_path: &Utf8Path) -> anyhow::Result<UnwrittenLo
.open(lock_file_path)
.context("open lock file")?;
let res = Flock::lock(lock_file, FlockArg::LockExclusiveNonblock);
let res = fcntl::flock(
lock_file.as_raw_fd(),
fcntl::FlockArg::LockExclusiveNonblock,
);
match res {
Ok(lock_file) => Ok(UnwrittenLockFile {
Ok(()) => Ok(UnwrittenLockFile {
path: lock_file_path.to_owned(),
file: lock_file,
}),
Err((_, EAGAIN)) => anyhow::bail!("file is already locked"),
Err((_, e)) => Err(e).context("flock error"),
Err(EAGAIN) => anyhow::bail!("file is already locked"),
Err(e) => Err(e).context("flock error"),
}
}
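
The reverted API takes a raw fd and returns `Err(Errno)` directly, with `EAGAIN` meaning another process already holds the lock. A hedged sketch of the call, assuming a nix version that still exposes `fcntl::flock` (it is deprecated in newer releases) and the `anyhow` crate:

```
use std::fs::File;
use std::os::unix::prelude::AsRawFd;

use anyhow::Context;
use nix::errno::Errno::EAGAIN;
use nix::fcntl::{self, FlockArg};

// Try to take an exclusive, non-blocking flock on an already-open file.
fn try_lock_exclusive(file: &File) -> anyhow::Result<()> {
    match fcntl::flock(file.as_raw_fd(), FlockArg::LockExclusiveNonblock) {
        Ok(()) => Ok(()),
        Err(EAGAIN) => anyhow::bail!("file is already locked"),
        Err(e) => Err(e).context("flock error"),
    }
}
```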
@@ -101,37 +105,32 @@ pub enum LockFileRead {
/// Check the [`LockFileRead`] variants for details.
pub fn read_and_hold_lock_file(path: &Utf8Path) -> anyhow::Result<LockFileRead> {
let res = fs::OpenOptions::new().read(true).open(path);
let lock_file = match res {
let mut lock_file = match res {
Ok(f) => f,
Err(e) => match e.kind() {
std::io::ErrorKind::NotFound => return Ok(LockFileRead::NotExist),
_ => return Err(e).context("open lock file"),
},
};
let res = Flock::lock(lock_file, FlockArg::LockExclusiveNonblock);
let res = fcntl::flock(
lock_file.as_raw_fd(),
fcntl::FlockArg::LockExclusiveNonblock,
);
// We need the content regardless of lock success / failure.
// But, read it after flock so that, if it succeeded, the content is consistent.
let mut content = String::new();
lock_file
.read_to_string(&mut content)
.context("read lock file")?;
match res {
Ok(mut locked_file) => {
let mut content = String::new();
locked_file
.read_to_string(&mut content)
.context("read lock file")?;
Ok(LockFileRead::NotHeldByAnyProcess(
LockFileGuard(locked_file),
content,
))
}
Err((mut not_locked_file, EAGAIN)) => {
let mut content = String::new();
not_locked_file
.read_to_string(&mut content)
.context("read lock file")?;
Ok(LockFileRead::LockedByOtherProcess {
not_locked_file,
content,
})
}
Err((_, e)) => Err(e).context("flock error"),
Ok(()) => Ok(LockFileRead::NotHeldByAnyProcess(
LockFileGuard(lock_file),
content,
)),
Err(EAGAIN) => Ok(LockFileRead::LockedByOtherProcess {
not_locked_file: lock_file,
content,
}),
Err(e) => Err(e).context("flock error"),
}
}

View File

@@ -127,12 +127,12 @@ macro_rules! __check_fields_present {
match check_fields_present0($extractors) {
Ok(FoundEverything) => Ok(()),
Ok(Unconfigured) if cfg!(feature = "testing") => {
Ok(Unconfigured) if cfg!(test) => {
// allow unconfigured in tests
Ok(())
},
Ok(Unconfigured) => {
panic!(r#"utils::tracing_span_assert: outside of #[cfg(feature = "testing")] expected tracing to be configured with tracing_error::ErrorLayer"#)
panic!("utils::tracing_span_assert: outside of #[cfg(test)] expected tracing to be configured with tracing_error::ErrorLayer")
},
Err(missing) => Err(missing)
}

View File

@@ -1,7 +1,6 @@
#![allow(clippy::todo)]
use std::ffi::CString;
use std::str::FromStr;
use postgres_ffi::WAL_SEGMENT_SIZE;
use utils::id::TenantTimelineId;
@@ -174,8 +173,6 @@ pub struct Config {
pub ttid: TenantTimelineId,
/// List of safekeepers in format `host:port`
pub safekeepers_list: Vec<String>,
/// libpq connection info options
pub safekeeper_conninfo_options: String,
/// Safekeeper reconnect timeout in milliseconds
pub safekeeper_reconnect_timeout: i32,
/// Safekeeper connection timeout in milliseconds
@@ -205,9 +202,6 @@ impl Wrapper {
.into_bytes_with_nul();
assert!(safekeepers_list_vec.len() == safekeepers_list_vec.capacity());
let safekeepers_list = safekeepers_list_vec.as_mut_ptr() as *mut std::ffi::c_char;
let safekeeper_conninfo_options = CString::from_str(&config.safekeeper_conninfo_options)
.unwrap()
.into_raw();
let callback_data = Box::into_raw(Box::new(api)) as *mut ::std::os::raw::c_void;
@@ -215,7 +209,6 @@ impl Wrapper {
neon_tenant,
neon_timeline,
safekeepers_list,
safekeeper_conninfo_options,
safekeeper_reconnect_timeout: config.safekeeper_reconnect_timeout,
safekeeper_connection_timeout: config.safekeeper_connection_timeout,
wal_segment_size: WAL_SEGMENT_SIZE as i32, // default 16MB
@@ -583,7 +576,6 @@ mod tests {
let config = crate::walproposer::Config {
ttid,
safekeepers_list: vec!["localhost:5000".to_string()],
safekeeper_conninfo_options: String::new(),
safekeeper_reconnect_timeout: 1000,
safekeeper_connection_timeout: 10000,
sync_safekeepers: true,

View File

@@ -17,69 +17,49 @@ anyhow.workspace = true
arc-swap.workspace = true
async-compression.workspace = true
async-stream.workspace = true
bincode.workspace = true
bit_field.workspace = true
bincode.workspace = true
byteorder.workspace = true
bytes.workspace = true
camino-tempfile.workspace = true
camino.workspace = true
camino-tempfile.workspace = true
chrono = { workspace = true, features = ["serde"] }
clap = { workspace = true, features = ["string"] }
consumption_metrics.workspace = true
crc32c.workspace = true
either.workspace = true
enum-map.workspace = true
enumset = { workspace = true, features = ["serde"]}
fail.workspace = true
futures.workspace = true
hashlink.workspace = true
hex.workspace = true
http-utils.workspace = true
humantime-serde.workspace = true
humantime.workspace = true
humantime-serde.workspace = true
hyper0.workspace = true
itertools.workspace = true
jsonwebtoken.workspace = true
md5.workspace = true
metrics.workspace = true
nix.workspace = true
num_cpus.workspace = true # hack to get the number of worker threads tokio uses
# hack to get the number of worker threads tokio uses
num_cpus.workspace = true
num-traits.workspace = true
once_cell.workspace = true
pageserver_api.workspace = true
pageserver_client.workspace = true # for ResponseErrorMessageExt TODO refactor that
pageserver_compaction.workspace = true
pageserver_page_api.workspace = true
pem.workspace = true
pin-project-lite.workspace = true
postgres_backend.workspace = true
postgres_connection.workspace = true
postgres_ffi.workspace = true
postgres_initdb.workspace = true
postgres-protocol.workspace = true
postgres-types.workspace = true
posthog_client_lite.workspace = true
postgres_initdb.workspace = true
pprof.workspace = true
pq_proto.workspace = true
rand.workspace = true
range-set-blaze = { version = "0.1.16", features = ["alloc"] }
regex.workspace = true
remote_storage.workspace = true
reqwest.workspace = true
rpds.workspace = true
rustls.workspace = true
scopeguard.workspace = true
send-future.workspace = true
serde.workspace = true
serde_json = { workspace = true, features = ["raw_value"] }
serde_path_to_error.workspace = true
serde_with.workspace = true
serde.workspace = true
smallvec.workspace = true
storage_broker.workspace = true
strum_macros.workspace = true
strum.workspace = true
sysinfo.workspace = true
tenant_size_model.workspace = true
tokio-tar.workspace = true
thiserror.workspace = true
tikv-jemallocator.workspace = true
tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] }
@@ -88,19 +68,34 @@ tokio-io-timeout.workspace = true
tokio-postgres.workspace = true
tokio-rustls.workspace = true
tokio-stream.workspace = true
tokio-tar.workspace = true
tokio-util.workspace = true
toml_edit = { workspace = true, features = [ "serde" ] }
tonic.workspace = true
tonic-reflection.workspace = true
tracing.workspace = true
tracing-utils.workspace = true
url.workspace = true
utils.workspace = true
wal_decoder.workspace = true
walkdir.workspace = true
metrics.workspace = true
pageserver_api.workspace = true
pageserver_client.workspace = true # for ResponseErrorMessageExt TODO refactor that
pageserver_compaction.workspace = true
pem.workspace = true
postgres_connection.workspace = true
postgres_ffi.workspace = true
pq_proto.workspace = true
remote_storage.workspace = true
storage_broker.workspace = true
tenant_size_model.workspace = true
http-utils.workspace = true
utils.workspace = true
workspace_hack.workspace = true
twox-hash.workspace = true
reqwest.workspace = true
rpds.workspace = true
enum-map.workspace = true
enumset = { workspace = true, features = ["serde"]}
strum.workspace = true
strum_macros.workspace = true
wal_decoder.workspace = true
smallvec.workspace = true
[target.'cfg(target_os = "linux")'.dependencies]
procfs.workspace = true

View File

@@ -14,7 +14,6 @@ use pageserver_api::key::Key;
use pageserver_api::models::virtual_file::IoMode;
use pageserver_api::shard::TenantShardId;
use pageserver_api::value::Value;
use strum::IntoEnumIterator;
use tokio_util::sync::CancellationToken;
use utils::bin_ser::BeSer;
use utils::id::{TenantId, TimelineId};
@@ -245,7 +244,13 @@ fn criterion_benchmark(c: &mut Criterion) {
];
let exploded_parameters = {
let mut out = Vec::new();
for io_mode in IoMode::iter() {
for io_mode in [
IoMode::Buffered,
#[cfg(target_os = "linux")]
IoMode::Direct,
#[cfg(target_os = "linux")]
IoMode::DirectRw,
] {
for param in expect.clone() {
let HandPickedParameters {
volume_mib,

View File

@@ -264,56 +264,10 @@ mod propagation_of_cached_label_value {
}
}
criterion_group!(histograms, histograms::bench_bucket_scalability);
mod histograms {
use std::time::Instant;
use criterion::{BenchmarkId, Criterion};
use metrics::core::Collector;
pub fn bench_bucket_scalability(c: &mut Criterion) {
let mut g = c.benchmark_group("bucket_scalability");
for n in [1, 4, 8, 16, 32, 64, 128, 256] {
g.bench_with_input(BenchmarkId::new("nbuckets", n), &n, |b, n| {
b.iter_custom(|iters| {
let buckets: Vec<f64> = (0..*n).map(|i| i as f64 * 100.0).collect();
let histo = metrics::Histogram::with_opts(
metrics::prometheus::HistogramOpts::new("name", "help")
.buckets(buckets.clone()),
)
.unwrap();
let start = Instant::now();
for i in 0..usize::try_from(iters).unwrap() {
histo.observe(buckets[i % buckets.len()]);
}
let elapsed = start.elapsed();
// self-test
let mfs = histo.collect();
assert_eq!(mfs.len(), 1);
let metrics = mfs[0].get_metric();
assert_eq!(metrics.len(), 1);
let histo = metrics[0].get_histogram();
let buckets = histo.get_bucket();
assert!(
buckets
.iter()
.enumerate()
.all(|(i, b)| b.get_cumulative_count()
>= i as u64 * (iters / buckets.len() as u64))
);
elapsed
})
});
}
}
}
criterion_main!(
label_values,
single_metric_multicore_scalability,
propagation_of_cached_label_value,
histograms,
propagation_of_cached_label_value
);
/*
@@ -336,14 +290,6 @@ propagation_of_cached_label_value__naive/nthreads/8 time: [211.50 ns 214.44 ns
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1 time: [14.135 ns 14.147 ns 14.160 ns]
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4 time: [14.243 ns 14.255 ns 14.268 ns]
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8 time: [14.470 ns 14.682 ns 14.895 ns]
bucket_scalability/nbuckets/1 time: [30.352 ns 30.353 ns 30.354 ns]
bucket_scalability/nbuckets/4 time: [30.464 ns 30.465 ns 30.467 ns]
bucket_scalability/nbuckets/8 time: [30.569 ns 30.575 ns 30.584 ns]
bucket_scalability/nbuckets/16 time: [30.961 ns 30.965 ns 30.969 ns]
bucket_scalability/nbuckets/32 time: [35.691 ns 35.707 ns 35.722 ns]
bucket_scalability/nbuckets/64 time: [47.829 ns 47.898 ns 47.974 ns]
bucket_scalability/nbuckets/128 time: [73.479 ns 73.512 ns 73.545 ns]
bucket_scalability/nbuckets/256 time: [127.92 ns 127.94 ns 127.96 ns]
Results on an i3en.3xlarge instance
@@ -398,14 +344,6 @@ propagation_of_cached_label_value__naive/nthreads/8 time: [434.87 ns 456.4
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1 time: [3.3767 ns 3.3974 ns 3.4220 ns]
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4 time: [3.6105 ns 4.2355 ns 5.1463 ns]
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8 time: [4.0889 ns 4.9714 ns 6.0779 ns]
bucket_scalability/nbuckets/1 time: [4.8455 ns 4.8542 ns 4.8646 ns]
bucket_scalability/nbuckets/4 time: [4.5663 ns 4.5722 ns 4.5787 ns]
bucket_scalability/nbuckets/8 time: [4.5531 ns 4.5670 ns 4.5842 ns]
bucket_scalability/nbuckets/16 time: [4.6392 ns 4.6524 ns 4.6685 ns]
bucket_scalability/nbuckets/32 time: [6.0302 ns 6.0439 ns 6.0589 ns]
bucket_scalability/nbuckets/64 time: [10.608 ns 10.644 ns 10.691 ns]
bucket_scalability/nbuckets/128 time: [22.178 ns 22.316 ns 22.483 ns]
bucket_scalability/nbuckets/256 time: [42.190 ns 42.328 ns 42.492 ns]
Results on a Hetzner AX102 AMD Ryzen 9 7950X3D 16-Core Processor
@@ -424,13 +362,5 @@ propagation_of_cached_label_value__naive/nthreads/8 time: [164.24 ns 170.1
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1 time: [2.2915 ns 2.2960 ns 2.3012 ns]
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4 time: [2.5726 ns 2.6158 ns 2.6624 ns]
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8 time: [2.7068 ns 2.8243 ns 2.9824 ns]
bucket_scalability/nbuckets/1 time: [6.3998 ns 6.4288 ns 6.4684 ns]
bucket_scalability/nbuckets/4 time: [6.3603 ns 6.3620 ns 6.3637 ns]
bucket_scalability/nbuckets/8 time: [6.1646 ns 6.1654 ns 6.1667 ns]
bucket_scalability/nbuckets/16 time: [6.1341 ns 6.1391 ns 6.1454 ns]
bucket_scalability/nbuckets/32 time: [8.2206 ns 8.2254 ns 8.2301 ns]
bucket_scalability/nbuckets/64 time: [13.988 ns 13.994 ns 14.000 ns]
bucket_scalability/nbuckets/128 time: [28.180 ns 28.216 ns 28.251 ns]
bucket_scalability/nbuckets/256 time: [54.914 ns 54.931 ns 54.951 ns]
*/

View File

@@ -1,6 +1,5 @@
use std::collections::HashMap;
use std::error::Error as _;
use std::time::Duration;
use bytes::Bytes;
use detach_ancestor::AncestorDetached;
@@ -820,25 +819,4 @@ impl Client {
.await
.map(|resp| resp.status())
}
pub async fn activate_post_import(
&self,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
activate_timeline_timeout: Duration,
) -> Result<TimelineInfo> {
let uri = format!(
"{}/v1/tenant/{}/timeline/{}/activate_post_import?timeline_activate_timeout_ms={}",
self.mgmt_api_endpoint,
tenant_shard_id,
timeline_id,
activate_timeline_timeout.as_millis()
);
self.request(Method::PUT, uri, ())
.await?
.json()
.await
.map_err(Error::ReceiveBody)
}
}
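For reference, a minimal sketch of how a caller might have invoked the removed `activate_post_import` helper shown above. The import paths, the surrounding function, and the 60-second timeout are assumptions for illustration:

```rust
use std::time::Duration;

use pageserver_api::models::TimelineInfo;
use pageserver_api::shard::TenantShardId;
use utils::id::TimelineId;

// `Client` is the management-API client from this file; the exact crate path
// is assumed here.
use pageserver_client::mgmt_api::{Client, Result};

async fn activate_after_import(
    client: &Client,
    tenant_shard_id: TenantShardId,
    timeline_id: TimelineId,
) -> Result<TimelineInfo> {
    // Wait up to 60 seconds (hypothetical value) for the imported timeline
    // to become active before returning its metadata.
    client
        .activate_post_import(tenant_shard_id, timeline_id, Duration::from_secs(60))
        .await
}
```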

View File

@@ -1,19 +0,0 @@
[package]
name = "pageserver_page_api"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
bytes.workspace = true
pageserver_api.workspace = true
postgres_ffi.workspace = true
prost.workspace = true
smallvec.workspace = true
thiserror.workspace = true
tonic.workspace = true
utils.workspace = true
workspace_hack.workspace = true
[build-dependencies]
tonic-build.workspace = true

View File

@@ -1,13 +0,0 @@
use std::env;
use std::path::PathBuf;
/// Generates Rust code from .proto Protobuf schemas, along with a binary file
/// descriptor set for Protobuf schema reflection.
fn main() -> Result<(), Box<dyn std::error::Error>> {
let out_dir = PathBuf::from(env::var("OUT_DIR")?);
tonic_build::configure()
.bytes(["."])
.file_descriptor_set_path(out_dir.join("page_api_descriptor.bin"))
.compile_protos(&["proto/page_service.proto"], &["proto"])
.map_err(|err| err.into())
}

View File

@@ -1,239 +0,0 @@
// Page service, presented by pageservers for computes.
//
// This is the compute read path. It primarily serves page versions at given
// LSNs, but also base backups, SLRU segments, and relation metadata.
//
// EXPERIMENTAL: this is still under development and subject to change.
//
// Request metadata headers:
// - authorization: JWT token ("Bearer <token>"), if auth is enabled
// - neon-tenant-id: tenant ID ("7c4a1f9e3bd6470c8f3e21a65bd2e980")
// - neon-shard-id: shard ID, as <number><count> in hex ("0b10" = shard 11 of 16, 0-based)
// - neon-timeline-id: timeline ID ("f08c4e9a2d5f76b1e3a7c2d8910f4b3e")
//
// The service can be accessed via e.g. grpcurl:
//
// ```
// grpcurl \
// -plaintext \
// -H "neon-tenant-id: 7c4a1f9e3bd6470c8f3e21a65bd2e980" \
// -H "neon-shard-id: 0b10" \
// -H "neon-timeline-id: f08c4e9a2d5f76b1e3a7c2d8910f4b3e" \
// -H "authorization: Bearer $JWT" \
// -d '{"read_lsn": {"request_lsn": 1234567890}, "rel": {"spc_oid": 1663, "db_oid": 1234, "rel_number": 5678, "fork_number": 0}}'
// localhost:51051 page_api.PageService/CheckRelExists
// ```
//
// TODO: consider adding neon-compute-mode ("primary", "static", "replica").
// However, this will require reconnecting when changing modes.
//
// TODO: write implementation guidance on
// - Health checks
// - Tracing, OpenTelemetry
// - Compression
syntax = "proto3";
package page_api;
service PageService {
// Returns whether a relation exists.
rpc CheckRelExists(CheckRelExistsRequest) returns (CheckRelExistsResponse);
// Fetches a base backup.
rpc GetBaseBackup (GetBaseBackupRequest) returns (stream GetBaseBackupResponseChunk);
// Returns the total size of a database, as # of bytes.
rpc GetDbSize (GetDbSizeRequest) returns (GetDbSizeResponse);
// Fetches pages.
//
// This is implemented as a bidirectional streaming RPC for performance. Unary
// requests incur costs for e.g. HTTP/2 stream setup, header parsing,
// authentication, and so on -- with streaming, we only pay these costs during
// the initial stream setup. This ~doubles throughput in benchmarks. Other
// RPCs use regular unary requests, since they are not as frequent and
// performance-critical, and this simplifies implementation.
//
  // NB: a gRPC status response (e.g. for errors) will terminate the stream. The
  // stream may be shared by multiple Postgres backends, so we avoid this by
  // sending errors as GetPageResponse.status_code instead.
rpc GetPages (stream GetPageRequest) returns (stream GetPageResponse);
// Returns the size of a relation, as # of blocks.
rpc GetRelSize (GetRelSizeRequest) returns (GetRelSizeResponse);
// Fetches an SLRU segment.
rpc GetSlruSegment (GetSlruSegmentRequest) returns (GetSlruSegmentResponse);
}
// The LSN a request should read at.
message ReadLsn {
// The request's read LSN. Required.
uint64 request_lsn = 1;
// If given, the caller guarantees that the page has not been modified since
// this LSN. Must be smaller than or equal to request_lsn. This allows the
// Pageserver to serve an old page without waiting for the request LSN to
// arrive. Valid for all request types.
//
  // It is undefined behaviour to make a request where the page was, in fact,
  // modified between not_modified_since_lsn and request_lsn. The Pageserver
  // might detect this and return an error, or it might return either the old
  // or the new page version. Setting not_modified_since_lsn equal to
  // request_lsn is always safe, but can lead to unnecessary waiting.
uint64 not_modified_since_lsn = 2;
}
// A relation identifier.
message RelTag {
uint32 spc_oid = 1;
uint32 db_oid = 2;
uint32 rel_number = 3;
uint32 fork_number = 4;
}
// Checks whether a relation exists at the given LSN. Only valid on shard 0;
// other shards will error.
message CheckRelExistsRequest {
ReadLsn read_lsn = 1;
RelTag rel = 2;
}
message CheckRelExistsResponse {
bool exists = 1;
}
// Requests a base backup at a given LSN.
message GetBaseBackupRequest {
// The LSN to fetch a base backup at.
ReadLsn read_lsn = 1;
// If true, logical replication slots will not be created.
bool replica = 2;
}
// Base backup response chunk, returned as an ordered stream.
message GetBaseBackupResponseChunk {
// A basebackup data chunk. The size is undefined, but bounded by the 4 MB
// gRPC message size limit.
bytes chunk = 1;
}
// Requests the size of a database, as # of bytes. Only valid on shard 0; other
// shards will error.
message GetDbSizeRequest {
ReadLsn read_lsn = 1;
uint32 db_oid = 2;
}
message GetDbSizeResponse {
uint64 num_bytes = 1;
}
// Requests one or more pages.
message GetPageRequest {
  // A request ID. Will be included in the response. Should be unique among
  // in-flight requests on the stream.
uint64 request_id = 1;
// The request class.
GetPageClass request_class = 2;
// The LSN to read at.
ReadLsn read_lsn = 3;
// The relation to read from.
RelTag rel = 4;
// Page numbers to read. Must belong to the remote shard.
//
  // Multiple pages are fetched as a single batch by the Pageserver, amortizing
  // layer access costs and parallelizing the reads. This may increase the
  // latency of any individual page, but improves the overall latency and
  // throughput of the batch as a whole.
//
// TODO: this causes an allocation in the common single-block case. The sender
// can use a SmallVec to stack-allocate it, but Prost will always deserialize
// into a heap-allocated Vec. Consider optimizing this.
//
  // TODO: we might be able to avoid a server-side sort if we mandate that these
  // are always in order. But we can't currently rely on this on the server,
  // because of compatibility with the libpq protocol handler.
repeated uint32 block_number = 5;
}
// A GetPageRequest class. Primarily intended for observability, but may also be
// used for prioritization in the future.
enum GetPageClass {
// Unknown class. For backwards compatibility: used when an older client version sends a class
// that a newer server version has removed.
GET_PAGE_CLASS_UNKNOWN = 0;
// A normal request. This is the default.
GET_PAGE_CLASS_NORMAL = 1;
// A prefetch request. NB: can only be classified on pg < 18.
GET_PAGE_CLASS_PREFETCH = 2;
// A background request (e.g. vacuum).
GET_PAGE_CLASS_BACKGROUND = 3;
}
// A GetPage response.
//
// A batch response will contain all of the requested pages. We could eagerly
// emit individual pages as soon as they are ready, but during a readv()
// Postgres holds buffer pool locks on all pages in the batch and only returns
// once the entire batch is ready, so no one could use the individual pages.
message GetPageResponse {
// The original request's ID.
uint64 request_id = 1;
// The response status code.
GetPageStatusCode status_code = 2;
// A string describing the status, if any.
string reason = 3;
// The 8KB page images, in the same order as the request. Empty if status_code != OK.
repeated bytes page_image = 4;
}
// A GetPageResponse status code.
//
// These are effectively equivalent to gRPC statuses. However, we use a bidirectional stream
// (potentially shared by many backends), and a gRPC status response would terminate the stream so
// we send GetPageResponse messages with these codes instead.
enum GetPageStatusCode {
// Unknown status. For forwards compatibility: used when an older client version receives a new
// status code from a newer server version.
GET_PAGE_STATUS_CODE_UNKNOWN = 0;
// The request was successful.
GET_PAGE_STATUS_CODE_OK = 1;
// The page did not exist. The tenant/timeline/shard has already been
// validated during stream setup.
GET_PAGE_STATUS_CODE_NOT_FOUND = 2;
// The request was invalid.
GET_PAGE_STATUS_CODE_INVALID_REQUEST = 3;
// The request failed due to an internal server error.
GET_PAGE_STATUS_CODE_INTERNAL_ERROR = 4;
// The tenant is rate limited. Slow down and retry later.
GET_PAGE_STATUS_CODE_SLOW_DOWN = 5;
// NB: shutdown errors are emitted as a gRPC Unavailable status.
//
  // TODO: consider adding a GET_PAGE_STATUS_CODE_LAYER_DOWNLOAD status, returned when a request
  // must wait for a layer download. This could free up the server task to process other requests
  // while the download is in progress.
}
// Fetches the size of a relation at a given LSN, as # of blocks. Only valid on
// shard 0; other shards will error.
message GetRelSizeRequest {
ReadLsn read_lsn = 1;
RelTag rel = 2;
}
message GetRelSizeResponse {
uint32 num_blocks = 1;
}
// Requests an SLRU segment. Only valid on shard 0; other shards will error.
message GetSlruSegmentRequest {
ReadLsn read_lsn = 1;
uint32 kind = 2;
uint32 segno = 3;
}
// Returns an SLRU segment.
//
// These are up to 32 pages (256 KB), so we can send them as a single response.
message GetSlruSegmentResponse {
bytes segment = 1;
}
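To make the schema above concrete, here is a sketch of driving the `GetPages` bidirectional stream from Rust with the tonic-generated client exported by the `pageserver_page_api` crate (see its lib.rs below). The endpoint address, IDs, and LSN values are placeholders, and a real deployment would also attach the `authorization` header:

```rust
use pageserver_page_api::proto::{
    GetPageClass, GetPageRequest, ReadLsn, RelTag,
    page_service_client::PageServiceClient,
};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut client = PageServiceClient::connect("http://localhost:51051").await?;

    // One single-block request; more GetPageRequests could be pushed through
    // the same stream (e.g. via an mpsc channel) without re-paying setup costs.
    let outbound = tokio_stream::iter(vec![GetPageRequest {
        request_id: 1,
        request_class: GetPageClass::Normal as i32,
        read_lsn: Some(ReadLsn {
            request_lsn: 1234567890,
            not_modified_since_lsn: 1234567890, // always safe, but may wait
        }),
        rel: Some(RelTag { spc_oid: 1663, db_oid: 1234, rel_number: 5678, fork_number: 0 }),
        block_number: vec![0],
    }]);

    // Tenant, shard, and timeline are routed via metadata headers, as
    // documented at the top of the schema.
    let mut request = tonic::Request::new(outbound);
    let md = request.metadata_mut();
    md.insert("neon-tenant-id", "7c4a1f9e3bd6470c8f3e21a65bd2e980".parse()?);
    md.insert("neon-shard-id", "0000".parse()?);
    md.insert("neon-timeline-id", "f08c4e9a2d5f76b1e3a7c2d8910f4b3e".parse()?);

    let mut inbound = client.get_pages(request).await?.into_inner();
    while let Some(resp) = inbound.message().await? {
        println!(
            "request {}: status {:?}, {} page(s)",
            resp.request_id,
            resp.status_code(),
            resp.page_image.len()
        );
    }
    Ok(())
}
```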

View File

@@ -1,23 +0,0 @@
//! This crate provides the Pageserver's page API. It contains:
//!
//! * proto/page_service.proto: the Protobuf schema for the page API.
//! * proto: auto-generated Protobuf types for gRPC.
//!
//! This crate is used by both the client and the server. Try to keep it slim.
// Code generated by protobuf.
pub mod proto {
tonic::include_proto!("page_api");
/// File descriptor set for Protobuf schema reflection. This allows using
/// e.g. grpcurl with the API.
pub const FILE_DESCRIPTOR_SET: &[u8] =
tonic::include_file_descriptor_set!("page_api_descriptor");
pub use page_service_client::PageServiceClient;
pub use page_service_server::{PageService, PageServiceServer};
}
mod model;
pub use model::*;
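Since the crate exports `FILE_DESCRIPTOR_SET`, a server can register it with the `tonic-reflection` crate so that tools like grpcurl can discover the schema without local .proto files. A minimal sketch — note that the builder's final method is `build_v1()` in recent tonic-reflection releases and plain `build()` in older ones:

```rust
use pageserver_page_api::proto;

async fn serve(addr: std::net::SocketAddr) -> Result<(), Box<dyn std::error::Error>> {
    // Expose the page API schema via the standard gRPC reflection service.
    let reflection = tonic_reflection::server::Builder::configure()
        .register_encoded_file_descriptor_set(proto::FILE_DESCRIPTOR_SET)
        .build_v1()?;

    tonic::transport::Server::builder()
        .add_service(reflection)
        // .add_service(proto::PageServiceServer::new(page_service_impl))
        .serve(addr)
        .await?;
    Ok(())
}
```

With this in place, `grpcurl -plaintext localhost:51051 list` can enumerate `page_api.PageService` without needing the `-proto` flag.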

Some files were not shown because too many files have changed in this diff.