mirror of
https://github.com/neondatabase/neon.git
synced 2026-06-02 21:10:38 +00:00
Compare commits
42 Commits
rust_pitfa
...
dkr/unknow
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
15bde20c4b | ||
|
|
88ad410a81 | ||
|
|
4967355664 | ||
|
|
d551de60d7 | ||
|
|
d287e6a522 | ||
|
|
18f6ccd77e | ||
|
|
fdd504f043 | ||
|
|
7b9e8be6e4 | ||
|
|
89307822b0 | ||
|
|
30fe310602 | ||
|
|
ef41b63db7 | ||
|
|
1bceceac5a | ||
|
|
4431779e32 | ||
|
|
131343ed45 | ||
|
|
511b0945c3 | ||
|
|
b7db62411b | ||
|
|
efe9e131a7 | ||
|
|
4a67f60a3b | ||
|
|
a65e0774a5 | ||
|
|
a0b34e8c49 | ||
|
|
fdc1c12fb0 | ||
|
|
0322e2720f | ||
|
|
4f64be4a98 | ||
|
|
e7514cc15e | ||
|
|
6415dc791c | ||
|
|
a5615bd8ea | ||
|
|
4a76f2b8d6 | ||
|
|
9cd6f2ceeb | ||
|
|
2855c73990 | ||
|
|
edcf4d61a4 | ||
|
|
a2a9c598be | ||
|
|
bb06d281ea | ||
|
|
5869234290 | ||
|
|
ecfe4757d3 | ||
|
|
845e296562 | ||
|
|
1988cc5527 | ||
|
|
1d266a6365 | ||
|
|
80522a1b9d | ||
|
|
ecced13d90 | ||
|
|
59510f6449 | ||
|
|
7fc778d251 | ||
|
|
1d490b2311 |
186
.github/actions/allure-report-generate/action.yml
vendored
Normal file
186
.github/actions/allure-report-generate/action.yml
vendored
Normal file
@@ -0,0 +1,186 @@
|
||||
name: 'Create Allure report'
|
||||
description: 'Generate Allure report from uploaded by actions/allure-report-store tests results'
|
||||
|
||||
outputs:
|
||||
report-url:
|
||||
description: 'Allure report URL'
|
||||
value: ${{ steps.generate-report.outputs.report-url }}
|
||||
report-json-url:
|
||||
description: 'Allure report JSON URL'
|
||||
value: ${{ steps.generate-report.outputs.report-json-url }}
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
|
||||
steps:
|
||||
# We're using some of env variables quite offen, so let's set them once.
|
||||
#
|
||||
# It would be nice to have them set in common runs.env[0] section, but it doesn't work[1]
|
||||
#
|
||||
# - [0] https://docs.github.com/en/actions/creating-actions/metadata-syntax-for-github-actions#runsenv
|
||||
# - [1] https://github.com/neondatabase/neon/pull/3907#discussion_r1154703456
|
||||
#
|
||||
- name: Set variables
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
PR_NUMBER=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
|
||||
if [ "${PR_NUMBER}" != "null" ]; then
|
||||
BRANCH_OR_PR=pr-${PR_NUMBER}
|
||||
elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ]; then
|
||||
# Shortcut for special branches
|
||||
BRANCH_OR_PR=${GITHUB_REF_NAME}
|
||||
else
|
||||
BRANCH_OR_PR=branch-$(printf "${GITHUB_REF_NAME}" | tr -c "[:alnum:]._-" "-")
|
||||
fi
|
||||
|
||||
LOCK_FILE=reports/${BRANCH_OR_PR}/lock.txt
|
||||
|
||||
WORKDIR=/tmp/${BRANCH_OR_PR}-$(date +%s)
|
||||
mkdir -p ${WORKDIR}
|
||||
|
||||
echo "BRANCH_OR_PR=${BRANCH_OR_PR}" >> $GITHUB_ENV
|
||||
echo "LOCK_FILE=${LOCK_FILE}" >> $GITHUB_ENV
|
||||
echo "WORKDIR=${WORKDIR}" >> $GITHUB_ENV
|
||||
echo "BUCKET=${BUCKET}" >> $GITHUB_ENV
|
||||
env:
|
||||
BUCKET: neon-github-public-dev
|
||||
|
||||
# TODO: We can replace with a special docker image with Java and Allure pre-installed
|
||||
- uses: actions/setup-java@v3
|
||||
with:
|
||||
distribution: 'temurin'
|
||||
java-version: '17'
|
||||
|
||||
- name: Install Allure
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
if ! which allure; then
|
||||
ALLURE_ZIP=allure-${ALLURE_VERSION}.zip
|
||||
wget -q https://github.com/allure-framework/allure2/releases/download/${ALLURE_VERSION}/${ALLURE_ZIP}
|
||||
echo "${ALLURE_ZIP_MD5} ${ALLURE_ZIP}" | md5sum -c
|
||||
unzip -q ${ALLURE_ZIP}
|
||||
echo "$(pwd)/allure-${ALLURE_VERSION}/bin" >> $GITHUB_PATH
|
||||
rm -f ${ALLURE_ZIP}
|
||||
fi
|
||||
env:
|
||||
ALLURE_VERSION: 2.22.0
|
||||
ALLURE_ZIP_MD5: d5c9f0989b896482536956340a7d5ec9
|
||||
|
||||
# Potentially we could have several running build for the same key (for example, for the main branch), so we use improvised lock for this
|
||||
- name: Acquire lock
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
LOCK_TIMEOUT=300 # seconds
|
||||
|
||||
LOCK_CONTENT="${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
|
||||
echo ${LOCK_CONTENT} > ${WORKDIR}/lock.txt
|
||||
|
||||
# Do it up to 5 times to avoid race condition
|
||||
for _ in $(seq 1 5); do
|
||||
for i in $(seq 1 ${LOCK_TIMEOUT}); do
|
||||
LOCK_ACQUIRED=$(aws s3api head-object --bucket neon-github-public-dev --key ${LOCK_FILE} | jq --raw-output '.LastModified' || true)
|
||||
# `date --date="..."` is supported only by gnu date (i.e. it doesn't work on BSD/macOS)
|
||||
if [ -z "${LOCK_ACQUIRED}" ] || [ "$(( $(date +%s) - $(date --date="${LOCK_ACQUIRED}" +%s) ))" -gt "${LOCK_TIMEOUT}" ]; then
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
|
||||
aws s3 mv --only-show-errors ${WORKDIR}/lock.txt "s3://${BUCKET}/${LOCK_FILE}"
|
||||
|
||||
# Double-check that exactly THIS run has acquired the lock
|
||||
aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt
|
||||
if [ "$(cat lock.txt)" = "${LOCK_CONTENT}" ]; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
- name: Generate and publish final Allure report
|
||||
id: generate-report
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
REPORT_PREFIX=reports/${BRANCH_OR_PR}
|
||||
RAW_PREFIX=reports-raw/${BRANCH_OR_PR}/${GITHUB_RUN_ID}
|
||||
|
||||
# Get previously uploaded data for this run
|
||||
ZSTD_NBTHREADS=0
|
||||
|
||||
S3_FILEPATHS=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${RAW_PREFIX}/ | jq --raw-output '.Contents[].Key')
|
||||
if [ -z "$S3_FILEPATHS" ]; then
|
||||
# There's no previously uploaded data for this $GITHUB_RUN_ID
|
||||
exit 0
|
||||
fi
|
||||
for S3_FILEPATH in ${S3_FILEPATHS}; do
|
||||
time aws s3 cp --only-show-errors "s3://${BUCKET}/${S3_FILEPATH}" "${WORKDIR}"
|
||||
|
||||
archive=${WORKDIR}/$(basename $S3_FILEPATH)
|
||||
mkdir -p ${archive%.tar.zst}
|
||||
time tar -xf ${archive} -C ${archive%.tar.zst}
|
||||
rm -f ${archive}
|
||||
done
|
||||
|
||||
# Get history trend
|
||||
time aws s3 cp --recursive --only-show-errors "s3://${BUCKET}/${REPORT_PREFIX}/latest/history" "${WORKDIR}/latest/history" || true
|
||||
|
||||
# Generate report
|
||||
time allure generate --clean --output ${WORKDIR}/report ${WORKDIR}/*
|
||||
|
||||
# Replace a logo link with a redirect to the latest version of the report
|
||||
sed -i 's|<a href="." class=|<a href="https://'${BUCKET}'.s3.amazonaws.com/'${REPORT_PREFIX}'/latest/index.html?nocache='"'+Date.now()+'"'" class=|g' ${WORKDIR}/report/app.js
|
||||
|
||||
# Upload a history and the final report (in this particular order to not to have duplicated history in 2 places)
|
||||
time aws s3 mv --recursive --only-show-errors "${WORKDIR}/report/history" "s3://${BUCKET}/${REPORT_PREFIX}/latest/history"
|
||||
time aws s3 mv --recursive --only-show-errors "${WORKDIR}/report" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}"
|
||||
|
||||
REPORT_URL=https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html
|
||||
|
||||
# Generate redirect
|
||||
cat <<EOF > ${WORKDIR}/index.html
|
||||
<!DOCTYPE html>
|
||||
|
||||
<meta charset="utf-8">
|
||||
<title>Redirecting to ${REPORT_URL}</title>
|
||||
<meta http-equiv="refresh" content="0; URL=${REPORT_URL}">
|
||||
EOF
|
||||
time aws s3 cp --only-show-errors ${WORKDIR}/index.html "s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html"
|
||||
|
||||
echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
|
||||
echo "report-json-url=${REPORT_URL%/index.html}/data/suites.json" >> $GITHUB_OUTPUT
|
||||
|
||||
echo "[Allure Report](${REPORT_URL})" >> ${GITHUB_STEP_SUMMARY}
|
||||
|
||||
- name: Release lock
|
||||
if: always()
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt || exit 0
|
||||
|
||||
if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}" ]; then
|
||||
aws s3 rm "s3://${BUCKET}/${LOCK_FILE}"
|
||||
fi
|
||||
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
if [ -d "${WORKDIR}" ]; then
|
||||
rm -rf ${WORKDIR}
|
||||
fi
|
||||
|
||||
- uses: actions/github-script@v6
|
||||
if: always()
|
||||
env:
|
||||
REPORT_URL: ${{ steps.generate-report.outputs.report-url }}
|
||||
COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
with:
|
||||
script: |
|
||||
const { REPORT_URL, COMMIT_SHA } = process.env
|
||||
|
||||
await github.rest.repos.createCommitStatus({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
sha: `${COMMIT_SHA}`,
|
||||
state: 'success',
|
||||
target_url: `${REPORT_URL}`,
|
||||
context: 'Allure report',
|
||||
})
|
||||
72
.github/actions/allure-report-store/action.yml
vendored
Normal file
72
.github/actions/allure-report-store/action.yml
vendored
Normal file
@@ -0,0 +1,72 @@
|
||||
name: 'Store Allure results'
|
||||
description: 'Upload test results to be used by actions/allure-report-generate'
|
||||
|
||||
inputs:
|
||||
report-dir:
|
||||
description: 'directory with test results generated by tests'
|
||||
required: true
|
||||
unique-key:
|
||||
description: 'string to distinguish different results in the same run'
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
|
||||
steps:
|
||||
- name: Set variables
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
PR_NUMBER=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
|
||||
if [ "${PR_NUMBER}" != "null" ]; then
|
||||
BRANCH_OR_PR=pr-${PR_NUMBER}
|
||||
elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ]; then
|
||||
# Shortcut for special branches
|
||||
BRANCH_OR_PR=${GITHUB_REF_NAME}
|
||||
else
|
||||
BRANCH_OR_PR=branch-$(printf "${GITHUB_REF_NAME}" | tr -c "[:alnum:]._-" "-")
|
||||
fi
|
||||
|
||||
echo "BRANCH_OR_PR=${BRANCH_OR_PR}" >> $GITHUB_ENV
|
||||
echo "REPORT_DIR=${REPORT_DIR}" >> $GITHUB_ENV
|
||||
env:
|
||||
REPORT_DIR: ${{ inputs.report-dir }}
|
||||
|
||||
- name: Upload test results
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
REPORT_PREFIX=reports/${BRANCH_OR_PR}
|
||||
RAW_PREFIX=reports-raw/${BRANCH_OR_PR}/${GITHUB_RUN_ID}
|
||||
|
||||
# Add metadata
|
||||
cat <<EOF > ${REPORT_DIR}/executor.json
|
||||
{
|
||||
"name": "GitHub Actions",
|
||||
"type": "github",
|
||||
"url": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/latest/index.html",
|
||||
"buildOrder": ${GITHUB_RUN_ID},
|
||||
"buildName": "GitHub Actions Run #${GITHUB_RUN_NUMBER}/${GITHUB_RUN_ATTEMPT}",
|
||||
"buildUrl": "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/attempts/${GITHUB_RUN_ATTEMPT}",
|
||||
"reportUrl": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html",
|
||||
"reportName": "Allure Report"
|
||||
}
|
||||
EOF
|
||||
|
||||
cat <<EOF > ${REPORT_DIR}/environment.properties
|
||||
COMMIT_SHA=${COMMIT_SHA}
|
||||
EOF
|
||||
|
||||
ARCHIVE="${UNIQUE_KEY}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst"
|
||||
ZSTD_NBTHREADS=0
|
||||
|
||||
time tar -C ${REPORT_DIR} -cf ${ARCHIVE} --zstd .
|
||||
time aws s3 mv --only-show-errors ${ARCHIVE} "s3://${BUCKET}/${RAW_PREFIX}/${ARCHIVE}"
|
||||
env:
|
||||
UNIQUE_KEY: ${{ inputs.unique-key }}
|
||||
COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
BUCKET: neon-github-public-dev
|
||||
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
rm -rf ${REPORT_DIR}
|
||||
254
.github/actions/allure-report/action.yml
vendored
254
.github/actions/allure-report/action.yml
vendored
@@ -1,254 +0,0 @@
|
||||
name: 'Create Allure report'
|
||||
description: 'Create and publish Allure report'
|
||||
|
||||
inputs:
|
||||
action:
|
||||
desctiption: 'generate or store'
|
||||
required: true
|
||||
build_type:
|
||||
description: '`build_type` from run-python-test-set action'
|
||||
required: true
|
||||
test_selection:
|
||||
description: '`test_selector` from run-python-test-set action'
|
||||
required: false
|
||||
outputs:
|
||||
report-url:
|
||||
description: 'Allure report URL'
|
||||
value: ${{ steps.generate-report.outputs.report-url }}
|
||||
report-json-url:
|
||||
description: 'Allure report JSON URL'
|
||||
value: ${{ steps.generate-report.outputs.report-json-url }}
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
|
||||
steps:
|
||||
# We're using some of env variables quite offen, so let's set them once.
|
||||
#
|
||||
# It would be nice to have them set in common runs.env[0] section, but it doesn't work[1]
|
||||
#
|
||||
# - [0] https://docs.github.com/en/actions/creating-actions/metadata-syntax-for-github-actions#runsenv
|
||||
# - [1] https://github.com/neondatabase/neon/pull/3907#discussion_r1154703456
|
||||
#
|
||||
- name: Set common environment variables
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
echo "BUILD_TYPE=${BUILD_TYPE}" >> $GITHUB_ENV
|
||||
echo "BUCKET=${BUCKET}" >> $GITHUB_ENV
|
||||
echo "TEST_OUTPUT=${TEST_OUTPUT}" >> $GITHUB_ENV
|
||||
env:
|
||||
BUILD_TYPE: ${{ inputs.build_type }}
|
||||
BUCKET: neon-github-public-dev
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
|
||||
- name: Validate input parameters
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
if [ "${{ inputs.action }}" != "store" ] && [ "${{ inputs.action }}" != "generate" ]; then
|
||||
echo >&2 "Unknown inputs.action type '${{ inputs.action }}'; allowed 'generate' or 'store' only"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "${{ inputs.test_selection }}" ] && [ "${{ inputs.action }}" == "store" ]; then
|
||||
echo >&2 "inputs.test_selection must be set for 'store' action"
|
||||
exit 2
|
||||
fi
|
||||
|
||||
- name: Calculate variables
|
||||
id: calculate-vars
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
# TODO: for manually triggered workflows (via workflow_dispatch) we need to have a separate key
|
||||
|
||||
pr_number=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
|
||||
if [ "${pr_number}" != "null" ]; then
|
||||
key=pr-${pr_number}
|
||||
elif [ "${GITHUB_REF_NAME}" = "main" ]; then
|
||||
# Shortcut for a special branch
|
||||
key=main
|
||||
elif [ "${GITHUB_REF_NAME}" = "release" ]; then
|
||||
# Shortcut for a special branch
|
||||
key=release
|
||||
else
|
||||
key=branch-$(printf "${GITHUB_REF_NAME}" | tr -c "[:alnum:]._-" "-")
|
||||
fi
|
||||
echo "KEY=${key}" >> $GITHUB_OUTPUT
|
||||
|
||||
# Sanitize test selection to remove `/` and any other special characters
|
||||
# Use printf instead of echo to avoid having `\n` at the end of the string
|
||||
test_selection=$(printf "${{ inputs.test_selection }}" | tr -c "[:alnum:]._-" "-" )
|
||||
echo "TEST_SELECTION=${test_selection}" >> $GITHUB_OUTPUT
|
||||
|
||||
- uses: actions/setup-java@v3
|
||||
if: ${{ inputs.action == 'generate' }}
|
||||
with:
|
||||
distribution: 'temurin'
|
||||
java-version: '17'
|
||||
|
||||
- name: Install Allure
|
||||
if: ${{ inputs.action == 'generate' }}
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
if ! which allure; then
|
||||
ALLURE_ZIP=allure-${ALLURE_VERSION}.zip
|
||||
wget -q https://github.com/allure-framework/allure2/releases/download/${ALLURE_VERSION}/${ALLURE_ZIP}
|
||||
echo "${ALLURE_ZIP_MD5} ${ALLURE_ZIP}" | md5sum -c
|
||||
unzip -q ${ALLURE_ZIP}
|
||||
echo "$(pwd)/allure-${ALLURE_VERSION}/bin" >> $GITHUB_PATH
|
||||
rm -f ${ALLURE_ZIP}
|
||||
fi
|
||||
env:
|
||||
ALLURE_VERSION: 2.21.0
|
||||
ALLURE_ZIP_MD5: c8db4dd8e2a7882583d569ed2c82879c
|
||||
|
||||
- name: Upload Allure results
|
||||
if: ${{ inputs.action == 'store' }}
|
||||
env:
|
||||
REPORT_PREFIX: reports/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
|
||||
RAW_PREFIX: reports-raw/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
|
||||
TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
# Add metadata
|
||||
cat <<EOF > $TEST_OUTPUT/allure/results/executor.json
|
||||
{
|
||||
"name": "GitHub Actions",
|
||||
"type": "github",
|
||||
"url": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/latest/index.html",
|
||||
"buildOrder": ${GITHUB_RUN_ID},
|
||||
"buildName": "GitHub Actions Run #${{ github.run_number }}/${GITHUB_RUN_ATTEMPT}",
|
||||
"buildUrl": "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/attempts/${GITHUB_RUN_ATTEMPT}",
|
||||
"reportUrl": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html",
|
||||
"reportName": "Allure Report"
|
||||
}
|
||||
EOF
|
||||
cat <<EOF > $TEST_OUTPUT/allure/results/environment.properties
|
||||
TEST_SELECTION=${{ inputs.test_selection }}
|
||||
BUILD_TYPE=${BUILD_TYPE}
|
||||
EOF
|
||||
|
||||
ARCHIVE="${GITHUB_RUN_ID}-${TEST_SELECTION}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst"
|
||||
ZSTD_NBTHREADS=0
|
||||
|
||||
tar -C ${TEST_OUTPUT}/allure/results -cf ${ARCHIVE} --zstd .
|
||||
aws s3 mv --only-show-errors ${ARCHIVE} "s3://${BUCKET}/${RAW_PREFIX}/${ARCHIVE}"
|
||||
|
||||
# Potentially we could have several running build for the same key (for example for the main branch), so we use improvised lock for this
|
||||
- name: Acquire Allure lock
|
||||
if: ${{ inputs.action == 'generate' }}
|
||||
shell: bash -euxo pipefail {0}
|
||||
env:
|
||||
LOCK_FILE: reports/${{ steps.calculate-vars.outputs.KEY }}/lock.txt
|
||||
TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
|
||||
run: |
|
||||
LOCK_TIMEOUT=300 # seconds
|
||||
|
||||
for _ in $(seq 1 5); do
|
||||
for i in $(seq 1 ${LOCK_TIMEOUT}); do
|
||||
LOCK_ADDED=$(aws s3api head-object --bucket neon-github-public-dev --key ${LOCK_FILE} | jq --raw-output '.LastModified' || true)
|
||||
# `date --date="..."` is supported only by gnu date (i.e. it doesn't work on BSD/macOS)
|
||||
if [ -z "${LOCK_ADDED}" ] || [ "$(( $(date +%s) - $(date --date="${LOCK_ADDED}" +%s) ))" -gt "${LOCK_TIMEOUT}" ]; then
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
echo "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" > lock.txt
|
||||
aws s3 mv --only-show-errors lock.txt "s3://${BUCKET}/${LOCK_FILE}"
|
||||
|
||||
# A double-check that exactly WE have acquired the lock
|
||||
aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt
|
||||
if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" ]; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
- name: Generate and publish final Allure report
|
||||
if: ${{ inputs.action == 'generate' }}
|
||||
id: generate-report
|
||||
env:
|
||||
REPORT_PREFIX: reports/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
|
||||
RAW_PREFIX: reports-raw/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
# Get previously uploaded data for this run
|
||||
ZSTD_NBTHREADS=0
|
||||
|
||||
s3_filepaths=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${RAW_PREFIX}/${GITHUB_RUN_ID}- | jq --raw-output '.Contents[].Key')
|
||||
if [ -z "$s3_filepaths" ]; then
|
||||
# There's no previously uploaded data for this run
|
||||
exit 0
|
||||
fi
|
||||
for s3_filepath in ${s3_filepaths}; do
|
||||
aws s3 cp --only-show-errors "s3://${BUCKET}/${s3_filepath}" "${TEST_OUTPUT}/allure/"
|
||||
|
||||
archive=${TEST_OUTPUT}/allure/$(basename $s3_filepath)
|
||||
mkdir -p ${archive%.tar.zst}
|
||||
tar -xf ${archive} -C ${archive%.tar.zst}
|
||||
rm -f ${archive}
|
||||
done
|
||||
|
||||
# Get history trend
|
||||
aws s3 cp --recursive --only-show-errors "s3://${BUCKET}/${REPORT_PREFIX}/latest/history" "${TEST_OUTPUT}/allure/latest/history" || true
|
||||
|
||||
# Generate report
|
||||
allure generate --clean --output $TEST_OUTPUT/allure/report $TEST_OUTPUT/allure/*
|
||||
|
||||
# Replace a logo link with a redirect to the latest version of the report
|
||||
sed -i 's|<a href="." class=|<a href="https://'${BUCKET}'.s3.amazonaws.com/'${REPORT_PREFIX}'/latest/index.html" class=|g' $TEST_OUTPUT/allure/report/app.js
|
||||
|
||||
# Upload a history and the final report (in this particular order to not to have duplicated history in 2 places)
|
||||
aws s3 mv --recursive --only-show-errors "${TEST_OUTPUT}/allure/report/history" "s3://${BUCKET}/${REPORT_PREFIX}/latest/history"
|
||||
aws s3 mv --recursive --only-show-errors "${TEST_OUTPUT}/allure/report" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}"
|
||||
|
||||
REPORT_URL=https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html
|
||||
|
||||
# Generate redirect
|
||||
cat <<EOF > ${TEST_OUTPUT}/allure/index.html
|
||||
<!DOCTYPE html>
|
||||
|
||||
<meta charset="utf-8">
|
||||
<title>Redirecting to ${REPORT_URL}</title>
|
||||
<meta http-equiv="refresh" content="0; URL=${REPORT_URL}">
|
||||
EOF
|
||||
aws s3 cp --only-show-errors ${TEST_OUTPUT}/allure/index.html "s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html"
|
||||
|
||||
echo "[Allure Report](${REPORT_URL})" >> ${GITHUB_STEP_SUMMARY}
|
||||
echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
|
||||
echo "report-json-url=${REPORT_URL%/index.html}/data/suites.json" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Release Allure lock
|
||||
if: ${{ inputs.action == 'generate' && always() }}
|
||||
shell: bash -euxo pipefail {0}
|
||||
env:
|
||||
LOCK_FILE: reports/${{ steps.calculate-vars.outputs.KEY }}/lock.txt
|
||||
TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
|
||||
run: |
|
||||
aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt || exit 0
|
||||
|
||||
if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" ]; then
|
||||
aws s3 rm "s3://${BUCKET}/${LOCK_FILE}"
|
||||
fi
|
||||
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
rm -rf ${TEST_OUTPUT}/allure
|
||||
|
||||
- uses: actions/github-script@v6
|
||||
if: ${{ inputs.action == 'generate' && always() }}
|
||||
env:
|
||||
REPORT_URL: ${{ steps.generate-report.outputs.report-url }}
|
||||
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
with:
|
||||
script: |
|
||||
const { REPORT_URL, BUILD_TYPE, SHA } = process.env
|
||||
|
||||
await github.rest.repos.createCommitStatus({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
sha: `${SHA}`,
|
||||
state: 'success',
|
||||
target_url: `${REPORT_URL}`,
|
||||
context: `Allure report / ${BUILD_TYPE}`,
|
||||
})
|
||||
24
.github/actions/run-python-test-set/action.yml
vendored
24
.github/actions/run-python-test-set/action.yml
vendored
@@ -48,6 +48,10 @@ inputs:
|
||||
description: 'Whether to rerun flaky tests'
|
||||
required: false
|
||||
default: 'false'
|
||||
pg_version:
|
||||
description: 'Postgres version to use for tests'
|
||||
required: false
|
||||
default: 'v14'
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
@@ -68,7 +72,7 @@ runs:
|
||||
prefix: latest
|
||||
|
||||
- name: Download compatibility snapshot for Postgres 14
|
||||
if: inputs.build_type != 'remote'
|
||||
if: inputs.build_type != 'remote' && inputs.pg_version == 'v14'
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: compatibility-snapshot-${{ inputs.build_type }}-pg14
|
||||
@@ -106,13 +110,14 @@ runs:
|
||||
ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage')
|
||||
ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
|
||||
RERUN_FLAKY: ${{ inputs.rerun_flaky }}
|
||||
PG_VERSION: ${{ inputs.pg_version }}
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
# PLATFORM will be embedded in the perf test report
|
||||
# and it is needed to distinguish different environments
|
||||
export PLATFORM=${PLATFORM:-github-actions-selfhosted}
|
||||
export POSTGRES_DISTRIB_DIR=${POSTGRES_DISTRIB_DIR:-/tmp/neon/pg_install}
|
||||
export DEFAULT_PG_VERSION=${DEFAULT_PG_VERSION:-14}
|
||||
export DEFAULT_PG_VERSION=${PG_VERSION#v}
|
||||
|
||||
if [ "${BUILD_TYPE}" = "remote" ]; then
|
||||
export REMOTE_ENV=1
|
||||
@@ -193,18 +198,17 @@ runs:
|
||||
fi
|
||||
|
||||
- name: Upload compatibility snapshot for Postgres 14
|
||||
if: github.ref_name == 'release'
|
||||
if: github.ref_name == 'release' && inputs.pg_version == 'v14'
|
||||
uses: ./.github/actions/upload
|
||||
with:
|
||||
name: compatibility-snapshot-${{ inputs.build_type }}-pg14-${{ github.run_id }}
|
||||
# The path includes a test name (test_create_snapshot) and directory that the test creates (compatibility_snapshot_pg14), keep the path in sync with the test
|
||||
path: /tmp/test_output/test_create_snapshot/compatibility_snapshot_pg14/
|
||||
# Directory is created by test_compatibility.py::test_create_snapshot, keep the path in sync with the test
|
||||
path: /tmp/test_output/compatibility_snapshot_pg14/
|
||||
prefix: latest
|
||||
|
||||
- name: Create Allure report
|
||||
- name: Upload test results
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report
|
||||
uses: ./.github/actions/allure-report-store
|
||||
with:
|
||||
action: store
|
||||
build_type: ${{ inputs.build_type }}
|
||||
test_selection: ${{ inputs.test_selection }}
|
||||
report-dir: /tmp/test_output/allure/results
|
||||
unique-key: ${{ inputs.build_type }}
|
||||
|
||||
4
.github/pull_request_template.md
vendored
4
.github/pull_request_template.md
vendored
@@ -1,6 +1,6 @@
|
||||
## Describe your changes
|
||||
## Problem
|
||||
|
||||
## Issue ticket number and link
|
||||
## Summary of changes
|
||||
|
||||
## Checklist before requesting a review
|
||||
|
||||
|
||||
49
.github/workflows/benchmarking.yml
vendored
49
.github/workflows/benchmarking.yml
vendored
@@ -16,12 +16,12 @@ on:
|
||||
workflow_dispatch: # adds ability to run this manually
|
||||
inputs:
|
||||
region_id:
|
||||
description: 'Use a particular region. If not set the default region will be used'
|
||||
description: 'Project region id. If not set, the default region will be used'
|
||||
required: false
|
||||
default: 'aws-us-east-2'
|
||||
save_perf_report:
|
||||
type: boolean
|
||||
description: 'Publish perf report or not. If not set, the report is published only for the main branch'
|
||||
description: 'Publish perf report. If not set, the report will be published only for the main branch'
|
||||
required: false
|
||||
|
||||
defaults:
|
||||
@@ -93,10 +93,7 @@ jobs:
|
||||
|
||||
- name: Create Allure report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report
|
||||
with:
|
||||
action: generate
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
@@ -128,13 +125,14 @@ jobs:
|
||||
matrix='{
|
||||
"platform": [
|
||||
"neon-captest-new",
|
||||
"neon-captest-reuse"
|
||||
"neon-captest-reuse",
|
||||
"neonvm-captest-new"
|
||||
],
|
||||
"db_size": [ "10gb" ],
|
||||
"include": [
|
||||
{ "platform": "neon-captest-freetier", "db_size": "3gb" },
|
||||
{ "platform": "neon-captest-new", "db_size": "50gb" }
|
||||
]
|
||||
"include": [{ "platform": "neon-captest-freetier", "db_size": "3gb" },
|
||||
{ "platform": "neon-captest-new", "db_size": "50gb" },
|
||||
{ "platform": "neonvm-captest-freetier", "db_size": "3gb" },
|
||||
{ "platform": "neonvm-captest-new", "db_size": "50gb" }]
|
||||
}'
|
||||
|
||||
if [ "$(date +%A)" = "Saturday" ]; then
|
||||
@@ -200,7 +198,7 @@ jobs:
|
||||
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
|
||||
|
||||
- name: Create Neon Project
|
||||
if: contains(fromJson('["neon-captest-new", "neon-captest-freetier"]'), matrix.platform)
|
||||
if: contains(fromJson('["neon-captest-new", "neon-captest-freetier", "neonvm-captest-new", "neonvm-captest-freetier"]'), matrix.platform)
|
||||
id: create-neon-project
|
||||
uses: ./.github/actions/neon-project-create
|
||||
with:
|
||||
@@ -208,6 +206,7 @@ jobs:
|
||||
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
compute_units: ${{ (matrix.platform == 'neon-captest-freetier' && '[0.25, 0.25]') || '[1, 1]' }}
|
||||
provisioner: ${{ (contains(matrix.platform, 'neonvm-') && 'k8s-neonvm') || 'k8s-pod' }}
|
||||
|
||||
- name: Set up Connection String
|
||||
id: set-up-connstr
|
||||
@@ -216,7 +215,7 @@ jobs:
|
||||
neon-captest-reuse)
|
||||
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
|
||||
;;
|
||||
neon-captest-new | neon-captest-freetier)
|
||||
neon-captest-new | neon-captest-freetier | neonvm-captest-new | neonvm-captest-freetier)
|
||||
CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
|
||||
;;
|
||||
rds-aurora)
|
||||
@@ -226,7 +225,7 @@ jobs:
|
||||
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
|
||||
;;
|
||||
*)
|
||||
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-freetier', 'rds-aurora', or 'rds-postgres'"
|
||||
echo >&2 "Unknown PLATFORM=${PLATFORM}"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
@@ -283,10 +282,7 @@ jobs:
|
||||
|
||||
- name: Create Allure report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report
|
||||
with:
|
||||
action: generate
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
@@ -380,10 +376,7 @@ jobs:
|
||||
|
||||
- name: Create Allure report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report
|
||||
with:
|
||||
action: generate
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
@@ -476,10 +469,7 @@ jobs:
|
||||
|
||||
- name: Create Allure report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report
|
||||
with:
|
||||
action: generate
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
@@ -566,16 +556,13 @@ jobs:
|
||||
|
||||
- name: Create Allure report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report
|
||||
with:
|
||||
action: generate
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C033QLM5P7D" # dev-staging-stream
|
||||
slack-message: "Periodic TPC-H perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
|
||||
slack-message: "Periodic User example perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
126
.github/workflows/build_and_test.yml
vendored
126
.github/workflows/build_and_test.yml
vendored
@@ -324,12 +324,14 @@ jobs:
|
||||
runs-on: [ self-hosted, gen3, large ]
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
# Default shared memory is 64mb
|
||||
options: --init --shm-size=512mb
|
||||
needs: [ build-neon ]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build_type: [ debug, release ]
|
||||
pg_version: [ v14, v15 ]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
@@ -349,19 +351,21 @@ jobs:
|
||||
real_s3_access_key_id: "${{ secrets.AWS_ACCESS_KEY_ID_CI_TESTS_S3 }}"
|
||||
real_s3_secret_access_key: "${{ secrets.AWS_SECRET_ACCESS_KEY_CI_TESTS_S3 }}"
|
||||
rerun_flaky: true
|
||||
pg_version: ${{ matrix.pg_version }}
|
||||
env:
|
||||
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR }}
|
||||
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
|
||||
|
||||
- name: Merge and upload coverage data
|
||||
if: matrix.build_type == 'debug'
|
||||
if: matrix.build_type == 'debug' && matrix.pg_version == 'v14'
|
||||
uses: ./.github/actions/save-coverage-data
|
||||
|
||||
benchmarks:
|
||||
runs-on: [ self-hosted, gen3, small ]
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
# Default shared memory is 64mb
|
||||
options: --init --shm-size=512mb
|
||||
needs: [ build-neon ]
|
||||
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
|
||||
strategy:
|
||||
@@ -399,21 +403,10 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Create Allure report (debug)
|
||||
- name: Create Allure report
|
||||
if: ${{ !cancelled() }}
|
||||
id: create-allure-report-debug
|
||||
uses: ./.github/actions/allure-report
|
||||
with:
|
||||
action: generate
|
||||
build_type: debug
|
||||
|
||||
- name: Create Allure report (release)
|
||||
if: ${{ !cancelled() }}
|
||||
id: create-allure-report-release
|
||||
uses: ./.github/actions/allure-report
|
||||
with:
|
||||
action: generate
|
||||
build_type: release
|
||||
id: create-allure-report
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
|
||||
- uses: actions/github-script@v6
|
||||
if: >
|
||||
@@ -423,52 +416,37 @@ jobs:
|
||||
# Retry script for 5XX server errors: https://github.com/actions/github-script#retries
|
||||
retries: 5
|
||||
script: |
|
||||
const reports = [{
|
||||
buildType: "debug",
|
||||
reportUrl: "${{ steps.create-allure-report-debug.outputs.report-url }}",
|
||||
jsonUrl: "${{ steps.create-allure-report-debug.outputs.report-json-url }}",
|
||||
}, {
|
||||
buildType: "release",
|
||||
reportUrl: "${{ steps.create-allure-report-release.outputs.report-url }}",
|
||||
jsonUrl: "${{ steps.create-allure-report-release.outputs.report-json-url }}",
|
||||
}]
|
||||
const report = {
|
||||
reportUrl: "${{ steps.create-allure-report.outputs.report-url }}",
|
||||
reportJsonUrl: "${{ steps.create-allure-report.outputs.report-json-url }}",
|
||||
}
|
||||
|
||||
const script = require("./scripts/pr-comment-test-report.js")
|
||||
await script({
|
||||
github,
|
||||
context,
|
||||
fetch,
|
||||
reports,
|
||||
report,
|
||||
})
|
||||
|
||||
- name: Store Allure test stat in the DB
|
||||
if: >
|
||||
!cancelled() && (
|
||||
steps.create-allure-report-debug.outputs.report-url ||
|
||||
steps.create-allure-report-release.outputs.report-url
|
||||
)
|
||||
if: ${{ !cancelled() && steps.create-allure-report.outputs.report-json-url }}
|
||||
env:
|
||||
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
REPORT_JSON_URL_DEBUG: ${{ steps.create-allure-report-debug.outputs.report-json-url }}
|
||||
REPORT_JSON_URL_RELEASE: ${{ steps.create-allure-report-release.outputs.report-json-url }}
|
||||
COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
REPORT_JSON_URL: ${{ steps.create-allure-report.outputs.report-json-url }}
|
||||
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR }}
|
||||
run: |
|
||||
./scripts/pysync
|
||||
|
||||
for report_url in $REPORT_JSON_URL_DEBUG $REPORT_JSON_URL_RELEASE; do
|
||||
if [ -z "$report_url" ]; then
|
||||
continue
|
||||
fi
|
||||
curl --fail --output suites.json "${REPORT_JSON_URL}"
|
||||
export BUILD_TYPE=unified
|
||||
export DATABASE_URL="$TEST_RESULT_CONNSTR"
|
||||
|
||||
if [[ "$report_url" == "$REPORT_JSON_URL_DEBUG" ]]; then
|
||||
BUILD_TYPE=debug
|
||||
else
|
||||
BUILD_TYPE=release
|
||||
fi
|
||||
|
||||
curl --fail --output suites.json "${report_url}"
|
||||
DATABASE_URL="$TEST_RESULT_CONNSTR" poetry run python3 scripts/ingest_regress_test_result.py --revision ${SHA} --reference ${GITHUB_REF} --build-type ${BUILD_TYPE} --ingest suites.json
|
||||
done
|
||||
poetry run python3 scripts/ingest_regress_test_result.py \
|
||||
--revision ${COMMIT_SHA} \
|
||||
--reference ${GITHUB_REF} \
|
||||
--build-type ${BUILD_TYPE} \
|
||||
--ingest suites.json
|
||||
|
||||
coverage-report:
|
||||
runs-on: [ self-hosted, gen3, small ]
|
||||
@@ -514,37 +492,43 @@ jobs:
|
||||
- name: Merge coverage data
|
||||
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
|
||||
|
||||
- name: Build and upload coverage report
|
||||
- name: Build coverage report
|
||||
env:
|
||||
COMMIT_URL: ${{ github.server_url }}/${{ github.repository }}/commit/${{ github.event.pull_request.head.sha || github.sha }}
|
||||
run: |
|
||||
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
|
||||
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
|
||||
COMMIT_URL=https://github.com/${{ github.repository }}/commit/$COMMIT_SHA
|
||||
|
||||
scripts/coverage \
|
||||
--dir=/tmp/coverage report \
|
||||
--input-objects=/tmp/coverage/binaries.list \
|
||||
--commit-url=$COMMIT_URL \
|
||||
--commit-url=${COMMIT_URL} \
|
||||
--format=github
|
||||
|
||||
REPORT_URL=https://${{ github.repository_owner }}.github.io/zenith-coverage-data/$COMMIT_SHA
|
||||
- name: Upload coverage report
|
||||
id: upload-coverage-report
|
||||
env:
|
||||
BUCKET: neon-github-public-dev
|
||||
COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
run: |
|
||||
aws s3 cp --only-show-errors --recursive /tmp/coverage/report s3://neon-github-public-dev/code-coverage/${COMMIT_SHA}
|
||||
|
||||
scripts/git-upload \
|
||||
--repo=https://${{ secrets.VIP_VAP_ACCESS_TOKEN }}@github.com/${{ github.repository_owner }}/zenith-coverage-data.git \
|
||||
--message="Add code coverage for $COMMIT_URL" \
|
||||
copy /tmp/coverage/report $COMMIT_SHA # COPY FROM TO_RELATIVE
|
||||
REPORT_URL=https://${BUCKET}.s3.amazonaws.com/code-coverage/${COMMIT_SHA}/index.html
|
||||
echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
|
||||
|
||||
# Add link to the coverage report to the commit
|
||||
curl -f -X POST \
|
||||
https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
|
||||
-H "Accept: application/vnd.github.v3+json" \
|
||||
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
|
||||
--data \
|
||||
"{
|
||||
\"state\": \"success\",
|
||||
\"context\": \"neon-coverage\",
|
||||
\"description\": \"Coverage report is ready\",
|
||||
\"target_url\": \"$REPORT_URL\"
|
||||
}"
|
||||
- uses: actions/github-script@v6
|
||||
env:
|
||||
REPORT_URL: ${{ steps.upload-coverage-report.outputs.report-url }}
|
||||
COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
with:
|
||||
script: |
|
||||
const { REPORT_URL, COMMIT_SHA } = process.env
|
||||
|
||||
await github.rest.repos.createCommitStatus({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
sha: `${COMMIT_SHA}`,
|
||||
state: 'success',
|
||||
target_url: `${REPORT_URL}`,
|
||||
context: 'Code coverage report',
|
||||
})
|
||||
|
||||
trigger-e2e-tests:
|
||||
runs-on: [ self-hosted, gen3, small ]
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
Howdy! Usual good software engineering practices apply. Write
|
||||
tests. Write comments. Follow standard Rust coding practices where
|
||||
possible. Use 'cargo fmt' and 'clippy' to tidy up formatting.
|
||||
possible. Use `cargo fmt` and `cargo clippy` to tidy up formatting.
|
||||
|
||||
There are soft spots in the code, which could use cleanup,
|
||||
refactoring, additional comments, and so forth. Let's try to raise the
|
||||
|
||||
339
Cargo.lock
generated
339
Cargo.lock
generated
@@ -230,40 +230,38 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "aws-config"
|
||||
version = "0.51.0"
|
||||
version = "0.55.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56a636c44c77fa18bdba56126a34d30cfe5538fe88f7d34988fa731fee143ddd"
|
||||
checksum = "fc00553f5f3c06ffd4510a9d576f92143618706c45ea6ff81e84ad9be9588abd"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-http",
|
||||
"aws-sdk-sso",
|
||||
"aws-sdk-sts",
|
||||
"aws-smithy-async 0.51.0",
|
||||
"aws-smithy-client 0.51.0",
|
||||
"aws-smithy-http 0.51.0",
|
||||
"aws-smithy-http-tower 0.51.0",
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-client",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-http-tower",
|
||||
"aws-smithy-json",
|
||||
"aws-smithy-types 0.51.0",
|
||||
"aws-types 0.51.0",
|
||||
"aws-smithy-types",
|
||||
"aws-types",
|
||||
"bytes",
|
||||
"hex",
|
||||
"fastrand",
|
||||
"http",
|
||||
"hyper",
|
||||
"ring",
|
||||
"time",
|
||||
"tokio",
|
||||
"tower",
|
||||
"tracing",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-credential-types"
|
||||
version = "0.55.1"
|
||||
version = "0.55.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f4232d3729eefc287adc0d5a8adc97b7d94eefffe6bbe94312cc86c7ab6b06ce"
|
||||
checksum = "4cb57ac6088805821f78d282c0ba8aec809f11cbee10dda19a97b03ab040ccc2"
|
||||
dependencies = [
|
||||
"aws-smithy-async 0.55.1",
|
||||
"aws-smithy-types 0.55.1",
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-types",
|
||||
"fastrand",
|
||||
"tokio",
|
||||
"tracing",
|
||||
@@ -272,13 +270,13 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-endpoint"
|
||||
version = "0.51.0"
|
||||
version = "0.55.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6ca8f374874f6459aaa88dc861d7f5d834ca1ff97668eae190e97266b5f6c3fb"
|
||||
checksum = "9c5f6f84a4f46f95a9bb71d9300b73cd67eb868bc43ae84f66ad34752299f4ac"
|
||||
dependencies = [
|
||||
"aws-smithy-http 0.51.0",
|
||||
"aws-smithy-types 0.51.0",
|
||||
"aws-types 0.51.0",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-types",
|
||||
"aws-types",
|
||||
"http",
|
||||
"regex",
|
||||
"tracing",
|
||||
@@ -286,13 +284,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-http"
|
||||
version = "0.51.0"
|
||||
version = "0.55.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "78d41e19e779b73463f5f0c21b3aacc995f4ba783ab13a7ae9f5dfb159a551b4"
|
||||
checksum = "a754683c322f7dc5167484266489fdebdcd04d26e53c162cad1f3f949f2c5671"
|
||||
dependencies = [
|
||||
"aws-smithy-http 0.51.0",
|
||||
"aws-smithy-types 0.51.0",
|
||||
"aws-types 0.51.0",
|
||||
"aws-credential-types",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-types",
|
||||
"aws-types",
|
||||
"bytes",
|
||||
"http",
|
||||
"http-body",
|
||||
@@ -304,127 +303,104 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-sdk-s3"
|
||||
version = "0.21.0"
|
||||
version = "0.25.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a9f08665c8e03aca8cb092ef01e617436ebfa977fddc1240e1b062488ab5d48a"
|
||||
checksum = "392b9811ca489747ac84349790e49deaa1f16631949e7dd4156000251c260eae"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-endpoint",
|
||||
"aws-http",
|
||||
"aws-sig-auth",
|
||||
"aws-sigv4",
|
||||
"aws-smithy-async 0.51.0",
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-checksums",
|
||||
"aws-smithy-client 0.51.0",
|
||||
"aws-smithy-client",
|
||||
"aws-smithy-eventstream",
|
||||
"aws-smithy-http 0.51.0",
|
||||
"aws-smithy-http-tower 0.51.0",
|
||||
"aws-smithy-types 0.51.0",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-http-tower",
|
||||
"aws-smithy-json",
|
||||
"aws-smithy-types",
|
||||
"aws-smithy-xml",
|
||||
"aws-types 0.51.0",
|
||||
"aws-types",
|
||||
"bytes",
|
||||
"bytes-utils",
|
||||
"http",
|
||||
"http-body",
|
||||
"once_cell",
|
||||
"percent-encoding",
|
||||
"regex",
|
||||
"tokio-stream",
|
||||
"tower",
|
||||
"tracing",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-sdk-sts"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2d0fbe3c2c342bc8dfea4bb43937405a8ec06f99140a0dcb9c7b59e54dfa93a1"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-endpoint",
|
||||
"aws-http",
|
||||
"aws-sig-auth",
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-client",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-http-tower",
|
||||
"aws-smithy-json",
|
||||
"aws-smithy-query",
|
||||
"aws-smithy-types",
|
||||
"aws-smithy-xml",
|
||||
"aws-types",
|
||||
"bytes",
|
||||
"http",
|
||||
"regex",
|
||||
"tower",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-sdk-sso"
|
||||
version = "0.21.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "86dcb1cb71aa8763b327542ead410424515cff0cde5b753eedd2917e09c63734"
|
||||
dependencies = [
|
||||
"aws-endpoint",
|
||||
"aws-http",
|
||||
"aws-sig-auth",
|
||||
"aws-smithy-async 0.51.0",
|
||||
"aws-smithy-client 0.51.0",
|
||||
"aws-smithy-http 0.51.0",
|
||||
"aws-smithy-http-tower 0.51.0",
|
||||
"aws-smithy-json",
|
||||
"aws-smithy-types 0.51.0",
|
||||
"aws-types 0.51.0",
|
||||
"bytes",
|
||||
"http",
|
||||
"tokio-stream",
|
||||
"tower",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-sdk-sts"
|
||||
version = "0.21.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fdfcf584297c666f6b472d5368a78de3bc714b6e0a53d7fbf76c3e347c292ab1"
|
||||
dependencies = [
|
||||
"aws-endpoint",
|
||||
"aws-http",
|
||||
"aws-sig-auth",
|
||||
"aws-smithy-async 0.51.0",
|
||||
"aws-smithy-client 0.51.0",
|
||||
"aws-smithy-http 0.51.0",
|
||||
"aws-smithy-http-tower 0.51.0",
|
||||
"aws-smithy-query",
|
||||
"aws-smithy-types 0.51.0",
|
||||
"aws-smithy-xml",
|
||||
"aws-types 0.51.0",
|
||||
"bytes",
|
||||
"http",
|
||||
"tower",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-sig-auth"
|
||||
version = "0.51.0"
|
||||
version = "0.55.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "12cbe7b2be9e185c1fbce27fc9c41c66b195b32d89aa099f98768d9544221308"
|
||||
checksum = "84dc92a63ede3c2cbe43529cb87ffa58763520c96c6a46ca1ced80417afba845"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-sigv4",
|
||||
"aws-smithy-eventstream",
|
||||
"aws-smithy-http 0.51.0",
|
||||
"aws-types 0.51.0",
|
||||
"aws-smithy-http",
|
||||
"aws-types",
|
||||
"http",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-sigv4"
|
||||
version = "0.51.1"
|
||||
version = "0.55.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c0b2658d2cb66dbf02f0e8dee80810ef1e0ca3530ede463e0ef994c301087d1"
|
||||
checksum = "392fefab9d6fcbd76d518eb3b1c040b84728ab50f58df0c3c53ada4bea9d327e"
|
||||
dependencies = [
|
||||
"aws-smithy-eventstream",
|
||||
"aws-smithy-http 0.51.0",
|
||||
"aws-smithy-http",
|
||||
"bytes",
|
||||
"form_urlencoded",
|
||||
"hex",
|
||||
"hmac",
|
||||
"http",
|
||||
"once_cell",
|
||||
"percent-encoding",
|
||||
"regex",
|
||||
"ring",
|
||||
"sha2",
|
||||
"time",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-async"
|
||||
version = "0.51.0"
|
||||
version = "0.55.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7b3442b4c5d3fc39891a2e5e625735fba6b24694887d49c6518460fde98247a9"
|
||||
dependencies = [
|
||||
"futures-util",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-async"
|
||||
version = "0.55.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "88573bcfbe1dcfd54d4912846df028b42d6255cbf9ce07be216b1bbfd11fc4b9"
|
||||
checksum = "ae23b9fe7a07d0919000116c4c5c0578303fbce6fc8d32efca1f7759d4c20faf"
|
||||
dependencies = [
|
||||
"futures-util",
|
||||
"pin-project-lite",
|
||||
@@ -434,12 +410,12 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-checksums"
|
||||
version = "0.51.0"
|
||||
version = "0.55.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cc227e36e346f45298288359f37123e1a92628d1cec6b11b5eb335553278bd9e"
|
||||
checksum = "a6367acbd6849b8c7c659e166955531274ae147bf83ab4312885991f6b6706cb"
|
||||
dependencies = [
|
||||
"aws-smithy-http 0.51.0",
|
||||
"aws-smithy-types 0.51.0",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-types",
|
||||
"bytes",
|
||||
"crc32c",
|
||||
"crc32fast",
|
||||
@@ -455,14 +431,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-client"
|
||||
version = "0.51.0"
|
||||
version = "0.55.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ff28d553714f8f54cd921227934fc13a536a1c03f106e56b362fd57e16d450ad"
|
||||
checksum = "5230d25d244a51339273b8870f0f77874cd4449fb4f8f629b21188ae10cfc0ba"
|
||||
dependencies = [
|
||||
"aws-smithy-async 0.51.0",
|
||||
"aws-smithy-http 0.51.0",
|
||||
"aws-smithy-http-tower 0.51.0",
|
||||
"aws-smithy-types 0.51.0",
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-http-tower",
|
||||
"aws-smithy-types",
|
||||
"bytes",
|
||||
"fastrand",
|
||||
"http",
|
||||
@@ -471,26 +447,7 @@ dependencies = [
|
||||
"hyper-rustls",
|
||||
"lazy_static",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
"tower",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-client"
|
||||
version = "0.55.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2f52352bae50d3337d5d6151b695d31a8c10ebea113eca5bead531f8301b067"
|
||||
dependencies = [
|
||||
"aws-smithy-async 0.55.1",
|
||||
"aws-smithy-http 0.55.1",
|
||||
"aws-smithy-http-tower 0.55.1",
|
||||
"aws-smithy-types 0.55.1",
|
||||
"bytes",
|
||||
"fastrand",
|
||||
"http",
|
||||
"http-body",
|
||||
"pin-project-lite",
|
||||
"rustls 0.20.8",
|
||||
"tokio",
|
||||
"tower",
|
||||
"tracing",
|
||||
@@ -498,23 +455,23 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-eventstream"
|
||||
version = "0.51.0"
|
||||
version = "0.55.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d7ea0df7161ce65b5c8ca6eb709a1a907376fa18226976e41c748ce02ccccf24"
|
||||
checksum = "22d2a2bcc16e5c4d949ffd2b851da852b9bbed4bb364ed4ae371b42137ca06d9"
|
||||
dependencies = [
|
||||
"aws-smithy-types 0.51.0",
|
||||
"aws-smithy-types",
|
||||
"bytes",
|
||||
"crc32fast",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-http"
|
||||
version = "0.51.0"
|
||||
version = "0.55.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bf58ed4fefa61dbf038e5421a521cbc2c448ef69deff0ab1d915d8a10eda5664"
|
||||
checksum = "b60e2133beb9fe6ffe0b70deca57aaeff0a35ad24a9c6fab2fd3b4f45b99fdb5"
|
||||
dependencies = [
|
||||
"aws-smithy-eventstream",
|
||||
"aws-smithy-types 0.51.0",
|
||||
"aws-smithy-types",
|
||||
"bytes",
|
||||
"bytes-utils",
|
||||
"futures-core",
|
||||
@@ -530,49 +487,14 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-http"
|
||||
version = "0.55.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "03bcc02d7ed9649d855c8ce4a735e9848d7b8f7568aad0504c158e3baa955df8"
|
||||
dependencies = [
|
||||
"aws-smithy-types 0.55.1",
|
||||
"bytes",
|
||||
"bytes-utils",
|
||||
"futures-core",
|
||||
"http",
|
||||
"http-body",
|
||||
"hyper",
|
||||
"once_cell",
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
"pin-utils",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-http-tower"
|
||||
version = "0.51.0"
|
||||
version = "0.55.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "20c96d7bd35e7cf96aca1134b2f81b1b59ffe493f7c6539c051791cbbf7a42d3"
|
||||
checksum = "3a4d94f556c86a0dd916a5d7c39747157ea8cb909ca469703e20fee33e448b67"
|
||||
dependencies = [
|
||||
"aws-smithy-http 0.51.0",
|
||||
"bytes",
|
||||
"http",
|
||||
"http-body",
|
||||
"pin-project-lite",
|
||||
"tower",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-http-tower"
|
||||
version = "0.55.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "da88b3a860f65505996c29192d800f1aeb9480440f56d63aad33a3c12045017a"
|
||||
dependencies = [
|
||||
"aws-smithy-http 0.55.1",
|
||||
"aws-smithy-types 0.55.1",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-types",
|
||||
"bytes",
|
||||
"http",
|
||||
"http-body",
|
||||
@@ -583,40 +505,28 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-json"
|
||||
version = "0.51.0"
|
||||
version = "0.55.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d8324ba98c8a94187723cc16c37aefa09504646ee65c3d2c3af495bab5ea701b"
|
||||
checksum = "5ce3d6e6ebb00b2cce379f079ad5ec508f9bcc3a9510d9b9c1840ed1d6f8af39"
|
||||
dependencies = [
|
||||
"aws-smithy-types 0.51.0",
|
||||
"aws-smithy-types",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-query"
|
||||
version = "0.51.0"
|
||||
version = "0.55.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "83834ed2ff69ea6f6657baf205267dc2c0abe940703503a3e5d60ce23be3d306"
|
||||
checksum = "d58edfca32ef9bfbc1ca394599e17ea329cb52d6a07359827be74235b64b3298"
|
||||
dependencies = [
|
||||
"aws-smithy-types 0.51.0",
|
||||
"aws-smithy-types",
|
||||
"urlencoding",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-types"
|
||||
version = "0.51.0"
|
||||
version = "0.55.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b02e06ea63498c43bc0217ea4d16605d4e58d85c12fc23f6572ff6d0a840c61"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"num-integer",
|
||||
"ryu",
|
||||
"time",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-types"
|
||||
version = "0.55.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cd0afc731fd1417d791f9145a1e0c30e23ae0beaab9b4814017708ead2fc20f1"
|
||||
checksum = "58db46fc1f4f26be01ebdb821751b4e2482cd43aa2b64a0348fb89762defaffa"
|
||||
dependencies = [
|
||||
"base64-simd",
|
||||
"itoa",
|
||||
@@ -627,40 +537,24 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-xml"
|
||||
version = "0.51.0"
|
||||
version = "0.55.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "246e9f83dd1fdf5d347fa30ae4ad30a9d1d42ce4cd74a93d94afa874646f94cd"
|
||||
checksum = "fb557fe4995bd9ec87fb244bbb254666a971dc902a783e9da8b7711610e9664c"
|
||||
dependencies = [
|
||||
"xmlparser",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-types"
|
||||
version = "0.51.0"
|
||||
version = "0.55.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05701d32da168b44f7ee63147781aed8723e792cc131cb9b18363b5393f17f70"
|
||||
dependencies = [
|
||||
"aws-smithy-async 0.51.0",
|
||||
"aws-smithy-client 0.51.0",
|
||||
"aws-smithy-http 0.51.0",
|
||||
"aws-smithy-types 0.51.0",
|
||||
"http",
|
||||
"rustc_version",
|
||||
"tracing",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-types"
|
||||
version = "0.55.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9b082e329d9a304d39e193ad5c7ab363a0d6507aca6965e0673a746686fb0cc"
|
||||
checksum = "de0869598bfe46ec44ffe17e063ed33336e59df90356ca8ff0e8da6f7c1d994b"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-smithy-async 0.55.1",
|
||||
"aws-smithy-client 0.55.1",
|
||||
"aws-smithy-http 0.55.1",
|
||||
"aws-smithy-types 0.55.1",
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-client",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-types",
|
||||
"http",
|
||||
"rustc_version",
|
||||
"tracing",
|
||||
@@ -3367,9 +3261,10 @@ dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
"aws-config",
|
||||
"aws-credential-types",
|
||||
"aws-sdk-s3",
|
||||
"aws-smithy-http 0.51.0",
|
||||
"aws-types 0.55.1",
|
||||
"aws-smithy-http",
|
||||
"aws-types",
|
||||
"hyper",
|
||||
"metrics",
|
||||
"once_cell",
|
||||
|
||||
@@ -21,9 +21,10 @@ anyhow = { version = "1.0", features = ["backtrace"] }
|
||||
async-stream = "0.3"
|
||||
async-trait = "0.1"
|
||||
atty = "0.2.14"
|
||||
aws-config = { version = "0.51.0", default-features = false, features=["rustls"] }
|
||||
aws-sdk-s3 = "0.21.0"
|
||||
aws-smithy-http = "0.51.0"
|
||||
aws-config = { version = "0.55", default-features = false, features=["rustls"] }
|
||||
aws-sdk-s3 = "0.25"
|
||||
aws-smithy-http = "0.55"
|
||||
aws-credential-types = "0.55"
|
||||
aws-types = "0.55"
|
||||
base64 = "0.13.0"
|
||||
bincode = "1.3"
|
||||
|
||||
10
README.md
10
README.md
@@ -17,7 +17,7 @@ The Neon storage engine consists of two major components:
|
||||
- Pageserver. Scalable storage backend for the compute nodes.
|
||||
- Safekeepers. The safekeepers form a redundant WAL service that received WAL from the compute node, and stores it durably until it has been processed by the pageserver and uploaded to cloud storage.
|
||||
|
||||
See developer documentation in [/docs/SUMMARY.md](/docs/SUMMARY.md) for more information.
|
||||
See developer documentation in [SUMMARY.md](/docs/SUMMARY.md) for more information.
|
||||
|
||||
## Running local installation
|
||||
|
||||
@@ -238,9 +238,9 @@ CARGO_BUILD_FLAGS="--features=testing" make
|
||||
|
||||
## Documentation
|
||||
|
||||
[/docs/](/docs/) Contains a top-level overview of all available markdown documentation.
|
||||
[docs](/docs) Contains a top-level overview of all available markdown documentation.
|
||||
|
||||
- [/docs/sourcetree.md](/docs/sourcetree.md) contains overview of source tree layout.
|
||||
- [sourcetree.md](/docs/sourcetree.md) contains overview of source tree layout.
|
||||
|
||||
To view your `rustdoc` documentation in a browser, try running `cargo doc --no-deps --open`
|
||||
|
||||
@@ -265,6 +265,6 @@ To get more familiar with this aspect, refer to:
|
||||
|
||||
## Join the development
|
||||
|
||||
- Read `CONTRIBUTING.md` to learn about project code style and practices.
|
||||
- To get familiar with a source tree layout, use [/docs/sourcetree.md](/docs/sourcetree.md).
|
||||
- Read [CONTRIBUTING.md](/CONTRIBUTING.md) to learn about project code style and practices.
|
||||
- To get familiar with a source tree layout, use [sourcetree.md](/docs/sourcetree.md).
|
||||
- To learn more about PostgreSQL internals, check http://www.interdb.jp/pg/index.html
|
||||
|
||||
@@ -8,9 +8,7 @@ use std::process::{Child, Command};
|
||||
use std::{io, result};
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use pageserver_api::models::{
|
||||
TenantConfigRequest, TenantCreateRequest, TenantInfo, TimelineCreateRequest, TimelineInfo,
|
||||
};
|
||||
use pageserver_api::models::{self, TenantInfo, TimelineInfo};
|
||||
use postgres_backend::AuthType;
|
||||
use postgres_connection::{parse_host_port, PgConnectionConfig};
|
||||
use reqwest::blocking::{Client, RequestBuilder, Response};
|
||||
@@ -316,8 +314,8 @@ impl PageServerNode {
|
||||
settings: HashMap<&str, &str>,
|
||||
) -> anyhow::Result<TenantId> {
|
||||
let mut settings = settings.clone();
|
||||
let request = TenantCreateRequest {
|
||||
new_tenant_id,
|
||||
|
||||
let config = models::TenantConfig {
|
||||
checkpoint_distance: settings
|
||||
.remove("checkpoint_distance")
|
||||
.map(|x| x.parse::<u64>())
|
||||
@@ -372,6 +370,10 @@ impl PageServerNode {
|
||||
.remove("evictions_low_residence_duration_metric_threshold")
|
||||
.map(|x| x.to_string()),
|
||||
};
|
||||
let request = models::TenantCreateRequest {
|
||||
new_tenant_id,
|
||||
config,
|
||||
};
|
||||
if !settings.is_empty() {
|
||||
bail!("Unrecognized tenant settings: {settings:?}")
|
||||
}
|
||||
@@ -391,67 +393,81 @@ impl PageServerNode {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn tenant_config(&self, tenant_id: TenantId, settings: HashMap<&str, &str>) -> Result<()> {
|
||||
self.http_request(Method::PUT, format!("{}/tenant/config", self.http_base_url))?
|
||||
.json(&TenantConfigRequest {
|
||||
tenant_id,
|
||||
pub fn tenant_config(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
mut settings: HashMap<&str, &str>,
|
||||
) -> anyhow::Result<()> {
|
||||
let config = {
|
||||
// Braces to make the diff easier to read
|
||||
models::TenantConfig {
|
||||
checkpoint_distance: settings
|
||||
.get("checkpoint_distance")
|
||||
.remove("checkpoint_distance")
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'checkpoint_distance' as an integer")?,
|
||||
checkpoint_timeout: settings.get("checkpoint_timeout").map(|x| x.to_string()),
|
||||
checkpoint_timeout: settings.remove("checkpoint_timeout").map(|x| x.to_string()),
|
||||
compaction_target_size: settings
|
||||
.get("compaction_target_size")
|
||||
.remove("compaction_target_size")
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'compaction_target_size' as an integer")?,
|
||||
compaction_period: settings.get("compaction_period").map(|x| x.to_string()),
|
||||
compaction_period: settings.remove("compaction_period").map(|x| x.to_string()),
|
||||
compaction_threshold: settings
|
||||
.get("compaction_threshold")
|
||||
.remove("compaction_threshold")
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'compaction_threshold' as an integer")?,
|
||||
gc_horizon: settings
|
||||
.get("gc_horizon")
|
||||
.remove("gc_horizon")
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'gc_horizon' as an integer")?,
|
||||
gc_period: settings.get("gc_period").map(|x| x.to_string()),
|
||||
gc_period: settings.remove("gc_period").map(|x| x.to_string()),
|
||||
image_creation_threshold: settings
|
||||
.get("image_creation_threshold")
|
||||
.remove("image_creation_threshold")
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'image_creation_threshold' as non zero integer")?,
|
||||
pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
|
||||
pitr_interval: settings.remove("pitr_interval").map(|x| x.to_string()),
|
||||
walreceiver_connect_timeout: settings
|
||||
.get("walreceiver_connect_timeout")
|
||||
.remove("walreceiver_connect_timeout")
|
||||
.map(|x| x.to_string()),
|
||||
lagging_wal_timeout: settings
|
||||
.remove("lagging_wal_timeout")
|
||||
.map(|x| x.to_string()),
|
||||
lagging_wal_timeout: settings.get("lagging_wal_timeout").map(|x| x.to_string()),
|
||||
max_lsn_wal_lag: settings
|
||||
.get("max_lsn_wal_lag")
|
||||
.remove("max_lsn_wal_lag")
|
||||
.map(|x| x.parse::<NonZeroU64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
|
||||
trace_read_requests: settings
|
||||
.get("trace_read_requests")
|
||||
.remove("trace_read_requests")
|
||||
.map(|x| x.parse::<bool>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'trace_read_requests' as bool")?,
|
||||
eviction_policy: settings
|
||||
.get("eviction_policy")
|
||||
.map(|x| serde_json::from_str(x))
|
||||
.remove("eviction_policy")
|
||||
.map(serde_json::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse 'eviction_policy' json")?,
|
||||
min_resident_size_override: settings
|
||||
.get("min_resident_size_override")
|
||||
.remove("min_resident_size_override")
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'min_resident_size_override' as an integer")?,
|
||||
evictions_low_residence_duration_metric_threshold: settings
|
||||
.get("evictions_low_residence_duration_metric_threshold")
|
||||
.remove("evictions_low_residence_duration_metric_threshold")
|
||||
.map(|x| x.to_string()),
|
||||
})
|
||||
}
|
||||
};
|
||||
|
||||
if !settings.is_empty() {
|
||||
bail!("Unrecognized tenant settings: {settings:?}")
|
||||
}
|
||||
|
||||
self.http_request(Method::PUT, format!("{}/tenant/config", self.http_base_url))?
|
||||
.json(&models::TenantConfigRequest { tenant_id, config })
|
||||
.send()?
|
||||
.error_from_body()?;
|
||||
|
||||
@@ -483,7 +499,7 @@ impl PageServerNode {
|
||||
Method::POST,
|
||||
format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
|
||||
)?
|
||||
.json(&TimelineCreateRequest {
|
||||
.json(&models::TimelineCreateRequest {
|
||||
new_timeline_id,
|
||||
ancestor_start_lsn,
|
||||
ancestor_timeline_id,
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
marker::PhantomData,
|
||||
num::{NonZeroU64, NonZeroUsize},
|
||||
time::SystemTime,
|
||||
unreachable,
|
||||
};
|
||||
|
||||
use byteorder::{BigEndian, ReadBytesExt};
|
||||
@@ -48,13 +50,33 @@ pub enum TenantState {
|
||||
}
|
||||
|
||||
impl TenantState {
|
||||
pub fn has_in_progress_downloads(&self) -> bool {
|
||||
pub fn attachment_status(&self) -> TenantAttachmentStatus {
|
||||
use TenantAttachmentStatus::*;
|
||||
match self {
|
||||
Self::Loading => true,
|
||||
Self::Attaching => true,
|
||||
Self::Active => false,
|
||||
Self::Stopping => false,
|
||||
Self::Broken { .. } => false,
|
||||
// The attach procedure writes the marker file before adding the Attaching tenant to the tenants map.
|
||||
// So, technically, we can return Attached here.
|
||||
// However, as soon as Console observes Attached, it will proceed with the Postgres-level health check.
|
||||
// But, our attach task might still be fetching the remote timelines, etc.
|
||||
// So, return `Maybe` while Attaching, making Console wait for the attach task to finish.
|
||||
Self::Attaching => Maybe,
|
||||
// tenant mgr startup distinguishes attaching from loading via marker file.
|
||||
// If it's loading, there is no attach marker file, i.e., attach had finished in the past.
|
||||
Self::Loading => Attached,
|
||||
// We only reach Active after successful load / attach.
|
||||
// So, call atttachment status Attached.
|
||||
Self::Active => Attached,
|
||||
// If the (initial or resumed) attach procedure fails, the tenant becomes Broken.
|
||||
// However, it also becomes Broken if the regular load fails.
|
||||
// We would need a separate TenantState variant to distinguish these cases.
|
||||
// However, there's no practical difference from Console's perspective.
|
||||
// It will run a Postgres-level health check as soon as it observes Attached.
|
||||
// That will fail on Broken tenants.
|
||||
// Console can then rollback the attach, or, wait for operator to fix the Broken tenant.
|
||||
Self::Broken { .. } => Attached,
|
||||
// Why is Stopping a Maybe case? Because, during pageserver shutdown,
|
||||
// we set the Stopping state irrespective of whether the tenant
|
||||
// has finished attaching or not.
|
||||
Self::Stopping => Maybe,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -112,10 +134,25 @@ pub struct TimelineCreateRequest {
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize, Default)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct TenantCreateRequest {
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub new_tenant_id: Option<TenantId>,
|
||||
#[serde(flatten)]
|
||||
pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it
|
||||
}
|
||||
|
||||
impl std::ops::Deref for TenantCreateRequest {
|
||||
type Target = TenantConfig;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.config
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Default)]
|
||||
pub struct TenantConfig {
|
||||
pub checkpoint_distance: Option<u64>,
|
||||
pub checkpoint_timeout: Option<String>,
|
||||
pub compaction_target_size: Option<u64>,
|
||||
@@ -159,36 +196,25 @@ impl TenantCreateRequest {
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct TenantConfigRequest {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub tenant_id: TenantId,
|
||||
#[serde(default)]
|
||||
pub checkpoint_distance: Option<u64>,
|
||||
pub checkpoint_timeout: Option<String>,
|
||||
pub compaction_target_size: Option<u64>,
|
||||
pub compaction_period: Option<String>,
|
||||
pub compaction_threshold: Option<usize>,
|
||||
pub gc_horizon: Option<u64>,
|
||||
pub gc_period: Option<String>,
|
||||
pub image_creation_threshold: Option<usize>,
|
||||
pub pitr_interval: Option<String>,
|
||||
pub walreceiver_connect_timeout: Option<String>,
|
||||
pub lagging_wal_timeout: Option<String>,
|
||||
pub max_lsn_wal_lag: Option<NonZeroU64>,
|
||||
pub trace_read_requests: Option<bool>,
|
||||
// We defer the parsing of the eviction_policy field to the request handler.
|
||||
// Otherwise we'd have to move the types for eviction policy into this package.
|
||||
// We might do that once the eviction feature has stabilizied.
|
||||
// For now, this field is not even documented in the openapi_spec.yml.
|
||||
pub eviction_policy: Option<serde_json::Value>,
|
||||
pub min_resident_size_override: Option<u64>,
|
||||
pub evictions_low_residence_duration_metric_threshold: Option<String>,
|
||||
#[serde(flatten)]
|
||||
pub config: TenantConfig,
|
||||
}
|
||||
|
||||
impl std::ops::Deref for TenantConfigRequest {
|
||||
type Target = TenantConfig;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.config
|
||||
}
|
||||
}
|
||||
|
||||
impl TenantConfigRequest {
|
||||
pub fn new(tenant_id: TenantId) -> TenantConfigRequest {
|
||||
TenantConfigRequest {
|
||||
tenant_id,
|
||||
let config = TenantConfig {
|
||||
checkpoint_distance: None,
|
||||
checkpoint_timeout: None,
|
||||
compaction_target_size: None,
|
||||
@@ -205,20 +231,30 @@ impl TenantConfigRequest {
|
||||
eviction_policy: None,
|
||||
min_resident_size_override: None,
|
||||
evictions_low_residence_duration_metric_threshold: None,
|
||||
}
|
||||
};
|
||||
TenantConfigRequest { tenant_id, config }
|
||||
}
|
||||
}
|
||||
|
||||
/// See [`TenantState::attachment_status`] and the OpenAPI docs for context.
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum TenantAttachmentStatus {
|
||||
Maybe,
|
||||
Attached,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct TenantInfo {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub id: TenantId,
|
||||
// NB: intentionally not part of OpenAPI, we don't want to commit to a specific set of TenantState's
|
||||
pub state: TenantState,
|
||||
/// Sum of the size of all layer files.
|
||||
/// If a layer is present in both local FS and S3, it counts only once.
|
||||
pub current_physical_size: Option<u64>, // physical size is only included in `tenant_status` endpoint
|
||||
pub has_in_progress_downloads: Option<bool>,
|
||||
pub attachment_status: TenantAttachmentStatus,
|
||||
}
|
||||
|
||||
/// This represents the output of the "timeline_detail" and "timeline_list" API calls.
|
||||
@@ -691,7 +727,7 @@ mod tests {
|
||||
id: TenantId::generate(),
|
||||
state: TenantState::Active,
|
||||
current_physical_size: Some(42),
|
||||
has_in_progress_downloads: Some(false),
|
||||
attachment_status: TenantAttachmentStatus::Attached,
|
||||
};
|
||||
let expected_active = json!({
|
||||
"id": original_active.id.to_string(),
|
||||
@@ -699,7 +735,7 @@ mod tests {
|
||||
"slug": "Active",
|
||||
},
|
||||
"current_physical_size": 42,
|
||||
"has_in_progress_downloads": false,
|
||||
"attachment_status": "attached",
|
||||
});
|
||||
|
||||
let original_broken = TenantInfo {
|
||||
@@ -709,7 +745,7 @@ mod tests {
|
||||
backtrace: "backtrace info".into(),
|
||||
},
|
||||
current_physical_size: Some(42),
|
||||
has_in_progress_downloads: Some(false),
|
||||
attachment_status: TenantAttachmentStatus::Attached,
|
||||
};
|
||||
let expected_broken = json!({
|
||||
"id": original_broken.id.to_string(),
|
||||
@@ -721,7 +757,7 @@ mod tests {
|
||||
}
|
||||
},
|
||||
"current_physical_size": 42,
|
||||
"has_in_progress_downloads": false,
|
||||
"attachment_status": "attached",
|
||||
});
|
||||
|
||||
assert_eq!(
|
||||
|
||||
@@ -146,6 +146,10 @@ pub const XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;
|
||||
pub const XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED: u8 = (1 << 1) as u8;
|
||||
pub const XLH_DELETE_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;
|
||||
|
||||
// From replication/message.h
|
||||
pub const XLOG_LOGICAL_MESSAGE: u8 = 0x00;
|
||||
|
||||
// From rmgrlist.h
|
||||
pub const RM_XLOG_ID: u8 = 0;
|
||||
pub const RM_XACT_ID: u8 = 1;
|
||||
pub const RM_SMGR_ID: u8 = 2;
|
||||
@@ -157,6 +161,7 @@ pub const RM_RELMAP_ID: u8 = 7;
|
||||
pub const RM_STANDBY_ID: u8 = 8;
|
||||
pub const RM_HEAP2_ID: u8 = 9;
|
||||
pub const RM_HEAP_ID: u8 = 10;
|
||||
pub const RM_LOGICALMSG_ID: u8 = 21;
|
||||
|
||||
// from xlogreader.h
|
||||
pub const XLR_INFO_MASK: u8 = 0x0F;
|
||||
|
||||
@@ -12,6 +12,7 @@ aws-smithy-http.workspace = true
|
||||
aws-types.workspace = true
|
||||
aws-config.workspace = true
|
||||
aws-sdk-s3.workspace = true
|
||||
aws-credential-types.workspace = true
|
||||
hyper = { workspace = true, features = ["stream"] }
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
|
||||
@@ -9,14 +9,15 @@ use std::sync::Arc;
|
||||
use anyhow::Context;
|
||||
use aws_config::{
|
||||
environment::credentials::EnvironmentVariableCredentialsProvider,
|
||||
imds::credentials::ImdsCredentialsProvider,
|
||||
meta::credentials::{CredentialsProviderChain, LazyCachingCredentialsProvider},
|
||||
imds::credentials::ImdsCredentialsProvider, meta::credentials::CredentialsProviderChain,
|
||||
};
|
||||
use aws_credential_types::cache::CredentialsCache;
|
||||
use aws_sdk_s3::{
|
||||
config::Config,
|
||||
error::{GetObjectError, GetObjectErrorKind},
|
||||
types::{ByteStream, SdkError},
|
||||
Client, Endpoint, Region,
|
||||
config::{Config, Region},
|
||||
error::SdkError,
|
||||
operation::get_object::GetObjectError,
|
||||
primitives::ByteStream,
|
||||
Client,
|
||||
};
|
||||
use aws_smithy_http::body::SdkBody;
|
||||
use hyper::Body;
|
||||
@@ -125,28 +126,23 @@ impl S3Bucket {
|
||||
|
||||
let credentials_provider = {
|
||||
// uses "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"
|
||||
let env_creds = EnvironmentVariableCredentialsProvider::new();
|
||||
CredentialsProviderChain::first_try(
|
||||
"env",
|
||||
EnvironmentVariableCredentialsProvider::new(),
|
||||
)
|
||||
// uses imds v2
|
||||
let imds = ImdsCredentialsProvider::builder().build();
|
||||
|
||||
// finally add caching.
|
||||
// this might change in future, see https://github.com/awslabs/aws-sdk-rust/issues/629
|
||||
LazyCachingCredentialsProvider::builder()
|
||||
.load(CredentialsProviderChain::first_try("env", env_creds).or_else("imds", imds))
|
||||
.build()
|
||||
.or_else("imds", ImdsCredentialsProvider::builder().build())
|
||||
};
|
||||
|
||||
let mut config_builder = Config::builder()
|
||||
.region(Region::new(aws_config.bucket_region.clone()))
|
||||
.credentials_cache(CredentialsCache::lazy())
|
||||
.credentials_provider(credentials_provider);
|
||||
|
||||
if let Some(custom_endpoint) = aws_config.endpoint.clone() {
|
||||
let endpoint = Endpoint::immutable(
|
||||
custom_endpoint
|
||||
.parse()
|
||||
.expect("Failed to parse S3 custom endpoint"),
|
||||
);
|
||||
config_builder.set_endpoint_resolver(Some(Arc::new(endpoint)));
|
||||
config_builder = config_builder
|
||||
.endpoint_url(custom_endpoint)
|
||||
.force_path_style(true);
|
||||
}
|
||||
let client = Client::from_conf(config_builder.build());
|
||||
|
||||
@@ -229,14 +225,9 @@ impl S3Bucket {
|
||||
))),
|
||||
})
|
||||
}
|
||||
Err(SdkError::ServiceError {
|
||||
err:
|
||||
GetObjectError {
|
||||
kind: GetObjectErrorKind::NoSuchKey(..),
|
||||
..
|
||||
},
|
||||
..
|
||||
}) => Err(DownloadError::NotFound),
|
||||
Err(SdkError::ServiceError(e)) if matches!(e.err(), GetObjectError::NoSuchKey(_)) => {
|
||||
Err(DownloadError::NotFound)
|
||||
}
|
||||
Err(e) => {
|
||||
metrics::inc_get_object_fail();
|
||||
Err(DownloadError::Other(anyhow::anyhow!(
|
||||
|
||||
@@ -1,21 +1,21 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -euxo pipefail
|
||||
|
||||
PG_BIN=$1
|
||||
WAL_PATH=$2
|
||||
DATA_DIR=$3
|
||||
PORT=$4
|
||||
SYSID=`od -A n -j 24 -N 8 -t d8 $WAL_PATH/000000010000000000000002* | cut -c 3-`
|
||||
rm -fr $DATA_DIR
|
||||
env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U cloud_admin -D $DATA_DIR --sysid=$SYSID
|
||||
echo port=$PORT >> $DATA_DIR/postgresql.conf
|
||||
REDO_POS=0x`$PG_BIN/pg_controldata -D $DATA_DIR | fgrep "REDO location"| cut -c 42-`
|
||||
SYSID=$(od -A n -j 24 -N 8 -t d8 "$WAL_PATH"/000000010000000000000002* | cut -c 3-)
|
||||
rm -fr "$DATA_DIR"
|
||||
env -i LD_LIBRARY_PATH="$PG_BIN"/../lib "$PG_BIN"/initdb -E utf8 -U cloud_admin -D "$DATA_DIR" --sysid="$SYSID"
|
||||
echo port="$PORT" >> "$DATA_DIR"/postgresql.conf
|
||||
REDO_POS=0x$("$PG_BIN"/pg_controldata -D "$DATA_DIR" | grep -F "REDO location"| cut -c 42-)
|
||||
declare -i WAL_SIZE=$REDO_POS+114
|
||||
$PG_BIN/pg_ctl -D $DATA_DIR -l logfile start
|
||||
$PG_BIN/pg_ctl -D $DATA_DIR -l logfile stop -m immediate
|
||||
cp $DATA_DIR/pg_wal/000000010000000000000001 .
|
||||
cp $WAL_PATH/* $DATA_DIR/pg_wal/
|
||||
if [ -f $DATA_DIR/pg_wal/*.partial ]
|
||||
then
|
||||
(cd $DATA_DIR/pg_wal ; for partial in \*.partial ; do mv $partial `basename $partial .partial` ; done)
|
||||
fi
|
||||
dd if=000000010000000000000001 of=$DATA_DIR/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc
|
||||
"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l logfile start
|
||||
"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l logfile stop -m immediate
|
||||
cp "$DATA_DIR"/pg_wal/000000010000000000000001 .
|
||||
cp "$WAL_PATH"/* "$DATA_DIR"/pg_wal/
|
||||
for partial in "$DATA_DIR"/pg_wal/*.partial ; do mv "$partial" "${partial%.partial}" ; done
|
||||
dd if=000000010000000000000001 of="$DATA_DIR"/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc
|
||||
rm -f 000000010000000000000001
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
PG_BIN=$1
|
||||
WAL_PATH=$2
|
||||
DATA_DIR=$3
|
||||
PORT=$4
|
||||
SYSID=`od -A n -j 24 -N 8 -t d8 $WAL_PATH/000000010000000000000002* | cut -c 3-`
|
||||
rm -fr $DATA_DIR /tmp/pg_wals
|
||||
mkdir /tmp/pg_wals
|
||||
env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U cloud_admin -D $DATA_DIR --sysid=$SYSID
|
||||
echo port=$PORT >> $DATA_DIR/postgresql.conf
|
||||
REDO_POS=0x`$PG_BIN/pg_controldata -D $DATA_DIR | fgrep "REDO location"| cut -c 42-`
|
||||
declare -i WAL_SIZE=$REDO_POS+114
|
||||
cp $WAL_PATH/* /tmp/pg_wals
|
||||
if [ -f $DATA_DIR/pg_wal/*.partial ]
|
||||
then
|
||||
(cd /tmp/pg_wals ; for partial in \*.partial ; do mv $partial `basename $partial .partial` ; done)
|
||||
fi
|
||||
dd if=$DATA_DIR/pg_wal/000000010000000000000001 of=/tmp/pg_wals/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc
|
||||
echo > $DATA_DIR/recovery.signal
|
||||
rm -f $DATA_DIR/pg_wal/*
|
||||
echo "restore_command = 'cp /tmp/pg_wals/%f %p'" >> $DATA_DIR/postgresql.conf
|
||||
@@ -60,24 +60,43 @@ pub mod tracing_span_assert;
|
||||
|
||||
pub mod rate_limit;
|
||||
|
||||
/// use with fail::cfg("$name", "return(2000)")
|
||||
#[macro_export]
|
||||
macro_rules! failpoint_sleep_millis_async {
|
||||
($name:literal) => {{
|
||||
let should_sleep: Option<std::time::Duration> = (|| {
|
||||
fail::fail_point!($name, |v: Option<_>| {
|
||||
let millis = v.unwrap().parse::<u64>().unwrap();
|
||||
Some(Duration::from_millis(millis))
|
||||
});
|
||||
None
|
||||
})();
|
||||
if let Some(d) = should_sleep {
|
||||
tracing::info!("failpoint {:?}: sleeping for {:?}", $name, d);
|
||||
tokio::time::sleep(d).await;
|
||||
tracing::info!("failpoint {:?}: sleep done", $name);
|
||||
}
|
||||
}};
|
||||
mod failpoint_macro_helpers {
|
||||
|
||||
/// use with fail::cfg("$name", "return(2000)")
|
||||
///
|
||||
/// The effect is similar to a "sleep(2000)" action, i.e. we sleep for the
|
||||
/// specified time (in milliseconds). The main difference is that we use async
|
||||
/// tokio sleep function. Another difference is that we print lines to the log,
|
||||
/// which can be useful in tests to check that the failpoint was hit.
|
||||
#[macro_export]
|
||||
macro_rules! failpoint_sleep_millis_async {
|
||||
($name:literal) => {{
|
||||
// If the failpoint is used with a "return" action, set should_sleep to the
|
||||
// returned value (as string). Otherwise it's set to None.
|
||||
let should_sleep = (|| {
|
||||
::fail::fail_point!($name, |x| x);
|
||||
::std::option::Option::None
|
||||
})();
|
||||
|
||||
// Sleep if the action was a returned value
|
||||
if let ::std::option::Option::Some(duration_str) = should_sleep {
|
||||
$crate::failpoint_sleep_helper($name, duration_str).await
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
// Helper function used by the macro. (A function has nicer scoping so we
|
||||
// don't need to decorate everything with "::")
|
||||
pub async fn failpoint_sleep_helper(name: &'static str, duration_str: String) {
|
||||
let millis = duration_str.parse::<u64>().unwrap();
|
||||
let d = std::time::Duration::from_millis(millis);
|
||||
|
||||
tracing::info!("failpoint {:?}: sleeping for {:?}", name, d);
|
||||
tokio::time::sleep(d).await;
|
||||
tracing::info!("failpoint {:?}: sleep done", name);
|
||||
}
|
||||
}
|
||||
pub use failpoint_macro_helpers::failpoint_sleep_helper;
|
||||
|
||||
/// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
|
||||
///
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
//!
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME};
|
||||
use crate::tenant::mgr;
|
||||
use crate::tenant::{mgr, LogicalSizeCalculationCause};
|
||||
use anyhow;
|
||||
use chrono::Utc;
|
||||
use consumption_metrics::{idempotency_key, Event, EventChunk, EventType, CHUNK_SIZE};
|
||||
@@ -164,7 +164,8 @@ pub async fn collect_metrics_iteration(
|
||||
timeline_written_size,
|
||||
));
|
||||
|
||||
match timeline.get_current_logical_size(ctx) {
|
||||
let span = info_span!("collect_metrics_iteration", tenant_id = %timeline.tenant_id, timeline_id = %timeline.timeline_id);
|
||||
match span.in_scope(|| timeline.get_current_logical_size(ctx)) {
|
||||
// Only send timeline logical size when it is fully calculated.
|
||||
Ok((size, is_exact)) if is_exact => {
|
||||
current_metrics.push((
|
||||
@@ -334,7 +335,9 @@ pub async fn calculate_synthetic_size_worker(
|
||||
|
||||
if let Ok(tenant) = mgr::get_tenant(tenant_id, true).await
|
||||
{
|
||||
if let Err(e) = tenant.calculate_synthetic_size(ctx).await {
|
||||
if let Err(e) = tenant.calculate_synthetic_size(
|
||||
LogicalSizeCalculationCause::ConsumptionMetricsSyntheticSize,
|
||||
ctx).await {
|
||||
error!("failed to calculate synthetic size for tenant {}: {}", tenant_id, e);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -346,23 +346,23 @@ paths:
|
||||
starts writing to the tenant's S3 state unless it receives one of the
|
||||
distinguished errors below that state otherwise.
|
||||
|
||||
The method to identify whether a request has arrived at the pageserver, and
|
||||
whether it has succeeded, is to poll for the tenant status to reach "Active"
|
||||
or "Broken" state. These values are currently not explicitly documented in
|
||||
the API spec.
|
||||
Polling for `has_in_progress_downloads == false` is INCORRECT because that
|
||||
value can turn `false` during shutdown while the Attach operation is still
|
||||
unfinished.
|
||||
If a client receives a not-distinguished response, e.g., a network timeout,
|
||||
it MUST retry the /attach request and poll again for the tenant's
|
||||
attachment status.
|
||||
|
||||
After the client has received a 202, it MUST poll the tenant's
|
||||
attachment status (field `attachment_status`) to reach state `attached`.
|
||||
If the `attachment_status` is missing, the client MUST retry the `/attach`
|
||||
request (goto previous paragraph). This is a robustness measure in case the tenant
|
||||
status endpoint is buggy, but the attach operation is ongoing.
|
||||
|
||||
There is no way to cancel an in-flight request.
|
||||
|
||||
If a client receives a not-distinguished response, e.g., a network timeout,
|
||||
it MUST retry the /attach request and poll again for tenant status.
|
||||
|
||||
In any case, it must
|
||||
* NOT ASSUME that the /attach request has been lost in the network,
|
||||
* NOT ASSUME that the request has been lost based on a subsequent
|
||||
tenant status request returning 404. (The request may still be in flight!)
|
||||
In any case, the client
|
||||
* MUST NOT ASSUME that the /attach request has been lost in the network,
|
||||
* MUST NOT ASSUME that the request has been lost, based on the observation
|
||||
that a subsequent tenant status request returns 404. The request may
|
||||
still be in flight. It must be retried.
|
||||
responses:
|
||||
"202":
|
||||
description: Tenant attaching scheduled
|
||||
@@ -741,13 +741,16 @@ paths:
|
||||
$ref: "#/components/schemas/Error"
|
||||
post:
|
||||
description: |
|
||||
Create a tenant. Returns new tenant id on success.\
|
||||
Create a tenant. Returns new tenant id on success.
|
||||
|
||||
If no new tenant id is specified in parameters, it would be generated. It's an error to recreate the same tenant.
|
||||
|
||||
Invalid fields in the tenant config will cause the request to be rejected with status 400.
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/TenantCreateInfo"
|
||||
$ref: "#/components/schemas/TenantCreateRequest"
|
||||
responses:
|
||||
"201":
|
||||
description: New tenant created successfully
|
||||
@@ -790,11 +793,13 @@ paths:
|
||||
put:
|
||||
description: |
|
||||
Update tenant's config.
|
||||
|
||||
Invalid fields in the tenant config will cause the request to be rejected with status 400.
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/TenantConfigInfo"
|
||||
$ref: "#/components/schemas/TenantConfigRequest"
|
||||
responses:
|
||||
"200":
|
||||
description: OK
|
||||
@@ -846,7 +851,7 @@ paths:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/TenantConfig"
|
||||
$ref: "#/components/schemas/TenantConfigResponse"
|
||||
"400":
|
||||
description: Malformed get tenanant config request
|
||||
content:
|
||||
@@ -888,42 +893,48 @@ components:
|
||||
type: object
|
||||
required:
|
||||
- id
|
||||
- attachment_status
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
current_physical_size:
|
||||
type: integer
|
||||
has_in_progress_downloads:
|
||||
type: boolean
|
||||
TenantCreateInfo:
|
||||
attachment_status:
|
||||
description: |
|
||||
Status of this tenant's attachment to this pageserver.
|
||||
|
||||
- `maybe` means almost nothing, don't read anything into it
|
||||
except for the fact that the pageserver _might_ be already
|
||||
writing to the tenant's S3 state, so, DO NOT ATTACH the
|
||||
tenant to any other pageserver, or we risk split-brain.
|
||||
- `attached` means that the attach operation has completed,
|
||||
maybe successfully, maybe not. Perform a health check at
|
||||
the Postgres level to determine healthiness of the tenant.
|
||||
|
||||
See the tenant `/attach` endpoint for more information.
|
||||
type: string
|
||||
enum: [ "maybe", "attached" ]
|
||||
TenantCreateRequest:
|
||||
allOf:
|
||||
- $ref: '#/components/schemas/TenantConfig'
|
||||
- type: object
|
||||
properties:
|
||||
new_tenant_id:
|
||||
type: string
|
||||
format: hex
|
||||
TenantConfigRequest:
|
||||
allOf:
|
||||
- $ref: '#/components/schemas/TenantConfig'
|
||||
- type: object
|
||||
required:
|
||||
- tenant_id
|
||||
properties:
|
||||
tenant_id:
|
||||
type: string
|
||||
format: hex
|
||||
TenantConfig:
|
||||
type: object
|
||||
properties:
|
||||
new_tenant_id:
|
||||
type: string
|
||||
format: hex
|
||||
tenant_id:
|
||||
type: string
|
||||
format: hex
|
||||
gc_period:
|
||||
type: string
|
||||
gc_horizon:
|
||||
type: integer
|
||||
pitr_interval:
|
||||
type: string
|
||||
checkpoint_distance:
|
||||
type: integer
|
||||
checkpoint_timeout:
|
||||
type: string
|
||||
compaction_period:
|
||||
type: string
|
||||
compaction_threshold:
|
||||
type: string
|
||||
TenantConfigInfo:
|
||||
type: object
|
||||
properties:
|
||||
tenant_id:
|
||||
type: string
|
||||
format: hex
|
||||
gc_period:
|
||||
type: string
|
||||
gc_horizon:
|
||||
@@ -950,13 +961,13 @@ components:
|
||||
type: integer
|
||||
trace_read_requests:
|
||||
type: boolean
|
||||
TenantConfig:
|
||||
TenantConfigResponse:
|
||||
type: object
|
||||
properties:
|
||||
tenant_specific_overrides:
|
||||
$ref: "#/components/schemas/TenantConfigInfo"
|
||||
$ref: "#/components/schemas/TenantConfig"
|
||||
effective_config:
|
||||
$ref: "#/components/schemas/TenantConfigInfo"
|
||||
$ref: "#/components/schemas/TenantConfig"
|
||||
TimelineInfo:
|
||||
type: object
|
||||
required:
|
||||
|
||||
@@ -19,13 +19,14 @@ use super::models::{
|
||||
};
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::disk_usage_eviction_task;
|
||||
use crate::metrics::{StorageTimeOperation, STORAGE_TIME_GLOBAL};
|
||||
use crate::pgdatadir_mapping::LsnForTimestamp;
|
||||
use crate::task_mgr::TaskKind;
|
||||
use crate::tenant::config::TenantConfOpt;
|
||||
use crate::tenant::mgr::{TenantMapInsertError, TenantStateError};
|
||||
use crate::tenant::size::ModelInputs;
|
||||
use crate::tenant::storage_layer::LayerAccessStatsReset;
|
||||
use crate::tenant::{PageReconstructError, Timeline};
|
||||
use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError, Timeline};
|
||||
use crate::{config::PageServerConf, tenant::mgr};
|
||||
use utils::{
|
||||
auth::JwtAuth,
|
||||
@@ -105,6 +106,9 @@ impl From<PageReconstructError> for ApiError {
|
||||
PageReconstructError::Cancelled => {
|
||||
ApiError::InternalServerError(anyhow::anyhow!("request was cancelled"))
|
||||
}
|
||||
PageReconstructError::AncestorStopping(_) => {
|
||||
ApiError::InternalServerError(anyhow::Error::new(pre))
|
||||
}
|
||||
PageReconstructError::WalRedo(pre) => {
|
||||
ApiError::InternalServerError(anyhow::Error::new(pre))
|
||||
}
|
||||
@@ -169,6 +173,8 @@ async fn build_timeline_info(
|
||||
include_non_incremental_logical_size: bool,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<TimelineInfo> {
|
||||
crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
|
||||
let mut info = build_timeline_info_common(timeline, ctx)?;
|
||||
if include_non_incremental_logical_size {
|
||||
// XXX we should be using spawn_ondemand_logical_size_calculation here.
|
||||
@@ -191,6 +197,7 @@ fn build_timeline_info_common(
|
||||
timeline: &Arc<Timeline>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<TimelineInfo> {
|
||||
crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
let last_record_lsn = timeline.get_last_record_lsn();
|
||||
let (wal_source_connstr, last_received_msg_lsn, last_received_msg_ts) = {
|
||||
let guard = timeline.last_received_wal.lock().unwrap();
|
||||
@@ -263,25 +270,28 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Error);
|
||||
|
||||
let tenant = mgr::get_tenant(tenant_id, true).await?;
|
||||
match tenant.create_timeline(
|
||||
new_timeline_id,
|
||||
request_data.ancestor_timeline_id.map(TimelineId::from),
|
||||
request_data.ancestor_start_lsn,
|
||||
request_data.pg_version.unwrap_or(crate::DEFAULT_PG_VERSION),
|
||||
&ctx,
|
||||
)
|
||||
.instrument(info_span!("timeline_create", tenant = %tenant_id, new_timeline = ?request_data.new_timeline_id, timeline_id = %new_timeline_id, lsn=?request_data.ancestor_start_lsn, pg_version=?request_data.pg_version))
|
||||
.await {
|
||||
Ok(Some(new_timeline)) => {
|
||||
// Created. Construct a TimelineInfo for it.
|
||||
let timeline_info = build_timeline_info_common(&new_timeline, &ctx)
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
json_response(StatusCode::CREATED, timeline_info)
|
||||
async {
|
||||
let tenant = mgr::get_tenant(tenant_id, true).await?;
|
||||
match tenant.create_timeline(
|
||||
new_timeline_id,
|
||||
request_data.ancestor_timeline_id.map(TimelineId::from),
|
||||
request_data.ancestor_start_lsn,
|
||||
request_data.pg_version.unwrap_or(crate::DEFAULT_PG_VERSION),
|
||||
&ctx,
|
||||
)
|
||||
.await {
|
||||
Ok(Some(new_timeline)) => {
|
||||
// Created. Construct a TimelineInfo for it.
|
||||
let timeline_info = build_timeline_info_common(&new_timeline, &ctx)
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
json_response(StatusCode::CREATED, timeline_info)
|
||||
}
|
||||
Ok(None) => json_response(StatusCode::CONFLICT, ()), // timeline already exists
|
||||
Err(err) => Err(ApiError::InternalServerError(err)),
|
||||
}
|
||||
Ok(None) => json_response(StatusCode::CONFLICT, ()), // timeline already exists
|
||||
Err(err) => Err(ApiError::InternalServerError(err)),
|
||||
}
|
||||
.instrument(info_span!("timeline_create", tenant = %tenant_id, new_timeline = ?request_data.new_timeline_id, timeline_id = %new_timeline_id, lsn=?request_data.ancestor_start_lsn, pg_version=?request_data.pg_version))
|
||||
.await
|
||||
}
|
||||
|
||||
async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
@@ -303,6 +313,7 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,
|
||||
include_non_incremental_logical_size.unwrap_or(false),
|
||||
&ctx,
|
||||
)
|
||||
.instrument(info_span!("build_timeline_info", timeline_id = %timeline.timeline_id))
|
||||
.await
|
||||
.context("Failed to convert tenant timeline {timeline_id} into the local one: {e:?}")
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
@@ -387,9 +398,17 @@ async fn tenant_attach_handler(request: Request<Body>) -> Result<Response<Body>,
|
||||
let state = get_state(&request);
|
||||
|
||||
if let Some(remote_storage) = &state.remote_storage {
|
||||
mgr::attach_tenant(state.conf, tenant_id, remote_storage.clone(), &ctx)
|
||||
.instrument(info_span!("tenant_attach", tenant = %tenant_id))
|
||||
.await?;
|
||||
mgr::attach_tenant(
|
||||
state.conf,
|
||||
tenant_id,
|
||||
// XXX: Attach should provide the config, especially during tenant migration.
|
||||
// See https://github.com/neondatabase/neon/issues/1555
|
||||
TenantConfOpt::default(),
|
||||
remote_storage.clone(),
|
||||
&ctx,
|
||||
)
|
||||
.instrument(info_span!("tenant_attach", tenant = %tenant_id))
|
||||
.await?;
|
||||
} else {
|
||||
return Err(ApiError::BadRequest(anyhow!(
|
||||
"attach_tenant is not possible because pageserver was configured without remote storage"
|
||||
@@ -467,7 +486,7 @@ async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, A
|
||||
id: *id,
|
||||
state: state.clone(),
|
||||
current_physical_size: None,
|
||||
has_in_progress_downloads: Some(state.has_in_progress_downloads()),
|
||||
attachment_status: state.attachment_status(),
|
||||
})
|
||||
.collect::<Vec<TenantInfo>>();
|
||||
|
||||
@@ -492,7 +511,7 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
|
||||
id: tenant_id,
|
||||
state: state.clone(),
|
||||
current_physical_size: Some(current_physical_size),
|
||||
has_in_progress_downloads: Some(state.has_in_progress_downloads()),
|
||||
attachment_status: state.attachment_status(),
|
||||
})
|
||||
}
|
||||
.instrument(info_span!("tenant_status_handler", tenant = %tenant_id))
|
||||
@@ -527,7 +546,11 @@ async fn tenant_size_handler(request: Request<Body>) -> Result<Response<Body>, A
|
||||
|
||||
// this can be long operation
|
||||
let inputs = tenant
|
||||
.gather_size_inputs(retention_period, &ctx)
|
||||
.gather_size_inputs(
|
||||
retention_period,
|
||||
LogicalSizeCalculationCause::TenantSizeHandler,
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
|
||||
@@ -694,11 +717,17 @@ pub fn html_response(status: StatusCode, data: String) -> Result<Response<Body>,
|
||||
async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
check_permission(&request, None)?;
|
||||
|
||||
let _timer = STORAGE_TIME_GLOBAL
|
||||
.get_metric_with_label_values(&[StorageTimeOperation::CreateTenant.into()])
|
||||
.expect("bug")
|
||||
.start_timer();
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
|
||||
|
||||
let request_data: TenantCreateRequest = json_request(&mut request).await?;
|
||||
|
||||
let tenant_conf = TenantConfOpt::try_from(&request_data).map_err(ApiError::BadRequest)?;
|
||||
let tenant_conf =
|
||||
TenantConfOpt::try_from(&request_data.config).map_err(ApiError::BadRequest)?;
|
||||
|
||||
let target_tenant_id = request_data
|
||||
.new_tenant_id
|
||||
@@ -729,6 +758,7 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
res.context("created tenant failed to become active")
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
}
|
||||
|
||||
json_response(
|
||||
StatusCode::CREATED,
|
||||
TenantCreateResponse(new_tenant.tenant_id()),
|
||||
@@ -766,7 +796,8 @@ async fn update_tenant_config_handler(
|
||||
let tenant_id = request_data.tenant_id;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let tenant_conf = TenantConfOpt::try_from(&request_data).map_err(ApiError::BadRequest)?;
|
||||
let tenant_conf =
|
||||
TenantConfOpt::try_from(&request_data.config).map_err(ApiError::BadRequest)?;
|
||||
|
||||
let state = get_state(&request);
|
||||
mgr::set_new_tenant_config(state.conf, tenant_conf, tenant_id)
|
||||
|
||||
@@ -8,6 +8,7 @@ use metrics::{
|
||||
use once_cell::sync::Lazy;
|
||||
use pageserver_api::models::TenantState;
|
||||
use strum::VariantNames;
|
||||
use strum_macros::{EnumVariantNames, IntoStaticStr};
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
|
||||
/// Prometheus histogram buckets (in seconds) for operations in the critical
|
||||
@@ -24,15 +25,33 @@ const CRITICAL_OP_BUCKETS: &[f64] = &[
|
||||
];
|
||||
|
||||
// Metrics collected on operations on the storage repository.
|
||||
const STORAGE_TIME_OPERATIONS: &[&str] = &[
|
||||
"layer flush",
|
||||
"compact",
|
||||
"create images",
|
||||
"init logical size",
|
||||
"logical size",
|
||||
"load layer map",
|
||||
"gc",
|
||||
];
|
||||
#[derive(Debug, EnumVariantNames, IntoStaticStr)]
|
||||
#[strum(serialize_all = "kebab_case")]
|
||||
pub enum StorageTimeOperation {
|
||||
#[strum(serialize = "layer flush")]
|
||||
LayerFlush,
|
||||
|
||||
#[strum(serialize = "compact")]
|
||||
Compact,
|
||||
|
||||
#[strum(serialize = "create images")]
|
||||
CreateImages,
|
||||
|
||||
#[strum(serialize = "logical size")]
|
||||
LogicalSize,
|
||||
|
||||
#[strum(serialize = "imitate logical size")]
|
||||
ImitateLogicalSize,
|
||||
|
||||
#[strum(serialize = "load layer map")]
|
||||
LoadLayerMap,
|
||||
|
||||
#[strum(serialize = "gc")]
|
||||
Gc,
|
||||
|
||||
#[strum(serialize = "create tenant")]
|
||||
CreateTenant,
|
||||
}
|
||||
|
||||
pub static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {
|
||||
register_counter_vec!(
|
||||
@@ -186,6 +205,16 @@ static PERSISTENT_BYTES_WRITTEN: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) static EVICTION_ITERATION_DURATION: Lazy<HistogramVec> = Lazy::new(|| {
|
||||
register_histogram_vec!(
|
||||
"pageserver_eviction_iteration_duration_seconds_global",
|
||||
"Time spent on a single eviction iteration",
|
||||
&["period_secs", "threshold_secs"],
|
||||
STORAGE_OP_BUCKETS.into(),
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static EVICTIONS: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"pageserver_evictions",
|
||||
@@ -478,6 +507,15 @@ pub static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
.expect("Failed to register tenant_task_events metric")
|
||||
});
|
||||
|
||||
pub static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"pageserver_background_loop_period_overrun_count",
|
||||
"Incremented whenever warn_when_period_overrun() logs a warning.",
|
||||
&["task", "period"],
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
// walreceiver metrics
|
||||
|
||||
pub static WALRECEIVER_STARTED_CONNECTIONS: Lazy<IntCounter> = Lazy::new(|| {
|
||||
@@ -652,7 +690,9 @@ pub struct StorageTimeMetrics {
|
||||
}
|
||||
|
||||
impl StorageTimeMetrics {
|
||||
pub fn new(operation: &str, tenant_id: &str, timeline_id: &str) -> Self {
|
||||
pub fn new(operation: StorageTimeOperation, tenant_id: &str, timeline_id: &str) -> Self {
|
||||
let operation: &'static str = operation.into();
|
||||
|
||||
let timeline_sum = STORAGE_TIME_SUM_PER_TIMELINE
|
||||
.get_metric_with_label_values(&[operation, tenant_id, timeline_id])
|
||||
.unwrap();
|
||||
@@ -688,6 +728,7 @@ pub struct TimelineMetrics {
|
||||
pub compact_time_histo: StorageTimeMetrics,
|
||||
pub create_images_time_histo: StorageTimeMetrics,
|
||||
pub logical_size_histo: StorageTimeMetrics,
|
||||
pub imitate_logical_size_histo: StorageTimeMetrics,
|
||||
pub load_layer_map_histo: StorageTimeMetrics,
|
||||
pub garbage_collect_histo: StorageTimeMetrics,
|
||||
pub last_record_gauge: IntGauge,
|
||||
@@ -715,14 +756,23 @@ impl TimelineMetrics {
|
||||
let materialized_page_cache_hit_counter = MATERIALIZED_PAGE_CACHE_HIT
|
||||
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
||||
.unwrap();
|
||||
let flush_time_histo = StorageTimeMetrics::new("layer flush", &tenant_id, &timeline_id);
|
||||
let compact_time_histo = StorageTimeMetrics::new("compact", &tenant_id, &timeline_id);
|
||||
let flush_time_histo =
|
||||
StorageTimeMetrics::new(StorageTimeOperation::LayerFlush, &tenant_id, &timeline_id);
|
||||
let compact_time_histo =
|
||||
StorageTimeMetrics::new(StorageTimeOperation::Compact, &tenant_id, &timeline_id);
|
||||
let create_images_time_histo =
|
||||
StorageTimeMetrics::new("create images", &tenant_id, &timeline_id);
|
||||
let logical_size_histo = StorageTimeMetrics::new("logical size", &tenant_id, &timeline_id);
|
||||
StorageTimeMetrics::new(StorageTimeOperation::CreateImages, &tenant_id, &timeline_id);
|
||||
let logical_size_histo =
|
||||
StorageTimeMetrics::new(StorageTimeOperation::LogicalSize, &tenant_id, &timeline_id);
|
||||
let imitate_logical_size_histo = StorageTimeMetrics::new(
|
||||
StorageTimeOperation::ImitateLogicalSize,
|
||||
&tenant_id,
|
||||
&timeline_id,
|
||||
);
|
||||
let load_layer_map_histo =
|
||||
StorageTimeMetrics::new("load layer map", &tenant_id, &timeline_id);
|
||||
let garbage_collect_histo = StorageTimeMetrics::new("gc", &tenant_id, &timeline_id);
|
||||
StorageTimeMetrics::new(StorageTimeOperation::LoadLayerMap, &tenant_id, &timeline_id);
|
||||
let garbage_collect_histo =
|
||||
StorageTimeMetrics::new(StorageTimeOperation::Gc, &tenant_id, &timeline_id);
|
||||
let last_record_gauge = LAST_RECORD_LSN
|
||||
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
||||
.unwrap();
|
||||
@@ -756,6 +806,7 @@ impl TimelineMetrics {
|
||||
compact_time_histo,
|
||||
create_images_time_histo,
|
||||
logical_size_histo,
|
||||
imitate_logical_size_histo,
|
||||
garbage_collect_histo,
|
||||
load_layer_map_histo,
|
||||
last_record_gauge,
|
||||
@@ -789,7 +840,7 @@ impl Drop for TimelineMetrics {
|
||||
.write()
|
||||
.unwrap()
|
||||
.remove(tenant_id, timeline_id);
|
||||
for op in STORAGE_TIME_OPERATIONS {
|
||||
for op in StorageTimeOperation::VARIANTS {
|
||||
let _ =
|
||||
STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[op, tenant_id, timeline_id]);
|
||||
let _ =
|
||||
@@ -1216,4 +1267,7 @@ pub fn preinitialize_metrics() {
|
||||
// Initialize it eagerly, so that our alert rule can distinguish absence of the metric from metric value 0.
|
||||
assert_eq!(UNEXPECTED_ONDEMAND_DOWNLOADS.get(), 0);
|
||||
UNEXPECTED_ONDEMAND_DOWNLOADS.reset();
|
||||
|
||||
// Same as above for this metric, but, it's a Vec-type metric for which we don't know all the labels.
|
||||
BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT.reset();
|
||||
}
|
||||
|
||||
@@ -256,7 +256,10 @@ async fn page_service_conn_main(
|
||||
//
|
||||
// no write timeout is used, because the kernel is assumed to error writes after some time.
|
||||
let mut socket = tokio_io_timeout::TimeoutReader::new(socket);
|
||||
socket.set_timeout(Some(std::time::Duration::from_secs(60 * 10)));
|
||||
|
||||
// timeout should be lower, but trying out multiple days for
|
||||
// <https://github.com/neondatabase/neon/issues/4205>
|
||||
socket.set_timeout(Some(std::time::Duration::from_secs(60 * 60 * 24 * 3)));
|
||||
let socket = std::pin::pin!(socket);
|
||||
|
||||
// XXX: pgbackend.run() should take the connection_ctx,
|
||||
|
||||
@@ -500,6 +500,8 @@ impl Timeline {
|
||||
cancel: CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<u64, CalculateLogicalSizeError> {
|
||||
crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
|
||||
// Fetch list of database dirs and iterate them
|
||||
let buf = self.get(DBDIR_KEY, lsn, ctx).await.context("read dbdir")?;
|
||||
let dbdir = DbDirectory::des(&buf).context("deserialize db directory")?;
|
||||
|
||||
@@ -97,7 +97,10 @@ mod timeline;
|
||||
|
||||
pub mod size;
|
||||
|
||||
pub use timeline::{LocalLayerInfoForDiskUsageEviction, PageReconstructError, Timeline};
|
||||
pub(crate) use timeline::debug_assert_current_span_has_tenant_and_timeline_id;
|
||||
pub use timeline::{
|
||||
LocalLayerInfoForDiskUsageEviction, LogicalSizeCalculationCause, PageReconstructError, Timeline,
|
||||
};
|
||||
|
||||
// re-export this function so that page_cache.rs can use it.
|
||||
pub use crate::tenant::ephemeral_file::writeback as writeback_ephemeral_file;
|
||||
@@ -593,15 +596,15 @@ impl Tenant {
|
||||
/// finishes. You can use wait_until_active() to wait for the task to
|
||||
/// complete.
|
||||
///
|
||||
pub fn spawn_attach(
|
||||
pub(crate) fn spawn_attach(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
ctx: &RequestContext,
|
||||
) -> Arc<Tenant> {
|
||||
// XXX: Attach should provide the config, especially during tenant migration.
|
||||
// See https://github.com/neondatabase/neon/issues/1555
|
||||
let tenant_conf = TenantConfOpt::default();
|
||||
) -> anyhow::Result<Arc<Tenant>> {
|
||||
// TODO dedup with spawn_load
|
||||
let tenant_conf =
|
||||
Self::load_tenant_config(conf, tenant_id).context("load tenant config")?;
|
||||
|
||||
let wal_redo_manager = Arc::new(PostgresRedoManager::new(conf, tenant_id));
|
||||
let tenant = Arc::new(Tenant::new(
|
||||
@@ -635,7 +638,7 @@ impl Tenant {
|
||||
Ok(())
|
||||
},
|
||||
);
|
||||
tenant
|
||||
Ok(tenant)
|
||||
}
|
||||
|
||||
///
|
||||
@@ -643,26 +646,15 @@ impl Tenant {
|
||||
///
|
||||
#[instrument(skip_all, fields(tenant_id=%self.tenant_id))]
|
||||
async fn attach(self: &Arc<Tenant>, ctx: RequestContext) -> anyhow::Result<()> {
|
||||
// Create directory with marker file to indicate attaching state.
|
||||
// The load_local_tenants() function in tenant::mgr relies on the marker file
|
||||
// to determine whether a tenant has finished attaching.
|
||||
let tenant_dir = self.conf.tenant_path(&self.tenant_id);
|
||||
let marker_file = self.conf.tenant_attaching_mark_file_path(&self.tenant_id);
|
||||
debug_assert_eq!(marker_file.parent().unwrap(), tenant_dir);
|
||||
if tenant_dir.exists() {
|
||||
if !marker_file.is_file() {
|
||||
anyhow::bail!(
|
||||
"calling Tenant::attach with a tenant directory that doesn't have the attaching marker file:\ntenant_dir: {}\nmarker_file: {}",
|
||||
tenant_dir.display(), marker_file.display());
|
||||
}
|
||||
} else {
|
||||
crashsafe::create_dir_all(&tenant_dir).context("create tenant directory")?;
|
||||
fs::File::create(&marker_file).context("create tenant attaching marker file")?;
|
||||
crashsafe::fsync_file_and_parent(&marker_file)
|
||||
.context("fsync tenant attaching marker file and parent")?;
|
||||
if !tokio::fs::try_exists(&marker_file)
|
||||
.await
|
||||
.context("check for existence of marker file")?
|
||||
{
|
||||
anyhow::bail!(
|
||||
"implementation error: marker file should exist at beginning of this function"
|
||||
);
|
||||
}
|
||||
debug_assert!(tenant_dir.is_dir());
|
||||
debug_assert!(marker_file.is_file());
|
||||
|
||||
// Get list of remote timelines
|
||||
// download index files for every tenant timeline
|
||||
@@ -800,6 +792,8 @@ impl Tenant {
|
||||
remote_client: RemoteTimelineClient,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
debug_assert_current_span_has_tenant_id();
|
||||
|
||||
info!("downloading index file for timeline {}", timeline_id);
|
||||
tokio::fs::create_dir_all(self.conf.timeline_path(&timeline_id, &self.tenant_id))
|
||||
.await
|
||||
@@ -839,11 +833,15 @@ impl Tenant {
|
||||
}
|
||||
|
||||
/// Create a placeholder Tenant object for a broken tenant
|
||||
pub fn create_broken_tenant(conf: &'static PageServerConf, tenant_id: TenantId) -> Arc<Tenant> {
|
||||
pub fn create_broken_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
reason: String,
|
||||
) -> Arc<Tenant> {
|
||||
let wal_redo_manager = Arc::new(PostgresRedoManager::new(conf, tenant_id));
|
||||
Arc::new(Tenant::new(
|
||||
TenantState::Broken {
|
||||
reason: "create_broken_tenant".into(),
|
||||
reason,
|
||||
backtrace: String::new(),
|
||||
},
|
||||
conf,
|
||||
@@ -876,7 +874,7 @@ impl Tenant {
|
||||
Ok(conf) => conf,
|
||||
Err(e) => {
|
||||
error!("load tenant config failed: {:?}", e);
|
||||
return Tenant::create_broken_tenant(conf, tenant_id);
|
||||
return Tenant::create_broken_tenant(conf, tenant_id, format!("{e:#}"));
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1060,6 +1058,8 @@ impl Tenant {
|
||||
local_metadata: TimelineMetadata,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
debug_assert_current_span_has_tenant_id();
|
||||
|
||||
let remote_client = self.remote_storage.as_ref().map(|remote_storage| {
|
||||
RemoteTimelineClient::new(
|
||||
remote_storage.clone(),
|
||||
@@ -1226,8 +1226,24 @@ impl Tenant {
|
||||
"Cannot create timelines on inactive tenant"
|
||||
);
|
||||
|
||||
if self.get_timeline(new_timeline_id, false).is_ok() {
|
||||
if let Ok(existing) = self.get_timeline(new_timeline_id, false) {
|
||||
debug!("timeline {new_timeline_id} already exists");
|
||||
|
||||
if let Some(remote_client) = existing.remote_client.as_ref() {
|
||||
// Wait for uploads to complete, so that when we return Ok, the timeline
|
||||
// is known to be durable on remote storage. Just like we do at the end of
|
||||
// this function, after we have created the timeline ourselves.
|
||||
//
|
||||
// We only really care that the initial version of `index_part.json` has
|
||||
// been uploaded. That's enough to remember that the timeline
|
||||
// exists. However, there is no function to wait specifically for that so
|
||||
// we just wait for all in-progress uploads to finish.
|
||||
remote_client
|
||||
.wait_completion()
|
||||
.await
|
||||
.context("wait for timeline uploads to complete")?;
|
||||
}
|
||||
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
@@ -1269,6 +1285,17 @@ impl Tenant {
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(remote_client) = loaded_timeline.remote_client.as_ref() {
|
||||
// Wait for the upload of the 'index_part.json` file to finish, so that when we return
|
||||
// Ok, the timeline is durable in remote storage.
|
||||
let kind = ancestor_timeline_id
|
||||
.map(|_| "branched")
|
||||
.unwrap_or("bootstrapped");
|
||||
remote_client.wait_completion().await.with_context(|| {
|
||||
format!("wait for {} timeline initial uploads to complete", kind)
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(Some(loaded_timeline))
|
||||
}
|
||||
|
||||
@@ -1562,6 +1589,8 @@ impl Tenant {
|
||||
|
||||
/// Changes tenant status to active, unless shutdown was already requested.
|
||||
fn activate(&self, ctx: &RequestContext) -> anyhow::Result<()> {
|
||||
debug_assert_current_span_has_tenant_id();
|
||||
|
||||
let mut result = Ok(());
|
||||
self.state.send_modify(|current_state| {
|
||||
match &*current_state {
|
||||
@@ -2053,6 +2082,7 @@ impl Tenant {
|
||||
// enough to just fsync it always.
|
||||
|
||||
crashsafe::fsync(target_config_parent)?;
|
||||
// XXX we're not fsyncing the parent dir, need to do that in case `creating_tenant`
|
||||
Ok(())
|
||||
};
|
||||
|
||||
@@ -2338,17 +2368,18 @@ impl Tenant {
|
||||
src_timeline.initdb_lsn,
|
||||
src_timeline.pg_version,
|
||||
);
|
||||
let mut timelines = self.timelines.lock().unwrap();
|
||||
let new_timeline = self
|
||||
.prepare_timeline(
|
||||
|
||||
let new_timeline = {
|
||||
let mut timelines = self.timelines.lock().unwrap();
|
||||
self.prepare_timeline(
|
||||
dst_id,
|
||||
&metadata,
|
||||
timeline_uninit_mark,
|
||||
false,
|
||||
Some(Arc::clone(src_timeline)),
|
||||
)?
|
||||
.initialize_with_lock(ctx, &mut timelines, true, true)?;
|
||||
drop(timelines);
|
||||
.initialize_with_lock(ctx, &mut timelines, true, true)?
|
||||
};
|
||||
|
||||
// Root timeline gets its layers during creation and uploads them along with the metadata.
|
||||
// A branch timeline though, when created, can get no writes for some time, hence won't get any layers created.
|
||||
@@ -2606,6 +2637,7 @@ impl Tenant {
|
||||
// `max_retention_period` overrides the cutoff that is used to calculate the size
|
||||
// (only if it is shorter than the real cutoff).
|
||||
max_retention_period: Option<u64>,
|
||||
cause: LogicalSizeCalculationCause,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<size::ModelInputs> {
|
||||
let logical_sizes_at_once = self
|
||||
@@ -2627,6 +2659,7 @@ impl Tenant {
|
||||
logical_sizes_at_once,
|
||||
max_retention_period,
|
||||
&mut shared_cache,
|
||||
cause,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
@@ -2636,8 +2669,12 @@ impl Tenant {
|
||||
/// This is periodically called by background worker.
|
||||
/// result is cached in tenant struct
|
||||
#[instrument(skip_all, fields(tenant_id=%self.tenant_id))]
|
||||
pub async fn calculate_synthetic_size(&self, ctx: &RequestContext) -> anyhow::Result<u64> {
|
||||
let inputs = self.gather_size_inputs(None, ctx).await?;
|
||||
pub async fn calculate_synthetic_size(
|
||||
&self,
|
||||
cause: LogicalSizeCalculationCause,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<u64> {
|
||||
let inputs = self.gather_size_inputs(None, cause, ctx).await?;
|
||||
|
||||
let size = inputs.calculate()?;
|
||||
|
||||
@@ -2689,15 +2726,23 @@ fn remove_timeline_and_uninit_mark(timeline_dir: &Path, uninit_mark: &Path) -> a
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) enum CreateTenantFilesMode {
|
||||
Create,
|
||||
Attach,
|
||||
}
|
||||
|
||||
pub(crate) fn create_tenant_files(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_conf: TenantConfOpt,
|
||||
tenant_id: TenantId,
|
||||
mode: CreateTenantFilesMode,
|
||||
) -> anyhow::Result<PathBuf> {
|
||||
let target_tenant_directory = conf.tenant_path(&tenant_id);
|
||||
anyhow::ensure!(
|
||||
!target_tenant_directory.exists(),
|
||||
"cannot create new tenant repo: '{tenant_id}' directory already exists",
|
||||
!target_tenant_directory
|
||||
.try_exists()
|
||||
.context("check existence of tenant directory")?,
|
||||
"tenant directory already exists",
|
||||
);
|
||||
|
||||
let temporary_tenant_dir =
|
||||
@@ -2719,6 +2764,7 @@ pub(crate) fn create_tenant_files(
|
||||
conf,
|
||||
tenant_conf,
|
||||
tenant_id,
|
||||
mode,
|
||||
&temporary_tenant_dir,
|
||||
&target_tenant_directory,
|
||||
);
|
||||
@@ -2743,9 +2789,28 @@ fn try_create_target_tenant_dir(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_conf: TenantConfOpt,
|
||||
tenant_id: TenantId,
|
||||
mode: CreateTenantFilesMode,
|
||||
temporary_tenant_dir: &Path,
|
||||
target_tenant_directory: &Path,
|
||||
) -> Result<(), anyhow::Error> {
|
||||
match mode {
|
||||
CreateTenantFilesMode::Create => {} // needs no attach marker, writing tenant conf + atomic rename of dir is good enough
|
||||
CreateTenantFilesMode::Attach => {
|
||||
let attach_marker_path = temporary_tenant_dir.join(TENANT_ATTACHING_MARKER_FILENAME);
|
||||
let file = std::fs::OpenOptions::new()
|
||||
.create_new(true)
|
||||
.write(true)
|
||||
.open(&attach_marker_path)
|
||||
.with_context(|| {
|
||||
format!("could not create attach marker file {attach_marker_path:?}")
|
||||
})?;
|
||||
file.sync_all().with_context(|| {
|
||||
format!("could not sync attach marker file: {attach_marker_path:?}")
|
||||
})?;
|
||||
// fsync of the directory in which the file resides comes later in this function
|
||||
}
|
||||
}
|
||||
|
||||
let temporary_tenant_timelines_dir = rebase_directory(
|
||||
&conf.timelines_path(&tenant_id),
|
||||
target_tenant_directory,
|
||||
@@ -2772,6 +2837,11 @@ fn try_create_target_tenant_dir(
|
||||
anyhow::bail!("failpoint tenant-creation-before-tmp-rename");
|
||||
});
|
||||
|
||||
// Make sure the current tenant directory entries are durable before renaming.
|
||||
// Without this, a crash may reorder any of the directory entry creations above.
|
||||
crashsafe::fsync(temporary_tenant_dir)
|
||||
.with_context(|| format!("sync temporary tenant directory {temporary_tenant_dir:?}"))?;
|
||||
|
||||
fs::rename(temporary_tenant_dir, target_tenant_directory).with_context(|| {
|
||||
format!(
|
||||
"move tenant {} temporary directory {} into the permanent one {}",
|
||||
@@ -3397,14 +3467,26 @@ mod tests {
|
||||
.gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, &ctx)
|
||||
.await?;
|
||||
|
||||
// The branchpoints should contain all timelines, even ones marked
|
||||
// as Broken.
|
||||
{
|
||||
let branchpoints = &tline.gc_info.read().unwrap().retain_lsns;
|
||||
assert_eq!(branchpoints.len(), 1);
|
||||
assert_eq!(branchpoints[0], Lsn(0x40));
|
||||
}
|
||||
|
||||
// You can read the key from the child branch even though the parent is
|
||||
// Broken, as long as you don't need to access data from the parent.
|
||||
assert_eq!(
|
||||
newtline.get(*TEST_KEY, Lsn(0x50), &ctx).await?,
|
||||
TEST_IMG(&format!("foo at {}", Lsn(0x40)))
|
||||
newtline.get(*TEST_KEY, Lsn(0x70), &ctx).await?,
|
||||
TEST_IMG(&format!("foo at {}", Lsn(0x70)))
|
||||
);
|
||||
|
||||
let branchpoints = &tline.gc_info.read().unwrap().retain_lsns;
|
||||
assert_eq!(branchpoints.len(), 1);
|
||||
assert_eq!(branchpoints[0], Lsn(0x40));
|
||||
// This needs to traverse to the parent, and fails.
|
||||
let err = newtline.get(*TEST_KEY, Lsn(0x50), &ctx).await.unwrap_err();
|
||||
assert!(err
|
||||
.to_string()
|
||||
.contains("will not become active. Current state: Broken"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -3895,3 +3977,28 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
#[inline]
|
||||
pub(crate) fn debug_assert_current_span_has_tenant_id() {}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
pub static TENANT_ID_EXTRACTOR: once_cell::sync::Lazy<
|
||||
utils::tracing_span_assert::MultiNameExtractor<2>,
|
||||
> = once_cell::sync::Lazy::new(|| {
|
||||
utils::tracing_span_assert::MultiNameExtractor::new("TenantId", ["tenant_id", "tenant"])
|
||||
});
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
#[inline]
|
||||
pub(crate) fn debug_assert_current_span_has_tenant_id() {
|
||||
use utils::tracing_span_assert;
|
||||
|
||||
match tracing_span_assert::check_fields_present([&*TENANT_ID_EXTRACTOR]) {
|
||||
Ok(()) => (),
|
||||
Err(missing) => panic!(
|
||||
"missing extractors: {:?}",
|
||||
missing.into_iter().map(|e| e.name()).collect::<Vec<_>>()
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
//! may lead to a data loss.
|
||||
//!
|
||||
use anyhow::Context;
|
||||
use pageserver_api::models::{TenantConfigRequest, TenantCreateRequest};
|
||||
use pageserver_api::models;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::num::NonZeroU64;
|
||||
use std::time::Duration;
|
||||
@@ -292,10 +292,10 @@ fn bad_duration<'a>(field_name: &'static str, value: &'a str) -> impl 'a + Fn()
|
||||
move || format!("Cannot parse `{field_name}` duration {value:?}")
|
||||
}
|
||||
|
||||
impl TryFrom<&'_ TenantCreateRequest> for TenantConfOpt {
|
||||
impl TryFrom<&'_ models::TenantConfig> for TenantConfOpt {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(request_data: &TenantCreateRequest) -> Result<Self, Self::Error> {
|
||||
fn try_from(request_data: &'_ models::TenantConfig) -> Result<Self, Self::Error> {
|
||||
let mut tenant_conf = TenantConfOpt::default();
|
||||
|
||||
if let Some(gc_period) = &request_data.gc_period {
|
||||
@@ -377,84 +377,6 @@ impl TryFrom<&'_ TenantCreateRequest> for TenantConfOpt {
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&'_ TenantConfigRequest> for TenantConfOpt {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(request_data: &TenantConfigRequest) -> Result<Self, Self::Error> {
|
||||
let mut tenant_conf = TenantConfOpt::default();
|
||||
if let Some(gc_period) = &request_data.gc_period {
|
||||
tenant_conf.gc_period = Some(
|
||||
humantime::parse_duration(gc_period)
|
||||
.with_context(bad_duration("gc_period", gc_period))?,
|
||||
);
|
||||
}
|
||||
tenant_conf.gc_horizon = request_data.gc_horizon;
|
||||
tenant_conf.image_creation_threshold = request_data.image_creation_threshold;
|
||||
|
||||
if let Some(pitr_interval) = &request_data.pitr_interval {
|
||||
tenant_conf.pitr_interval = Some(
|
||||
humantime::parse_duration(pitr_interval)
|
||||
.with_context(bad_duration("pitr_interval", pitr_interval))?,
|
||||
);
|
||||
}
|
||||
if let Some(walreceiver_connect_timeout) = &request_data.walreceiver_connect_timeout {
|
||||
tenant_conf.walreceiver_connect_timeout = Some(
|
||||
humantime::parse_duration(walreceiver_connect_timeout).with_context(
|
||||
bad_duration("walreceiver_connect_timeout", walreceiver_connect_timeout),
|
||||
)?,
|
||||
);
|
||||
}
|
||||
if let Some(lagging_wal_timeout) = &request_data.lagging_wal_timeout {
|
||||
tenant_conf.lagging_wal_timeout = Some(
|
||||
humantime::parse_duration(lagging_wal_timeout)
|
||||
.with_context(bad_duration("lagging_wal_timeout", lagging_wal_timeout))?,
|
||||
);
|
||||
}
|
||||
tenant_conf.max_lsn_wal_lag = request_data.max_lsn_wal_lag;
|
||||
tenant_conf.trace_read_requests = request_data.trace_read_requests;
|
||||
|
||||
tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
|
||||
if let Some(checkpoint_timeout) = &request_data.checkpoint_timeout {
|
||||
tenant_conf.checkpoint_timeout = Some(
|
||||
humantime::parse_duration(checkpoint_timeout)
|
||||
.with_context(bad_duration("checkpoint_timeout", checkpoint_timeout))?,
|
||||
);
|
||||
}
|
||||
tenant_conf.compaction_target_size = request_data.compaction_target_size;
|
||||
tenant_conf.compaction_threshold = request_data.compaction_threshold;
|
||||
|
||||
if let Some(compaction_period) = &request_data.compaction_period {
|
||||
tenant_conf.compaction_period = Some(
|
||||
humantime::parse_duration(compaction_period)
|
||||
.with_context(bad_duration("compaction_period", compaction_period))?,
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(eviction_policy) = &request_data.eviction_policy {
|
||||
tenant_conf.eviction_policy = Some(
|
||||
serde::Deserialize::deserialize(eviction_policy)
|
||||
.context("parse field `eviction_policy`")?,
|
||||
);
|
||||
}
|
||||
|
||||
tenant_conf.min_resident_size_override = request_data.min_resident_size_override;
|
||||
|
||||
if let Some(evictions_low_residence_duration_metric_threshold) =
|
||||
&request_data.evictions_low_residence_duration_metric_threshold
|
||||
{
|
||||
tenant_conf.evictions_low_residence_duration_metric_threshold = Some(
|
||||
humantime::parse_duration(evictions_low_residence_duration_metric_threshold)
|
||||
.with_context(bad_duration(
|
||||
"evictions_low_residence_duration_metric_threshold",
|
||||
evictions_low_residence_duration_metric_threshold,
|
||||
))?,
|
||||
);
|
||||
}
|
||||
|
||||
Ok(tenant_conf)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -19,7 +19,7 @@ use crate::config::PageServerConf;
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::task_mgr::{self, TaskKind};
|
||||
use crate::tenant::config::TenantConfOpt;
|
||||
use crate::tenant::{Tenant, TenantState};
|
||||
use crate::tenant::{create_tenant_files, CreateTenantFilesMode, Tenant, TenantState};
|
||||
use crate::IGNORED_TENANT_FILE_NAME;
|
||||
|
||||
use utils::fs_ext::PathExt;
|
||||
@@ -186,10 +186,20 @@ pub fn schedule_local_tenant_processing(
|
||||
let tenant = if conf.tenant_attaching_mark_file_path(&tenant_id).exists() {
|
||||
info!("tenant {tenant_id} has attaching mark file, resuming its attach operation");
|
||||
if let Some(remote_storage) = remote_storage {
|
||||
Tenant::spawn_attach(conf, tenant_id, remote_storage, ctx)
|
||||
match Tenant::spawn_attach(conf, tenant_id, remote_storage, ctx) {
|
||||
Ok(tenant) => tenant,
|
||||
Err(e) => {
|
||||
error!("Failed to spawn_attach tenant {tenant_id}, reason: {e:#}");
|
||||
Tenant::create_broken_tenant(conf, tenant_id, format!("{e:#}"))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
warn!("tenant {tenant_id} has attaching mark file, but pageserver has no remote storage configured");
|
||||
Tenant::create_broken_tenant(conf, tenant_id)
|
||||
Tenant::create_broken_tenant(
|
||||
conf,
|
||||
tenant_id,
|
||||
"attaching mark file present but no remote storage configured".to_string(),
|
||||
)
|
||||
}
|
||||
} else {
|
||||
info!("tenant {tenant_id} is assumed to be loadable, starting load operation");
|
||||
@@ -272,9 +282,15 @@ pub async fn create_tenant(
|
||||
// We're holding the tenants lock in write mode while doing local IO.
|
||||
// If this section ever becomes contentious, introduce a new `TenantState::Creating`
|
||||
// and do the work in that state.
|
||||
let tenant_directory = super::create_tenant_files(conf, tenant_conf, tenant_id)?;
|
||||
let tenant_directory = super::create_tenant_files(conf, tenant_conf, tenant_id, CreateTenantFilesMode::Create)?;
|
||||
// TODO: tenant directory remains on disk if we bail out from here on.
|
||||
// See https://github.com/neondatabase/neon/issues/4233
|
||||
|
||||
let created_tenant =
|
||||
schedule_local_tenant_processing(conf, &tenant_directory, remote_storage, ctx)?;
|
||||
// TODO: tenant object & its background loops remain, untracked in tenant map, if we fail here.
|
||||
// See https://github.com/neondatabase/neon/issues/4233
|
||||
|
||||
let crated_tenant_id = created_tenant.tenant_id();
|
||||
anyhow::ensure!(
|
||||
tenant_id == crated_tenant_id,
|
||||
@@ -456,18 +472,32 @@ pub async fn list_tenants() -> Result<Vec<(TenantId, TenantState)>, TenantMapLis
|
||||
pub async fn attach_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
tenant_conf: TenantConfOpt,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), TenantMapInsertError> {
|
||||
tenant_map_insert(tenant_id, |vacant_entry| {
|
||||
let tenant_path = conf.tenant_path(&tenant_id);
|
||||
anyhow::ensure!(
|
||||
!tenant_path.exists(),
|
||||
"Cannot attach tenant {tenant_id}, local tenant directory already exists"
|
||||
);
|
||||
let tenant_dir = create_tenant_files(conf, tenant_conf, tenant_id, CreateTenantFilesMode::Attach)?;
|
||||
// TODO: tenant directory remains on disk if we bail out from here on.
|
||||
// See https://github.com/neondatabase/neon/issues/4233
|
||||
|
||||
let tenant = Tenant::spawn_attach(conf, tenant_id, remote_storage, ctx);
|
||||
vacant_entry.insert(tenant);
|
||||
// Without the attach marker, schedule_local_tenant_processing will treat the attached tenant as fully attached
|
||||
let marker_file_exists = conf
|
||||
.tenant_attaching_mark_file_path(&tenant_id)
|
||||
.try_exists()
|
||||
.context("check for attach marker file existence")?;
|
||||
anyhow::ensure!(marker_file_exists, "create_tenant_files should have created the attach marker file");
|
||||
|
||||
let attached_tenant = schedule_local_tenant_processing(conf, &tenant_dir, Some(remote_storage), ctx)?;
|
||||
// TODO: tenant object & its background loops remain, untracked in tenant map, if we fail here.
|
||||
// See https://github.com/neondatabase/neon/issues/4233
|
||||
|
||||
let attached_tenant_id = attached_tenant.tenant_id();
|
||||
anyhow::ensure!(
|
||||
tenant_id == attached_tenant_id,
|
||||
"loaded created tenant has unexpected tenant id (expect {tenant_id} != actual {attached_tenant_id})",
|
||||
);
|
||||
vacant_entry.insert(Arc::clone(&attached_tenant));
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
|
||||
@@ -11,7 +11,7 @@ use tokio_util::sync::CancellationToken;
|
||||
use crate::context::RequestContext;
|
||||
use crate::pgdatadir_mapping::CalculateLogicalSizeError;
|
||||
|
||||
use super::Tenant;
|
||||
use super::{LogicalSizeCalculationCause, Tenant};
|
||||
use crate::tenant::Timeline;
|
||||
use utils::id::TimelineId;
|
||||
use utils::lsn::Lsn;
|
||||
@@ -126,6 +126,7 @@ pub(super) async fn gather_inputs(
|
||||
limit: &Arc<Semaphore>,
|
||||
max_retention_period: Option<u64>,
|
||||
logical_size_cache: &mut HashMap<(TimelineId, Lsn), u64>,
|
||||
cause: LogicalSizeCalculationCause,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<ModelInputs> {
|
||||
// refresh is needed to update gc related pitr_cutoff and horizon_cutoff
|
||||
@@ -318,7 +319,15 @@ pub(super) async fn gather_inputs(
|
||||
|
||||
// We left the 'size' field empty in all of the Segments so far.
|
||||
// Now find logical sizes for all of the points that might need or benefit from them.
|
||||
fill_logical_sizes(&timelines, &mut segments, limit, logical_size_cache, ctx).await?;
|
||||
fill_logical_sizes(
|
||||
&timelines,
|
||||
&mut segments,
|
||||
limit,
|
||||
logical_size_cache,
|
||||
cause,
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(ModelInputs {
|
||||
segments,
|
||||
@@ -336,6 +345,7 @@ async fn fill_logical_sizes(
|
||||
segments: &mut [SegmentMeta],
|
||||
limit: &Arc<Semaphore>,
|
||||
logical_size_cache: &mut HashMap<(TimelineId, Lsn), u64>,
|
||||
cause: LogicalSizeCalculationCause,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
let timeline_hash: HashMap<TimelineId, Arc<Timeline>> = HashMap::from_iter(
|
||||
@@ -373,13 +383,17 @@ async fn fill_logical_sizes(
|
||||
let timeline = Arc::clone(timeline_hash.get(&timeline_id).unwrap());
|
||||
let parallel_size_calcs = Arc::clone(limit);
|
||||
let ctx = ctx.attached_child();
|
||||
joinset.spawn(calculate_logical_size(
|
||||
parallel_size_calcs,
|
||||
timeline,
|
||||
lsn,
|
||||
ctx,
|
||||
cancel.child_token(),
|
||||
));
|
||||
joinset.spawn(
|
||||
calculate_logical_size(
|
||||
parallel_size_calcs,
|
||||
timeline,
|
||||
lsn,
|
||||
cause,
|
||||
ctx,
|
||||
cancel.child_token(),
|
||||
)
|
||||
.in_current_span(),
|
||||
);
|
||||
}
|
||||
e.insert(cached_size);
|
||||
}
|
||||
@@ -482,6 +496,7 @@ async fn calculate_logical_size(
|
||||
limit: Arc<tokio::sync::Semaphore>,
|
||||
timeline: Arc<crate::tenant::Timeline>,
|
||||
lsn: utils::lsn::Lsn,
|
||||
cause: LogicalSizeCalculationCause,
|
||||
ctx: RequestContext,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<TimelineAtLsnSizeResult, RecvError> {
|
||||
@@ -490,7 +505,7 @@ async fn calculate_logical_size(
|
||||
.expect("global semaphore should not had been closed");
|
||||
|
||||
let size_res = timeline
|
||||
.spawn_ondemand_logical_size_calculation(lsn, ctx, cancel)
|
||||
.spawn_ondemand_logical_size_calculation(lsn, cause, ctx, cancel)
|
||||
.instrument(info_span!("spawn_ondemand_logical_size_calculation"))
|
||||
.await?;
|
||||
Ok(TimelineAtLsnSizeResult(timeline, lsn, size_res))
|
||||
|
||||
@@ -259,6 +259,7 @@ pub(crate) async fn random_init_delay(
|
||||
}
|
||||
}
|
||||
|
||||
/// Attention: the `task` and `period` beocme labels of a pageserver-wide prometheus metric.
|
||||
pub(crate) fn warn_when_period_overrun(elapsed: Duration, period: Duration, task: &str) {
|
||||
// Duration::ZERO will happen because it's the "disable [bgtask]" value.
|
||||
if elapsed >= period && period != Duration::ZERO {
|
||||
@@ -271,5 +272,8 @@ pub(crate) fn warn_when_period_overrun(elapsed: Duration, period: Duration, task
|
||||
task,
|
||||
"task iteration took longer than the configured period"
|
||||
);
|
||||
crate::metrics::BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT
|
||||
.with_label_values(&[task, &format!("{}", period.as_secs())])
|
||||
.inc();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -396,6 +396,9 @@ pub enum PageReconstructError {
|
||||
/// The operation was cancelled
|
||||
Cancelled,
|
||||
|
||||
/// The ancestor of this is being stopped
|
||||
AncestorStopping(TimelineId),
|
||||
|
||||
/// An error happened replaying WAL records
|
||||
#[error(transparent)]
|
||||
WalRedo(#[from] crate::walredo::WalRedoError),
|
||||
@@ -414,6 +417,9 @@ impl std::fmt::Debug for PageReconstructError {
|
||||
)
|
||||
}
|
||||
Self::Cancelled => write!(f, "cancelled"),
|
||||
Self::AncestorStopping(timeline_id) => {
|
||||
write!(f, "ancestor timeline {timeline_id} is being stopped")
|
||||
}
|
||||
Self::WalRedo(err) => err.fmt(f),
|
||||
}
|
||||
}
|
||||
@@ -432,11 +438,22 @@ impl std::fmt::Display for PageReconstructError {
|
||||
)
|
||||
}
|
||||
Self::Cancelled => write!(f, "cancelled"),
|
||||
Self::AncestorStopping(timeline_id) => {
|
||||
write!(f, "ancestor timeline {timeline_id} is being stopped")
|
||||
}
|
||||
Self::WalRedo(err) => err.fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub enum LogicalSizeCalculationCause {
|
||||
Initial,
|
||||
ConsumptionMetricsSyntheticSize,
|
||||
EvictionTaskImitation,
|
||||
TenantSizeHandler,
|
||||
}
|
||||
|
||||
/// Public interface functions
|
||||
impl Timeline {
|
||||
/// Get the LSN where this branch was created
|
||||
@@ -926,6 +943,31 @@ impl Timeline {
|
||||
self.state.subscribe()
|
||||
}
|
||||
|
||||
pub async fn wait_to_become_active(
|
||||
&self,
|
||||
_ctx: &RequestContext, // Prepare for use by cancellation
|
||||
) -> Result<(), TimelineState> {
|
||||
let mut receiver = self.state.subscribe();
|
||||
loop {
|
||||
let current_state = *receiver.borrow_and_update();
|
||||
match current_state {
|
||||
TimelineState::Loading => {
|
||||
receiver
|
||||
.changed()
|
||||
.await
|
||||
.expect("holding a reference to self");
|
||||
}
|
||||
TimelineState::Active { .. } => {
|
||||
return Ok(());
|
||||
}
|
||||
TimelineState::Broken { .. } | TimelineState::Stopping => {
|
||||
// There's no chance the timeline can transition back into ::Active
|
||||
return Err(current_state);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn layer_map_info(&self, reset: LayerAccessStatsReset) -> LayerMapInfo {
|
||||
let layer_map = self.layers.read().unwrap();
|
||||
let mut in_memory_layers = Vec::with_capacity(layer_map.frozen_layers.len() + 1);
|
||||
@@ -1839,18 +1881,31 @@ impl Timeline {
|
||||
// to spawn_ondemand_logical_size_calculation.
|
||||
let cancel = CancellationToken::new();
|
||||
let calculated_size = match self_clone
|
||||
.logical_size_calculation_task(lsn, &background_ctx, cancel)
|
||||
.logical_size_calculation_task(lsn, LogicalSizeCalculationCause::Initial, &background_ctx, cancel)
|
||||
.await
|
||||
{
|
||||
Ok(s) => s,
|
||||
Err(CalculateLogicalSizeError::Cancelled) => {
|
||||
// Don't make noise, this is a common task.
|
||||
// In the unlikely case that there ihs another call to this function, we'll retry
|
||||
// In the unlikely case that there is another call to this function, we'll retry
|
||||
// because initial_logical_size is still None.
|
||||
info!("initial size calculation cancelled, likely timeline delete / tenant detach");
|
||||
return Ok(());
|
||||
}
|
||||
x @ Err(_) => x.context("Failed to calculate logical size")?,
|
||||
Err(CalculateLogicalSizeError::Other(err)) => {
|
||||
if let Some(e @ PageReconstructError::AncestorStopping(_)) =
|
||||
err.root_cause().downcast_ref()
|
||||
{
|
||||
// This can happen if the timeline parent timeline switches to
|
||||
// Stopping state while we're still calculating the initial
|
||||
// timeline size for the child, for example if the tenant is
|
||||
// being detached or the pageserver is shut down. Like with
|
||||
// CalculateLogicalSizeError::Cancelled, don't make noise.
|
||||
info!("initial size calculation failed because the timeline or its ancestor is Stopping, likely because the tenant is being detached: {e:#}");
|
||||
return Ok(());
|
||||
}
|
||||
return Err(err.context("Failed to calculate logical size"));
|
||||
}
|
||||
};
|
||||
|
||||
// we cannot query current_logical_size.current_size() to know the current
|
||||
@@ -1886,13 +1941,14 @@ impl Timeline {
|
||||
// so that we prevent future callers from spawning this task
|
||||
permit.forget();
|
||||
Ok(())
|
||||
},
|
||||
}.in_current_span(),
|
||||
);
|
||||
}
|
||||
|
||||
pub fn spawn_ondemand_logical_size_calculation(
|
||||
self: &Arc<Self>,
|
||||
lsn: Lsn,
|
||||
cause: LogicalSizeCalculationCause,
|
||||
ctx: RequestContext,
|
||||
cancel: CancellationToken,
|
||||
) -> oneshot::Receiver<Result<u64, CalculateLogicalSizeError>> {
|
||||
@@ -1915,22 +1971,26 @@ impl Timeline {
|
||||
false,
|
||||
async move {
|
||||
let res = self_clone
|
||||
.logical_size_calculation_task(lsn, &ctx, cancel)
|
||||
.logical_size_calculation_task(lsn, cause, &ctx, cancel)
|
||||
.await;
|
||||
let _ = sender.send(res).ok();
|
||||
Ok(()) // Receiver is responsible for handling errors
|
||||
},
|
||||
}
|
||||
.in_current_span(),
|
||||
);
|
||||
receiver
|
||||
}
|
||||
|
||||
#[instrument(skip_all, fields(tenant = %self.tenant_id, timeline = %self.timeline_id))]
|
||||
#[instrument(skip_all)]
|
||||
async fn logical_size_calculation_task(
|
||||
self: &Arc<Self>,
|
||||
lsn: Lsn,
|
||||
cause: LogicalSizeCalculationCause,
|
||||
ctx: &RequestContext,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<u64, CalculateLogicalSizeError> {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
|
||||
let mut timeline_state_updates = self.subscribe_for_state_updates();
|
||||
let self_calculation = Arc::clone(self);
|
||||
|
||||
@@ -1938,7 +1998,7 @@ impl Timeline {
|
||||
let cancel = cancel.child_token();
|
||||
let ctx = ctx.attached_child();
|
||||
self_calculation
|
||||
.calculate_logical_size(lsn, cancel, &ctx)
|
||||
.calculate_logical_size(lsn, cause, cancel, &ctx)
|
||||
.await
|
||||
});
|
||||
let timeline_state_cancellation = async {
|
||||
@@ -1993,6 +2053,7 @@ impl Timeline {
|
||||
pub async fn calculate_logical_size(
|
||||
&self,
|
||||
up_to_lsn: Lsn,
|
||||
cause: LogicalSizeCalculationCause,
|
||||
cancel: CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<u64, CalculateLogicalSizeError> {
|
||||
@@ -2026,7 +2087,15 @@ impl Timeline {
|
||||
if let Some(size) = self.current_logical_size.initialized_size(up_to_lsn) {
|
||||
return Ok(size);
|
||||
}
|
||||
let timer = self.metrics.logical_size_histo.start_timer();
|
||||
let storage_time_metrics = match cause {
|
||||
LogicalSizeCalculationCause::Initial
|
||||
| LogicalSizeCalculationCause::ConsumptionMetricsSyntheticSize
|
||||
| LogicalSizeCalculationCause::TenantSizeHandler => &self.metrics.logical_size_histo,
|
||||
LogicalSizeCalculationCause::EvictionTaskImitation => {
|
||||
&self.metrics.imitate_logical_size_histo
|
||||
}
|
||||
};
|
||||
let timer = storage_time_metrics.start_timer();
|
||||
let logical_size = self
|
||||
.get_current_logical_size_non_incremental(up_to_lsn, cancel, ctx)
|
||||
.await?;
|
||||
@@ -2218,6 +2287,46 @@ impl Timeline {
|
||||
Ok(timeline) => timeline,
|
||||
Err(e) => return Err(PageReconstructError::from(e)),
|
||||
};
|
||||
|
||||
// It's possible that the ancestor timeline isn't active yet, or
|
||||
// is active but hasn't yet caught up to the branch point. Wait
|
||||
// for it.
|
||||
//
|
||||
// This cannot happen while the pageserver is running normally,
|
||||
// because you cannot create a branch from a point that isn't
|
||||
// present in the pageserver yet. However, we don't wait for the
|
||||
// branch point to be uploaded to cloud storage before creating
|
||||
// a branch. I.e., the branch LSN need not be remote consistent
|
||||
// for the branching operation to succeed.
|
||||
//
|
||||
// Hence, if we try to load a tenant in such a state where
|
||||
// 1. the existence of the branch was persisted (in IndexPart and/or locally)
|
||||
// 2. but the ancestor state is behind branch_lsn because it was not yet persisted
|
||||
// then we will need to wait for the ancestor timeline to
|
||||
// re-stream WAL up to branch_lsn before we access it.
|
||||
//
|
||||
// How can a tenant get in such a state?
|
||||
// - ungraceful pageserver process exit
|
||||
// - detach+attach => this is a bug, https://github.com/neondatabase/neon/issues/4219
|
||||
//
|
||||
// NB: this could be avoided by requiring
|
||||
// branch_lsn >= remote_consistent_lsn
|
||||
// during branch creation.
|
||||
match ancestor.wait_to_become_active(ctx).await {
|
||||
Ok(()) => {}
|
||||
Err(state) if state == TimelineState::Stopping => {
|
||||
return Err(PageReconstructError::AncestorStopping(ancestor.timeline_id));
|
||||
}
|
||||
Err(state) => {
|
||||
return Err(PageReconstructError::Other(anyhow::anyhow!(
|
||||
"Timeline {} will not become active. Current state: {:?}",
|
||||
ancestor.timeline_id,
|
||||
&state,
|
||||
)));
|
||||
}
|
||||
}
|
||||
ancestor.wait_lsn(timeline.ancestor_lsn, ctx).await?;
|
||||
|
||||
timeline_owned = ancestor;
|
||||
timeline = &*timeline_owned;
|
||||
prev_lsn = Lsn(u64::MAX);
|
||||
@@ -4307,12 +4416,6 @@ pub(crate) fn debug_assert_current_span_has_tenant_and_timeline_id() {}
|
||||
pub(crate) fn debug_assert_current_span_has_tenant_and_timeline_id() {
|
||||
use utils::tracing_span_assert;
|
||||
|
||||
pub static TENANT_ID_EXTRACTOR: once_cell::sync::Lazy<
|
||||
tracing_span_assert::MultiNameExtractor<2>,
|
||||
> = once_cell::sync::Lazy::new(|| {
|
||||
tracing_span_assert::MultiNameExtractor::new("TenantId", ["tenant_id", "tenant"])
|
||||
});
|
||||
|
||||
pub static TIMELINE_ID_EXTRACTOR: once_cell::sync::Lazy<
|
||||
tracing_span_assert::MultiNameExtractor<2>,
|
||||
> = once_cell::sync::Lazy::new(|| {
|
||||
@@ -4320,7 +4423,7 @@ pub(crate) fn debug_assert_current_span_has_tenant_and_timeline_id() {
|
||||
});
|
||||
|
||||
match tracing_span_assert::check_fields_present([
|
||||
&*TENANT_ID_EXTRACTOR,
|
||||
&*super::TENANT_ID_EXTRACTOR,
|
||||
&*TIMELINE_ID_EXTRACTOR,
|
||||
]) {
|
||||
Ok(()) => (),
|
||||
|
||||
@@ -30,7 +30,7 @@ use crate::{
|
||||
tenant::{
|
||||
config::{EvictionPolicy, EvictionPolicyLayerAccessThreshold},
|
||||
storage_layer::PersistentLayer,
|
||||
Tenant,
|
||||
LogicalSizeCalculationCause, Tenant,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -120,6 +120,13 @@ impl Timeline {
|
||||
}
|
||||
let elapsed = start.elapsed();
|
||||
crate::tenant::tasks::warn_when_period_overrun(elapsed, p.period, "eviction");
|
||||
crate::metrics::EVICTION_ITERATION_DURATION
|
||||
.get_metric_with_label_values(&[
|
||||
&format!("{}", p.period.as_secs()),
|
||||
&format!("{}", p.threshold.as_secs()),
|
||||
])
|
||||
.unwrap()
|
||||
.observe(elapsed.as_secs_f64());
|
||||
ControlFlow::Continue(start + p.period)
|
||||
}
|
||||
}
|
||||
@@ -335,7 +342,12 @@ impl Timeline {
|
||||
|
||||
// imitiate on-restart initial logical size
|
||||
let size = self
|
||||
.calculate_logical_size(lsn, cancel.clone(), ctx)
|
||||
.calculate_logical_size(
|
||||
lsn,
|
||||
LogicalSizeCalculationCause::EvictionTaskImitation,
|
||||
cancel.clone(),
|
||||
ctx,
|
||||
)
|
||||
.instrument(info_span!("calculate_logical_size"))
|
||||
.await;
|
||||
|
||||
@@ -407,9 +419,15 @@ impl Timeline {
|
||||
.inner();
|
||||
|
||||
let mut throwaway_cache = HashMap::new();
|
||||
let gather =
|
||||
crate::tenant::size::gather_inputs(tenant, limit, None, &mut throwaway_cache, ctx)
|
||||
.instrument(info_span!("gather_inputs"));
|
||||
let gather = crate::tenant::size::gather_inputs(
|
||||
tenant,
|
||||
limit,
|
||||
None,
|
||||
&mut throwaway_cache,
|
||||
LogicalSizeCalculationCause::EvictionTaskImitation,
|
||||
ctx,
|
||||
)
|
||||
.instrument(info_span!("gather_inputs"));
|
||||
|
||||
tokio::select! {
|
||||
_ = cancel.cancelled() => {}
|
||||
|
||||
@@ -28,8 +28,8 @@ use storage_broker::proto::SubscribeSafekeeperInfoRequest;
|
||||
use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId;
|
||||
use storage_broker::BrokerClientChannel;
|
||||
use storage_broker::Streaming;
|
||||
use tokio::select;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio::{select, sync::watch};
|
||||
use tracing::*;
|
||||
|
||||
use crate::{exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS};
|
||||
@@ -50,13 +50,13 @@ pub(super) async fn connection_manager_loop_step(
|
||||
ctx: &RequestContext,
|
||||
manager_status: &RwLock<Option<ConnectionManagerStatus>>,
|
||||
) -> ControlFlow<(), ()> {
|
||||
let mut timeline_state_updates = connection_manager_state
|
||||
match connection_manager_state
|
||||
.timeline
|
||||
.subscribe_for_state_updates();
|
||||
|
||||
match wait_for_active_timeline(&mut timeline_state_updates).await {
|
||||
ControlFlow::Continue(()) => {}
|
||||
ControlFlow::Break(()) => {
|
||||
.wait_to_become_active(ctx)
|
||||
.await
|
||||
{
|
||||
Ok(()) => {}
|
||||
Err(_) => {
|
||||
info!("Timeline dropped state updates sender before becoming active, stopping wal connection manager loop");
|
||||
return ControlFlow::Break(());
|
||||
}
|
||||
@@ -72,6 +72,10 @@ pub(super) async fn connection_manager_loop_step(
|
||||
timeline_id: connection_manager_state.timeline.timeline_id,
|
||||
};
|
||||
|
||||
let mut timeline_state_updates = connection_manager_state
|
||||
.timeline
|
||||
.subscribe_for_state_updates();
|
||||
|
||||
// Subscribe to the broker updates. Stream shares underlying TCP connection
|
||||
// with other streams on this client (other connection managers). When
|
||||
// object goes out of scope, stream finishes in drop() automatically.
|
||||
@@ -195,34 +199,6 @@ pub(super) async fn connection_manager_loop_step(
|
||||
}
|
||||
}
|
||||
|
||||
async fn wait_for_active_timeline(
|
||||
timeline_state_updates: &mut watch::Receiver<TimelineState>,
|
||||
) -> ControlFlow<(), ()> {
|
||||
let current_state = *timeline_state_updates.borrow();
|
||||
if current_state == TimelineState::Active {
|
||||
return ControlFlow::Continue(());
|
||||
}
|
||||
|
||||
loop {
|
||||
match timeline_state_updates.changed().await {
|
||||
Ok(()) => {
|
||||
let new_state = *timeline_state_updates.borrow();
|
||||
match new_state {
|
||||
TimelineState::Active => {
|
||||
debug!("Timeline state changed to active, continuing the walreceiver connection manager");
|
||||
return ControlFlow::Continue(());
|
||||
}
|
||||
state => {
|
||||
debug!("Not running the walreceiver connection manager, timeline is not active: {state:?}");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(_sender_dropped_error) => return ControlFlow::Break(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Endlessly try to subscribe for broker updates for a given timeline.
|
||||
async fn subscribe_for_timeline_updates(
|
||||
broker_client: &mut BrokerClientChannel,
|
||||
|
||||
@@ -305,6 +305,15 @@ impl<'a> WalIngest<'a> {
|
||||
self.checkpoint_modified = true;
|
||||
}
|
||||
}
|
||||
} else if decoded.xl_rmid == pg_constants::RM_LOGICALMSG_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
|
||||
if info == pg_constants::XLOG_LOGICAL_MESSAGE {
|
||||
// This is a convenient way to make the WAL ingestion pause at
|
||||
// particular point in the WAL. For more fine-grained control,
|
||||
// we could peek into the message and only pause if it contains
|
||||
// a particular string, for example, but this is enough for now.
|
||||
utils::failpoint_sleep_millis_async!("wal-ingest-logical-message-sleep");
|
||||
}
|
||||
}
|
||||
|
||||
// Iterate through all the blocks that the record modifies, and
|
||||
|
||||
@@ -192,8 +192,9 @@ retry:
|
||||
{
|
||||
if (!PQconsumeInput(pageserver_conn))
|
||||
{
|
||||
neon_log(LOG, "could not get response from pageserver: %s",
|
||||
PQerrorMessage(pageserver_conn));
|
||||
char *msg = pchomp(PQerrorMessage(pageserver_conn));
|
||||
neon_log(LOG, "could not get response from pageserver: %s", msg);
|
||||
pfree(msg);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@@ -343,7 +344,7 @@ pageserver_receive(void)
|
||||
resp = NULL;
|
||||
}
|
||||
else if (rc == -2)
|
||||
neon_log(ERROR, "could not read COPY data: %s", PQerrorMessage(pageserver_conn));
|
||||
neon_log(ERROR, "could not read COPY data: %s", pchomp(PQerrorMessage(pageserver_conn)));
|
||||
else
|
||||
neon_log(ERROR, "unexpected PQgetCopyData return value: %d", rc);
|
||||
}
|
||||
@@ -367,7 +368,7 @@ pageserver_flush(void)
|
||||
}
|
||||
else if (PQflush(pageserver_conn))
|
||||
{
|
||||
char *msg = PQerrorMessage(pageserver_conn);
|
||||
char *msg = pchomp(PQerrorMessage(pageserver_conn));
|
||||
|
||||
pageserver_disconnect();
|
||||
neon_log(ERROR, "failed to flush page requests: %s", msg);
|
||||
|
||||
@@ -52,7 +52,7 @@ typedef struct
|
||||
#define NEON_TAG "[NEON_SMGR] "
|
||||
#define neon_log(tag, fmt, ...) ereport(tag, \
|
||||
(errmsg(NEON_TAG fmt, ##__VA_ARGS__), \
|
||||
errhidestmt(true), errhidecontext(true), internalerrposition(0)))
|
||||
errhidestmt(true), errhidecontext(true), errposition(0), internalerrposition(0)))
|
||||
|
||||
/*
|
||||
* supertype of all the Neon*Request structs below
|
||||
|
||||
@@ -7,6 +7,7 @@ mod credentials;
|
||||
pub use credentials::ClientCredentials;
|
||||
|
||||
mod password_hack;
|
||||
pub use password_hack::parse_endpoint_param;
|
||||
use password_hack::PasswordHackPayload;
|
||||
|
||||
mod flow;
|
||||
@@ -44,10 +45,10 @@ pub enum AuthErrorImpl {
|
||||
#[error(
|
||||
"Endpoint ID is not specified. \
|
||||
Either please upgrade the postgres client library (libpq) for SNI support \
|
||||
or pass the endpoint ID (first part of the domain name) as a parameter: '?options=project%3D<endpoint-id>'. \
|
||||
or pass the endpoint ID (first part of the domain name) as a parameter: '?options=endpoint%3D<endpoint-id>'. \
|
||||
See more at https://neon.tech/sni"
|
||||
)]
|
||||
MissingProjectName,
|
||||
MissingEndpointName,
|
||||
|
||||
#[error("password authentication failed for user '{0}'")]
|
||||
AuthFailed(Box<str>),
|
||||
@@ -88,7 +89,7 @@ impl UserFacingError for AuthError {
|
||||
AuthFailed(_) => self.to_string(),
|
||||
BadAuthMethod(_) => self.to_string(),
|
||||
MalformedPassword(_) => self.to_string(),
|
||||
MissingProjectName => self.to_string(),
|
||||
MissingEndpointName => self.to_string(),
|
||||
Io(_) => "Internal error".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,8 +52,8 @@ pub async fn password_hack(
|
||||
.authenticate()
|
||||
.await?;
|
||||
|
||||
info!(project = &payload.project, "received missing parameter");
|
||||
creds.project = Some(payload.project);
|
||||
info!(project = &payload.endpoint, "received missing parameter");
|
||||
creds.project = Some(payload.endpoint);
|
||||
|
||||
let mut node = api.wake_compute(extra, creds).await?;
|
||||
node.config.password(payload.password);
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
//! User credentials used in authentication.
|
||||
|
||||
use crate::error::UserFacingError;
|
||||
use crate::{auth::password_hack::parse_endpoint_param, error::UserFacingError};
|
||||
use itertools::Itertools;
|
||||
use pq_proto::StartupMessageParams;
|
||||
use std::collections::HashSet;
|
||||
use thiserror::Error;
|
||||
@@ -61,7 +62,15 @@ impl<'a> ClientCredentials<'a> {
|
||||
// Project name might be passed via PG's command-line options.
|
||||
let project_option = params
|
||||
.options_raw()
|
||||
.and_then(|mut options| options.find_map(|opt| opt.strip_prefix("project=")))
|
||||
.and_then(|options| {
|
||||
// We support both `project` (deprecated) and `endpoint` options for backward compatibility.
|
||||
// However, if both are present, we don't exactly know which one to use.
|
||||
// Therefore we require that only one of them is present.
|
||||
options
|
||||
.filter_map(parse_endpoint_param)
|
||||
.at_most_one()
|
||||
.ok()?
|
||||
})
|
||||
.map(|name| name.to_string());
|
||||
|
||||
let project_from_domain = if let Some(sni_str) = sni {
|
||||
@@ -177,6 +186,51 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_endpoint_from_options() -> anyhow::Result<()> {
|
||||
let options = StartupMessageParams::new([
|
||||
("user", "john_doe"),
|
||||
("options", "-ckey=1 endpoint=bar -c geqo=off"),
|
||||
]);
|
||||
|
||||
let creds = ClientCredentials::parse(&options, None, None)?;
|
||||
assert_eq!(creds.user, "john_doe");
|
||||
assert_eq!(creds.project.as_deref(), Some("bar"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_three_endpoints_from_options() -> anyhow::Result<()> {
|
||||
let options = StartupMessageParams::new([
|
||||
("user", "john_doe"),
|
||||
(
|
||||
"options",
|
||||
"-ckey=1 endpoint=one endpoint=two endpoint=three -c geqo=off",
|
||||
),
|
||||
]);
|
||||
|
||||
let creds = ClientCredentials::parse(&options, None, None)?;
|
||||
assert_eq!(creds.user, "john_doe");
|
||||
assert!(creds.project.is_none());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_when_endpoint_and_project_are_in_options() -> anyhow::Result<()> {
|
||||
let options = StartupMessageParams::new([
|
||||
("user", "john_doe"),
|
||||
("options", "-ckey=1 endpoint=bar project=foo -c geqo=off"),
|
||||
]);
|
||||
|
||||
let creds = ClientCredentials::parse(&options, None, None)?;
|
||||
assert_eq!(creds.user, "john_doe");
|
||||
assert!(creds.project.is_none());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_projects_identical() -> anyhow::Result<()> {
|
||||
let options = StartupMessageParams::new([("user", "john_doe"), ("options", "project=baz")]);
|
||||
|
||||
@@ -91,7 +91,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, PasswordHack> {
|
||||
// the user neither enabled SNI nor resorted to any other method
|
||||
// for passing the project name we rely on. We should show them
|
||||
// the most helpful error message and point to the documentation.
|
||||
.ok_or(AuthErrorImpl::MissingProjectName)?;
|
||||
.ok_or(AuthErrorImpl::MissingEndpointName)?;
|
||||
|
||||
Ok(payload)
|
||||
}
|
||||
|
||||
@@ -6,27 +6,55 @@
|
||||
use bstr::ByteSlice;
|
||||
|
||||
pub struct PasswordHackPayload {
|
||||
pub project: String,
|
||||
pub endpoint: String,
|
||||
pub password: Vec<u8>,
|
||||
}
|
||||
|
||||
impl PasswordHackPayload {
|
||||
pub fn parse(bytes: &[u8]) -> Option<Self> {
|
||||
// The format is `project=<utf-8>;<password-bytes>`.
|
||||
let mut iter = bytes.strip_prefix(b"project=")?.splitn_str(2, ";");
|
||||
let project = iter.next()?.to_str().ok()?.to_owned();
|
||||
let mut iter = bytes.splitn_str(2, ";");
|
||||
let endpoint = iter.next()?.to_str().ok()?;
|
||||
let endpoint = parse_endpoint_param(endpoint)?.to_owned();
|
||||
let password = iter.next()?.to_owned();
|
||||
|
||||
Some(Self { project, password })
|
||||
Some(Self { endpoint, password })
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_endpoint_param(bytes: &str) -> Option<&str> {
|
||||
bytes
|
||||
.strip_prefix("project=")
|
||||
.or_else(|| bytes.strip_prefix("endpoint="))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn parse_password_hack_payload() {
|
||||
fn parse_endpoint_param_fn() {
|
||||
let input = "";
|
||||
assert!(parse_endpoint_param(input).is_none());
|
||||
|
||||
let input = "project=";
|
||||
assert_eq!(parse_endpoint_param(input), Some(""));
|
||||
|
||||
let input = "project=foobar";
|
||||
assert_eq!(parse_endpoint_param(input), Some("foobar"));
|
||||
|
||||
let input = "endpoint=";
|
||||
assert_eq!(parse_endpoint_param(input), Some(""));
|
||||
|
||||
let input = "endpoint=foobar";
|
||||
assert_eq!(parse_endpoint_param(input), Some("foobar"));
|
||||
|
||||
let input = "other_option=foobar";
|
||||
assert!(parse_endpoint_param(input).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_password_hack_payload_project() {
|
||||
let bytes = b"";
|
||||
assert!(PasswordHackPayload::parse(bytes).is_none());
|
||||
|
||||
@@ -34,13 +62,33 @@ mod tests {
|
||||
assert!(PasswordHackPayload::parse(bytes).is_none());
|
||||
|
||||
let bytes = b"project=;";
|
||||
let payload = PasswordHackPayload::parse(bytes).expect("parsing failed");
|
||||
assert_eq!(payload.project, "");
|
||||
let payload: PasswordHackPayload =
|
||||
PasswordHackPayload::parse(bytes).expect("parsing failed");
|
||||
assert_eq!(payload.endpoint, "");
|
||||
assert_eq!(payload.password, b"");
|
||||
|
||||
let bytes = b"project=foobar;pass;word";
|
||||
let payload = PasswordHackPayload::parse(bytes).expect("parsing failed");
|
||||
assert_eq!(payload.project, "foobar");
|
||||
assert_eq!(payload.endpoint, "foobar");
|
||||
assert_eq!(payload.password, b"pass;word");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_password_hack_payload_endpoint() {
|
||||
let bytes = b"";
|
||||
assert!(PasswordHackPayload::parse(bytes).is_none());
|
||||
|
||||
let bytes = b"endpoint=";
|
||||
assert!(PasswordHackPayload::parse(bytes).is_none());
|
||||
|
||||
let bytes = b"endpoint=;";
|
||||
let payload = PasswordHackPayload::parse(bytes).expect("parsing failed");
|
||||
assert_eq!(payload.endpoint, "");
|
||||
assert_eq!(payload.password, b"");
|
||||
|
||||
let bytes = b"endpoint=foobar;pass;word";
|
||||
let payload = PasswordHackPayload::parse(bytes).expect("parsing failed");
|
||||
assert_eq!(payload.endpoint, "foobar");
|
||||
assert_eq!(payload.password, b"pass;word");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::{cancellation::CancelClosure, error::UserFacingError};
|
||||
use crate::{auth::parse_endpoint_param, cancellation::CancelClosure, error::UserFacingError};
|
||||
use futures::{FutureExt, TryFutureExt};
|
||||
use itertools::Itertools;
|
||||
use pq_proto::StartupMessageParams;
|
||||
@@ -279,7 +279,7 @@ fn filtered_options(params: &StartupMessageParams) -> Option<String> {
|
||||
#[allow(unstable_name_collisions)]
|
||||
let options: String = params
|
||||
.options_raw()?
|
||||
.filter(|opt| !opt.starts_with("project="))
|
||||
.filter(|opt| parse_endpoint_param(opt).is_none())
|
||||
.intersperse(" ") // TODO: use impl from std once it's stabilized
|
||||
.collect();
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ FLAKY_TESTS_QUERY = """
|
||||
jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'name' as suite,
|
||||
jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'name' as test,
|
||||
jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'status' as status,
|
||||
jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'retriesStatusChange' as retries_status_change,
|
||||
to_timestamp((jsonb_array_elements(jsonb_array_elements(jsonb_array_elements(data -> 'children') -> 'children') -> 'children') -> 'time' -> 'start')::bigint / 1000)::date as timestamp
|
||||
FROM
|
||||
regress_test_results
|
||||
@@ -29,7 +30,7 @@ FLAKY_TESTS_QUERY = """
|
||||
) data
|
||||
WHERE
|
||||
timestamp > CURRENT_DATE - INTERVAL '%s' day
|
||||
AND status::text IN ('"failed"', '"broken"')
|
||||
AND (status::text IN ('"failed"', '"broken"') OR retries_status_change::boolean)
|
||||
;
|
||||
"""
|
||||
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
//
|
||||
// The script parses Allure reports and posts a comment with a summary of the test results to the PR.
|
||||
// It accepts an array of items and creates a comment with a summary for each one (for "release" and "debug", together or separately if any of them failed to be generated).
|
||||
//
|
||||
// The comment is updated on each run with the latest results.
|
||||
//
|
||||
@@ -13,19 +12,35 @@
|
||||
// github,
|
||||
// context,
|
||||
// fetch,
|
||||
// reports: [{...}, ...], // each report is expected to have "buildType", "reportUrl", and "jsonUrl" properties
|
||||
// report: {
|
||||
// reportUrl: "...",
|
||||
// reportJsonUrl: "...",
|
||||
// },
|
||||
// })
|
||||
//
|
||||
|
||||
module.exports = async ({ github, context, fetch, reports }) => {
|
||||
// Analog of Python's defaultdict.
|
||||
//
|
||||
// const dm = new DefaultMap(() => new DefaultMap(() => []))
|
||||
// dm["firstKey"]["secondKey"].push("value")
|
||||
//
|
||||
class DefaultMap extends Map {
|
||||
constructor(getDefaultValue) {
|
||||
return new Proxy({}, {
|
||||
get: (target, name) => name in target ? target[name] : (target[name] = getDefaultValue(name))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = async ({ github, context, fetch, report }) => {
|
||||
// Marker to find the comment in the subsequent runs
|
||||
const startMarker = `<!--AUTOMATIC COMMENT START #${context.payload.number}-->`
|
||||
// Let users know that the comment is updated automatically
|
||||
const autoupdateNotice = `<div align="right"><sub>The comment gets automatically updated with the latest test results<br>${context.payload.pull_request.head.sha} at ${new Date().toISOString()} :recycle:</sub></div>`
|
||||
// GitHub bot id taken from (https://api.github.com/users/github-actions[bot])
|
||||
const githubActionsBotId = 41898282
|
||||
// The latest commit in the PR URL
|
||||
const commitUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/pull/${context.payload.number}/commits/${context.payload.pull_request.head.sha}`
|
||||
// Commend body itself
|
||||
let commentBody = `${startMarker}\n### Test results for ${commitUrl}:\n___\n`
|
||||
let commentBody = `${startMarker}\n`
|
||||
|
||||
// Common parameters for GitHub API requests
|
||||
const ownerRepoParams = {
|
||||
@@ -33,76 +48,124 @@ module.exports = async ({ github, context, fetch, reports }) => {
|
||||
repo: context.repo.repo,
|
||||
}
|
||||
|
||||
for (const report of reports) {
|
||||
const {buildType, reportUrl, jsonUrl} = report
|
||||
const {reportUrl, reportJsonUrl} = report
|
||||
|
||||
if (!reportUrl || !jsonUrl) {
|
||||
commentBody += `#### ${buildType} build: no tests were run or test report is not available\n`
|
||||
continue
|
||||
}
|
||||
|
||||
const suites = await (await fetch(jsonUrl)).json()
|
||||
|
||||
// Allure distinguishes "failed" (with an assertion error) and "broken" (with any other error) tests.
|
||||
// For this report it's ok to treat them in the same way (as failed).
|
||||
failedTests = []
|
||||
passedTests = []
|
||||
skippedTests = []
|
||||
|
||||
retriedTests = []
|
||||
retriedStatusChangedTests = []
|
||||
|
||||
for (const parentSuite of suites.children) {
|
||||
for (const suite of parentSuite.children) {
|
||||
for (const test of suite.children) {
|
||||
pytestName = `${parentSuite.name.replace(".", "/")}/${suite.name}.py::${test.name}`
|
||||
test.pytestName = pytestName
|
||||
|
||||
if (test.status === "passed") {
|
||||
passedTests.push(test);
|
||||
} else if (test.status === "failed" || test.status === "broken") {
|
||||
failedTests.push(test);
|
||||
} else if (test.status === "skipped") {
|
||||
skippedTests.push(test);
|
||||
}
|
||||
|
||||
if (test.retriesCount > 0) {
|
||||
retriedTests.push(test);
|
||||
|
||||
if (test.retriedStatusChangedTests) {
|
||||
retriedStatusChangedTests.push(test);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const totalTestsCount = failedTests.length + passedTests.length + skippedTests.length
|
||||
commentBody += `#### ${buildType} build: ${totalTestsCount} tests run: ${passedTests.length} passed, ${failedTests.length} failed, ${skippedTests.length} skipped ([full report](${reportUrl}))\n`
|
||||
if (failedTests.length > 0) {
|
||||
commentBody += `Failed tests:\n`
|
||||
for (const test of failedTests) {
|
||||
const allureLink = `${reportUrl}#suites/${test.parentUid}/${test.uid}`
|
||||
|
||||
commentBody += `- [\`${test.pytestName}\`](${allureLink})`
|
||||
if (test.retriesCount > 0) {
|
||||
commentBody += ` (ran [${test.retriesCount + 1} times](${allureLink}/retries))`
|
||||
}
|
||||
commentBody += "\n"
|
||||
}
|
||||
commentBody += "\n"
|
||||
}
|
||||
if (retriedStatusChangedTests > 0) {
|
||||
commentBody += `Flaky tests:\n`
|
||||
for (const test of retriedStatusChangedTests) {
|
||||
const status = test.status === "passed" ? ":white_check_mark:" : ":x:"
|
||||
commentBody += `- ${status} [\`${test.pytestName}\`](${reportUrl}#suites/${test.parentUid}/${test.uid}/retries)\n`
|
||||
}
|
||||
commentBody += "\n"
|
||||
}
|
||||
commentBody += "___\n"
|
||||
if (!reportUrl || !reportJsonUrl) {
|
||||
commentBody += `#### No tests were run or test report is not available\n`
|
||||
commentBody += autoupdateNotice
|
||||
return
|
||||
}
|
||||
|
||||
const suites = await (await fetch(reportJsonUrl)).json()
|
||||
|
||||
// Allure distinguishes "failed" (with an assertion error) and "broken" (with any other error) tests.
|
||||
// For this report it's ok to treat them in the same way (as failed).
|
||||
const failedTests = new DefaultMap(() => new DefaultMap(() => []))
|
||||
const passedTests = new DefaultMap(() => new DefaultMap(() => []))
|
||||
const skippedTests = new DefaultMap(() => new DefaultMap(() => []))
|
||||
const retriedTests = new DefaultMap(() => new DefaultMap(() => []))
|
||||
const flakyTests = new DefaultMap(() => new DefaultMap(() => []))
|
||||
|
||||
let failedTestsCount = 0
|
||||
let passedTestsCount = 0
|
||||
let skippedTestsCount = 0
|
||||
let flakyTestsCount = 0
|
||||
|
||||
const pgVersions = new Set()
|
||||
|
||||
for (const parentSuite of suites.children) {
|
||||
for (const suite of parentSuite.children) {
|
||||
for (const test of suite.children) {
|
||||
let buildType, pgVersion
|
||||
const match = test.name.match(/[\[-](?<buildType>debug|release)-pg(?<pgVersion>\d+)[-\]]/)?.groups
|
||||
if (match) {
|
||||
({buildType, pgVersion} = match)
|
||||
} else {
|
||||
// It's ok, we embed BUILD_TYPE and Postgres Version into the test name only for regress suite and do not for other suites (like performance).
|
||||
console.info(`Cannot get BUILD_TYPE and Postgres Version from test name: "${test.name}", defaulting to "release" and "14"`)
|
||||
|
||||
buildType = "release"
|
||||
pgVersion = "14"
|
||||
}
|
||||
|
||||
pgVersions.add(pgVersion)
|
||||
|
||||
// Removing build type and PostgreSQL version from the test name to make it shorter
|
||||
const testName = test.name.replace(new RegExp(`${buildType}-pg${pgVersion}-?`), "").replace("[]", "")
|
||||
test.pytestName = `${parentSuite.name.replace(".", "/")}/${suite.name}.py::${testName}`
|
||||
test.pgVersion = pgVersion
|
||||
test.buildType = buildType
|
||||
|
||||
if (test.status === "passed") {
|
||||
passedTests[pgVersion][testName].push(test)
|
||||
passedTestsCount += 1
|
||||
} else if (test.status === "failed" || test.status === "broken") {
|
||||
failedTests[pgVersion][testName].push(test)
|
||||
failedTestsCount += 1
|
||||
} else if (test.status === "skipped") {
|
||||
skippedTests[pgVersion][testName].push(test)
|
||||
skippedTestsCount += 1
|
||||
}
|
||||
|
||||
if (test.retriesCount > 0) {
|
||||
retriedTests[pgVersion][testName].push(test)
|
||||
|
||||
if (test.retriesStatusChange) {
|
||||
flakyTests[pgVersion][testName].push(test)
|
||||
flakyTestsCount += 1
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const totalTestsCount = failedTestsCount + passedTestsCount + skippedTestsCount
|
||||
commentBody += `### ${totalTestsCount} tests run: ${passedTestsCount} passed, ${failedTestsCount} failed, ${skippedTestsCount} skipped ([full report](${reportUrl}))\n___\n`
|
||||
|
||||
// Print test resuls from the newest to the oldest Postgres version for release and debug builds.
|
||||
for (const pgVersion of Array.from(pgVersions).sort().reverse()) {
|
||||
if (Object.keys(failedTests[pgVersion]).length > 0) {
|
||||
commentBody += `#### Failures on Posgres ${pgVersion}\n\n`
|
||||
for (const [testName, tests] of Object.entries(failedTests[pgVersion])) {
|
||||
const links = []
|
||||
for (const test of tests) {
|
||||
const allureLink = `${reportUrl}#suites/${test.parentUid}/${test.uid}`
|
||||
links.push(`[${test.buildType}](${allureLink})`)
|
||||
}
|
||||
commentBody += `- \`${testName}\`: ${links.join(", ")}\n`
|
||||
}
|
||||
|
||||
const testsToRerun = Object.values(failedTests[pgVersion]).map(x => x[0].name)
|
||||
const command = `DEFAULT_PG_VERSION=${pgVersion} scripts/pytest -k "${testsToRerun.join(" or ")}"`
|
||||
|
||||
commentBody += "```\n"
|
||||
commentBody += `# Run failed on Postgres ${pgVersion} tests locally:\n`
|
||||
commentBody += `${command}\n`
|
||||
commentBody += "```\n"
|
||||
}
|
||||
}
|
||||
|
||||
if (flakyTestsCount > 0) {
|
||||
commentBody += `<details>\n<summary>Flaky tests (${flakyTestsCount})</summary>\n\n`
|
||||
for (const pgVersion of Array.from(pgVersions).sort().reverse()) {
|
||||
if (Object.keys(flakyTests[pgVersion]).length > 0) {
|
||||
commentBody += `#### Postgres ${pgVersion}\n\n`
|
||||
for (const [testName, tests] of Object.entries(flakyTests[pgVersion])) {
|
||||
const links = []
|
||||
for (const test of tests) {
|
||||
const allureLink = `${reportUrl}#suites/${test.parentUid}/${test.uid}/retries`
|
||||
const status = test.status === "passed" ? ":white_check_mark:" : ":x:"
|
||||
links.push(`[${status} ${test.buildType}](${allureLink})`)
|
||||
}
|
||||
commentBody += `- \`${testName}\`: ${links.join(", ")}\n`
|
||||
}
|
||||
}
|
||||
}
|
||||
commentBody += "\n</details>\n"
|
||||
}
|
||||
|
||||
commentBody += autoupdateNotice
|
||||
|
||||
const { data: comments } = await github.rest.issues.listComments({
|
||||
issue_number: context.payload.number,
|
||||
...ownerRepoParams,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
pytest_plugins = (
|
||||
"fixtures.pg_version",
|
||||
"fixtures.allure",
|
||||
"fixtures.neon_fixtures",
|
||||
"fixtures.benchmark_fixture",
|
||||
"fixtures.pg_stats",
|
||||
|
||||
25
test_runner/fixtures/allure.py
Normal file
25
test_runner/fixtures/allure.py
Normal file
@@ -0,0 +1,25 @@
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from fixtures.pg_version import DEFAULT_VERSION, PgVersion
|
||||
|
||||
"""
|
||||
Set of utilities to make Allure report more informative.
|
||||
|
||||
- It adds BUILD_TYPE and DEFAULT_PG_VERSION to the test names (only in test_runner/regress)
|
||||
to make tests distinguishable in Allure report.
|
||||
"""
|
||||
|
||||
|
||||
@pytest.fixture(scope="function", autouse=True)
|
||||
def allure_noop():
|
||||
pass
|
||||
|
||||
|
||||
def pytest_generate_tests(metafunc):
|
||||
if "test_runner/regress" in metafunc.definition._nodeid:
|
||||
build_type = os.environ.get("BUILD_TYPE", "DEBUG").lower()
|
||||
pg_version = PgVersion(os.environ.get("DEFAULT_PG_VERSION", DEFAULT_VERSION))
|
||||
|
||||
metafunc.parametrize("allure_noop", [f"{build_type}-pg{pg_version}"])
|
||||
@@ -451,13 +451,17 @@ def pytest_terminal_summary(
|
||||
revision = os.getenv("GITHUB_SHA", "local")
|
||||
platform = os.getenv("PLATFORM", "local")
|
||||
|
||||
terminalreporter.section("Benchmark results", "-")
|
||||
is_header_printed = False
|
||||
|
||||
result = []
|
||||
for test_report in terminalreporter.stats.get("passed", []):
|
||||
result_entry = []
|
||||
|
||||
for _, recorded_property in test_report.user_properties:
|
||||
if not is_header_printed:
|
||||
terminalreporter.section("Benchmark results", "-")
|
||||
is_header_printed = True
|
||||
|
||||
terminalreporter.write(
|
||||
"{}.{}: ".format(test_report.head_line, recorded_property["name"])
|
||||
)
|
||||
@@ -485,7 +489,6 @@ def pytest_terminal_summary(
|
||||
|
||||
out_dir = config.getoption("out_dir")
|
||||
if out_dir is None:
|
||||
warnings.warn("no out dir provided to store performance test results")
|
||||
return
|
||||
|
||||
if not result:
|
||||
|
||||
@@ -149,11 +149,16 @@ class PageserverHttpClient(requests.Session):
|
||||
assert isinstance(res_json, list)
|
||||
return res_json
|
||||
|
||||
def tenant_create(self, new_tenant_id: Optional[TenantId] = None) -> TenantId:
|
||||
def tenant_create(
|
||||
self, new_tenant_id: Optional[TenantId] = None, conf: Optional[Dict[str, Any]] = None
|
||||
) -> TenantId:
|
||||
if conf is not None:
|
||||
assert "new_tenant_id" not in conf.keys()
|
||||
res = self.post(
|
||||
f"http://localhost:{self.port}/v1/tenant",
|
||||
json={
|
||||
"new_tenant_id": str(new_tenant_id) if new_tenant_id else None,
|
||||
**(conf or {}),
|
||||
},
|
||||
)
|
||||
self.verbose_error(res)
|
||||
@@ -272,6 +277,7 @@ class PageserverHttpClient(requests.Session):
|
||||
new_timeline_id: Optional[TimelineId] = None,
|
||||
ancestor_timeline_id: Optional[TimelineId] = None,
|
||||
ancestor_start_lsn: Optional[Lsn] = None,
|
||||
**kwargs,
|
||||
) -> Dict[Any, Any]:
|
||||
body: Dict[str, Any] = {
|
||||
"new_timeline_id": str(new_timeline_id) if new_timeline_id else None,
|
||||
@@ -281,7 +287,9 @@ class PageserverHttpClient(requests.Session):
|
||||
if pg_version != PgVersion.NOT_SET:
|
||||
body["pg_version"] = int(pg_version)
|
||||
|
||||
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline", json=body)
|
||||
res = self.post(
|
||||
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline", json=body, **kwargs
|
||||
)
|
||||
self.verbose_error(res)
|
||||
if res.status_code == 409:
|
||||
raise Exception(f"could not create timeline: already exists for id {new_timeline_id}")
|
||||
|
||||
@@ -46,6 +46,20 @@ class PgVersion(str, enum.Enum):
|
||||
DEFAULT_VERSION: PgVersion = PgVersion.V14
|
||||
|
||||
|
||||
def skip_on_postgres(version: PgVersion, reason: str):
|
||||
return pytest.mark.skipif(
|
||||
PgVersion(os.environ.get("DEFAULT_PG_VERSION", DEFAULT_VERSION)) is version,
|
||||
reason=reason,
|
||||
)
|
||||
|
||||
|
||||
def xfail_on_postgres(version: PgVersion, reason: str):
|
||||
return pytest.mark.xfail(
|
||||
PgVersion(os.environ.get("DEFAULT_PG_VERSION", DEFAULT_VERSION)) is version,
|
||||
reason=reason,
|
||||
)
|
||||
|
||||
|
||||
def pytest_addoption(parser: Parser):
|
||||
parser.addoption(
|
||||
"--pg-version",
|
||||
|
||||
@@ -16,7 +16,7 @@ from fixtures.neon_fixtures import (
|
||||
)
|
||||
from fixtures.pageserver.http import PageserverHttpClient
|
||||
from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload
|
||||
from fixtures.pg_version import PgVersion
|
||||
from fixtures.pg_version import PgVersion, skip_on_postgres
|
||||
from fixtures.types import Lsn
|
||||
from pytest import FixtureRequest
|
||||
|
||||
@@ -41,12 +41,15 @@ check_ondisk_data_compatibility_if_enabled = pytest.mark.skipif(
|
||||
)
|
||||
|
||||
|
||||
# Note: if renaming this test, don't forget to update a reference to it in a workflow file:
|
||||
# "Upload compatibility snapshot" step in .github/actions/run-python-test-set/action.yml
|
||||
@check_ondisk_data_compatibility_if_enabled
|
||||
@skip_on_postgres(PgVersion.V15, "Compatibility tests doesn't support Postgres 15 yet")
|
||||
@pytest.mark.xdist_group("compatibility")
|
||||
@pytest.mark.order(before="test_forward_compatibility")
|
||||
def test_create_snapshot(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, test_output_dir: Path):
|
||||
def test_create_snapshot(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
pg_bin: PgBin,
|
||||
top_output_dir: Path,
|
||||
test_output_dir: Path,
|
||||
):
|
||||
# The test doesn't really test anything
|
||||
# it creates a new snapshot for releases after we tested the current version against the previous snapshot in `test_backward_compatibility`.
|
||||
#
|
||||
@@ -86,10 +89,14 @@ def test_create_snapshot(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, test_o
|
||||
sk.stop()
|
||||
env.pageserver.stop()
|
||||
|
||||
shutil.copytree(test_output_dir, test_output_dir / "compatibility_snapshot_pg14")
|
||||
# Directory `test_output_dir / "compatibility_snapshot_pg14"` is uploaded to S3 in a workflow, keep the name in sync with it
|
||||
# Directory `compatibility_snapshot_dir` is uploaded to S3 in a workflow, keep the name in sync with it
|
||||
compatibility_snapshot_dir = top_output_dir / "compatibility_snapshot_pg14"
|
||||
if compatibility_snapshot_dir.exists():
|
||||
shutil.rmtree(compatibility_snapshot_dir)
|
||||
shutil.copytree(test_output_dir, compatibility_snapshot_dir)
|
||||
|
||||
|
||||
@skip_on_postgres(PgVersion.V15, "Compatibility tests doesn't support Postgres 15 yet")
|
||||
@check_ondisk_data_compatibility_if_enabled
|
||||
@pytest.mark.xdist_group("compatibility")
|
||||
@pytest.mark.order(after="test_create_snapshot")
|
||||
@@ -148,11 +155,13 @@ def test_backward_compatibility(
|
||||
), "Breaking changes are allowed by ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE, but the test has passed without any breakage"
|
||||
|
||||
|
||||
@skip_on_postgres(PgVersion.V15, "Compatibility tests doesn't support Postgres 15 yet")
|
||||
@check_ondisk_data_compatibility_if_enabled
|
||||
@pytest.mark.xdist_group("compatibility")
|
||||
@pytest.mark.order(after="test_create_snapshot")
|
||||
def test_forward_compatibility(
|
||||
test_output_dir: Path,
|
||||
top_output_dir: Path,
|
||||
port_distributor: PortDistributor,
|
||||
pg_version: PgVersion,
|
||||
request: FixtureRequest,
|
||||
@@ -174,9 +183,7 @@ def test_forward_compatibility(
|
||||
), "COMPATIBILITY_POSTGRES_DISTRIB_DIR is not set. It should be set to a pg_install directrory (ideally generated by the previous version of Neon)"
|
||||
compatibility_postgres_distrib_dir = Path(compatibility_postgres_distrib_dir_env).resolve()
|
||||
|
||||
compatibility_snapshot_dir = (
|
||||
test_output_dir.parent / "test_create_snapshot" / "compatibility_snapshot_pg14"
|
||||
)
|
||||
compatibility_snapshot_dir = top_output_dir / "compatibility_snapshot_pg14"
|
||||
|
||||
breaking_changes_allowed = (
|
||||
os.environ.get("ALLOW_FORWARD_COMPATIBILITY_BREAKAGE", "false").lower() == "true"
|
||||
|
||||
@@ -136,9 +136,7 @@ def eviction_env(request, neon_env_builder: NeonEnvBuilder, pg_bin: PgBin) -> Ev
|
||||
env.pageserver.allowed_errors.append(r".* running disk usage based eviction due to pressure.*")
|
||||
|
||||
# remove the initial tenant
|
||||
## why wait for upload queue? => https://github.com/neondatabase/neon/issues/3865
|
||||
assert env.initial_timeline
|
||||
wait_for_upload_queue_empty(pageserver_http, env.initial_tenant, env.initial_timeline)
|
||||
pageserver_http.tenant_detach(env.initial_tenant)
|
||||
assert isinstance(env.remote_storage, LocalFsStorage)
|
||||
tenant_remote_storage = env.remote_storage.root / "tenants" / str(env.initial_tenant)
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
import pytest
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.pg_version import PgVersion, xfail_on_postgres
|
||||
|
||||
|
||||
@xfail_on_postgres(PgVersion.V15, reason="https://github.com/neondatabase/neon/pull/4182")
|
||||
@pytest.mark.timeout(1800)
|
||||
def test_hot_standby(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
|
||||
@@ -20,6 +20,7 @@ from fixtures.pageserver.utils import (
|
||||
assert_tenant_state,
|
||||
wait_for_last_record_lsn,
|
||||
wait_for_upload,
|
||||
wait_for_upload_queue_empty,
|
||||
wait_until_tenant_state,
|
||||
)
|
||||
from fixtures.types import Lsn
|
||||
@@ -149,6 +150,7 @@ def test_ondemand_download_timetravel(
|
||||
|
||||
##### First start, insert data and upload it to the remote storage
|
||||
env = neon_env_builder.init_start()
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
# Override defaults, to create more layers
|
||||
tenant, _ = env.neon_cli.create_tenant(
|
||||
@@ -225,7 +227,8 @@ def test_ondemand_download_timetravel(
|
||||
assert filled_current_physical == filled_size, "we don't yet do layer eviction"
|
||||
|
||||
# Wait until generated image layers are uploaded to S3
|
||||
time.sleep(3)
|
||||
if remote_storage_kind is not None:
|
||||
wait_for_upload_queue_empty(pageserver_http, env.initial_tenant, timeline_id)
|
||||
|
||||
env.pageserver.stop()
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
|
||||
from fixtures.pg_version import PgVersion, xfail_on_postgres
|
||||
|
||||
|
||||
# Run the main PostgreSQL regression tests, in src/test/regress.
|
||||
@@ -71,6 +72,7 @@ def test_pg_regress(
|
||||
#
|
||||
# This runs for a long time, especially in debug mode, so use a larger-than-default
|
||||
# timeout.
|
||||
@xfail_on_postgres(PgVersion.V15, reason="https://github.com/neondatabase/neon/pull/4213")
|
||||
@pytest.mark.timeout(1800)
|
||||
def test_isolation(
|
||||
neon_simple_env: NeonEnv,
|
||||
|
||||
@@ -5,16 +5,18 @@ import pytest
|
||||
from fixtures.neon_fixtures import PSQL, NeonProxy, VanillaPostgres
|
||||
|
||||
|
||||
def test_proxy_select_1(static_proxy: NeonProxy):
|
||||
@pytest.mark.parametrize("option_name", ["project", "endpoint"])
|
||||
def test_proxy_select_1(static_proxy: NeonProxy, option_name: str):
|
||||
"""
|
||||
A simplest smoke test: check proxy against a local postgres instance.
|
||||
"""
|
||||
|
||||
out = static_proxy.safe_psql("select 1", options="project=generic-project-name")
|
||||
out = static_proxy.safe_psql("select 1", options=f"{option_name}=generic-project-name")
|
||||
assert out[0][0] == 1
|
||||
|
||||
|
||||
def test_password_hack(static_proxy: NeonProxy):
|
||||
@pytest.mark.parametrize("option_name", ["project", "endpoint"])
|
||||
def test_password_hack(static_proxy: NeonProxy, option_name: str):
|
||||
"""
|
||||
Check the PasswordHack auth flow: an alternative to SCRAM auth for
|
||||
clients which can't provide the project/endpoint name via SNI or `options`.
|
||||
@@ -23,11 +25,12 @@ def test_password_hack(static_proxy: NeonProxy):
|
||||
user = "borat"
|
||||
password = "password"
|
||||
static_proxy.safe_psql(
|
||||
f"create role {user} with login password '{password}'", options="project=irrelevant"
|
||||
f"create role {user} with login password '{password}'",
|
||||
options=f"{option_name}=irrelevant",
|
||||
)
|
||||
|
||||
# Note the format of `magic`!
|
||||
magic = f"project=irrelevant;{password}"
|
||||
magic = f"{option_name}=irrelevant;{password}"
|
||||
static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic)
|
||||
|
||||
# Must also check that invalid magic won't be accepted.
|
||||
@@ -56,55 +59,62 @@ async def test_link_auth(vanilla_pg: VanillaPostgres, link_proxy: NeonProxy):
|
||||
assert out == "42"
|
||||
|
||||
|
||||
def test_proxy_options(static_proxy: NeonProxy):
|
||||
@pytest.mark.parametrize("option_name", ["project", "endpoint"])
|
||||
def test_proxy_options(static_proxy: NeonProxy, option_name: str):
|
||||
"""
|
||||
Check that we pass extra `options` to the PostgreSQL server:
|
||||
* `project=...` shouldn't be passed at all (otherwise postgres will raise an error).
|
||||
* `project=...` and `endpoint=...` shouldn't be passed at all
|
||||
* (otherwise postgres will raise an error).
|
||||
* everything else should be passed as-is.
|
||||
"""
|
||||
|
||||
options = "project=irrelevant -cproxytest.option=value"
|
||||
options = f"{option_name}=irrelevant -cproxytest.option=value"
|
||||
out = static_proxy.safe_psql("show proxytest.option", options=options)
|
||||
assert out[0][0] == "value"
|
||||
|
||||
options = "-c proxytest.foo=\\ str project=irrelevant"
|
||||
options = f"-c proxytest.foo=\\ str {option_name}=irrelevant"
|
||||
out = static_proxy.safe_psql("show proxytest.foo", options=options)
|
||||
assert out[0][0] == " str"
|
||||
|
||||
|
||||
def test_auth_errors(static_proxy: NeonProxy):
|
||||
@pytest.mark.parametrize("option_name", ["project", "endpoint"])
|
||||
def test_auth_errors(static_proxy: NeonProxy, option_name: str):
|
||||
"""
|
||||
Check that we throw very specific errors in some unsuccessful auth scenarios.
|
||||
"""
|
||||
|
||||
# User does not exist
|
||||
with pytest.raises(psycopg2.Error) as exprinfo:
|
||||
static_proxy.connect(user="pinocchio", options="project=irrelevant")
|
||||
static_proxy.connect(user="pinocchio", options=f"{option_name}=irrelevant")
|
||||
text = str(exprinfo.value).strip()
|
||||
assert text.endswith("password authentication failed for user 'pinocchio'")
|
||||
|
||||
static_proxy.safe_psql(
|
||||
"create role pinocchio with login password 'magic'", options="project=irrelevant"
|
||||
"create role pinocchio with login password 'magic'",
|
||||
options=f"{option_name}=irrelevant",
|
||||
)
|
||||
|
||||
# User exists, but password is missing
|
||||
with pytest.raises(psycopg2.Error) as exprinfo:
|
||||
static_proxy.connect(user="pinocchio", password=None, options="project=irrelevant")
|
||||
static_proxy.connect(user="pinocchio", password=None, options=f"{option_name}=irrelevant")
|
||||
text = str(exprinfo.value).strip()
|
||||
assert text.endswith("password authentication failed for user 'pinocchio'")
|
||||
|
||||
# User exists, but password is wrong
|
||||
with pytest.raises(psycopg2.Error) as exprinfo:
|
||||
static_proxy.connect(user="pinocchio", password="bad", options="project=irrelevant")
|
||||
static_proxy.connect(user="pinocchio", password="bad", options=f"{option_name}=irrelevant")
|
||||
text = str(exprinfo.value).strip()
|
||||
assert text.endswith("password authentication failed for user 'pinocchio'")
|
||||
|
||||
# Finally, check that the user can connect
|
||||
with static_proxy.connect(user="pinocchio", password="magic", options="project=irrelevant"):
|
||||
with static_proxy.connect(
|
||||
user="pinocchio", password="magic", options=f"{option_name}=irrelevant"
|
||||
):
|
||||
pass
|
||||
|
||||
|
||||
def test_forward_params_to_client(static_proxy: NeonProxy):
|
||||
@pytest.mark.parametrize("option_name", ["project", "endpoint"])
|
||||
def test_forward_params_to_client(static_proxy: NeonProxy, option_name: str):
|
||||
"""
|
||||
Check that we forward all necessary PostgreSQL server params to client.
|
||||
"""
|
||||
@@ -130,7 +140,7 @@ def test_forward_params_to_client(static_proxy: NeonProxy):
|
||||
where name = any(%s)
|
||||
"""
|
||||
|
||||
with static_proxy.connect(options="project=irrelevant") as conn:
|
||||
with static_proxy.connect(options=f"{option_name}=irrelevant") as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(query, (reported_params_subset,))
|
||||
for name, value in cur.fetchall():
|
||||
@@ -138,17 +148,18 @@ def test_forward_params_to_client(static_proxy: NeonProxy):
|
||||
assert conn.get_parameter_status(name) == value
|
||||
|
||||
|
||||
@pytest.mark.parametrize("option_name", ["project", "endpoint"])
|
||||
@pytest.mark.timeout(5)
|
||||
def test_close_on_connections_exit(static_proxy: NeonProxy):
|
||||
def test_close_on_connections_exit(static_proxy: NeonProxy, option_name: str):
|
||||
# Open two connections, send SIGTERM, then ensure that proxy doesn't exit
|
||||
# until after connections close.
|
||||
with static_proxy.connect(options="project=irrelevant"), static_proxy.connect(
|
||||
options="project=irrelevant"
|
||||
with static_proxy.connect(options=f"{option_name}=irrelevant"), static_proxy.connect(
|
||||
options=f"{option_name}=irrelevant"
|
||||
):
|
||||
static_proxy.terminate()
|
||||
with pytest.raises(subprocess.TimeoutExpired):
|
||||
static_proxy.wait_for_exit(timeout=2)
|
||||
# Ensure we don't accept any more connections
|
||||
with pytest.raises(psycopg2.OperationalError):
|
||||
static_proxy.connect(options="project=irrelevant")
|
||||
static_proxy.connect(options=f"{option_name}=irrelevant")
|
||||
static_proxy.wait_for_exit()
|
||||
|
||||
@@ -2,11 +2,12 @@
|
||||
# env NEON_PAGESERVER_OVERRIDES="remote_storage={local_path='/tmp/neon_zzz/'}" poetry ......
|
||||
|
||||
import os
|
||||
import queue
|
||||
import shutil
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
@@ -26,6 +27,7 @@ from fixtures.pageserver.utils import (
|
||||
)
|
||||
from fixtures.types import Lsn, TenantId, TimelineId
|
||||
from fixtures.utils import print_gc_result, query_scalar, wait_until
|
||||
from requests import ReadTimeout
|
||||
|
||||
|
||||
#
|
||||
@@ -81,9 +83,7 @@ def test_remote_storage_backup_and_restore(
|
||||
env.pageserver.allowed_errors.append(".*failed to load remote timeline.*")
|
||||
# we have a bunch of pytest.raises for these below
|
||||
env.pageserver.allowed_errors.append(".*tenant .*? already exists, state:.*")
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*Cannot attach tenant .*?, local tenant directory already exists.*"
|
||||
)
|
||||
env.pageserver.allowed_errors.append(".*tenant directory already exists.*")
|
||||
env.pageserver.allowed_errors.append(".*simulated failure of remote operation.*")
|
||||
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
@@ -626,10 +626,7 @@ def test_empty_branch_remote_storage_upload(
|
||||
|
||||
new_branch_name = "new_branch"
|
||||
new_branch_timeline_id = env.neon_cli.create_branch(new_branch_name, "main", env.initial_tenant)
|
||||
|
||||
with env.endpoints.create_start(new_branch_name, tenant_id=env.initial_tenant) as endpoint:
|
||||
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_branch_timeline_id)
|
||||
wait_upload_queue_empty(client, env.initial_tenant, new_branch_timeline_id)
|
||||
assert_nothing_to_upload(client, env.initial_tenant, new_branch_timeline_id)
|
||||
|
||||
timelines_before_detach = set(
|
||||
map(
|
||||
@@ -658,13 +655,19 @@ def test_empty_branch_remote_storage_upload(
|
||||
), f"Expected to have same timelines after reattach, but got {timelines_after_detach}"
|
||||
|
||||
|
||||
# Branches off a root branch, but does not write anything to the new branch, so it has a metadata file only.
|
||||
# Ensures the branch is not on the remote storage and restarts the pageserver — the branch should be uploaded after the restart.
|
||||
@pytest.mark.parametrize("remote_storage_kind", [RemoteStorageKind.LOCAL_FS])
|
||||
def test_empty_branch_remote_storage_upload_on_restart(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
remote_storage_kind: RemoteStorageKind,
|
||||
):
|
||||
"""
|
||||
Branches off a root branch, but does not write anything to the new branch, so
|
||||
it has a metadata file only.
|
||||
|
||||
Ensures the branch is not on the remote storage and restarts the pageserver
|
||||
— the upload should be scheduled by load, and create_timeline should await
|
||||
for it even though it gets 409 Conflict.
|
||||
"""
|
||||
neon_env_builder.enable_remote_storage(
|
||||
remote_storage_kind=remote_storage_kind,
|
||||
test_name="test_empty_branch_remote_storage_upload_on_restart",
|
||||
@@ -673,35 +676,87 @@ def test_empty_branch_remote_storage_upload_on_restart(
|
||||
env = neon_env_builder.init_start()
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
new_branch_name = "new_branch"
|
||||
new_branch_timeline_id = env.neon_cli.create_branch(new_branch_name, "main", env.initial_tenant)
|
||||
client.configure_failpoints(("before-upload-index", "return"))
|
||||
|
||||
with env.endpoints.create_start(new_branch_name, tenant_id=env.initial_tenant) as endpoint:
|
||||
wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, new_branch_timeline_id)
|
||||
wait_upload_queue_empty(client, env.initial_tenant, new_branch_timeline_id)
|
||||
new_branch_timeline_id = TimelineId.generate()
|
||||
|
||||
with pytest.raises(ReadTimeout):
|
||||
client.timeline_create(
|
||||
tenant_id=env.initial_tenant,
|
||||
ancestor_timeline_id=env.initial_timeline,
|
||||
new_timeline_id=new_branch_timeline_id,
|
||||
pg_version=env.pg_version,
|
||||
timeout=4,
|
||||
)
|
||||
|
||||
env.pageserver.allowed_errors.append(
|
||||
f".*POST.* path=/v1/tenant/{env.initial_tenant}/timeline.* request was dropped before completing"
|
||||
)
|
||||
|
||||
# index upload is now hitting the failpoint, should not block the shutdown
|
||||
env.pageserver.stop()
|
||||
|
||||
# Remove new branch from the remote storage
|
||||
assert isinstance(env.remote_storage, LocalFsStorage)
|
||||
new_branch_on_remote_storage = (
|
||||
env.remote_storage.root
|
||||
/ "tenants"
|
||||
/ str(env.initial_tenant)
|
||||
/ "timelines"
|
||||
/ str(new_branch_timeline_id)
|
||||
timeline_path = (
|
||||
Path("tenants") / str(env.initial_tenant) / "timelines" / str(new_branch_timeline_id)
|
||||
)
|
||||
assert (
|
||||
new_branch_on_remote_storage.is_dir()
|
||||
), f"'{new_branch_on_remote_storage}' path does not exist on the remote storage"
|
||||
shutil.rmtree(new_branch_on_remote_storage)
|
||||
|
||||
env.pageserver.start()
|
||||
local_metadata = env.repo_dir / timeline_path / "metadata"
|
||||
assert local_metadata.is_file(), "timeout cancelled timeline branching, not the upload"
|
||||
|
||||
wait_upload_queue_empty(client, env.initial_tenant, new_branch_timeline_id)
|
||||
assert isinstance(env.remote_storage, LocalFsStorage)
|
||||
new_branch_on_remote_storage = env.remote_storage.root / timeline_path
|
||||
assert (
|
||||
new_branch_on_remote_storage.is_dir()
|
||||
), f"New branch should have been reuploaded on pageserver restart to the remote storage path '{new_branch_on_remote_storage}'"
|
||||
not new_branch_on_remote_storage.exists()
|
||||
), "failpoint should had prohibited index_part.json upload"
|
||||
|
||||
# during reconciliation we should had scheduled the uploads and on the
|
||||
# retried create_timeline, we will await for those to complete on next
|
||||
# client.timeline_create
|
||||
env.pageserver.start(extra_env_vars={"FAILPOINTS": "before-upload-index=return"})
|
||||
|
||||
# sleep a bit to force the upload task go into exponential backoff
|
||||
time.sleep(1)
|
||||
|
||||
q: queue.Queue[Optional[PageserverApiException]] = queue.Queue()
|
||||
barrier = threading.Barrier(2)
|
||||
|
||||
def create_in_background():
|
||||
barrier.wait()
|
||||
try:
|
||||
client.timeline_create(
|
||||
tenant_id=env.initial_tenant,
|
||||
ancestor_timeline_id=env.initial_timeline,
|
||||
new_timeline_id=new_branch_timeline_id,
|
||||
pg_version=env.pg_version,
|
||||
)
|
||||
q.put(None)
|
||||
except PageserverApiException as e:
|
||||
q.put(e)
|
||||
|
||||
create_thread = threading.Thread(target=create_in_background)
|
||||
create_thread.start()
|
||||
|
||||
try:
|
||||
# maximize chances of actually waiting for the uploads by create_timeline
|
||||
barrier.wait()
|
||||
|
||||
assert not new_branch_on_remote_storage.exists(), "failpoint should had stopped uploading"
|
||||
|
||||
client.configure_failpoints(("before-upload-index", "off"))
|
||||
conflict = q.get()
|
||||
|
||||
assert conflict, "create_timeline should not have succeeded"
|
||||
assert (
|
||||
conflict.status_code == 409
|
||||
), "timeline was created before restart, and uploads scheduled during initial load, so we expect 409 conflict"
|
||||
|
||||
assert_nothing_to_upload(client, env.initial_tenant, new_branch_timeline_id)
|
||||
|
||||
assert (
|
||||
new_branch_on_remote_storage / "index_part.json"
|
||||
).is_file(), "uploads scheduled during initial load should had been awaited for"
|
||||
finally:
|
||||
create_thread.join()
|
||||
|
||||
|
||||
def wait_upload_queue_empty(
|
||||
@@ -752,4 +807,17 @@ def get_queued_count(
|
||||
return int(val)
|
||||
|
||||
|
||||
def assert_nothing_to_upload(
|
||||
client: PageserverHttpClient,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
):
|
||||
"""
|
||||
Check last_record_lsn == remote_consistent_lsn. Assert works only for empty timelines, which
|
||||
do not have anything to compact or gc.
|
||||
"""
|
||||
detail = client.timeline_detail(tenant_id, timeline_id)
|
||||
assert Lsn(detail["last_record_lsn"]) == Lsn(detail["remote_consistent_lsn"])
|
||||
|
||||
|
||||
# TODO Test that we correctly handle GC of files that are stuck in upload queue.
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
import json
|
||||
from contextlib import closing
|
||||
from typing import Generator
|
||||
|
||||
import psycopg2.extras
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import (
|
||||
LocalFsStorage,
|
||||
NeonEnv,
|
||||
NeonEnvBuilder,
|
||||
RemoteStorageKind,
|
||||
)
|
||||
from fixtures.pageserver.http import PageserverApiException
|
||||
from fixtures.pageserver.utils import assert_tenant_state, wait_for_upload
|
||||
from fixtures.types import Lsn
|
||||
from fixtures.utils import wait_until
|
||||
@@ -403,3 +407,62 @@ def test_live_reconfig_get_evictions_low_residence_duration_metric_threshold(
|
||||
metric = get_metric()
|
||||
assert int(metric.labels["low_threshold_secs"]) == 24 * 60 * 60, "label resets to default"
|
||||
assert int(metric.value) == 0, "value resets to default"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def unknown_fields_env(neon_env_builder: NeonEnvBuilder) -> Generator[NeonEnv, None, None]:
|
||||
env = neon_env_builder.init_start()
|
||||
yield env
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*/v1/tenant .*Error processing HTTP request: Bad request.*",
|
||||
".*/v1/tenant/config .*Error processing HTTP request: Bad request.*",
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def test_unknown_fields_cli_create(unknown_fields_env: NeonEnv):
|
||||
"""
|
||||
When specifying an invalid config field during tenant creation on the CLI, the CLI should fail with an error.
|
||||
"""
|
||||
|
||||
with pytest.raises(Exception, match="Unrecognized tenant settings"):
|
||||
unknown_fields_env.neon_cli.create_tenant(conf={"unknown_field": "unknown_value"})
|
||||
|
||||
|
||||
def test_unknown_fields_http_create(unknown_fields_env: NeonEnv):
|
||||
"""
|
||||
When specifying an invalid config field during tenant creation on the HTTP API, the API should fail with an error.
|
||||
"""
|
||||
|
||||
ps_http = unknown_fields_env.pageserver.http_client()
|
||||
|
||||
with pytest.raises(PageserverApiException) as excinfo:
|
||||
ps_http.tenant_create(conf={"unknown_field": "unknown_value"})
|
||||
assert excinfo.value.status_code == 400
|
||||
|
||||
|
||||
def test_unknown_fields_cli_config(unknown_fields_env: NeonEnv):
|
||||
"""
|
||||
When specifying an invalid config field during tenant configuration on the CLI, the CLI should fail with an error.
|
||||
"""
|
||||
|
||||
(tenant_id, _) = unknown_fields_env.neon_cli.create_tenant()
|
||||
|
||||
with pytest.raises(Exception, match="Unrecognized tenant settings"):
|
||||
unknown_fields_env.neon_cli.config_tenant(
|
||||
tenant_id, conf={"unknown_field": "unknown_value"}
|
||||
)
|
||||
|
||||
|
||||
def test_unknown_fields_http_config(unknown_fields_env: NeonEnv):
|
||||
"""
|
||||
When specifying an invalid config field during tenant configuration on the HTTP API, the API should fail with an error.
|
||||
"""
|
||||
|
||||
(tenant_id, _) = unknown_fields_env.neon_cli.create_tenant()
|
||||
ps_http = unknown_fields_env.pageserver.http_client()
|
||||
|
||||
with pytest.raises(PageserverApiException) as excinfo:
|
||||
ps_http.set_tenant_config(tenant_id, {"unknown_field": "unknown_value"})
|
||||
assert excinfo.value.status_code == 400
|
||||
|
||||
@@ -685,12 +685,10 @@ def test_load_attach_negatives(
|
||||
|
||||
pageserver_http.tenant_ignore(tenant_id)
|
||||
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*Cannot attach tenant .*?, local tenant directory already exists.*"
|
||||
)
|
||||
env.pageserver.allowed_errors.append(".*tenant directory already exists.*")
|
||||
with pytest.raises(
|
||||
expected_exception=PageserverApiException,
|
||||
match=f"Cannot attach tenant {tenant_id}, local tenant directory already exists",
|
||||
match="tenant directory already exists",
|
||||
):
|
||||
pageserver_http.tenant_attach(tenant_id)
|
||||
|
||||
@@ -734,12 +732,10 @@ def test_ignore_while_attaching(
|
||||
pageserver_http.tenant_ignore(tenant_id)
|
||||
|
||||
# Cannot attach it due to some local files existing
|
||||
env.pageserver.allowed_errors.append(
|
||||
".*Cannot attach tenant .*?, local tenant directory already exists.*"
|
||||
)
|
||||
env.pageserver.allowed_errors.append(".*tenant directory already exists.*")
|
||||
with pytest.raises(
|
||||
expected_exception=PageserverApiException,
|
||||
match=f"Cannot attach tenant {tenant_id}, local tenant directory already exists",
|
||||
match="tenant directory already exists",
|
||||
):
|
||||
pageserver_http.tenant_attach(tenant_id)
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import os
|
||||
import shutil
|
||||
import threading
|
||||
import time
|
||||
from contextlib import closing, contextmanager
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
@@ -12,6 +14,8 @@ from fixtures.neon_fixtures import (
|
||||
NeonEnv,
|
||||
NeonEnvBuilder,
|
||||
PortDistributor,
|
||||
RemoteStorageKind,
|
||||
available_remote_storages,
|
||||
)
|
||||
from fixtures.pageserver.http import PageserverHttpClient
|
||||
from fixtures.pageserver.utils import (
|
||||
@@ -512,3 +516,225 @@ def test_tenant_relocation(
|
||||
if line.startswith("listen_pg_addr"):
|
||||
lines[i] = f"listen_pg_addr = 'localhost:{env.pageserver.service_port.pg}'"
|
||||
(env.repo_dir / "config").write_text("\n".join(lines))
|
||||
|
||||
|
||||
# Simulate hard crash of pageserver and re-attach a tenant with a branch
|
||||
#
|
||||
# This exercises a race condition after tenant attach, where the
|
||||
# branch point on the ancestor timeline is greater than the ancestor's
|
||||
# last-record LSN. We had a bug where GetPage incorrectly followed the
|
||||
# timeline to the ancestor without waiting for the missing WAL to
|
||||
# arrive.
|
||||
@pytest.mark.parametrize("remote_storage_kind", available_remote_storages())
|
||||
def test_emergency_relocate_with_branches_slow_replay(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
remote_storage_kind: RemoteStorageKind,
|
||||
):
|
||||
neon_env_builder.enable_remote_storage(
|
||||
remote_storage_kind=remote_storage_kind,
|
||||
test_name="test_emergency_relocate_with_branches_slow_replay",
|
||||
)
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
env.pageserver.is_testing_enabled_or_skip()
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
# Prepare for the test:
|
||||
#
|
||||
# - Main branch, with a table and two inserts to it.
|
||||
# - A logical replication message between the inserts, so that we can conveniently
|
||||
# pause the WAL ingestion between the two inserts.
|
||||
# - Child branch, created after the inserts
|
||||
tenant_id, _ = env.neon_cli.create_tenant()
|
||||
|
||||
main_endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
||||
with main_endpoint.cursor() as cur:
|
||||
cur.execute("CREATE TABLE test_reattach (t text)")
|
||||
cur.execute("INSERT INTO test_reattach VALUES ('before pause')")
|
||||
|
||||
cur.execute("SELECT pg_logical_emit_message(false, 'neon-test', 'between inserts')")
|
||||
|
||||
cur.execute("INSERT INTO test_reattach VALUES ('after pause')")
|
||||
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
|
||||
|
||||
main_endpoint.stop()
|
||||
env.neon_cli.create_branch("child", tenant_id=tenant_id, ancestor_start_lsn=current_lsn)
|
||||
|
||||
# Now kill the pageserver, remove the tenant directory, and restart. This simulates
|
||||
# the scenario that a pageserver dies unexpectedly and cannot be recovered, so we relocate
|
||||
# the tenant to a different pageserver. We reuse the same pageserver because it's
|
||||
# simpler than initializing a new one from scratch, but the effect on the single tenant
|
||||
# is the same.
|
||||
env.pageserver.stop(immediate=True)
|
||||
shutil.rmtree(Path(env.repo_dir) / "tenants" / str(tenant_id))
|
||||
env.pageserver.start()
|
||||
|
||||
# This fail point will pause the WAL ingestion on the main branch, after the
|
||||
# the first insert
|
||||
pageserver_http.configure_failpoints([("wal-ingest-logical-message-sleep", "return(5000)")])
|
||||
|
||||
# Attach and wait a few seconds to give it time to load the tenants, attach to the
|
||||
# safekeepers, and to stream and ingest the WAL up to the pause-point.
|
||||
before_attach_time = time.time()
|
||||
pageserver_http.tenant_attach(tenant_id)
|
||||
time.sleep(3)
|
||||
|
||||
# The wal ingestion on the main timeline should now be paused at the fail point.
|
||||
# Run a query on the child branch. The GetPage requests for this should recurse to the
|
||||
# parent timeline, and wait for the WAL to be ingested there. Otherwise it won't see
|
||||
# the second insert.
|
||||
child_endpoint = env.endpoints.create_start("child", tenant_id=tenant_id)
|
||||
with child_endpoint.cursor() as cur:
|
||||
cur.execute("SELECT * FROM test_reattach")
|
||||
assert cur.fetchall() == [("before pause",), ("after pause",)]
|
||||
|
||||
# Sanity check that the failpoint was reached
|
||||
assert env.pageserver.log_contains('failpoint "wal-ingest-logical-message-sleep": sleep done')
|
||||
assert time.time() - before_attach_time > 5
|
||||
|
||||
# Clean up
|
||||
pageserver_http.configure_failpoints(("wal-ingest-logical-message-sleep", "off"))
|
||||
|
||||
|
||||
# Simulate hard crash of pageserver and re-attach a tenant with a branch
|
||||
#
|
||||
# This exercises the same race condition after as
|
||||
# 'test_emergency_relocate_with_branches_slow_replay', but this test case
|
||||
# is closer to the original scenario where we originally found the
|
||||
# issue.
|
||||
#
|
||||
# In this scenario, the incorrect result to get-request leads to
|
||||
# *permanent damage* in the child timeline, because ingesting the WAL
|
||||
# on the child timeline depended on incorrect view of the parent. This
|
||||
# test reproduced one such case; the symptom was an error on the child, when
|
||||
# trying to connect to the child endpoint after re-attaching the tenant:
|
||||
#
|
||||
# FATAL: database "neondb" does not exist
|
||||
#
|
||||
# In the original case where we bumped into this, the error was slightly
|
||||
# different:
|
||||
#
|
||||
# FATAL: "base/16385" is not a valid data directory
|
||||
# DETAIL: File "base/16385/PG_VERSION" is missing.
|
||||
#
|
||||
### Detailed explanation of the original bug and why it lead to that error:
|
||||
#
|
||||
# The WAL on the main and the child branches look like this:
|
||||
#
|
||||
# Main Child
|
||||
# 1. CREATE DATABASE
|
||||
# <child branch is created>
|
||||
# 2. CREATE TABLE AS SELECT ... 3. CREATE TABLE AS SELECT ...
|
||||
#
|
||||
# None of these WAL records have been flushed to disk or uploaded to remote
|
||||
# storage in the pageserver yet, when the tenant is detached.
|
||||
#
|
||||
# After detach and re-attach, a walreceiver is spawned on both timelines.
|
||||
# They will connect to the safekeepers and start ingesting the WAL
|
||||
# from their respective IndexParts' `disk_consistent_lsn` onward.
|
||||
#
|
||||
# The bug occurs if the child branch's walreceiver runs before the
|
||||
# main's. It receives the SMGR_CREATE WAL record emitted by the
|
||||
# CREATE TABLE statement (3.), and applies it, without seeing the
|
||||
# effect of the CREATE DATABASE statement.
|
||||
#
|
||||
# To understand why that leads to a 'File "base/16385/PG_VERSION" is
|
||||
# missing' error, let's look at what the handlers for the WAL records
|
||||
# do:
|
||||
#
|
||||
# CREATE DATABASE WAL record is handled by ingest_xlog_dbase_create:
|
||||
#
|
||||
# ingest_xlog_dbase_create:
|
||||
# put_relmap_file()
|
||||
# // NOTE 'true': It means that there is a relmapper and PG_VERSION file
|
||||
# 1: let r = dbdir.dbdirs.insert((spcnode, dbnode), true);
|
||||
#
|
||||
#
|
||||
# CREATE TABLE emits an SMGR_CREATE record, which is handled by:
|
||||
#
|
||||
# ingest_xlog_smgr_create:
|
||||
# put_rel_creation:
|
||||
# ...
|
||||
# let mut rel_dir = if dbdir.dbdirs.get(&(rel.spcnode, rel.dbnode)).is_none() {
|
||||
# 2: // Didn't exist. Update dbdir
|
||||
# dbdir.dbdirs.insert((rel.spcnode, rel.dbnode), false);
|
||||
# let buf = DbDirectory::ser(&dbdir)?;
|
||||
# self.put(DBDIR_KEY, Value::Image(buf.into()));
|
||||
#
|
||||
# // and create the RelDirectory
|
||||
# RelDirectory::default()
|
||||
# } else {
|
||||
# 3: // reldir already exists, fetch it
|
||||
# RelDirectory::des(&self.get(rel_dir_key, ctx).await?)?
|
||||
# };
|
||||
#
|
||||
#
|
||||
# In the correct ordering, the SMGR_CREATE record is applied after the
|
||||
# CREATE DATABASE record. The CREATE DATABASE creates the entry in the
|
||||
# 'dbdir', with the 'true' flag that indicates that PG_VERSION exists
|
||||
# (1). The SMGR_CREATE handler calls put_rel_creation, which finds the
|
||||
# dbdir entry that the CREATE DATABASE record created, and takes the
|
||||
# "reldir already exists, fetch it" else-branch at the if statement (3).
|
||||
#
|
||||
# In the incorrect ordering, the child walreceiver applies the
|
||||
# SMGR_CREATE record without seeing the effects of the CREATE
|
||||
# DATABASE. In that case, put_rel_creation takes the "Didn't
|
||||
# exist. Update dbir" path (2), and inserts an entry in the
|
||||
# DbDirectory with 'false' to indicate there is no PG_VERSION file.
|
||||
#
|
||||
@pytest.mark.parametrize("remote_storage_kind", available_remote_storages())
|
||||
def test_emergency_relocate_with_branches_createdb(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
remote_storage_kind: RemoteStorageKind,
|
||||
):
|
||||
neon_env_builder.enable_remote_storage(
|
||||
remote_storage_kind=remote_storage_kind,
|
||||
test_name="test_emergency_relocate_with_branches_createdb",
|
||||
)
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
# create new nenant
|
||||
tenant_id, _ = env.neon_cli.create_tenant()
|
||||
|
||||
main_endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
||||
with main_endpoint.cursor() as cur:
|
||||
cur.execute("SELECT pg_logical_emit_message(false, 'neon-test', 'between inserts')")
|
||||
|
||||
cur.execute("CREATE DATABASE neondb")
|
||||
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
|
||||
env.neon_cli.create_branch("child", tenant_id=tenant_id, ancestor_start_lsn=current_lsn)
|
||||
|
||||
with main_endpoint.cursor(dbname="neondb") as cur:
|
||||
cur.execute("CREATE TABLE test_migrate_one AS SELECT generate_series(1,100)")
|
||||
main_endpoint.stop()
|
||||
|
||||
child_endpoint = env.endpoints.create_start("child", tenant_id=tenant_id)
|
||||
with child_endpoint.cursor(dbname="neondb") as cur:
|
||||
cur.execute("CREATE TABLE test_migrate_one AS SELECT generate_series(1,200)")
|
||||
child_endpoint.stop()
|
||||
|
||||
# Kill the pageserver, remove the tenant directory, and restart
|
||||
env.pageserver.stop(immediate=True)
|
||||
shutil.rmtree(Path(env.repo_dir) / "tenants" / str(tenant_id))
|
||||
env.pageserver.start()
|
||||
|
||||
# Wait before ingesting the WAL for CREATE DATABASE on the main branch. The original
|
||||
# bug reproduced easily even without this, as there is always some delay between
|
||||
# loading the timeline and establishing the connection to the safekeeper to stream and
|
||||
# ingest the WAL, but let's make this less dependent on accidental timing.
|
||||
pageserver_http.configure_failpoints([("wal-ingest-logical-message-sleep", "return(5000)")])
|
||||
before_attach_time = time.time()
|
||||
pageserver_http.tenant_attach(tenant_id)
|
||||
|
||||
child_endpoint.start()
|
||||
with child_endpoint.cursor(dbname="neondb") as cur:
|
||||
assert query_scalar(cur, "SELECT count(*) FROM test_migrate_one") == 200
|
||||
|
||||
# Sanity check that the failpoint was reached
|
||||
assert env.pageserver.log_contains('failpoint "wal-ingest-logical-message-sleep": sleep done')
|
||||
assert time.time() - before_attach_time > 5
|
||||
|
||||
# Clean up
|
||||
pageserver_http.configure_failpoints(("wal-ingest-logical-message-sleep", "off"))
|
||||
|
||||
@@ -11,6 +11,7 @@ from fixtures.neon_fixtures import (
|
||||
wait_for_wal_insert_lsn,
|
||||
)
|
||||
from fixtures.pageserver.http import PageserverHttpClient
|
||||
from fixtures.pg_version import PgVersion, xfail_on_postgres
|
||||
from fixtures.types import Lsn, TenantId, TimelineId
|
||||
|
||||
|
||||
@@ -512,6 +513,7 @@ def test_single_branch_get_tenant_size_grows(
|
||||
assert size_after == prev, "size after restarting pageserver should not have changed"
|
||||
|
||||
|
||||
@xfail_on_postgres(PgVersion.V15, reason="Test significantly more flaky on Postgres 15")
|
||||
def test_get_tenant_size_with_multiple_branches(
|
||||
neon_env_builder: NeonEnvBuilder, test_output_dir: Path
|
||||
):
|
||||
|
||||
@@ -217,6 +217,16 @@ def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder):
|
||||
labels = ",".join([f'{key}="{value}"' for key, value in sample.labels.items()])
|
||||
log.info(f"{sample.name}{{{labels}}} {sample.value}")
|
||||
|
||||
# Test that we gather tenant create metric
|
||||
storage_operation_metrics = [
|
||||
"pageserver_storage_operations_seconds_global_bucket",
|
||||
"pageserver_storage_operations_seconds_global_sum",
|
||||
"pageserver_storage_operations_seconds_global_count",
|
||||
]
|
||||
for metric in storage_operation_metrics:
|
||||
value = ps_metrics.query_all(metric, filter={"operation": "create tenant"})
|
||||
assert value
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"remote_storage_kind",
|
||||
|
||||
@@ -1,14 +1,20 @@
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from fixtures.neon_fixtures import (
|
||||
NeonEnvBuilder,
|
||||
PgBin,
|
||||
PortDistributor,
|
||||
VanillaPostgres,
|
||||
)
|
||||
from fixtures.types import TenantId
|
||||
from fixtures.types import TenantId, TimelineId
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
sys.platform != "linux",
|
||||
reason="restore_from_wal.sh supports only Linux",
|
||||
)
|
||||
def test_wal_restore(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
pg_bin: PgBin,
|
||||
@@ -22,6 +28,7 @@ def test_wal_restore(
|
||||
endpoint = env.endpoints.create_start("test_wal_restore")
|
||||
endpoint.safe_psql("create table t as select generate_series(1,300000)")
|
||||
tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
|
||||
env.neon_cli.pageserver_stop()
|
||||
port = port_distributor.get_port()
|
||||
data_dir = test_output_dir / "pgsql.restored"
|
||||
@@ -30,9 +37,16 @@ def test_wal_restore(
|
||||
) as restored:
|
||||
pg_bin.run_capture(
|
||||
[
|
||||
str(base_dir / "libs/utils/scripts/restore_from_wal.sh"),
|
||||
str(base_dir / "libs" / "utils" / "scripts" / "restore_from_wal.sh"),
|
||||
str(pg_distrib_dir / f"v{env.pg_version}/bin"),
|
||||
str(test_output_dir / "repo" / "safekeepers" / "sk1" / str(tenant_id) / "*"),
|
||||
str(
|
||||
test_output_dir
|
||||
/ "repo"
|
||||
/ "safekeepers"
|
||||
/ "sk1"
|
||||
/ str(tenant_id)
|
||||
/ str(timeline_id)
|
||||
),
|
||||
str(data_dir),
|
||||
str(port),
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user