Compare commits

..

1 Commits

Author SHA1 Message Date
Bojan Serafimov
44feda0061 Lock in sorted order 2022-06-27 15:32:05 -04:00
93 changed files with 621 additions and 2103 deletions

View File

@@ -6,7 +6,5 @@ timeout = 30
[ssh_connection]
ssh_args = -F ./ansible.ssh.cfg
# teleport doesn't support sftp yet https://github.com/gravitational/teleport/issues/7127
# and scp neither worked for me
transfer_method = piped
scp_if_ssh = True
pipelining = True

View File

@@ -1,7 +1,3 @@
# Remove this once https://github.com/gravitational/teleport/issues/10918 is fixed
# (use pre 8.5 option name to cope with old ssh in CI)
PubkeyAcceptedKeyTypes +ssh-rsa-cert-v01@openssh.com
Host tele.zenith.tech
User admin
Port 3023

View File

@@ -12,7 +12,6 @@ pageservers
safekeepers
[storage:vars]
env_name = prod-1
console_mgmt_base_url = http://console-release.local
bucket_name = zenith-storage-oregon
bucket_region = us-west-2

View File

@@ -13,7 +13,6 @@ pageservers
safekeepers
[storage:vars]
env_name = us-stage
console_mgmt_base_url = http://console-staging.local
bucket_name = zenith-staging-storage-us-east-1
bucket_region = us-east-1

View File

@@ -6,7 +6,7 @@ After=network.target auditd.service
Type=simple
User=safekeeper
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ env_name }}/wal"}'
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -p {{ first_pageserver }}:6400 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="wal"}'
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT

View File

@@ -100,8 +100,10 @@ jobs:
name: Rust build << parameters.build_type >>
command: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
CARGO_FLAGS=
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
CARGO_FLAGS="--release --features profiling"
fi
@@ -110,7 +112,7 @@ jobs:
export RUSTC_WRAPPER=cachepot
export AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
export AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
"${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
cachepot -s
- save_cache:
@@ -126,24 +128,32 @@ jobs:
name: cargo test
command: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
CARGO_FLAGS=
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
CARGO_FLAGS=--release
fi
cargo test $CARGO_FLAGS
"${cov_prefix[@]}" cargo test $CARGO_FLAGS
# Install the rust binaries, for use by test jobs
- run:
name: Install rust binaries
command: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
fi
binaries=$(
cargo metadata --format-version=1 --no-deps |
"${cov_prefix[@]}" cargo metadata --format-version=1 --no-deps |
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
)
test_exe_paths=$(
cargo test --message-format=json --no-run |
"${cov_prefix[@]}" cargo test --message-format=json --no-run |
jq -r '.executable | select(. != null)'
)
@@ -156,15 +166,34 @@ jobs:
SRC=target/$BUILD_TYPE/$bin
DST=/tmp/zenith/bin/$bin
cp $SRC $DST
echo $DST >> /tmp/zenith/etc/binaries.list
done
# Install test executables (for code coverage)
if [[ $BUILD_TYPE == "debug" ]]; then
for bin in $test_exe_paths; do
SRC=$bin
DST=/tmp/zenith/test_bin/$(basename $bin)
cp $SRC $DST
echo $DST >> /tmp/zenith/etc/binaries.list
done
fi
# Install the postgres binaries, for use by test jobs
- run:
name: Install postgres binaries
command: |
cp -a tmp_install /tmp/zenith/pg_install
# Save rust binaries for other jobs in the workflow
- run:
name: Merge coverage data
command: |
# This will speed up workspace uploads
if [[ $BUILD_TYPE == "debug" ]]; then
scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage merge
fi
# Save the rust binaries and coverage data for other jobs in this workflow.
- persist_to_workspace:
root: /tmp/zenith
paths:
@@ -257,7 +286,7 @@ jobs:
# no_output_timeout, specified here.
no_output_timeout: 10m
environment:
- NEON_BIN: /tmp/zenith/bin
- ZENITH_BIN: /tmp/zenith/bin
- POSTGRES_DISTRIB_DIR: /tmp/zenith/pg_install
- TEST_OUTPUT: /tmp/test_output
# this variable will be embedded in perf test report
@@ -285,6 +314,12 @@ jobs:
export GITHUB_SHA=$CIRCLE_SHA1
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
fi
# Run the tests.
#
# The junit.xml file allows CircleCI to display more fine-grained test information
@@ -295,7 +330,7 @@ jobs:
# -n4 uses four processes to run tests via pytest-xdist
# -s is not used to prevent pytest from capturing output, because tests are running
# in parallel and logs are mixed between different tests
./scripts/pytest \
"${cov_prefix[@]}" ./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--tb=short \
--verbose \
@@ -324,12 +359,67 @@ jobs:
# The store_test_results step tells CircleCI where to find the junit.xml file.
- store_test_results:
path: /tmp/test_output
# Save data (if any)
- run:
name: Merge coverage data
command: |
# This will speed up workspace uploads
if [[ $BUILD_TYPE == "debug" ]]; then
scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage merge
fi
# Save coverage data (if any)
- persist_to_workspace:
root: /tmp/zenith
paths:
- "*"
coverage-report:
executor: neon-xlarge-executor
steps:
- attach_workspace:
at: /tmp/zenith
- checkout
- restore_cache:
name: Restore rust cache
keys:
# Require an exact match. While an out of date cache might speed up the build,
# there's no way to clean out old packages, so the cache grows every time something
# changes.
- v04-rust-cache-deps-debug-{{ checksum "Cargo.lock" }}
- run:
name: Build coverage report
command: |
COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1
scripts/coverage \
--dir=/tmp/zenith/coverage report \
--input-objects=/tmp/zenith/etc/binaries.list \
--commit-url=$COMMIT_URL \
--format=github
- run:
name: Upload coverage report
command: |
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
REPORT_URL=https://neondatabase.github.io/zenith-coverage-data/$CIRCLE_SHA1
COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1
scripts/git-upload \
--repo=https://$VIP_VAP_ACCESS_TOKEN@github.com/neondatabase/zenith-coverage-data.git \
--message="Add code coverage for $COMMIT_URL" \
copy /tmp/zenith/coverage/report $CIRCLE_SHA1 # COPY FROM TO_RELATIVE
# Add link to the coverage report to the commit
curl -f -X POST \
https://api.github.com/repos/$LOCAL_REPO/statuses/$CIRCLE_SHA1 \
-H "Accept: application/vnd.github.v3+json" \
--user "$CI_ACCESS_TOKEN" \
--data \
"{
\"state\": \"success\",
\"context\": \"zenith-coverage\",
\"description\": \"Coverage report is ready\",
\"target_url\": \"$REPORT_URL\"
}"
# Build neondatabase/neon:latest image and push it to Docker hub
docker-image:
docker:
@@ -598,6 +688,50 @@ jobs:
helm upgrade neon-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/production.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
helm upgrade neon-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/production.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait
# Trigger a new remote CI job
remote-ci-trigger:
docker:
- image: cimg/base:2021.04
parameters:
remote_repo:
type: string
environment:
REMOTE_REPO: << parameters.remote_repo >>
steps:
- run:
name: Set PR's status to pending
command: |
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
curl -f -X POST \
https://api.github.com/repos/$LOCAL_REPO/statuses/$CIRCLE_SHA1 \
-H "Accept: application/vnd.github.v3+json" \
--user "$CI_ACCESS_TOKEN" \
--data \
"{
\"state\": \"pending\",
\"context\": \"neon-cloud-e2e\",
\"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
}"
- run:
name: Request a remote CI test
command: |
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
curl -f -X POST \
https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
-H "Accept: application/vnd.github.v3+json" \
--user "$CI_ACCESS_TOKEN" \
--data \
"{
\"ref\": \"main\",
\"inputs\": {
\"ci_job_name\": \"neon-cloud-e2e\",
\"commit_hash\": \"$CIRCLE_SHA1\",
\"remote_repo\": \"$LOCAL_REPO\"
}
}"
workflows:
build_and_test:
jobs:
@@ -640,6 +774,12 @@ workflows:
save_perf_report: true
requires:
- build-neon-release
- coverage-report:
# Context passes credentials for gh api
context: CI_ACCESS_TOKEN
requires:
# TODO: consider adding more
- other-tests-debug
- docker-image:
# Context gives an ability to login
context: Docker Hub
@@ -740,3 +880,14 @@ workflows:
- release
requires:
- docker-image-release
- remote-ci-trigger:
# Context passes credentials for gh api
context: CI_ACCESS_TOKEN
remote_repo: "neondatabase/cloud"
requires:
# XXX: Successful build doesn't mean everything is OK, but
# the job to be triggered takes so much time to complete (~22 min)
# that it's better not to wait for the commented-out steps
- build-neon-release
# - pg_regress-tests-release
# - other-tests-release

View File

@@ -2,29 +2,25 @@ name: 'Run python test'
description: 'Runs a Neon python test set, performing all the required preparations before'
inputs:
# Select the type of Rust build. Must be "release" or "debug".
build_type:
description: 'Type of Rust (neon) and C (postgres) builds. Must be "release" or "debug".'
required: true
rust_toolchain:
description: 'Rust toolchain version to fetch the caches'
required: true
# This parameter is required, to prevent the mistake of running all tests in one job.
test_selection:
description: 'A python test suite to run'
required: true
# Arbitrary parameters to pytest. For example "-s" to prevent capturing stdout/stderr
extra_params:
description: 'Arbitrary parameters to pytest. For example "-s" to prevent capturing stdout/stderr'
required: false
default: ''
needs_postgres_source:
description: 'Set to true if the test suite requires postgres source checked out'
required: false
default: 'false'
run_in_parallel:
description: 'Whether to run tests in parallel'
required: false
default: 'true'
save_perf_report:
description: 'Whether to upload the performance report'
required: false
default: 'false'
@@ -64,7 +60,7 @@ runs:
- name: Run pytest
env:
NEON_BIN: /tmp/neon/bin
ZENITH_BIN: /tmp/neon/bin
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
TEST_OUTPUT: /tmp/test_output
# this variable will be embedded in perf test report
@@ -92,7 +88,7 @@ runs:
fi
if [[ "${{ inputs.build_type }}" == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/neon/coverage run)
elif [[ "${{ inputs.build_type }}" == "release" ]]; then
cov_prefix=()
fi
@@ -121,20 +117,3 @@ runs:
scripts/generate_and_push_perf_report.sh
fi
fi
- name: Delete all data but logs
shell: bash -ex {0}
if: always()
run: |
du -sh /tmp/test_output/*
find /tmp/test_output -type f ! -name "*.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" ! -name "*.metrics" -delete
du -sh /tmp/test_output/*
- name: Upload python test logs
if: always()
uses: actions/upload-artifact@v3
with:
retention-days: 7
if-no-files-found: error
name: python-test-${{ inputs.test_selection }}-${{ runner.os }}-${{ inputs.build_type }}-${{ inputs.rust_toolchain }}-logs
path: /tmp/test_output/

View File

@@ -1,17 +0,0 @@
name: 'Merge and upload coverage data'
description: 'Compresses and uploads the coverage data as an artifact'
runs:
using: "composite"
steps:
- name: Merge coverage data
shell: bash -ex {0}
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
- name: Upload coverage data
uses: actions/upload-artifact@v3
with:
retention-days: 7
if-no-files-found: error
name: coverage-data-artifact
path: /tmp/coverage/

View File

@@ -1,28 +1,13 @@
name: Test
on:
push:
branches:
- main
pull_request:
name: build_and_test
on: [ push ]
defaults:
run:
shell: bash -ex {0}
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
COPT: '-Werror'
jobs:
build-postgres:
runs-on: [ self-hosted, Linux, k8s-runner ]
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
rust_toolchain: [ 1.58 ]
@@ -49,7 +34,7 @@ jobs:
- name: Build postgres
if: steps.cache_pg.outputs.cache-hit != 'true'
run: mold -run make postgres -j$(nproc)
run: COPT='-Werror' mold -run make postgres -j$(nproc)
# actions/cache@v3 does not allow concurrently using the same cache across job steps, so use a separate cache
- name: Prepare postgres artifact
@@ -67,7 +52,6 @@ jobs:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-postgres ]
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
rust_toolchain: [ 1.58 ]
@@ -101,39 +85,44 @@ jobs:
~/.cargo/registry/
~/.cargo/git/
target/
# Fall back to older versions of the key, if no cache for current Cargo.lock was found
key: |
v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-
key: v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
- name: Run cargo build
run: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/neon/coverage run)
CARGO_FLAGS=
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
CARGO_FLAGS="--release --features profiling"
fi
export CACHEPOT_BUCKET=zenith-rust-cachepot
export RUSTC_WRAPPER=cachepot
export AWS_ACCESS_KEY_ID="${{ secrets.AWS_ACCESS_KEY_ID }}"
export AWS_SECRET_ACCESS_KEY="${{ secrets.AWS_SECRET_ACCESS_KEY }}"
export HOME=/home/runner
"${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
cachepot -s
- name: Run cargo test
run: |
export HOME=/home/runner
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/neon/coverage run)
CARGO_FLAGS=
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
CARGO_FLAGS=--release
fi
"${cov_prefix[@]}" cargo test $CARGO_FLAGS
- name: Install rust binaries
run: |
export HOME=/home/runner
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/neon/coverage run)
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
fi
@@ -148,36 +137,39 @@ jobs:
jq -r '.executable | select(. != null)'
)
mkdir -p /tmp/neon/bin/
mkdir -p /tmp/neon/test_bin/
mkdir -p /tmp/neon/etc/
# Keep bloated coverage data files away from the rest of the artifact
mkdir -p /tmp/coverage/
mkdir -p /tmp/neon/bin
mkdir -p /tmp/neon/test_bin
mkdir -p /tmp/neon/etc
# Install target binaries
for bin in $binaries; do
SRC=target/$BUILD_TYPE/$bin
DST=/tmp/neon/bin/$bin
cp "$SRC" "$DST"
cp $SRC $DST
echo $DST >> /tmp/neon/etc/binaries.list
done
# Install test executables and write list of all binaries (for code coverage)
# Install test executables (for code coverage)
if [[ $BUILD_TYPE == "debug" ]]; then
for bin in $binaries; do
echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
done
for bin in $test_exe_paths; do
SRC=$bin
DST=/tmp/neon/test_bin/$(basename $bin)
cp "$SRC" "$DST"
echo "$DST" >> /tmp/coverage/binaries.list
cp $SRC $DST
echo $DST >> /tmp/neon/etc/binaries.list
done
fi
- name: Install postgres binaries
run: cp -a tmp_install /tmp/neon/pg_install
- name: Merge coverage data
run: |
export HOME=/home/runner
# This will speed up workspace uploads
if [[ $BUILD_TYPE == "debug" ]]; then
scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/neon/coverage merge
fi
- name: Prepare neon artifact
run: tar -C /tmp/neon/ -czf ./neon.tgz .
@@ -189,17 +181,38 @@ jobs:
name: neon-${{ runner.os }}-${{ matrix.build_type }}-${{ matrix.rust_toolchain }}-artifact
path: ./neon.tgz
# XXX: keep this after the binaries.list is formed, so the coverage can properly work later
- name: Merge and upload coverage data
if: matrix.build_type == 'debug'
uses: ./.github/actions/save-coverage-data
check-codestyle-python:
runs-on: [ self-hosted, Linux, k8s-runner ]
strategy:
matrix:
rust_toolchain: [ 1.58 ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
run: ./scripts/pysync
- name: Run yapf to ensure code format
run: poetry run yapf --recursive --diff .
- name: Run mypy to check types
run: poetry run mypy .
pg_regress-tests:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-neon ]
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
rust_toolchain: [ 1.58 ]
@@ -218,15 +231,10 @@ jobs:
test_selection: batch_pg_regress
needs_postgres_source: true
- name: Merge and upload coverage data
if: matrix.build_type == 'debug'
uses: ./.github/actions/save-coverage-data
other-tests:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-neon ]
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
rust_toolchain: [ 1.58 ]
@@ -244,15 +252,10 @@ jobs:
rust_toolchain: ${{ matrix.rust_toolchain }}
test_selection: batch_others
- name: Merge and upload coverage data
if: matrix.build_type == 'debug'
uses: ./.github/actions/save-coverage-data
benchmarks:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-neon ]
strategy:
fail-fast: false
matrix:
build_type: [ release ]
rust_toolchain: [ 1.58 ]
@@ -270,120 +273,4 @@ jobs:
rust_toolchain: ${{ matrix.rust_toolchain }}
test_selection: performance
run_in_parallel: false
save_perf_report: true
# XXX: no coverage data handling here, since benchmarks are run on release builds,
# while coverage is currently collected for the debug ones
coverage-report:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ other-tests, pg_regress-tests ]
strategy:
fail-fast: false
matrix:
build_type: [ debug ]
rust_toolchain: [ 1.58 ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Restore cargo deps cache
id: cache_cargo
uses: actions/cache@v3
with:
path: |
~/.cargo/registry/
~/.cargo/git/
target/
key: v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
- name: Get Neon artifact for restoration
uses: actions/download-artifact@v3
with:
name: neon-${{ runner.os }}-${{ matrix.build_type }}-${{ matrix.rust_toolchain }}-artifact
path: ./neon-artifact/
- name: Extract Neon artifact
run: |
mkdir -p /tmp/neon/
tar -xf ./neon-artifact/neon.tgz -C /tmp/neon/
rm -rf ./neon-artifact/
- name: Restore coverage data
uses: actions/download-artifact@v3
with:
name: coverage-data-artifact
path: /tmp/coverage/
- name: Merge coverage data
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
- name: Build and upload coverage report
run: |
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
COMMIT_URL=https://github.com/${{ github.repository }}/commit/$COMMIT_SHA
scripts/coverage \
--dir=/tmp/coverage report \
--input-objects=/tmp/coverage/binaries.list \
--commit-url=$COMMIT_URL \
--format=github
REPORT_URL=https://${{ github.repository_owner }}.github.io/zenith-coverage-data/$COMMIT_SHA
scripts/git-upload \
--repo=https://${{ secrets.VIP_VAP_ACCESS_TOKEN }}@github.com/${{ github.repository_owner }}/zenith-coverage-data.git \
--message="Add code coverage for $COMMIT_URL" \
copy /tmp/coverage/report $COMMIT_SHA # COPY FROM TO_RELATIVE
# Add link to the coverage report to the commit
curl -f -X POST \
https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"state\": \"success\",
\"context\": \"neon-coverage\",
\"description\": \"Coverage report is ready\",
\"target_url\": \"$REPORT_URL\"
}"
trigger-e2e-tests:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-neon ]
steps:
- name: Set PR's status to pending and request a remote CI test
run: |
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
REMOTE_REPO="${{ github.repository_owner }}/cloud"
curl -f -X POST \
https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"state\": \"pending\",
\"context\": \"neon-cloud-e2e\",
\"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
}"
curl -f -X POST \
https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"ref\": \"main\",
\"inputs\": {
\"ci_job_name\": \"neon-cloud-e2e\",
\"commit_hash\": \"$COMMIT_SHA\",
\"remote_repo\": \"${{ github.repository }}\"
}
}"
# save_perf_report: true

View File

@@ -1,74 +0,0 @@
name: Test Postgres client libraries
on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '23 02 * * *' # run once a day, timezone is utc
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
test-postgres-client-libs:
runs-on: [ ubuntu-latest ]
steps:
- name: Checkout
uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install Poetry
uses: snok/install-poetry@v1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
shell: bash -ex {0}
run: ./scripts/pysync
- name: Run pytest
env:
REMOTE_ENV: 1
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
TEST_OUTPUT: /tmp/test_output
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
# this variable will be embedded in perf test report
# and is needed to distinguish different environments
PLATFORM: github-actions-selfhosted
shell: bash -ex {0}
run: |
# Test framework expects we have psql binary;
# but since we don't really need it in this test, let's mock it
mkdir -p "$POSTGRES_DISTRIB_DIR/bin" && touch "$POSTGRES_DISTRIB_DIR/bin/psql";
./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--tb=short \
--verbose \
-m "remote_cluster" \
-rA "test_runner/pg_clients"
- name: Post to a Slack channel
if: failure()
id: slack
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Testing Postgres clients: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

View File

@@ -1,4 +1,4 @@
name: Check code style and build
name: Build and Test
on:
push:
@@ -6,27 +6,15 @@ on:
- main
pull_request:
defaults:
run:
shell: bash -ex {0}
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
jobs:
check-codestyle-rust:
regression-check:
strategy:
fail-fast: false
matrix:
# If we want to duplicate this job for different
# Rust toolchains (e.g. nightly or 1.37.0), add them here.
rust_toolchain: [1.58]
os: [ubuntu-latest, macos-latest]
timeout-minutes: 50
timeout-minutes: 30
name: run regression test suite
runs-on: ${{ matrix.os }}
@@ -104,30 +92,5 @@ jobs:
- name: Run cargo clippy
run: ./run_clippy.sh
- name: Ensure all project builds
run: cargo build --all --all-targets
check-codestyle-python:
runs-on: [ self-hosted, Linux, k8s-runner ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: false
fetch-depth: 1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-codestyle-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
run: ./scripts/pysync
- name: Run yapf to ensure code format
run: poetry run yapf --recursive --diff .
- name: Run mypy to check types
run: poetry run mypy .
- name: Run cargo test
run: cargo test --all --all-targets

1
Cargo.lock generated
View File

@@ -461,7 +461,6 @@ dependencies = [
"tar",
"tokio",
"tokio-postgres",
"url",
"workspace_hack",
]

View File

@@ -1,5 +1,5 @@
# Build Postgres
FROM neondatabase/rust:1.58 AS pg-build
FROM zimg/rust:1.58 AS pg-build
WORKDIR /pg
USER root
@@ -14,7 +14,7 @@ RUN set -e \
&& tar -C tmp_install -czf /postgres_install.tar.gz .
# Build zenith binaries
FROM neondatabase/rust:1.58 AS build
FROM zimg/rust:1.58 AS build
ARG GIT_VERSION=local
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
@@ -46,9 +46,9 @@ RUN set -e \
&& useradd -d /data zenith \
&& chown -R zenith:zenith /data
COPY --from=build --chown=zenith:zenith /home/runner/target/release/pageserver /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/runner/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/runner/target/release/proxy /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/pageserver /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/proxy /usr/local/bin
COPY --from=pg-build /pg/tmp_install/ /usr/local/
COPY --from=pg-build /postgres_install.tar.gz /data/

View File

@@ -1,6 +1,6 @@
# First transient image to build compute_tools binaries
# NB: keep in sync with rust image version in .circle/config.yml
FROM neondatabase/rust:1.58 AS rust-build
FROM zimg/rust:1.58 AS rust-build
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
ARG AWS_ACCESS_KEY_ID
@@ -15,4 +15,4 @@ RUN set -e \
# Final image that only has one binary
FROM debian:buster-slim
COPY --from=rust-build /home/runner/target/release/compute_ctl /usr/local/bin/compute_ctl
COPY --from=rust-build /home/circleci/project/target/release/compute_ctl /usr/local/bin/compute_ctl

View File

@@ -53,7 +53,7 @@ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
1. Install XCode and dependencies
```
xcode-select --install
brew install protobuf etcd openssl
brew install protobuf etcd
```
2. [Install Rust](https://www.rust-lang.org/tools/install)

View File

@@ -18,5 +18,4 @@ serde_json = "1"
tar = "0.4"
tokio = { version = "1.17", features = ["macros", "rt", "rt-multi-thread"] }
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
url = "2.2.2"
workspace_hack = { version = "0.1", path = "../workspace_hack" }

View File

@@ -33,7 +33,7 @@ use std::process::exit;
use std::sync::{Arc, RwLock};
use std::{thread, time::Duration};
use anyhow::{Context, Result};
use anyhow::Result;
use chrono::Utc;
use clap::Arg;
use log::{error, info};
@@ -45,7 +45,6 @@ use compute_tools::monitor::launch_monitor;
use compute_tools::params::*;
use compute_tools::pg_helpers::*;
use compute_tools::spec::*;
use url::Url;
fn main() -> Result<()> {
// TODO: re-use `utils::logging` later
@@ -132,7 +131,7 @@ fn main() -> Result<()> {
let compute_state = ComputeNode {
start_time: Utc::now(),
connstr: Url::parse(connstr).context("cannot parse connstr as a URL")?,
connstr: connstr.to_string(),
pgdata: pgdata.to_string(),
pgbin: pgbin.to_string(),
spec,

View File

@@ -1,3 +1,5 @@
use std::sync::Arc;
use anyhow::{anyhow, Result};
use log::error;
use postgres::Client;
@@ -21,8 +23,9 @@ pub fn create_writablity_check_data(client: &mut Client) -> Result<()> {
Ok(())
}
pub async fn check_writability(compute: &ComputeNode) -> Result<()> {
let (client, connection) = tokio_postgres::connect(compute.connstr.as_str(), NoTls).await?;
pub async fn check_writability(compute: &Arc<ComputeNode>) -> Result<()> {
let connstr = &compute.connstr;
let (client, connection) = tokio_postgres::connect(connstr, NoTls).await?;
if client.is_closed() {
return Err(anyhow!("connection to postgres closed"));
}

View File

@@ -35,8 +35,7 @@ use crate::spec::*;
/// Compute node info shared across several `compute_ctl` threads.
pub struct ComputeNode {
pub start_time: DateTime<Utc>,
// Url type maintains proper escaping
pub connstr: url::Url,
pub connstr: String,
pub pgdata: String,
pub pgbin: String,
pub spec: ComputeSpec,
@@ -269,32 +268,27 @@ impl ComputeNode {
// In this case we need to connect with old `zenith_admin`name
// and create new user. We cannot simply rename connected user,
// but we can create a new one and grant it all privileges.
let mut client = match Client::connect(self.connstr.as_str(), NoTls) {
let mut client = match Client::connect(&self.connstr, NoTls) {
Err(e) => {
info!(
"cannot connect to postgres: {}, retrying with `zenith_admin` username",
e
);
let mut zenith_admin_connstr = self.connstr.clone();
let zenith_admin_connstr = self.connstr.replacen("cloud_admin", "zenith_admin", 1);
zenith_admin_connstr
.set_username("zenith_admin")
.map_err(|_| anyhow::anyhow!("invalid connstr"))?;
let mut client = Client::connect(zenith_admin_connstr.as_str(), NoTls)?;
let mut client = Client::connect(&zenith_admin_connstr, NoTls)?;
client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
drop(client);
// reconnect with connsting with expected name
Client::connect(self.connstr.as_str(), NoTls)?
Client::connect(&self.connstr, NoTls)?
}
Ok(client) => client,
};
handle_roles(&self.spec, &mut client)?;
handle_databases(&self.spec, &mut client)?;
handle_role_deletions(self, &mut client)?;
handle_grants(&self.spec, &mut client)?;
create_writablity_check_data(&mut client)?;

View File

@@ -13,11 +13,11 @@ const MONITOR_CHECK_INTERVAL: u64 = 500; // milliseconds
// Spin in a loop and figure out the last activity time in the Postgres.
// Then update it in the shared state. This function never errors out.
// XXX: the only expected panic is at `RwLock` unwrap().
fn watch_compute_activity(compute: &ComputeNode) {
fn watch_compute_activity(compute: &Arc<ComputeNode>) {
// Suppose that `connstr` doesn't change
let connstr = compute.connstr.as_str();
let connstr = compute.connstr.clone();
// Define `client` outside of the loop to reuse existing connection if it's active.
let mut client = Client::connect(connstr, NoTls);
let mut client = Client::connect(&connstr, NoTls);
let timeout = time::Duration::from_millis(MONITOR_CHECK_INTERVAL);
info!("watching Postgres activity at {}", connstr);
@@ -32,7 +32,7 @@ fn watch_compute_activity(compute: &ComputeNode) {
info!("connection to postgres closed, trying to reconnect");
// Connection is closed, reconnect and try again.
client = Client::connect(connstr, NoTls);
client = Client::connect(&connstr, NoTls);
continue;
}
@@ -93,7 +93,7 @@ fn watch_compute_activity(compute: &ComputeNode) {
debug!("cannot connect to postgres: {}, retrying", e);
// Establish a new connection and try again.
client = Client::connect(connstr, NoTls);
client = Client::connect(&connstr, NoTls);
}
}
}

View File

@@ -1,4 +1,3 @@
use std::fmt::Write;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::net::{SocketAddr, TcpStream};
@@ -139,11 +138,9 @@ impl Role {
// Now we also support SCRAM-SHA-256 and to preserve compatibility
// we treat all encrypted_password as md5 unless they starts with SCRAM-SHA-256.
if pass.starts_with("SCRAM-SHA-256") {
write!(params, " PASSWORD '{pass}'")
.expect("String is documented to not to error during write operations");
params.push_str(&format!(" PASSWORD '{}'", pass));
} else {
write!(params, " PASSWORD 'md5{pass}'")
.expect("String is documented to not to error during write operations");
params.push_str(&format!(" PASSWORD 'md5{}'", pass));
}
} else {
params.push_str(" PASSWORD NULL");
@@ -161,8 +158,7 @@ impl Database {
/// it may require a proper quoting too.
pub fn to_pg_options(&self) -> String {
let mut params: String = self.options.as_pg_options();
write!(params, " OWNER {}", &self.owner.quote())
.expect("String is documented to not to error during write operations");
params.push_str(&format!(" OWNER {}", &self.owner.quote()));
params
}

View File

@@ -2,10 +2,9 @@ use std::path::Path;
use anyhow::Result;
use log::{info, log_enabled, warn, Level};
use postgres::{Client, NoTls};
use postgres::Client;
use serde::Deserialize;
use crate::compute::ComputeNode;
use crate::config;
use crate::params::PG_HBA_ALL_MD5;
use crate::pg_helpers::*;
@@ -98,13 +97,18 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
// Process delta operations first
if let Some(ops) = &spec.delta_operations {
info!("processing role renames");
info!("processing delta operations on roles");
for op in ops {
match op.action.as_ref() {
// We do not check either role exists or not,
// Postgres will take care of it for us
"delete_role" => {
// no-op now, roles will be deleted at the end of configuration
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.quote());
warn!("deleting role '{}'", &op.name);
xact.execute(query.as_str(), &[])?;
}
// Renaming role drops its password, since role name is
// Renaming role drops its password, since tole name is
// used as a salt there. It is important that this role
// is recorded with a new `name` in the `roles` list.
// Follow up roles update will set the new password.
@@ -178,7 +182,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
xact.execute(query.as_str(), &[])?;
let grant_query = format!(
"GRANT pg_read_all_data, pg_write_all_data TO {}",
"grant pg_read_all_data, pg_write_all_data to {}",
name.quote()
);
xact.execute(grant_query.as_str(), &[])?;
@@ -193,70 +197,6 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
Ok(())
}
/// Reassign all dependent objects and delete requested roles.
pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<()> {
let spec = &node.spec;
// First, reassign all dependent objects to db owners.
if let Some(ops) = &spec.delta_operations {
info!("reassigning dependent objects of to-be-deleted roles");
for op in ops {
if op.action == "delete_role" {
reassign_owned_objects(node, &op.name)?;
}
}
}
// Second, proceed with role deletions.
let mut xact = client.transaction()?;
if let Some(ops) = &spec.delta_operations {
info!("processing role deletions");
for op in ops {
// We do not check either role exists or not,
// Postgres will take care of it for us
if op.action == "delete_role" {
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.quote());
warn!("deleting role '{}'", &op.name);
xact.execute(query.as_str(), &[])?;
}
}
}
Ok(())
}
// Reassign all owned objects in all databases to the owner of the database.
fn reassign_owned_objects(node: &ComputeNode, role_name: &PgIdent) -> Result<()> {
for db in &node.spec.cluster.databases {
if db.owner != *role_name {
let mut connstr = node.connstr.clone();
// database name is always the last and the only component of the path
connstr.set_path(&db.name);
let mut client = Client::connect(connstr.as_str(), NoTls)?;
// This will reassign all dependent objects to the db owner
let reassign_query = format!(
"REASSIGN OWNED BY {} TO {}",
role_name.quote(),
db.owner.quote()
);
info!(
"reassigning objects owned by '{}' in db '{}' to '{}'",
role_name, &db.name, &db.owner
);
client.simple_query(&reassign_query)?;
// This now will only drop privileges of the role
let drop_query = format!("DROP OWNED BY {}", role_name.quote());
client.simple_query(&drop_query)?;
}
}
Ok(())
}
/// It follows mostly the same logic as `handle_roles()` excepting that we
/// does not use an explicit transactions block, since major database operations
/// like `CREATE DATABASE` and `DROP DATABASE` do not support it. Statement-level
@@ -354,26 +294,13 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
pub fn handle_grants(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
info!("cluster spec grants:");
// We now have a separate `web_access` role to connect to the database
// via the web interface and proxy link auth. And also we grant a
// read / write all data privilege to every role. So also grant
// create to everyone.
// XXX: later we should stop messing with Postgres ACL in such horrible
// ways.
let roles = spec
.cluster
.roles
.iter()
.map(|r| r.name.quote())
.collect::<Vec<_>>();
for db in &spec.cluster.databases {
let dbname = &db.name;
let query: String = format!(
"GRANT CREATE ON DATABASE {} TO {}",
dbname.quote(),
roles.join(", ")
db.owner.quote()
);
info!("grant query {}", &query);

View File

@@ -403,6 +403,16 @@ impl LocalEnv {
self.pg_distrib_dir.display()
);
}
for binary in ["pageserver", "safekeeper"] {
if !self.zenith_distrib_dir.join(binary).exists() {
bail!(
"Can't find binary '{}' in zenith distrib dir '{}'",
binary,
self.zenith_distrib_dir.display()
);
}
}
for binary in ["pageserver", "safekeeper"] {
if !self.zenith_distrib_dir.join(binary).exists() {
bail!(
@@ -411,6 +421,12 @@ impl LocalEnv {
);
}
}
if !self.pg_distrib_dir.join("bin/postgres").exists() {
bail!(
"Can't find postgres binary at {}",
self.pg_distrib_dir.display()
);
}
fs::create_dir(&base_path)?;

View File

@@ -263,8 +263,6 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
// start profiler (if enabled)
let profiler_guard = profiling::init_profiler(conf);
pageserver::tenant_tasks::init_tenant_task_pool()?;
// initialize authentication for incoming connections
let auth = match &conf.auth_type {
AuthType::Trust | AuthType::MD5 => None,

View File

@@ -158,18 +158,6 @@ pub struct LayeredRepository {
// Global pageserver config parameters
pub conf: &'static PageServerConf,
// Allows us to gracefully cancel operations that edit the directory
// that backs this layered repository. Usage:
//
// Use `let _guard = file_lock.try_read()` while writing any files.
// Use `let _guard = file_lock.write().unwrap()` to wait for all writes to finish.
//
// TODO try_read this lock during checkpoint as well to prevent race
// between checkpoint and detach/delete.
// TODO try_read this lock for all gc/compaction operations, not just
// ones scheduled by the tenant task manager.
pub file_lock: RwLock<()>,
// Overridden tenant-specific config parameters.
// We keep TenantConfOpt sturct here to preserve the information
// about parameters that are not set.
@@ -349,12 +337,16 @@ impl Repository for LayeredRepository {
// compactions. We don't want to block everything else while the
// compaction runs.
let timelines = self.timelines.lock().unwrap();
let timelines_to_compact = timelines
let mut timelines_to_compact = timelines
.iter()
.map(|(timelineid, timeline)| (*timelineid, timeline.clone()))
.collect::<Vec<_>>();
drop(timelines);
// Sort to prevent deadlock
timelines_to_compact.sort_by(|a, b| a.0.cmp(&b.0));
// Compact all timelines in order
for (timelineid, timeline) in &timelines_to_compact {
let _entered =
info_span!("compact", timeline = %timelineid, tenant = %self.tenant_id).entered();
@@ -697,7 +689,6 @@ impl LayeredRepository {
) -> LayeredRepository {
LayeredRepository {
tenant_id,
file_lock: RwLock::new(()),
conf,
tenant_conf: Arc::new(RwLock::new(tenant_conf)),
timelines: Mutex::new(HashMap::new()),
@@ -1923,28 +1914,15 @@ impl LayeredTimeline {
} else {
Lsn(0)
};
// Let's consider an example:
//
// delta layer with LSN range 71-81
// delta layer with LSN range 81-91
// delta layer with LSN range 91-101
// image layer at LSN 100
//
// If 'lsn' is still 100, i.e. no new WAL has been processed since the last image layer,
// there's no need to create a new one. We check this case explicitly, to avoid passing
// a bogus range to count_deltas below, with start > end. It's even possible that there
// are some delta layers *later* than current 'lsn', if more WAL was processed and flushed
// after we read last_record_lsn, which is passed here in the 'lsn' argument.
if img_lsn < lsn {
let num_deltas = layers.count_deltas(&img_range, &(img_lsn..lsn))?;
debug!(
"key range {}-{}, has {} deltas on this timeline in LSN range {}..{}",
img_range.start, img_range.end, num_deltas, img_lsn, lsn
);
if num_deltas >= self.get_image_creation_threshold() {
return Ok(true);
}
let num_deltas = layers.count_deltas(&img_range, &(img_lsn..lsn))?;
debug!(
"range {}-{}, has {} deltas on this timeline",
img_range.start, img_range.end, num_deltas
);
if num_deltas >= self.get_image_creation_threshold() {
return Ok(true);
}
}
}
@@ -2236,9 +2214,6 @@ impl LayeredTimeline {
LsnForTimestamp::Past(lsn) => {
debug!("past({})", lsn);
}
LsnForTimestamp::NoData(lsn) => {
debug!("nodata({})", lsn);
}
}
debug!("pitr_cutoff_lsn = {:?}", pitr_cutoff_lsn)
}

View File

@@ -34,7 +34,7 @@ pub trait BlobCursor {
) -> Result<(), std::io::Error>;
}
impl<R> BlobCursor for BlockCursor<R>
impl<'a, R> BlobCursor for BlockCursor<R>
where
R: BlockReader,
{

View File

@@ -445,10 +445,7 @@ impl ImageLayerWriter {
},
);
info!("new image layer {}", path.display());
let mut file = VirtualFile::open_with_options(
&path,
std::fs::OpenOptions::new().write(true).create_new(true),
)?;
let mut file = VirtualFile::create(&path)?;
// make room for the header block
file.seek(SeekFrom::Start(PAGE_SZ as u64))?;
let blob_writer = WriteBlobWriter::new(file, PAGE_SZ as u64);

View File

@@ -13,7 +13,7 @@ pub mod repository;
pub mod storage_sync;
pub mod tenant_config;
pub mod tenant_mgr;
pub mod tenant_tasks;
pub mod tenant_threads;
pub mod thread_mgr;
pub mod timelines;
pub mod virtual_file;

View File

@@ -554,7 +554,7 @@ impl PageServerHandler {
// Create empty timeline
info!("creating new timeline");
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
let timeline = repo.create_empty_timeline(timeline_id, base_lsn)?;
let timeline = repo.create_empty_timeline(timeline_id, Lsn(0))?;
let repartition_distance = repo.get_checkpoint_distance();
let mut datadir_timeline =
DatadirTimeline::<LayeredRepository>::new(timeline, repartition_distance);
@@ -1151,7 +1151,6 @@ impl postgres_backend::Handler for PageServerHandler {
LsnForTimestamp::Present(lsn) => format!("{}", lsn),
LsnForTimestamp::Future(_lsn) => "future".into(),
LsnForTimestamp::Past(_lsn) => "past".into(),
LsnForTimestamp::NoData(_lsn) => "nodata".into(),
};
pgb.write_message_noflush(&BeMessage::DataRow(&[Some(result.as_bytes())]))?;
pgb.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;

View File

@@ -51,7 +51,6 @@ pub enum LsnForTimestamp {
Present(Lsn),
Future(Lsn),
Past(Lsn),
NoData(Lsn),
}
impl<R: Repository> DatadirTimeline<R> {
@@ -264,7 +263,7 @@ impl<R: Repository> DatadirTimeline<R> {
(false, false) => {
// This can happen if no commit records have been processed yet, e.g.
// just after importing a cluster.
Ok(LsnForTimestamp::NoData(max_lsn))
bail!("no commit timestamps found");
}
(true, false) => {
// Didn't find any commit timestamps larger than the request

View File

@@ -81,12 +81,6 @@ mod profiling_impl {
pub struct DummyProfilerGuard;
impl Drop for DummyProfilerGuard {
fn drop(&mut self) {
// do nothing, this exists to calm Clippy down
}
}
pub fn profpoint_start(
_conf: &PageServerConf,
_point: ProfilingConfig,

View File

@@ -37,7 +37,7 @@ pub mod defaults {
pub const DEFAULT_PITR_INTERVAL: &str = "30 days";
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "2 seconds";
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10 * 1024 * 1024;
pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10_000;
}
/// Per-tenant configuration options

View File

@@ -230,6 +230,8 @@ pub fn shutdown_all_tenants() {
drop(m);
thread_mgr::shutdown_threads(Some(ThreadKind::WalReceiverManager), None, None);
thread_mgr::shutdown_threads(Some(ThreadKind::GarbageCollector), None, None);
thread_mgr::shutdown_threads(Some(ThreadKind::Compactor), None, None);
// Ok, no background threads running anymore. Flush any remaining data in
// memory to disk.
@@ -328,12 +330,44 @@ pub fn set_tenant_state(tenant_id: ZTenantId, new_state: TenantState) -> anyhow:
}
(TenantState::Idle, TenantState::Active) => {
info!("activating tenant {tenant_id}");
let compactor_spawn_result = thread_mgr::spawn(
ThreadKind::Compactor,
Some(tenant_id),
None,
"Compactor thread",
false,
move || crate::tenant_threads::compact_loop(tenant_id),
);
if compactor_spawn_result.is_err() {
let mut m = tenants_state::write_tenants();
m.get_mut(&tenant_id)
.with_context(|| format!("Tenant not found for id {tenant_id}"))?
.state = old_state;
drop(m);
}
compactor_spawn_result?;
// Spawn gc and compaction loops. The loops will shut themselves
// down when they notice that the tenant is inactive.
// TODO maybe use tokio::sync::watch instead?
crate::tenant_tasks::start_compaction_loop(tenant_id)?;
crate::tenant_tasks::start_gc_loop(tenant_id)?;
let gc_spawn_result = thread_mgr::spawn(
ThreadKind::GarbageCollector,
Some(tenant_id),
None,
"GC thread",
false,
move || crate::tenant_threads::gc_loop(tenant_id),
)
.map(|_thread_id| ()) // update the `Result::Ok` type to match the outer function's return signature
.with_context(|| format!("Failed to launch GC thread for tenant {tenant_id}"));
if let Err(e) = &gc_spawn_result {
let mut m = tenants_state::write_tenants();
m.get_mut(&tenant_id)
.with_context(|| format!("Tenant not found for id {tenant_id}"))?
.state = old_state;
drop(m);
error!("Failed to start GC thread for tenant {tenant_id}, stopping its checkpointer thread: {e:?}");
thread_mgr::shutdown_threads(Some(ThreadKind::Compactor), Some(tenant_id), None);
return gc_spawn_result;
}
}
(TenantState::Idle, TenantState::Stopping) => {
info!("stopping idle tenant {tenant_id}");
@@ -345,10 +379,8 @@ pub fn set_tenant_state(tenant_id: ZTenantId, new_state: TenantState) -> anyhow:
Some(tenant_id),
None,
);
// Wait until all gc/compaction tasks finish
let repo = get_repository_for_tenant(tenant_id)?;
let _guard = repo.file_lock.write().unwrap();
thread_mgr::shutdown_threads(Some(ThreadKind::GarbageCollector), Some(tenant_id), None);
thread_mgr::shutdown_threads(Some(ThreadKind::Compactor), Some(tenant_id), None);
}
}

View File

@@ -1,288 +0,0 @@
//! This module contains functions to serve per-tenant background processes,
//! such as compaction and GC
use std::collections::HashMap;
use std::ops::ControlFlow;
use std::time::Duration;
use crate::repository::Repository;
use crate::tenant_mgr::TenantState;
use crate::thread_mgr::ThreadKind;
use crate::{tenant_mgr, thread_mgr};
use anyhow::{self, Context};
use futures::stream::FuturesUnordered;
use futures::StreamExt;
use metrics::{register_int_counter_vec, IntCounterVec};
use once_cell::sync::{Lazy, OnceCell};
use tokio::sync::mpsc;
use tokio::sync::watch;
use tracing::*;
use utils::zid::ZTenantId;
static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_tenant_task_events",
"Number of task start/stop/fail events.",
&["event"],
)
.expect("Failed to register tenant_task_events metric")
});
///
/// Compaction task's main loop
///
async fn compaction_loop(tenantid: ZTenantId, mut cancel: watch::Receiver<()>) {
loop {
trace!("waking up");
// Run blocking part of the task
let period: Result<Result<_, anyhow::Error>, _> = tokio::task::spawn_blocking(move || {
// Break if tenant is not active
if tenant_mgr::get_tenant_state(tenantid) != Some(TenantState::Active) {
return Ok(ControlFlow::Break(()));
}
// Break if we're not allowed to write to disk
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
// TODO do this inside repo.compaction_iteration instead.
let _guard = match repo.file_lock.try_read() {
Ok(g) => g,
Err(_) => return Ok(ControlFlow::Break(())),
};
// Run compaction
let compaction_period = repo.get_compaction_period();
repo.compaction_iteration()?;
Ok(ControlFlow::Continue(compaction_period))
})
.await;
// Decide whether to sleep or break
let sleep_duration = match period {
Ok(Ok(ControlFlow::Continue(period))) => period,
Ok(Ok(ControlFlow::Break(()))) => break,
Ok(Err(e)) => {
error!("Compaction failed, retrying: {}", e);
Duration::from_secs(2)
}
Err(e) => {
error!("Compaction join error, retrying: {}", e);
Duration::from_secs(2)
}
};
// Sleep
tokio::select! {
_ = cancel.changed() => {
trace!("received cancellation request");
break;
},
_ = tokio::time::sleep(sleep_duration) => {},
}
}
trace!(
"compaction loop stopped. State is {:?}",
tenant_mgr::get_tenant_state(tenantid)
);
}
static START_GC_LOOP: OnceCell<mpsc::Sender<ZTenantId>> = OnceCell::new();
static START_COMPACTION_LOOP: OnceCell<mpsc::Sender<ZTenantId>> = OnceCell::new();
/// Spawn a task that will periodically schedule garbage collection until
/// the tenant becomes inactive. This should be called on tenant
/// activation.
pub fn start_gc_loop(tenantid: ZTenantId) -> anyhow::Result<()> {
START_GC_LOOP
.get()
.context("Failed to get START_GC_LOOP")?
.blocking_send(tenantid)
.context("Failed to send to START_GC_LOOP channel")?;
Ok(())
}
/// Spawn a task that will periodically schedule compaction until
/// the tenant becomes inactive. This should be called on tenant
/// activation.
pub fn start_compaction_loop(tenantid: ZTenantId) -> anyhow::Result<()> {
START_COMPACTION_LOOP
.get()
.context("failed to get START_COMPACTION_LOOP")?
.blocking_send(tenantid)
.context("failed to send to START_COMPACTION_LOOP")?;
Ok(())
}
/// Spawn the TenantTaskManager
/// This needs to be called before start_gc_loop or start_compaction_loop
pub fn init_tenant_task_pool() -> anyhow::Result<()> {
let runtime = tokio::runtime::Builder::new_multi_thread()
.thread_name("tenant-task-worker")
.worker_threads(40) // Way more than necessary
.max_blocking_threads(100) // Way more than necessary
.enable_all()
.build()?;
let (gc_send, mut gc_recv) = mpsc::channel::<ZTenantId>(100);
START_GC_LOOP
.set(gc_send)
.expect("Failed to set START_GC_LOOP");
let (compaction_send, mut compaction_recv) = mpsc::channel::<ZTenantId>(100);
START_COMPACTION_LOOP
.set(compaction_send)
.expect("Failed to set START_COMPACTION_LOOP");
// TODO this is getting repetitive
let mut gc_loops = HashMap::<ZTenantId, watch::Sender<()>>::new();
let mut compaction_loops = HashMap::<ZTenantId, watch::Sender<()>>::new();
thread_mgr::spawn(
ThreadKind::TenantTaskManager,
None,
None,
"Tenant task manager main thread",
true,
move || {
runtime.block_on(async move {
let mut futures = FuturesUnordered::new();
loop {
tokio::select! {
_ = thread_mgr::shutdown_watcher() => {
// Send cancellation to all tasks
for (_, cancel) in gc_loops.drain() {
cancel.send(()).ok();
}
for (_, cancel) in compaction_loops.drain() {
cancel.send(()).ok();
}
// Exit after all tasks finish
while let Some(result) = futures.next().await {
match result {
Ok(()) => {
TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc();
},
Err(e) => {
TENANT_TASK_EVENTS.with_label_values(&["panic"]).inc();
error!("loop join error {}", e)
},
}
}
break;
},
tenantid = gc_recv.recv() => {
let tenantid = tenantid.expect("Gc task channel closed unexpectedly");
// Spawn new task, request cancellation of the old one if exists
let (cancel_send, cancel_recv) = watch::channel(());
let handle = tokio::spawn(gc_loop(tenantid, cancel_recv)
.instrument(info_span!("gc loop", tenant = %tenantid)));
if let Some(old_cancel_send) = gc_loops.insert(tenantid, cancel_send) {
old_cancel_send.send(()).ok();
}
// Update metrics, remember handle
TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
futures.push(handle);
},
tenantid = compaction_recv.recv() => {
let tenantid = tenantid.expect("Compaction task channel closed unexpectedly");
// Spawn new task, request cancellation of the old one if exists
let (cancel_send, cancel_recv) = watch::channel(());
let handle = tokio::spawn(compaction_loop(tenantid, cancel_recv)
.instrument(info_span!("compaction loop", tenant = %tenantid)));
if let Some(old_cancel_send) = compaction_loops.insert(tenantid, cancel_send) {
old_cancel_send.send(()).ok();
}
// Update metrics, remember handle
TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
futures.push(handle);
},
result = futures.next() => {
// Log and count any unhandled panics
match result {
Some(Ok(())) => {
TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc();
},
Some(Err(e)) => {
TENANT_TASK_EVENTS.with_label_values(&["panic"]).inc();
error!("loop join error {}", e)
},
None => {},
};
},
}
}
});
Ok(())
},
)?;
Ok(())
}
///
/// GC task's main loop
///
async fn gc_loop(tenantid: ZTenantId, mut cancel: watch::Receiver<()>) {
loop {
trace!("waking up");
// Run blocking part of the task
let period: Result<Result<_, anyhow::Error>, _> = tokio::task::spawn_blocking(move || {
// Break if tenant is not active
if tenant_mgr::get_tenant_state(tenantid) != Some(TenantState::Active) {
return Ok(ControlFlow::Break(()));
}
// Break if we're not allowed to write to disk
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
// TODO do this inside repo.gc_iteration instead.
let _guard = match repo.file_lock.try_read() {
Ok(g) => g,
Err(_) => return Ok(ControlFlow::Break(())),
};
// Run gc
let gc_period = repo.get_gc_period();
let gc_horizon = repo.get_gc_horizon();
if gc_horizon > 0 {
repo.gc_iteration(None, gc_horizon, repo.get_pitr_interval(), false)?;
}
Ok(ControlFlow::Continue(gc_period))
})
.await;
// Decide whether to sleep or break
let sleep_duration = match period {
Ok(Ok(ControlFlow::Continue(period))) => period,
Ok(Ok(ControlFlow::Break(()))) => break,
Ok(Err(e)) => {
error!("Gc failed, retrying: {}", e);
Duration::from_secs(2)
}
Err(e) => {
error!("Gc join error, retrying: {}", e);
Duration::from_secs(2)
}
};
// Sleep
tokio::select! {
_ = cancel.changed() => {
trace!("received cancellation request");
break;
},
_ = tokio::time::sleep(sleep_duration) => {},
}
}
trace!(
"GC loop stopped. State is {:?}",
tenant_mgr::get_tenant_state(tenantid)
);
}

View File

@@ -0,0 +1,79 @@
//! This module contains functions to serve per-tenant background processes,
//! such as compaction and GC
use crate::repository::Repository;
use crate::tenant_mgr;
use crate::tenant_mgr::TenantState;
use anyhow::Result;
use std::time::Duration;
use tracing::*;
use utils::zid::ZTenantId;
///
/// Compaction thread's main loop
///
pub fn compact_loop(tenantid: ZTenantId) -> Result<()> {
if let Err(err) = compact_loop_ext(tenantid) {
error!("compact loop terminated with error: {:?}", err);
Err(err)
} else {
Ok(())
}
}
fn compact_loop_ext(tenantid: ZTenantId) -> Result<()> {
loop {
if tenant_mgr::get_tenant_state(tenantid) != Some(TenantState::Active) {
break;
}
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
let compaction_period = repo.get_compaction_period();
std::thread::sleep(compaction_period);
trace!("compaction thread for tenant {} waking up", tenantid);
// Compact timelines
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
repo.compaction_iteration()?;
}
trace!(
"compaction thread stopped for tenant {} state is {:?}",
tenantid,
tenant_mgr::get_tenant_state(tenantid)
);
Ok(())
}
///
/// GC thread's main loop
///
pub fn gc_loop(tenantid: ZTenantId) -> Result<()> {
loop {
if tenant_mgr::get_tenant_state(tenantid) != Some(TenantState::Active) {
break;
}
trace!("gc thread for tenant {} waking up", tenantid);
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
let gc_horizon = repo.get_gc_horizon();
// Garbage collect old files that are not needed for PITR anymore
if gc_horizon > 0 {
repo.gc_iteration(None, gc_horizon, repo.get_pitr_interval(), false)?;
}
// TODO Write it in more adequate way using
// condvar.wait_timeout() or something
let mut sleep_time = repo.get_gc_period().as_secs();
while sleep_time > 0 && tenant_mgr::get_tenant_state(tenantid) == Some(TenantState::Active)
{
sleep_time -= 1;
std::thread::sleep(Duration::from_secs(1));
}
}
trace!(
"GC thread stopped for tenant {} state is {:?}",
tenantid,
tenant_mgr::get_tenant_state(tenantid)
);
Ok(())
}

View File

@@ -94,8 +94,11 @@ pub enum ThreadKind {
// Main walreceiver manager thread that ensures that every timeline spawns a connection to safekeeper, to fetch WAL.
WalReceiverManager,
// Thread that schedules new compaction and gc jobs
TenantTaskManager,
// Thread that handles compaction of all timelines for a tenant.
Compactor,
// Thread that handles GC of a tenant
GarbageCollector,
// Thread that flushes frozen in-memory layers to disk
LayerFlushThread,

View File

@@ -178,7 +178,7 @@ async fn shutdown_all_wal_connections(
/// That may lead to certain events not being observed by the listener.
#[derive(Debug)]
struct TaskHandle<E> {
handle: JoinHandle<Result<(), String>>,
handle: JoinHandle<()>,
events_receiver: watch::Receiver<TaskEvent<E>>,
cancellation: watch::Sender<()>,
}
@@ -205,8 +205,8 @@ impl<E: Clone> TaskHandle<E> {
let sender = Arc::clone(&events_sender);
let handle = tokio::task::spawn(async move {
events_sender.send(TaskEvent::Started).ok();
task(sender, cancellation_receiver).await
let task_result = task(sender, cancellation_receiver).await;
events_sender.send(TaskEvent::End(task_result)).ok();
});
TaskHandle {
@@ -216,16 +216,6 @@ impl<E: Clone> TaskHandle<E> {
}
}
async fn next_task_event(&mut self) -> TaskEvent<E> {
select! {
next_task_event = self.events_receiver.changed() => match next_task_event {
Ok(()) => self.events_receiver.borrow().clone(),
Err(_task_channel_part_dropped) => join_on_handle(&mut self.handle).await,
},
task_completion_result = join_on_handle(&mut self.handle) => task_completion_result,
}
}
/// Aborts current task, waiting for it to finish.
async fn shutdown(self) {
self.cancellation.send(()).ok();
@@ -235,19 +225,6 @@ impl<E: Clone> TaskHandle<E> {
}
}
async fn join_on_handle<E>(handle: &mut JoinHandle<Result<(), String>>) -> TaskEvent<E> {
match handle.await {
Ok(task_result) => TaskEvent::End(task_result),
Err(e) => {
if e.is_cancelled() {
TaskEvent::End(Ok(()))
} else {
TaskEvent::End(Err(format!("WAL receiver task panicked: {e}")))
}
}
}
}
/// A step to process timeline attach/detach events to enable/disable the corresponding WAL receiver machinery.
/// In addition to WAL streaming management, the step ensures that corresponding tenant has its service threads enabled or disabled.
/// This is done here, since only walreceiver knows when a certain tenant has no streaming enabled.

View File

@@ -104,29 +104,49 @@ async fn connection_manager_loop_step(
Some(wal_connection_update) = async {
match walreceiver_state.wal_connection.as_mut() {
Some(wal_connection) => Some(wal_connection.connection_task.next_task_event().await),
Some(wal_connection) => {
let receiver = &mut wal_connection.connection_task.events_receiver;
Some(match receiver.changed().await {
Ok(()) => receiver.borrow().clone(),
Err(_cancellation_error) => TaskEvent::End(Ok(())),
})
}
None => None,
}
} => {
let wal_connection = walreceiver_state.wal_connection.as_mut().expect("Should have a connection, as checked by the corresponding select! guard");
match &wal_connection_update {
TaskEvent::Started => {
wal_connection.latest_connection_update = Utc::now().naive_utc();
*walreceiver_state.wal_connection_attempts.entry(wal_connection.sk_id).or_insert(0) += 1;
},
TaskEvent::NewEvent(replication_feedback) => {
wal_connection.latest_connection_update = DateTime::<Local>::from(replication_feedback.ps_replytime).naive_utc();
// reset connection attempts here only, the only place where both nodes
// explicitly confirmn with replication feedback that they are connected to each other
walreceiver_state.wal_connection_attempts.remove(&wal_connection.sk_id);
},
let (connection_update, reset_connection_attempts) = match &wal_connection_update {
TaskEvent::Started => (Some(Utc::now().naive_utc()), true),
TaskEvent::NewEvent(replication_feedback) => (Some(DateTime::<Local>::from(replication_feedback.ps_replytime).naive_utc()), true),
TaskEvent::End(end_result) => {
match end_result {
Ok(()) => debug!("WAL receiving task finished"),
Err(e) => warn!("WAL receiving task failed: {e}"),
let should_reset_connection_attempts = match end_result {
Ok(()) => {
debug!("WAL receiving task finished");
true
},
Err(e) => {
warn!("WAL receiving task failed: {e}");
false
},
};
walreceiver_state.wal_connection = None;
(None, should_reset_connection_attempts)
},
};
if let Some(connection_update) = connection_update {
match &mut walreceiver_state.wal_connection {
Some(wal_connection) => {
wal_connection.latest_connection_update = connection_update;
let attempts_entry = walreceiver_state.wal_connection_attempts.entry(wal_connection.sk_id).or_insert(0);
if reset_connection_attempts {
*attempts_entry = 0;
} else {
*attempts_entry += 1;
}
},
None => error!("Received connection update for WAL connection that is not active, update: {wal_connection_update:?}"),
}
}
},
@@ -386,8 +406,10 @@ impl WalreceiverState {
Some(existing_wal_connection) => {
let connected_sk_node = existing_wal_connection.sk_id;
let (new_sk_id, new_safekeeper_etcd_data, new_wal_producer_connstr) =
self.select_connection_candidate(Some(connected_sk_node))?;
let (new_sk_id, new_safekeeper_etcd_data, new_wal_producer_connstr) = self
.applicable_connection_candidates()
.filter(|&(sk_id, _, _)| sk_id != connected_sk_node)
.max_by_key(|(_, info, _)| info.commit_lsn)?;
let now = Utc::now().naive_utc();
if let Ok(latest_interaciton) =
@@ -440,8 +462,9 @@ impl WalreceiverState {
}
}
None => {
let (new_sk_id, _, new_wal_producer_connstr) =
self.select_connection_candidate(None)?;
let (new_sk_id, _, new_wal_producer_connstr) = self
.applicable_connection_candidates()
.max_by_key(|(_, info, _)| info.commit_lsn)?;
return Some(NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
wal_producer_connstr: new_wal_producer_connstr,
@@ -453,49 +476,6 @@ impl WalreceiverState {
None
}
/// Selects the best possible candidate, based on the data collected from etcd updates about the safekeepers.
/// Optionally, omits the given node, to support gracefully switching from a healthy safekeeper to another.
///
/// The candidate that is chosen:
/// * has fewest connection attempts from pageserver to safekeeper node (reset every time the WAL replication feedback is sent)
/// * has greatest data Lsn among the ones that are left
///
/// NOTE:
/// We evict timeline data received from etcd based on time passed since it was registered, along with its connection attempts values, but
/// otherwise to reset the connection attempts, a successful connection to that node is needed.
/// That won't happen now, before all nodes with less connection attempts are connected to first, which might leave the sk node with more advanced state to be ignored.
fn select_connection_candidate(
&self,
node_to_omit: Option<NodeId>,
) -> Option<(NodeId, &SkTimelineInfo, String)> {
let all_candidates = self
.applicable_connection_candidates()
.filter(|&(sk_id, _, _)| Some(sk_id) != node_to_omit)
.collect::<Vec<_>>();
let smallest_attempts_allowed = all_candidates
.iter()
.map(|(sk_id, _, _)| {
self.wal_connection_attempts
.get(sk_id)
.copied()
.unwrap_or(0)
})
.min()?;
all_candidates
.into_iter()
.filter(|(sk_id, _, _)| {
smallest_attempts_allowed
>= self
.wal_connection_attempts
.get(sk_id)
.copied()
.unwrap_or(0)
})
.max_by_key(|(_, info, _)| info.commit_lsn)
}
fn applicable_connection_candidates(
&self,
) -> impl Iterator<Item = (NodeId, &SkTimelineInfo, String)> {
@@ -520,25 +500,15 @@ impl WalreceiverState {
}
fn cleanup_old_candidates(&mut self) {
let mut node_ids_to_remove = Vec::with_capacity(self.wal_stream_candidates.len());
self.wal_stream_candidates.retain(|node_id, etcd_info| {
self.wal_stream_candidates.retain(|_, etcd_info| {
if let Ok(time_since_latest_etcd_update) =
(Utc::now().naive_utc() - etcd_info.latest_update).to_std()
{
let should_retain = time_since_latest_etcd_update < self.lagging_wal_timeout;
if !should_retain {
node_ids_to_remove.push(*node_id);
}
should_retain
time_since_latest_etcd_update < self.lagging_wal_timeout
} else {
true
}
});
for node_id in node_ids_to_remove {
self.wal_connection_attempts.remove(&node_id);
}
}
}
@@ -873,64 +843,6 @@ mod tests {
Ok(())
}
#[tokio::test]
async fn candidate_with_many_connection_failures() -> anyhow::Result<()> {
let harness = RepoHarness::create("candidate_with_many_connection_failures")?;
let mut state = dummy_state(&harness);
let now = Utc::now().naive_utc();
let current_lsn = Lsn(100_000).align();
let bigger_lsn = Lsn(current_lsn.0 + 100).align();
state.wal_connection = None;
state.wal_stream_candidates = HashMap::from([
(
NodeId(0),
EtcdSkTimeline {
timeline: SkTimelineInfo {
last_log_term: None,
flush_lsn: None,
commit_lsn: Some(bigger_lsn),
backup_lsn: None,
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
},
etcd_version: 0,
latest_update: now,
},
),
(
NodeId(1),
EtcdSkTimeline {
timeline: SkTimelineInfo {
last_log_term: None,
flush_lsn: None,
commit_lsn: Some(current_lsn),
backup_lsn: None,
remote_consistent_lsn: None,
peer_horizon_lsn: None,
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
},
etcd_version: 0,
latest_update: now,
},
),
]);
state.wal_connection_attempts = HashMap::from([(NodeId(0), 1), (NodeId(1), 0)]);
let candidate_with_less_errors = state
.next_connection_candidate()
.expect("Expected one candidate selected, but got none");
assert_eq!(
candidate_with_less_errors.safekeeper_id,
NodeId(1),
"Should select the node with less connection errors"
);
Ok(())
}
#[tokio::test]
async fn connection_no_etcd_data_candidate() -> anyhow::Result<()> {
let harness = RepoHarness::create("connection_no_etcd_data_candidate")?;

View File

@@ -49,12 +49,6 @@ impl UserFacingError for ConsoleAuthError {
}
}
impl From<&auth::credentials::ClientCredsParseError> for ConsoleAuthError {
fn from(e: &auth::credentials::ClientCredsParseError) -> Self {
ConsoleAuthError::BadProjectName(e.clone())
}
}
// TODO: convert into an enum with "error"
#[derive(Serialize, Deserialize, Debug)]
struct GetRoleSecretResponse {
@@ -98,9 +92,14 @@ impl<'a> Api<'a> {
async fn get_auth_info(&self) -> Result<AuthInfo> {
let mut url = self.endpoint.clone();
let project_name = self
.creds
.project_name
.as_ref()
.map_err(|e| ConsoleAuthError::BadProjectName(e.clone()))?;
url.path_segments_mut().push("proxy_get_role_secret");
url.query_pairs_mut()
.append_pair("project", self.creds.project_name.as_ref()?)
.append_pair("project", project_name)
.append_pair("role", &self.creds.user);
// TODO: use a proper logger
@@ -122,8 +121,12 @@ impl<'a> Api<'a> {
/// Wake up the compute node and return the corresponding connection info.
async fn wake_compute(&self) -> Result<DatabaseInfo> {
let mut url = self.endpoint.clone();
let project_name = self
.creds
.project_name
.as_ref()
.map_err(|e| ConsoleAuthError::BadProjectName(e.clone()))?;
url.path_segments_mut().push("proxy_wake_compute");
let project_name = self.creds.project_name.as_ref()?;
url.query_pairs_mut().append_pair("project", project_name);
// TODO: use a proper logger

View File

@@ -115,7 +115,7 @@ mod tests {
Ok(())
});
waiter.await?;
let () = waiter.await?;
notifier.await?
}
}

View File

@@ -5,11 +5,6 @@ use anyhow::Context;
use anyhow::Error;
use anyhow::Result;
use etcd_broker::subscription_value::SkTimelineInfo;
use etcd_broker::LeaseKeepAliveStream;
use etcd_broker::LeaseKeeper;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::time::Duration;
use tokio::spawn;
use tokio::task::JoinHandle;
@@ -26,7 +21,7 @@ use utils::zid::{NodeId, ZTenantTimelineId};
const RETRY_INTERVAL_MSEC: u64 = 1000;
const PUSH_INTERVAL_MSEC: u64 = 1000;
const LEASE_TTL_SEC: i64 = 10;
const LEASE_TTL_SEC: i64 = 5;
pub fn thread_main(conf: SafeKeeperConf) {
let runtime = runtime::Builder::new_current_thread()
@@ -159,48 +154,13 @@ pub fn get_candiate_name(system_id: NodeId) -> String {
format!("id_{system_id}")
}
async fn push_sk_info(
zttid: ZTenantTimelineId,
mut client: Client,
key: String,
sk_info: SkTimelineInfo,
mut lease: Lease,
) -> anyhow::Result<(ZTenantTimelineId, Lease)> {
let put_opts = PutOptions::new().with_lease(lease.id);
client
.put(
key.clone(),
serde_json::to_string(&sk_info)?,
Some(put_opts),
)
.await
.with_context(|| format!("failed to push safekeeper info to {}", key))?;
// revive the lease
lease
.keeper
.keep_alive()
.await
.context("failed to send LeaseKeepAliveRequest")?;
lease
.ka_stream
.message()
.await
.context("failed to receive LeaseKeepAliveResponse")?;
Ok((zttid, lease))
}
struct Lease {
id: i64,
keeper: LeaseKeeper,
ka_stream: LeaseKeepAliveStream,
}
/// Push once in a while data about all active timelines to the broker.
async fn push_loop(conf: SafeKeeperConf) -> anyhow::Result<()> {
let mut client = Client::connect(&conf.broker_endpoints, None).await?;
let mut leases: HashMap<ZTenantTimelineId, Lease> = HashMap::new();
// Get and maintain lease to automatically delete obsolete data
let lease = client.lease_grant(LEASE_TTL_SEC, None).await?;
let (mut keeper, mut ka_stream) = client.lease_keep_alive(lease.id()).await?;
let push_interval = Duration::from_millis(PUSH_INTERVAL_MSEC);
loop {
@@ -208,46 +168,33 @@ async fn push_loop(conf: SafeKeeperConf) -> anyhow::Result<()> {
// is under plain mutex. That's ok, all this code is not performance
// sensitive and there is no risk of deadlock as we don't await while
// lock is held.
let active_tlis = GlobalTimelines::get_active_timelines();
// // Get and maintain (if not yet) per timeline lease to automatically delete obsolete data.
for zttid in active_tlis.iter() {
if let Entry::Vacant(v) = leases.entry(*zttid) {
let lease = client.lease_grant(LEASE_TTL_SEC, None).await?;
let (keeper, ka_stream) = client.lease_keep_alive(lease.id()).await?;
v.insert(Lease {
id: lease.id(),
keeper,
ka_stream,
});
for zttid in GlobalTimelines::get_active_timelines() {
if let Some(tli) = GlobalTimelines::get_loaded(zttid) {
let sk_info = tli.get_public_info(&conf)?;
let put_opts = PutOptions::new().with_lease(lease.id());
client
.put(
timeline_safekeeper_path(
conf.broker_etcd_prefix.clone(),
zttid,
conf.my_id,
),
serde_json::to_string(&sk_info)?,
Some(put_opts),
)
.await
.context("failed to push safekeeper info")?;
}
}
leases.retain(|zttid, _| active_tlis.contains(zttid));
// Push data concurrently to not suffer from latency, with many timelines it can be slow.
let handles = active_tlis
.iter()
.filter_map(|zttid| GlobalTimelines::get_loaded(*zttid))
.map(|tli| {
let sk_info = tli.get_public_info(&conf);
let key = timeline_safekeeper_path(
conf.broker_etcd_prefix.clone(),
tli.zttid,
conf.my_id,
);
let lease = leases.remove(&tli.zttid).unwrap();
tokio::spawn(push_sk_info(tli.zttid, client.clone(), key, sk_info, lease))
})
.collect::<Vec<_>>();
for h in handles {
let (zttid, lease) = h.await??;
// It is ugly to pull leases from hash and then put it back, but
// otherwise we have to resort to long living per tli tasks (which
// would generate a lot of errors when etcd is down) as task wants to
// have 'static objects, we can't borrow to it.
leases.insert(zttid, lease);
}
// revive the lease
keeper
.keep_alive()
.await
.context("failed to send LeaseKeepAliveRequest")?;
ka_stream
.message()
.await
.context("failed to receive LeaseKeepAliveResponse")?;
sleep(push_interval).await;
}
}

View File

@@ -239,19 +239,6 @@ pub fn upgrade_control_file(buf: &[u8], version: u32) -> Result<SafeKeeperState>
remote_consistent_lsn: Lsn(0),
peers: Peers(vec![]),
});
} else if version == 5 {
info!("reading safekeeper control file version {}", version);
let mut oldstate = SafeKeeperState::des(&buf[..buf.len()])?;
if oldstate.timeline_start_lsn != Lsn(0) {
return Ok(oldstate);
}
// set special timeline_start_lsn because we don't know the real one
info!("setting timeline_start_lsn and local_start_lsn to Lsn(1)");
oldstate.timeline_start_lsn = Lsn(1);
oldstate.local_start_lsn = Lsn(1);
return Ok(oldstate);
}
bail!("unsupported safekeeper control file version {}", version)
}

View File

@@ -28,7 +28,7 @@ use utils::{
};
pub const SK_MAGIC: u32 = 0xcafeceefu32;
pub const SK_FORMAT_VERSION: u32 = 6;
pub const SK_FORMAT_VERSION: u32 = 5;
const SK_PROTOCOL_VERSION: u32 = 2;
const UNKNOWN_SERVER_VERSION: u32 = 0;

View File

@@ -11,7 +11,7 @@ use serde::Serialize;
use tokio::sync::watch;
use std::cmp::{max, min};
use std::collections::{HashMap, HashSet};
use std::collections::HashMap;
use std::fs::{self};
use std::sync::{Arc, Mutex, MutexGuard};
@@ -445,9 +445,9 @@ impl Timeline {
}
/// Prepare public safekeeper info for reporting.
pub fn get_public_info(&self, conf: &SafeKeeperConf) -> SkTimelineInfo {
pub fn get_public_info(&self, conf: &SafeKeeperConf) -> anyhow::Result<SkTimelineInfo> {
let shared_state = self.mutex.lock().unwrap();
SkTimelineInfo {
Ok(SkTimelineInfo {
last_log_term: Some(shared_state.sk.get_epoch()),
flush_lsn: Some(shared_state.sk.wal_store.flush_lsn()),
// note: this value is not flushed to control file yet and can be lost
@@ -460,7 +460,7 @@ impl Timeline {
peer_horizon_lsn: Some(shared_state.sk.inmem.peer_horizon_lsn),
safekeeper_connstr: Some(conf.listen_pg_addr.clone()),
backup_lsn: Some(shared_state.sk.inmem.backup_lsn),
}
})
}
/// Update timeline state with peer safekeeper data.
@@ -625,8 +625,6 @@ impl GlobalTimelines {
zttid: ZTenantTimelineId,
create: bool,
) -> Result<Arc<Timeline>> {
let _enter = info_span!("", timeline = %zttid.tenant_id).entered();
let mut state = TIMELINES_STATE.lock().unwrap();
match state.timelines.get(&zttid) {
@@ -669,7 +667,7 @@ impl GlobalTimelines {
}
/// Get ZTenantTimelineIDs of all active timelines.
pub fn get_active_timelines() -> HashSet<ZTenantTimelineId> {
pub fn get_active_timelines() -> Vec<ZTenantTimelineId> {
let state = TIMELINES_STATE.lock().unwrap();
state
.timelines

View File

@@ -1,222 +0,0 @@
#
# Simple script to export nodes from one pageserver
# and import them into another page server
#
from os import path
import os
import requests
import uuid
import subprocess
import argparse
from pathlib import Path
# directory to save exported tar files to
basepath = path.dirname(path.abspath(__file__))
class NeonPageserverApiException(Exception):
pass
class NeonPageserverHttpClient(requests.Session):
def __init__(self, host, port):
super().__init__()
self.host = host
self.port = port
def verbose_error(self, res: requests.Response):
try:
res.raise_for_status()
except requests.RequestException as e:
try:
msg = res.json()['msg']
except:
msg = ''
raise NeonPageserverApiException(msg) from e
def check_status(self):
self.get(f"http://{self.host}:{self.port}/v1/status").raise_for_status()
def tenant_list(self):
res = self.get(f"http://{self.host}:{self.port}/v1/tenant")
self.verbose_error(res)
res_json = res.json()
assert isinstance(res_json, list)
return res_json
def tenant_create(self, new_tenant_id: uuid.UUID, ok_if_exists):
res = self.post(
f"http://{self.host}:{self.port}/v1/tenant",
json={
'new_tenant_id': new_tenant_id.hex,
},
)
if res.status_code == 409:
if ok_if_exists:
print(f'could not create tenant: already exists for id {new_tenant_id}')
else:
res.raise_for_status()
elif res.status_code == 201:
print(f'created tenant {new_tenant_id}')
else:
self.verbose_error(res)
return new_tenant_id
def timeline_list(self, tenant_id: uuid.UUID):
res = self.get(f"http://{self.host}:{self.port}/v1/tenant/{tenant_id.hex}/timeline")
self.verbose_error(res)
res_json = res.json()
assert isinstance(res_json, list)
return res_json
def main(args: argparse.Namespace):
old_pageserver_host = args.old_pageserver_host
new_pageserver_host = args.new_pageserver_host
tenants = args.tenants
old_http_client = NeonPageserverHttpClient(old_pageserver_host, args.old_pageserver_http_port)
old_http_client.check_status()
old_pageserver_connstr = f"postgresql://{old_pageserver_host}:{args.old_pageserver_pg_port}"
new_http_client = NeonPageserverHttpClient(new_pageserver_host, args.new_pageserver_http_port)
new_http_client.check_status()
new_pageserver_connstr = f"postgresql://{new_pageserver_host}:{args.new_pageserver_pg_port}"
psql_env = {**os.environ, 'LD_LIBRARY_PATH': '/usr/local/lib/'}
for tenant_id in tenants:
print(f"Tenant: {tenant_id}")
timelines = old_http_client.timeline_list(uuid.UUID(tenant_id))
print(f"Timelines: {timelines}")
# Create tenant in new pageserver
if args.only_import is False:
new_http_client.tenant_create(uuid.UUID(tenant_id), args.ok_if_exists)
for timeline in timelines:
# Export timelines from old pageserver
if args.only_import is False:
query = f"fullbackup {timeline['tenant_id']} {timeline['timeline_id']} {timeline['local']['last_record_lsn']}"
cmd = ["psql", "--no-psqlrc", old_pageserver_connstr, "-c", query]
print(f"Running: {cmd}")
tar_filename = path.join(basepath,
f"{timeline['tenant_id']}_{timeline['timeline_id']}.tar")
stderr_filename = path.join(
basepath, f"{timeline['tenant_id']}_{timeline['timeline_id']}.stderr")
with open(tar_filename, 'w') as stdout_f:
with open(stderr_filename, 'w') as stderr_f:
print(f"(capturing output to {tar_filename})")
subprocess.run(cmd, stdout=stdout_f, stderr=stderr_f, env=psql_env)
print(f"Done export: {tar_filename}")
# Import timelines to new pageserver
psql_path = Path(args.psql_path)
import_cmd = f"import basebackup {timeline['tenant_id']} {timeline['timeline_id']} {timeline['local']['last_record_lsn']} {timeline['local']['last_record_lsn']}"
tar_filename = path.join(basepath,
f"{timeline['tenant_id']}_{timeline['timeline_id']}.tar")
full_cmd = rf"""cat {tar_filename} | {psql_path} {new_pageserver_connstr} -c '{import_cmd}' """
stderr_filename2 = path.join(
basepath, f"import_{timeline['tenant_id']}_{timeline['timeline_id']}.stderr")
stdout_filename = path.join(
basepath, f"import_{timeline['tenant_id']}_{timeline['timeline_id']}.stdout")
print(f"Running: {full_cmd}")
with open(stdout_filename, 'w') as stdout_f:
with open(stderr_filename2, 'w') as stderr_f:
print(f"(capturing output to {stdout_filename})")
subprocess.run(full_cmd,
stdout=stdout_f,
stderr=stderr_f,
env=psql_env,
shell=True)
print(f"Done import")
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--tenant-id',
dest='tenants',
required=True,
nargs='+',
help='Id of the tenant to migrate. You can pass multiple arguments',
)
parser.add_argument(
'--from-host',
dest='old_pageserver_host',
required=True,
help='Host of the pageserver to migrate data from',
)
parser.add_argument(
'--from-http-port',
dest='old_pageserver_http_port',
required=False,
type=int,
default=9898,
help='HTTP port of the pageserver to migrate data from. Default: 9898',
)
parser.add_argument(
'--from-pg-port',
dest='old_pageserver_pg_port',
required=False,
type=int,
default=6400,
help='pg port of the pageserver to migrate data from. Default: 6400',
)
parser.add_argument(
'--to-host',
dest='new_pageserver_host',
required=True,
help='Host of the pageserver to migrate data to',
)
parser.add_argument(
'--to-http-port',
dest='new_pageserver_http_port',
required=False,
default=9898,
type=int,
help='HTTP port of the pageserver to migrate data to. Default: 9898',
)
parser.add_argument(
'--to-pg-port',
dest='new_pageserver_pg_port',
required=False,
default=6400,
type=int,
help='pg port of the pageserver to migrate data to. Default: 6400',
)
parser.add_argument(
'--ignore-tenant-exists',
dest='ok_if_exists',
required=False,
help=
'Ignore error if we are trying to create the tenant that already exists. It can be dangerous if existing tenant already contains some data.',
)
parser.add_argument(
'--psql-path',
dest='psql_path',
required=False,
default='/usr/local/bin/psql',
help='Path to the psql binary. Default: /usr/local/bin/psql',
)
parser.add_argument(
'--only-import',
dest='only_import',
required=False,
default=False,
action='store_true',
help='Skip export and tenant creation part',
)
args = parser.parse_args()
main(args)

View File

@@ -28,10 +28,6 @@ strict = true
# There is some work in progress, though: https://github.com/MagicStack/asyncpg/pull/577
ignore_missing_imports = true
[mypy-pg8000.*]
# Used only in testing clients
ignore_missing_imports = true
[mypy-cached_property.*]
ignore_missing_imports = true

View File

@@ -45,7 +45,7 @@ If you want to run all tests that have the string "bench" in their names:
Useful environment variables:
`NEON_BIN`: The directory where neon binaries can be found.
`ZENITH_BIN`: The directory where zenith binaries can be found.
`POSTGRES_DISTRIB_DIR`: The directory where postgres distribution can be found.
`TEST_OUTPUT`: Set the directory where test state and test output files
should go.

View File

@@ -1,3 +1,6 @@
from contextlib import closing
import psycopg2.extras
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, NeonPageserverApiException

View File

@@ -1,6 +1,8 @@
from contextlib import closing
from uuid import uuid4
from typing import Iterator
from uuid import UUID, uuid4
from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserverApiException
from requests.exceptions import HTTPError
import pytest

View File

@@ -1,9 +1,11 @@
from contextlib import closing, contextmanager
import psycopg2.extras
import pytest
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.neon_fixtures import PgProtocol, NeonEnvBuilder
from fixtures.log_helper import log
import os
import time
import asyncpg
from fixtures.neon_fixtures import Postgres
import threading

View File

@@ -1,6 +1,8 @@
import pytest
from contextlib import closing
from fixtures.neon_fixtures import NeonEnv
from fixtures.log_helper import log
#

View File

@@ -1,3 +1,4 @@
import subprocess
from contextlib import closing
import psycopg2.extras

View File

@@ -1,10 +1,16 @@
import subprocess
from contextlib import closing
import psycopg2.extras
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder, PgBin, PortDistributor, VanillaPostgres
from fixtures.neon_fixtures import pg_distrib_dir
import os
from fixtures.utils import subprocess_capture
from fixtures.utils import mkdir_if_needed, subprocess_capture
import shutil
import getpass
import pwd
num_rows = 1000
@@ -40,20 +46,19 @@ def test_fullbackup(neon_env_builder: NeonEnvBuilder,
psql_env = {'LD_LIBRARY_PATH': os.path.join(str(pg_distrib_dir), 'lib')}
# Get and unpack fullbackup from pageserver
restored_dir_path = env.repo_dir / "restored_datadir"
restored_dir_path = os.path.join(env.repo_dir, "restored_datadir")
os.mkdir(restored_dir_path, 0o750)
query = f"fullbackup {env.initial_tenant.hex} {timeline} {lsn}"
cmd = ["psql", "--no-psqlrc", env.pageserver.connstr(), "-c", query]
result_basepath = pg_bin.run_capture(cmd, env=psql_env)
tar_output_file = result_basepath + ".stdout"
subprocess_capture(str(env.repo_dir),
["tar", "-xf", tar_output_file, "-C", str(restored_dir_path)])
subprocess_capture(str(env.repo_dir), ["tar", "-xf", tar_output_file, "-C", restored_dir_path])
# HACK
# fullbackup returns neon specific pg_control and first WAL segment
# use resetwal to overwrite it
pg_resetwal_path = os.path.join(pg_bin.pg_bin_path, 'pg_resetwal')
cmd = [pg_resetwal_path, "-D", str(restored_dir_path)]
cmd = [pg_resetwal_path, "-D", restored_dir_path]
pg_bin.run_capture(cmd, env=psql_env)
# Restore from the backup and find the data we inserted

View File

@@ -191,8 +191,3 @@ def test_import_from_pageserver(test_output_dir, pg_bin, vanilla_pg, neon_env_bu
# Check it's the same as the first fullbackup
# TODO pageserver should be checking checksum
assert os.path.getsize(tar_output_file) == os.path.getsize(new_tar_output_file)
# Check that gc works
psconn = env.pageserver.connect()
pscur = psconn.cursor()
pscur.execute(f"do_gc {tenant.hex} {timeline} 0")

View File

@@ -1,5 +1,5 @@
# It's possible to run any regular test with the local fs remote storage via
# env ZENITH_PAGESERVER_OVERRIDES="remote_storage={local_path='/tmp/neon_zzz/'}" poetry ......
# env ZENITH_PAGESERVER_OVERRIDES="remote_storage={local_path='/tmp/zenith_zzz/'}" poetry ......
import shutil, os
from contextlib import closing

View File

@@ -1,70 +0,0 @@
from fixtures.neon_fixtures import NeonEnvBuilder, wait_until
from uuid import UUID
import time
def get_only_element(l):
assert len(l) == 1
return l[0]
# Test that gc and compaction tenant tasks start and stop correctly
def test_tenant_tasks(neon_env_builder: NeonEnvBuilder):
# The gc and compaction loops don't bother to watch for tenant state
# changes while sleeping, so we use small periods to make this test
# run faster. With default settings we'd have to wait longer for tasks
# to notice state changes and shut down.
# TODO fix this behavior in the pageserver
tenant_config = "{gc_period = '1 s', compaction_period = '1 s'}"
neon_env_builder.pageserver_config_override = f"tenant_config={tenant_config}"
name = "test_tenant_tasks"
env = neon_env_builder.init_start()
client = env.pageserver.http_client()
def get_state(tenant):
all_states = client.tenant_list()
matching = [t for t in all_states if t["id"] == tenant.hex]
return get_only_element(matching)["state"]
def get_metric_value(name):
metrics = client.get_metrics()
relevant = [line for line in metrics.splitlines() if line.startswith(name)]
if len(relevant) == 0:
return 0
line = get_only_element(relevant)
value = line.lstrip(name).strip()
return int(value)
def detach_all_timelines(tenant):
timelines = [UUID(t["timeline_id"]) for t in client.timeline_list(tenant)]
for t in timelines:
client.timeline_detach(tenant, t)
def assert_idle(tenant):
assert get_state(tenant) == "Idle"
# Create tenant, start compute
tenant, _ = env.neon_cli.create_tenant()
timeline = env.neon_cli.create_timeline(name, tenant_id=tenant)
pg = env.postgres.create_start(name, tenant_id=tenant)
assert (get_state(tenant) == "Active")
# Stop compute
pg.stop()
# Detach all tenants and wait for them to go idle
# TODO they should be already idle since there are no active computes
for tenant_info in client.tenant_list():
tenant_id = UUID(tenant_info["id"])
detach_all_timelines(tenant_id)
wait_until(10, 0.2, lambda: assert_idle(tenant_id))
# Assert that all tasks finish quickly after tenants go idle
def assert_tasks_finish():
tasks_started = get_metric_value('pageserver_tenant_task_events{event="start"}')
tasks_ended = get_metric_value('pageserver_tenant_task_events{event="stop"}')
tasks_panicked = get_metric_value('pageserver_tenant_task_events{event="panic"}')
assert tasks_started == tasks_ended
assert tasks_panicked == 0
wait_until(10, 0.2, assert_tasks_finish)

View File

@@ -1,4 +1,3 @@
import pathlib
import pytest
import random
import time
@@ -15,7 +14,7 @@ from dataclasses import dataclass, field
from multiprocessing import Process, Value
from pathlib import Path
from fixtures.neon_fixtures import PgBin, Etcd, Postgres, RemoteStorageUsers, Safekeeper, NeonEnv, NeonEnvBuilder, PortDistributor, SafekeeperPort, neon_binpath, PgProtocol
from fixtures.utils import get_dir_size, lsn_to_hex, lsn_from_hex
from fixtures.utils import get_dir_size, lsn_to_hex, mkdir_if_needed, lsn_from_hex
from fixtures.log_helper import log
from typing import List, Optional, Any
from uuid import uuid4
@@ -646,7 +645,7 @@ class ProposerPostgres(PgProtocol):
def create_dir_config(self, safekeepers: str):
""" Create dir and config for running --sync-safekeepers """
pathlib.Path(self.pg_data_dir_path()).mkdir(exist_ok=True)
mkdir_if_needed(self.pg_data_dir_path())
with open(self.config_file_path(), "w") as f:
cfg = [
"synchronous_standby_names = 'walproposer'\n",
@@ -829,7 +828,7 @@ class SafekeeperEnv:
self.timeline_id = uuid.uuid4()
self.tenant_id = uuid.uuid4()
self.repo_dir.mkdir(exist_ok=True)
mkdir_if_needed(str(self.repo_dir))
# Create config and a Safekeeper object for each safekeeper
self.safekeepers = []
@@ -848,8 +847,8 @@ class SafekeeperEnv:
http=self.port_distributor.get_port(),
)
safekeeper_dir = self.repo_dir / f"sk{i}"
safekeeper_dir.mkdir(exist_ok=True)
safekeeper_dir = os.path.join(self.repo_dir, f"sk{i}")
mkdir_if_needed(safekeeper_dir)
args = [
self.bin_safekeeper,
@@ -858,7 +857,7 @@ class SafekeeperEnv:
"--listen-http",
f"127.0.0.1:{port.http}",
"-D",
str(safekeeper_dir),
safekeeper_dir,
"--id",
str(i),
"--broker-endpoints",

View File

@@ -1,17 +1,19 @@
import os
from pathlib import Path
import subprocess
from fixtures.neon_fixtures import (NeonEnvBuilder,
VanillaPostgres,
PortDistributor,
PgBin,
base_dir,
vanilla_pg,
pg_distrib_dir)
from fixtures.log_helper import log
def test_wal_restore(neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
test_output_dir: Path,
test_output_dir,
port_distributor: PortDistributor):
env = neon_env_builder.init_start()
env.neon_cli.create_branch("test_wal_restore")
@@ -20,13 +22,13 @@ def test_wal_restore(neon_env_builder: NeonEnvBuilder,
tenant_id = pg.safe_psql("show neon.tenant_id")[0][0]
env.neon_cli.pageserver_stop()
port = port_distributor.get_port()
data_dir = test_output_dir / 'pgsql.restored'
data_dir = os.path.join(test_output_dir, 'pgsql.restored')
with VanillaPostgres(data_dir, PgBin(test_output_dir), port) as restored:
pg_bin.run_capture([
os.path.join(base_dir, 'libs/utils/scripts/restore_from_wal.sh'),
os.path.join(pg_distrib_dir, 'bin'),
str(test_output_dir / 'repo' / 'safekeepers' / 'sk1' / str(tenant_id) / '*'),
str(data_dir),
os.path.join(test_output_dir, 'repo/safekeepers/sk1/{}/*'.format(tenant_id)),
data_dir,
str(port)
])
restored.start()

View File

@@ -1,13 +1,13 @@
import os
from pathlib import Path
import pytest
from fixtures.utils import mkdir_if_needed
from fixtures.neon_fixtures import NeonEnv, base_dir, pg_distrib_dir
# The isolation tests run for a long time, especially in debug mode,
# so use a larger-than-default timeout.
@pytest.mark.timeout(1800)
def test_isolation(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, capsys):
def test_isolation(neon_simple_env: NeonEnv, test_output_dir, pg_bin, capsys):
env = neon_simple_env
env.neon_cli.create_branch("test_isolation", "empty")
@@ -17,8 +17,9 @@ def test_isolation(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, caps
pg.safe_psql('CREATE DATABASE isolation_regression')
# Create some local directories for pg_isolation_regress to run in.
runpath = test_output_dir / 'regress'
(runpath / 'testtablespace').mkdir(parents=True)
runpath = os.path.join(test_output_dir, 'regress')
mkdir_if_needed(runpath)
mkdir_if_needed(os.path.join(runpath, 'testtablespace'))
# Compute all the file locations that pg_isolation_regress will need.
build_path = os.path.join(pg_distrib_dir, 'build/src/test/isolation')

View File

@@ -1,6 +1,6 @@
import os
from pathlib import Path
from fixtures.utils import mkdir_if_needed
from fixtures.neon_fixtures import (NeonEnv,
check_restored_datadir_content,
base_dir,
@@ -8,7 +8,7 @@ from fixtures.neon_fixtures import (NeonEnv,
from fixtures.log_helper import log
def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, capsys):
def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir, pg_bin, capsys):
env = neon_simple_env
env.neon_cli.create_branch("test_neon_regress", "empty")
@@ -17,8 +17,9 @@ def test_neon_regress(neon_simple_env: NeonEnv, test_output_dir: Path, pg_bin, c
pg.safe_psql('CREATE DATABASE regression')
# Create some local directories for pg_regress to run in.
runpath = test_output_dir / 'regress'
(runpath / 'testtablespace').mkdir(parents=True)
runpath = os.path.join(test_output_dir, 'regress')
mkdir_if_needed(runpath)
mkdir_if_needed(os.path.join(runpath, 'testtablespace'))
# Compute all the file locations that pg_regress will need.
# This test runs neon specific tests

View File

@@ -1,13 +1,13 @@
import os
import pathlib
import pytest
from fixtures.utils import mkdir_if_needed
from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content, base_dir, pg_distrib_dir
# The pg_regress tests run for a long time, especially in debug mode,
# so use a larger-than-default timeout.
@pytest.mark.timeout(1800)
def test_pg_regress(neon_simple_env: NeonEnv, test_output_dir: pathlib.Path, pg_bin, capsys):
def test_pg_regress(neon_simple_env: NeonEnv, test_output_dir: str, pg_bin, capsys):
env = neon_simple_env
env.neon_cli.create_branch("test_pg_regress", "empty")
@@ -16,8 +16,9 @@ def test_pg_regress(neon_simple_env: NeonEnv, test_output_dir: pathlib.Path, pg_
pg.safe_psql('CREATE DATABASE regression')
# Create some local directories for pg_regress to run in.
runpath = test_output_dir / 'regress'
(runpath / 'testtablespace').mkdir(parents=True)
runpath = os.path.join(test_output_dir, 'regress')
mkdir_if_needed(runpath)
mkdir_if_needed(os.path.join(runpath, 'testtablespace'))
# Compute all the file locations that pg_regress will need.
build_path = os.path.join(pg_distrib_dir, 'build/src/test/regress')
@@ -50,7 +51,7 @@ def test_pg_regress(neon_simple_env: NeonEnv, test_output_dir: pathlib.Path, pg_
# checkpoint one more time to ensure that the lsn we get is the latest one
pg.safe_psql('CHECKPOINT')
pg.safe_psql('select pg_current_wal_insert_lsn()')[0][0]
lsn = pg.safe_psql('select pg_current_wal_insert_lsn()')[0][0]
# Check that we restore the content of the datadir correctly
check_restored_datadir_content(test_output_dir, env, pg)

View File

@@ -35,7 +35,12 @@ from typing_extensions import Literal
import requests
import backoff # type: ignore
from .utils import (etcd_path, get_self_dir, subprocess_capture, lsn_from_hex, lsn_to_hex)
from .utils import (etcd_path,
get_self_dir,
mkdir_if_needed,
subprocess_capture,
lsn_from_hex,
lsn_to_hex)
from fixtures.log_helper import log
"""
This file contains pytest fixtures. A fixture is a test resource that can be
@@ -45,7 +50,7 @@ A fixture is created with the decorator @pytest.fixture decorator.
See docs: https://docs.pytest.org/en/6.2.x/fixture.html
There are several environment variables that can control the running of tests:
NEON_BIN, POSTGRES_DISTRIB_DIR, etc. See README.md for more information.
ZENITH_BIN, POSTGRES_DISTRIB_DIR, etc. See README.md for more information.
There's no need to import this file to use it. It should be declared as a plugin
inside conftest.py, and that makes it available to all tests.
@@ -122,7 +127,7 @@ def pytest_configure(config):
top_output_dir = env_test_output
else:
top_output_dir = os.path.join(base_dir, DEFAULT_OUTPUT_DIR)
pathlib.Path(top_output_dir).mkdir(exist_ok=True)
mkdir_if_needed(top_output_dir)
# Find the postgres installation.
global pg_distrib_dir
@@ -146,7 +151,7 @@ def pytest_configure(config):
return
# Find the neon binaries.
global neon_binpath
env_neon_bin = os.environ.get('NEON_BIN')
env_neon_bin = os.environ.get('ZENITH_BIN')
if env_neon_bin:
neon_binpath = env_neon_bin
else:
@@ -1311,7 +1316,7 @@ def append_pageserver_param_overrides(
class PgBin:
""" A helper class for executing postgres binaries """
def __init__(self, log_dir: Path):
def __init__(self, log_dir: str):
self.log_dir = log_dir
self.pg_bin_path = os.path.join(str(pg_distrib_dir), 'bin')
self.env = os.environ.copy()
@@ -1362,27 +1367,22 @@ class PgBin:
self._fixpath(command)
log.info('Running command "{}"'.format(' '.join(command)))
env = self._build_env(env)
return subprocess_capture(str(self.log_dir),
command,
env=env,
cwd=cwd,
check=True,
**kwargs)
return subprocess_capture(self.log_dir, command, env=env, cwd=cwd, check=True, **kwargs)
@pytest.fixture(scope='function')
def pg_bin(test_output_dir: Path) -> PgBin:
def pg_bin(test_output_dir: str) -> PgBin:
return PgBin(test_output_dir)
class VanillaPostgres(PgProtocol):
def __init__(self, pgdatadir: Path, pg_bin: PgBin, port: int, init=True):
def __init__(self, pgdatadir: str, pg_bin: PgBin, port: int, init=True):
super().__init__(host='localhost', port=port, dbname='postgres')
self.pgdatadir = pgdatadir
self.pg_bin = pg_bin
self.running = False
if init:
self.pg_bin.run_capture(['initdb', '-D', str(pgdatadir)])
self.pg_bin.run_capture(['initdb', '-D', pgdatadir])
self.configure([f"port = {port}\n"])
def configure(self, options: List[str]):
@@ -1398,13 +1398,12 @@ class VanillaPostgres(PgProtocol):
if log_path is None:
log_path = os.path.join(self.pgdatadir, "pg.log")
self.pg_bin.run_capture(
['pg_ctl', '-w', '-D', str(self.pgdatadir), '-l', log_path, 'start'])
self.pg_bin.run_capture(['pg_ctl', '-w', '-D', self.pgdatadir, '-l', log_path, 'start'])
def stop(self):
assert self.running
self.running = False
self.pg_bin.run_capture(['pg_ctl', '-w', '-D', str(self.pgdatadir), 'stop'])
self.pg_bin.run_capture(['pg_ctl', '-w', '-D', self.pgdatadir, 'stop'])
def get_subdir_size(self, subdir) -> int:
"""Return size of pgdatadir subdirectory in bytes."""
@@ -1419,9 +1418,9 @@ class VanillaPostgres(PgProtocol):
@pytest.fixture(scope='function')
def vanilla_pg(test_output_dir: Path,
def vanilla_pg(test_output_dir: str,
port_distributor: PortDistributor) -> Iterator[VanillaPostgres]:
pgdatadir = test_output_dir / "pgdata-vanilla"
pgdatadir = os.path.join(test_output_dir, "pgdata-vanilla")
pg_bin = PgBin(test_output_dir)
port = port_distributor.get_port()
with VanillaPostgres(pgdatadir, pg_bin, port) as vanilla_pg:
@@ -1458,7 +1457,7 @@ class RemotePostgres(PgProtocol):
@pytest.fixture(scope='function')
def remote_pg(test_output_dir: Path) -> Iterator[RemotePostgres]:
def remote_pg(test_output_dir: str) -> Iterator[RemotePostgres]:
pg_bin = PgBin(test_output_dir)
connstr = os.getenv("BENCHMARK_CONNSTR")
@@ -1925,12 +1924,9 @@ class Etcd:
datadir: str
port: int
peer_port: int
binary_path: Path = field(init=False)
binary_path: Path = etcd_path()
handle: Optional[subprocess.Popen[Any]] = None # handle of running daemon
def __post_init__(self):
self.binary_path = etcd_path()
def client_url(self):
return f'http://127.0.0.1:{self.port}'
@@ -1984,13 +1980,11 @@ class Etcd:
self.handle.wait()
def get_test_output_dir(request: Any) -> pathlib.Path:
def get_test_output_dir(request: Any) -> str:
""" Compute the working directory for an individual test. """
test_name = request.node.name
test_dir = pathlib.Path(top_output_dir) / test_name.replace("/", "-")
test_dir = os.path.join(str(top_output_dir), test_name)
log.info(f'get_test_output_dir is {test_dir}')
# make mypy happy
assert isinstance(test_dir, pathlib.Path)
return test_dir
@@ -2004,14 +1998,14 @@ def get_test_output_dir(request: Any) -> pathlib.Path:
# this fixture ensures that the directory exists. That works because
# 'autouse' fixtures are run before other fixtures.
@pytest.fixture(scope='function', autouse=True)
def test_output_dir(request: Any) -> pathlib.Path:
def test_output_dir(request: Any) -> str:
""" Create the working directory for an individual test. """
# one directory per test
test_dir = get_test_output_dir(request)
log.info(f'test_output_dir is {test_dir}')
shutil.rmtree(test_dir, ignore_errors=True)
test_dir.mkdir()
mkdir_if_needed(test_dir)
return test_dir
@@ -2057,7 +2051,7 @@ def should_skip_file(filename: str) -> bool:
#
# Test helpers
#
def list_files_to_compare(pgdata_dir: pathlib.Path):
def list_files_to_compare(pgdata_dir: str):
pgdata_files = []
for root, _file, filenames in os.walk(pgdata_dir):
for filename in filenames:
@@ -2074,7 +2068,7 @@ def list_files_to_compare(pgdata_dir: pathlib.Path):
# pg is the existing and running compute node, that we want to compare with a basebackup
def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, pg: Postgres):
def check_restored_datadir_content(test_output_dir: str, env: NeonEnv, pg: Postgres):
# Get the timeline ID. We need it for the 'basebackup' command
with closing(pg.connect()) as conn:
@@ -2086,8 +2080,8 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, pg: Post
pg.stop()
# Take a basebackup from pageserver
restored_dir_path = env.repo_dir / f"{pg.node_name}_restored_datadir"
restored_dir_path.mkdir(exist_ok=True)
restored_dir_path = os.path.join(env.repo_dir, f"{pg.node_name}_restored_datadir")
mkdir_if_needed(restored_dir_path)
pg_bin = PgBin(test_output_dir)
psql_path = os.path.join(pg_bin.pg_bin_path, 'psql')
@@ -2114,7 +2108,7 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, pg: Post
# list files we're going to compare
assert pg.pgdata_dir
pgdata_files = list_files_to_compare(pathlib.Path(pg.pgdata_dir))
pgdata_files = list_files_to_compare(pg.pgdata_dir)
restored_files = list_files_to_compare(restored_dir_path)
# check that file sets are equal
@@ -2146,7 +2140,7 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, pg: Post
assert (mismatch, error) == ([], [])
def wait_until(number_of_iterations: int, interval: float, func):
def wait_until(number_of_iterations: int, interval: int, func):
"""
Wait until 'func' returns successfully, without exception. Returns the last return value
from the the function.

View File

@@ -12,6 +12,18 @@ def get_self_dir() -> str:
return os.path.dirname(os.path.abspath(__file__))
def mkdir_if_needed(path: str) -> None:
""" Create a directory if it doesn't already exist
Note this won't try to create intermediate directories.
"""
try:
os.mkdir(path)
except FileExistsError:
pass
assert os.path.isdir(path)
def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str:
""" Run a process and capture its output

View File

@@ -80,7 +80,6 @@ def start_heavy_write_workload(env: PgCompare, n_tables: int, scale: int, num_it
thread.join()
@pytest.mark.timeout(1000)
@pytest.mark.parametrize("n_tables", [5])
@pytest.mark.parametrize("scale", get_scales_matrix(5))
@pytest.mark.parametrize("num_iters", [10])
@@ -122,7 +121,6 @@ def start_pgbench_simple_update_workload(env: PgCompare, duration: int):
env.flush()
@pytest.mark.timeout(1000)
@pytest.mark.parametrize("scale", get_scales_matrix(100))
@pytest.mark.parametrize("duration", get_durations_matrix())
def test_pgbench_simple_update_workload(pg_compare: PgCompare, scale: int, duration: int):
@@ -160,7 +158,6 @@ def start_pgbench_intensive_initialization(env: PgCompare, scale: int):
])
@pytest.mark.timeout(1000)
@pytest.mark.parametrize("scale", get_scales_matrix(1000))
def test_pgbench_intensive_init_workload(pg_compare: PgCompare, scale: int):
env = pg_compare

View File

@@ -1,2 +0,0 @@
bin/
obj/

View File

@@ -1,2 +0,0 @@
bin/
obj/

View File

@@ -1,14 +0,0 @@
FROM mcr.microsoft.com/dotnet/sdk:6.0 AS build
WORKDIR /source
COPY *.csproj .
RUN dotnet restore
COPY . .
RUN dotnet publish -c release -o /app --no-restore
FROM mcr.microsoft.com/dotnet/runtime:6.0
WORKDIR /app
COPY --from=build /app .
ENTRYPOINT ["dotnet", "csharp-npgsql.dll"]

View File

@@ -1,19 +0,0 @@
using Npgsql;
var host = Environment.GetEnvironmentVariable("NEON_HOST");
var database = Environment.GetEnvironmentVariable("NEON_DATABASE");
var user = Environment.GetEnvironmentVariable("NEON_USER");
var password = Environment.GetEnvironmentVariable("NEON_PASSWORD");
var connString = $"Host={host};Username={user};Password={password};Database={database}";
await using var conn = new NpgsqlConnection(connString);
await conn.OpenAsync();
await using (var cmd = new NpgsqlCommand("SELECT 1", conn))
await using (var reader = await cmd.ExecuteReaderAsync())
{
while (await reader.ReadAsync())
Console.WriteLine(reader.GetInt32(0));
}
await conn.CloseAsync();

View File

@@ -1,14 +0,0 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Npgsql" Version="6.0.5" />
</ItemGroup>
</Project>

View File

@@ -1 +0,0 @@

View File

@@ -1,10 +0,0 @@
FROM openjdk:17
WORKDIR /source
COPY . .
WORKDIR /app
RUN curl --output postgresql.jar https://jdbc.postgresql.org/download/postgresql-42.4.0.jar && \
javac -d /app /source/Example.java
CMD ["java", "-cp", "/app/postgresql.jar:.", "Example"]

View File

@@ -1,31 +0,0 @@
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Properties;
public class Example
{
public static void main( String[] args ) throws Exception
{
String host = System.getenv("NEON_HOST");
String database = System.getenv("NEON_DATABASE");
String user = System.getenv("NEON_USER");
String password = System.getenv("NEON_PASSWORD");
String url = "jdbc:postgresql://%s/%s".formatted(host, database);
Properties props = new Properties();
props.setProperty("user", user);
props.setProperty("password", password);
Connection conn = DriverManager.getConnection(url, props);
Statement st = conn.createStatement();
ResultSet rs = st.executeQuery("SELECT 1");
while (rs.next())
{
System.out.println(rs.getString(1));
}
rs.close();
st.close();
}
}

View File

@@ -1,8 +0,0 @@
FROM python:3.10
WORKDIR /source
COPY . .
RUN python3 -m pip install --no-cache-dir -r requirements.txt
CMD ["python3", "asyncpg_example.py"]

View File

@@ -1,30 +0,0 @@
#! /usr/bin/env python3
import asyncio
import os
import asyncpg
async def run(**kwargs) -> asyncpg.Record:
conn = await asyncpg.connect(
**kwargs,
statement_cache_size=0, # Prepared statements doesn't work pgbouncer
)
rv = await conn.fetchrow("SELECT 1")
await conn.close()
return rv
if __name__ == "__main__":
kwargs = {
k.lstrip("NEON_").lower(): v
for k in ("NEON_HOST", "NEON_DATABASE", "NEON_USER", "NEON_PASSWORD")
if (v := os.environ.get(k, None)) is not None
}
loop = asyncio.new_event_loop()
row = loop.run_until_complete(run(**kwargs))
print(row[0])

View File

@@ -1 +0,0 @@
asyncpg==0.25.0

View File

@@ -1,8 +0,0 @@
FROM python:3.10
WORKDIR /source
COPY . .
RUN python3 -m pip install --no-cache-dir -r requirements.txt
CMD ["python3", "pg8000_example.py"]

View File

@@ -1,23 +0,0 @@
#! /usr/bin/env python3
import os
import ssl
import pg8000.dbapi
if __name__ == "__main__":
kwargs = {
k.lstrip("NEON_").lower(): v
for k in ("NEON_HOST", "NEON_DATABASE", "NEON_USER", "NEON_PASSWORD")
if (v := os.environ.get(k, None)) is not None
}
conn = pg8000.dbapi.connect(
**kwargs,
ssl_context=True,
)
cursor = conn.cursor()
cursor.execute("SELECT 1")
row = cursor.fetchone()
print(row[0])
conn.close()

View File

@@ -1 +0,0 @@
pg8000==1.29.1

View File

@@ -1 +0,0 @@
.build/

View File

@@ -1,11 +0,0 @@
FROM swift:5.6 AS build
RUN apt-get -q update && apt-get -q install -y libssl-dev
WORKDIR /source
COPY . .
RUN swift build --configuration release
FROM swift:5.6
WORKDIR /app
COPY --from=build /source/.build/release/release .
CMD ["/app/PostgresClientKitExample"]

View File

@@ -1,41 +0,0 @@
{
"pins" : [
{
"identity" : "bluesocket",
"kind" : "remoteSourceControl",
"location" : "https://github.com/IBM-Swift/BlueSocket.git",
"state" : {
"revision" : "dd924c3bc2c1c144c42b8dda3896f1a03115ded4",
"version" : "2.0.2"
}
},
{
"identity" : "bluesslservice",
"kind" : "remoteSourceControl",
"location" : "https://github.com/IBM-Swift/BlueSSLService",
"state" : {
"revision" : "c249988fb748749739144e7f554710552acdc0bd",
"version" : "2.0.1"
}
},
{
"identity" : "postgresclientkit",
"kind" : "remoteSourceControl",
"location" : "https://github.com/codewinsdotcom/PostgresClientKit.git",
"state" : {
"branch" : "v1.4.3",
"revision" : "beafedaea6dc9f04712e9a8547b77f47c406a47e"
}
},
{
"identity" : "swift-argument-parser",
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-argument-parser",
"state" : {
"revision" : "6b2aa2748a7881eebb9f84fb10c01293e15b52ca",
"version" : "0.5.0"
}
}
],
"version" : 2
}

View File

@@ -1,17 +0,0 @@
// swift-tools-version:5.6
import PackageDescription
let package = Package(
name: "PostgresClientKitExample",
dependencies: [
.package(
url: "https://github.com/codewinsdotcom/PostgresClientKit.git",
revision: "v1.4.3"
)
],
targets: [
.target(
name: "PostgresClientKitExample",
dependencies: [ "PostgresClientKit" ])
]
)

View File

@@ -1,38 +0,0 @@
import Foundation
import PostgresClientKit
do {
var configuration = PostgresClientKit.ConnectionConfiguration()
let env = ProcessInfo.processInfo.environment
if let host = env["NEON_HOST"] {
configuration.host = host
}
if let database = env["NEON_DATABASE"] {
configuration.database = database
}
if let user = env["NEON_USER"] {
configuration.user = user
}
if let password = env["NEON_PASSWORD"] {
configuration.credential = .scramSHA256(password: password)
}
let connection = try PostgresClientKit.Connection(configuration: configuration)
defer { connection.close() }
let text = "SELECT 1;"
let statement = try connection.prepareStatement(text: text)
defer { statement.close() }
let cursor = try statement.execute(parameterValues: [ ])
defer { cursor.close() }
for row in cursor {
let columns = try row.get().columns
print(columns[0])
}
} catch {
print(error)
}

View File

@@ -1,54 +0,0 @@
import os
import shutil
import subprocess
from pathlib import Path
from tempfile import NamedTemporaryFile
from urllib.parse import urlparse
import pytest
from fixtures.neon_fixtures import RemotePostgres
@pytest.mark.remote_cluster
@pytest.mark.parametrize(
"client",
[
"csharp/npgsql",
"java/jdbc",
"python/asyncpg",
pytest.param(
"python/pg8000", # See https://github.com/neondatabase/neon/pull/2008#discussion_r912264281
marks=pytest.mark.xfail(reason="Handles SSL in incompatible with Neon way")),
pytest.param(
"swift/PostgresClientKit", # See https://github.com/neondatabase/neon/pull/2008#discussion_r911896592
marks=pytest.mark.xfail(reason="Neither SNI nor parameters is supported")),
"typescript/postgresql-client",
],
)
def test_pg_clients(remote_pg: RemotePostgres, client: str):
conn_options = remote_pg.conn_options()
env_file = None
with NamedTemporaryFile(mode="w", delete=False) as f:
env_file = f.name
f.write(f"""
NEON_HOST={conn_options["host"]}
NEON_DATABASE={conn_options["dbname"]}
NEON_USER={conn_options["user"]}
NEON_PASSWORD={conn_options["password"]}
""")
image_tag = client.lower()
docker_bin = shutil.which("docker")
if docker_bin is None:
raise RuntimeError("docker is required for running this test")
build_cmd = [
docker_bin, "build", "--quiet", "--tag", image_tag, f"{Path(__file__).parent / client}"
]
run_cmd = [docker_bin, "run", "--rm", "--env-file", env_file, image_tag]
subprocess.run(build_cmd, check=True)
result = subprocess.run(run_cmd, check=True, capture_output=True, text=True)
assert result.stdout.strip() == "1"

View File

@@ -1 +0,0 @@
node_modules/

View File

@@ -1,7 +0,0 @@
FROM node:16
WORKDIR /source
COPY . .
RUN npm clean-install
CMD ["/source/index.js"]

View File

@@ -1,25 +0,0 @@
#! /usr/bin/env node
import {Connection} from 'postgresql-client';
const params = {
"host": process.env.NEON_HOST,
"database": process.env.NEON_DATABASE,
"user": process.env.NEON_USER,
"password": process.env.NEON_PASSWORD,
"ssl": true,
}
for (const key in params) {
if (params[key] === undefined) {
delete params[key];
}
}
const connection = new Connection(params);
await connection.connect();
const result = await connection.query(
'select 1'
);
const rows = result.rows;
await connection.close();
console.log(rows[0][0]);

View File

@@ -1,262 +0,0 @@
{
"name": "typescript",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"dependencies": {
"postgresql-client": "^2.1.3"
}
},
"node_modules/debug": {
"version": "4.3.4",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz",
"integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==",
"dependencies": {
"ms": "2.1.2"
},
"engines": {
"node": ">=6.0"
},
"peerDependenciesMeta": {
"supports-color": {
"optional": true
}
}
},
"node_modules/doublylinked": {
"version": "2.5.1",
"resolved": "https://registry.npmjs.org/doublylinked/-/doublylinked-2.5.1.tgz",
"integrity": "sha512-Lpqb+qyHpR5Bew8xfKsxVYdjXEYAQ7HLp1IX47kHKmVCZeXErInytonjkL+kE+L4yaKSYEmDNR9MJYr5zwuAKA==",
"engines": {
"node": ">= 10.0"
}
},
"node_modules/lightning-pool": {
"version": "3.1.3",
"resolved": "https://registry.npmjs.org/lightning-pool/-/lightning-pool-3.1.3.tgz",
"integrity": "sha512-OgWuoh0BBrikWx/mc/XwIKwC9HHTe/GU3XODLMBPibv7jv8u0o2gQFS7KVEg5U8Oufg6N7mkm8Y1RoiLER0zeQ==",
"dependencies": {
"doublylinked": "^2.4.3",
"putil-promisify": "^1.8.2"
},
"engines": {
"node": ">= 10.0"
}
},
"node_modules/ms": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
},
"node_modules/obuf": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/obuf/-/obuf-1.1.2.tgz",
"integrity": "sha512-PX1wu0AmAdPqOL1mWhqmlOd8kOIZQwGZw6rh7uby9fTc5lhaOWFLX3I6R1hrF9k3zUY40e6igsLGkDXK92LJNg=="
},
"node_modules/postgres-bytea": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/postgres-bytea/-/postgres-bytea-3.0.0.tgz",
"integrity": "sha512-CNd4jim9RFPkObHSjVHlVrxoVQXz7quwNFpz7RY1okNNme49+sVyiTvTRobiLV548Hx/hb1BG+iE7h9493WzFw==",
"dependencies": {
"obuf": "~1.1.2"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/postgresql-client": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/postgresql-client/-/postgresql-client-2.1.3.tgz",
"integrity": "sha512-36Ga6JzhydsRzcCRcA/Y2hrX9C9sI0wS6sgRNBlOGkOwACXQVybmhDM7mAUbi9cT00N39Ee7btR0eMCyD//5Xg==",
"dependencies": {
"debug": "^4.3.4",
"doublylinked": "^2.5.1",
"lightning-pool": "^3.1.3",
"postgres-bytea": "^3.0.0",
"power-tasks": "^0.8.0",
"putil-merge": "^3.8.0",
"putil-promisify": "^1.8.5",
"putil-varhelpers": "^1.6.4"
},
"engines": {
"node": ">=14.0",
"npm": ">=7.0.0"
}
},
"node_modules/power-tasks": {
"version": "0.8.0",
"resolved": "https://registry.npmjs.org/power-tasks/-/power-tasks-0.8.0.tgz",
"integrity": "sha512-HhMcx+y5UkzlEmKslruz8uAU2Yq8CODJsFEMFsYMrGp5EzKpkNHGu0RNvBqyewKJDZHPNKtBSULsEAxMqQIBVQ==",
"dependencies": {
"debug": "^4.3.4",
"doublylinked": "^2.5.1",
"strict-typed-events": "^2.2.0"
},
"engines": {
"node": ">=14.0",
"npm": ">=7.0.0"
}
},
"node_modules/putil-merge": {
"version": "3.8.0",
"resolved": "https://registry.npmjs.org/putil-merge/-/putil-merge-3.8.0.tgz",
"integrity": "sha512-5tXPafJawWFoYZWLhkYXZ7IC/qkI45HgJsgv36lJBeq3qjFZfUITZE01CmWUBIlIn9f1yDiikqgYERARhVmgrg==",
"engines": {
"node": ">= 10.0"
}
},
"node_modules/putil-promisify": {
"version": "1.8.5",
"resolved": "https://registry.npmjs.org/putil-promisify/-/putil-promisify-1.8.5.tgz",
"integrity": "sha512-DItclasWWZokvpq3Aiaq0iV7WC8isP/0o/8mhC0yV6CQ781N/7NQHA1VyOm6hfpeFEwIQoo1C4Yjc5eH0q6Jbw==",
"engines": {
"node": ">= 6.0"
}
},
"node_modules/putil-varhelpers": {
"version": "1.6.4",
"resolved": "https://registry.npmjs.org/putil-varhelpers/-/putil-varhelpers-1.6.4.tgz",
"integrity": "sha512-nM2nO1HS2yJUyPgz0grd2XZAM0Spr6/tt6F4xXeNDjByV00BV2mq6lZ+sDff8WIfQBI9Hn1Czh93H1xBvKESxw==",
"engines": {
"node": ">= 6.0"
}
},
"node_modules/strict-typed-events": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/strict-typed-events/-/strict-typed-events-2.2.0.tgz",
"integrity": "sha512-yvHRtEfRRV7TJWi9cLhMt4Mb12JtAwXXONltUlLCA3fRB0LRy94B4E4e2gIlXzT5nZHTZVpOjJNOshri3LZ5bw==",
"dependencies": {
"putil-promisify": "^1.8.5",
"ts-gems": "^2.0.0"
},
"engines": {
"node": ">=14.0"
}
},
"node_modules/ts-gems": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/ts-gems/-/ts-gems-2.1.0.tgz",
"integrity": "sha512-5IqiG4nq1tsOhYPc4CwxA6bsE+TfU6uAABzf6bH4sdElgXpt/mlStvIYedvvtU7BM1+RRJxCaTLaaVFcCqNaiA==",
"peerDependencies": {
"typescript": ">=4.0.0"
}
},
"node_modules/typescript": {
"version": "4.7.4",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-4.7.4.tgz",
"integrity": "sha512-C0WQT0gezHuw6AdY1M2jxUO83Rjf0HP7Sk1DtXj6j1EwkQNZrHAg2XPWlq62oqEhYvONq5pkC2Y9oPljWToLmQ==",
"peer": true,
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
},
"engines": {
"node": ">=4.2.0"
}
}
},
"dependencies": {
"debug": {
"version": "4.3.4",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz",
"integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==",
"requires": {
"ms": "2.1.2"
}
},
"doublylinked": {
"version": "2.5.1",
"resolved": "https://registry.npmjs.org/doublylinked/-/doublylinked-2.5.1.tgz",
"integrity": "sha512-Lpqb+qyHpR5Bew8xfKsxVYdjXEYAQ7HLp1IX47kHKmVCZeXErInytonjkL+kE+L4yaKSYEmDNR9MJYr5zwuAKA=="
},
"lightning-pool": {
"version": "3.1.3",
"resolved": "https://registry.npmjs.org/lightning-pool/-/lightning-pool-3.1.3.tgz",
"integrity": "sha512-OgWuoh0BBrikWx/mc/XwIKwC9HHTe/GU3XODLMBPibv7jv8u0o2gQFS7KVEg5U8Oufg6N7mkm8Y1RoiLER0zeQ==",
"requires": {
"doublylinked": "^2.4.3",
"putil-promisify": "^1.8.2"
}
},
"ms": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
},
"obuf": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/obuf/-/obuf-1.1.2.tgz",
"integrity": "sha512-PX1wu0AmAdPqOL1mWhqmlOd8kOIZQwGZw6rh7uby9fTc5lhaOWFLX3I6R1hrF9k3zUY40e6igsLGkDXK92LJNg=="
},
"postgres-bytea": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/postgres-bytea/-/postgres-bytea-3.0.0.tgz",
"integrity": "sha512-CNd4jim9RFPkObHSjVHlVrxoVQXz7quwNFpz7RY1okNNme49+sVyiTvTRobiLV548Hx/hb1BG+iE7h9493WzFw==",
"requires": {
"obuf": "~1.1.2"
}
},
"postgresql-client": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/postgresql-client/-/postgresql-client-2.1.3.tgz",
"integrity": "sha512-36Ga6JzhydsRzcCRcA/Y2hrX9C9sI0wS6sgRNBlOGkOwACXQVybmhDM7mAUbi9cT00N39Ee7btR0eMCyD//5Xg==",
"requires": {
"debug": "^4.3.4",
"doublylinked": "^2.5.1",
"lightning-pool": "^3.1.3",
"postgres-bytea": "^3.0.0",
"power-tasks": "^0.8.0",
"putil-merge": "^3.8.0",
"putil-promisify": "^1.8.5",
"putil-varhelpers": "^1.6.4"
}
},
"power-tasks": {
"version": "0.8.0",
"resolved": "https://registry.npmjs.org/power-tasks/-/power-tasks-0.8.0.tgz",
"integrity": "sha512-HhMcx+y5UkzlEmKslruz8uAU2Yq8CODJsFEMFsYMrGp5EzKpkNHGu0RNvBqyewKJDZHPNKtBSULsEAxMqQIBVQ==",
"requires": {
"debug": "^4.3.4",
"doublylinked": "^2.5.1",
"strict-typed-events": "^2.2.0"
}
},
"putil-merge": {
"version": "3.8.0",
"resolved": "https://registry.npmjs.org/putil-merge/-/putil-merge-3.8.0.tgz",
"integrity": "sha512-5tXPafJawWFoYZWLhkYXZ7IC/qkI45HgJsgv36lJBeq3qjFZfUITZE01CmWUBIlIn9f1yDiikqgYERARhVmgrg=="
},
"putil-promisify": {
"version": "1.8.5",
"resolved": "https://registry.npmjs.org/putil-promisify/-/putil-promisify-1.8.5.tgz",
"integrity": "sha512-DItclasWWZokvpq3Aiaq0iV7WC8isP/0o/8mhC0yV6CQ781N/7NQHA1VyOm6hfpeFEwIQoo1C4Yjc5eH0q6Jbw=="
},
"putil-varhelpers": {
"version": "1.6.4",
"resolved": "https://registry.npmjs.org/putil-varhelpers/-/putil-varhelpers-1.6.4.tgz",
"integrity": "sha512-nM2nO1HS2yJUyPgz0grd2XZAM0Spr6/tt6F4xXeNDjByV00BV2mq6lZ+sDff8WIfQBI9Hn1Czh93H1xBvKESxw=="
},
"strict-typed-events": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/strict-typed-events/-/strict-typed-events-2.2.0.tgz",
"integrity": "sha512-yvHRtEfRRV7TJWi9cLhMt4Mb12JtAwXXONltUlLCA3fRB0LRy94B4E4e2gIlXzT5nZHTZVpOjJNOshri3LZ5bw==",
"requires": {
"putil-promisify": "^1.8.5",
"ts-gems": "^2.0.0"
}
},
"ts-gems": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/ts-gems/-/ts-gems-2.1.0.tgz",
"integrity": "sha512-5IqiG4nq1tsOhYPc4CwxA6bsE+TfU6uAABzf6bH4sdElgXpt/mlStvIYedvvtU7BM1+RRJxCaTLaaVFcCqNaiA==",
"requires": {}
},
"typescript": {
"version": "4.7.4",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-4.7.4.tgz",
"integrity": "sha512-C0WQT0gezHuw6AdY1M2jxUO83Rjf0HP7Sk1DtXj6j1EwkQNZrHAg2XPWlq62oqEhYvONq5pkC2Y9oPljWToLmQ==",
"peer": true
}
}
}

View File

@@ -1,6 +0,0 @@
{
"type": "module",
"dependencies": {
"postgresql-client": "^2.1.3"
}
}