mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-17 21:20:37 +00:00
Compare commits
113 Commits
separate-p
...
add-test-f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f1907beb50 | ||
|
|
37d90dc3b3 | ||
|
|
a185821d6f | ||
|
|
f99ccb5041 | ||
|
|
2db675a2f2 | ||
|
|
77a2bdf3d7 | ||
|
|
976576ae59 | ||
|
|
1a07ddae5f | ||
|
|
9bc12f7444 | ||
|
|
92bdf04758 | ||
|
|
67e091c906 | ||
|
|
dc102197df | ||
|
|
262cdf8344 | ||
|
|
3b819ee159 | ||
|
|
e9a3499e87 | ||
|
|
3414feae03 | ||
|
|
e94a5ce360 | ||
|
|
d5ec84b87b | ||
|
|
b21f7382cc | ||
|
|
648e8bbefe | ||
|
|
9218426e41 | ||
|
|
1d4114183c | ||
|
|
4cde0e7a37 | ||
|
|
83f7b8ed22 | ||
|
|
b8f0f37de2 | ||
|
|
18f251384d | ||
|
|
4cddb0f1a4 | ||
|
|
7b12deead7 | ||
|
|
63a72d99bb | ||
|
|
116ecdf87a | ||
|
|
431393e361 | ||
|
|
f38f45b01d | ||
|
|
a5154dce3e | ||
|
|
da5f8486ce | ||
|
|
ad08c273d3 | ||
|
|
7f97269277 | ||
|
|
6d99b4f1d8 | ||
|
|
a7bf60631f | ||
|
|
07bb7a2afe | ||
|
|
142e247e85 | ||
|
|
7da47d8a0a | ||
|
|
dc52436a8f | ||
|
|
995a2de21e | ||
|
|
e593cbaaba | ||
|
|
4b9e02be45 | ||
|
|
7a36d06cc2 | ||
|
|
4227cfc96e | ||
|
|
1fc761983f | ||
|
|
227d47d2f3 | ||
|
|
0290893bcc | ||
|
|
32fd709b34 | ||
|
|
3a9bff81db | ||
|
|
743370de98 | ||
|
|
cdfa9fe705 | ||
|
|
7cd68a0c27 | ||
|
|
beaa991f81 | ||
|
|
9430abae05 | ||
|
|
4da4c7f769 | ||
|
|
0d14d4a1a8 | ||
|
|
8c8431ebc6 | ||
|
|
84d1bc06a9 | ||
|
|
5133db44e1 | ||
|
|
4cb1074fe5 | ||
|
|
0a958b0ea1 | ||
|
|
1bbc8090f3 | ||
|
|
f7d8db7e39 | ||
|
|
e54941b811 | ||
|
|
52ce1c9d53 | ||
|
|
bc2cb5382b | ||
|
|
5f71aa09d3 | ||
|
|
b4f2c5b514 | ||
|
|
71f39bac3d | ||
|
|
177d5b1f22 | ||
|
|
8ba41b8c18 | ||
|
|
1edf3eb2c8 | ||
|
|
0ebb6bc4b0 | ||
|
|
092a9b74d3 | ||
|
|
e73b95a09d | ||
|
|
539007c173 | ||
|
|
d0494c391a | ||
|
|
2af5a96f0d | ||
|
|
9733b24f4a | ||
|
|
d865892a06 | ||
|
|
a0f76253f8 | ||
|
|
02afa2762c | ||
|
|
d903dd61bd | ||
|
|
417d9e9db2 | ||
|
|
6ace347175 | ||
|
|
14a027cce5 | ||
|
|
09ddd34b2a | ||
|
|
aeb3f0ea07 | ||
|
|
58b04438f0 | ||
|
|
01f1f1c1bf | ||
|
|
6a664629fa | ||
|
|
f6f29f58cd | ||
|
|
fd46e52e00 | ||
|
|
d6f12cff8e | ||
|
|
5a4394a8df | ||
|
|
d301b8364c | ||
|
|
172314155e | ||
|
|
28243d68e6 | ||
|
|
45680f9a2d | ||
|
|
5f4ccae5c5 | ||
|
|
39c59b8df5 | ||
|
|
9dcb9ca3da | ||
|
|
e308265e42 | ||
|
|
ed102f44d9 | ||
|
|
572ae74388 | ||
|
|
b445cf7665 | ||
|
|
cc680dd81c | ||
|
|
f4233fde39 | ||
|
|
b4c74c0ecd | ||
|
|
abff15dd7c |
@@ -1,369 +0,0 @@
|
||||
version: 2.1
|
||||
|
||||
executors:
|
||||
neon-xlarge-executor:
|
||||
resource_class: xlarge
|
||||
docker:
|
||||
# NB: when changed, do not forget to update rust image tag in all Dockerfiles
|
||||
- image: neondatabase/rust:1.58
|
||||
neon-executor:
|
||||
docker:
|
||||
- image: neondatabase/rust:1.58
|
||||
|
||||
jobs:
|
||||
# A job to build postgres
|
||||
build-postgres:
|
||||
executor: neon-xlarge-executor
|
||||
parameters:
|
||||
build_type:
|
||||
type: enum
|
||||
enum: ["debug", "release"]
|
||||
environment:
|
||||
BUILD_TYPE: << parameters.build_type >>
|
||||
steps:
|
||||
# Checkout the git repo (circleci doesn't have a flag to enable submodules here)
|
||||
- checkout
|
||||
|
||||
# Grab the postgres git revision to build a cache key.
|
||||
# Append makefile as it could change the way postgres is built.
|
||||
# Note this works even though the submodule hasn't been checkout out yet.
|
||||
- run:
|
||||
name: Get postgres cache key
|
||||
command: |
|
||||
git rev-parse HEAD:vendor/postgres > /tmp/cache-key-postgres
|
||||
cat Makefile >> /tmp/cache-key-postgres
|
||||
|
||||
- restore_cache:
|
||||
name: Restore postgres cache
|
||||
keys:
|
||||
# Restore ONLY if the rev key matches exactly
|
||||
- v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
|
||||
|
||||
# Build postgres if the restore_cache didn't find a build.
|
||||
# `make` can't figure out whether the cache is valid, since
|
||||
# it only compares file timestamps.
|
||||
- run:
|
||||
name: build postgres
|
||||
command: |
|
||||
if [ ! -e tmp_install/bin/postgres ]; then
|
||||
# "depth 1" saves some time by not cloning the whole repo
|
||||
git submodule update --init --depth 1
|
||||
# bail out on any warnings
|
||||
COPT='-Werror' mold -run make postgres -j$(nproc)
|
||||
fi
|
||||
|
||||
- save_cache:
|
||||
name: Save postgres cache
|
||||
key: v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
|
||||
paths:
|
||||
- tmp_install
|
||||
|
||||
# A job to build Neon rust code
|
||||
build-neon:
|
||||
executor: neon-xlarge-executor
|
||||
parameters:
|
||||
build_type:
|
||||
type: enum
|
||||
enum: ["debug", "release"]
|
||||
environment:
|
||||
BUILD_TYPE: << parameters.build_type >>
|
||||
steps:
|
||||
# Checkout the git repo (without submodules)
|
||||
- checkout
|
||||
|
||||
# Grab the postgres git revision to build a cache key.
|
||||
# Append makefile as it could change the way postgres is built.
|
||||
# Note this works even though the submodule hasn't been checkout out yet.
|
||||
- run:
|
||||
name: Get postgres cache key
|
||||
command: |
|
||||
git rev-parse HEAD:vendor/postgres > /tmp/cache-key-postgres
|
||||
cat Makefile >> /tmp/cache-key-postgres
|
||||
|
||||
|
||||
- restore_cache:
|
||||
name: Restore postgres cache
|
||||
keys:
|
||||
# Restore ONLY if the rev key matches exactly
|
||||
- v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
|
||||
|
||||
- restore_cache:
|
||||
name: Restore rust cache
|
||||
keys:
|
||||
# Require an exact match. While an out of date cache might speed up the build,
|
||||
# there's no way to clean out old packages, so the cache grows every time something
|
||||
# changes.
|
||||
- v05-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
|
||||
|
||||
# Build the rust code, including test binaries
|
||||
- run:
|
||||
name: Rust build << parameters.build_type >>
|
||||
command: |
|
||||
if [[ $BUILD_TYPE == "debug" ]]; then
|
||||
CARGO_FLAGS=
|
||||
elif [[ $BUILD_TYPE == "release" ]]; then
|
||||
CARGO_FLAGS="--release --features profiling"
|
||||
fi
|
||||
|
||||
export CARGO_INCREMENTAL=0
|
||||
export CACHEPOT_BUCKET=zenith-rust-cachepot
|
||||
export RUSTC_WRAPPER=""
|
||||
export AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
|
||||
export AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
|
||||
mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
|
||||
cachepot -s
|
||||
|
||||
- save_cache:
|
||||
name: Save rust cache
|
||||
key: v05-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
|
||||
paths:
|
||||
- ~/.cargo/registry
|
||||
- ~/.cargo/git
|
||||
- target
|
||||
|
||||
# Run rust unit tests
|
||||
- run:
|
||||
name: cargo test
|
||||
command: |
|
||||
if [[ $BUILD_TYPE == "debug" ]]; then
|
||||
CARGO_FLAGS=
|
||||
elif [[ $BUILD_TYPE == "release" ]]; then
|
||||
CARGO_FLAGS=--release
|
||||
fi
|
||||
|
||||
cargo test $CARGO_FLAGS
|
||||
|
||||
# Install the rust binaries, for use by test jobs
|
||||
- run:
|
||||
name: Install rust binaries
|
||||
command: |
|
||||
binaries=$(
|
||||
cargo metadata --format-version=1 --no-deps |
|
||||
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
|
||||
)
|
||||
|
||||
mkdir -p /tmp/zenith/bin
|
||||
mkdir -p /tmp/zenith/test_bin
|
||||
mkdir -p /tmp/zenith/etc
|
||||
|
||||
# Install target binaries
|
||||
for bin in $binaries; do
|
||||
SRC=target/$BUILD_TYPE/$bin
|
||||
DST=/tmp/zenith/bin/$bin
|
||||
cp $SRC $DST
|
||||
done
|
||||
|
||||
# Install the postgres binaries, for use by test jobs
|
||||
- run:
|
||||
name: Install postgres binaries
|
||||
command: |
|
||||
cp -a tmp_install /tmp/zenith/pg_install
|
||||
|
||||
# Save rust binaries for other jobs in the workflow
|
||||
- persist_to_workspace:
|
||||
root: /tmp/zenith
|
||||
paths:
|
||||
- "*"
|
||||
|
||||
check-codestyle-python:
|
||||
executor: neon-executor
|
||||
steps:
|
||||
- checkout
|
||||
- restore_cache:
|
||||
keys:
|
||||
- v2-python-deps-{{ checksum "poetry.lock" }}
|
||||
- run:
|
||||
name: Install deps
|
||||
command: ./scripts/pysync
|
||||
- save_cache:
|
||||
key: v2-python-deps-{{ checksum "poetry.lock" }}
|
||||
paths:
|
||||
- /home/circleci/.cache/pypoetry/virtualenvs
|
||||
- run:
|
||||
name: Print versions
|
||||
when: always
|
||||
command: |
|
||||
poetry run python --version
|
||||
poetry show
|
||||
- run:
|
||||
name: Run yapf to ensure code format
|
||||
when: always
|
||||
command: poetry run yapf --recursive --diff .
|
||||
- run:
|
||||
name: Run mypy to check types
|
||||
when: always
|
||||
command: poetry run mypy .
|
||||
|
||||
run-pytest:
|
||||
executor: neon-executor
|
||||
parameters:
|
||||
# pytest args to specify the tests to run.
|
||||
#
|
||||
# This can be a test file name, e.g. 'test_pgbench.py, or a subdirectory,
|
||||
# or '-k foobar' to run tests containing string 'foobar'. See pytest man page
|
||||
# section SPECIFYING TESTS / SELECTING TESTS for details.
|
||||
#
|
||||
# Select the type of Rust build. Must be "release" or "debug".
|
||||
build_type:
|
||||
type: string
|
||||
default: "debug"
|
||||
# This parameter is required, to prevent the mistake of running all tests in one job.
|
||||
test_selection:
|
||||
type: string
|
||||
default: ""
|
||||
# Arbitrary parameters to pytest. For example "-s" to prevent capturing stdout/stderr
|
||||
extra_params:
|
||||
type: string
|
||||
default: ""
|
||||
needs_postgres_source:
|
||||
type: boolean
|
||||
default: false
|
||||
run_in_parallel:
|
||||
type: boolean
|
||||
default: true
|
||||
save_perf_report:
|
||||
type: boolean
|
||||
default: false
|
||||
environment:
|
||||
BUILD_TYPE: << parameters.build_type >>
|
||||
steps:
|
||||
- attach_workspace:
|
||||
at: /tmp/zenith
|
||||
- checkout
|
||||
- when:
|
||||
condition: << parameters.needs_postgres_source >>
|
||||
steps:
|
||||
- run: git submodule update --init --depth 1
|
||||
- restore_cache:
|
||||
keys:
|
||||
- v2-python-deps-{{ checksum "poetry.lock" }}
|
||||
- run:
|
||||
name: Install deps
|
||||
command: ./scripts/pysync
|
||||
- save_cache:
|
||||
key: v2-python-deps-{{ checksum "poetry.lock" }}
|
||||
paths:
|
||||
- /home/circleci/.cache/pypoetry/virtualenvs
|
||||
- run:
|
||||
name: Run pytest
|
||||
# pytest doesn't output test logs in real time, so CI job may fail with
|
||||
# `Too long with no output` error, if a test is running for a long time.
|
||||
# In that case, tests should have internal timeouts that are less than
|
||||
# no_output_timeout, specified here.
|
||||
no_output_timeout: 10m
|
||||
environment:
|
||||
- NEON_BIN: /tmp/zenith/bin
|
||||
- POSTGRES_DISTRIB_DIR: /tmp/zenith/pg_install
|
||||
- TEST_OUTPUT: /tmp/test_output
|
||||
# this variable will be embedded in perf test report
|
||||
# and is needed to distinguish different environments
|
||||
- PLATFORM: zenith-local-ci
|
||||
command: |
|
||||
PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)"
|
||||
rm -rf $PERF_REPORT_DIR
|
||||
|
||||
TEST_SELECTION="test_runner/<< parameters.test_selection >>"
|
||||
EXTRA_PARAMS="<< parameters.extra_params >>"
|
||||
if [ -z "$TEST_SELECTION" ]; then
|
||||
echo "test_selection must be set"
|
||||
exit 1
|
||||
fi
|
||||
if << parameters.run_in_parallel >>; then
|
||||
EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
|
||||
fi
|
||||
if << parameters.save_perf_report >>; then
|
||||
if [[ $CIRCLE_BRANCH == "main" ]]; then
|
||||
mkdir -p "$PERF_REPORT_DIR"
|
||||
EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
|
||||
fi
|
||||
fi
|
||||
|
||||
export GITHUB_SHA=$CIRCLE_SHA1
|
||||
|
||||
# Run the tests.
|
||||
#
|
||||
# The junit.xml file allows CircleCI to display more fine-grained test information
|
||||
# in its "Tests" tab in the results page.
|
||||
# --verbose prints name of each test (helpful when there are
|
||||
# multiple tests in one file)
|
||||
# -rA prints summary in the end
|
||||
# -n4 uses four processes to run tests via pytest-xdist
|
||||
# -s is not used to prevent pytest from capturing output, because tests are running
|
||||
# in parallel and logs are mixed between different tests
|
||||
./scripts/pytest \
|
||||
--junitxml=$TEST_OUTPUT/junit.xml \
|
||||
--tb=short \
|
||||
--verbose \
|
||||
-m "not remote_cluster" \
|
||||
-rA $TEST_SELECTION $EXTRA_PARAMS
|
||||
|
||||
if << parameters.save_perf_report >>; then
|
||||
if [[ $CIRCLE_BRANCH == "main" ]]; then
|
||||
export REPORT_FROM="$PERF_REPORT_DIR"
|
||||
export REPORT_TO=local
|
||||
scripts/generate_and_push_perf_report.sh
|
||||
fi
|
||||
fi
|
||||
- run:
|
||||
# CircleCI artifacts are preserved one file at a time, so skipping
|
||||
# this step isn't a good idea. If you want to extract the
|
||||
# pageserver state, perhaps a tarball would be a better idea.
|
||||
name: Delete all data but logs
|
||||
when: always
|
||||
command: |
|
||||
du -sh /tmp/test_output/*
|
||||
find /tmp/test_output -type f ! -name "*.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" ! -name "*.metrics" -delete
|
||||
du -sh /tmp/test_output/*
|
||||
- store_artifacts:
|
||||
path: /tmp/test_output
|
||||
# The store_test_results step tells CircleCI where to find the junit.xml file.
|
||||
- store_test_results:
|
||||
path: /tmp/test_output
|
||||
# Save data (if any)
|
||||
- persist_to_workspace:
|
||||
root: /tmp/zenith
|
||||
paths:
|
||||
- "*"
|
||||
|
||||
workflows:
|
||||
build_and_test:
|
||||
jobs:
|
||||
- check-codestyle-python
|
||||
- build-postgres:
|
||||
name: build-postgres-<< matrix.build_type >>
|
||||
matrix:
|
||||
parameters:
|
||||
build_type: ["debug", "release"]
|
||||
- build-neon:
|
||||
name: build-neon-<< matrix.build_type >>
|
||||
matrix:
|
||||
parameters:
|
||||
build_type: ["debug", "release"]
|
||||
requires:
|
||||
- build-postgres-<< matrix.build_type >>
|
||||
- run-pytest:
|
||||
name: pg_regress-tests-<< matrix.build_type >>
|
||||
matrix:
|
||||
parameters:
|
||||
build_type: ["debug", "release"]
|
||||
test_selection: batch_pg_regress
|
||||
needs_postgres_source: true
|
||||
requires:
|
||||
- build-neon-<< matrix.build_type >>
|
||||
- run-pytest:
|
||||
name: other-tests-<< matrix.build_type >>
|
||||
matrix:
|
||||
parameters:
|
||||
build_type: ["debug", "release"]
|
||||
test_selection: batch_others
|
||||
requires:
|
||||
- build-neon-<< matrix.build_type >>
|
||||
- run-pytest:
|
||||
name: benchmarks
|
||||
context: PERF_TEST_RESULT_CONNSTR
|
||||
build_type: release
|
||||
test_selection: performance
|
||||
run_in_parallel: false
|
||||
save_perf_report: true
|
||||
requires:
|
||||
- build-neon-release
|
||||
56
.github/actions/download/action.yml
vendored
Normal file
56
.github/actions/download/action.yml
vendored
Normal file
@@ -0,0 +1,56 @@
|
||||
name: "Download an artifact"
|
||||
description: "Custom download action"
|
||||
inputs:
|
||||
name:
|
||||
description: "Artifact name"
|
||||
required: true
|
||||
path:
|
||||
description: "A directory to put artifact into"
|
||||
default: "."
|
||||
required: false
|
||||
skip-if-does-not-exist:
|
||||
description: "Allow to skip if file doesn't exist, fail otherwise"
|
||||
default: false
|
||||
required: false
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Download artifact
|
||||
id: download-artifact
|
||||
shell: bash -euxo pipefail {0}
|
||||
env:
|
||||
TARGET: ${{ inputs.path }}
|
||||
ARCHIVE: /tmp/downloads/${{ inputs.name }}.tar.zst
|
||||
SKIP_IF_DOES_NOT_EXIST: ${{ inputs.skip-if-does-not-exist }}
|
||||
run: |
|
||||
BUCKET=neon-github-public-dev
|
||||
PREFIX=artifacts/${GITHUB_RUN_ID}
|
||||
FILENAME=$(basename $ARCHIVE)
|
||||
|
||||
S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${PREFIX} | jq -r '.Contents[].Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true)
|
||||
if [ -z "${S3_KEY}" ]; then
|
||||
if [ "${SKIP_IF_DOES_NOT_EXIST}" = "true" ]; then
|
||||
echo '::set-output name=SKIPPED::true'
|
||||
exit 0
|
||||
else
|
||||
echo 2>&1 "Neither s3://${BUCKET}/${PREFIX}/${GITHUB_RUN_ATTEMPT}/${FILENAME} nor its version from previous attempts exist"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
echo '::set-output name=SKIPPED::false'
|
||||
|
||||
mkdir -p $(dirname $ARCHIVE)
|
||||
time aws s3 cp --only-show-errors s3://${BUCKET}/${S3_KEY} ${ARCHIVE}
|
||||
|
||||
- name: Extract artifact
|
||||
if: ${{ steps.download-artifact.outputs.SKIPPED == 'false' }}
|
||||
shell: bash -euxo pipefail {0}
|
||||
env:
|
||||
TARGET: ${{ inputs.path }}
|
||||
ARCHIVE: /tmp/downloads/${{ inputs.name }}.tar.zst
|
||||
run: |
|
||||
mkdir -p ${TARGET}
|
||||
time tar -xf ${ARCHIVE} -C ${TARGET}
|
||||
rm -f ${ARCHIVE}
|
||||
56
.github/actions/run-python-test-set/action.yml
vendored
56
.github/actions/run-python-test-set/action.yml
vendored
@@ -27,22 +27,35 @@ inputs:
|
||||
description: 'Whether to upload the performance report'
|
||||
required: false
|
||||
default: 'false'
|
||||
run_with_real_s3:
|
||||
description: 'Whether to pass real s3 credentials to the test suite'
|
||||
required: false
|
||||
default: 'false'
|
||||
real_s3_bucket:
|
||||
description: 'Bucket name for real s3 tests'
|
||||
required: false
|
||||
default: ''
|
||||
real_s3_region:
|
||||
description: 'Region name for real s3 tests'
|
||||
required: false
|
||||
default: ''
|
||||
real_s3_access_key_id:
|
||||
description: 'Access key id'
|
||||
required: false
|
||||
default: ''
|
||||
real_s3_secret_access_key:
|
||||
description: 'Secret access key'
|
||||
required: false
|
||||
default: ''
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Get Neon artifact for restoration
|
||||
uses: actions/download-artifact@v3
|
||||
- name: Get Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ inputs.build_type }}-${{ inputs.rust_toolchain }}-artifact
|
||||
path: ./neon-artifact/
|
||||
|
||||
- name: Extract Neon artifact
|
||||
shell: bash -ex {0}
|
||||
run: |
|
||||
mkdir -p /tmp/neon/
|
||||
tar -xf ./neon-artifact/neon.tar.zst -C /tmp/neon/
|
||||
rm -rf ./neon-artifact/
|
||||
path: /tmp/neon
|
||||
|
||||
- name: Checkout
|
||||
if: inputs.needs_postgres_source == 'true'
|
||||
@@ -59,7 +72,7 @@ runs:
|
||||
key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
|
||||
|
||||
- name: Install Python deps
|
||||
shell: bash -ex {0}
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: ./scripts/pysync
|
||||
|
||||
- name: Run pytest
|
||||
@@ -70,7 +83,10 @@ runs:
|
||||
# this variable will be embedded in perf test report
|
||||
# and is needed to distinguish different environments
|
||||
PLATFORM: github-actions-selfhosted
|
||||
shell: bash -ex {0}
|
||||
BUILD_TYPE: ${{ inputs.build_type }}
|
||||
AWS_ACCESS_KEY_ID: ${{ inputs.real_s3_access_key_id }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ inputs.real_s3_secret_access_key }}
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)"
|
||||
rm -rf $PERF_REPORT_DIR
|
||||
@@ -84,6 +100,14 @@ runs:
|
||||
if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then
|
||||
EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
|
||||
fi
|
||||
|
||||
if [[ "${{ inputs.run_with_real_s3 }}" == "true" ]]; then
|
||||
echo "REAL S3 ENABLED"
|
||||
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
|
||||
export REMOTE_STORAGE_S3_BUCKET=${{ inputs.real_s3_bucket }}
|
||||
export REMOTE_STORAGE_S3_REGION=${{ inputs.real_s3_region }}
|
||||
fi
|
||||
|
||||
if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
|
||||
if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
|
||||
mkdir -p "$PERF_REPORT_DIR"
|
||||
@@ -99,7 +123,7 @@ runs:
|
||||
|
||||
# Run the tests.
|
||||
#
|
||||
# The junit.xml file allows CircleCI to display more fine-grained test information
|
||||
# The junit.xml file allows CI tools to display more fine-grained test information
|
||||
# in its "Tests" tab in the results page.
|
||||
# --verbose prints name of each test (helpful when there are
|
||||
# multiple tests in one file)
|
||||
@@ -123,7 +147,7 @@ runs:
|
||||
fi
|
||||
|
||||
- name: Delete all data but logs
|
||||
shell: bash -ex {0}
|
||||
shell: bash -euxo pipefail {0}
|
||||
if: always()
|
||||
run: |
|
||||
du -sh /tmp/test_output/*
|
||||
@@ -132,9 +156,7 @@ runs:
|
||||
|
||||
- name: Upload python test logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: ./.github/actions/upload
|
||||
with:
|
||||
retention-days: 7
|
||||
if-no-files-found: error
|
||||
name: python-test-${{ inputs.test_selection }}-${{ runner.os }}-${{ inputs.build_type }}-${{ inputs.rust_toolchain }}-logs
|
||||
path: /tmp/test_output/
|
||||
|
||||
17
.github/actions/save-coverage-data/action.yml
vendored
17
.github/actions/save-coverage-data/action.yml
vendored
@@ -5,13 +5,18 @@ runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Merge coverage data
|
||||
shell: bash -ex {0}
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
|
||||
|
||||
- name: Upload coverage data
|
||||
uses: actions/upload-artifact@v3
|
||||
- name: Download previous coverage data into the same directory
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
retention-days: 7
|
||||
if-no-files-found: error
|
||||
name: coverage-data-artifact
|
||||
path: /tmp/coverage/
|
||||
path: /tmp/coverage
|
||||
skip-if-does-not-exist: true # skip if there's no previous coverage to download
|
||||
|
||||
- name: Upload coverage data
|
||||
uses: ./.github/actions/upload
|
||||
with:
|
||||
name: coverage-data-artifact
|
||||
path: /tmp/coverage
|
||||
|
||||
55
.github/actions/upload/action.yml
vendored
Normal file
55
.github/actions/upload/action.yml
vendored
Normal file
@@ -0,0 +1,55 @@
|
||||
name: "Upload an artifact"
|
||||
description: "Custom upload action"
|
||||
inputs:
|
||||
name:
|
||||
description: "Artifact name"
|
||||
required: true
|
||||
path:
|
||||
description: "A directory or file to upload"
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Prepare artifact
|
||||
shell: bash -euxo pipefail {0}
|
||||
env:
|
||||
SOURCE: ${{ inputs.path }}
|
||||
ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst
|
||||
run: |
|
||||
mkdir -p $(dirname $ARCHIVE)
|
||||
|
||||
if [ -f ${ARCHIVE} ]; then
|
||||
echo 2>&1 "File ${ARCHIVE} already exist. Something went wrong before"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
ZSTD_NBTHREADS=0
|
||||
if [ -d ${SOURCE} ]; then
|
||||
time tar -C ${SOURCE} -cf ${ARCHIVE} --zstd .
|
||||
elif [ -f ${SOURCE} ]; then
|
||||
time tar -cf ${ARCHIVE} --zstd ${SOURCE}
|
||||
elif ! ls ${SOURCE} > /dev/null 2>&1; then
|
||||
echo 2>&1 "${SOURCE} does not exist"
|
||||
exit 2
|
||||
else
|
||||
echo 2>&1 "${SOURCE} is neither a directory nor a file, do not know how to handle it"
|
||||
exit 3
|
||||
fi
|
||||
|
||||
- name: Upload artifact
|
||||
shell: bash -euxo pipefail {0}
|
||||
env:
|
||||
SOURCE: ${{ inputs.path }}
|
||||
ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst
|
||||
run: |
|
||||
BUCKET=neon-github-public-dev
|
||||
PREFIX=artifacts/${GITHUB_RUN_ID}
|
||||
FILENAME=$(basename $ARCHIVE)
|
||||
|
||||
FILESIZE=$(du -sh ${ARCHIVE} | cut -f1)
|
||||
|
||||
time aws s3 mv --only-show-errors ${ARCHIVE} s3://${BUCKET}/${PREFIX}/${GITHUB_RUN_ATTEMPT}/${FILENAME}
|
||||
|
||||
# Ref https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-job-summary
|
||||
echo "[${FILENAME}](https://${BUCKET}.s3.amazonaws.com/${PREFIX}/${GITHUB_RUN_ATTEMPT}/${FILENAME}) ${FILESIZE}" >> ${GITHUB_STEP_SUMMARY}
|
||||
30
.github/ansible/get_binaries.sh
vendored
30
.github/ansible/get_binaries.sh
vendored
@@ -2,30 +2,14 @@
|
||||
|
||||
set -e
|
||||
|
||||
RELEASE=${RELEASE:-false}
|
||||
|
||||
# look at docker hub for latest tag for neon docker image
|
||||
if [ "${RELEASE}" = "true" ]; then
|
||||
echo "search latest release tag"
|
||||
VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags |jq -r -S '.[].name' | grep release | sed 's/release-//g' | grep -E '^[0-9]+$' | sort -n | tail -1)
|
||||
if [ -z "${VERSION}" ]; then
|
||||
echo "no any docker tags found, exiting..."
|
||||
exit 1
|
||||
else
|
||||
TAG="release-${VERSION}"
|
||||
fi
|
||||
if [ -n "${DOCKER_TAG}" ]; then
|
||||
# Verson is DOCKER_TAG but without prefix
|
||||
VERSION=$(echo $DOCKER_TAG | sed 's/^.*-//g')
|
||||
else
|
||||
echo "search latest dev tag"
|
||||
VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags |jq -r -S '.[].name' | grep -E '^[0-9]+$' | sort -n | tail -1)
|
||||
if [ -z "${VERSION}" ]; then
|
||||
echo "no any docker tags found, exiting..."
|
||||
exit 1
|
||||
else
|
||||
TAG="${VERSION}"
|
||||
fi
|
||||
echo "Please set DOCKER_TAG environment variable"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "found ${VERSION}"
|
||||
|
||||
# do initial cleanup
|
||||
rm -rf neon_install postgres_install.tar.gz neon_install.tar.gz .neon_current_version
|
||||
@@ -33,8 +17,8 @@ mkdir neon_install
|
||||
|
||||
# retrieve binaries from docker image
|
||||
echo "getting binaries from docker image"
|
||||
docker pull --quiet neondatabase/neon:${TAG}
|
||||
ID=$(docker create neondatabase/neon:${TAG})
|
||||
docker pull --quiet neondatabase/neon:${DOCKER_TAG}
|
||||
ID=$(docker create neondatabase/neon:${DOCKER_TAG})
|
||||
docker cp ${ID}:/data/postgres_install.tar.gz .
|
||||
tar -xzf postgres_install.tar.gz -C neon_install
|
||||
docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/
|
||||
|
||||
2
.github/ansible/production.hosts
vendored
2
.github/ansible/production.hosts
vendored
@@ -17,4 +17,4 @@ env_name = prod-1
|
||||
console_mgmt_base_url = http://console-release.local
|
||||
bucket_name = zenith-storage-oregon
|
||||
bucket_region = us-west-2
|
||||
etcd_endpoints = etcd-release.local:2379
|
||||
etcd_endpoints = zenith-1-etcd.local:2379
|
||||
|
||||
7
.github/ansible/scripts/init_safekeeper.sh
vendored
7
.github/ansible/scripts/init_safekeeper.sh
vendored
@@ -1,7 +1,8 @@
|
||||
#!/bin/sh
|
||||
|
||||
# get instance id from meta-data service
|
||||
# fetch params from meta-data service
|
||||
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
|
||||
AZ_ID=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone)
|
||||
|
||||
# store fqdn hostname in var
|
||||
HOST=$(hostname -f)
|
||||
@@ -12,10 +13,10 @@ cat <<EOF | tee /tmp/payload
|
||||
"version": 1,
|
||||
"host": "${HOST}",
|
||||
"port": 6500,
|
||||
"http_port": 7676,
|
||||
"region_id": {{ console_region_id }},
|
||||
"instance_id": "${INSTANCE_ID}",
|
||||
"http_host": "${HOST}",
|
||||
"http_port": 7676
|
||||
"availability_zone_id": "${AZ_ID}"
|
||||
}
|
||||
EOF
|
||||
|
||||
|
||||
2
.github/ansible/staging.hosts
vendored
2
.github/ansible/staging.hosts
vendored
@@ -17,4 +17,4 @@ env_name = us-stage
|
||||
console_mgmt_base_url = http://console-staging.local
|
||||
bucket_name = zenith-staging-storage-us-east-1
|
||||
bucket_region = us-east-1
|
||||
etcd_endpoints = etcd-staging.local:2379
|
||||
etcd_endpoints = zenith-us-stage-etcd.local:2379
|
||||
|
||||
119
.github/workflows/benchmarking.yml
vendored
119
.github/workflows/benchmarking.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: benchmarking
|
||||
name: Benchmarking
|
||||
|
||||
on:
|
||||
# uncomment to run on push for debugging your PR
|
||||
@@ -15,6 +15,15 @@ on:
|
||||
|
||||
workflow_dispatch: # adds ability to run this manually
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
concurrency:
|
||||
# Allow only one workflow per any non-`main` branch.
|
||||
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
bench:
|
||||
# this workflow runs on self hosteed runner
|
||||
@@ -60,7 +69,6 @@ jobs:
|
||||
- name: Setup cluster
|
||||
env:
|
||||
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
|
||||
shell: bash
|
||||
run: |
|
||||
set -e
|
||||
|
||||
@@ -96,7 +104,9 @@ jobs:
|
||||
# since it might generate duplicates when calling ingest_perf_test_result.py
|
||||
rm -rf perf-report-staging
|
||||
mkdir -p perf-report-staging
|
||||
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-staging --timeout 3600
|
||||
# Set --sparse-ordering option of pytest-order plugin to ensure tests are running in order of appears in the file,
|
||||
# it's important for test_perf_pgbench.py::test_pgbench_remote_* tests
|
||||
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --sparse-ordering --skip-interfering-proc-check --out-dir perf-report-staging --timeout 5400
|
||||
|
||||
- name: Submit result
|
||||
env:
|
||||
@@ -113,3 +123,106 @@ jobs:
|
||||
slack-message: "Periodic perf testing: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
pgbench-compare:
|
||||
env:
|
||||
TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
|
||||
TEST_PG_BENCH_SCALES_MATRIX: "10gb"
|
||||
REMOTE_ENV: "1"
|
||||
POSTGRES_DISTRIB_DIR: /usr
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
connstr: [ BENCHMARK_CAPTEST_CONNSTR, BENCHMARK_RDS_CONNSTR ]
|
||||
|
||||
runs-on: dev
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2817580636
|
||||
|
||||
timeout-minutes: 360 # 6h
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Cache poetry deps
|
||||
id: cache_poetry
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.cache/pypoetry/virtualenvs
|
||||
key: v2-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
|
||||
|
||||
- name: Install Python deps
|
||||
run: ./scripts/pysync
|
||||
|
||||
- name: Calculate platform
|
||||
id: calculate-platform
|
||||
env:
|
||||
CONNSTR: ${{ matrix.connstr }}
|
||||
run: |
|
||||
if [ "${CONNSTR}" = "BENCHMARK_CAPTEST_CONNSTR" ]; then
|
||||
PLATFORM=neon-captest
|
||||
elif [ "${CONNSTR}" = "BENCHMARK_RDS_CONNSTR" ]; then
|
||||
PLATFORM=rds-aurora
|
||||
else
|
||||
echo 2>&1 "Unknown CONNSTR=${CONNSTR}. Allowed are BENCHMARK_CAPTEST_CONNSTR, and BENCHMARK_RDS_CONNSTR only"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "::set-output name=PLATFORM::${PLATFORM}"
|
||||
|
||||
- name: Install Deps
|
||||
run: |
|
||||
echo "deb http://apt.postgresql.org/pub/repos/apt focal-pgdg main" | sudo tee /etc/apt/sources.list.d/pgdg.list
|
||||
wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
|
||||
sudo apt -y update
|
||||
sudo apt install -y postgresql-14 postgresql-client-14
|
||||
|
||||
- name: Benchmark init
|
||||
env:
|
||||
PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }}
|
||||
BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }}
|
||||
run: |
|
||||
mkdir -p perf-report-captest
|
||||
|
||||
psql $BENCHMARK_CONNSTR -c "SELECT 1;"
|
||||
./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_init -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-captest --timeout 21600
|
||||
|
||||
- name: Benchmark simple-update
|
||||
env:
|
||||
PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }}
|
||||
BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }}
|
||||
run: |
|
||||
psql $BENCHMARK_CONNSTR -c "SELECT 1;"
|
||||
./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_simple_update -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-captest --timeout 21600
|
||||
|
||||
- name: Benchmark select-only
|
||||
env:
|
||||
PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }}
|
||||
BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }}
|
||||
run: |
|
||||
psql $BENCHMARK_CONNSTR -c "SELECT 1;"
|
||||
./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_select_only -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-captest --timeout 21600
|
||||
|
||||
- name: Submit result
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
run: |
|
||||
REPORT_FROM=$(realpath perf-report-captest) REPORT_TO=staging scripts/generate_and_push_perf_report.sh
|
||||
|
||||
- name: Upload logs
|
||||
if: always()
|
||||
uses: ./.github/actions/upload
|
||||
with:
|
||||
name: bench-captest-${{ steps.calculate-platform.outputs.PLATFORM }}
|
||||
path: /tmp/test_output/
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C033QLM5P7D" # dev-staging-stream
|
||||
slack-message: "Periodic perf testing: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
469
.github/workflows/build_and_test.yml
vendored
469
.github/workflows/build_and_test.yml
vendored
@@ -3,14 +3,10 @@ name: Test and Deploy
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- release
|
||||
- main
|
||||
- release
|
||||
pull_request:
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -ex {0}
|
||||
|
||||
concurrency:
|
||||
# Allow only one workflow per any non-`main` branch.
|
||||
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
|
||||
@@ -21,8 +17,39 @@ env:
|
||||
COPT: '-Werror'
|
||||
|
||||
jobs:
|
||||
tag:
|
||||
runs-on: dev
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
|
||||
outputs:
|
||||
build-tag: ${{steps.build-tag.outputs.tag}}
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get build tag
|
||||
run: |
|
||||
echo run:$GITHUB_RUN_ID
|
||||
echo ref:$GITHUB_REF_NAME
|
||||
echo rev:$(git rev-list --count HEAD)
|
||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
echo "::set-output name=tag::$(git rev-list --count HEAD)"
|
||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
|
||||
else
|
||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
||||
echo "::set-output name=tag::$GITHUB_RUN_ID"
|
||||
fi
|
||||
shell: bash
|
||||
id: build-tag
|
||||
|
||||
build-neon:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
runs-on: dev
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -31,8 +58,19 @@ jobs:
|
||||
|
||||
env:
|
||||
BUILD_TYPE: ${{ matrix.build_type }}
|
||||
GIT_VERSION: ${{ github.sha }}
|
||||
|
||||
steps:
|
||||
- name: Fix git ownership
|
||||
run: |
|
||||
# Workaround for `fatal: detected dubious ownership in repository at ...`
|
||||
#
|
||||
# Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
|
||||
# Ref https://github.com/actions/checkout/issues/785
|
||||
#
|
||||
git config --global --add safe.directory ${{ github.workspace }}
|
||||
git config --global --add safe.directory ${GITHUB_WORKSPACE}
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
@@ -42,6 +80,7 @@ jobs:
|
||||
- name: Set pg revision for caching
|
||||
id: pg_ver
|
||||
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres)
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
# Set some environment variables used by all the steps.
|
||||
#
|
||||
@@ -65,6 +104,7 @@ jobs:
|
||||
echo "cov_prefix=${cov_prefix}" >> $GITHUB_ENV
|
||||
echo "CARGO_FEATURES=${CARGO_FEATURES}" >> $GITHUB_ENV
|
||||
echo "CARGO_FLAGS=${CARGO_FLAGS}" >> $GITHUB_ENV
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
# Don't include the ~/.cargo/registry/src directory. It contains just
|
||||
# uncompressed versions of the crates in ~/.cargo/registry/cache
|
||||
@@ -81,8 +121,8 @@ jobs:
|
||||
target/
|
||||
# Fall back to older versions of the key, if no cache for current Cargo.lock was found
|
||||
key: |
|
||||
v3-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
|
||||
v3-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-
|
||||
v6-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
|
||||
v6-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-
|
||||
|
||||
- name: Cache postgres build
|
||||
id: cache_pg
|
||||
@@ -94,14 +134,17 @@ jobs:
|
||||
- name: Build postgres
|
||||
if: steps.cache_pg.outputs.cache-hit != 'true'
|
||||
run: mold -run make postgres -j$(nproc)
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
- name: Run cargo build
|
||||
run: |
|
||||
${cov_prefix} mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
- name: Run cargo test
|
||||
run: |
|
||||
${cov_prefix} cargo test $CARGO_FLAGS
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
- name: Install rust binaries
|
||||
run: |
|
||||
@@ -123,6 +166,7 @@ jobs:
|
||||
mkdir -p /tmp/coverage/
|
||||
|
||||
mkdir -p /tmp/neon/test_bin/
|
||||
|
||||
test_exe_paths=$(
|
||||
${cov_prefix} cargo test $CARGO_FLAGS --message-format=json --no-run |
|
||||
jq -r '.executable | select(. != null)'
|
||||
@@ -141,29 +185,28 @@ jobs:
|
||||
echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
|
||||
done
|
||||
fi
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
- name: Install postgres binaries
|
||||
run: cp -a tmp_install /tmp/neon/pg_install
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
- name: Prepare neon artifact
|
||||
run: ZSTD_NBTHREADS=0 tar -C /tmp/neon/ -cf ./neon.tar.zst --zstd .
|
||||
|
||||
- name: Upload neon binaries
|
||||
uses: actions/upload-artifact@v3
|
||||
- name: Upload Neon artifact
|
||||
uses: ./.github/actions/upload
|
||||
with:
|
||||
retention-days: 7
|
||||
if-no-files-found: error
|
||||
name: neon-${{ runner.os }}-${{ matrix.build_type }}-${{ matrix.rust_toolchain }}-artifact
|
||||
path: ./neon.tar.zst
|
||||
path: /tmp/neon
|
||||
|
||||
# XXX: keep this after the binaries.list is formed, so the coverage can properly work later
|
||||
- name: Merge and upload coverage data
|
||||
if: matrix.build_type == 'debug'
|
||||
uses: ./.github/actions/save-coverage-data
|
||||
|
||||
|
||||
pg_regress-tests:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
runs-on: dev
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
needs: [ build-neon ]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -190,7 +233,10 @@ jobs:
|
||||
uses: ./.github/actions/save-coverage-data
|
||||
|
||||
other-tests:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
runs-on: dev
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
needs: [ build-neon ]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -210,14 +256,22 @@ jobs:
|
||||
build_type: ${{ matrix.build_type }}
|
||||
rust_toolchain: ${{ matrix.rust_toolchain }}
|
||||
test_selection: batch_others
|
||||
|
||||
run_with_real_s3: true
|
||||
real_s3_bucket: ci-tests-s3
|
||||
real_s3_region: us-west-2
|
||||
real_s3_access_key_id: "${{ secrets.AWS_ACCESS_KEY_ID_CI_TESTS_S3 }}"
|
||||
real_s3_secret_access_key: "${{ secrets.AWS_SECRET_ACCESS_KEY_CI_TESTS_S3 }}"
|
||||
- name: Merge and upload coverage data
|
||||
if: matrix.build_type == 'debug'
|
||||
uses: ./.github/actions/save-coverage-data
|
||||
|
||||
benchmarks:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
runs-on: dev
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
needs: [ build-neon ]
|
||||
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -245,7 +299,10 @@ jobs:
|
||||
# while coverage is currently collected for the debug ones
|
||||
|
||||
coverage-report:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
runs-on: dev
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
needs: [ other-tests, pg_regress-tests ]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -268,28 +325,23 @@ jobs:
|
||||
!~/.cargo/registry/src
|
||||
~/.cargo/git/
|
||||
target/
|
||||
key: v3-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
|
||||
key: v5-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
|
||||
|
||||
- name: Get Neon artifact for restoration
|
||||
uses: actions/download-artifact@v3
|
||||
- name: Get Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ matrix.build_type }}-${{ matrix.rust_toolchain }}-artifact
|
||||
path: ./neon-artifact/
|
||||
path: /tmp/neon
|
||||
|
||||
- name: Extract Neon artifact
|
||||
run: |
|
||||
mkdir -p /tmp/neon/
|
||||
tar -xf ./neon-artifact/neon.tar.zst -C /tmp/neon/
|
||||
rm -rf ./neon-artifact/
|
||||
|
||||
- name: Restore coverage data
|
||||
uses: actions/download-artifact@v3
|
||||
- name: Get coverage artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: coverage-data-artifact
|
||||
path: /tmp/coverage/
|
||||
path: /tmp/coverage
|
||||
|
||||
- name: Merge coverage data
|
||||
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
- name: Build and upload coverage report
|
||||
run: |
|
||||
@@ -322,187 +374,191 @@ jobs:
|
||||
\"description\": \"Coverage report is ready\",
|
||||
\"target_url\": \"$REPORT_URL\"
|
||||
}"
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
trigger-e2e-tests:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
needs: [ build-neon ]
|
||||
steps:
|
||||
- name: Set PR's status to pending and request a remote CI test
|
||||
run: |
|
||||
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
|
||||
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
|
||||
runs-on: dev
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
needs: [ build-neon ]
|
||||
steps:
|
||||
- name: Set PR's status to pending and request a remote CI test
|
||||
run: |
|
||||
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
|
||||
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
|
||||
|
||||
REMOTE_REPO="${{ github.repository_owner }}/cloud"
|
||||
REMOTE_REPO="${{ github.repository_owner }}/cloud"
|
||||
|
||||
curl -f -X POST \
|
||||
https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
|
||||
-H "Accept: application/vnd.github.v3+json" \
|
||||
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
|
||||
--data \
|
||||
"{
|
||||
\"state\": \"pending\",
|
||||
\"context\": \"neon-cloud-e2e\",
|
||||
\"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
|
||||
}"
|
||||
curl -f -X POST \
|
||||
https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
|
||||
-H "Accept: application/vnd.github.v3+json" \
|
||||
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
|
||||
--data \
|
||||
"{
|
||||
\"state\": \"pending\",
|
||||
\"context\": \"neon-cloud-e2e\",
|
||||
\"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
|
||||
}"
|
||||
|
||||
curl -f -X POST \
|
||||
https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
|
||||
-H "Accept: application/vnd.github.v3+json" \
|
||||
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
|
||||
--data \
|
||||
"{
|
||||
\"ref\": \"main\",
|
||||
\"inputs\": {
|
||||
\"ci_job_name\": \"neon-cloud-e2e\",
|
||||
\"commit_hash\": \"$COMMIT_SHA\",
|
||||
\"remote_repo\": \"${{ github.repository }}\"
|
||||
}
|
||||
}"
|
||||
curl -f -X POST \
|
||||
https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
|
||||
-H "Accept: application/vnd.github.v3+json" \
|
||||
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
|
||||
--data \
|
||||
"{
|
||||
\"ref\": \"main\",
|
||||
\"inputs\": {
|
||||
\"ci_job_name\": \"neon-cloud-e2e\",
|
||||
\"commit_hash\": \"$COMMIT_SHA\",
|
||||
\"remote_repo\": \"${{ github.repository }}\"
|
||||
}
|
||||
}"
|
||||
|
||||
neon-image:
|
||||
runs-on: dev
|
||||
container: gcr.io/kaniko-project/executor:v1.9.0-debug
|
||||
|
||||
docker-image:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
needs: [ pg_regress-tests, other-tests ]
|
||||
if: |
|
||||
(github.ref_name == 'main' || github.ref_name == 'release') &&
|
||||
github.event_name != 'workflow_dispatch'
|
||||
outputs:
|
||||
build-tag: ${{steps.build-tag.outputs.tag}}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v1 # v3 won't work with kaniko
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Login to DockerHub
|
||||
uses: docker/login-action@v1
|
||||
with:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
- name: Configure ECR login
|
||||
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v1
|
||||
with:
|
||||
driver: docker
|
||||
- name: Kaniko build neon
|
||||
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:$GITHUB_RUN_ID
|
||||
|
||||
- name: Get build tag
|
||||
run: |
|
||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
echo "::set-output name=tag::$(git rev-list --count HEAD)"
|
||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
|
||||
else
|
||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
||||
exit 1
|
||||
fi
|
||||
id: build-tag
|
||||
compute-tools-image:
|
||||
runs-on: dev
|
||||
container: gcr.io/kaniko-project/executor:v1.9.0-debug
|
||||
|
||||
- name: Get legacy build tag
|
||||
run: |
|
||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
echo "::set-output name=tag::latest"
|
||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||
echo "::set-output name=tag::release"
|
||||
else
|
||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
||||
exit 1
|
||||
fi
|
||||
id: legacy-build-tag
|
||||
|
||||
- name: Build neon Docker image
|
||||
uses: docker/build-push-action@v2
|
||||
with:
|
||||
context: .
|
||||
build-args: |
|
||||
GIT_VERSION="${{github.sha}}"
|
||||
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
|
||||
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
|
||||
pull: true
|
||||
push: true
|
||||
tags: neondatabase/neon:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/neon:${{steps.build-tag.outputs.tag}}
|
||||
|
||||
docker-image-compute:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
needs: [ pg_regress-tests, other-tests ]
|
||||
if: |
|
||||
(github.ref_name == 'main' || github.ref_name == 'release') &&
|
||||
github.event_name != 'workflow_dispatch'
|
||||
outputs:
|
||||
build-tag: ${{steps.build-tag.outputs.tag}}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v1 # v3 won't work with kaniko
|
||||
|
||||
- name: Configure ECR login
|
||||
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
|
||||
|
||||
- name: Kaniko build compute tools
|
||||
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:$GITHUB_RUN_ID
|
||||
|
||||
promote-image-compute-tools:
|
||||
runs-on: dev
|
||||
needs: [ compute-tools-image ]
|
||||
if: github.event_name != 'workflow_dispatch'
|
||||
container: amazon/aws-cli
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
name: [ compute-tools ]
|
||||
|
||||
steps:
|
||||
- name: Promote image to latest
|
||||
run:
|
||||
MANIFEST=$(aws ecr batch-get-image --repository-name ${{ matrix.name }} --image-ids imageTag=$GITHUB_RUN_ID --query 'images[].imageManifest' --output text) && aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"
|
||||
|
||||
compute-node-image:
|
||||
runs-on: dev
|
||||
container: gcr.io/kaniko-project/executor:v1.9.0-debug
|
||||
# note: This image depends on neondatabase/compute-tools:latest (or :thisversion),
|
||||
# which isn't available until after the image is promoted.
|
||||
# Ergo, we must explicitly build and promote compute-tools separately.
|
||||
needs:
|
||||
- promote-image-compute-tools
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v1 # v3 won't work with kaniko
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Login to DockerHub
|
||||
uses: docker/login-action@v1
|
||||
with:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
- name: Configure ECR login
|
||||
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v1
|
||||
with:
|
||||
driver: docker
|
||||
- name: Kaniko build compute node
|
||||
working-directory: ./vendor/postgres/
|
||||
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --build-arg=COMPUTE_TOOLS_TAG=$GITHUB_RUN_ID --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:$GITHUB_RUN_ID
|
||||
|
||||
- name: Get build tag
|
||||
promote-images:
|
||||
runs-on: dev
|
||||
needs: [ neon-image, compute-node-image ]
|
||||
if: github.event_name != 'workflow_dispatch'
|
||||
container: amazon/aws-cli
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
name: [ neon, compute-node ]
|
||||
|
||||
steps:
|
||||
- name: Promote image to latest
|
||||
run:
|
||||
MANIFEST=$(aws ecr batch-get-image --repository-name ${{ matrix.name }} --image-ids imageTag=$GITHUB_RUN_ID --query 'images[].imageManifest' --output text) && aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"
|
||||
|
||||
push-docker-hub:
|
||||
runs-on: dev
|
||||
needs: [ promote-images, tag ]
|
||||
container: golang:1.19-bullseye
|
||||
|
||||
steps:
|
||||
- name: Install Crane & ECR helper
|
||||
run: |
|
||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
echo "::set-output name=tag::$(git rev-list --count HEAD)"
|
||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
|
||||
else
|
||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
||||
exit 1
|
||||
fi
|
||||
id: build-tag
|
||||
go install github.com/google/go-containerregistry/cmd/crane@31786c6cbb82d6ec4fb8eb79cd9387905130534e # v0.11.0
|
||||
go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0
|
||||
|
||||
# - name: Get build tag
|
||||
# run: |
|
||||
# if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
# echo "::set-output name=tag::$(git rev-list --count HEAD)"
|
||||
# elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||
# echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
|
||||
# else
|
||||
# echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release' "
|
||||
# echo "::set-output name=tag::$GITHUB_RUN_ID"
|
||||
# fi
|
||||
# id: build-tag
|
||||
|
||||
- name: Get legacy build tag
|
||||
- name: Configure ECR login
|
||||
run: |
|
||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
echo "::set-output name=tag::latest"
|
||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||
echo "::set-output name=tag::release"
|
||||
else
|
||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
||||
exit 1
|
||||
fi
|
||||
id: legacy-build-tag
|
||||
mkdir /github/home/.docker/
|
||||
echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json
|
||||
|
||||
- name: Build compute-tools Docker image
|
||||
uses: docker/build-push-action@v2
|
||||
with:
|
||||
context: .
|
||||
build-args: |
|
||||
GIT_VERSION="${{github.sha}}"
|
||||
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
|
||||
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
|
||||
push: false
|
||||
file: Dockerfile.compute-tools
|
||||
tags: neondatabase/compute-tools:local
|
||||
- name: Pull neon image from ECR
|
||||
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:latest neon
|
||||
|
||||
- name: Push compute-tools Docker image
|
||||
uses: docker/build-push-action@v2
|
||||
with:
|
||||
context: .
|
||||
build-args: |
|
||||
GIT_VERSION="${{github.sha}}"
|
||||
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
|
||||
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
|
||||
push: true
|
||||
file: Dockerfile.compute-tools
|
||||
tags: neondatabase/compute-tools:${{steps.legacy-build-tag.outputs.tag}}
|
||||
- name: Pull compute tools image from ECR
|
||||
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest compute-tools
|
||||
|
||||
- name: Build compute-node Docker image
|
||||
uses: docker/build-push-action@v2
|
||||
with:
|
||||
context: ./vendor/postgres/
|
||||
build-args:
|
||||
COMPUTE_TOOLS_TAG=local
|
||||
push: true
|
||||
tags: neondatabase/compute-node:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/compute-node:${{steps.build-tag.outputs.tag}}
|
||||
- name: Pull compute node image from ECR
|
||||
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:latest compute-node
|
||||
|
||||
- name: Configure docker login
|
||||
run: |
|
||||
# ECR Credential Helper & Docker Hub don't work together in config, hence reset
|
||||
echo "" > /github/home/.docker/config.json
|
||||
crane auth login -u ${{ secrets.NEON_DOCKERHUB_USERNAME }} -p ${{ secrets.NEON_DOCKERHUB_PASSWORD }} index.docker.io
|
||||
|
||||
- name: Push neon image to Docker Hub
|
||||
run: crane push neon neondatabase/neon:${{needs.tag.outputs.build-tag}}
|
||||
|
||||
- name: Push compute tools image to Docker Hub
|
||||
run: crane push compute-tools neondatabase/compute-tools:${{needs.tag.outputs.build-tag}}
|
||||
|
||||
- name: Push compute node image to Docker Hub
|
||||
run: crane push compute-node neondatabase/compute-node:${{needs.tag.outputs.build-tag}}
|
||||
|
||||
- name: Add latest tag to images
|
||||
if: |
|
||||
(github.ref_name == 'main' || github.ref_name == 'release') &&
|
||||
github.event_name != 'workflow_dispatch'
|
||||
run: |
|
||||
crane tag neondatabase/neon:${{needs.tag.outputs.build-tag}} latest
|
||||
crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest
|
||||
crane tag neondatabase/compute-node:${{needs.tag.outputs.build-tag}} latest
|
||||
|
||||
calculate-deploy-targets:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
@@ -517,7 +573,7 @@ jobs:
|
||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA"}'
|
||||
NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA"}'
|
||||
echo "::set-output name=include::[$STAGING]"
|
||||
echo "::set-output name=include::[$STAGING, $NEON_STRESS]"
|
||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||
PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA"}'
|
||||
echo "::set-output name=include::[$PRODUCTION]"
|
||||
@@ -528,14 +584,16 @@ jobs:
|
||||
|
||||
deploy:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
# We need both storage **and** compute images for deploy, because control plane
|
||||
# picks the compute version based on the storage version. If it notices a fresh
|
||||
# storage it may bump the compute version. And if compute image failed to build
|
||||
# it may break things badly.
|
||||
needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
|
||||
#container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
|
||||
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
|
||||
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
|
||||
needs: [ push-docker-hub, calculate-deploy-targets, tag, other-tests, pg_regress-tests ]
|
||||
if: |
|
||||
(github.ref_name == 'main' || github.ref_name == 'release') &&
|
||||
github.event_name != 'workflow_dispatch'
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
strategy:
|
||||
matrix:
|
||||
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
|
||||
@@ -546,12 +604,19 @@ jobs:
|
||||
submodules: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
|
||||
- name: Setup ansible
|
||||
run: |
|
||||
export PATH="/root/.local/bin:$PATH"
|
||||
pip install --progress-bar off --user ansible boto3
|
||||
|
||||
- name: Redeploy
|
||||
run: |
|
||||
export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
|
||||
cd "$(pwd)/.github/ansible"
|
||||
|
||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
@@ -574,13 +639,16 @@ jobs:
|
||||
rm -f neon_install.tar.gz .neon_current_version
|
||||
|
||||
deploy-proxy:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
# Compute image isn't strictly required for proxy deploy, but let's still wait for it
|
||||
# to run all deploy jobs consistently.
|
||||
needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
|
||||
runs-on: dev
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
|
||||
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
|
||||
needs: [ push-docker-hub, calculate-deploy-targets, tag, other-tests, pg_regress-tests ]
|
||||
if: |
|
||||
(github.ref_name == 'main' || github.ref_name == 'release') &&
|
||||
github.event_name != 'workflow_dispatch'
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
strategy:
|
||||
matrix:
|
||||
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
|
||||
@@ -593,6 +661,9 @@ jobs:
|
||||
submodules: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Add curl
|
||||
run: apt update && apt install curl -y
|
||||
|
||||
- name: Store kubeconfig file
|
||||
run: |
|
||||
echo "${{ secrets[matrix.kubeconfig_secret] }}" | base64 --decode > ${KUBECONFIG}
|
||||
@@ -605,6 +676,6 @@ jobs:
|
||||
|
||||
- name: Re-deploy proxy
|
||||
run: |
|
||||
DOCKER_TAG=${{needs.docker-image.outputs.build-tag}}
|
||||
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
|
||||
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
|
||||
6
.github/workflows/codestyle.yml
vendored
6
.github/workflows/codestyle.yml
vendored
@@ -8,7 +8,7 @@ on:
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -ex {0}
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
concurrency:
|
||||
# Allow only one workflow per any non-`main` branch.
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
# Rust toolchains (e.g. nightly or 1.37.0), add them here.
|
||||
rust_toolchain: [1.58]
|
||||
os: [ubuntu-latest, macos-latest]
|
||||
timeout-minutes: 50
|
||||
timeout-minutes: 60
|
||||
name: run regression test suite
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
@@ -101,7 +101,7 @@ jobs:
|
||||
!~/.cargo/registry/src
|
||||
~/.cargo/git
|
||||
target
|
||||
key: v1-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust-${{ matrix.rust_toolchain }}
|
||||
key: v2-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust-${{ matrix.rust_toolchain }}
|
||||
|
||||
- name: Run cargo clippy
|
||||
run: ./run_clippy.sh
|
||||
|
||||
23
.github/workflows/pg_clients.yml
vendored
23
.github/workflows/pg_clients.yml
vendored
@@ -19,8 +19,12 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
test-postgres-client-libs:
|
||||
# TODO: switch to gen2 runner, requires docker
|
||||
runs-on: [ ubuntu-latest ]
|
||||
|
||||
env:
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
@@ -40,16 +44,16 @@ jobs:
|
||||
key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
|
||||
|
||||
- name: Install Python deps
|
||||
shell: bash -ex {0}
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: ./scripts/pysync
|
||||
|
||||
- name: Run pytest
|
||||
env:
|
||||
REMOTE_ENV: 1
|
||||
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
shell: bash -ex {0}
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
# Test framework expects we have psql binary;
|
||||
# but since we don't really need it in this test, let's mock it
|
||||
@@ -61,9 +65,18 @@ jobs:
|
||||
-m "remote_cluster" \
|
||||
-rA "test_runner/pg_clients"
|
||||
|
||||
# We use GitHub's action upload-artifact because `ubuntu-latest` doesn't have configured AWS CLI.
|
||||
# It will be fixed after switching to gen2 runner
|
||||
- name: Upload python test logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
retention-days: 7
|
||||
name: python-test-pg_clients-${{ runner.os }}-stage-logs
|
||||
path: ${{ env.TEST_OUTPUT }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: failure()
|
||||
id: slack
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C033QLM5P7D" # dev-staging-stream
|
||||
|
||||
@@ -11,17 +11,15 @@ than it was before.
|
||||
|
||||
## Submitting changes
|
||||
|
||||
1. Make a PR for every change.
|
||||
|
||||
Even seemingly trivial patches can break things in surprising ways.
|
||||
Use of common sense is OK. If you're only fixing a typo in a comment,
|
||||
it's probably fine to just push it. But if in doubt, open a PR.
|
||||
|
||||
2. Get at least one +1 on your PR before you push.
|
||||
1. Get at least one +1 on your PR before you push.
|
||||
|
||||
For simple patches, it will only take a minute for someone to review
|
||||
it.
|
||||
|
||||
2. Don't force push small changes after making the PR ready for review.
|
||||
Doing so will force readers to re-read your entire PR, which will delay
|
||||
the review process.
|
||||
|
||||
3. Always keep the CI green.
|
||||
|
||||
Do not push, if the CI failed on your PR. Even if you think it's not
|
||||
|
||||
260
Cargo.lock
generated
260
Cargo.lock
generated
@@ -48,9 +48,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.58"
|
||||
version = "1.0.59"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bb07d2053ccdbe10e2af2995a2f116c1330396493dc1269f6a91d0ae82e19704"
|
||||
checksum = "c91f1f46651137be86f3a2b9a8359f9ab421d04d941c62b5982e1ca21113adf9"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
]
|
||||
@@ -77,7 +77,7 @@ dependencies = [
|
||||
"num-traits",
|
||||
"rusticata-macros",
|
||||
"thiserror",
|
||||
"time 0.3.11",
|
||||
"time 0.3.12",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -126,9 +126,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "async-trait"
|
||||
version = "0.1.56"
|
||||
version = "0.1.57"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "96cf8829f67d2eab0b2dfa42c5d0ef737e0724e4a82b01b3e292456202b19716"
|
||||
checksum = "76464446b8bc32758d7e88ee1a804d9914cd9b1cb264c029899680b0be29826f"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -154,9 +154,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "axum"
|
||||
version = "0.5.12"
|
||||
version = "0.5.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d16705af05732b7d3258ec0f7b73c03a658a28925e050d8852d5b568ee8bcf4e"
|
||||
checksum = "6b9496f0c1d1afb7a2af4338bbe1d969cddfead41d87a9fb3aaa6d0bbc7af648"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"axum-core",
|
||||
@@ -166,7 +166,7 @@ dependencies = [
|
||||
"http",
|
||||
"http-body",
|
||||
"hyper",
|
||||
"itoa 1.0.2",
|
||||
"itoa 1.0.3",
|
||||
"matchit",
|
||||
"memchr",
|
||||
"mime",
|
||||
@@ -298,9 +298,9 @@ checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3"
|
||||
|
||||
[[package]]
|
||||
name = "bytemuck"
|
||||
version = "1.10.0"
|
||||
version = "1.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c53dfa917ec274df8ed3c572698f381a24eef2efba9492d797301b72b6db408a"
|
||||
checksum = "a5377c8865e74a160d21f29c2d40669f53286db6eab59b88540cbb12ffc8b835"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
@@ -310,22 +310,13 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
|
||||
|
||||
[[package]]
|
||||
name = "bytes"
|
||||
version = "1.1.0"
|
||||
version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8"
|
||||
checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cast"
|
||||
version = "0.2.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4c24dab4283a142afa2fdca129b80ad2c6284e073930f964c3a1293c225ee39a"
|
||||
dependencies = [
|
||||
"rustc_version",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cast"
|
||||
version = "0.3.0"
|
||||
@@ -395,9 +386,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "3.2.12"
|
||||
version = "3.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ab8b79fe3946ceb4a0b1c080b4018992b8d27e9ff363644c1c9b6387c854614d"
|
||||
checksum = "a3dbbb6653e7c55cc8595ad3e1f7be8f32aba4eb7ff7f0fd1163d4f3d137c0a9"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"bitflags",
|
||||
@@ -464,10 +455,9 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
"clap 3.2.12",
|
||||
"clap 3.2.16",
|
||||
"env_logger",
|
||||
"hyper",
|
||||
"libc",
|
||||
"log",
|
||||
"postgres",
|
||||
"regex",
|
||||
@@ -505,8 +495,8 @@ name = "control_plane"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"lazy_static",
|
||||
"nix",
|
||||
"once_cell",
|
||||
"pageserver",
|
||||
"postgres",
|
||||
"regex",
|
||||
@@ -517,7 +507,6 @@ dependencies = [
|
||||
"tar",
|
||||
"thiserror",
|
||||
"toml",
|
||||
"url",
|
||||
"utils",
|
||||
"workspace_hack",
|
||||
]
|
||||
@@ -581,7 +570,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"cast 0.3.0",
|
||||
"cast",
|
||||
"clap 2.34.0",
|
||||
"criterion-plot",
|
||||
"csv",
|
||||
@@ -602,19 +591,19 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "criterion-plot"
|
||||
version = "0.4.4"
|
||||
version = "0.4.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57"
|
||||
checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876"
|
||||
dependencies = [
|
||||
"cast 0.2.7",
|
||||
"cast",
|
||||
"itertools",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.5"
|
||||
version = "0.5.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4c02a4d71819009c192cf4872265391563fd6a84c81ff2c0f2a7026ca4c1d85c"
|
||||
checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
@@ -622,9 +611,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-deque"
|
||||
version = "0.8.1"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e"
|
||||
checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crossbeam-epoch",
|
||||
@@ -633,9 +622,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-epoch"
|
||||
version = "0.9.9"
|
||||
version = "0.9.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "07db9d94cbd326813772c968ccd25999e5f8ae22f4f8d1b11effa37ef6ce281d"
|
||||
checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"cfg-if",
|
||||
@@ -647,9 +636,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.10"
|
||||
version = "0.8.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7d82ee10ce34d7bc12c2122495e7593a9c41347ecdd64185af4ecf72cb1a7f83"
|
||||
checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"once_cell",
|
||||
@@ -682,9 +671,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "crypto-common"
|
||||
version = "0.1.5"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2ccfd8c0ee4cce11e45b3fd6f9d5e69e0cc62912aa6a0cb1bf4617b0eba5a12f"
|
||||
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
"typenum",
|
||||
@@ -928,9 +917,9 @@ checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7"
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "1.7.0"
|
||||
version = "1.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf"
|
||||
checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499"
|
||||
dependencies = [
|
||||
"instant",
|
||||
]
|
||||
@@ -1097,9 +1086,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "generic-array"
|
||||
version = "0.14.5"
|
||||
version = "0.14.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803"
|
||||
checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9"
|
||||
dependencies = [
|
||||
"typenum",
|
||||
"version_check",
|
||||
@@ -1118,9 +1107,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "gimli"
|
||||
version = "0.26.1"
|
||||
version = "0.26.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "78cc372d058dcf6d5ecd98510e7fbc9e5aec4d21de70f65fea8fecebcd881bd4"
|
||||
checksum = "22030e2c5a68ec659fde1e949a745124b48e6fa8b045b7ed5bd1fe4ccc5c4e5d"
|
||||
|
||||
[[package]]
|
||||
name = "git-version"
|
||||
@@ -1186,9 +1175,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.12.2"
|
||||
version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "607c8a29735385251a339424dd462993c0fed8fa09d378f259377df08c126022"
|
||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
@@ -1256,7 +1245,7 @@ checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fnv",
|
||||
"itoa 1.0.2",
|
||||
"itoa 1.0.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1319,7 +1308,7 @@ dependencies = [
|
||||
"http-body",
|
||||
"httparse",
|
||||
"httpdate",
|
||||
"itoa 1.0.2",
|
||||
"itoa 1.0.3",
|
||||
"pin-project-lite",
|
||||
"socket2",
|
||||
"tokio",
|
||||
@@ -1390,7 +1379,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"hashbrown 0.12.2",
|
||||
"hashbrown 0.12.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1402,7 +1391,7 @@ dependencies = [
|
||||
"ahash",
|
||||
"atty",
|
||||
"indexmap",
|
||||
"itoa 1.0.2",
|
||||
"itoa 1.0.3",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"num-format",
|
||||
@@ -1443,15 +1432,15 @@ checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.2"
|
||||
version = "1.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d"
|
||||
checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754"
|
||||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
version = "0.3.58"
|
||||
version = "0.3.59"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c3fac17f7123a73ca62df411b1bf727ccc805daa070338fda671c86dac1bdc27"
|
||||
checksum = "258451ab10b34f8af53416d1fdab72c22e805f0c92a1136d59470ec0b11138b2"
|
||||
dependencies = [
|
||||
"wasm-bindgen",
|
||||
]
|
||||
@@ -1493,9 +1482,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.126"
|
||||
version = "0.2.127"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
|
||||
checksum = "505e71a4706fa491e9b1b55f51b95d4037d0821ee40131190475f692b35b009b"
|
||||
|
||||
[[package]]
|
||||
name = "libloading"
|
||||
@@ -1602,7 +1591,6 @@ dependencies = [
|
||||
name = "metrics"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"once_cell",
|
||||
"prometheus",
|
||||
@@ -1671,13 +1659,12 @@ name = "neon_local"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap 3.2.12",
|
||||
"clap 3.2.16",
|
||||
"comfy-table",
|
||||
"control_plane",
|
||||
"git-version",
|
||||
"pageserver",
|
||||
"postgres",
|
||||
"postgres_ffi",
|
||||
"safekeeper",
|
||||
"serde_json",
|
||||
"utils",
|
||||
@@ -1855,9 +1842,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "os_str_bytes"
|
||||
version = "6.1.0"
|
||||
version = "6.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "21326818e99cfe6ce1e524c2a805c189a99b5ae555a35d19f9a284b427d86afa"
|
||||
checksum = "648001efe5d5c0102d8cea768e348da85d90af8ba91f0bea908f157951493cd4"
|
||||
|
||||
[[package]]
|
||||
name = "pageserver"
|
||||
@@ -1867,7 +1854,7 @@ dependencies = [
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"chrono",
|
||||
"clap 3.2.12",
|
||||
"clap 3.2.16",
|
||||
"close_fds",
|
||||
"const_format",
|
||||
"crc32c",
|
||||
@@ -1883,7 +1870,6 @@ dependencies = [
|
||||
"humantime-serde",
|
||||
"hyper",
|
||||
"itertools",
|
||||
"lazy_static",
|
||||
"metrics",
|
||||
"nix",
|
||||
"once_cell",
|
||||
@@ -1905,7 +1891,6 @@ dependencies = [
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"tokio-stream",
|
||||
"toml_edit",
|
||||
"tracing",
|
||||
"url",
|
||||
@@ -2130,9 +2115,9 @@ dependencies = [
|
||||
"crc32c",
|
||||
"env_logger",
|
||||
"hex",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"memoffset",
|
||||
"once_cell",
|
||||
"postgres",
|
||||
"rand",
|
||||
"regex",
|
||||
@@ -2170,9 +2155,9 @@ checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872"
|
||||
|
||||
[[package]]
|
||||
name = "prettyplease"
|
||||
version = "0.1.16"
|
||||
version = "0.1.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "da6ffbe862780245013cb1c0a48c4e44b7d665548088f91f6b90876d0625e4c2"
|
||||
checksum = "697ae720ee02011f439e0701db107ffe2916d83f718342d65d7f8bf7b8a5fee9"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"syn",
|
||||
@@ -2186,9 +2171,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.40"
|
||||
version = "1.0.43"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7"
|
||||
checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
@@ -2284,17 +2269,18 @@ dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
"base64",
|
||||
"bstr",
|
||||
"bytes",
|
||||
"clap 3.2.12",
|
||||
"clap 3.2.16",
|
||||
"futures",
|
||||
"git-version",
|
||||
"hashbrown 0.11.2",
|
||||
"hex",
|
||||
"hmac 0.12.1",
|
||||
"hyper",
|
||||
"lazy_static",
|
||||
"md5",
|
||||
"metrics",
|
||||
"once_cell",
|
||||
"parking_lot 0.12.1",
|
||||
"pin-project-lite",
|
||||
"rand",
|
||||
@@ -2340,9 +2326,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.20"
|
||||
version = "1.0.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804"
|
||||
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
@@ -2425,9 +2411,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.2.13"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42"
|
||||
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
]
|
||||
@@ -2522,7 +2508,7 @@ dependencies = [
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
"rustls",
|
||||
"rustls-pemfile 1.0.0",
|
||||
"rustls-pemfile 1.0.1",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_urlencoded",
|
||||
@@ -2722,9 +2708,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rustls-pemfile"
|
||||
version = "1.0.0"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e7522c9de787ff061458fe9a829dc790a3f5b22dc571694fc5883f448b94d9a9"
|
||||
checksum = "0864aeff53f8c05aa08d86e5ef839d3dfcf07aeba2db32f12db0ef716e87bd55"
|
||||
dependencies = [
|
||||
"base64",
|
||||
]
|
||||
@@ -2740,15 +2726,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.7"
|
||||
version = "1.0.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a0a5f7c728f5d284929a1cccb5bc19884422bfe6ef4d6c409da2c41838983fcf"
|
||||
checksum = "97477e48b4cf8603ad5f7aaf897467cf42ab4218a38ef76fb14c2d6773a6d6a8"
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.10"
|
||||
version = "1.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695"
|
||||
checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09"
|
||||
|
||||
[[package]]
|
||||
name = "safekeeper"
|
||||
@@ -2758,18 +2744,16 @@ dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"clap 3.2.12",
|
||||
"clap 3.2.16",
|
||||
"const_format",
|
||||
"crc32c",
|
||||
"daemonize",
|
||||
"etcd_broker",
|
||||
"fs2",
|
||||
"futures",
|
||||
"git-version",
|
||||
"hex",
|
||||
"humantime",
|
||||
"hyper",
|
||||
"lazy_static",
|
||||
"metrics",
|
||||
"once_cell",
|
||||
"postgres",
|
||||
@@ -2784,12 +2768,10 @@ dependencies = [
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"tokio-util",
|
||||
"toml_edit",
|
||||
"tracing",
|
||||
"url",
|
||||
"utils",
|
||||
"walkdir",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
@@ -2853,15 +2835,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "1.0.12"
|
||||
version = "1.0.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a2333e6df6d6598f2b1974829f853c2b4c5f4a6e503c10af918081aa6f8564e1"
|
||||
checksum = "93f6841e709003d68bb2deee8c343572bf446003ec20a583e76f7b15cebf3711"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.139"
|
||||
version = "1.0.142"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0171ebb889e45aa68b44aee0859b3eede84c6f5f5c228e6f140c0b2a0a46cad6"
|
||||
checksum = "e590c437916fb6b221e1d00df6e3294f3fccd70ca7e92541c475d6ed6ef5fee2"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
@@ -2878,9 +2860,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.139"
|
||||
version = "1.0.142"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc1d3230c1de7932af58ad8ffbe1d784bd55efd5a9d84ac24f69c72d83543dfb"
|
||||
checksum = "34b5b8d809babe02f538c2cfec6f2c1ed10804c0e5a6a041a049a4f5588ccc2e"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -2889,11 +2871,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.82"
|
||||
version = "1.0.83"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "82c2c1fdcd807d1098552c5b9a36e425e42e9fbd7c6a37a8425f390f781f7fa7"
|
||||
checksum = "38dd04e3c8279e75b31ef29dbdceebfe5ad89f4d0937213c53f7d49d01b3d5a7"
|
||||
dependencies = [
|
||||
"itoa 1.0.2",
|
||||
"itoa 1.0.3",
|
||||
"ryu",
|
||||
"serde",
|
||||
]
|
||||
@@ -2905,7 +2887,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
|
||||
dependencies = [
|
||||
"form_urlencoded",
|
||||
"itoa 1.0.2",
|
||||
"itoa 1.0.3",
|
||||
"ryu",
|
||||
"serde",
|
||||
]
|
||||
@@ -3010,7 +2992,7 @@ dependencies = [
|
||||
"num-bigint",
|
||||
"num-traits",
|
||||
"thiserror",
|
||||
"time 0.3.11",
|
||||
"time 0.3.12",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3021,9 +3003,12 @@ checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de"
|
||||
|
||||
[[package]]
|
||||
name = "slab"
|
||||
version = "0.4.6"
|
||||
version = "0.4.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32"
|
||||
checksum = "4614a76b2a8be0058caa9dbbaf66d988527d86d003c11a94fbd335d7661edcef"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
@@ -3131,9 +3116,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.98"
|
||||
version = "1.0.99"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd"
|
||||
checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -3209,18 +3194,18 @@ checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb"
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.31"
|
||||
version = "1.0.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a"
|
||||
checksum = "f5f6586b7f764adc0231f4c79be7b920e766bb2f3e51b3661cdb263828f19994"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.31"
|
||||
version = "1.0.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a"
|
||||
checksum = "12bafc5b54507e0149cdf1b145a5d80ab80a90bcd9275df43d4fff68460f6c21"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -3249,11 +3234,12 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.3.11"
|
||||
version = "0.3.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72c91f41dcb2f096c05f0873d667dceec1087ce5bcf984ec8ffb19acddbb3217"
|
||||
checksum = "74b7cc93fc23ba97fde84f7eea56c55d1ba183f495c6715defdfc7b9cb8c870f"
|
||||
dependencies = [
|
||||
"itoa 1.0.2",
|
||||
"itoa 1.0.3",
|
||||
"js-sys",
|
||||
"libc",
|
||||
"num_threads",
|
||||
"quickcheck",
|
||||
@@ -3293,9 +3279,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
|
||||
|
||||
[[package]]
|
||||
name = "tokio"
|
||||
version = "1.20.0"
|
||||
version = "1.20.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "57aec3cfa4c296db7255446efb4928a6be304b431a806216105542a67b6ca82e"
|
||||
checksum = "7a8325f63a7d4774dd041e363b2409ed1c5cbbd0f867795e661df066b2b0a581"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"bytes",
|
||||
@@ -3625,9 +3611,9 @@ checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.1"
|
||||
version = "1.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c"
|
||||
checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-normalization"
|
||||
@@ -3688,9 +3674,9 @@ dependencies = [
|
||||
"hex-literal",
|
||||
"hyper",
|
||||
"jsonwebtoken",
|
||||
"lazy_static",
|
||||
"metrics",
|
||||
"nix",
|
||||
"once_cell",
|
||||
"pin-project-lite",
|
||||
"postgres",
|
||||
"postgres-protocol",
|
||||
@@ -3746,7 +3732,7 @@ name = "wal_craft"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap 3.2.12",
|
||||
"clap 3.2.16",
|
||||
"env_logger",
|
||||
"log",
|
||||
"once_cell",
|
||||
@@ -3790,9 +3776,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen"
|
||||
version = "0.2.81"
|
||||
version = "0.2.82"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7c53b543413a17a202f4be280a7e5c62a1c69345f5de525ee64f8cfdbc954994"
|
||||
checksum = "fc7652e3f6c4706c8d9cd54832c4a4ccb9b5336e2c3bd154d5cccfbf1c1f5f7d"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"wasm-bindgen-macro",
|
||||
@@ -3800,13 +3786,13 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-backend"
|
||||
version = "0.2.81"
|
||||
version = "0.2.82"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5491a68ab4500fa6b4d726bd67408630c3dbe9c4fe7bda16d5c82a1fd8c7340a"
|
||||
checksum = "662cd44805586bd52971b9586b1df85cdbbd9112e4ef4d8f41559c334dc6ac3f"
|
||||
dependencies = [
|
||||
"bumpalo",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"once_cell",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
@@ -3815,9 +3801,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-futures"
|
||||
version = "0.4.31"
|
||||
version = "0.4.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "de9a9cec1733468a8c657e57fa2413d2ae2c0129b95e87c5b72b8ace4d13f31f"
|
||||
checksum = "fa76fb221a1f8acddf5b54ace85912606980ad661ac7a503b4570ffd3a624dad"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"js-sys",
|
||||
@@ -3827,9 +3813,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-macro"
|
||||
version = "0.2.81"
|
||||
version = "0.2.82"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c441e177922bc58f1e12c022624b6216378e5febc2f0533e41ba443d505b80aa"
|
||||
checksum = "b260f13d3012071dfb1512849c033b1925038373aea48ced3012c09df952c602"
|
||||
dependencies = [
|
||||
"quote",
|
||||
"wasm-bindgen-macro-support",
|
||||
@@ -3837,9 +3823,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-macro-support"
|
||||
version = "0.2.81"
|
||||
version = "0.2.82"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7d94ac45fcf608c1f45ef53e748d35660f168490c10b23704c7779ab8f5c3048"
|
||||
checksum = "5be8e654bdd9b79216c2929ab90721aa82faf65c48cdf08bdc4e7f51357b80da"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -3850,15 +3836,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-shared"
|
||||
version = "0.2.81"
|
||||
version = "0.2.82"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6a89911bd99e5f3659ec4acf9c4d93b0a90fe4a2a11f15328472058edc5261be"
|
||||
checksum = "6598dd0bd3c7d51095ff6531a5b23e02acdc81804e30d8f07afb77b7215a140a"
|
||||
|
||||
[[package]]
|
||||
name = "web-sys"
|
||||
version = "0.3.58"
|
||||
version = "0.3.59"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2fed94beee57daf8dd7d51f2b15dc2bcde92d7a72304cdf662a4371008b71b90"
|
||||
checksum = "ed055ab27f941423197eb86b2035720b1a3ce40504df082cac2ecc6ed73335a1"
|
||||
dependencies = [
|
||||
"js-sys",
|
||||
"wasm-bindgen",
|
||||
@@ -4011,7 +3997,7 @@ dependencies = [
|
||||
"scopeguard",
|
||||
"serde",
|
||||
"syn",
|
||||
"time 0.3.11",
|
||||
"time 0.3.12",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
@@ -4033,7 +4019,7 @@ dependencies = [
|
||||
"oid-registry",
|
||||
"rusticata-macros",
|
||||
"thiserror",
|
||||
"time 0.3.11",
|
||||
"time 0.3.12",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4062,6 +4048,6 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "zeroize"
|
||||
version = "1.5.6"
|
||||
version = "1.5.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "20b578acffd8516a6c3f2a1bdefc1ec37e547bb4e0fb8b6b01a4cafc886b4442"
|
||||
checksum = "c394b5bd0c6f669e7275d9c20aa90ae064cb22e75a1cad54e1b34088034b149f"
|
||||
|
||||
44
Dockerfile
44
Dockerfile
@@ -1,8 +1,6 @@
|
||||
# Build Postgres
|
||||
FROM neondatabase/rust:1.58 AS pg-build
|
||||
WORKDIR /pg
|
||||
|
||||
USER root
|
||||
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned AS pg-build
|
||||
WORKDIR /home/nonroot
|
||||
|
||||
COPY vendor/postgres vendor/postgres
|
||||
COPY Makefile Makefile
|
||||
@@ -11,23 +9,30 @@ ENV BUILD_TYPE release
|
||||
RUN set -e \
|
||||
&& mold -run make -j $(nproc) -s postgres \
|
||||
&& rm -rf tmp_install/build \
|
||||
&& tar -C tmp_install -czf /postgres_install.tar.gz .
|
||||
&& tar -C tmp_install -czf /home/nonroot/postgres_install.tar.gz .
|
||||
|
||||
# Build zenith binaries
|
||||
FROM neondatabase/rust:1.58 AS build
|
||||
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned AS build
|
||||
WORKDIR /home/nonroot
|
||||
ARG GIT_VERSION=local
|
||||
|
||||
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
|
||||
ARG AWS_ACCESS_KEY_ID
|
||||
ARG AWS_SECRET_ACCESS_KEY
|
||||
# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
|
||||
# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
|
||||
# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build
|
||||
ARG RUSTC_WRAPPER=cachepot
|
||||
ENV AWS_REGION=eu-central-1
|
||||
ENV CACHEPOT_S3_KEY_PREFIX=cachepot
|
||||
ARG CACHEPOT_BUCKET=neon-github-dev
|
||||
#ARG AWS_ACCESS_KEY_ID
|
||||
#ARG AWS_SECRET_ACCESS_KEY
|
||||
|
||||
COPY --from=pg-build /pg/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
|
||||
COPY --from=pg-build /home/nonroot/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
|
||||
COPY . .
|
||||
|
||||
# Show build caching stats to check if it was used in the end.
|
||||
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
|
||||
RUN set -e \
|
||||
&& sudo -E "PATH=$PATH" mold -run cargo build --release \
|
||||
&& mold -run cargo build --release \
|
||||
&& cachepot -s
|
||||
|
||||
# Build final image
|
||||
@@ -36,8 +41,8 @@ FROM debian:bullseye-slim
|
||||
WORKDIR /data
|
||||
|
||||
RUN set -e \
|
||||
&& apt-get update \
|
||||
&& apt-get install -y \
|
||||
&& apt update \
|
||||
&& apt install -y \
|
||||
libreadline-dev \
|
||||
libseccomp-dev \
|
||||
openssl \
|
||||
@@ -46,17 +51,14 @@ RUN set -e \
|
||||
&& useradd -d /data zenith \
|
||||
&& chown -R zenith:zenith /data
|
||||
|
||||
COPY --from=build --chown=zenith:zenith /home/runner/target/release/pageserver /usr/local/bin
|
||||
COPY --from=build --chown=zenith:zenith /home/runner/target/release/safekeeper /usr/local/bin
|
||||
COPY --from=build --chown=zenith:zenith /home/runner/target/release/proxy /usr/local/bin
|
||||
COPY --from=build --chown=zenith:zenith /home/nonroot/target/release/pageserver /usr/local/bin
|
||||
COPY --from=build --chown=zenith:zenith /home/nonroot/target/release/safekeeper /usr/local/bin
|
||||
COPY --from=build --chown=zenith:zenith /home/nonroot/target/release/proxy /usr/local/bin
|
||||
|
||||
COPY --from=pg-build /pg/tmp_install/ /usr/local/
|
||||
COPY --from=pg-build /postgres_install.tar.gz /data/
|
||||
|
||||
COPY docker-entrypoint.sh /docker-entrypoint.sh
|
||||
COPY --from=pg-build /home/nonroot/tmp_install/ /usr/local/
|
||||
COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
|
||||
|
||||
VOLUME ["/data"]
|
||||
USER zenith
|
||||
EXPOSE 6400
|
||||
ENTRYPOINT ["/docker-entrypoint.sh"]
|
||||
CMD ["pageserver"]
|
||||
|
||||
@@ -1,18 +1,25 @@
|
||||
# First transient image to build compute_tools binaries
|
||||
# NB: keep in sync with rust image version in .circle/config.yml
|
||||
FROM neondatabase/rust:1.58 AS rust-build
|
||||
# NB: keep in sync with rust image version in .github/workflows/build_and_test.yml
|
||||
FROM 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned AS rust-build
|
||||
WORKDIR /home/nonroot
|
||||
|
||||
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
|
||||
ARG AWS_ACCESS_KEY_ID
|
||||
ARG AWS_SECRET_ACCESS_KEY
|
||||
# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
|
||||
# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
|
||||
# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build.
|
||||
ARG RUSTC_WRAPPER=cachepot
|
||||
ENV AWS_REGION=eu-central-1
|
||||
ENV CACHEPOT_S3_KEY_PREFIX=cachepot
|
||||
ARG CACHEPOT_BUCKET=neon-github-dev
|
||||
#ARG AWS_ACCESS_KEY_ID
|
||||
#ARG AWS_SECRET_ACCESS_KEY
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN set -e \
|
||||
&& sudo -E "PATH=$PATH" mold -run cargo build -p compute_tools --release \
|
||||
&& mold -run cargo build -p compute_tools --release \
|
||||
&& cachepot -s
|
||||
|
||||
# Final image that only has one binary
|
||||
FROM debian:buster-slim
|
||||
FROM debian:bullseye-slim
|
||||
|
||||
COPY --from=rust-build /home/runner/target/release/compute_ctl /usr/local/bin/compute_ctl
|
||||
COPY --from=rust-build /home/nonroot/target/release/compute_ctl /usr/local/bin/compute_ctl
|
||||
|
||||
4
Makefile
4
Makefile
@@ -29,9 +29,11 @@ else
|
||||
endif
|
||||
|
||||
# macOS with brew-installed openssl requires explicit paths
|
||||
# It can be configured with OPENSSL_PREFIX variable
|
||||
UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
PG_CONFIGURE_OPTS += --with-includes=$(HOMEBREW_PREFIX)/opt/openssl/include --with-libraries=$(HOMEBREW_PREFIX)/opt/openssl/lib
|
||||
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
|
||||
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
|
||||
endif
|
||||
|
||||
# Choose whether we should be silent or verbose
|
||||
|
||||
59
README.md
59
README.md
@@ -1,6 +1,6 @@
|
||||
# Neon
|
||||
|
||||
Neon is a serverless open source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes PostgreSQL storage layer by redistributing data across a cluster of nodes.
|
||||
Neon is a serverless open-source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes the PostgreSQL storage layer by redistributing data across a cluster of nodes.
|
||||
|
||||
The project used to be called "Zenith". Many of the commands and code comments
|
||||
still refer to "zenith", but we are in the process of renaming things.
|
||||
@@ -12,32 +12,31 @@ Alternatively, compile and run the project [locally](#running-local-installation
|
||||
|
||||
## Architecture overview
|
||||
|
||||
A Neon installation consists of compute nodes and Neon storage engine.
|
||||
A Neon installation consists of compute nodes and a Neon storage engine.
|
||||
|
||||
Compute nodes are stateless PostgreSQL nodes, backed by Neon storage engine.
|
||||
Compute nodes are stateless PostgreSQL nodes backed by the Neon storage engine.
|
||||
|
||||
Neon storage engine consists of two major components:
|
||||
- Pageserver. Scalable storage backend for compute nodes.
|
||||
- WAL service. The service that receives WAL from compute node and ensures that it is stored durably.
|
||||
The Neon storage engine consists of two major components:
|
||||
- Pageserver. Scalable storage backend for the compute nodes.
|
||||
- WAL service. The service receives WAL from the compute node and ensures that it is stored durably.
|
||||
|
||||
Pageserver consists of:
|
||||
- Repository - Neon storage implementation.
|
||||
- WAL receiver - service that receives WAL from WAL service and stores it in the repository.
|
||||
- Page service - service that communicates with compute nodes and responds with pages from the repository.
|
||||
- WAL redo - service that builds pages from base images and WAL records on Page service request.
|
||||
|
||||
- WAL redo - service that builds pages from base images and WAL records on Page service request
|
||||
## Running local installation
|
||||
|
||||
|
||||
#### Installing dependencies on Linux
|
||||
1. Install build dependencies and other useful packages
|
||||
1. Install build dependencies and other applicable packages
|
||||
|
||||
* On Ubuntu or Debian this set of packages should be sufficient to build the code:
|
||||
* On Ubuntu or Debian, this set of packages should be sufficient to build the code:
|
||||
```bash
|
||||
apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \
|
||||
libssl-dev clang pkg-config libpq-dev etcd cmake postgresql-client
|
||||
```
|
||||
* On Fedora these packages are needed:
|
||||
* On Fedora, these packages are needed:
|
||||
```bash
|
||||
dnf install flex bison readline-devel zlib-devel openssl-devel \
|
||||
libseccomp-devel perl clang cmake etcd postgresql postgresql-contrib
|
||||
@@ -69,7 +68,7 @@ brew install libpq
|
||||
brew link --force libpq
|
||||
```
|
||||
|
||||
#### Building on Linux and OSX
|
||||
#### Building on Linux
|
||||
|
||||
1. Build neon and patched postgres
|
||||
```
|
||||
@@ -80,19 +79,35 @@ cd neon
|
||||
|
||||
# The preferred and default is to make a debug build. This will create a
|
||||
# demonstrably slower build than a release build. If you want to use a release
|
||||
# build, utilize "`BUILD_TYPE=release make -j`nproc``"
|
||||
# build, utilize "BUILD_TYPE=release make -j`nproc`"
|
||||
|
||||
make -j`nproc`
|
||||
```
|
||||
|
||||
#### dependency installation notes
|
||||
#### Building on OSX
|
||||
|
||||
1. Build neon and patched postgres
|
||||
```
|
||||
# Note: The path to the neon sources can not contain a space.
|
||||
|
||||
git clone --recursive https://github.com/neondatabase/neon.git
|
||||
cd neon
|
||||
|
||||
# The preferred and default is to make a debug build. This will create a
|
||||
# demonstrably slower build than a release build. If you want to use a release
|
||||
# build, utilize "BUILD_TYPE=release make -j`sysctl -n hw.logicalcpu`"
|
||||
|
||||
make -j`sysctl -n hw.logicalcpu`
|
||||
```
|
||||
|
||||
#### Dependency installation notes
|
||||
To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `tmp_install/bin` and `tmp_install/lib`, respectively.
|
||||
|
||||
To run the integration tests or Python scripts (not required to use the code), install
|
||||
Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (requires poetry) in the project directory.
|
||||
Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (requires [poetry](https://python-poetry.org/)) in the project directory.
|
||||
|
||||
|
||||
#### running neon database
|
||||
#### Running neon database
|
||||
1. Start pageserver and postgres on top of it (should be called from repo root):
|
||||
```sh
|
||||
# Create repository in .neon with proper paths to binaries and data
|
||||
@@ -123,7 +138,7 @@ Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=pos
|
||||
main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16B5BA8 running
|
||||
```
|
||||
|
||||
2. Now it is possible to connect to postgres and run some queries:
|
||||
2. Now, it is possible to connect to postgres and run some queries:
|
||||
```text
|
||||
> psql -p55432 -h 127.0.0.1 -U cloud_admin postgres
|
||||
postgres=# CREATE TABLE t(key int primary key, value text);
|
||||
@@ -181,14 +196,16 @@ postgres=# select * from t;
|
||||
(1 row)
|
||||
```
|
||||
|
||||
4. If you want to run tests afterwards (see below), you have to stop all the running the pageserver, safekeeper and postgres instances
|
||||
you have just started. You can stop them all with one command:
|
||||
4. If you want to run tests afterward (see below), you must stop all the running of the pageserver, safekeeper, and postgres instances
|
||||
you have just started. You can terminate them all with one command:
|
||||
```sh
|
||||
> ./target/debug/neon_local stop
|
||||
```
|
||||
|
||||
## Running tests
|
||||
|
||||
Ensure your dependencies are installed as described [here](https://github.com/neondatabase/neon#dependency-installation-notes).
|
||||
|
||||
```sh
|
||||
git clone --recursive https://github.com/neondatabase/neon.git
|
||||
make # builds also postgres and installs it to ./tmp_install
|
||||
@@ -205,8 +222,8 @@ To view your `rustdoc` documentation in a browser, try running `cargo doc --no-d
|
||||
|
||||
### Postgres-specific terms
|
||||
|
||||
Due to Neon's very close relation with PostgreSQL internals, there are numerous specific terms used.
|
||||
Same applies to certain spelling: i.e. we use MB to denote 1024 * 1024 bytes, while MiB would be technically more correct, it's inconsistent with what PostgreSQL code and its documentation use.
|
||||
Due to Neon's very close relation with PostgreSQL internals, numerous specific terms are used.
|
||||
The same applies to certain spelling: i.e. we use MB to denote 1024 * 1024 bytes, while MiB would be technically more correct, it's inconsistent with what PostgreSQL code and its documentation use.
|
||||
|
||||
To get more familiar with this aspect, refer to:
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@ version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
libc = "0.2"
|
||||
anyhow = "1.0"
|
||||
chrono = "0.4"
|
||||
clap = "3.0"
|
||||
|
||||
@@ -157,7 +157,7 @@ fn main() -> Result<()> {
|
||||
exit(code)
|
||||
}
|
||||
Err(error) => {
|
||||
error!("could not start the compute node: {}", error);
|
||||
error!("could not start the compute node: {:?}", error);
|
||||
|
||||
let mut state = compute.state.write().unwrap();
|
||||
state.error = Some(format!("{:?}", error));
|
||||
|
||||
@@ -178,6 +178,7 @@ impl ComputeNode {
|
||||
.args(&["--sync-safekeepers"])
|
||||
.env("PGDATA", &self.pgdata) // we cannot use -D in this mode
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped())
|
||||
.spawn()
|
||||
.expect("postgres --sync-safekeepers failed to start");
|
||||
|
||||
@@ -187,10 +188,13 @@ impl ComputeNode {
|
||||
let sync_output = sync_handle
|
||||
.wait_with_output()
|
||||
.expect("postgres --sync-safekeepers failed");
|
||||
|
||||
if !sync_output.status.success() {
|
||||
anyhow::bail!(
|
||||
"postgres --sync-safekeepers exited with non-zero status: {}",
|
||||
"postgres --sync-safekeepers exited with non-zero status: {}. stdout: {}, stderr: {}",
|
||||
sync_output.status,
|
||||
String::from_utf8(sync_output.stdout).expect("postgres --sync-safekeepers exited, and stdout is not utf-8"),
|
||||
String::from_utf8(sync_output.stderr).expect("postgres --sync-safekeepers exited, and stderr is not utf-8"),
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -62,9 +62,16 @@ impl GenericOption {
|
||||
/// Represent `GenericOption` as configuration option.
|
||||
pub fn to_pg_setting(&self) -> String {
|
||||
if let Some(val) = &self.value {
|
||||
let name = match self.name.as_str() {
|
||||
"safekeepers" => "neon.safekeepers",
|
||||
"wal_acceptor_reconnect" => "neon.safekeeper_reconnect_timeout",
|
||||
"wal_acceptor_connect_timeout" => "neon.safekeeper_connect_timeout",
|
||||
it => it,
|
||||
};
|
||||
|
||||
match self.vartype.as_ref() {
|
||||
"string" => format!("{} = '{}'", self.name, val),
|
||||
_ => format!("{} = {}", self.name, val),
|
||||
"string" => format!("{} = '{}'", name, val),
|
||||
_ => format!("{} = {}", name, val),
|
||||
}
|
||||
} else {
|
||||
self.name.to_owned()
|
||||
|
||||
@@ -85,7 +85,7 @@
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "safekeepers",
|
||||
"name": "neon.safekeepers",
|
||||
"value": "127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501",
|
||||
"vartype": "string"
|
||||
},
|
||||
@@ -181,7 +181,6 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"delta_operations": [
|
||||
{
|
||||
"action": "delete_db",
|
||||
|
||||
@@ -28,7 +28,7 @@ mod pg_helpers_tests {
|
||||
|
||||
assert_eq!(
|
||||
spec.cluster.settings.as_pg_settings(),
|
||||
"fsync = off\nwal_level = replica\nhot_standby = on\nsafekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nneon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nneon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'neon'\nsynchronous_standby_names = 'walproposer'\nneon.pageserver_connstring = 'host=127.0.0.1 port=6400'"
|
||||
"fsync = off\nwal_level = replica\nhot_standby = on\nneon.safekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nneon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nneon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'neon'\nsynchronous_standby_names = 'walproposer'\nneon.pageserver_connstring = 'host=127.0.0.1 port=6400'"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -9,12 +9,11 @@ postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_with = "1.12.0"
|
||||
toml = "0.5"
|
||||
lazy_static = "1.4"
|
||||
once_cell = "1.13.0"
|
||||
regex = "1"
|
||||
anyhow = "1.0"
|
||||
thiserror = "1"
|
||||
nix = "0.23"
|
||||
url = "2.2.2"
|
||||
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
|
||||
|
||||
pageserver = { path = "../pageserver" }
|
||||
|
||||
@@ -150,7 +150,7 @@ impl PostgresNode {
|
||||
let port: u16 = conf.parse_field("port", &context)?;
|
||||
let timeline_id: ZTimelineId = conf.parse_field("neon.timeline_id", &context)?;
|
||||
let tenant_id: ZTenantId = conf.parse_field("neon.tenant_id", &context)?;
|
||||
let uses_wal_proposer = conf.get("safekeepers").is_some();
|
||||
let uses_wal_proposer = conf.get("neon.safekeepers").is_some();
|
||||
|
||||
// parse recovery_target_lsn, if any
|
||||
let recovery_target_lsn: Option<Lsn> =
|
||||
@@ -341,7 +341,7 @@ impl PostgresNode {
|
||||
.map(|sk| format!("localhost:{}", sk.pg_port))
|
||||
.collect::<Vec<String>>()
|
||||
.join(",");
|
||||
conf.append("safekeepers", &safekeepers);
|
||||
conf.append("neon.safekeepers", &safekeepers);
|
||||
} else {
|
||||
// We only use setup without safekeepers for tests,
|
||||
// and don't care about data durability on pageserver,
|
||||
|
||||
@@ -30,14 +30,14 @@ pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
|
||||
let etcd_stdout_file =
|
||||
fs::File::create(etcd_data_dir.join("etcd.stdout.log")).with_context(|| {
|
||||
format!(
|
||||
"Failed to create ectd stout file in directory {}",
|
||||
"Failed to create etcd stout file in directory {}",
|
||||
etcd_data_dir.display()
|
||||
)
|
||||
})?;
|
||||
let etcd_stderr_file =
|
||||
fs::File::create(etcd_data_dir.join("etcd.stderr.log")).with_context(|| {
|
||||
format!(
|
||||
"Failed to create ectd stderr file in directory {}",
|
||||
"Failed to create etcd stderr file in directory {}",
|
||||
etcd_data_dir.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
@@ -51,7 +51,11 @@ fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
|
||||
}
|
||||
|
||||
fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
|
||||
for env_key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] {
|
||||
for env_key in [
|
||||
"AWS_ACCESS_KEY_ID",
|
||||
"AWS_SECRET_ACCESS_KEY",
|
||||
"AWS_SESSION_TOKEN",
|
||||
] {
|
||||
if let Ok(value) = std::env::var(env_key) {
|
||||
cmd = cmd.env(env_key, value);
|
||||
}
|
||||
|
||||
@@ -24,7 +24,7 @@ use crate::safekeeper::SafekeeperNode;
|
||||
// This data structures represents neon_local CLI config
|
||||
//
|
||||
// It is deserialized from the .neon/config file, or the config file passed
|
||||
// to 'zenith init --config=<path>' option. See control_plane/simple.conf for
|
||||
// to 'neon_local init --config=<path>' option. See control_plane/simple.conf for
|
||||
// an example.
|
||||
//
|
||||
#[serde_as]
|
||||
@@ -320,7 +320,7 @@ impl LocalEnv {
|
||||
|
||||
if !repopath.exists() {
|
||||
bail!(
|
||||
"Zenith config is not found in {}. You need to run 'zenith init' first",
|
||||
"Zenith config is not found in {}. You need to run 'neon_local init' first",
|
||||
repopath.to_str().unwrap()
|
||||
);
|
||||
}
|
||||
@@ -337,12 +337,12 @@ impl LocalEnv {
|
||||
}
|
||||
|
||||
pub fn persist_config(&self, base_path: &Path) -> anyhow::Result<()> {
|
||||
// Currently, the user first passes a config file with 'zenith init --config=<path>'
|
||||
// Currently, the user first passes a config file with 'neon_local init --config=<path>'
|
||||
// We read that in, in `create_config`, and fill any missing defaults. Then it's saved
|
||||
// to .neon/config. TODO: We lose any formatting and comments along the way, which is
|
||||
// a bit sad.
|
||||
let mut conf_content = r#"# This file describes a locale deployment of the page server
|
||||
# and safekeeeper node. It is read by the 'zenith' command-line
|
||||
# and safekeeeper node. It is read by the 'neon_local' command-line
|
||||
# utility.
|
||||
"#
|
||||
.to_string();
|
||||
@@ -382,7 +382,7 @@ impl LocalEnv {
|
||||
}
|
||||
|
||||
//
|
||||
// Initialize a new Zenith repository
|
||||
// Initialize a new Neon repository
|
||||
//
|
||||
pub fn init(&mut self) -> anyhow::Result<()> {
|
||||
// check if config already exists
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
/// enough to extract a few settings we need in Zenith, assuming you don't do
|
||||
/// funny stuff like include-directives or funny escaping.
|
||||
use anyhow::{bail, Context, Result};
|
||||
use lazy_static::lazy_static;
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt;
|
||||
@@ -19,9 +19,7 @@ pub struct PostgresConf {
|
||||
hash: HashMap<String, String>,
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref CONF_LINE_RE: Regex = Regex::new(r"^((?:\w|\.)+)\s*=\s*(\S+)$").unwrap();
|
||||
}
|
||||
static CONF_LINE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^((?:\w|\.)+)\s*=\s*(\S+)$").unwrap());
|
||||
|
||||
impl PostgresConf {
|
||||
pub fn new() -> PostgresConf {
|
||||
@@ -139,10 +137,10 @@ fn escape_str(s: &str) -> String {
|
||||
//
|
||||
// This regex is a bit more conservative than the rules in guc-file.l, so we quote some
|
||||
// strings that PostgreSQL would accept without quoting, but that's OK.
|
||||
lazy_static! {
|
||||
static ref UNQUOTED_RE: Regex =
|
||||
Regex::new(r"(^[-+]?[0-9]+[a-zA-Z]*$)|(^[a-zA-Z][a-zA-Z0-9]*$)").unwrap();
|
||||
}
|
||||
|
||||
static UNQUOTED_RE: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r"(^[-+]?[0-9]+[a-zA-Z]*$)|(^[a-zA-Z][a-zA-Z0-9]*$)").unwrap());
|
||||
|
||||
if UNQUOTED_RE.is_match(s) {
|
||||
s.to_string()
|
||||
} else {
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use std::io::Write;
|
||||
use std::net::TcpStream;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
use std::sync::Arc;
|
||||
@@ -52,7 +51,7 @@ impl ResponseErrorMessageExt for Response {
|
||||
Err(SafekeeperHttpError::Response(
|
||||
match self.json::<HttpErrorBody>() {
|
||||
Ok(err_body) => format!("Error: {}", err_body.msg),
|
||||
Err(_) => format!("Http error ({}) at {url}.", status.as_u16()),
|
||||
Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
|
||||
},
|
||||
))
|
||||
}
|
||||
@@ -241,40 +240,28 @@ impl SafekeeperNode {
|
||||
),
|
||||
}
|
||||
|
||||
let address = connection_address(&self.pg_connection_config);
|
||||
// Wait until process is gone
|
||||
for i in 0..600 {
|
||||
let signal = None; // Send no signal, just get the error code
|
||||
match kill(pid, signal) {
|
||||
Ok(_) => (), // Process exists, keep waiting
|
||||
Err(Errno::ESRCH) => {
|
||||
// Process not found, we're done
|
||||
println!("done!");
|
||||
return Ok(());
|
||||
}
|
||||
Err(err) => bail!(
|
||||
"Failed to send signal to pageserver with pid {}: {}",
|
||||
pid,
|
||||
err.desc()
|
||||
),
|
||||
};
|
||||
|
||||
// TODO Remove this "timeout" and handle it on caller side instead.
|
||||
// Shutting down may take a long time,
|
||||
// if safekeeper flushes a lot of data
|
||||
let mut tcp_stopped = false;
|
||||
for _ in 0..100 {
|
||||
if !tcp_stopped {
|
||||
if let Err(err) = TcpStream::connect(&address) {
|
||||
tcp_stopped = true;
|
||||
if err.kind() != io::ErrorKind::ConnectionRefused {
|
||||
eprintln!("\nSafekeeper connection failed with error: {err}");
|
||||
}
|
||||
}
|
||||
if i % 10 == 0 {
|
||||
print!(".");
|
||||
io::stdout().flush().unwrap();
|
||||
}
|
||||
if tcp_stopped {
|
||||
// Also check status on the HTTP port
|
||||
match self.check_status() {
|
||||
Err(SafekeeperHttpError::Transport(err)) if err.is_connect() => {
|
||||
println!("done!");
|
||||
return Ok(());
|
||||
}
|
||||
Err(err) => {
|
||||
eprintln!("\nSafekeeper status check failed with error: {err}");
|
||||
return Ok(());
|
||||
}
|
||||
Ok(()) => {
|
||||
// keep waiting
|
||||
}
|
||||
}
|
||||
}
|
||||
print!(".");
|
||||
io::stdout().flush().unwrap();
|
||||
thread::sleep(Duration::from_secs(1));
|
||||
thread::sleep(Duration::from_millis(100));
|
||||
}
|
||||
|
||||
bail!("Failed to stop safekeeper with pid {}", pid);
|
||||
@@ -304,10 +291,9 @@ impl SafekeeperNode {
|
||||
Ok(self
|
||||
.http_request(
|
||||
Method::POST,
|
||||
format!("{}/{}", self.http_base_url, "timeline"),
|
||||
format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
|
||||
)
|
||||
.json(&TimelineCreateRequest {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
peer_ids,
|
||||
})
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::{BufReader, Write};
|
||||
use std::net::TcpStream;
|
||||
use std::num::NonZeroU64;
|
||||
use std::path::PathBuf;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
use std::time::Duration;
|
||||
use std::{io, result, thread};
|
||||
@@ -12,9 +11,9 @@ use anyhow::{bail, Context};
|
||||
use nix::errno::Errno;
|
||||
use nix::sys::signal::{kill, Signal};
|
||||
use nix::unistd::Pid;
|
||||
use pageserver::http::models::{TenantConfigRequest, TenantCreateRequest, TimelineCreateRequest};
|
||||
use pageserver::tenant_mgr::TenantInfo;
|
||||
use pageserver::timelines::TimelineInfo;
|
||||
use pageserver::http::models::{
|
||||
TenantConfigRequest, TenantCreateRequest, TenantInfo, TimelineCreateRequest, TimelineInfo,
|
||||
};
|
||||
use postgres::{Config, NoTls};
|
||||
use reqwest::blocking::{Client, RequestBuilder, Response};
|
||||
use reqwest::{IntoUrl, Method};
|
||||
@@ -103,23 +102,19 @@ impl PageServerNode {
|
||||
|
||||
/// Construct libpq connection string for connecting to the pageserver.
|
||||
fn pageserver_connection_config(password: &str, listen_addr: &str) -> Config {
|
||||
format!("postgresql://no_user:{}@{}/no_db", password, listen_addr)
|
||||
format!("postgresql://no_user:{password}@{listen_addr}/no_db")
|
||||
.parse()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub fn init(
|
||||
pub fn initialize(
|
||||
&self,
|
||||
create_tenant: Option<ZTenantId>,
|
||||
initial_timeline_id: Option<ZTimelineId>,
|
||||
config_overrides: &[&str],
|
||||
) -> anyhow::Result<ZTimelineId> {
|
||||
let mut cmd = Command::new(self.env.pageserver_bin()?);
|
||||
|
||||
let id = format!("id={}", self.env.pageserver.id);
|
||||
|
||||
// FIXME: the paths should be shell-escaped to handle paths with spaces, quotas etc.
|
||||
let base_data_dir_param = self.env.base_data_dir.display().to_string();
|
||||
let pg_distrib_dir_param =
|
||||
format!("pg_distrib_dir='{}'", self.env.pg_distrib_dir.display());
|
||||
let authg_type_param = format!("auth_type='{}'", self.env.pageserver.auth_type);
|
||||
@@ -139,67 +134,52 @@ impl PageServerNode {
|
||||
.collect::<Vec<_>>()
|
||||
.join(",")
|
||||
);
|
||||
let mut args = Vec::with_capacity(20);
|
||||
|
||||
args.push("--init");
|
||||
args.extend(["-D", &base_data_dir_param]);
|
||||
args.extend(["-c", &pg_distrib_dir_param]);
|
||||
args.extend(["-c", &authg_type_param]);
|
||||
args.extend(["-c", &listen_http_addr_param]);
|
||||
args.extend(["-c", &listen_pg_addr_param]);
|
||||
args.extend(["-c", &broker_endpoints_param]);
|
||||
args.extend(["-c", &id]);
|
||||
|
||||
let broker_etcd_prefix_param = self
|
||||
.env
|
||||
.etcd_broker
|
||||
.broker_etcd_prefix
|
||||
.as_ref()
|
||||
.map(|prefix| format!("broker_etcd_prefix='{prefix}'"));
|
||||
if let Some(broker_etcd_prefix_param) = broker_etcd_prefix_param.as_deref() {
|
||||
args.extend(["-c", broker_etcd_prefix_param]);
|
||||
}
|
||||
|
||||
for config_override in config_overrides {
|
||||
args.extend(["-c", config_override]);
|
||||
let mut init_config_overrides = config_overrides.to_vec();
|
||||
init_config_overrides.push(&id);
|
||||
init_config_overrides.push(&pg_distrib_dir_param);
|
||||
init_config_overrides.push(&authg_type_param);
|
||||
init_config_overrides.push(&listen_http_addr_param);
|
||||
init_config_overrides.push(&listen_pg_addr_param);
|
||||
init_config_overrides.push(&broker_endpoints_param);
|
||||
|
||||
if let Some(broker_etcd_prefix_param) = broker_etcd_prefix_param.as_deref() {
|
||||
init_config_overrides.push(broker_etcd_prefix_param);
|
||||
}
|
||||
|
||||
if self.env.pageserver.auth_type != AuthType::Trust {
|
||||
args.extend([
|
||||
"-c",
|
||||
"auth_validation_public_key_path='auth_public_key.pem'",
|
||||
]);
|
||||
init_config_overrides.push("auth_validation_public_key_path='auth_public_key.pem'");
|
||||
}
|
||||
|
||||
let create_tenant = create_tenant.map(|id| id.to_string());
|
||||
if let Some(tenant_id) = create_tenant.as_deref() {
|
||||
args.extend(["--create-tenant", tenant_id])
|
||||
self.start_node(&init_config_overrides, &self.env.base_data_dir, true)?;
|
||||
let init_result = self
|
||||
.try_init_timeline(create_tenant, initial_timeline_id)
|
||||
.context("Failed to create initial tenant and timeline for pageserver");
|
||||
match &init_result {
|
||||
Ok(initial_timeline_id) => {
|
||||
println!("Successfully initialized timeline {initial_timeline_id}")
|
||||
}
|
||||
Err(e) => eprintln!("{e:#}"),
|
||||
}
|
||||
self.stop(false)?;
|
||||
init_result
|
||||
}
|
||||
|
||||
let initial_timeline_id = initial_timeline_id.unwrap_or_else(ZTimelineId::generate);
|
||||
let initial_timeline_id_string = initial_timeline_id.to_string();
|
||||
args.extend(["--initial-timeline-id", &initial_timeline_id_string]);
|
||||
|
||||
let cmd_with_args = cmd.args(args);
|
||||
let init_output = fill_rust_env_vars(cmd_with_args)
|
||||
.output()
|
||||
.with_context(|| {
|
||||
format!("failed to init pageserver with command {:?}", cmd_with_args)
|
||||
})?;
|
||||
|
||||
if !init_output.status.success() {
|
||||
bail!(
|
||||
"init invocation failed, {}\nStdout: {}\nStderr: {}",
|
||||
init_output.status,
|
||||
String::from_utf8_lossy(&init_output.stdout),
|
||||
String::from_utf8_lossy(&init_output.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
// echo the captured output of the init command
|
||||
println!("{}", String::from_utf8_lossy(&init_output.stdout));
|
||||
|
||||
Ok(initial_timeline_id)
|
||||
fn try_init_timeline(
|
||||
&self,
|
||||
new_tenant_id: Option<ZTenantId>,
|
||||
new_timeline_id: Option<ZTimelineId>,
|
||||
) -> anyhow::Result<ZTimelineId> {
|
||||
let initial_tenant_id = self.tenant_create(new_tenant_id, HashMap::new())?;
|
||||
let initial_timeline_info =
|
||||
self.timeline_create(initial_tenant_id, new_timeline_id, None, None)?;
|
||||
Ok(initial_timeline_info.timeline_id)
|
||||
}
|
||||
|
||||
pub fn repo_path(&self) -> PathBuf {
|
||||
@@ -211,15 +191,35 @@ impl PageServerNode {
|
||||
}
|
||||
|
||||
pub fn start(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
|
||||
print!(
|
||||
self.start_node(config_overrides, &self.repo_path(), false)
|
||||
}
|
||||
|
||||
fn start_node(
|
||||
&self,
|
||||
config_overrides: &[&str],
|
||||
datadir: &Path,
|
||||
update_config: bool,
|
||||
) -> anyhow::Result<()> {
|
||||
println!(
|
||||
"Starting pageserver at '{}' in '{}'",
|
||||
connection_address(&self.pg_connection_config),
|
||||
self.repo_path().display()
|
||||
datadir.display()
|
||||
);
|
||||
io::stdout().flush().unwrap();
|
||||
io::stdout().flush()?;
|
||||
|
||||
let repo_path = self.repo_path();
|
||||
let mut args = vec!["-D", repo_path.to_str().unwrap()];
|
||||
let mut args = vec![
|
||||
"-D",
|
||||
datadir.to_str().with_context(|| {
|
||||
format!(
|
||||
"Datadir path '{}' cannot be represented as a unicode string",
|
||||
datadir.display()
|
||||
)
|
||||
})?,
|
||||
];
|
||||
|
||||
if update_config {
|
||||
args.push("--update-config");
|
||||
}
|
||||
|
||||
for config_override in config_overrides {
|
||||
args.extend(["-c", config_override]);
|
||||
@@ -231,8 +231,8 @@ impl PageServerNode {
|
||||
|
||||
if !filled_cmd.status()?.success() {
|
||||
bail!(
|
||||
"Pageserver failed to start. See '{}' for details.",
|
||||
self.repo_path().join("pageserver.log").display()
|
||||
"Pageserver failed to start. See console output and '{}' for details.",
|
||||
datadir.join("pageserver.log").display()
|
||||
);
|
||||
}
|
||||
|
||||
@@ -241,7 +241,7 @@ impl PageServerNode {
|
||||
const RETRIES: i8 = 15;
|
||||
for retries in 1..RETRIES {
|
||||
match self.check_status() {
|
||||
Ok(_) => {
|
||||
Ok(()) => {
|
||||
println!("\nPageserver started");
|
||||
return Ok(());
|
||||
}
|
||||
@@ -255,21 +255,18 @@ impl PageServerNode {
|
||||
if retries == 5 {
|
||||
println!() // put a line break after dots for second message
|
||||
}
|
||||
println!(
|
||||
"Pageserver not responding yet, err {} retrying ({})...",
|
||||
err, retries
|
||||
);
|
||||
println!("Pageserver not responding yet, err {err} retrying ({retries})...");
|
||||
}
|
||||
}
|
||||
PageserverHttpError::Response(msg) => {
|
||||
bail!("pageserver failed to start: {} ", msg)
|
||||
bail!("pageserver failed to start: {msg} ")
|
||||
}
|
||||
}
|
||||
thread::sleep(Duration::from_secs(1));
|
||||
}
|
||||
}
|
||||
}
|
||||
bail!("pageserver failed to start in {} seconds", RETRIES);
|
||||
bail!("pageserver failed to start in {RETRIES} seconds");
|
||||
}
|
||||
|
||||
///
|
||||
@@ -299,63 +296,46 @@ impl PageServerNode {
|
||||
match kill(pid, sig) {
|
||||
Ok(_) => (),
|
||||
Err(Errno::ESRCH) => {
|
||||
println!(
|
||||
"Pageserver with pid {} does not exist, but a PID file was found",
|
||||
pid
|
||||
);
|
||||
println!("Pageserver with pid {pid} does not exist, but a PID file was found");
|
||||
return Ok(());
|
||||
}
|
||||
Err(err) => bail!(
|
||||
"Failed to send signal to pageserver with pid {}: {}",
|
||||
pid,
|
||||
"Failed to send signal to pageserver with pid {pid}: {}",
|
||||
err.desc()
|
||||
),
|
||||
}
|
||||
|
||||
let address = connection_address(&self.pg_connection_config);
|
||||
|
||||
// TODO Remove this "timeout" and handle it on caller side instead.
|
||||
// Shutting down may take a long time,
|
||||
// if pageserver checkpoints a lot of data
|
||||
let mut tcp_stopped = false;
|
||||
for _ in 0..100 {
|
||||
if !tcp_stopped {
|
||||
if let Err(err) = TcpStream::connect(&address) {
|
||||
tcp_stopped = true;
|
||||
if err.kind() != io::ErrorKind::ConnectionRefused {
|
||||
eprintln!("\nPageserver connection failed with error: {err}");
|
||||
}
|
||||
// Wait until process is gone
|
||||
for i in 0..600 {
|
||||
let signal = None; // Send no signal, just get the error code
|
||||
match kill(pid, signal) {
|
||||
Ok(_) => (), // Process exists, keep waiting
|
||||
Err(Errno::ESRCH) => {
|
||||
// Process not found, we're done
|
||||
println!("done!");
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
if tcp_stopped {
|
||||
// Also check status on the HTTP port
|
||||
Err(err) => bail!(
|
||||
"Failed to send signal to pageserver with pid {}: {}",
|
||||
pid,
|
||||
err.desc()
|
||||
),
|
||||
};
|
||||
|
||||
match self.check_status() {
|
||||
Err(PageserverHttpError::Transport(err)) if err.is_connect() => {
|
||||
println!("done!");
|
||||
return Ok(());
|
||||
}
|
||||
Err(err) => {
|
||||
eprintln!("\nPageserver status check failed with error: {err}");
|
||||
return Ok(());
|
||||
}
|
||||
Ok(()) => {
|
||||
// keep waiting
|
||||
}
|
||||
}
|
||||
if i % 10 == 0 {
|
||||
print!(".");
|
||||
io::stdout().flush().unwrap();
|
||||
}
|
||||
print!(".");
|
||||
io::stdout().flush().unwrap();
|
||||
thread::sleep(Duration::from_secs(1));
|
||||
thread::sleep(Duration::from_millis(100));
|
||||
}
|
||||
|
||||
bail!("Failed to stop pageserver with pid {}", pid);
|
||||
bail!("Failed to stop pageserver with pid {pid}");
|
||||
}
|
||||
|
||||
pub fn page_server_psql(&self, sql: &str) -> Vec<postgres::SimpleQueryMessage> {
|
||||
let mut client = self.pg_connection_config.connect(NoTls).unwrap();
|
||||
|
||||
println!("Pageserver query: '{}'", sql);
|
||||
println!("Pageserver query: '{sql}'");
|
||||
client.simple_query(sql).unwrap()
|
||||
}
|
||||
|
||||
@@ -390,15 +370,15 @@ impl PageServerNode {
|
||||
&self,
|
||||
new_tenant_id: Option<ZTenantId>,
|
||||
settings: HashMap<&str, &str>,
|
||||
) -> anyhow::Result<Option<ZTenantId>> {
|
||||
let tenant_id_string = self
|
||||
.http_request(Method::POST, format!("{}/tenant", self.http_base_url))
|
||||
) -> anyhow::Result<ZTenantId> {
|
||||
self.http_request(Method::POST, format!("{}/tenant", self.http_base_url))
|
||||
.json(&TenantCreateRequest {
|
||||
new_tenant_id,
|
||||
checkpoint_distance: settings
|
||||
.get("checkpoint_distance")
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()?,
|
||||
checkpoint_timeout: settings.get("checkpoint_timeout").map(|x| x.to_string()),
|
||||
compaction_target_size: settings
|
||||
.get("compaction_target_size")
|
||||
.map(|x| x.parse::<u64>())
|
||||
@@ -430,18 +410,16 @@ impl PageServerNode {
|
||||
})
|
||||
.send()?
|
||||
.error_from_body()?
|
||||
.json::<Option<String>>()?;
|
||||
|
||||
tenant_id_string
|
||||
.map(|id| {
|
||||
id.parse().with_context(|| {
|
||||
format!(
|
||||
"Failed to parse tennat creation response as tenant id: {}",
|
||||
id
|
||||
)
|
||||
.json::<Option<String>>()
|
||||
.with_context(|| {
|
||||
format!("Failed to parse tenant creation response for tenant id: {new_tenant_id:?}")
|
||||
})?
|
||||
.context("No tenant id was found in the tenant creation response")
|
||||
.and_then(|tenant_id_string| {
|
||||
tenant_id_string.parse().with_context(|| {
|
||||
format!("Failed to parse response string as tenant id: '{tenant_id_string}'")
|
||||
})
|
||||
})
|
||||
.transpose()
|
||||
}
|
||||
|
||||
pub fn tenant_config(&self, tenant_id: ZTenantId, settings: HashMap<&str, &str>) -> Result<()> {
|
||||
@@ -453,6 +431,7 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'checkpoint_distance' as an integer")?,
|
||||
checkpoint_timeout: settings.get("checkpoint_timeout").map(|x| x.to_string()),
|
||||
compaction_target_size: settings
|
||||
.get("compaction_target_size")
|
||||
.map(|x| x.parse::<u64>())
|
||||
@@ -511,22 +490,27 @@ impl PageServerNode {
|
||||
new_timeline_id: Option<ZTimelineId>,
|
||||
ancestor_start_lsn: Option<Lsn>,
|
||||
ancestor_timeline_id: Option<ZTimelineId>,
|
||||
) -> anyhow::Result<Option<TimelineInfo>> {
|
||||
let timeline_info_response = self
|
||||
.http_request(
|
||||
Method::POST,
|
||||
format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
|
||||
) -> anyhow::Result<TimelineInfo> {
|
||||
self.http_request(
|
||||
Method::POST,
|
||||
format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
|
||||
)
|
||||
.json(&TimelineCreateRequest {
|
||||
new_timeline_id,
|
||||
ancestor_start_lsn,
|
||||
ancestor_timeline_id,
|
||||
})
|
||||
.send()?
|
||||
.error_from_body()?
|
||||
.json::<Option<TimelineInfo>>()
|
||||
.with_context(|| {
|
||||
format!("Failed to parse timeline creation response for tenant id: {tenant_id}")
|
||||
})?
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"No timeline id was found in the timeline creation response for tenant {tenant_id}"
|
||||
)
|
||||
.json(&TimelineCreateRequest {
|
||||
new_timeline_id,
|
||||
ancestor_start_lsn,
|
||||
ancestor_timeline_id,
|
||||
})
|
||||
.send()?
|
||||
.error_from_body()?
|
||||
.json::<Option<TimelineInfo>>()?;
|
||||
|
||||
Ok(timeline_info_response)
|
||||
})
|
||||
}
|
||||
|
||||
/// Import a basebackup prepared using either:
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -eux
|
||||
|
||||
broker_endpoints_param="${BROKER_ENDPOINT:-absent}"
|
||||
if [ "$broker_endpoints_param" != "absent" ]; then
|
||||
broker_endpoints_param="-c broker_endpoints=['$broker_endpoints_param']"
|
||||
else
|
||||
broker_endpoints_param=''
|
||||
fi
|
||||
|
||||
if [ "$1" = 'pageserver' ]; then
|
||||
if [ ! -d "/data/tenants" ]; then
|
||||
echo "Initializing pageserver data directory"
|
||||
pageserver --init -D /data -c "pg_distrib_dir='/usr/local'" -c "id=10" $broker_endpoints_param
|
||||
fi
|
||||
echo "Staring pageserver at 0.0.0.0:6400"
|
||||
pageserver -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" $broker_endpoints_param -D /data
|
||||
else
|
||||
"$@"
|
||||
fi
|
||||
@@ -52,10 +52,8 @@
|
||||
- [multitenancy.md](./multitenancy.md) — how multitenancy is organized in the pageserver and Zenith CLI.
|
||||
- [settings.md](./settings.md)
|
||||
#FIXME: move these under sourcetree.md
|
||||
#- [pageserver/README.md](/pageserver/README.md)
|
||||
#- [postgres_ffi/README.md](/libs/postgres_ffi/README.md)
|
||||
#- [test_runner/README.md](/test_runner/README.md)
|
||||
#- [safekeeper/README.md](/safekeeper/README.md)
|
||||
|
||||
|
||||
# RFCs
|
||||
|
||||
@@ -75,7 +75,7 @@ layer's Segment and range of LSNs.
|
||||
There are two kinds of layers, in-memory and on-disk layers. In-memory
|
||||
layers are used to ingest incoming WAL, and provide fast access
|
||||
to the recent page versions. On-disk layers are stored as files on disk, and
|
||||
are immutable. See pageserver/src/layered_repository/README.md for more.
|
||||
are immutable. See [pageserver-storage.md](./pageserver-storage.md) for more.
|
||||
|
||||
### Layer file (on-disk layer)
|
||||
|
||||
@@ -111,7 +111,7 @@ PostgreSQL LSNs and functions to monitor them:
|
||||
* `pg_last_wal_replay_lsn ()` - Returns the last write-ahead log location that has been replayed during recovery. If recovery is still in progress this will increase monotonically.
|
||||
[source PostgreSQL documentation](https://www.postgresql.org/docs/devel/functions-admin.html):
|
||||
|
||||
Neon safekeeper LSNs. For more check [safekeeper/README_PROTO.md](/safekeeper/README_PROTO.md)
|
||||
Neon safekeeper LSNs. See [safekeeper protocol section](safekeeper-protocol.md) for more information.
|
||||
* `CommitLSN`: position in WAL confirmed by quorum safekeepers.
|
||||
* `RestartLSN`: position in WAL confirmed by all safekeepers.
|
||||
* `FlushLSN`: part of WAL persisted to the disk by safekeeper.
|
||||
|
||||
@@ -68,8 +68,6 @@ There are the following implementations present:
|
||||
* local filesystem — to use in tests mainly
|
||||
* AWS S3 - to use in production
|
||||
|
||||
Implementation details are covered in the [backup readme](./src/remote_storage/README.md) and corresponding Rust file docs, parameters documentation can be found at [settings docs](../docs/settings.md).
|
||||
|
||||
The backup service is disabled by default and can be enabled to interact with a single remote storage.
|
||||
|
||||
CLI examples:
|
||||
@@ -118,7 +116,7 @@ implemented by the LayeredRepository object in
|
||||
`layered_repository.rs`. There is only that one implementation of the
|
||||
Repository trait, but it's still a useful abstraction that keeps the
|
||||
interface for the low-level storage functionality clean. The layered
|
||||
storage format is described in layered_repository/README.md.
|
||||
storage format is described in [pageserver-storage.md](./pageserver-storage.md).
|
||||
|
||||
Each repository consists of multiple Timelines. Timeline is a
|
||||
workhorse that accepts page changes from the WAL, and serves
|
||||
|
||||
@@ -15,7 +15,7 @@ listen_pg_addr = '127.0.0.1:64000'
|
||||
listen_http_addr = '127.0.0.1:9898'
|
||||
|
||||
checkpoint_distance = '268435456' # in bytes
|
||||
checkpoint_period = '1 s'
|
||||
checkpoint_timeout = '10m'
|
||||
|
||||
gc_period = '100 s'
|
||||
gc_horizon = '67108864'
|
||||
@@ -46,7 +46,7 @@ Note the `[remote_storage]` section: it's a [table](https://toml.io/en/v1.0.0#ta
|
||||
|
||||
All values can be passed as an argument to the pageserver binary, using the `-c` parameter and specified as a valid TOML string. All tables should be passed in the inline form.
|
||||
|
||||
Example: `${PAGESERVER_BIN} -c "checkpoint_period = '100 s'" -c "remote_storage={local_path='/some/local/path/'}"`
|
||||
Example: `${PAGESERVER_BIN} -c "checkpoint_timeout = '10 m'" -c "remote_storage={local_path='/some/local/path/'}"`
|
||||
|
||||
Note that TOML distinguishes between strings and integers, the former require single or double quotes around them.
|
||||
|
||||
@@ -82,6 +82,14 @@ S3.
|
||||
|
||||
The unit is # of bytes.
|
||||
|
||||
#### checkpoint_timeout
|
||||
|
||||
Apart from `checkpoint_distance`, open layer flushing is also triggered
|
||||
`checkpoint_timeout` after the last flush. This makes WAL eventually uploaded to
|
||||
s3 when activity is stopped.
|
||||
|
||||
The default is 10m.
|
||||
|
||||
#### compaction_period
|
||||
|
||||
Every `compaction_period` seconds, the page server checks if
|
||||
|
||||
@@ -28,7 +28,7 @@ The pageserver has a few different duties:
|
||||
- Receive WAL from the WAL service and decode it.
|
||||
- Replay WAL that's applicable to the chunks that the Page Server maintains
|
||||
|
||||
For more detailed info, see [/pageserver/README](/pageserver/README.md)
|
||||
For more detailed info, see [pageserver-services.md](./pageserver-services.md)
|
||||
|
||||
`/proxy`:
|
||||
|
||||
@@ -57,7 +57,7 @@ PostgreSQL extension that contains functions needed for testing and debugging.
|
||||
The zenith WAL service that receives WAL from a primary compute nodes and streams it to the pageserver.
|
||||
It acts as a holding area and redistribution center for recently generated WAL.
|
||||
|
||||
For more detailed info, see [/safekeeper/README](/safekeeper/README.md)
|
||||
For more detailed info, see [walservice.md](./walservice.md)
|
||||
|
||||
`/workspace_hack`:
|
||||
The workspace_hack crate exists only to pin down some dependencies.
|
||||
|
||||
@@ -75,8 +75,8 @@ safekeepers. The Paxos and crash recovery algorithm ensures that only
|
||||
one primary node can be actively streaming WAL to the quorum of
|
||||
safekeepers.
|
||||
|
||||
See README_PROTO.md for a more detailed description of the consensus
|
||||
protocol. spec/ contains TLA+ specification of it.
|
||||
See [this section](safekeeper-protocol.md) for a more detailed description of
|
||||
the consensus protocol. spec/ contains TLA+ specification of it.
|
||||
|
||||
# Q&A
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
serde_with = "1.12.0"
|
||||
once_cell = "1.8.0"
|
||||
once_cell = "1.13.0"
|
||||
|
||||
utils = { path = "../utils" }
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
|
||||
@@ -6,6 +6,5 @@ edition = "2021"
|
||||
[dependencies]
|
||||
prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
|
||||
libc = "0.2"
|
||||
lazy_static = "1.4"
|
||||
once_cell = "1.8.0"
|
||||
once_cell = "1.13.0"
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
|
||||
@@ -2,7 +2,10 @@
|
||||
//! make sure that we use the same dep version everywhere.
|
||||
//! Otherwise, we might not see all metrics registered via
|
||||
//! a default registry.
|
||||
use lazy_static::lazy_static;
|
||||
use once_cell::sync::Lazy;
|
||||
use prometheus::core::{AtomicU64, GenericGauge, GenericGaugeVec};
|
||||
pub use prometheus::opts;
|
||||
pub use prometheus::register;
|
||||
pub use prometheus::{core, default_registry, proto};
|
||||
pub use prometheus::{exponential_buckets, linear_buckets};
|
||||
pub use prometheus::{register_gauge, Gauge};
|
||||
@@ -18,6 +21,17 @@ pub use prometheus::{Encoder, TextEncoder};
|
||||
mod wrappers;
|
||||
pub use wrappers::{CountedReader, CountedWriter};
|
||||
|
||||
pub type UIntGauge = GenericGauge<AtomicU64>;
|
||||
pub type UIntGaugeVec = GenericGaugeVec<AtomicU64>;
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! register_uint_gauge_vec {
|
||||
($NAME:expr, $HELP:expr, $LABELS_NAMES:expr $(,)?) => {{
|
||||
let gauge_vec = UIntGaugeVec::new($crate::opts!($NAME, $HELP), $LABELS_NAMES).unwrap();
|
||||
$crate::register(Box::new(gauge_vec.clone())).map(|_| gauge_vec)
|
||||
}};
|
||||
}
|
||||
|
||||
/// Gathers all Prometheus metrics and records the I/O stats just before that.
|
||||
///
|
||||
/// Metrics gathering is a relatively simple and standalone operation, so
|
||||
@@ -27,19 +41,22 @@ pub fn gather() -> Vec<prometheus::proto::MetricFamily> {
|
||||
prometheus::gather()
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref DISK_IO_BYTES: IntGaugeVec = register_int_gauge_vec!(
|
||||
static DISK_IO_BYTES: Lazy<IntGaugeVec> = Lazy::new(|| {
|
||||
register_int_gauge_vec!(
|
||||
"libmetrics_disk_io_bytes_total",
|
||||
"Bytes written and read from disk, grouped by the operation (read|write)",
|
||||
&["io_operation"]
|
||||
)
|
||||
.expect("Failed to register disk i/o bytes int gauge vec");
|
||||
static ref MAXRSS_KB: IntGauge = register_int_gauge!(
|
||||
.expect("Failed to register disk i/o bytes int gauge vec")
|
||||
});
|
||||
|
||||
static MAXRSS_KB: Lazy<IntGauge> = Lazy::new(|| {
|
||||
register_int_gauge!(
|
||||
"libmetrics_maxrss_kb",
|
||||
"Memory usage (Maximum Resident Set Size)"
|
||||
)
|
||||
.expect("Failed to register maxrss_kb int gauge");
|
||||
}
|
||||
.expect("Failed to register maxrss_kb int gauge")
|
||||
});
|
||||
|
||||
pub const DISK_WRITE_SECONDS_BUCKETS: &[f64] = &[
|
||||
0.000_050, 0.000_100, 0.000_500, 0.001, 0.003, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5,
|
||||
|
||||
@@ -10,13 +10,13 @@ use std::io::{Read, Result, Write};
|
||||
/// # use std::io::{Result, Read};
|
||||
/// # use metrics::{register_int_counter, IntCounter};
|
||||
/// # use metrics::CountedReader;
|
||||
/// # use once_cell::sync::Lazy;
|
||||
/// #
|
||||
/// # lazy_static::lazy_static! {
|
||||
/// # static ref INT_COUNTER: IntCounter = register_int_counter!(
|
||||
/// # static INT_COUNTER: Lazy<IntCounter> = Lazy::new( || { register_int_counter!(
|
||||
/// # "int_counter",
|
||||
/// # "let's count something!"
|
||||
/// # ).unwrap();
|
||||
/// # }
|
||||
/// # ).unwrap()
|
||||
/// # });
|
||||
/// #
|
||||
/// fn do_some_reads(stream: impl Read, count: usize) -> Result<Vec<u8>> {
|
||||
/// let mut reader = CountedReader::new(stream, |cnt| {
|
||||
@@ -85,13 +85,13 @@ impl<T: Read> Read for CountedReader<'_, T> {
|
||||
/// # use std::io::{Result, Write};
|
||||
/// # use metrics::{register_int_counter, IntCounter};
|
||||
/// # use metrics::CountedWriter;
|
||||
/// # use once_cell::sync::Lazy;
|
||||
/// #
|
||||
/// # lazy_static::lazy_static! {
|
||||
/// # static ref INT_COUNTER: IntCounter = register_int_counter!(
|
||||
/// # static INT_COUNTER: Lazy<IntCounter> = Lazy::new( || { register_int_counter!(
|
||||
/// # "int_counter",
|
||||
/// # "let's count something!"
|
||||
/// # ).unwrap();
|
||||
/// # }
|
||||
/// # ).unwrap()
|
||||
/// # });
|
||||
/// #
|
||||
/// fn do_some_writes(stream: impl Write, payload: &[u8]) -> Result<()> {
|
||||
/// let mut writer = CountedWriter::new(stream, |cnt| {
|
||||
|
||||
@@ -12,7 +12,7 @@ byteorder = "1.4.3"
|
||||
anyhow = "1.0"
|
||||
crc32c = "0.6.0"
|
||||
hex = "0.4.3"
|
||||
lazy_static = "1.4"
|
||||
once_cell = "1.13.0"
|
||||
log = "0.4.14"
|
||||
memoffset = "0.6.2"
|
||||
thiserror = "1.0"
|
||||
|
||||
@@ -44,7 +44,7 @@ impl ParseCallbacks for PostgresFfiCallbacks {
|
||||
|
||||
fn main() {
|
||||
// Tell cargo to invalidate the built crate whenever the wrapper changes
|
||||
println!("cargo:rerun-if-changed=pg_control_ffi.h");
|
||||
println!("cargo:rerun-if-changed=bindgen_deps.h");
|
||||
|
||||
// Finding the location of C headers for the Postgres server:
|
||||
// - if POSTGRES_INSTALL_DIR is set look into it, otherwise look into `<project_root>/tmp_install`
|
||||
@@ -88,9 +88,9 @@ fn main() {
|
||||
// the resulting bindings.
|
||||
let bindings = bindgen::Builder::default()
|
||||
//
|
||||
// All the needed PostgreSQL headers are included from 'pg_control_ffi.h'
|
||||
// All the needed PostgreSQL headers are included from 'bindgen_deps.h'
|
||||
//
|
||||
.header("pg_control_ffi.h")
|
||||
.header("bindgen_deps.h")
|
||||
//
|
||||
// Tell cargo to invalidate the built crate whenever any of the
|
||||
// included header files changed.
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
//! information. You can use PostgreSQL's pg_controldata utility to view its
|
||||
//! contents.
|
||||
//!
|
||||
use crate::{ControlFileData, PG_CONTROL_FILE_SIZE};
|
||||
use super::bindings::{ControlFileData, PG_CONTROL_FILE_SIZE};
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use bytes::{Bytes, BytesMut};
|
||||
|
||||
@@ -7,21 +7,62 @@
|
||||
// https://github.com/rust-lang/rust-bindgen/issues/1651
|
||||
#![allow(deref_nullptr)]
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
|
||||
macro_rules! postgres_ffi {
|
||||
($version:ident) => {
|
||||
#[path = "."]
|
||||
pub mod $version {
|
||||
// fixme: does this have to be 'pub'?
|
||||
pub mod bindings {
|
||||
// bindgen generates bindings for a lot of stuff we don't need
|
||||
#![allow(dead_code)]
|
||||
|
||||
pub mod controlfile_utils;
|
||||
pub mod nonrelfile_utils;
|
||||
pub mod pg_constants;
|
||||
pub mod relfile_utils;
|
||||
pub mod waldecoder;
|
||||
pub mod xlog_utils;
|
||||
use serde::{Deserialize, Serialize};
|
||||
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
|
||||
}
|
||||
pub mod controlfile_utils;
|
||||
pub mod nonrelfile_utils;
|
||||
pub mod pg_constants;
|
||||
pub mod relfile_utils;
|
||||
pub mod waldecoder;
|
||||
pub mod xlog_utils;
|
||||
|
||||
// Re-export some symbols from bindings
|
||||
pub use bindings::DBState_DB_SHUTDOWNED;
|
||||
pub use bindings::{CheckPoint, ControlFileData, XLogRecord};
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
postgres_ffi!(v14);
|
||||
|
||||
// Export some widely used datatypes that are unlikely to change across Postgres versions
|
||||
pub use v14::bindings::{uint32, uint64, Oid};
|
||||
pub use v14::bindings::{BlockNumber, OffsetNumber};
|
||||
pub use v14::bindings::{MultiXactId, TransactionId};
|
||||
|
||||
// Likewise for these, although the assumption that these don't change is a little more iffy.
|
||||
pub use v14::bindings::{MultiXactOffset, MultiXactStatus};
|
||||
|
||||
// from pg_config.h. These can be changed with configure options --with-blocksize=BLOCKSIZE and
|
||||
// --with-segsize=SEGSIZE, but assume the defaults for now.
|
||||
pub const BLCKSZ: u16 = 8192;
|
||||
pub const RELSEG_SIZE: u32 = 1024 * 1024 * 1024 / (BLCKSZ as u32);
|
||||
pub const XLOG_BLCKSZ: usize = 8192;
|
||||
|
||||
// PG timeline is always 1, changing it doesn't have any useful meaning in Neon.
|
||||
//
|
||||
// NOTE: this is not to be confused with Neon timelines; different concept!
|
||||
//
|
||||
// It's a shaky assumption, that it's always 1. We might import a
|
||||
// PostgreSQL data directory that has gone through timeline bumps,
|
||||
// for example. FIXME later.
|
||||
pub const PG_TLI: u32 = 1;
|
||||
|
||||
// See TransactionIdIsNormal in transam.h
|
||||
pub const fn transaction_id_is_normal(id: TransactionId) -> bool {
|
||||
id > pg_constants::FIRST_NORMAL_TRANSACTION_ID
|
||||
id > v14::pg_constants::FIRST_NORMAL_TRANSACTION_ID
|
||||
}
|
||||
|
||||
// See TransactionIdPrecedes in transam.c
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
//!
|
||||
//! Common utilities for dealing with PostgreSQL non-relation files.
|
||||
//!
|
||||
use crate::{pg_constants, transaction_id_precedes};
|
||||
use crate::transaction_id_precedes;
|
||||
use super::pg_constants;
|
||||
use bytes::BytesMut;
|
||||
use log::*;
|
||||
|
||||
use crate::MultiXactId;
|
||||
use super::bindings::MultiXactId;
|
||||
|
||||
pub fn transaction_id_set_status(xid: u32, status: u8, page: &mut BytesMut) {
|
||||
trace!(
|
||||
|
||||
@@ -7,7 +7,8 @@
|
||||
//! comments on them.
|
||||
//!
|
||||
|
||||
use crate::PageHeaderData;
|
||||
use super::bindings::PageHeaderData;
|
||||
use crate::BLCKSZ;
|
||||
|
||||
//
|
||||
// From pg_tablespace_d.h
|
||||
@@ -31,11 +32,6 @@ pub const SMGR_TRUNCATE_HEAP: u32 = 0x0001;
|
||||
pub const SMGR_TRUNCATE_VM: u32 = 0x0002;
|
||||
pub const SMGR_TRUNCATE_FSM: u32 = 0x0004;
|
||||
|
||||
// from pg_config.h. These can be changed with configure options --with-blocksize=BLOCKSIZE and
|
||||
// --with-segsize=SEGSIZE, but assume the defaults for now.
|
||||
pub const BLCKSZ: u16 = 8192;
|
||||
pub const RELSEG_SIZE: u32 = 1024 * 1024 * 1024 / (BLCKSZ as u32);
|
||||
|
||||
//
|
||||
// From bufpage.h
|
||||
//
|
||||
@@ -213,7 +209,6 @@ pub const FIRST_NORMAL_OBJECT_ID: u32 = 16384;
|
||||
/* FIXME: pageserver should request wal_seg_size from compute node */
|
||||
pub const WAL_SEGMENT_SIZE: usize = 16 * 1024 * 1024;
|
||||
|
||||
pub const XLOG_BLCKSZ: usize = 8192;
|
||||
pub const XLOG_CHECKPOINT_SHUTDOWN: u8 = 0x00;
|
||||
pub const XLOG_CHECKPOINT_ONLINE: u8 = 0x10;
|
||||
pub const XLP_LONG_HEADER: u16 = 0x0002;
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
//!
|
||||
//! Common utilities for dealing with PostgreSQL relation files.
|
||||
//!
|
||||
use crate::pg_constants;
|
||||
use lazy_static::lazy_static;
|
||||
use super::pg_constants;
|
||||
use once_cell::sync::OnceCell;
|
||||
use regex::Regex;
|
||||
|
||||
#[derive(Debug, Clone, thiserror::Error, PartialEq)]
|
||||
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
|
||||
pub enum FilePathError {
|
||||
#[error("invalid relation fork name")]
|
||||
InvalidForkName,
|
||||
@@ -54,11 +54,14 @@ pub fn forknumber_to_name(forknum: u8) -> Option<&'static str> {
|
||||
/// See functions relpath() and _mdfd_segpath() in PostgreSQL sources.
|
||||
///
|
||||
pub fn parse_relfilename(fname: &str) -> Result<(u32, u8, u32), FilePathError> {
|
||||
lazy_static! {
|
||||
static ref RELFILE_RE: Regex =
|
||||
Regex::new(r"^(?P<relnode>\d+)(_(?P<forkname>[a-z]+))?(\.(?P<segno>\d+))?$").unwrap();
|
||||
}
|
||||
static RELFILE_RE: OnceCell<Regex> = OnceCell::new();
|
||||
RELFILE_RE.get_or_init(|| {
|
||||
Regex::new(r"^(?P<relnode>\d+)(_(?P<forkname>[a-z]+))?(\.(?P<segno>\d+))?$").unwrap()
|
||||
});
|
||||
|
||||
let caps = RELFILE_RE
|
||||
.get()
|
||||
.unwrap()
|
||||
.captures(fname)
|
||||
.ok_or(FilePathError::InvalidFileName)?;
|
||||
|
||||
|
||||
@@ -10,27 +10,30 @@
|
||||
//!
|
||||
use super::pg_constants;
|
||||
use super::xlog_utils::*;
|
||||
use super::XLogLongPageHeaderData;
|
||||
use super::XLogPageHeaderData;
|
||||
use super::XLogRecord;
|
||||
use super::bindings::{XLogLongPageHeaderData, XLogPageHeaderData, XLogRecord, XLOG_PAGE_MAGIC};
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
use crc32c::*;
|
||||
use log::*;
|
||||
use std::cmp::min;
|
||||
use std::num::NonZeroU32;
|
||||
use thiserror::Error;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
enum State {
|
||||
WaitingForRecord,
|
||||
ReassemblingRecord {
|
||||
recordbuf: BytesMut,
|
||||
contlen: NonZeroU32,
|
||||
},
|
||||
SkippingEverything {
|
||||
skip_until_lsn: Lsn,
|
||||
},
|
||||
}
|
||||
|
||||
pub struct WalStreamDecoder {
|
||||
lsn: Lsn,
|
||||
|
||||
startlsn: Lsn, // LSN where this record starts
|
||||
contlen: u32,
|
||||
padlen: u32,
|
||||
|
||||
inputbuf: BytesMut,
|
||||
|
||||
/// buffer used to reassemble records that cross page boundaries.
|
||||
recordbuf: BytesMut,
|
||||
state: State,
|
||||
}
|
||||
|
||||
#[derive(Error, Debug, Clone)]
|
||||
@@ -48,13 +51,8 @@ impl WalStreamDecoder {
|
||||
pub fn new(lsn: Lsn) -> WalStreamDecoder {
|
||||
WalStreamDecoder {
|
||||
lsn,
|
||||
|
||||
startlsn: Lsn(0),
|
||||
contlen: 0,
|
||||
padlen: 0,
|
||||
|
||||
inputbuf: BytesMut::new(),
|
||||
recordbuf: BytesMut::new(),
|
||||
state: State::WaitingForRecord,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,6 +65,58 @@ impl WalStreamDecoder {
|
||||
self.inputbuf.extend_from_slice(buf);
|
||||
}
|
||||
|
||||
fn validate_page_header(&self, hdr: &XLogPageHeaderData) -> Result<(), WalDecodeError> {
|
||||
let validate_impl = || {
|
||||
if hdr.xlp_magic != XLOG_PAGE_MAGIC as u16 {
|
||||
return Err(format!(
|
||||
"invalid xlog page header: xlp_magic={}, expected {}",
|
||||
hdr.xlp_magic, XLOG_PAGE_MAGIC
|
||||
));
|
||||
}
|
||||
if hdr.xlp_pageaddr != self.lsn.0 {
|
||||
return Err(format!(
|
||||
"invalid xlog page header: xlp_pageaddr={}, expected {}",
|
||||
hdr.xlp_pageaddr, self.lsn
|
||||
));
|
||||
}
|
||||
match self.state {
|
||||
State::WaitingForRecord => {
|
||||
if hdr.xlp_info & XLP_FIRST_IS_CONTRECORD != 0 {
|
||||
return Err(
|
||||
"invalid xlog page header: unexpected XLP_FIRST_IS_CONTRECORD".into(),
|
||||
);
|
||||
}
|
||||
if hdr.xlp_rem_len != 0 {
|
||||
return Err(format!(
|
||||
"invalid xlog page header: xlp_rem_len={}, but it's not a contrecord",
|
||||
hdr.xlp_rem_len
|
||||
));
|
||||
}
|
||||
}
|
||||
State::ReassemblingRecord { contlen, .. } => {
|
||||
if hdr.xlp_info & XLP_FIRST_IS_CONTRECORD == 0 {
|
||||
return Err(
|
||||
"invalid xlog page header: XLP_FIRST_IS_CONTRECORD expected, not found"
|
||||
.into(),
|
||||
);
|
||||
}
|
||||
if hdr.xlp_rem_len != contlen.get() {
|
||||
return Err(format!(
|
||||
"invalid xlog page header: xlp_rem_len={}, expected {}",
|
||||
hdr.xlp_rem_len,
|
||||
contlen.get()
|
||||
));
|
||||
}
|
||||
}
|
||||
State::SkippingEverything { .. } => {
|
||||
panic!("Should not be validating page header in the SkippingEverything state");
|
||||
}
|
||||
};
|
||||
Ok(())
|
||||
};
|
||||
validate_impl().map_err(|msg| WalDecodeError { msg, lsn: self.lsn })
|
||||
}
|
||||
|
||||
/// Attempt to decode another WAL record from the input that has been fed to the
|
||||
/// decoder so far.
|
||||
///
|
||||
@@ -76,128 +126,121 @@ impl WalStreamDecoder {
|
||||
/// Err(WalDecodeError): an error occurred while decoding, meaning the input was invalid.
|
||||
///
|
||||
pub fn poll_decode(&mut self) -> Result<Option<(Lsn, Bytes)>, WalDecodeError> {
|
||||
let recordbuf;
|
||||
|
||||
// Run state machine that validates page headers, and reassembles records
|
||||
// that cross page boundaries.
|
||||
loop {
|
||||
// parse and verify page boundaries as we go
|
||||
if self.padlen > 0 {
|
||||
// We should first skip padding, as we may have to skip some page headers if we're processing the XLOG_SWITCH record.
|
||||
if self.inputbuf.remaining() < self.padlen as usize {
|
||||
return Ok(None);
|
||||
}
|
||||
// However, we may have to skip some page headers if we're processing the XLOG_SWITCH record or skipping padding for whatever reason.
|
||||
match self.state {
|
||||
State::WaitingForRecord | State::ReassemblingRecord { .. } => {
|
||||
if self.lsn.segment_offset(pg_constants::WAL_SEGMENT_SIZE) == 0 {
|
||||
// parse long header
|
||||
|
||||
// skip padding
|
||||
self.inputbuf.advance(self.padlen as usize);
|
||||
self.lsn += self.padlen as u64;
|
||||
self.padlen = 0;
|
||||
} else if self.lsn.segment_offset(pg_constants::WAL_SEGMENT_SIZE) == 0 {
|
||||
// parse long header
|
||||
if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_LONG_PHD {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_LONG_PHD {
|
||||
return Ok(None);
|
||||
}
|
||||
let hdr = XLogLongPageHeaderData::from_bytes(&mut self.inputbuf).map_err(
|
||||
|e| WalDecodeError {
|
||||
msg: format!("long header deserialization failed {}", e),
|
||||
lsn: self.lsn,
|
||||
},
|
||||
)?;
|
||||
|
||||
let hdr = XLogLongPageHeaderData::from_bytes(&mut self.inputbuf).map_err(|e| {
|
||||
WalDecodeError {
|
||||
msg: format!("long header deserialization failed {}", e),
|
||||
lsn: self.lsn,
|
||||
self.validate_page_header(&hdr.std)?;
|
||||
|
||||
self.lsn += XLOG_SIZE_OF_XLOG_LONG_PHD as u64;
|
||||
} else if self.lsn.block_offset() == 0 {
|
||||
if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_SHORT_PHD {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let hdr =
|
||||
XLogPageHeaderData::from_bytes(&mut self.inputbuf).map_err(|e| {
|
||||
WalDecodeError {
|
||||
msg: format!("header deserialization failed {}", e),
|
||||
lsn: self.lsn,
|
||||
}
|
||||
})?;
|
||||
|
||||
self.validate_page_header(&hdr)?;
|
||||
|
||||
self.lsn += XLOG_SIZE_OF_XLOG_SHORT_PHD as u64;
|
||||
}
|
||||
})?;
|
||||
|
||||
if hdr.std.xlp_pageaddr != self.lsn.0 {
|
||||
return Err(WalDecodeError {
|
||||
msg: "invalid xlog segment header".into(),
|
||||
lsn: self.lsn,
|
||||
});
|
||||
}
|
||||
// TODO: verify the remaining fields in the header
|
||||
|
||||
self.lsn += XLOG_SIZE_OF_XLOG_LONG_PHD as u64;
|
||||
continue;
|
||||
} else if self.lsn.block_offset() == 0 {
|
||||
if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_SHORT_PHD {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let hdr = XLogPageHeaderData::from_bytes(&mut self.inputbuf).map_err(|e| {
|
||||
WalDecodeError {
|
||||
msg: format!("header deserialization failed {}", e),
|
||||
lsn: self.lsn,
|
||||
State::SkippingEverything { .. } => {}
|
||||
}
|
||||
match &mut self.state {
|
||||
State::WaitingForRecord => {
|
||||
// need to have at least the xl_tot_len field
|
||||
if self.inputbuf.remaining() < 4 {
|
||||
return Ok(None);
|
||||
}
|
||||
})?;
|
||||
|
||||
if hdr.xlp_pageaddr != self.lsn.0 {
|
||||
return Err(WalDecodeError {
|
||||
msg: "invalid xlog page header".into(),
|
||||
lsn: self.lsn,
|
||||
});
|
||||
// peek xl_tot_len at the beginning of the record.
|
||||
// FIXME: assumes little-endian
|
||||
let xl_tot_len = (&self.inputbuf[0..4]).get_u32_le();
|
||||
if (xl_tot_len as usize) < XLOG_SIZE_OF_XLOG_RECORD {
|
||||
return Err(WalDecodeError {
|
||||
msg: format!("invalid xl_tot_len {}", xl_tot_len),
|
||||
lsn: self.lsn,
|
||||
});
|
||||
}
|
||||
// Fast path for the common case that the whole record fits on the page.
|
||||
let pageleft = self.lsn.remaining_in_block() as u32;
|
||||
if self.inputbuf.remaining() >= xl_tot_len as usize && xl_tot_len <= pageleft {
|
||||
self.lsn += xl_tot_len as u64;
|
||||
let recordbuf = self.inputbuf.copy_to_bytes(xl_tot_len as usize);
|
||||
return Ok(Some(self.complete_record(recordbuf)?));
|
||||
} else {
|
||||
// Need to assemble the record from pieces. Remember the size of the
|
||||
// record, and loop back. On next iteration, we will reach the 'else'
|
||||
// branch below, and copy the part of the record that was on this page
|
||||
// to 'recordbuf'. Subsequent iterations will skip page headers, and
|
||||
// append the continuations from the next pages to 'recordbuf'.
|
||||
self.state = State::ReassemblingRecord {
|
||||
recordbuf: BytesMut::with_capacity(xl_tot_len as usize),
|
||||
contlen: NonZeroU32::new(xl_tot_len).unwrap(),
|
||||
}
|
||||
}
|
||||
}
|
||||
// TODO: verify the remaining fields in the header
|
||||
State::ReassemblingRecord { recordbuf, contlen } => {
|
||||
// we're continuing a record, possibly from previous page.
|
||||
let pageleft = self.lsn.remaining_in_block() as u32;
|
||||
|
||||
self.lsn += XLOG_SIZE_OF_XLOG_SHORT_PHD as u64;
|
||||
continue;
|
||||
} else if self.contlen == 0 {
|
||||
assert!(self.recordbuf.is_empty());
|
||||
// read the rest of the record, or as much as fits on this page.
|
||||
let n = min(contlen.get(), pageleft) as usize;
|
||||
|
||||
// need to have at least the xl_tot_len field
|
||||
if self.inputbuf.remaining() < 4 {
|
||||
return Ok(None);
|
||||
if self.inputbuf.remaining() < n {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
recordbuf.put(self.inputbuf.split_to(n));
|
||||
self.lsn += n as u64;
|
||||
*contlen = match NonZeroU32::new(contlen.get() - n as u32) {
|
||||
Some(x) => x,
|
||||
None => {
|
||||
// The record is now complete.
|
||||
let recordbuf = std::mem::replace(recordbuf, BytesMut::new()).freeze();
|
||||
return Ok(Some(self.complete_record(recordbuf)?));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// peek xl_tot_len at the beginning of the record.
|
||||
// FIXME: assumes little-endian
|
||||
self.startlsn = self.lsn;
|
||||
let xl_tot_len = (&self.inputbuf[0..4]).get_u32_le();
|
||||
if (xl_tot_len as usize) < XLOG_SIZE_OF_XLOG_RECORD {
|
||||
return Err(WalDecodeError {
|
||||
msg: format!("invalid xl_tot_len {}", xl_tot_len),
|
||||
lsn: self.lsn,
|
||||
});
|
||||
State::SkippingEverything { skip_until_lsn } => {
|
||||
assert!(*skip_until_lsn >= self.lsn);
|
||||
let n = skip_until_lsn.0 - self.lsn.0;
|
||||
if self.inputbuf.remaining() < n as usize {
|
||||
return Ok(None);
|
||||
}
|
||||
self.inputbuf.advance(n as usize);
|
||||
self.lsn += n;
|
||||
self.state = State::WaitingForRecord;
|
||||
}
|
||||
|
||||
// Fast path for the common case that the whole record fits on the page.
|
||||
let pageleft = self.lsn.remaining_in_block() as u32;
|
||||
if self.inputbuf.remaining() >= xl_tot_len as usize && xl_tot_len <= pageleft {
|
||||
// Take the record from the 'inputbuf', and validate it.
|
||||
recordbuf = self.inputbuf.copy_to_bytes(xl_tot_len as usize);
|
||||
self.lsn += xl_tot_len as u64;
|
||||
break;
|
||||
} else {
|
||||
// Need to assemble the record from pieces. Remember the size of the
|
||||
// record, and loop back. On next iteration, we will reach the 'else'
|
||||
// branch below, and copy the part of the record that was on this page
|
||||
// to 'recordbuf'. Subsequent iterations will skip page headers, and
|
||||
// append the continuations from the next pages to 'recordbuf'.
|
||||
self.recordbuf.reserve(xl_tot_len as usize);
|
||||
self.contlen = xl_tot_len;
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
// we're continuing a record, possibly from previous page.
|
||||
let pageleft = self.lsn.remaining_in_block() as u32;
|
||||
|
||||
// read the rest of the record, or as much as fits on this page.
|
||||
let n = min(self.contlen, pageleft) as usize;
|
||||
|
||||
if self.inputbuf.remaining() < n {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
self.recordbuf.put(self.inputbuf.split_to(n));
|
||||
self.lsn += n as u64;
|
||||
self.contlen -= n as u32;
|
||||
|
||||
if self.contlen == 0 {
|
||||
// The record is now complete.
|
||||
recordbuf = std::mem::replace(&mut self.recordbuf, BytesMut::new()).freeze();
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn complete_record(&mut self, recordbuf: Bytes) -> Result<(Lsn, Bytes), WalDecodeError> {
|
||||
// We now have a record in the 'recordbuf' local variable.
|
||||
let xlogrec =
|
||||
XLogRecord::from_slice(&recordbuf[0..XLOG_SIZE_OF_XLOG_RECORD]).map_err(|e| {
|
||||
@@ -219,18 +262,20 @@ impl WalStreamDecoder {
|
||||
|
||||
// XLOG_SWITCH records are special. If we see one, we need to skip
|
||||
// to the next WAL segment.
|
||||
if xlogrec.is_xlog_switch_record() {
|
||||
let next_lsn = if xlogrec.is_xlog_switch_record() {
|
||||
trace!("saw xlog switch record at {}", self.lsn);
|
||||
self.padlen = self.lsn.calc_padding(pg_constants::WAL_SEGMENT_SIZE as u64) as u32;
|
||||
self.lsn + self.lsn.calc_padding(pg_constants::WAL_SEGMENT_SIZE as u64)
|
||||
} else {
|
||||
// Pad to an 8-byte boundary
|
||||
self.padlen = self.lsn.calc_padding(8u32) as u32;
|
||||
}
|
||||
self.lsn.align()
|
||||
};
|
||||
self.state = State::SkippingEverything {
|
||||
skip_until_lsn: next_lsn,
|
||||
};
|
||||
|
||||
// We should return LSN of the next record, not the last byte of this record or
|
||||
// the byte immediately after. Note that this handles both XLOG_SWITCH and usual
|
||||
// records, the former "spans" until the next WAL segment (see test_xlog_switch).
|
||||
let result = (self.lsn + self.padlen as u64, recordbuf);
|
||||
Ok(Some(result))
|
||||
Ok((next_lsn, recordbuf))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,31 +7,33 @@
|
||||
// have been named the same as the corresponding PostgreSQL functions instead.
|
||||
//
|
||||
|
||||
use crate::pg_constants;
|
||||
use crate::CheckPoint;
|
||||
use crate::FullTransactionId;
|
||||
use crate::XLogLongPageHeaderData;
|
||||
use crate::XLogPageHeaderData;
|
||||
use crate::XLogRecord;
|
||||
use crate::XLOG_PAGE_MAGIC;
|
||||
use crc32c::crc32c_append;
|
||||
|
||||
use super::bindings::{
|
||||
CheckPoint, FullTransactionId, XLogLongPageHeaderData, XLogPageHeaderData, XLogRecord,
|
||||
XLOG_PAGE_MAGIC,
|
||||
};
|
||||
use super::pg_constants;
|
||||
use super::pg_constants::WAL_SEGMENT_SIZE;
|
||||
use crate::v14::waldecoder::WalStreamDecoder;
|
||||
use crate::PG_TLI;
|
||||
use crate::{uint32, uint64, Oid};
|
||||
|
||||
use crate::pg_constants::WAL_SEGMENT_SIZE;
|
||||
use anyhow::{bail, ensure};
|
||||
use byteorder::{ByteOrder, LittleEndian};
|
||||
use bytes::BytesMut;
|
||||
use bytes::{Buf, Bytes};
|
||||
use crc32c::*;
|
||||
|
||||
use log::*;
|
||||
use std::cmp::max;
|
||||
use std::cmp::min;
|
||||
use std::fs::{self, File};
|
||||
|
||||
use serde::Serialize;
|
||||
use std::fs::File;
|
||||
use std::io::prelude::*;
|
||||
use std::io::ErrorKind;
|
||||
use std::io::SeekFrom;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::SystemTime;
|
||||
use utils::bin_ser::DeserializeError;
|
||||
use utils::bin_ser::SerializeError;
|
||||
use utils::const_assert;
|
||||
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
pub const XLOG_FNAME_LEN: usize = 24;
|
||||
@@ -47,9 +49,6 @@ pub const XLOG_SIZE_OF_XLOG_RECORD: usize = std::mem::size_of::<XLogRecord>();
|
||||
#[allow(clippy::identity_op)]
|
||||
pub const SIZE_OF_XLOG_RECORD_DATA_HEADER_SHORT: usize = 1 * 2;
|
||||
|
||||
// PG timeline is always 1, changing it doesn't have useful meaning in Zenith.
|
||||
pub const PG_TLI: u32 = 1;
|
||||
|
||||
pub type XLogRecPtr = u64;
|
||||
pub type TimeLineID = u32;
|
||||
pub type TimestampTz = i64;
|
||||
@@ -80,12 +79,12 @@ pub fn XLogSegNoOffsetToRecPtr(
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn XLogFileName(tli: TimeLineID, logSegNo: XLogSegNo, wal_segsz_bytes: usize) -> String {
|
||||
return format!(
|
||||
format!(
|
||||
"{:>08X}{:>08X}{:>08X}",
|
||||
tli,
|
||||
logSegNo / XLogSegmentsPerXLogId(wal_segsz_bytes),
|
||||
logSegNo % XLogSegmentsPerXLogId(wal_segsz_bytes)
|
||||
);
|
||||
)
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
@@ -140,335 +139,93 @@ pub fn to_pg_timestamp(time: SystemTime) -> TimestampTz {
|
||||
}
|
||||
}
|
||||
|
||||
/// Return offset of the last valid record in the segment segno, starting
|
||||
/// looking at start_offset. Returns start_offset if no records found.
|
||||
fn find_end_of_wal_segment(
|
||||
data_dir: &Path,
|
||||
segno: XLogSegNo,
|
||||
tli: TimeLineID,
|
||||
wal_seg_size: usize,
|
||||
start_offset: usize, // start reading at this point
|
||||
) -> anyhow::Result<u32> {
|
||||
// step back to the beginning of the page to read it in...
|
||||
let mut offs: usize = start_offset - start_offset % XLOG_BLCKSZ;
|
||||
let mut skipping_first_contrecord: bool = false;
|
||||
let mut contlen: usize = 0;
|
||||
let mut xl_crc: u32 = 0;
|
||||
let mut crc: u32 = 0;
|
||||
let mut rec_offs: usize = 0;
|
||||
let mut buf = [0u8; XLOG_BLCKSZ];
|
||||
let file_name = XLogFileName(tli, segno, wal_seg_size);
|
||||
let mut last_valid_rec_pos: usize = start_offset; // assume at given start_offset begins new record
|
||||
let mut file = File::open(data_dir.join(file_name.clone() + ".partial")).unwrap();
|
||||
file.seek(SeekFrom::Start(offs as u64))?;
|
||||
// xl_crc is the last field in XLogRecord, will not be read into rec_hdr
|
||||
const_assert!(XLOG_RECORD_CRC_OFFS + 4 == XLOG_SIZE_OF_XLOG_RECORD);
|
||||
let mut rec_hdr = [0u8; XLOG_RECORD_CRC_OFFS];
|
||||
|
||||
trace!("find_end_of_wal_segment(data_dir={}, segno={}, tli={}, wal_seg_size={}, start_offset=0x{:x})", data_dir.display(), segno, tli, wal_seg_size, start_offset);
|
||||
while offs < wal_seg_size {
|
||||
// we are at the beginning of the page; read it in
|
||||
if offs % XLOG_BLCKSZ == 0 {
|
||||
trace!("offs=0x{:x}: new page", offs);
|
||||
let bytes_read = file.read(&mut buf)?;
|
||||
if bytes_read != buf.len() {
|
||||
bail!(
|
||||
"failed to read {} bytes from {} at {}",
|
||||
XLOG_BLCKSZ,
|
||||
file_name,
|
||||
offs
|
||||
);
|
||||
}
|
||||
|
||||
let xlp_magic = LittleEndian::read_u16(&buf[0..2]);
|
||||
let xlp_info = LittleEndian::read_u16(&buf[2..4]);
|
||||
let xlp_rem_len = LittleEndian::read_u32(&buf[XLP_REM_LEN_OFFS..XLP_REM_LEN_OFFS + 4]);
|
||||
trace!(
|
||||
" xlp_magic=0x{:x}, xlp_info=0x{:x}, xlp_rem_len={}",
|
||||
xlp_magic,
|
||||
xlp_info,
|
||||
xlp_rem_len
|
||||
);
|
||||
// this is expected in current usage when valid WAL starts after page header
|
||||
if xlp_magic != XLOG_PAGE_MAGIC as u16 {
|
||||
trace!(
|
||||
" invalid WAL file {}.partial magic {} at {:?}",
|
||||
file_name,
|
||||
xlp_magic,
|
||||
Lsn(XLogSegNoOffsetToRecPtr(segno, offs as u32, wal_seg_size)),
|
||||
);
|
||||
}
|
||||
if offs == 0 {
|
||||
offs += XLOG_SIZE_OF_XLOG_LONG_PHD;
|
||||
if (xlp_info & XLP_FIRST_IS_CONTRECORD) != 0 {
|
||||
trace!(" first record is contrecord");
|
||||
skipping_first_contrecord = true;
|
||||
contlen = xlp_rem_len as usize;
|
||||
if offs < start_offset {
|
||||
// Pre-condition failed: the beginning of the segment is unexpectedly corrupted.
|
||||
ensure!(start_offset - offs >= contlen,
|
||||
"start_offset is in the middle of the first record (which happens to be a contrecord), \
|
||||
expected to be on a record boundary. Is beginning of the segment corrupted?");
|
||||
contlen = 0;
|
||||
// keep skipping_first_contrecord to avoid counting the contrecord as valid, we did not check it.
|
||||
}
|
||||
} else {
|
||||
trace!(" first record is not contrecord");
|
||||
}
|
||||
} else {
|
||||
offs += XLOG_SIZE_OF_XLOG_SHORT_PHD;
|
||||
}
|
||||
// ... and step forward again if asked
|
||||
trace!(" skipped header to 0x{:x}", offs);
|
||||
offs = max(offs, start_offset);
|
||||
// beginning of the next record
|
||||
} else if contlen == 0 {
|
||||
let page_offs = offs % XLOG_BLCKSZ;
|
||||
let xl_tot_len = LittleEndian::read_u32(&buf[page_offs..page_offs + 4]) as usize;
|
||||
trace!("offs=0x{:x}: new record, xl_tot_len={}", offs, xl_tot_len);
|
||||
if xl_tot_len == 0 {
|
||||
info!(
|
||||
"find_end_of_wal_segment reached zeros at {:?}, last records ends at {:?}",
|
||||
Lsn(XLogSegNoOffsetToRecPtr(segno, offs as u32, wal_seg_size)),
|
||||
Lsn(XLogSegNoOffsetToRecPtr(
|
||||
segno,
|
||||
last_valid_rec_pos as u32,
|
||||
wal_seg_size
|
||||
))
|
||||
);
|
||||
break; // zeros, reached the end
|
||||
}
|
||||
if skipping_first_contrecord {
|
||||
skipping_first_contrecord = false;
|
||||
trace!(" first contrecord has been just completed");
|
||||
} else {
|
||||
trace!(
|
||||
" updating last_valid_rec_pos: 0x{:x} --> 0x{:x}",
|
||||
last_valid_rec_pos,
|
||||
offs
|
||||
);
|
||||
last_valid_rec_pos = offs;
|
||||
}
|
||||
offs += 4;
|
||||
rec_offs = 4;
|
||||
contlen = xl_tot_len - 4;
|
||||
trace!(
|
||||
" reading rec_hdr[0..4] <-- [0x{:x}; 0x{:x})",
|
||||
page_offs,
|
||||
page_offs + 4
|
||||
);
|
||||
rec_hdr[0..4].copy_from_slice(&buf[page_offs..page_offs + 4]);
|
||||
} else {
|
||||
// we're continuing a record, possibly from previous page.
|
||||
let page_offs = offs % XLOG_BLCKSZ;
|
||||
let pageleft = XLOG_BLCKSZ - page_offs;
|
||||
|
||||
// read the rest of the record, or as much as fits on this page.
|
||||
let n = min(contlen, pageleft);
|
||||
trace!(
|
||||
"offs=0x{:x}, record continuation, pageleft={}, contlen={}",
|
||||
offs,
|
||||
pageleft,
|
||||
contlen
|
||||
);
|
||||
// fill rec_hdr header up to (but not including) xl_crc field
|
||||
trace!(
|
||||
" rec_offs={}, XLOG_RECORD_CRC_OFFS={}, XLOG_SIZE_OF_XLOG_RECORD={}",
|
||||
rec_offs,
|
||||
XLOG_RECORD_CRC_OFFS,
|
||||
XLOG_SIZE_OF_XLOG_RECORD
|
||||
);
|
||||
if rec_offs < XLOG_RECORD_CRC_OFFS {
|
||||
let len = min(XLOG_RECORD_CRC_OFFS - rec_offs, n);
|
||||
trace!(
|
||||
" reading rec_hdr[{}..{}] <-- [0x{:x}; 0x{:x})",
|
||||
rec_offs,
|
||||
rec_offs + len,
|
||||
page_offs,
|
||||
page_offs + len
|
||||
);
|
||||
rec_hdr[rec_offs..rec_offs + len].copy_from_slice(&buf[page_offs..page_offs + len]);
|
||||
}
|
||||
if rec_offs <= XLOG_RECORD_CRC_OFFS && rec_offs + n >= XLOG_SIZE_OF_XLOG_RECORD {
|
||||
let crc_offs = page_offs - rec_offs + XLOG_RECORD_CRC_OFFS;
|
||||
// All records are aligned on 8-byte boundary, so their 8-byte frames
|
||||
// cannot be split between pages. As xl_crc is the last field,
|
||||
// its content is always on the same page.
|
||||
const_assert!(XLOG_RECORD_CRC_OFFS % 8 == 4);
|
||||
// We should always start reading aligned records even in incorrect WALs so if
|
||||
// the condition is false it is likely a bug. However, it is localized somewhere
|
||||
// in this function, hence we do not crash and just report failure instead.
|
||||
ensure!(crc_offs % 8 == 4, "Record is not aligned properly (bug?)");
|
||||
xl_crc = LittleEndian::read_u32(&buf[crc_offs..crc_offs + 4]);
|
||||
trace!(
|
||||
" reading xl_crc: [0x{:x}; 0x{:x}) = 0x{:x}",
|
||||
crc_offs,
|
||||
crc_offs + 4,
|
||||
xl_crc
|
||||
);
|
||||
crc = crc32c_append(0, &buf[crc_offs + 4..page_offs + n]);
|
||||
trace!(
|
||||
" initializing crc: [0x{:x}; 0x{:x}); crc = 0x{:x}",
|
||||
crc_offs + 4,
|
||||
page_offs + n,
|
||||
crc
|
||||
);
|
||||
} else if rec_offs > XLOG_RECORD_CRC_OFFS {
|
||||
// As all records are 8-byte aligned, the header is already fully read and `crc` is initialized in the branch above.
|
||||
ensure!(rec_offs >= XLOG_SIZE_OF_XLOG_RECORD);
|
||||
let old_crc = crc;
|
||||
crc = crc32c_append(crc, &buf[page_offs..page_offs + n]);
|
||||
trace!(
|
||||
" appending to crc: [0x{:x}; 0x{:x}); 0x{:x} --> 0x{:x}",
|
||||
page_offs,
|
||||
page_offs + n,
|
||||
old_crc,
|
||||
crc
|
||||
);
|
||||
} else {
|
||||
// Correct because of the way conditions are written above.
|
||||
assert!(rec_offs + n < XLOG_SIZE_OF_XLOG_RECORD);
|
||||
// If `skipping_first_contrecord == true`, we may be reading from a middle of a record
|
||||
// which started in the previous segment. Hence there is no point in validating the header.
|
||||
if !skipping_first_contrecord && rec_offs + n > XLOG_RECORD_CRC_OFFS {
|
||||
info!(
|
||||
"Curiously corrupted WAL: a record stops inside the header; \
|
||||
offs=0x{:x}, record continuation, pageleft={}, contlen={}",
|
||||
offs, pageleft, contlen
|
||||
);
|
||||
break;
|
||||
}
|
||||
// Do nothing: we are still reading the header. It's accounted in CRC in the end of the record.
|
||||
}
|
||||
rec_offs += n;
|
||||
offs += n;
|
||||
contlen -= n;
|
||||
|
||||
if contlen == 0 {
|
||||
trace!(" record completed at 0x{:x}", offs);
|
||||
crc = crc32c_append(crc, &rec_hdr);
|
||||
offs = (offs + 7) & !7; // pad on 8 bytes boundary */
|
||||
trace!(
|
||||
" padded offs to 0x{:x}, crc is {:x}, expected crc is {:x}",
|
||||
offs,
|
||||
crc,
|
||||
xl_crc
|
||||
);
|
||||
if skipping_first_contrecord {
|
||||
// do nothing, the flag will go down on next iteration when we're reading new record
|
||||
trace!(" first conrecord has been just completed");
|
||||
} else if crc == xl_crc {
|
||||
// record is valid, advance the result to its end (with
|
||||
// alignment to the next record taken into account)
|
||||
trace!(
|
||||
" updating last_valid_rec_pos: 0x{:x} --> 0x{:x}",
|
||||
last_valid_rec_pos,
|
||||
offs
|
||||
);
|
||||
last_valid_rec_pos = offs;
|
||||
} else {
|
||||
info!(
|
||||
"CRC mismatch {} vs {} at {}",
|
||||
crc, xl_crc, last_valid_rec_pos
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
trace!("last_valid_rec_pos=0x{:x}", last_valid_rec_pos);
|
||||
Ok(last_valid_rec_pos as u32)
|
||||
}
|
||||
|
||||
///
|
||||
/// Scan a directory that contains PostgreSQL WAL files, for the end of WAL.
|
||||
/// If precise, returns end LSN (next insertion point, basically);
|
||||
/// otherwise, start of the last segment.
|
||||
/// Returns (0, 0) if there is no WAL.
|
||||
///
|
||||
// Returns (aligned) end_lsn of the last record in data_dir with WAL segments.
|
||||
// start_lsn must point to some previously known record boundary (beginning of
|
||||
// the next record). If no valid record after is found, start_lsn is returned
|
||||
// back.
|
||||
pub fn find_end_of_wal(
|
||||
data_dir: &Path,
|
||||
wal_seg_size: usize,
|
||||
precise: bool,
|
||||
start_lsn: Lsn, // start reading WAL at this point or later
|
||||
) -> anyhow::Result<(XLogRecPtr, TimeLineID)> {
|
||||
let mut high_segno: XLogSegNo = 0;
|
||||
let mut high_tli: TimeLineID = 0;
|
||||
let mut high_ispartial = false;
|
||||
start_lsn: Lsn, // start reading WAL at this point; must point at record start_lsn.
|
||||
) -> anyhow::Result<Lsn> {
|
||||
let mut result = start_lsn;
|
||||
let mut curr_lsn = start_lsn;
|
||||
let mut buf = [0u8; XLOG_BLCKSZ];
|
||||
let mut decoder = WalStreamDecoder::new(start_lsn);
|
||||
|
||||
for entry in fs::read_dir(data_dir).unwrap().flatten() {
|
||||
let ispartial: bool;
|
||||
let entry_name = entry.file_name();
|
||||
let fname = entry_name.to_str().unwrap();
|
||||
/*
|
||||
* Check if the filename looks like an xlog file, or a .partial file.
|
||||
*/
|
||||
if IsXLogFileName(fname) {
|
||||
ispartial = false;
|
||||
} else if IsPartialXLogFileName(fname) {
|
||||
ispartial = true;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
let (segno, tli) = XLogFromFileName(fname, wal_seg_size);
|
||||
if !ispartial && entry.metadata().unwrap().len() != wal_seg_size as u64 {
|
||||
continue;
|
||||
}
|
||||
if segno > high_segno
|
||||
|| (segno == high_segno && tli > high_tli)
|
||||
|| (segno == high_segno && tli == high_tli && high_ispartial && !ispartial)
|
||||
{
|
||||
high_segno = segno;
|
||||
high_tli = tli;
|
||||
high_ispartial = ispartial;
|
||||
}
|
||||
}
|
||||
if high_segno > 0 {
|
||||
let mut high_offs = 0;
|
||||
/*
|
||||
* Move the starting pointer to the start of the next segment, if the
|
||||
* highest one we saw was completed.
|
||||
*/
|
||||
if !high_ispartial {
|
||||
high_segno += 1;
|
||||
} else if precise {
|
||||
/* otherwise locate last record in last partial segment */
|
||||
if start_lsn.segment_number(wal_seg_size) > high_segno {
|
||||
bail!(
|
||||
"provided start_lsn {:?} is beyond highest segno {:?} available",
|
||||
start_lsn,
|
||||
high_segno,
|
||||
// loop over segments
|
||||
loop {
|
||||
let segno = curr_lsn.segment_number(wal_seg_size);
|
||||
let seg_file_name = XLogFileName(PG_TLI, segno, wal_seg_size);
|
||||
let seg_file_path = data_dir.join(seg_file_name);
|
||||
match open_wal_segment(&seg_file_path)? {
|
||||
None => {
|
||||
// no more segments
|
||||
info!(
|
||||
"find_end_of_wal reached end at {:?}, segment {:?} doesn't exist",
|
||||
result, seg_file_path
|
||||
);
|
||||
return Ok(result);
|
||||
}
|
||||
Some(mut segment) => {
|
||||
let seg_offs = curr_lsn.segment_offset(wal_seg_size);
|
||||
segment.seek(SeekFrom::Start(seg_offs as u64))?;
|
||||
// loop inside segment
|
||||
loop {
|
||||
let bytes_read = segment.read(&mut buf)?;
|
||||
if bytes_read == 0 {
|
||||
break; // EOF
|
||||
}
|
||||
curr_lsn += bytes_read as u64;
|
||||
decoder.feed_bytes(&buf[0..bytes_read]);
|
||||
|
||||
// advance result past all completely read records
|
||||
loop {
|
||||
match decoder.poll_decode() {
|
||||
Ok(Some(record)) => result = record.0,
|
||||
Err(e) => {
|
||||
info!(
|
||||
"find_end_of_wal reached end at {:?}, decode error: {:?}",
|
||||
result, e
|
||||
);
|
||||
return Ok(result);
|
||||
}
|
||||
Ok(None) => break, // need more data
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
let start_offset = if start_lsn.segment_number(wal_seg_size) == high_segno {
|
||||
start_lsn.segment_offset(wal_seg_size)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
high_offs = find_end_of_wal_segment(
|
||||
data_dir,
|
||||
high_segno,
|
||||
high_tli,
|
||||
wal_seg_size,
|
||||
start_offset,
|
||||
)?;
|
||||
}
|
||||
let high_ptr = XLogSegNoOffsetToRecPtr(high_segno, high_offs, wal_seg_size);
|
||||
return Ok((high_ptr, high_tli));
|
||||
}
|
||||
Ok((0, 0))
|
||||
}
|
||||
|
||||
// Open .partial or full WAL segment file, if present.
|
||||
fn open_wal_segment(seg_file_path: &Path) -> anyhow::Result<Option<File>> {
|
||||
let mut partial_path = seg_file_path.to_owned();
|
||||
partial_path.set_extension("partial");
|
||||
match File::open(partial_path) {
|
||||
Ok(file) => Ok(Some(file)),
|
||||
Err(e) => match e.kind() {
|
||||
ErrorKind::NotFound => {
|
||||
// .partial not found, try full
|
||||
match File::open(seg_file_path) {
|
||||
Ok(file) => Ok(Some(file)),
|
||||
Err(e) => match e.kind() {
|
||||
ErrorKind::NotFound => Ok(None),
|
||||
_ => Err(e.into()),
|
||||
},
|
||||
}
|
||||
}
|
||||
_ => Err(e.into()),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn main() {
|
||||
let mut data_dir = PathBuf::new();
|
||||
data_dir.push(".");
|
||||
let (wal_end, tli) = find_end_of_wal(&data_dir, WAL_SEGMENT_SIZE, true, Lsn(0)).unwrap();
|
||||
println!(
|
||||
"wal_end={:>08X}{:>08X}, tli={}",
|
||||
(wal_end >> 32) as u32,
|
||||
wal_end as u32,
|
||||
tli
|
||||
);
|
||||
let wal_end = find_end_of_wal(&data_dir, WAL_SEGMENT_SIZE, Lsn(0)).unwrap();
|
||||
println!("wal_end={:?}", wal_end);
|
||||
}
|
||||
|
||||
impl XLogRecord {
|
||||
@@ -588,11 +345,93 @@ pub fn generate_wal_segment(segno: u64, system_id: u64) -> Result<Bytes, Seriali
|
||||
Ok(seg_buf.freeze())
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Serialize)]
|
||||
struct XlLogicalMessage {
|
||||
db_id: Oid,
|
||||
transactional: uint32, // bool, takes 4 bytes due to alignment in C structures
|
||||
prefix_size: uint64,
|
||||
message_size: uint64,
|
||||
}
|
||||
|
||||
impl XlLogicalMessage {
|
||||
pub fn encode(&self) -> Bytes {
|
||||
use utils::bin_ser::LeSer;
|
||||
self.ser().unwrap().into()
|
||||
}
|
||||
}
|
||||
|
||||
/// Create new WAL record for non-transactional logical message.
|
||||
/// Used for creating artificial WAL for tests, as LogicalMessage
|
||||
/// record is basically no-op.
|
||||
///
|
||||
/// NOTE: This leaves the xl_prev field zero. The safekeeper and
|
||||
/// pageserver tolerate that, but PostgreSQL does not.
|
||||
pub fn encode_logical_message(prefix: &str, message: &str) -> Vec<u8> {
|
||||
let mut prefix_bytes: Vec<u8> = Vec::with_capacity(prefix.len() + 1);
|
||||
prefix_bytes.write_all(prefix.as_bytes()).unwrap();
|
||||
prefix_bytes.push(0);
|
||||
|
||||
let message_bytes = message.as_bytes();
|
||||
|
||||
let logical_message = XlLogicalMessage {
|
||||
db_id: 0,
|
||||
transactional: 0,
|
||||
prefix_size: prefix_bytes.len() as u64,
|
||||
message_size: message_bytes.len() as u64,
|
||||
};
|
||||
|
||||
let mainrdata = logical_message.encode();
|
||||
let mainrdata_len: usize = mainrdata.len() + prefix_bytes.len() + message_bytes.len();
|
||||
// only short mainrdata is supported for now
|
||||
assert!(mainrdata_len <= 255);
|
||||
let mainrdata_len = mainrdata_len as u8;
|
||||
|
||||
let mut data: Vec<u8> = vec![pg_constants::XLR_BLOCK_ID_DATA_SHORT, mainrdata_len];
|
||||
data.extend_from_slice(&mainrdata);
|
||||
data.extend_from_slice(&prefix_bytes);
|
||||
data.extend_from_slice(message_bytes);
|
||||
|
||||
let total_len = XLOG_SIZE_OF_XLOG_RECORD + data.len();
|
||||
|
||||
let mut header = XLogRecord {
|
||||
xl_tot_len: total_len as u32,
|
||||
xl_xid: 0,
|
||||
xl_prev: 0,
|
||||
xl_info: 0,
|
||||
xl_rmid: 21,
|
||||
__bindgen_padding_0: [0u8; 2usize],
|
||||
xl_crc: 0, // crc will be calculated later
|
||||
};
|
||||
|
||||
let header_bytes = header.encode().expect("failed to encode header");
|
||||
let crc = crc32c_append(0, &data);
|
||||
let crc = crc32c_append(crc, &header_bytes[0..XLOG_RECORD_CRC_OFFS]);
|
||||
header.xl_crc = crc;
|
||||
|
||||
let mut wal: Vec<u8> = Vec::new();
|
||||
wal.extend_from_slice(&header.encode().expect("failed to encode header"));
|
||||
wal.extend_from_slice(&data);
|
||||
|
||||
// WAL start position must be aligned at 8 bytes,
|
||||
// this will add padding for the next WAL record.
|
||||
const PADDING: usize = 8;
|
||||
let padding_rem = wal.len() % PADDING;
|
||||
if padding_rem != 0 {
|
||||
wal.resize(wal.len() + PADDING - padding_rem, 0);
|
||||
}
|
||||
|
||||
wal
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use regex::Regex;
|
||||
use std::cmp::min;
|
||||
use std::fs;
|
||||
use std::{env, str::FromStr};
|
||||
use utils::const_assert;
|
||||
|
||||
fn init_logging() {
|
||||
let _ = env_logger::Builder::from_env(
|
||||
@@ -603,10 +442,7 @@ mod tests {
|
||||
.try_init();
|
||||
}
|
||||
|
||||
fn test_end_of_wal<C: wal_craft::Crafter>(
|
||||
test_name: &str,
|
||||
expected_end_of_wal_non_partial: Lsn,
|
||||
) {
|
||||
fn test_end_of_wal<C: wal_craft::Crafter>(test_name: &str) {
|
||||
use wal_craft::*;
|
||||
// Craft some WAL
|
||||
let top_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
@@ -627,7 +463,7 @@ mod tests {
|
||||
.iter()
|
||||
.map(|&lsn| u64::from(lsn).into())
|
||||
.collect();
|
||||
let expected_end_of_wal_partial: Lsn = u64::from(expected_end_of_wal_partial).into();
|
||||
let expected_end_of_wal: Lsn = u64::from(expected_end_of_wal_partial).into();
|
||||
srv.kill();
|
||||
|
||||
// Check find_end_of_wal on the initial WAL
|
||||
@@ -639,10 +475,10 @@ mod tests {
|
||||
.filter(|fname| IsXLogFileName(fname))
|
||||
.max()
|
||||
.unwrap();
|
||||
check_pg_waldump_end_of_wal(&cfg, &last_segment, expected_end_of_wal_partial);
|
||||
for start_lsn in std::iter::once(Lsn(0))
|
||||
.chain(intermediate_lsns)
|
||||
.chain(std::iter::once(expected_end_of_wal_partial))
|
||||
check_pg_waldump_end_of_wal(&cfg, &last_segment, expected_end_of_wal);
|
||||
for start_lsn in intermediate_lsns
|
||||
.iter()
|
||||
.chain(std::iter::once(&expected_end_of_wal))
|
||||
{
|
||||
// Erase all WAL before `start_lsn` to ensure it's not used by `find_end_of_wal`.
|
||||
// We assume that `start_lsn` is non-decreasing.
|
||||
@@ -657,7 +493,7 @@ mod tests {
|
||||
}
|
||||
let (segno, _) = XLogFromFileName(&fname, WAL_SEGMENT_SIZE);
|
||||
let seg_start_lsn = XLogSegNoOffsetToRecPtr(segno, 0, WAL_SEGMENT_SIZE);
|
||||
if seg_start_lsn > u64::from(start_lsn) {
|
||||
if seg_start_lsn > u64::from(*start_lsn) {
|
||||
continue;
|
||||
}
|
||||
let mut f = File::options().write(true).open(file.path()).unwrap();
|
||||
@@ -665,18 +501,12 @@ mod tests {
|
||||
f.write_all(
|
||||
&ZEROS[0..min(
|
||||
WAL_SEGMENT_SIZE,
|
||||
(u64::from(start_lsn) - seg_start_lsn) as usize,
|
||||
(u64::from(*start_lsn) - seg_start_lsn) as usize,
|
||||
)],
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
check_end_of_wal(
|
||||
&cfg,
|
||||
&last_segment,
|
||||
start_lsn,
|
||||
expected_end_of_wal_non_partial,
|
||||
expected_end_of_wal_partial,
|
||||
);
|
||||
check_end_of_wal(&cfg, &last_segment, *start_lsn, expected_end_of_wal);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -713,18 +543,15 @@ mod tests {
|
||||
cfg: &wal_craft::Conf,
|
||||
last_segment: &str,
|
||||
start_lsn: Lsn,
|
||||
expected_end_of_wal_non_partial: Lsn,
|
||||
expected_end_of_wal_partial: Lsn,
|
||||
expected_end_of_wal: Lsn,
|
||||
) {
|
||||
// Check end_of_wal on non-partial WAL segment (we treat it as fully populated)
|
||||
let (wal_end, tli) =
|
||||
find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, true, start_lsn).unwrap();
|
||||
let wal_end = Lsn(wal_end);
|
||||
info!(
|
||||
"find_end_of_wal returned (wal_end={}, tli={}) with non-partial WAL segment",
|
||||
wal_end, tli
|
||||
);
|
||||
assert_eq!(wal_end, expected_end_of_wal_non_partial);
|
||||
// let wal_end = find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, start_lsn).unwrap();
|
||||
// info!(
|
||||
// "find_end_of_wal returned wal_end={} with non-partial WAL segment",
|
||||
// wal_end
|
||||
// );
|
||||
// assert_eq!(wal_end, expected_end_of_wal_non_partial);
|
||||
|
||||
// Rename file to partial to actually find last valid lsn, then rename it back.
|
||||
fs::rename(
|
||||
@@ -732,14 +559,12 @@ mod tests {
|
||||
cfg.wal_dir().join(format!("{}.partial", last_segment)),
|
||||
)
|
||||
.unwrap();
|
||||
let (wal_end, tli) =
|
||||
find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, true, start_lsn).unwrap();
|
||||
let wal_end = Lsn(wal_end);
|
||||
let wal_end = find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, start_lsn).unwrap();
|
||||
info!(
|
||||
"find_end_of_wal returned (wal_end={}, tli={}) with partial WAL segment",
|
||||
wal_end, tli
|
||||
"find_end_of_wal returned wal_end={} with partial WAL segment",
|
||||
wal_end
|
||||
);
|
||||
assert_eq!(wal_end, expected_end_of_wal_partial);
|
||||
assert_eq!(wal_end, expected_end_of_wal);
|
||||
fs::rename(
|
||||
cfg.wal_dir().join(format!("{}.partial", last_segment)),
|
||||
cfg.wal_dir().join(last_segment),
|
||||
@@ -752,10 +577,7 @@ mod tests {
|
||||
#[test]
|
||||
pub fn test_find_end_of_wal_simple() {
|
||||
init_logging();
|
||||
test_end_of_wal::<wal_craft::Simple>(
|
||||
"test_find_end_of_wal_simple",
|
||||
"0/2000000".parse::<Lsn>().unwrap(),
|
||||
);
|
||||
test_end_of_wal::<wal_craft::Simple>("test_find_end_of_wal_simple");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -763,17 +585,14 @@ mod tests {
|
||||
init_logging();
|
||||
test_end_of_wal::<wal_craft::WalRecordCrossingSegmentFollowedBySmallOne>(
|
||||
"test_find_end_of_wal_crossing_segment_followed_by_small_one",
|
||||
"0/3000000".parse::<Lsn>().unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore = "not yet fixed, needs correct parsing of pre-last segments"] // TODO
|
||||
pub fn test_find_end_of_wal_last_crossing_segment() {
|
||||
init_logging();
|
||||
test_end_of_wal::<wal_craft::LastWalRecordCrossingSegment>(
|
||||
"test_find_end_of_wal_last_crossing_segment",
|
||||
"0/3000000".parse::<Lsn>().unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -806,4 +625,15 @@ mod tests {
|
||||
checkpoint.update_next_xid(1024);
|
||||
assert_eq!(checkpoint.nextXid.value, 2048);
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_encode_logical_message() {
|
||||
let expected = [
|
||||
64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 0, 170, 34, 166, 227, 255,
|
||||
38, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 112, 114,
|
||||
101, 102, 105, 120, 0, 109, 101, 115, 115, 97, 103, 101,
|
||||
];
|
||||
let actual = encode_logical_message("prefix", "message");
|
||||
assert_eq!(expected, actual[..]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@ anyhow = "1.0"
|
||||
clap = "3.0"
|
||||
env_logger = "0.9"
|
||||
log = "0.4"
|
||||
once_cell = "1.8.0"
|
||||
once_cell = "1.13.0"
|
||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
postgres_ffi = { path = "../" }
|
||||
tempfile = "3.2"
|
||||
|
||||
@@ -4,8 +4,8 @@ use log::*;
|
||||
use once_cell::sync::Lazy;
|
||||
use postgres::types::PgLsn;
|
||||
use postgres::Client;
|
||||
use postgres_ffi::pg_constants::WAL_SEGMENT_SIZE;
|
||||
use postgres_ffi::xlog_utils::{
|
||||
use postgres_ffi::v14::pg_constants::WAL_SEGMENT_SIZE;
|
||||
use postgres_ffi::v14::xlog_utils::{
|
||||
XLOG_BLCKSZ, XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD,
|
||||
};
|
||||
use std::cmp::Ordering;
|
||||
|
||||
@@ -7,7 +7,7 @@ edition = "2021"
|
||||
anyhow = { version = "1.0", features = ["backtrace"] }
|
||||
async-trait = "0.1"
|
||||
metrics = { version = "0.1", path = "../metrics" }
|
||||
once_cell = "1.8.0"
|
||||
once_cell = "1.13.0"
|
||||
rusoto_core = "0.48"
|
||||
rusoto_s3 = "0.48"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
|
||||
@@ -66,6 +66,9 @@ pub trait RemoteStorage: Send + Sync {
|
||||
async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>>;
|
||||
|
||||
/// Lists all top level subdirectories for a given prefix
|
||||
/// Note: here we assume that if the prefix is passed it was obtained via remote_object_id
|
||||
/// which already takes into account any kind of global prefix (prefix_in_bucket for S3 or storage_root for LocalFS)
|
||||
/// so this method doesnt need to.
|
||||
async fn list_prefixes(
|
||||
&self,
|
||||
prefix: Option<Self::RemoteObjectId>,
|
||||
|
||||
@@ -116,7 +116,7 @@ impl RemoteStorage for LocalFs {
|
||||
prefix: Option<Self::RemoteObjectId>,
|
||||
) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
|
||||
let path = match prefix {
|
||||
Some(prefix) => Cow::Owned(self.storage_root.join(prefix)),
|
||||
Some(prefix) => Cow::Owned(prefix),
|
||||
None => Cow::Borrowed(&self.storage_root),
|
||||
};
|
||||
get_all_files(path.as_ref(), false).await
|
||||
|
||||
@@ -171,17 +171,25 @@ impl S3Bucket {
|
||||
|
||||
let access_key_id = std::env::var("AWS_ACCESS_KEY_ID").ok();
|
||||
let secret_access_key = std::env::var("AWS_SECRET_ACCESS_KEY").ok();
|
||||
// session token is used when authorizing through sso
|
||||
// which is typically the case when testing locally on developer machine
|
||||
let session_token = std::env::var("AWS_SESSION_TOKEN").ok();
|
||||
|
||||
let client = if access_key_id.is_none() && secret_access_key.is_none() {
|
||||
debug!("Using IAM-based AWS access");
|
||||
S3Client::new_with(request_dispatcher, InstanceMetadataProvider::new(), region)
|
||||
} else {
|
||||
debug!("Using credentials-based AWS access");
|
||||
debug!(
|
||||
"Using credentials-based AWS access. Session token is set: {}",
|
||||
session_token.is_some()
|
||||
);
|
||||
S3Client::new_with(
|
||||
request_dispatcher,
|
||||
StaticProvider::new_minimal(
|
||||
StaticProvider::new(
|
||||
access_key_id.unwrap_or_default(),
|
||||
secret_access_key.unwrap_or_default(),
|
||||
session_token,
|
||||
None,
|
||||
),
|
||||
region,
|
||||
)
|
||||
@@ -304,32 +312,24 @@ impl RemoteStorage for S3Bucket {
|
||||
Ok(document_keys)
|
||||
}
|
||||
|
||||
/// See the doc for `RemoteStorage::list_prefixes`
|
||||
/// Note: it wont include empty "directories"
|
||||
async fn list_prefixes(
|
||||
&self,
|
||||
prefix: Option<Self::RemoteObjectId>,
|
||||
) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
|
||||
let list_prefix = match prefix {
|
||||
Some(prefix) => {
|
||||
let mut prefix_in_bucket = self.prefix_in_bucket.clone().unwrap_or_default();
|
||||
// if there is no trailing / in default prefix and
|
||||
// supplied prefix does not start with "/" insert it
|
||||
if !(prefix_in_bucket.ends_with(S3_PREFIX_SEPARATOR)
|
||||
|| prefix.0.starts_with(S3_PREFIX_SEPARATOR))
|
||||
{
|
||||
prefix_in_bucket.push(S3_PREFIX_SEPARATOR);
|
||||
}
|
||||
|
||||
prefix_in_bucket.push_str(&prefix.0);
|
||||
// get the passed prefix or if it is not set use prefix_in_bucket value
|
||||
let list_prefix = prefix
|
||||
.map(|p| p.0)
|
||||
.or_else(|| self.prefix_in_bucket.clone())
|
||||
.map(|mut p| {
|
||||
// required to end with a separator
|
||||
// otherwise request will return only the entry of a prefix
|
||||
if !prefix_in_bucket.ends_with(S3_PREFIX_SEPARATOR) {
|
||||
prefix_in_bucket.push(S3_PREFIX_SEPARATOR);
|
||||
if !p.ends_with(S3_PREFIX_SEPARATOR) {
|
||||
p.push(S3_PREFIX_SEPARATOR);
|
||||
}
|
||||
Some(prefix_in_bucket)
|
||||
}
|
||||
None => self.prefix_in_bucket.clone(),
|
||||
};
|
||||
p
|
||||
});
|
||||
|
||||
let mut document_keys = Vec::new();
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@ anyhow = "1.0"
|
||||
bincode = "1.3"
|
||||
bytes = "1.0.1"
|
||||
hyper = { version = "0.14.7", features = ["full"] }
|
||||
lazy_static = "1.4.0"
|
||||
pin-project-lite = "0.2.7"
|
||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
@@ -28,6 +27,8 @@ rustls = "0.20.2"
|
||||
rustls-split = "0.3.0"
|
||||
git-version = "0.3.5"
|
||||
serde_with = "1.12.0"
|
||||
once_cell = "1.13.0"
|
||||
|
||||
|
||||
metrics = { path = "../metrics" }
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
|
||||
@@ -265,7 +265,7 @@ mod tests {
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::io::Cursor;
|
||||
|
||||
#[derive(Debug, PartialEq, Serialize, Deserialize)]
|
||||
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct ShortStruct {
|
||||
a: u8,
|
||||
b: u32,
|
||||
@@ -286,7 +286,7 @@ mod tests {
|
||||
const SHORT2_ENC_LE: &[u8] = &[8, 0, 0, 3, 7];
|
||||
const SHORT2_ENC_LE_TRAILING: &[u8] = &[8, 0, 0, 3, 7, 0xff, 0xff, 0xff];
|
||||
|
||||
#[derive(Debug, PartialEq, Serialize, Deserialize)]
|
||||
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct LongMsg {
|
||||
pub tag: u8,
|
||||
pub blockpos: u32,
|
||||
|
||||
@@ -4,8 +4,8 @@ use crate::zid::ZTenantId;
|
||||
use anyhow::anyhow;
|
||||
use hyper::header::AUTHORIZATION;
|
||||
use hyper::{header::CONTENT_TYPE, Body, Request, Response, Server};
|
||||
use lazy_static::lazy_static;
|
||||
use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder};
|
||||
use once_cell::sync::Lazy;
|
||||
use routerify::ext::RequestExt;
|
||||
use routerify::RequestInfo;
|
||||
use routerify::{Middleware, Router, RouterBuilder, RouterService};
|
||||
@@ -16,13 +16,13 @@ use std::net::TcpListener;
|
||||
|
||||
use super::error::ApiError;
|
||||
|
||||
lazy_static! {
|
||||
static ref SERVE_METRICS_COUNT: IntCounter = register_int_counter!(
|
||||
static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {
|
||||
register_int_counter!(
|
||||
"libmetrics_metric_handler_requests_total",
|
||||
"Number of metric requests made"
|
||||
)
|
||||
.expect("failed to define a metric");
|
||||
}
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
async fn logger(res: Response<Body>, info: RequestInfo) -> Result<Response<Body>, ApiError> {
|
||||
info!("{} {} {}", info.method(), info.uri().path(), res.status(),);
|
||||
|
||||
@@ -10,12 +10,10 @@ pub fn get_request_param<'a>(
|
||||
) -> Result<&'a str, ApiError> {
|
||||
match request.param(param_name) {
|
||||
Some(arg) => Ok(arg),
|
||||
None => {
|
||||
return Err(ApiError::BadRequest(format!(
|
||||
"no {} specified in path param",
|
||||
param_name
|
||||
)))
|
||||
}
|
||||
None => Err(ApiError::BadRequest(format!(
|
||||
"no {} specified in path param",
|
||||
param_name
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ pub const XLOG_BLCKSZ: u32 = 8192;
|
||||
pub struct Lsn(pub u64);
|
||||
|
||||
/// We tried to parse an LSN from a string, but failed
|
||||
#[derive(Debug, PartialEq, thiserror::Error)]
|
||||
#[derive(Debug, PartialEq, Eq, thiserror::Error)]
|
||||
#[error("LsnParseError")]
|
||||
pub struct LsnParseError;
|
||||
|
||||
|
||||
@@ -50,7 +50,7 @@ pub trait Handler {
|
||||
|
||||
/// PostgresBackend protocol state.
|
||||
/// XXX: The order of the constructors matters.
|
||||
#[derive(Clone, Copy, PartialEq, PartialOrd)]
|
||||
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd)]
|
||||
pub enum ProtoState {
|
||||
Initialization,
|
||||
Encrypted,
|
||||
|
||||
@@ -47,10 +47,12 @@ pub enum FeStartupPacket {
|
||||
StartupMessage {
|
||||
major_version: u32,
|
||||
minor_version: u32,
|
||||
params: HashMap<String, String>,
|
||||
params: StartupMessageParams,
|
||||
},
|
||||
}
|
||||
|
||||
pub type StartupMessageParams = HashMap<String, String>;
|
||||
|
||||
#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct CancelKeyData {
|
||||
pub backend_pid: i32,
|
||||
@@ -928,7 +930,7 @@ impl<'a> BeMessage<'a> {
|
||||
|
||||
// Neon extension of postgres replication protocol
|
||||
// See NEON_STATUS_UPDATE_TAG_BYTE
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct ReplicationFeedback {
|
||||
// Last known size of the timeline. Used to enforce timeline size limit.
|
||||
pub current_timeline_size: u64,
|
||||
|
||||
@@ -9,7 +9,7 @@ use std::sync::Mutex;
|
||||
use std::time::Duration;
|
||||
|
||||
/// An error happened while waiting for a number
|
||||
#[derive(Debug, PartialEq, thiserror::Error)]
|
||||
#[derive(Debug, PartialEq, Eq, thiserror::Error)]
|
||||
#[error("SeqWaitError")]
|
||||
pub enum SeqWaitError {
|
||||
/// The wait timeout was reached
|
||||
|
||||
@@ -4,7 +4,7 @@ use serde::Deserialize;
|
||||
use std::io::Read;
|
||||
use utils::bin_ser::LeSer;
|
||||
|
||||
#[derive(Debug, PartialEq, Deserialize)]
|
||||
#[derive(Debug, PartialEq, Eq, Deserialize)]
|
||||
pub struct HeaderData {
|
||||
magic: u16,
|
||||
info: u16,
|
||||
|
||||
@@ -7,7 +7,7 @@ use std::{
|
||||
|
||||
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
use lazy_static::lazy_static;
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
use utils::postgres_backend::{AuthType, Handler, PostgresBackend};
|
||||
|
||||
@@ -19,18 +19,20 @@ fn make_tcp_pair() -> (TcpStream, TcpStream) {
|
||||
(server_stream, client_stream)
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref KEY: rustls::PrivateKey = {
|
||||
let mut cursor = Cursor::new(include_bytes!("key.pem"));
|
||||
rustls::PrivateKey(rustls_pemfile::rsa_private_keys(&mut cursor).unwrap()[0].clone())
|
||||
};
|
||||
static ref CERT: rustls::Certificate = {
|
||||
let mut cursor = Cursor::new(include_bytes!("cert.pem"));
|
||||
rustls::Certificate(rustls_pemfile::certs(&mut cursor).unwrap()[0].clone())
|
||||
};
|
||||
}
|
||||
static KEY: Lazy<rustls::PrivateKey> = Lazy::new(|| {
|
||||
let mut cursor = Cursor::new(include_bytes!("key.pem"));
|
||||
rustls::PrivateKey(rustls_pemfile::rsa_private_keys(&mut cursor).unwrap()[0].clone())
|
||||
});
|
||||
|
||||
static CERT: Lazy<rustls::Certificate> = Lazy::new(|| {
|
||||
let mut cursor = Cursor::new(include_bytes!("cert.pem"));
|
||||
rustls::Certificate(rustls_pemfile::certs(&mut cursor).unwrap()[0].clone())
|
||||
});
|
||||
|
||||
#[test]
|
||||
// [false-positive](https://github.com/rust-lang/rust-clippy/issues/9274),
|
||||
// we resize the vector so doing some modifications after all
|
||||
#[allow(clippy::read_zero_byte_vec)]
|
||||
fn ssl() {
|
||||
let (mut client_sock, server_sock) = make_tcp_pair();
|
||||
|
||||
|
||||
@@ -15,6 +15,5 @@ git-version = "0.3.5"
|
||||
pageserver = { path = "../pageserver" }
|
||||
control_plane = { path = "../control_plane" }
|
||||
safekeeper = { path = "../safekeeper" }
|
||||
postgres_ffi = { path = "../libs/postgres_ffi" }
|
||||
utils = { path = "../libs/utils" }
|
||||
workspace_hack = { version = "0.1", path = "../workspace_hack" }
|
||||
|
||||
@@ -9,6 +9,7 @@ use pageserver::config::defaults::{
|
||||
DEFAULT_HTTP_LISTEN_ADDR as DEFAULT_PAGESERVER_HTTP_ADDR,
|
||||
DEFAULT_PG_LISTEN_ADDR as DEFAULT_PAGESERVER_PG_ADDR,
|
||||
};
|
||||
use pageserver::http::models::TimelineInfo;
|
||||
use safekeeper::defaults::{
|
||||
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
|
||||
DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
|
||||
@@ -25,8 +26,6 @@ use utils::{
|
||||
zid::{NodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
|
||||
};
|
||||
|
||||
use pageserver::timelines::TimelineInfo;
|
||||
|
||||
// Default id of a safekeeper node, if not specified on the command line.
|
||||
const DEFAULT_SAFEKEEPER_ID: NodeId = NodeId(1);
|
||||
const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);
|
||||
@@ -502,10 +501,10 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
|
||||
// default_tenantid was generated by the `env.init()` call above
|
||||
let initial_tenant_id = env.default_tenant_id.unwrap();
|
||||
|
||||
// Call 'pageserver init'.
|
||||
// Initialize pageserver, create initial tenant and timeline.
|
||||
let pageserver = PageServerNode::from_env(&env);
|
||||
let initial_timeline_id = pageserver
|
||||
.init(
|
||||
.initialize(
|
||||
Some(initial_tenant_id),
|
||||
initial_timeline_id_arg,
|
||||
&pageserver_config_overrides(init_match),
|
||||
@@ -552,25 +551,15 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
|
||||
.values_of("config")
|
||||
.map(|vals| vals.flat_map(|c| c.split_once(':')).collect())
|
||||
.unwrap_or_default();
|
||||
let new_tenant_id = pageserver
|
||||
.tenant_create(initial_tenant_id, tenant_conf)?
|
||||
.ok_or_else(|| {
|
||||
anyhow!("Tenant with id {:?} was already created", initial_tenant_id)
|
||||
})?;
|
||||
println!(
|
||||
"tenant {} successfully created on the pageserver",
|
||||
new_tenant_id
|
||||
);
|
||||
let new_tenant_id = pageserver.tenant_create(initial_tenant_id, tenant_conf)?;
|
||||
println!("tenant {new_tenant_id} successfully created on the pageserver");
|
||||
|
||||
// Create an initial timeline for the new tenant
|
||||
let new_timeline_id = parse_timeline_id(create_match)?;
|
||||
let timeline = pageserver
|
||||
.timeline_create(new_tenant_id, new_timeline_id, None, None)?
|
||||
.context(format!(
|
||||
"Failed to create initial timeline for tenant {new_tenant_id}"
|
||||
))?;
|
||||
let new_timeline_id = timeline.timeline_id;
|
||||
let last_record_lsn = timeline
|
||||
let timeline_info =
|
||||
pageserver.timeline_create(new_tenant_id, new_timeline_id, None, None)?;
|
||||
let new_timeline_id = timeline_info.timeline_id;
|
||||
let last_record_lsn = timeline_info
|
||||
.local
|
||||
.context(format!("Failed to get last record LSN: no local timeline info for timeline {new_timeline_id}"))?
|
||||
.last_record_lsn;
|
||||
@@ -617,20 +606,18 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
|
||||
let new_branch_name = create_match
|
||||
.value_of("branch-name")
|
||||
.ok_or_else(|| anyhow!("No branch name provided"))?;
|
||||
let timeline = pageserver
|
||||
.timeline_create(tenant_id, None, None, None)?
|
||||
.ok_or_else(|| anyhow!("Failed to create new timeline for tenant {}", tenant_id))?;
|
||||
let new_timeline_id = timeline.timeline_id;
|
||||
let timeline_info = pageserver.timeline_create(tenant_id, None, None, None)?;
|
||||
let new_timeline_id = timeline_info.timeline_id;
|
||||
|
||||
let last_record_lsn = timeline
|
||||
let last_record_lsn = timeline_info
|
||||
.local
|
||||
.expect("no local timeline info")
|
||||
.last_record_lsn;
|
||||
env.register_branch_mapping(new_branch_name.to_string(), tenant_id, new_timeline_id)?;
|
||||
|
||||
println!(
|
||||
"Created timeline '{}' at Lsn {} for tenant: {}",
|
||||
timeline.timeline_id, last_record_lsn, tenant_id,
|
||||
"Created timeline '{}' at Lsn {last_record_lsn} for tenant: {tenant_id}",
|
||||
timeline_info.timeline_id
|
||||
);
|
||||
}
|
||||
Some(("import", import_match)) => {
|
||||
@@ -681,10 +668,7 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
|
||||
let ancestor_timeline_id = env
|
||||
.get_branch_timeline_id(ancestor_branch_name, tenant_id)
|
||||
.ok_or_else(|| {
|
||||
anyhow!(
|
||||
"Found no timeline id for branch name '{}'",
|
||||
ancestor_branch_name
|
||||
)
|
||||
anyhow!("Found no timeline id for branch name '{ancestor_branch_name}'")
|
||||
})?;
|
||||
|
||||
let start_lsn = branch_match
|
||||
@@ -692,12 +676,15 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
|
||||
.map(Lsn::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse ancestor start Lsn from the request")?;
|
||||
let timeline = pageserver
|
||||
.timeline_create(tenant_id, None, start_lsn, Some(ancestor_timeline_id))?
|
||||
.ok_or_else(|| anyhow!("Failed to create new timeline for tenant {}", tenant_id))?;
|
||||
let new_timeline_id = timeline.timeline_id;
|
||||
let timeline_info = pageserver.timeline_create(
|
||||
tenant_id,
|
||||
None,
|
||||
start_lsn,
|
||||
Some(ancestor_timeline_id),
|
||||
)?;
|
||||
let new_timeline_id = timeline_info.timeline_id;
|
||||
|
||||
let last_record_lsn = timeline
|
||||
let last_record_lsn = timeline_info
|
||||
.local
|
||||
.expect("no local timeline info")
|
||||
.last_record_lsn;
|
||||
@@ -705,11 +692,11 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
|
||||
env.register_branch_mapping(new_branch_name.to_string(), tenant_id, new_timeline_id)?;
|
||||
|
||||
println!(
|
||||
"Created timeline '{}' at Lsn {} for tenant: {}. Ancestor timeline: '{}'",
|
||||
timeline.timeline_id, last_record_lsn, tenant_id, ancestor_branch_name,
|
||||
"Created timeline '{}' at Lsn {last_record_lsn} for tenant: {tenant_id}. Ancestor timeline: '{ancestor_branch_name}'",
|
||||
timeline_info.timeline_id
|
||||
);
|
||||
}
|
||||
Some((sub_name, _)) => bail!("Unexpected tenant subcommand '{}'", sub_name),
|
||||
Some((sub_name, _)) => bail!("Unexpected tenant subcommand '{sub_name}'"),
|
||||
None => bail!("no tenant subcommand provided"),
|
||||
}
|
||||
|
||||
@@ -885,7 +872,7 @@ fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
|
||||
match sub_match.subcommand() {
|
||||
Some(("start", start_match)) => {
|
||||
if let Err(e) = pageserver.start(&pageserver_config_overrides(start_match)) {
|
||||
eprintln!("pageserver start failed: {}", e);
|
||||
eprintln!("pageserver start failed: {e}");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
@@ -907,10 +894,19 @@ fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
|
||||
}
|
||||
|
||||
if let Err(e) = pageserver.start(&pageserver_config_overrides(restart_match)) {
|
||||
eprintln!("pageserver start failed: {}", e);
|
||||
eprintln!("pageserver start failed: {e}");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
Some(("status", _)) => match PageServerNode::from_env(env).check_status() {
|
||||
Ok(_) => println!("Page server is up and running"),
|
||||
Err(err) => {
|
||||
eprintln!("Page server is not available: {}", err);
|
||||
exit(1);
|
||||
}
|
||||
},
|
||||
|
||||
Some((sub_name, _)) => bail!("Unexpected pageserver subcommand '{}'", sub_name),
|
||||
None => bail!("no pageserver subcommand provided"),
|
||||
}
|
||||
|
||||
@@ -21,7 +21,6 @@ futures = "0.3.13"
|
||||
hex = "0.4.3"
|
||||
hyper = "0.14"
|
||||
itertools = "0.10.3"
|
||||
lazy_static = "1.4.0"
|
||||
clap = "3.0"
|
||||
daemonize = "0.4.1"
|
||||
tokio = { version = "1.17", features = ["process", "sync", "macros", "fs", "rt", "io-util", "time"] }
|
||||
@@ -29,7 +28,6 @@ postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d
|
||||
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
tokio-stream = "0.1.8"
|
||||
anyhow = { version = "1.0", features = ["backtrace"] }
|
||||
crc32c = "0.6.0"
|
||||
thiserror = "1.0"
|
||||
@@ -49,7 +47,7 @@ tracing = "0.1.27"
|
||||
signal-hook = "0.3.10"
|
||||
url = "2"
|
||||
nix = "0.23"
|
||||
once_cell = "1.8.0"
|
||||
once_cell = "1.13.0"
|
||||
crossbeam-utils = "0.8.5"
|
||||
fail = "0.5.0"
|
||||
git-version = "0.3.5"
|
||||
|
||||
@@ -23,21 +23,26 @@ use tar::{Builder, EntryType, Header};
|
||||
use tracing::*;
|
||||
|
||||
use crate::reltag::{RelTag, SlruKind};
|
||||
use crate::repository::Timeline;
|
||||
use crate::DatadirTimelineImpl;
|
||||
use postgres_ffi::xlog_utils::*;
|
||||
use postgres_ffi::*;
|
||||
use crate::DatadirTimeline;
|
||||
|
||||
use postgres_ffi::v14::pg_constants;
|
||||
use postgres_ffi::v14::xlog_utils::{generate_wal_segment, normalize_lsn, XLogFileName};
|
||||
use postgres_ffi::v14::{CheckPoint, ControlFileData};
|
||||
use postgres_ffi::TransactionId;
|
||||
use postgres_ffi::PG_TLI;
|
||||
use postgres_ffi::{BLCKSZ, RELSEG_SIZE};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
/// This is short-living object only for the time of tarball creation,
|
||||
/// created mostly to avoid passing a lot of parameters between various functions
|
||||
/// used for constructing tarball.
|
||||
pub struct Basebackup<'a, W>
|
||||
pub struct Basebackup<'a, W, T>
|
||||
where
|
||||
W: Write,
|
||||
T: DatadirTimeline,
|
||||
{
|
||||
ar: Builder<AbortableWrite<W>>,
|
||||
timeline: &'a Arc<DatadirTimelineImpl>,
|
||||
timeline: &'a Arc<T>,
|
||||
pub lsn: Lsn,
|
||||
prev_record_lsn: Lsn,
|
||||
full_backup: bool,
|
||||
@@ -52,17 +57,18 @@ where
|
||||
// * When working without safekeepers. In this situation it is important to match the lsn
|
||||
// we are taking basebackup on with the lsn that is used in pageserver's walreceiver
|
||||
// to start the replication.
|
||||
impl<'a, W> Basebackup<'a, W>
|
||||
impl<'a, W, T> Basebackup<'a, W, T>
|
||||
where
|
||||
W: Write,
|
||||
T: DatadirTimeline,
|
||||
{
|
||||
pub fn new(
|
||||
write: W,
|
||||
timeline: &'a Arc<DatadirTimelineImpl>,
|
||||
timeline: &'a Arc<T>,
|
||||
req_lsn: Option<Lsn>,
|
||||
prev_lsn: Option<Lsn>,
|
||||
full_backup: bool,
|
||||
) -> Result<Basebackup<'a, W>> {
|
||||
) -> Result<Basebackup<'a, W, T>> {
|
||||
// Compute postgres doesn't have any previous WAL files, but the first
|
||||
// record that it's going to write needs to include the LSN of the
|
||||
// previous record (xl_prev). We include prev_record_lsn in the
|
||||
@@ -79,13 +85,13 @@ where
|
||||
let (backup_prev, backup_lsn) = if let Some(req_lsn) = req_lsn {
|
||||
// Backup was requested at a particular LSN. Wait for it to arrive.
|
||||
info!("waiting for {}", req_lsn);
|
||||
timeline.tline.wait_lsn(req_lsn)?;
|
||||
timeline.wait_lsn(req_lsn)?;
|
||||
|
||||
// If the requested point is the end of the timeline, we can
|
||||
// provide prev_lsn. (get_last_record_rlsn() might return it as
|
||||
// zero, though, if no WAL has been generated on this timeline
|
||||
// yet.)
|
||||
let end_of_timeline = timeline.tline.get_last_record_rlsn();
|
||||
let end_of_timeline = timeline.get_last_record_rlsn();
|
||||
if req_lsn == end_of_timeline.last {
|
||||
(end_of_timeline.prev, req_lsn)
|
||||
} else {
|
||||
@@ -93,7 +99,7 @@ where
|
||||
}
|
||||
} else {
|
||||
// Backup was requested at end of the timeline.
|
||||
let end_of_timeline = timeline.tline.get_last_record_rlsn();
|
||||
let end_of_timeline = timeline.get_last_record_rlsn();
|
||||
(end_of_timeline.prev, end_of_timeline.last)
|
||||
};
|
||||
|
||||
@@ -199,7 +205,7 @@ where
|
||||
}
|
||||
|
||||
// Add a file for each chunk of blocks (aka segment)
|
||||
let chunks = (0..nblocks).chunks(pg_constants::RELSEG_SIZE as usize);
|
||||
let chunks = (0..nblocks).chunks(RELSEG_SIZE as usize);
|
||||
for (seg, blocks) in chunks.into_iter().enumerate() {
|
||||
let mut segment_data: Vec<u8> = vec![];
|
||||
for blknum in blocks {
|
||||
@@ -219,23 +225,19 @@ where
|
||||
fn add_slru_segment(&mut self, slru: SlruKind, segno: u32) -> anyhow::Result<()> {
|
||||
let nblocks = self.timeline.get_slru_segment_size(slru, segno, self.lsn)?;
|
||||
|
||||
let mut slru_buf: Vec<u8> =
|
||||
Vec::with_capacity(nblocks as usize * pg_constants::BLCKSZ as usize);
|
||||
let mut slru_buf: Vec<u8> = Vec::with_capacity(nblocks as usize * BLCKSZ as usize);
|
||||
for blknum in 0..nblocks {
|
||||
let img = self
|
||||
.timeline
|
||||
.get_slru_page_at_lsn(slru, segno, blknum, self.lsn)?;
|
||||
|
||||
if slru == SlruKind::Clog {
|
||||
ensure!(
|
||||
img.len() == pg_constants::BLCKSZ as usize
|
||||
|| img.len() == pg_constants::BLCKSZ as usize + 8
|
||||
);
|
||||
ensure!(img.len() == BLCKSZ as usize || img.len() == BLCKSZ as usize + 8);
|
||||
} else {
|
||||
ensure!(img.len() == pg_constants::BLCKSZ as usize);
|
||||
ensure!(img.len() == BLCKSZ as usize);
|
||||
}
|
||||
|
||||
slru_buf.extend_from_slice(&img[..pg_constants::BLCKSZ as usize]);
|
||||
slru_buf.extend_from_slice(&img[..BLCKSZ as usize]);
|
||||
}
|
||||
|
||||
let segname = format!("{}/{:>04X}", slru.to_str(), segno);
|
||||
@@ -371,7 +373,7 @@ where
|
||||
// add zenith.signal file
|
||||
let mut zenith_signal = String::new();
|
||||
if self.prev_record_lsn == Lsn(0) {
|
||||
if self.lsn == self.timeline.tline.get_ancestor_lsn() {
|
||||
if self.lsn == self.timeline.get_ancestor_lsn() {
|
||||
write!(zenith_signal, "PREV LSN: none")?;
|
||||
} else {
|
||||
write!(zenith_signal, "PREV LSN: invalid")?;
|
||||
@@ -402,9 +404,10 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, W> Drop for Basebackup<'a, W>
|
||||
impl<'a, W, T> Drop for Basebackup<'a, W, T>
|
||||
where
|
||||
W: Write,
|
||||
T: DatadirTimeline,
|
||||
{
|
||||
/// If the basebackup was not finished, prevent the Archive::drop() from
|
||||
/// writing the end-of-archive marker.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//! Main entry point for the Page Server executable.
|
||||
|
||||
use std::{env, path::Path, str::FromStr};
|
||||
use std::{env, ops::ControlFlow, path::Path, str::FromStr};
|
||||
use tracing::*;
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
@@ -13,7 +13,7 @@ use pageserver::{
|
||||
config::{defaults::*, PageServerConf},
|
||||
http, page_cache, page_service, profiling, tenant_mgr, thread_mgr,
|
||||
thread_mgr::ThreadKind,
|
||||
timelines, virtual_file, LOG_FILE_NAME,
|
||||
virtual_file, LOG_FILE_NAME,
|
||||
};
|
||||
use utils::{
|
||||
auth::JwtAuth,
|
||||
@@ -24,7 +24,6 @@ use utils::{
|
||||
shutdown::exit_now,
|
||||
signals::{self, Signal},
|
||||
tcp_listener,
|
||||
zid::{ZTenantId, ZTimelineId},
|
||||
};
|
||||
|
||||
project_git_version!(GIT_VERSION);
|
||||
@@ -42,6 +41,7 @@ fn main() -> anyhow::Result<()> {
|
||||
.about("Materializes WAL stream to pages and serves them to the postgres")
|
||||
.version(&*version())
|
||||
.arg(
|
||||
|
||||
Arg::new("daemonize")
|
||||
.short('d')
|
||||
.long("daemonize")
|
||||
@@ -52,7 +52,7 @@ fn main() -> anyhow::Result<()> {
|
||||
Arg::new("init")
|
||||
.long("init")
|
||||
.takes_value(false)
|
||||
.help("Initialize pageserver service: creates an initial config, tenant and timeline, if specified"),
|
||||
.help("Initialize pageserver with all given config overrides"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("workdir")
|
||||
@@ -61,20 +61,6 @@ fn main() -> anyhow::Result<()> {
|
||||
.takes_value(true)
|
||||
.help("Working directory for the pageserver"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("create-tenant")
|
||||
.long("create-tenant")
|
||||
.takes_value(true)
|
||||
.help("Create tenant during init")
|
||||
.requires("init"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("initial-timeline-id")
|
||||
.long("initial-timeline-id")
|
||||
.takes_value(true)
|
||||
.help("Use a specific timeline id during init and tenant creation")
|
||||
.requires("create-tenant"),
|
||||
)
|
||||
// See `settings.md` for more details on the extra configuration patameters pageserver can process
|
||||
.arg(
|
||||
Arg::new("config-override")
|
||||
@@ -85,6 +71,9 @@ fn main() -> anyhow::Result<()> {
|
||||
.help("Additional configuration overrides of the ones from the toml config file (or new ones to add there).
|
||||
Any option has to be a valid toml document, example: `-c=\"foo='hey'\"` `-c=\"foo={value=1}\"`"),
|
||||
)
|
||||
.arg(Arg::new("update-config").long("update-config").takes_value(false).help(
|
||||
"Update the config file when started",
|
||||
))
|
||||
.arg(
|
||||
Arg::new("enabled-features")
|
||||
.long("enabled-features")
|
||||
@@ -110,18 +99,6 @@ fn main() -> anyhow::Result<()> {
|
||||
.with_context(|| format!("Error opening workdir '{}'", workdir.display()))?;
|
||||
let cfg_file_path = workdir.join("pageserver.toml");
|
||||
|
||||
let init = arg_matches.is_present("init");
|
||||
let create_tenant = arg_matches
|
||||
.value_of("create-tenant")
|
||||
.map(ZTenantId::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse tenant id from the arguments")?;
|
||||
let initial_timeline_id = arg_matches
|
||||
.value_of("initial-timeline-id")
|
||||
.map(ZTimelineId::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse timeline id from the arguments")?;
|
||||
|
||||
// Set CWD to workdir for non-daemon modes
|
||||
env::set_current_dir(&workdir).with_context(|| {
|
||||
format!(
|
||||
@@ -131,30 +108,86 @@ fn main() -> anyhow::Result<()> {
|
||||
})?;
|
||||
|
||||
let daemonize = arg_matches.is_present("daemonize");
|
||||
if init && daemonize {
|
||||
bail!("--daemonize cannot be used with --init")
|
||||
}
|
||||
|
||||
let mut toml = if init {
|
||||
// We're initializing the repo, so there's no config file yet
|
||||
DEFAULT_CONFIG_FILE
|
||||
.parse::<toml_edit::Document>()
|
||||
.context("could not parse built-in config file")?
|
||||
} else {
|
||||
// Supplement the CLI arguments with the config file
|
||||
let cfg_file_contents = std::fs::read_to_string(&cfg_file_path)
|
||||
.with_context(|| format!("No pageserver config at '{}'", cfg_file_path.display()))?;
|
||||
cfg_file_contents
|
||||
.parse::<toml_edit::Document>()
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to read '{}' as pageserver config",
|
||||
cfg_file_path.display()
|
||||
)
|
||||
})?
|
||||
let conf = match initialize_config(&cfg_file_path, arg_matches, &workdir)? {
|
||||
ControlFlow::Continue(conf) => conf,
|
||||
ControlFlow::Break(()) => {
|
||||
info!("Pageserver config init successful");
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
|
||||
let tenants_path = conf.tenants_path();
|
||||
if !tenants_path.exists() {
|
||||
utils::crashsafe_dir::create_dir_all(conf.tenants_path()).with_context(|| {
|
||||
format!(
|
||||
"Failed to create tenants root dir at '{}'",
|
||||
tenants_path.display()
|
||||
)
|
||||
})?;
|
||||
}
|
||||
|
||||
// Initialize up failpoints support
|
||||
let scenario = FailScenario::setup();
|
||||
|
||||
// Basic initialization of things that don't change after startup
|
||||
virtual_file::init(conf.max_file_descriptors);
|
||||
page_cache::init(conf.page_cache_size);
|
||||
|
||||
start_pageserver(conf, daemonize).context("Failed to start pageserver")?;
|
||||
|
||||
scenario.teardown();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn initialize_config(
|
||||
cfg_file_path: &Path,
|
||||
arg_matches: clap::ArgMatches,
|
||||
workdir: &Path,
|
||||
) -> anyhow::Result<ControlFlow<(), &'static PageServerConf>> {
|
||||
let init = arg_matches.is_present("init");
|
||||
let update_config = init || arg_matches.is_present("update-config");
|
||||
|
||||
let (mut toml, config_file_exists) = if cfg_file_path.is_file() {
|
||||
if init {
|
||||
anyhow::bail!(
|
||||
"Config file '{}' already exists, cannot init it, use --update-config to update it",
|
||||
cfg_file_path.display()
|
||||
);
|
||||
}
|
||||
// Supplement the CLI arguments with the config file
|
||||
let cfg_file_contents = std::fs::read_to_string(&cfg_file_path).with_context(|| {
|
||||
format!(
|
||||
"Failed to read pageserver config at '{}'",
|
||||
cfg_file_path.display()
|
||||
)
|
||||
})?;
|
||||
(
|
||||
cfg_file_contents
|
||||
.parse::<toml_edit::Document>()
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to parse '{}' as pageserver config",
|
||||
cfg_file_path.display()
|
||||
)
|
||||
})?,
|
||||
true,
|
||||
)
|
||||
} else if cfg_file_path.exists() {
|
||||
anyhow::bail!(
|
||||
"Config file '{}' exists but is not a regular file",
|
||||
cfg_file_path.display()
|
||||
);
|
||||
} else {
|
||||
// We're initializing the repo, so there's no config file yet
|
||||
(
|
||||
DEFAULT_CONFIG_FILE
|
||||
.parse::<toml_edit::Document>()
|
||||
.context("could not parse built-in config file")?,
|
||||
false,
|
||||
)
|
||||
};
|
||||
|
||||
// Process any extra options given with -c
|
||||
if let Some(values) = arg_matches.values_of("config-override") {
|
||||
for option_line in values {
|
||||
let doc = toml_edit::Document::from_str(option_line).with_context(|| {
|
||||
@@ -165,49 +198,38 @@ fn main() -> anyhow::Result<()> {
|
||||
})?;
|
||||
|
||||
for (key, item) in doc.iter() {
|
||||
if key == "id" {
|
||||
anyhow::ensure!(
|
||||
init,
|
||||
"node id can only be set during pageserver init and cannot be overridden"
|
||||
);
|
||||
if config_file_exists && update_config && key == "id" && toml.contains_key(key) {
|
||||
anyhow::bail!("Pageserver config file exists at '{}' and has node id already, it cannot be overridden", cfg_file_path.display());
|
||||
}
|
||||
toml.insert(key, item.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
trace!("Resulting toml: {}", toml);
|
||||
let conf = PageServerConf::parse_and_validate(&toml, &workdir)
|
||||
|
||||
debug!("Resulting toml: {toml}");
|
||||
let conf = PageServerConf::parse_and_validate(&toml, workdir)
|
||||
.context("Failed to parse pageserver configuration")?;
|
||||
|
||||
// The configuration is all set up now. Turn it into a 'static
|
||||
// that can be freely stored in structs and passed across threads
|
||||
// as a ref.
|
||||
let conf: &'static PageServerConf = Box::leak(Box::new(conf));
|
||||
if update_config {
|
||||
info!("Writing pageserver config to '{}'", cfg_file_path.display());
|
||||
|
||||
// Initialize up failpoints support
|
||||
let scenario = FailScenario::setup();
|
||||
|
||||
// Basic initialization of things that don't change after startup
|
||||
virtual_file::init(conf.max_file_descriptors);
|
||||
page_cache::init(conf.page_cache_size);
|
||||
|
||||
// Create repo and exit if init was requested
|
||||
if init {
|
||||
timelines::init_pageserver(conf, create_tenant, initial_timeline_id)
|
||||
.context("Failed to init pageserver")?;
|
||||
// write the config file
|
||||
std::fs::write(&cfg_file_path, toml.to_string()).with_context(|| {
|
||||
format!(
|
||||
"Failed to initialize pageserver config at '{}'",
|
||||
"Failed to write pageserver config to '{}'",
|
||||
cfg_file_path.display()
|
||||
)
|
||||
})?;
|
||||
} else {
|
||||
start_pageserver(conf, daemonize).context("Failed to start pageserver")?;
|
||||
info!(
|
||||
"Config successfully written to '{}'",
|
||||
cfg_file_path.display()
|
||||
)
|
||||
}
|
||||
|
||||
scenario.teardown();
|
||||
Ok(())
|
||||
Ok(if init {
|
||||
ControlFlow::Break(())
|
||||
} else {
|
||||
ControlFlow::Continue(Box::leak(Box::new(conf)))
|
||||
})
|
||||
}
|
||||
|
||||
fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()> {
|
||||
|
||||
@@ -59,6 +59,7 @@ pub mod defaults {
|
||||
|
||||
# [tenant_config]
|
||||
#checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
|
||||
#checkpoint_timeout = {DEFAULT_CHECKPOINT_TIMEOUT}
|
||||
#compaction_target_size = {DEFAULT_COMPACTION_TARGET_SIZE} # in bytes
|
||||
#compaction_period = '{DEFAULT_COMPACTION_PERIOD}'
|
||||
#compaction_threshold = '{DEFAULT_COMPACTION_THRESHOLD}'
|
||||
@@ -452,6 +453,13 @@ impl PageServerConf {
|
||||
Some(parse_toml_u64("checkpoint_distance", checkpoint_distance)?);
|
||||
}
|
||||
|
||||
if let Some(checkpoint_timeout) = item.get("checkpoint_timeout") {
|
||||
t_conf.checkpoint_timeout = Some(parse_toml_duration(
|
||||
"checkpoint_timeout",
|
||||
checkpoint_timeout,
|
||||
)?);
|
||||
}
|
||||
|
||||
if let Some(compaction_target_size) = item.get("compaction_target_size") {
|
||||
t_conf.compaction_target_size = Some(parse_toml_u64(
|
||||
"compaction_target_size",
|
||||
|
||||
@@ -7,6 +7,10 @@ use utils::{
|
||||
zid::{NodeId, ZTenantId, ZTimelineId},
|
||||
};
|
||||
|
||||
// These enums are used in the API response fields.
|
||||
use crate::repository::LocalTimelineState;
|
||||
use crate::tenant_mgr::TenantState;
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct TimelineCreateRequest {
|
||||
@@ -28,6 +32,7 @@ pub struct TenantCreateRequest {
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub new_tenant_id: Option<ZTenantId>,
|
||||
pub checkpoint_distance: Option<u64>,
|
||||
pub checkpoint_timeout: Option<String>,
|
||||
pub compaction_target_size: Option<u64>,
|
||||
pub compaction_period: Option<String>,
|
||||
pub compaction_threshold: Option<usize>,
|
||||
@@ -66,6 +71,7 @@ pub struct TenantConfigRequest {
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub checkpoint_distance: Option<u64>,
|
||||
pub checkpoint_timeout: Option<String>,
|
||||
pub compaction_target_size: Option<u64>,
|
||||
pub compaction_period: Option<String>,
|
||||
pub compaction_threshold: Option<usize>,
|
||||
@@ -83,6 +89,7 @@ impl TenantConfigRequest {
|
||||
TenantConfigRequest {
|
||||
tenant_id,
|
||||
checkpoint_distance: None,
|
||||
checkpoint_timeout: None,
|
||||
compaction_target_size: None,
|
||||
compaction_period: None,
|
||||
compaction_threshold: None,
|
||||
@@ -97,14 +104,59 @@ impl TenantConfigRequest {
|
||||
}
|
||||
}
|
||||
|
||||
/// A WAL receiver's data stored inside the global `WAL_RECEIVERS`.
|
||||
/// We keep one WAL receiver active per timeline.
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct TenantInfo {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub id: ZTenantId,
|
||||
pub state: Option<TenantState>,
|
||||
pub current_physical_size: Option<u64>, // physical size is only included in `tenant_status` endpoint
|
||||
pub has_in_progress_downloads: Option<bool>,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct WalReceiverEntry {
|
||||
pub wal_producer_connstr: Option<String>,
|
||||
pub struct LocalTimelineInfo {
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub ancestor_timeline_id: Option<ZTimelineId>,
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub ancestor_lsn: Option<Lsn>,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub last_record_lsn: Lsn,
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub prev_record_lsn: Option<Lsn>,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub latest_gc_cutoff_lsn: Lsn,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub disk_consistent_lsn: Lsn,
|
||||
pub current_logical_size: Option<usize>, // is None when timeline is Unloaded
|
||||
pub current_physical_size: Option<u64>, // is None when timeline is Unloaded
|
||||
pub current_logical_size_non_incremental: Option<usize>,
|
||||
pub current_physical_size_non_incremental: Option<u64>,
|
||||
pub timeline_state: LocalTimelineState,
|
||||
|
||||
pub wal_source_connstr: Option<String>,
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub last_received_msg_lsn: Option<Lsn>,
|
||||
/// the timestamp (in microseconds) of the last received message
|
||||
pub last_received_msg_ts: Option<u128>,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct RemoteTimelineInfo {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub remote_consistent_lsn: Lsn,
|
||||
pub awaits_download: bool,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct TimelineInfo {
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub tenant_id: ZTenantId,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub timeline_id: ZTimelineId,
|
||||
pub local: Option<LocalTimelineInfo>,
|
||||
pub remote: Option<RemoteTimelineInfo>,
|
||||
}
|
||||
|
||||
@@ -78,6 +78,11 @@ paths:
|
||||
schema:
|
||||
type: string
|
||||
description: Controls calculation of current_logical_size_non_incremental
|
||||
- name: include-non-incremental-physical-size
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
description: Controls calculation of current_physical_size_non_incremental
|
||||
get:
|
||||
description: Get timelines for tenant
|
||||
responses:
|
||||
@@ -136,6 +141,11 @@ paths:
|
||||
schema:
|
||||
type: string
|
||||
description: Controls calculation of current_logical_size_non_incremental
|
||||
- name: include-non-incremental-physical-size
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
description: Controls calculation of current_physical_size_non_incremental
|
||||
responses:
|
||||
"200":
|
||||
description: TimelineInfo
|
||||
@@ -197,54 +207,6 @@ paths:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
|
||||
/v1/tenant/{tenant_id}/timeline/{timeline_id}/wal_receiver:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
- name: timeline_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
get:
|
||||
description: Get wal receiver's data attached to the timeline
|
||||
responses:
|
||||
"200":
|
||||
description: WalReceiverEntry
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/WalReceiverEntry"
|
||||
"401":
|
||||
description: Unauthorized Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/UnauthorizedError"
|
||||
"403":
|
||||
description: Forbidden Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ForbiddenError"
|
||||
"404":
|
||||
description: Error when no wal receiver is running or found
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/NotFoundError"
|
||||
"500":
|
||||
description: Generic operation error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
|
||||
/v1/tenant/{tenant_id}/attach:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
@@ -577,6 +539,8 @@ components:
|
||||
type: string
|
||||
state:
|
||||
type: string
|
||||
current_physical_size:
|
||||
type: integer
|
||||
has_in_progress_downloads:
|
||||
type: boolean
|
||||
TenantCreateInfo:
|
||||
@@ -596,6 +560,8 @@ components:
|
||||
type: string
|
||||
checkpoint_distance:
|
||||
type: integer
|
||||
checkpoint_timeout:
|
||||
type: string
|
||||
compaction_period:
|
||||
type: string
|
||||
compaction_threshold:
|
||||
@@ -614,6 +580,8 @@ components:
|
||||
type: string
|
||||
checkpoint_distance:
|
||||
type: integer
|
||||
checkpoint_timeout:
|
||||
type: string
|
||||
compaction_period:
|
||||
type: string
|
||||
compaction_threshold:
|
||||
@@ -671,18 +639,13 @@ components:
|
||||
format: hex
|
||||
current_logical_size:
|
||||
type: integer
|
||||
current_physical_size:
|
||||
type: integer
|
||||
current_logical_size_non_incremental:
|
||||
type: integer
|
||||
|
||||
WalReceiverEntry:
|
||||
type: object
|
||||
required:
|
||||
- thread_id
|
||||
- wal_producer_connstr
|
||||
properties:
|
||||
thread_id:
|
||||
current_physical_size_non_incremental:
|
||||
type: integer
|
||||
wal_producer_connstr:
|
||||
wal_source_connstr:
|
||||
type: string
|
||||
last_received_msg_lsn:
|
||||
type: string
|
||||
|
||||
@@ -6,16 +6,18 @@ use hyper::{Body, Request, Response, Uri};
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use tracing::*;
|
||||
|
||||
use super::models::{LocalTimelineInfo, RemoteTimelineInfo, TimelineInfo};
|
||||
use super::models::{
|
||||
StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse,
|
||||
StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse, TenantInfo,
|
||||
TimelineCreateRequest,
|
||||
};
|
||||
use crate::repository::Repository;
|
||||
use crate::layered_repository::{metadata::TimelineMetadata, LayeredTimeline};
|
||||
use crate::pgdatadir_mapping::DatadirTimeline;
|
||||
use crate::repository::{LocalTimelineState, RepositoryTimeline};
|
||||
use crate::repository::{Repository, Timeline};
|
||||
use crate::storage_sync;
|
||||
use crate::storage_sync::index::{RemoteIndex, RemoteTimeline};
|
||||
use crate::tenant_config::TenantConfOpt;
|
||||
use crate::tenant_mgr::TenantInfo;
|
||||
use crate::timelines::{LocalTimelineInfo, RemoteTimelineInfo, TimelineInfo};
|
||||
use crate::{config::PageServerConf, tenant_mgr, timelines};
|
||||
use utils::{
|
||||
auth::JwtAuth,
|
||||
@@ -26,6 +28,7 @@ use utils::{
|
||||
request::parse_request_param,
|
||||
RequestExt, RouterBuilder,
|
||||
},
|
||||
lsn::Lsn,
|
||||
zid::{ZTenantId, ZTenantTimelineId, ZTimelineId},
|
||||
};
|
||||
|
||||
@@ -79,6 +82,123 @@ fn get_config(request: &Request<Body>) -> &'static PageServerConf {
|
||||
get_state(request).conf
|
||||
}
|
||||
|
||||
// Helper functions to construct a LocalTimelineInfo struct for a timeline
|
||||
|
||||
fn local_timeline_info_from_loaded_timeline(
|
||||
timeline: &LayeredTimeline,
|
||||
include_non_incremental_logical_size: bool,
|
||||
include_non_incremental_physical_size: bool,
|
||||
) -> anyhow::Result<LocalTimelineInfo> {
|
||||
let last_record_lsn = timeline.get_last_record_lsn();
|
||||
let (wal_source_connstr, last_received_msg_lsn, last_received_msg_ts) = {
|
||||
let guard = timeline.last_received_wal.lock().unwrap();
|
||||
if let Some(info) = guard.as_ref() {
|
||||
(
|
||||
Some(info.wal_source_connstr.clone()),
|
||||
Some(info.last_received_msg_lsn),
|
||||
Some(info.last_received_msg_ts),
|
||||
)
|
||||
} else {
|
||||
(None, None, None)
|
||||
}
|
||||
};
|
||||
|
||||
let info = LocalTimelineInfo {
|
||||
ancestor_timeline_id: timeline.get_ancestor_timeline_id(),
|
||||
ancestor_lsn: {
|
||||
match timeline.get_ancestor_lsn() {
|
||||
Lsn(0) => None,
|
||||
lsn @ Lsn(_) => Some(lsn),
|
||||
}
|
||||
},
|
||||
disk_consistent_lsn: timeline.get_disk_consistent_lsn(),
|
||||
last_record_lsn,
|
||||
prev_record_lsn: Some(timeline.get_prev_record_lsn()),
|
||||
latest_gc_cutoff_lsn: *timeline.get_latest_gc_cutoff_lsn(),
|
||||
timeline_state: LocalTimelineState::Loaded,
|
||||
current_logical_size: Some(timeline.get_current_logical_size()),
|
||||
current_physical_size: Some(timeline.get_physical_size()),
|
||||
current_logical_size_non_incremental: if include_non_incremental_logical_size {
|
||||
Some(timeline.get_current_logical_size_non_incremental(last_record_lsn)?)
|
||||
} else {
|
||||
None
|
||||
},
|
||||
current_physical_size_non_incremental: if include_non_incremental_physical_size {
|
||||
Some(timeline.get_physical_size_non_incremental()?)
|
||||
} else {
|
||||
None
|
||||
},
|
||||
wal_source_connstr,
|
||||
last_received_msg_lsn,
|
||||
last_received_msg_ts,
|
||||
};
|
||||
Ok(info)
|
||||
}
|
||||
|
||||
fn local_timeline_info_from_unloaded_timeline(metadata: &TimelineMetadata) -> LocalTimelineInfo {
|
||||
LocalTimelineInfo {
|
||||
ancestor_timeline_id: metadata.ancestor_timeline(),
|
||||
ancestor_lsn: {
|
||||
match metadata.ancestor_lsn() {
|
||||
Lsn(0) => None,
|
||||
lsn @ Lsn(_) => Some(lsn),
|
||||
}
|
||||
},
|
||||
disk_consistent_lsn: metadata.disk_consistent_lsn(),
|
||||
last_record_lsn: metadata.disk_consistent_lsn(),
|
||||
prev_record_lsn: metadata.prev_record_lsn(),
|
||||
latest_gc_cutoff_lsn: metadata.latest_gc_cutoff_lsn(),
|
||||
timeline_state: LocalTimelineState::Unloaded,
|
||||
current_logical_size: None,
|
||||
current_physical_size: None,
|
||||
current_logical_size_non_incremental: None,
|
||||
current_physical_size_non_incremental: None,
|
||||
wal_source_connstr: None,
|
||||
last_received_msg_lsn: None,
|
||||
last_received_msg_ts: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn local_timeline_info_from_repo_timeline(
|
||||
repo_timeline: &RepositoryTimeline<LayeredTimeline>,
|
||||
include_non_incremental_logical_size: bool,
|
||||
include_non_incremental_physical_size: bool,
|
||||
) -> anyhow::Result<LocalTimelineInfo> {
|
||||
match repo_timeline {
|
||||
RepositoryTimeline::Loaded(timeline) => local_timeline_info_from_loaded_timeline(
|
||||
timeline,
|
||||
include_non_incremental_logical_size,
|
||||
include_non_incremental_physical_size,
|
||||
),
|
||||
RepositoryTimeline::Unloaded { metadata } => {
|
||||
Ok(local_timeline_info_from_unloaded_timeline(metadata))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn list_local_timelines(
|
||||
tenant_id: ZTenantId,
|
||||
include_non_incremental_logical_size: bool,
|
||||
include_non_incremental_physical_size: bool,
|
||||
) -> Result<Vec<(ZTimelineId, LocalTimelineInfo)>> {
|
||||
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)
|
||||
.with_context(|| format!("Failed to get repo for tenant {}", tenant_id))?;
|
||||
let repo_timelines = repo.list_timelines();
|
||||
|
||||
let mut local_timeline_info = Vec::with_capacity(repo_timelines.len());
|
||||
for (timeline_id, repository_timeline) in repo_timelines {
|
||||
local_timeline_info.push((
|
||||
timeline_id,
|
||||
local_timeline_info_from_repo_timeline(
|
||||
&repository_timeline,
|
||||
include_non_incremental_logical_size,
|
||||
include_non_incremental_physical_size,
|
||||
)?,
|
||||
))
|
||||
}
|
||||
Ok(local_timeline_info)
|
||||
}
|
||||
|
||||
// healthcheck handler
|
||||
async fn status_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let config = get_config(&request);
|
||||
@@ -93,16 +213,30 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
|
||||
|
||||
let new_timeline_info = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("/timeline_create", tenant = %tenant_id, new_timeline = ?request_data.new_timeline_id, lsn=?request_data.ancestor_start_lsn).entered();
|
||||
timelines::create_timeline(
|
||||
|
||||
match timelines::create_timeline(
|
||||
get_config(&request),
|
||||
tenant_id,
|
||||
request_data.new_timeline_id.map(ZTimelineId::from),
|
||||
request_data.ancestor_timeline_id.map(ZTimelineId::from),
|
||||
request_data.ancestor_start_lsn,
|
||||
)
|
||||
) {
|
||||
Ok(Some((new_timeline_id, new_timeline))) => {
|
||||
// Created. Construct a TimelineInfo for it.
|
||||
let local_info = local_timeline_info_from_loaded_timeline(new_timeline.as_ref(), false, false)?;
|
||||
Ok(Some(TimelineInfo {
|
||||
tenant_id,
|
||||
timeline_id: new_timeline_id,
|
||||
local: Some(local_info),
|
||||
remote: None,
|
||||
}))
|
||||
}
|
||||
Ok(None) => Ok(None), // timeline already exists
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
.map_err(ApiError::from_err)??;
|
||||
|
||||
Ok(match new_timeline_info {
|
||||
Some(info) => json_response(StatusCode::CREATED, info)?,
|
||||
@@ -113,10 +247,17 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
|
||||
async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
let include_non_incremental_logical_size = get_include_non_incremental_logical_size(&request);
|
||||
let include_non_incremental_logical_size =
|
||||
query_param_present(&request, "include-non-incremental-logical-size");
|
||||
let include_non_incremental_physical_size =
|
||||
query_param_present(&request, "include-non-incremental-physical-size");
|
||||
let local_timeline_infos = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("timeline_list", tenant = %tenant_id).entered();
|
||||
crate::timelines::get_local_timelines(tenant_id, include_non_incremental_logical_size)
|
||||
list_local_timelines(
|
||||
tenant_id,
|
||||
include_non_incremental_logical_size,
|
||||
include_non_incremental_physical_size,
|
||||
)
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
@@ -145,17 +286,15 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,
|
||||
json_response(StatusCode::OK, response_data)
|
||||
}
|
||||
|
||||
// Gate non incremental logical size calculation behind a flag
|
||||
// after pgbench -i -s100 calculation took 28ms so if multiplied by the number of timelines
|
||||
// and tenants it can take noticeable amount of time. Also the value currently used only in tests
|
||||
fn get_include_non_incremental_logical_size(request: &Request<Body>) -> bool {
|
||||
/// Checks if a query param is present in the request's URL
|
||||
fn query_param_present(request: &Request<Body>, param: &str) -> bool {
|
||||
request
|
||||
.uri()
|
||||
.query()
|
||||
.map(|v| {
|
||||
url::form_urlencoded::parse(v.as_bytes())
|
||||
.into_owned()
|
||||
.any(|(param, _)| param == "include-non-incremental-logical-size")
|
||||
.any(|(p, _)| p == param)
|
||||
})
|
||||
.unwrap_or(false)
|
||||
}
|
||||
@@ -165,7 +304,10 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
let include_non_incremental_logical_size = get_include_non_incremental_logical_size(&request);
|
||||
let include_non_incremental_logical_size =
|
||||
query_param_present(&request, "include-non-incremental-logical-size");
|
||||
let include_non_incremental_physical_size =
|
||||
query_param_present(&request, "include-non-incremental-physical-size");
|
||||
|
||||
let (local_timeline_info, remote_timeline_info) = async {
|
||||
// any error here will render local timeline as None
|
||||
@@ -176,11 +318,10 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
|
||||
repo.get_timeline(timeline_id)
|
||||
.as_ref()
|
||||
.map(|timeline| {
|
||||
LocalTimelineInfo::from_repo_timeline(
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
local_timeline_info_from_repo_timeline(
|
||||
timeline,
|
||||
include_non_incremental_logical_size,
|
||||
include_non_incremental_physical_size,
|
||||
)
|
||||
})
|
||||
.transpose()?
|
||||
@@ -225,23 +366,6 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
|
||||
json_response(StatusCode::OK, timeline_info)
|
||||
}
|
||||
|
||||
async fn wal_receiver_get_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
let wal_receiver_entry = crate::walreceiver::get_wal_receiver_entry(tenant_id, timeline_id)
|
||||
.instrument(info_span!("wal_receiver_get", tenant = %tenant_id, timeline = %timeline_id))
|
||||
.await
|
||||
.ok_or_else(|| {
|
||||
ApiError::NotFound(format!(
|
||||
"WAL receiver data not found for tenant {tenant_id} and timeline {timeline_id}"
|
||||
))
|
||||
})?;
|
||||
|
||||
json_response(StatusCode::OK, &wal_receiver_entry)
|
||||
}
|
||||
|
||||
// TODO makes sense to provide tenant config right away the same way as it handled in tenant_create
|
||||
async fn tenant_attach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
||||
@@ -429,14 +553,36 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
|
||||
let index_accessor = remote_index.read().await;
|
||||
let has_in_progress_downloads = index_accessor
|
||||
.tenant_entry(&tenant_id)
|
||||
.ok_or_else(|| ApiError::NotFound("Tenant not found in remote index".to_string()))?
|
||||
.has_in_progress_downloads();
|
||||
.map(|t| t.has_in_progress_downloads())
|
||||
.unwrap_or_else(|| {
|
||||
info!("Tenant {tenant_id} not found in remote index");
|
||||
false
|
||||
});
|
||||
|
||||
let current_physical_size =
|
||||
match tokio::task::spawn_blocking(move || list_local_timelines(tenant_id, false, false))
|
||||
.await
|
||||
.map_err(ApiError::from_err)?
|
||||
{
|
||||
Err(err) => {
|
||||
// Getting local timelines can fail when no local repo is on disk (e.g, when tenant data is being downloaded).
|
||||
// In that case, put a warning message into log and operate normally.
|
||||
warn!("Failed to get local timelines for tenant {tenant_id}: {err}");
|
||||
None
|
||||
}
|
||||
Ok(local_timeline_infos) => Some(
|
||||
local_timeline_infos
|
||||
.into_iter()
|
||||
.fold(0, |acc, x| acc + x.1.current_physical_size.unwrap()),
|
||||
),
|
||||
};
|
||||
|
||||
json_response(
|
||||
StatusCode::OK,
|
||||
TenantInfo {
|
||||
id: tenant_id,
|
||||
state: tenant_state,
|
||||
current_physical_size,
|
||||
has_in_progress_downloads: Some(has_in_progress_downloads),
|
||||
},
|
||||
)
|
||||
@@ -476,6 +622,11 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
}
|
||||
|
||||
tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
|
||||
if let Some(checkpoint_timeout) = request_data.checkpoint_timeout {
|
||||
tenant_conf.checkpoint_timeout =
|
||||
Some(humantime::parse_duration(&checkpoint_timeout).map_err(ApiError::from_err)?);
|
||||
}
|
||||
|
||||
tenant_conf.compaction_target_size = request_data.compaction_target_size;
|
||||
tenant_conf.compaction_threshold = request_data.compaction_threshold;
|
||||
|
||||
@@ -536,6 +687,10 @@ async fn tenant_config_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
}
|
||||
|
||||
tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
|
||||
if let Some(checkpoint_timeout) = request_data.checkpoint_timeout {
|
||||
tenant_conf.checkpoint_timeout =
|
||||
Some(humantime::parse_duration(&checkpoint_timeout).map_err(ApiError::from_err)?);
|
||||
}
|
||||
tenant_conf.compaction_target_size = request_data.compaction_target_size;
|
||||
tenant_conf.compaction_threshold = request_data.compaction_threshold;
|
||||
|
||||
@@ -606,9 +761,5 @@ pub fn make_router(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/detach",
|
||||
timeline_delete_handler,
|
||||
)
|
||||
.get(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/wal_receiver",
|
||||
wal_receiver_get_handler,
|
||||
)
|
||||
.any(handler_404))
|
||||
}
|
||||
|
||||
@@ -13,25 +13,35 @@ use walkdir::WalkDir;
|
||||
|
||||
use crate::pgdatadir_mapping::*;
|
||||
use crate::reltag::{RelTag, SlruKind};
|
||||
use crate::repository::Repository;
|
||||
use crate::repository::Timeline;
|
||||
use crate::walingest::WalIngest;
|
||||
use postgres_ffi::relfile_utils::*;
|
||||
use postgres_ffi::waldecoder::*;
|
||||
use postgres_ffi::xlog_utils::*;
|
||||
use crate::walrecord::DecodedWALRecord;
|
||||
use postgres_ffi::v14::relfile_utils::*;
|
||||
use postgres_ffi::v14::waldecoder::*;
|
||||
use postgres_ffi::v14::xlog_utils::*;
|
||||
use postgres_ffi::v14::{pg_constants, ControlFileData, DBState_DB_SHUTDOWNED};
|
||||
use postgres_ffi::Oid;
|
||||
use postgres_ffi::{pg_constants, ControlFileData, DBState_DB_SHUTDOWNED};
|
||||
use postgres_ffi::BLCKSZ;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
// Returns checkpoint LSN from controlfile
|
||||
pub fn get_lsn_from_controlfile(path: &Path) -> Result<Lsn> {
|
||||
// Read control file to extract the LSN
|
||||
let controlfile_path = path.join("global").join("pg_control");
|
||||
let controlfile = ControlFileData::decode(&std::fs::read(controlfile_path)?)?;
|
||||
let lsn = controlfile.checkPoint;
|
||||
|
||||
Ok(Lsn(lsn))
|
||||
}
|
||||
|
||||
///
|
||||
/// Import all relation data pages from local disk into the repository.
|
||||
///
|
||||
/// This is currently only used to import a cluster freshly created by initdb.
|
||||
/// The code that deals with the checkpoint would not work right if the
|
||||
/// cluster was not shut down cleanly.
|
||||
pub fn import_timeline_from_postgres_datadir<R: Repository>(
|
||||
pub fn import_timeline_from_postgres_datadir<T: DatadirTimeline>(
|
||||
path: &Path,
|
||||
tline: &mut DatadirTimeline<R>,
|
||||
tline: &T,
|
||||
lsn: Lsn,
|
||||
) -> Result<()> {
|
||||
let mut pg_control: Option<ControlFileData> = None;
|
||||
@@ -89,8 +99,8 @@ pub fn import_timeline_from_postgres_datadir<R: Repository>(
|
||||
}
|
||||
|
||||
// subroutine of import_timeline_from_postgres_datadir(), to load one relation file.
|
||||
fn import_rel<R: Repository, Reader: Read>(
|
||||
modification: &mut DatadirModification<R>,
|
||||
fn import_rel<T: DatadirTimeline, Reader: Read>(
|
||||
modification: &mut DatadirModification<T>,
|
||||
path: &Path,
|
||||
spcoid: Oid,
|
||||
dboid: Oid,
|
||||
@@ -111,8 +121,8 @@ fn import_rel<R: Repository, Reader: Read>(
|
||||
|
||||
let mut buf: [u8; 8192] = [0u8; 8192];
|
||||
|
||||
ensure!(len % pg_constants::BLCKSZ as usize == 0);
|
||||
let nblocks = len / pg_constants::BLCKSZ as usize;
|
||||
ensure!(len % BLCKSZ as usize == 0);
|
||||
let nblocks = len / BLCKSZ as usize;
|
||||
|
||||
let rel = RelTag {
|
||||
spcnode: spcoid,
|
||||
@@ -121,7 +131,7 @@ fn import_rel<R: Repository, Reader: Read>(
|
||||
forknum,
|
||||
};
|
||||
|
||||
let mut blknum: u32 = segno * (1024 * 1024 * 1024 / pg_constants::BLCKSZ as u32);
|
||||
let mut blknum: u32 = segno * (1024 * 1024 * 1024 / BLCKSZ as u32);
|
||||
|
||||
// Call put_rel_creation for every segment of the relation,
|
||||
// because there is no guarantee about the order in which we are processing segments.
|
||||
@@ -145,8 +155,7 @@ fn import_rel<R: Repository, Reader: Read>(
|
||||
Err(err) => match err.kind() {
|
||||
std::io::ErrorKind::UnexpectedEof => {
|
||||
// reached EOF. That's expected.
|
||||
let relative_blknum =
|
||||
blknum - segno * (1024 * 1024 * 1024 / pg_constants::BLCKSZ as u32);
|
||||
let relative_blknum = blknum - segno * (1024 * 1024 * 1024 / BLCKSZ as u32);
|
||||
ensure!(relative_blknum == nblocks as u32, "unexpected EOF");
|
||||
break;
|
||||
}
|
||||
@@ -169,8 +178,8 @@ fn import_rel<R: Repository, Reader: Read>(
|
||||
|
||||
/// Import an SLRU segment file
|
||||
///
|
||||
fn import_slru<R: Repository, Reader: Read>(
|
||||
modification: &mut DatadirModification<R>,
|
||||
fn import_slru<T: DatadirTimeline, Reader: Read>(
|
||||
modification: &mut DatadirModification<T>,
|
||||
slru: SlruKind,
|
||||
path: &Path,
|
||||
mut reader: Reader,
|
||||
@@ -185,8 +194,8 @@ fn import_slru<R: Repository, Reader: Read>(
|
||||
.to_string_lossy();
|
||||
let segno = u32::from_str_radix(filename, 16)?;
|
||||
|
||||
ensure!(len % pg_constants::BLCKSZ as usize == 0); // we assume SLRU block size is the same as BLCKSZ
|
||||
let nblocks = len / pg_constants::BLCKSZ as usize;
|
||||
ensure!(len % BLCKSZ as usize == 0); // we assume SLRU block size is the same as BLCKSZ
|
||||
let nblocks = len / BLCKSZ as usize;
|
||||
|
||||
ensure!(nblocks <= pg_constants::SLRU_PAGES_PER_SEGMENT as usize);
|
||||
|
||||
@@ -225,9 +234,9 @@ fn import_slru<R: Repository, Reader: Read>(
|
||||
|
||||
/// Scan PostgreSQL WAL files in given directory and load all records between
|
||||
/// 'startpoint' and 'endpoint' into the repository.
|
||||
fn import_wal<R: Repository>(
|
||||
fn import_wal<T: DatadirTimeline>(
|
||||
walpath: &Path,
|
||||
tline: &mut DatadirTimeline<R>,
|
||||
tline: &T,
|
||||
startpoint: Lsn,
|
||||
endpoint: Lsn,
|
||||
) -> Result<()> {
|
||||
@@ -268,9 +277,11 @@ fn import_wal<R: Repository>(
|
||||
waldecoder.feed_bytes(&buf);
|
||||
|
||||
let mut nrecords = 0;
|
||||
let mut modification = tline.begin_modification(endpoint);
|
||||
let mut decoded = DecodedWALRecord::default();
|
||||
while last_lsn <= endpoint {
|
||||
if let Some((lsn, recdata)) = waldecoder.poll_decode()? {
|
||||
walingest.ingest_record(tline, recdata, lsn)?;
|
||||
walingest.ingest_record(recdata, lsn, &mut modification, &mut decoded)?;
|
||||
last_lsn = lsn;
|
||||
|
||||
nrecords += 1;
|
||||
@@ -294,8 +305,8 @@ fn import_wal<R: Repository>(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn import_basebackup_from_tar<R: Repository, Reader: Read>(
|
||||
tline: &mut DatadirTimeline<R>,
|
||||
pub fn import_basebackup_from_tar<T: DatadirTimeline, Reader: Read>(
|
||||
tline: &T,
|
||||
reader: Reader,
|
||||
base_lsn: Lsn,
|
||||
) -> Result<()> {
|
||||
@@ -336,8 +347,8 @@ pub fn import_basebackup_from_tar<R: Repository, Reader: Read>(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn import_wal_from_tar<R: Repository, Reader: Read>(
|
||||
tline: &mut DatadirTimeline<R>,
|
||||
pub fn import_wal_from_tar<T: DatadirTimeline, Reader: Read>(
|
||||
tline: &T,
|
||||
reader: Reader,
|
||||
start_lsn: Lsn,
|
||||
end_lsn: Lsn,
|
||||
@@ -384,9 +395,11 @@ pub fn import_wal_from_tar<R: Repository, Reader: Read>(
|
||||
|
||||
waldecoder.feed_bytes(&bytes[offset..]);
|
||||
|
||||
let mut modification = tline.begin_modification(end_lsn);
|
||||
let mut decoded = DecodedWALRecord::default();
|
||||
while last_lsn <= end_lsn {
|
||||
if let Some((lsn, recdata)) = waldecoder.poll_decode()? {
|
||||
walingest.ingest_record(tline, recdata, lsn)?;
|
||||
walingest.ingest_record(recdata, lsn, &mut modification, &mut decoded)?;
|
||||
last_lsn = lsn;
|
||||
|
||||
debug!("imported record at {} (end {})", lsn, end_lsn);
|
||||
@@ -415,8 +428,8 @@ pub fn import_wal_from_tar<R: Repository, Reader: Read>(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn import_file<R: Repository, Reader: Read>(
|
||||
modification: &mut DatadirModification<R>,
|
||||
pub fn import_file<T: DatadirTimeline, Reader: Read>(
|
||||
modification: &mut DatadirModification<T>,
|
||||
file_path: &Path,
|
||||
reader: Reader,
|
||||
len: usize,
|
||||
@@ -535,7 +548,7 @@ pub fn import_file<R: Repository, Reader: Read>(
|
||||
// zenith.signal is not necessarily the last file, that we handle
|
||||
// but it is ok to call `finish_write()`, because final `modification.commit()`
|
||||
// will update lsn once more to the final one.
|
||||
let writer = modification.tline.tline.writer();
|
||||
let writer = modification.tline.writer();
|
||||
writer.finish_write(prev_lsn);
|
||||
|
||||
debug!("imported zenith signal {}", prev_lsn);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use crate::repository::{key_range_size, singleton_range, Key};
|
||||
use postgres_ffi::pg_constants;
|
||||
use postgres_ffi::BLCKSZ;
|
||||
use std::ops::Range;
|
||||
|
||||
///
|
||||
@@ -19,7 +19,7 @@ impl KeySpace {
|
||||
///
|
||||
pub fn partition(&self, target_size: u64) -> KeyPartitioning {
|
||||
// Assume that each value is 8k in size.
|
||||
let target_nblocks = (target_size / pg_constants::BLCKSZ as u64) as usize;
|
||||
let target_nblocks = (target_size / BLCKSZ as u64) as usize;
|
||||
|
||||
let mut parts = Vec::new();
|
||||
let mut current_part = Vec::new();
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -5,7 +5,7 @@
|
||||
use crate::page_cache;
|
||||
use crate::page_cache::{ReadBufResult, PAGE_SZ};
|
||||
use bytes::Bytes;
|
||||
use lazy_static::lazy_static;
|
||||
use once_cell::sync::Lazy;
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::os::unix::fs::FileExt;
|
||||
use std::sync::atomic::AtomicU64;
|
||||
@@ -117,9 +117,7 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref NEXT_ID: AtomicU64 = AtomicU64::new(1);
|
||||
}
|
||||
static NEXT_ID: Lazy<AtomicU64> = Lazy::new(|| AtomicU64::new(1));
|
||||
|
||||
/// An adapter for reading a (virtual) file using the page cache.
|
||||
///
|
||||
@@ -159,7 +157,14 @@ where
|
||||
// Look up the right page
|
||||
let cache = page_cache::get();
|
||||
loop {
|
||||
match cache.read_immutable_buf(self.file_id, blknum) {
|
||||
match cache
|
||||
.read_immutable_buf(self.file_id, blknum)
|
||||
.map_err(|e| {
|
||||
std::io::Error::new(
|
||||
std::io::ErrorKind::Other,
|
||||
format!("Failed to read immutable buf: {e:#}"),
|
||||
)
|
||||
})? {
|
||||
ReadBufResult::Found(guard) => break Ok(guard),
|
||||
ReadBufResult::NotFound(mut write_guard) => {
|
||||
// Read the page from disk into the buffer
|
||||
|
||||
@@ -316,6 +316,18 @@ impl Layer for DeltaLayer {
|
||||
}
|
||||
}
|
||||
|
||||
fn key_iter<'a>(&'a self) -> Box<dyn Iterator<Item = (Key, Lsn, u64)> + 'a> {
|
||||
let inner = match self.load() {
|
||||
Ok(inner) => inner,
|
||||
Err(e) => panic!("Failed to load a delta layer: {e:?}"),
|
||||
};
|
||||
|
||||
match DeltaKeyIter::new(inner) {
|
||||
Ok(iter) => Box::new(iter),
|
||||
Err(e) => panic!("Layer index is corrupted: {e:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
fn delete(&self) -> Result<()> {
|
||||
// delete underlying file
|
||||
fs::remove_file(self.path())?;
|
||||
@@ -660,11 +672,21 @@ impl DeltaLayerWriter {
|
||||
/// The values must be appended in key, lsn order.
|
||||
///
|
||||
pub fn put_value(&mut self, key: Key, lsn: Lsn, val: Value) -> Result<()> {
|
||||
self.put_value_bytes(key, lsn, &Value::ser(&val)?, val.will_init())
|
||||
}
|
||||
|
||||
pub fn put_value_bytes(
|
||||
&mut self,
|
||||
key: Key,
|
||||
lsn: Lsn,
|
||||
val: &[u8],
|
||||
will_init: bool,
|
||||
) -> Result<()> {
|
||||
assert!(self.lsn_range.start <= lsn);
|
||||
|
||||
let off = self.blob_writer.write_blob(&Value::ser(&val)?)?;
|
||||
let off = self.blob_writer.write_blob(val)?;
|
||||
|
||||
let blob_ref = BlobRef::new(off, val.will_init());
|
||||
let blob_ref = BlobRef::new(off, will_init);
|
||||
|
||||
let delta_key = DeltaKey::from_key_lsn(&key, lsn);
|
||||
self.tree.append(&delta_key.0, blob_ref.0)?;
|
||||
@@ -822,3 +844,75 @@ impl<'a> DeltaValueIter<'a> {
|
||||
}
|
||||
}
|
||||
}
|
||||
///
|
||||
/// Iterator over all keys stored in a delta layer
|
||||
///
|
||||
/// FIXME: This creates a Vector to hold all keys.
|
||||
/// That takes up quite a lot of memory. Should do this in a more streaming
|
||||
/// fashion.
|
||||
///
|
||||
struct DeltaKeyIter {
|
||||
all_keys: Vec<(DeltaKey, u64)>,
|
||||
next_idx: usize,
|
||||
}
|
||||
|
||||
impl Iterator for DeltaKeyIter {
|
||||
type Item = (Key, Lsn, u64);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.next_idx < self.all_keys.len() {
|
||||
let (delta_key, size) = &self.all_keys[self.next_idx];
|
||||
|
||||
let key = delta_key.key();
|
||||
let lsn = delta_key.lsn();
|
||||
|
||||
self.next_idx += 1;
|
||||
Some((key, lsn, *size))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> DeltaKeyIter {
|
||||
fn new(inner: RwLockReadGuard<'a, DeltaLayerInner>) -> Result<Self> {
|
||||
let file = inner.file.as_ref().unwrap();
|
||||
let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
|
||||
inner.index_start_blk,
|
||||
inner.index_root_blk,
|
||||
file,
|
||||
);
|
||||
|
||||
let mut all_keys: Vec<(DeltaKey, u64)> = Vec::new();
|
||||
tree_reader.visit(
|
||||
&[0u8; DELTA_KEY_SIZE],
|
||||
VisitDirection::Forwards,
|
||||
|key, value| {
|
||||
let delta_key = DeltaKey::from_slice(key);
|
||||
let pos = BlobRef(value).pos();
|
||||
if let Some(last) = all_keys.last_mut() {
|
||||
if last.0.key() == delta_key.key() {
|
||||
return true;
|
||||
} else {
|
||||
// subtract offset of new key BLOB and first blob of this key
|
||||
// to get total size if values associated with this key
|
||||
let first_pos = last.1;
|
||||
last.1 = pos - first_pos;
|
||||
}
|
||||
}
|
||||
all_keys.push((delta_key, pos));
|
||||
true
|
||||
},
|
||||
)?;
|
||||
if let Some(last) = all_keys.last_mut() {
|
||||
// Last key occupies all space till end of layer
|
||||
last.1 = std::fs::metadata(&file.file.path)?.len() - last.1;
|
||||
}
|
||||
let iter = DeltaKeyIter {
|
||||
all_keys,
|
||||
next_idx: 0,
|
||||
};
|
||||
|
||||
Ok(iter)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -209,7 +209,7 @@ where
|
||||
reader: R,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum VisitDirection {
|
||||
Forwards,
|
||||
Backwards,
|
||||
|
||||
@@ -8,11 +8,11 @@ use crate::page_cache;
|
||||
use crate::page_cache::PAGE_SZ;
|
||||
use crate::page_cache::{ReadBufResult, WriteBufResult};
|
||||
use crate::virtual_file::VirtualFile;
|
||||
use lazy_static::lazy_static;
|
||||
use once_cell::sync::Lazy;
|
||||
use std::cmp::min;
|
||||
use std::collections::HashMap;
|
||||
use std::fs::OpenOptions;
|
||||
use std::io::{Error, ErrorKind};
|
||||
use std::io::{self, ErrorKind};
|
||||
use std::ops::DerefMut;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::{Arc, RwLock};
|
||||
@@ -21,15 +21,15 @@ use utils::zid::{ZTenantId, ZTimelineId};
|
||||
|
||||
use std::os::unix::fs::FileExt;
|
||||
|
||||
lazy_static! {
|
||||
///
|
||||
/// This is the global cache of file descriptors (File objects).
|
||||
///
|
||||
static ref EPHEMERAL_FILES: RwLock<EphemeralFiles> = RwLock::new(EphemeralFiles {
|
||||
///
|
||||
/// This is the global cache of file descriptors (File objects).
|
||||
///
|
||||
static EPHEMERAL_FILES: Lazy<RwLock<EphemeralFiles>> = Lazy::new(|| {
|
||||
RwLock::new(EphemeralFiles {
|
||||
next_file_id: 1,
|
||||
files: HashMap::new(),
|
||||
});
|
||||
}
|
||||
})
|
||||
});
|
||||
|
||||
pub struct EphemeralFiles {
|
||||
next_file_id: u64,
|
||||
@@ -43,7 +43,7 @@ pub struct EphemeralFile {
|
||||
_timelineid: ZTimelineId,
|
||||
file: Arc<VirtualFile>,
|
||||
|
||||
size: u64,
|
||||
pub size: u64,
|
||||
}
|
||||
|
||||
impl EphemeralFile {
|
||||
@@ -51,7 +51,7 @@ impl EphemeralFile {
|
||||
conf: &PageServerConf,
|
||||
tenantid: ZTenantId,
|
||||
timelineid: ZTimelineId,
|
||||
) -> Result<EphemeralFile, std::io::Error> {
|
||||
) -> Result<EphemeralFile, io::Error> {
|
||||
let mut l = EPHEMERAL_FILES.write().unwrap();
|
||||
let file_id = l.next_file_id;
|
||||
l.next_file_id += 1;
|
||||
@@ -76,7 +76,7 @@ impl EphemeralFile {
|
||||
})
|
||||
}
|
||||
|
||||
fn fill_buffer(&self, buf: &mut [u8], blkno: u32) -> Result<(), Error> {
|
||||
fn fill_buffer(&self, buf: &mut [u8], blkno: u32) -> Result<(), io::Error> {
|
||||
let mut off = 0;
|
||||
while off < PAGE_SZ {
|
||||
let n = self
|
||||
@@ -96,10 +96,13 @@ impl EphemeralFile {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_buf_for_write(&self, blkno: u32) -> Result<page_cache::PageWriteGuard, Error> {
|
||||
fn get_buf_for_write(&self, blkno: u32) -> Result<page_cache::PageWriteGuard, io::Error> {
|
||||
// Look up the right page
|
||||
let cache = page_cache::get();
|
||||
let mut write_guard = match cache.write_ephemeral_buf(self.file_id, blkno) {
|
||||
let mut write_guard = match cache
|
||||
.write_ephemeral_buf(self.file_id, blkno)
|
||||
.map_err(|e| to_io_error(e, "Failed to write ephemeral buf"))?
|
||||
{
|
||||
WriteBufResult::Found(guard) => guard,
|
||||
WriteBufResult::NotFound(mut guard) => {
|
||||
// Read the page from disk into the buffer
|
||||
@@ -127,7 +130,7 @@ pub fn is_ephemeral_file(filename: &str) -> bool {
|
||||
}
|
||||
|
||||
impl FileExt for EphemeralFile {
|
||||
fn read_at(&self, dstbuf: &mut [u8], offset: u64) -> Result<usize, Error> {
|
||||
fn read_at(&self, dstbuf: &mut [u8], offset: u64) -> Result<usize, io::Error> {
|
||||
// Look up the right page
|
||||
let blkno = (offset / PAGE_SZ as u64) as u32;
|
||||
let off = offset as usize % PAGE_SZ;
|
||||
@@ -137,7 +140,10 @@ impl FileExt for EphemeralFile {
|
||||
let mut write_guard;
|
||||
|
||||
let cache = page_cache::get();
|
||||
let buf = match cache.read_ephemeral_buf(self.file_id, blkno) {
|
||||
let buf = match cache
|
||||
.read_ephemeral_buf(self.file_id, blkno)
|
||||
.map_err(|e| to_io_error(e, "Failed to read ephemeral buf"))?
|
||||
{
|
||||
ReadBufResult::Found(guard) => {
|
||||
read_guard = guard;
|
||||
read_guard.as_ref()
|
||||
@@ -158,7 +164,7 @@ impl FileExt for EphemeralFile {
|
||||
Ok(len)
|
||||
}
|
||||
|
||||
fn write_at(&self, srcbuf: &[u8], offset: u64) -> Result<usize, Error> {
|
||||
fn write_at(&self, srcbuf: &[u8], offset: u64) -> Result<usize, io::Error> {
|
||||
// Look up the right page
|
||||
let blkno = (offset / PAGE_SZ as u64) as u32;
|
||||
let off = offset as usize % PAGE_SZ;
|
||||
@@ -166,7 +172,10 @@ impl FileExt for EphemeralFile {
|
||||
|
||||
let mut write_guard;
|
||||
let cache = page_cache::get();
|
||||
let buf = match cache.write_ephemeral_buf(self.file_id, blkno) {
|
||||
let buf = match cache
|
||||
.write_ephemeral_buf(self.file_id, blkno)
|
||||
.map_err(|e| to_io_error(e, "Failed to write ephemeral buf"))?
|
||||
{
|
||||
WriteBufResult::Found(guard) => {
|
||||
write_guard = guard;
|
||||
write_guard.deref_mut()
|
||||
@@ -190,7 +199,7 @@ impl FileExt for EphemeralFile {
|
||||
}
|
||||
|
||||
impl BlobWriter for EphemeralFile {
|
||||
fn write_blob(&mut self, srcbuf: &[u8]) -> Result<u64, Error> {
|
||||
fn write_blob(&mut self, srcbuf: &[u8]) -> Result<u64, io::Error> {
|
||||
let pos = self.size;
|
||||
|
||||
let mut blknum = (self.size / PAGE_SZ as u64) as u32;
|
||||
@@ -268,11 +277,11 @@ impl Drop for EphemeralFile {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn writeback(file_id: u64, blkno: u32, buf: &[u8]) -> Result<(), std::io::Error> {
|
||||
pub fn writeback(file_id: u64, blkno: u32, buf: &[u8]) -> Result<(), io::Error> {
|
||||
if let Some(file) = EPHEMERAL_FILES.read().unwrap().files.get(&file_id) {
|
||||
match file.write_all_at(buf, blkno as u64 * PAGE_SZ as u64) {
|
||||
Ok(_) => Ok(()),
|
||||
Err(e) => Err(std::io::Error::new(
|
||||
Err(e) => Err(io::Error::new(
|
||||
ErrorKind::Other,
|
||||
format!(
|
||||
"failed to write back to ephemeral file at {} error: {}",
|
||||
@@ -282,7 +291,7 @@ pub fn writeback(file_id: u64, blkno: u32, buf: &[u8]) -> Result<(), std::io::Er
|
||||
)),
|
||||
}
|
||||
} else {
|
||||
Err(std::io::Error::new(
|
||||
Err(io::Error::new(
|
||||
ErrorKind::Other,
|
||||
"could not write back page, not found in ephemeral files hash",
|
||||
))
|
||||
@@ -292,11 +301,14 @@ pub fn writeback(file_id: u64, blkno: u32, buf: &[u8]) -> Result<(), std::io::Er
|
||||
impl BlockReader for EphemeralFile {
|
||||
type BlockLease = page_cache::PageReadGuard<'static>;
|
||||
|
||||
fn read_blk(&self, blknum: u32) -> Result<Self::BlockLease, std::io::Error> {
|
||||
fn read_blk(&self, blknum: u32) -> Result<Self::BlockLease, io::Error> {
|
||||
// Look up the right page
|
||||
let cache = page_cache::get();
|
||||
loop {
|
||||
match cache.read_ephemeral_buf(self.file_id, blknum) {
|
||||
match cache
|
||||
.read_ephemeral_buf(self.file_id, blknum)
|
||||
.map_err(|e| to_io_error(e, "Failed to read ephemeral buf"))?
|
||||
{
|
||||
ReadBufResult::Found(guard) => return Ok(guard),
|
||||
ReadBufResult::NotFound(mut write_guard) => {
|
||||
// Read the page from disk into the buffer
|
||||
@@ -311,6 +323,10 @@ impl BlockReader for EphemeralFile {
|
||||
}
|
||||
}
|
||||
|
||||
fn to_io_error(e: anyhow::Error, context: &str) -> io::Error {
|
||||
io::Error::new(ErrorKind::Other, format!("{context}: {e:#}"))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -322,7 +338,7 @@ mod tests {
|
||||
|
||||
fn repo_harness(
|
||||
test_name: &str,
|
||||
) -> Result<(&'static PageServerConf, ZTenantId, ZTimelineId), Error> {
|
||||
) -> Result<(&'static PageServerConf, ZTenantId, ZTimelineId), io::Error> {
|
||||
let repo_dir = PageServerConf::test_repo_dir(test_name);
|
||||
let _ = fs::remove_dir_all(&repo_dir);
|
||||
let conf = PageServerConf::dummy_conf(repo_dir);
|
||||
@@ -339,7 +355,7 @@ mod tests {
|
||||
|
||||
// Helper function to slurp contents of a file, starting at the current position,
|
||||
// into a string
|
||||
fn read_string(efile: &EphemeralFile, offset: u64, len: usize) -> Result<String, Error> {
|
||||
fn read_string(efile: &EphemeralFile, offset: u64, len: usize) -> Result<String, io::Error> {
|
||||
let mut buf = Vec::new();
|
||||
buf.resize(len, 0u8);
|
||||
|
||||
@@ -351,7 +367,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ephemeral_files() -> Result<(), Error> {
|
||||
fn test_ephemeral_files() -> Result<(), io::Error> {
|
||||
let (conf, tenantid, timelineid) = repo_harness("ephemeral_files")?;
|
||||
|
||||
let file_a = EphemeralFile::create(conf, tenantid, timelineid)?;
|
||||
@@ -382,7 +398,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ephemeral_blobs() -> Result<(), Error> {
|
||||
fn test_ephemeral_blobs() -> Result<(), io::Error> {
|
||||
let (conf, tenantid, timelineid) = repo_harness("ephemeral_blobs")?;
|
||||
|
||||
let mut file = EphemeralFile::create(conf, tenantid, timelineid)?;
|
||||
|
||||
@@ -15,6 +15,7 @@ use crate::layered_repository::storage_layer::{
|
||||
use crate::repository::{Key, Value};
|
||||
use crate::walrecord;
|
||||
use anyhow::{bail, ensure, Result};
|
||||
use std::cell::RefCell;
|
||||
use std::collections::HashMap;
|
||||
use tracing::*;
|
||||
use utils::{
|
||||
@@ -30,6 +31,12 @@ use std::ops::Range;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::RwLock;
|
||||
|
||||
thread_local! {
|
||||
/// A buffer for serializing object during [`InMemoryLayer::put_value`].
|
||||
/// This buffer is reused for each serialization to avoid additional malloc calls.
|
||||
static SER_BUFFER: RefCell<Vec<u8>> = RefCell::new(Vec::new());
|
||||
}
|
||||
|
||||
pub struct InMemoryLayer {
|
||||
conf: &'static PageServerConf,
|
||||
tenantid: ZTenantId,
|
||||
@@ -233,6 +240,14 @@ impl Layer for InMemoryLayer {
|
||||
}
|
||||
|
||||
impl InMemoryLayer {
|
||||
///
|
||||
/// Get layer size on the disk
|
||||
///
|
||||
pub fn size(&self) -> Result<u64> {
|
||||
let inner = self.inner.read().unwrap();
|
||||
Ok(inner.file.size)
|
||||
}
|
||||
|
||||
///
|
||||
/// Create a new, empty, in-memory layer
|
||||
///
|
||||
@@ -270,10 +285,17 @@ impl InMemoryLayer {
|
||||
pub fn put_value(&self, key: Key, lsn: Lsn, val: &Value) -> Result<()> {
|
||||
trace!("put_value key {} at {}/{}", key, self.timelineid, lsn);
|
||||
let mut inner = self.inner.write().unwrap();
|
||||
|
||||
inner.assert_writeable();
|
||||
|
||||
let off = inner.file.write_blob(&Value::ser(val)?)?;
|
||||
let off = {
|
||||
SER_BUFFER.with(|x| -> Result<_> {
|
||||
let mut buf = x.borrow_mut();
|
||||
buf.clear();
|
||||
val.ser_into(&mut (*buf))?;
|
||||
let off = inner.file.write_blob(&buf)?;
|
||||
Ok(off)
|
||||
})?
|
||||
};
|
||||
|
||||
let vec_map = inner.index.entry(key).or_default();
|
||||
let old = vec_map.append_or_update_last(lsn, off).unwrap().0;
|
||||
@@ -342,8 +364,8 @@ impl InMemoryLayer {
|
||||
// Write all page versions
|
||||
for (lsn, pos) in vec_map.as_slice() {
|
||||
cursor.read_blob_into_buf(*pos, &mut buf)?;
|
||||
let val = Value::des(&buf)?;
|
||||
delta_layer_writer.put_value(key, *lsn, val)?;
|
||||
let will_init = Value::des(&buf)?.will_init();
|
||||
delta_layer_writer.put_value_bytes(key, *lsn, &buf, will_init)?;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -10,24 +10,23 @@
|
||||
//! corresponding files are written to disk.
|
||||
//!
|
||||
|
||||
use crate::layered_repository::inmemory_layer::InMemoryLayer;
|
||||
use crate::layered_repository::storage_layer::Layer;
|
||||
use crate::layered_repository::storage_layer::{range_eq, range_overlaps};
|
||||
use crate::layered_repository::InMemoryLayer;
|
||||
use crate::repository::Key;
|
||||
use anyhow::Result;
|
||||
use lazy_static::lazy_static;
|
||||
use metrics::{register_int_gauge, IntGauge};
|
||||
use once_cell::sync::Lazy;
|
||||
use std::collections::VecDeque;
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
use tracing::*;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
lazy_static! {
|
||||
static ref NUM_ONDISK_LAYERS: IntGauge =
|
||||
register_int_gauge!("pageserver_ondisk_layers", "Number of layers on-disk")
|
||||
.expect("failed to define a metric");
|
||||
}
|
||||
static NUM_ONDISK_LAYERS: Lazy<IntGauge> = Lazy::new(|| {
|
||||
register_int_gauge!("pageserver_ondisk_layers", "Number of layers on-disk")
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
///
|
||||
/// LayerMap tracks what layers exist on a timeline.
|
||||
|
||||
@@ -139,6 +139,12 @@ pub trait Layer: Send + Sync {
|
||||
/// Iterate through all keys and values stored in the layer
|
||||
fn iter(&self) -> Box<dyn Iterator<Item = Result<(Key, Lsn, Value)>> + '_>;
|
||||
|
||||
/// Iterate through all keys stored in the layer. Returns key, lsn and value size
|
||||
/// It is used only for compaction and so is currently implemented only for DeltaLayer
|
||||
fn key_iter(&self) -> Box<dyn Iterator<Item = (Key, Lsn, u64)> + '_> {
|
||||
panic!("Not implemented")
|
||||
}
|
||||
|
||||
/// Permanently remove this layer from disk.
|
||||
fn delete(&self) -> Result<()>;
|
||||
|
||||
|
||||
2303
pageserver/src/layered_repository/timeline.rs
Normal file
2303
pageserver/src/layered_repository/timeline.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -22,13 +22,12 @@ pub mod walreceiver;
|
||||
pub mod walrecord;
|
||||
pub mod walredo;
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
use once_cell::sync::Lazy;
|
||||
use tracing::info;
|
||||
|
||||
use crate::thread_mgr::ThreadKind;
|
||||
use metrics::{register_int_gauge_vec, IntGaugeVec};
|
||||
|
||||
use layered_repository::LayeredRepository;
|
||||
use pgdatadir_mapping::DatadirTimeline;
|
||||
|
||||
/// Current storage format version
|
||||
@@ -42,14 +41,14 @@ pub const STORAGE_FORMAT_VERSION: u16 = 3;
|
||||
pub const IMAGE_FILE_MAGIC: u16 = 0x5A60;
|
||||
pub const DELTA_FILE_MAGIC: u16 = 0x5A61;
|
||||
|
||||
lazy_static! {
|
||||
static ref LIVE_CONNECTIONS_COUNT: IntGaugeVec = register_int_gauge_vec!(
|
||||
static LIVE_CONNECTIONS_COUNT: Lazy<IntGaugeVec> = Lazy::new(|| {
|
||||
register_int_gauge_vec!(
|
||||
"pageserver_live_connections",
|
||||
"Number of live network connections",
|
||||
&["pageserver_connection_kind"]
|
||||
)
|
||||
.expect("failed to define a metric");
|
||||
}
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub const LOG_FILE_NAME: &str = "pageserver.log";
|
||||
|
||||
@@ -62,10 +61,6 @@ pub enum CheckpointConfig {
|
||||
Forced,
|
||||
}
|
||||
|
||||
pub type RepositoryImpl = LayeredRepository;
|
||||
|
||||
pub type DatadirTimelineImpl = DatadirTimeline<RepositoryImpl>;
|
||||
|
||||
pub fn shutdown_pageserver(exit_code: i32) {
|
||||
// Shut down the libpq endpoint thread. This prevents new connections from
|
||||
// being accepted.
|
||||
@@ -94,3 +89,56 @@ pub fn shutdown_pageserver(exit_code: i32) {
|
||||
info!("Shut down successfully completed");
|
||||
std::process::exit(exit_code);
|
||||
}
|
||||
|
||||
const DEFAULT_BASE_BACKOFF_SECONDS: f64 = 0.1;
|
||||
const DEFAULT_MAX_BACKOFF_SECONDS: f64 = 3.0;
|
||||
|
||||
async fn exponential_backoff(n: u32, base_increment: f64, max_seconds: f64) {
|
||||
let backoff_duration_seconds =
|
||||
exponential_backoff_duration_seconds(n, base_increment, max_seconds);
|
||||
if backoff_duration_seconds > 0.0 {
|
||||
info!(
|
||||
"Backoff: waiting {backoff_duration_seconds} seconds before processing with the task",
|
||||
);
|
||||
tokio::time::sleep(std::time::Duration::from_secs_f64(backoff_duration_seconds)).await;
|
||||
}
|
||||
}
|
||||
|
||||
fn exponential_backoff_duration_seconds(n: u32, base_increment: f64, max_seconds: f64) -> f64 {
|
||||
if n == 0 {
|
||||
0.0
|
||||
} else {
|
||||
(1.0 + base_increment).powf(f64::from(n)).min(max_seconds)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod backoff_defaults_tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn backoff_defaults_produce_growing_backoff_sequence() {
|
||||
let mut current_backoff_value = None;
|
||||
|
||||
for i in 0..10_000 {
|
||||
let new_backoff_value = exponential_backoff_duration_seconds(
|
||||
i,
|
||||
DEFAULT_BASE_BACKOFF_SECONDS,
|
||||
DEFAULT_MAX_BACKOFF_SECONDS,
|
||||
);
|
||||
|
||||
if let Some(old_backoff_value) = current_backoff_value.replace(new_backoff_value) {
|
||||
assert!(
|
||||
old_backoff_value <= new_backoff_value,
|
||||
"{i}th backoff value {new_backoff_value} is smaller than the previous one {old_backoff_value}"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
current_backoff_value.expect("Should have produced backoff values to compare"),
|
||||
DEFAULT_MAX_BACKOFF_SECONDS,
|
||||
"Given big enough of retries, backoff should reach its allowed max value"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,6 +45,7 @@ use std::{
|
||||
},
|
||||
};
|
||||
|
||||
use anyhow::Context;
|
||||
use once_cell::sync::OnceCell;
|
||||
use tracing::error;
|
||||
use utils::{
|
||||
@@ -83,7 +84,7 @@ pub fn get() -> &'static PageCache {
|
||||
}
|
||||
}
|
||||
|
||||
pub const PAGE_SZ: usize = postgres_ffi::pg_constants::BLCKSZ as usize;
|
||||
pub const PAGE_SZ: usize = postgres_ffi::BLCKSZ as usize;
|
||||
const MAX_USAGE_COUNT: u8 = 5;
|
||||
|
||||
///
|
||||
@@ -342,7 +343,7 @@ impl PageCache {
|
||||
key: Key,
|
||||
lsn: Lsn,
|
||||
img: &[u8],
|
||||
) {
|
||||
) -> anyhow::Result<()> {
|
||||
let cache_key = CacheKey::MaterializedPage {
|
||||
hash_key: MaterializedPageHashKey {
|
||||
tenant_id,
|
||||
@@ -352,7 +353,7 @@ impl PageCache {
|
||||
lsn,
|
||||
};
|
||||
|
||||
match self.lock_for_write(&cache_key) {
|
||||
match self.lock_for_write(&cache_key)? {
|
||||
WriteBufResult::Found(write_guard) => {
|
||||
// We already had it in cache. Another thread must've put it there
|
||||
// concurrently. Check that it had the same contents that we
|
||||
@@ -364,17 +365,19 @@ impl PageCache {
|
||||
write_guard.mark_valid();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Section 1.2: Public interface functions for working with Ephemeral pages.
|
||||
|
||||
pub fn read_ephemeral_buf(&self, file_id: u64, blkno: u32) -> ReadBufResult {
|
||||
pub fn read_ephemeral_buf(&self, file_id: u64, blkno: u32) -> anyhow::Result<ReadBufResult> {
|
||||
let mut cache_key = CacheKey::EphemeralPage { file_id, blkno };
|
||||
|
||||
self.lock_for_read(&mut cache_key)
|
||||
}
|
||||
|
||||
pub fn write_ephemeral_buf(&self, file_id: u64, blkno: u32) -> WriteBufResult {
|
||||
pub fn write_ephemeral_buf(&self, file_id: u64, blkno: u32) -> anyhow::Result<WriteBufResult> {
|
||||
let cache_key = CacheKey::EphemeralPage { file_id, blkno };
|
||||
|
||||
self.lock_for_write(&cache_key)
|
||||
@@ -402,7 +405,7 @@ impl PageCache {
|
||||
|
||||
// Section 1.3: Public interface functions for working with immutable file pages.
|
||||
|
||||
pub fn read_immutable_buf(&self, file_id: u64, blkno: u32) -> ReadBufResult {
|
||||
pub fn read_immutable_buf(&self, file_id: u64, blkno: u32) -> anyhow::Result<ReadBufResult> {
|
||||
let mut cache_key = CacheKey::ImmutableFilePage { file_id, blkno };
|
||||
|
||||
self.lock_for_read(&mut cache_key)
|
||||
@@ -495,15 +498,16 @@ impl PageCache {
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
fn lock_for_read(&self, cache_key: &mut CacheKey) -> ReadBufResult {
|
||||
fn lock_for_read(&self, cache_key: &mut CacheKey) -> anyhow::Result<ReadBufResult> {
|
||||
loop {
|
||||
// First check if the key already exists in the cache.
|
||||
if let Some(read_guard) = self.try_lock_for_read(cache_key) {
|
||||
return ReadBufResult::Found(read_guard);
|
||||
return Ok(ReadBufResult::Found(read_guard));
|
||||
}
|
||||
|
||||
// Not found. Find a victim buffer
|
||||
let (slot_idx, mut inner) = self.find_victim();
|
||||
let (slot_idx, mut inner) =
|
||||
self.find_victim().context("Failed to find evict victim")?;
|
||||
|
||||
// Insert mapping for this. At this point, we may find that another
|
||||
// thread did the same thing concurrently. In that case, we evicted
|
||||
@@ -526,10 +530,10 @@ impl PageCache {
|
||||
inner.dirty = false;
|
||||
slot.usage_count.store(1, Ordering::Relaxed);
|
||||
|
||||
return ReadBufResult::NotFound(PageWriteGuard {
|
||||
return Ok(ReadBufResult::NotFound(PageWriteGuard {
|
||||
inner,
|
||||
valid: false,
|
||||
});
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -556,15 +560,16 @@ impl PageCache {
|
||||
///
|
||||
/// Similar to lock_for_read(), but the returned buffer is write-locked and
|
||||
/// may be modified by the caller even if it's already found in the cache.
|
||||
fn lock_for_write(&self, cache_key: &CacheKey) -> WriteBufResult {
|
||||
fn lock_for_write(&self, cache_key: &CacheKey) -> anyhow::Result<WriteBufResult> {
|
||||
loop {
|
||||
// First check if the key already exists in the cache.
|
||||
if let Some(write_guard) = self.try_lock_for_write(cache_key) {
|
||||
return WriteBufResult::Found(write_guard);
|
||||
return Ok(WriteBufResult::Found(write_guard));
|
||||
}
|
||||
|
||||
// Not found. Find a victim buffer
|
||||
let (slot_idx, mut inner) = self.find_victim();
|
||||
let (slot_idx, mut inner) =
|
||||
self.find_victim().context("Failed to find evict victim")?;
|
||||
|
||||
// Insert mapping for this. At this point, we may find that another
|
||||
// thread did the same thing concurrently. In that case, we evicted
|
||||
@@ -587,10 +592,10 @@ impl PageCache {
|
||||
inner.dirty = false;
|
||||
slot.usage_count.store(1, Ordering::Relaxed);
|
||||
|
||||
return WriteBufResult::NotFound(PageWriteGuard {
|
||||
return Ok(WriteBufResult::NotFound(PageWriteGuard {
|
||||
inner,
|
||||
valid: false,
|
||||
});
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -754,7 +759,7 @@ impl PageCache {
|
||||
/// Find a slot to evict.
|
||||
///
|
||||
/// On return, the slot is empty and write-locked.
|
||||
fn find_victim(&self) -> (usize, RwLockWriteGuard<SlotInner>) {
|
||||
fn find_victim(&self) -> anyhow::Result<(usize, RwLockWriteGuard<SlotInner>)> {
|
||||
let iter_limit = self.slots.len() * 10;
|
||||
let mut iters = 0;
|
||||
loop {
|
||||
@@ -767,7 +772,7 @@ impl PageCache {
|
||||
let mut inner = match slot.inner.try_write() {
|
||||
Ok(inner) => inner,
|
||||
Err(TryLockError::Poisoned(err)) => {
|
||||
panic!("buffer lock was poisoned: {:?}", err)
|
||||
anyhow::bail!("buffer lock was poisoned: {err:?}")
|
||||
}
|
||||
Err(TryLockError::WouldBlock) => {
|
||||
// If we have looped through the whole buffer pool 10 times
|
||||
@@ -777,7 +782,7 @@ impl PageCache {
|
||||
// there are buffers in the pool. In practice, with a reasonably
|
||||
// large buffer pool it really shouldn't happen.
|
||||
if iters > iter_limit {
|
||||
panic!("could not find a victim buffer to evict");
|
||||
anyhow::bail!("exceeded evict iter limit");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@@ -804,7 +809,7 @@ impl PageCache {
|
||||
inner.dirty = false;
|
||||
inner.key = None;
|
||||
}
|
||||
return (slot_idx, inner);
|
||||
return Ok((slot_idx, inner));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
use anyhow::{bail, ensure, Context, Result};
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
use lazy_static::lazy_static;
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use std::io::{self, Read};
|
||||
use std::net::TcpListener;
|
||||
@@ -30,7 +30,6 @@ use utils::{
|
||||
use crate::basebackup;
|
||||
use crate::config::{PageServerConf, ProfilingConfig};
|
||||
use crate::import_datadir::{import_basebackup_from_tar, import_wal_from_tar};
|
||||
use crate::layered_repository::LayeredRepository;
|
||||
use crate::pgdatadir_mapping::{DatadirTimeline, LsnForTimestamp};
|
||||
use crate::profiling::profpoint_start;
|
||||
use crate::reltag::RelTag;
|
||||
@@ -41,9 +40,10 @@ use crate::thread_mgr;
|
||||
use crate::thread_mgr::ThreadKind;
|
||||
use crate::CheckpointConfig;
|
||||
use metrics::{register_histogram_vec, HistogramVec};
|
||||
use postgres_ffi::xlog_utils::to_pg_timestamp;
|
||||
use postgres_ffi::v14::xlog_utils::to_pg_timestamp;
|
||||
|
||||
use postgres_ffi::pg_constants;
|
||||
use postgres_ffi::v14::pg_constants::DEFAULTTABLESPACE_OID;
|
||||
use postgres_ffi::BLCKSZ;
|
||||
|
||||
// Wrapped in libpq CopyData
|
||||
enum PagestreamFeMessage {
|
||||
@@ -435,15 +435,15 @@ const TIME_BUCKETS: &[f64] = &[
|
||||
0.1, // 1/10 s
|
||||
];
|
||||
|
||||
lazy_static! {
|
||||
static ref SMGR_QUERY_TIME: HistogramVec = register_histogram_vec!(
|
||||
static SMGR_QUERY_TIME: Lazy<HistogramVec> = Lazy::new(|| {
|
||||
register_histogram_vec!(
|
||||
"pageserver_smgr_query_seconds",
|
||||
"Time spent on smgr query handling",
|
||||
&["smgr_query_type", "tenant_id", "timeline_id"],
|
||||
TIME_BUCKETS.into()
|
||||
)
|
||||
.expect("failed to define a metric");
|
||||
}
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
impl PageServerHandler {
|
||||
pub fn new(conf: &'static PageServerConf, auth: Option<Arc<JwtAuth>>) -> Self {
|
||||
@@ -555,9 +555,6 @@ impl PageServerHandler {
|
||||
info!("creating new timeline");
|
||||
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
|
||||
let timeline = repo.create_empty_timeline(timeline_id, base_lsn)?;
|
||||
let repartition_distance = repo.get_checkpoint_distance();
|
||||
let mut datadir_timeline =
|
||||
DatadirTimeline::<LayeredRepository>::new(timeline, repartition_distance);
|
||||
|
||||
// TODO mark timeline as not ready until it reaches end_lsn.
|
||||
// We might have some wal to import as well, and we should prevent compute
|
||||
@@ -573,7 +570,7 @@ impl PageServerHandler {
|
||||
info!("importing basebackup");
|
||||
pgb.write_message(&BeMessage::CopyInResponse)?;
|
||||
let reader = CopyInReader::new(pgb);
|
||||
import_basebackup_from_tar(&mut datadir_timeline, reader, base_lsn)?;
|
||||
import_basebackup_from_tar(&*timeline, reader, base_lsn)?;
|
||||
|
||||
// TODO check checksum
|
||||
// Meanwhile you can verify client-side by taking fullbackup
|
||||
@@ -583,7 +580,7 @@ impl PageServerHandler {
|
||||
|
||||
// Flush data to disk, then upload to s3
|
||||
info!("flushing layers");
|
||||
datadir_timeline.tline.checkpoint(CheckpointConfig::Flush)?;
|
||||
timeline.checkpoint(CheckpointConfig::Flush)?;
|
||||
|
||||
info!("done");
|
||||
Ok(())
|
||||
@@ -605,10 +602,6 @@ impl PageServerHandler {
|
||||
let timeline = repo.get_timeline_load(timeline_id)?;
|
||||
ensure!(timeline.get_last_record_lsn() == start_lsn);
|
||||
|
||||
let repartition_distance = repo.get_checkpoint_distance();
|
||||
let mut datadir_timeline =
|
||||
DatadirTimeline::<LayeredRepository>::new(timeline, repartition_distance);
|
||||
|
||||
// TODO leave clean state on error. For now you can use detach to clean
|
||||
// up broken state from a failed import.
|
||||
|
||||
@@ -616,16 +609,16 @@ impl PageServerHandler {
|
||||
info!("importing wal");
|
||||
pgb.write_message(&BeMessage::CopyInResponse)?;
|
||||
let reader = CopyInReader::new(pgb);
|
||||
import_wal_from_tar(&mut datadir_timeline, reader, start_lsn, end_lsn)?;
|
||||
import_wal_from_tar(&*timeline, reader, start_lsn, end_lsn)?;
|
||||
|
||||
// TODO Does it make sense to overshoot?
|
||||
ensure!(datadir_timeline.tline.get_last_record_lsn() >= end_lsn);
|
||||
ensure!(timeline.get_last_record_lsn() >= end_lsn);
|
||||
|
||||
// Flush data to disk, then upload to s3. No need for a forced checkpoint.
|
||||
// We only want to persist the data, and it doesn't matter if it's in the
|
||||
// shape of deltas or images.
|
||||
info!("flushing layers");
|
||||
datadir_timeline.tline.checkpoint(CheckpointConfig::Flush)?;
|
||||
timeline.checkpoint(CheckpointConfig::Flush)?;
|
||||
|
||||
info!("done");
|
||||
Ok(())
|
||||
@@ -643,8 +636,8 @@ impl PageServerHandler {
|
||||
/// In either case, if the page server hasn't received the WAL up to the
|
||||
/// requested LSN yet, we will wait for it to arrive. The return value is
|
||||
/// the LSN that should be used to look up the page versions.
|
||||
fn wait_or_get_last_lsn<R: Repository>(
|
||||
timeline: &DatadirTimeline<R>,
|
||||
fn wait_or_get_last_lsn<T: DatadirTimeline>(
|
||||
timeline: &T,
|
||||
mut lsn: Lsn,
|
||||
latest: bool,
|
||||
latest_gc_cutoff_lsn: &RwLockReadGuard<Lsn>,
|
||||
@@ -671,7 +664,7 @@ impl PageServerHandler {
|
||||
if lsn <= last_record_lsn {
|
||||
lsn = last_record_lsn;
|
||||
} else {
|
||||
timeline.tline.wait_lsn(lsn)?;
|
||||
timeline.wait_lsn(lsn)?;
|
||||
// Since we waited for 'lsn' to arrive, that is now the last
|
||||
// record LSN. (Or close enough for our purposes; the
|
||||
// last-record LSN can advance immediately after we return
|
||||
@@ -681,7 +674,7 @@ impl PageServerHandler {
|
||||
if lsn == Lsn(0) {
|
||||
bail!("invalid LSN(0) in request");
|
||||
}
|
||||
timeline.tline.wait_lsn(lsn)?;
|
||||
timeline.wait_lsn(lsn)?;
|
||||
}
|
||||
ensure!(
|
||||
lsn >= **latest_gc_cutoff_lsn,
|
||||
@@ -691,14 +684,14 @@ impl PageServerHandler {
|
||||
Ok(lsn)
|
||||
}
|
||||
|
||||
fn handle_get_rel_exists_request<R: Repository>(
|
||||
fn handle_get_rel_exists_request<T: DatadirTimeline>(
|
||||
&self,
|
||||
timeline: &DatadirTimeline<R>,
|
||||
timeline: &T,
|
||||
req: &PagestreamExistsRequest,
|
||||
) -> Result<PagestreamBeMessage> {
|
||||
let _enter = info_span!("get_rel_exists", rel = %req.rel, req_lsn = %req.lsn).entered();
|
||||
|
||||
let latest_gc_cutoff_lsn = timeline.tline.get_latest_gc_cutoff_lsn();
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)?;
|
||||
|
||||
let exists = timeline.get_rel_exists(req.rel, lsn)?;
|
||||
@@ -708,13 +701,13 @@ impl PageServerHandler {
|
||||
}))
|
||||
}
|
||||
|
||||
fn handle_get_nblocks_request<R: Repository>(
|
||||
fn handle_get_nblocks_request<T: DatadirTimeline>(
|
||||
&self,
|
||||
timeline: &DatadirTimeline<R>,
|
||||
timeline: &T,
|
||||
req: &PagestreamNblocksRequest,
|
||||
) -> Result<PagestreamBeMessage> {
|
||||
let _enter = info_span!("get_nblocks", rel = %req.rel, req_lsn = %req.lsn).entered();
|
||||
let latest_gc_cutoff_lsn = timeline.tline.get_latest_gc_cutoff_lsn();
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)?;
|
||||
|
||||
let n_blocks = timeline.get_rel_size(req.rel, lsn)?;
|
||||
@@ -724,33 +717,32 @@ impl PageServerHandler {
|
||||
}))
|
||||
}
|
||||
|
||||
fn handle_db_size_request<R: Repository>(
|
||||
fn handle_db_size_request<T: DatadirTimeline>(
|
||||
&self,
|
||||
timeline: &DatadirTimeline<R>,
|
||||
timeline: &T,
|
||||
req: &PagestreamDbSizeRequest,
|
||||
) -> Result<PagestreamBeMessage> {
|
||||
let _enter = info_span!("get_db_size", dbnode = %req.dbnode, req_lsn = %req.lsn).entered();
|
||||
let latest_gc_cutoff_lsn = timeline.tline.get_latest_gc_cutoff_lsn();
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)?;
|
||||
|
||||
let total_blocks =
|
||||
timeline.get_db_size(pg_constants::DEFAULTTABLESPACE_OID, req.dbnode, lsn)?;
|
||||
let total_blocks = timeline.get_db_size(DEFAULTTABLESPACE_OID, req.dbnode, lsn)?;
|
||||
|
||||
let db_size = total_blocks as i64 * pg_constants::BLCKSZ as i64;
|
||||
let db_size = total_blocks as i64 * BLCKSZ as i64;
|
||||
|
||||
Ok(PagestreamBeMessage::DbSize(PagestreamDbSizeResponse {
|
||||
db_size,
|
||||
}))
|
||||
}
|
||||
|
||||
fn handle_get_page_at_lsn_request<R: Repository>(
|
||||
fn handle_get_page_at_lsn_request<T: DatadirTimeline>(
|
||||
&self,
|
||||
timeline: &DatadirTimeline<R>,
|
||||
timeline: &T,
|
||||
req: &PagestreamGetPageRequest,
|
||||
) -> Result<PagestreamBeMessage> {
|
||||
let _enter = info_span!("get_page", rel = %req.rel, blkno = &req.blkno, req_lsn = %req.lsn)
|
||||
.entered();
|
||||
let latest_gc_cutoff_lsn = timeline.tline.get_latest_gc_cutoff_lsn();
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)?;
|
||||
/*
|
||||
// Add a 1s delay to some requests. The delayed causes the requests to
|
||||
@@ -783,7 +775,7 @@ impl PageServerHandler {
|
||||
// check that the timeline exists
|
||||
let timeline = tenant_mgr::get_local_timeline_with_load(tenantid, timelineid)
|
||||
.context("Cannot load local timeline")?;
|
||||
let latest_gc_cutoff_lsn = timeline.tline.get_latest_gc_cutoff_lsn();
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
if let Some(lsn) = lsn {
|
||||
timeline
|
||||
.check_lsn_is_in_scope(lsn, &latest_gc_cutoff_lsn)
|
||||
@@ -921,7 +913,7 @@ impl postgres_backend::Handler for PageServerHandler {
|
||||
let timeline = tenant_mgr::get_local_timeline_with_load(tenantid, timelineid)
|
||||
.context("Cannot load local timeline")?;
|
||||
|
||||
let end_of_timeline = timeline.tline.get_last_record_rlsn();
|
||||
let end_of_timeline = timeline.get_last_record_rlsn();
|
||||
|
||||
pgb.write_message_noflush(&BeMessage::RowDescription(&[
|
||||
RowDescriptor::text_col(b"prev_lsn"),
|
||||
@@ -1052,6 +1044,7 @@ impl postgres_backend::Handler for PageServerHandler {
|
||||
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
|
||||
pgb.write_message_noflush(&BeMessage::RowDescription(&[
|
||||
RowDescriptor::int8_col(b"checkpoint_distance"),
|
||||
RowDescriptor::int8_col(b"checkpoint_timeout"),
|
||||
RowDescriptor::int8_col(b"compaction_target_size"),
|
||||
RowDescriptor::int8_col(b"compaction_period"),
|
||||
RowDescriptor::int8_col(b"compaction_threshold"),
|
||||
@@ -1062,6 +1055,12 @@ impl postgres_backend::Handler for PageServerHandler {
|
||||
]))?
|
||||
.write_message_noflush(&BeMessage::DataRow(&[
|
||||
Some(repo.get_checkpoint_distance().to_string().as_bytes()),
|
||||
Some(
|
||||
repo.get_checkpoint_timeout()
|
||||
.as_secs()
|
||||
.to_string()
|
||||
.as_bytes(),
|
||||
),
|
||||
Some(repo.get_compaction_target_size().to_string().as_bytes()),
|
||||
Some(
|
||||
repo.get_compaction_period()
|
||||
@@ -1139,7 +1138,7 @@ impl postgres_backend::Handler for PageServerHandler {
|
||||
let timelineid = ZTimelineId::from_str(caps.get(2).unwrap().as_str())?;
|
||||
let timeline = tenant_mgr::get_local_timeline_with_load(tenantid, timelineid)
|
||||
.context("Couldn't load timeline")?;
|
||||
timeline.tline.compact()?;
|
||||
timeline.compact()?;
|
||||
|
||||
pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
|
||||
.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
||||
@@ -1159,13 +1158,8 @@ impl postgres_backend::Handler for PageServerHandler {
|
||||
let timeline = tenant_mgr::get_local_timeline_with_load(tenantid, timelineid)
|
||||
.context("Cannot load local timeline")?;
|
||||
|
||||
timeline.tline.checkpoint(CheckpointConfig::Forced)?;
|
||||
|
||||
// Also compact it.
|
||||
//
|
||||
// FIXME: This probably shouldn't be part of a "checkpoint" command, but a
|
||||
// separate operation. Update the tests if you change this.
|
||||
timeline.tline.compact()?;
|
||||
// Checkpoint the timeline and also compact it (due to `CheckpointConfig::Forced`).
|
||||
timeline.checkpoint(CheckpointConfig::Forced)?;
|
||||
|
||||
pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
|
||||
.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
||||
|
||||
@@ -6,46 +6,26 @@
|
||||
//! walingest.rs handles a few things like implicit relation creation and extension.
|
||||
//! Clarify that)
|
||||
//!
|
||||
use crate::keyspace::{KeyPartitioning, KeySpace, KeySpaceAccum};
|
||||
use crate::keyspace::{KeySpace, KeySpaceAccum};
|
||||
use crate::reltag::{RelTag, SlruKind};
|
||||
use crate::repository::Timeline;
|
||||
use crate::repository::*;
|
||||
use crate::repository::{Repository, Timeline};
|
||||
use crate::walrecord::ZenithWalRecord;
|
||||
use anyhow::{bail, ensure, Result};
|
||||
use bytes::{Buf, Bytes};
|
||||
use postgres_ffi::xlog_utils::TimestampTz;
|
||||
use postgres_ffi::{pg_constants, Oid, TransactionId};
|
||||
use postgres_ffi::v14::pg_constants;
|
||||
use postgres_ffi::v14::xlog_utils::TimestampTz;
|
||||
use postgres_ffi::BLCKSZ;
|
||||
use postgres_ffi::{Oid, TransactionId};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::ops::Range;
|
||||
use std::sync::atomic::{AtomicIsize, Ordering};
|
||||
use std::sync::{Arc, Mutex, RwLockReadGuard};
|
||||
use tracing::{debug, error, trace, warn};
|
||||
use tracing::{debug, trace, warn};
|
||||
use utils::{bin_ser::BeSer, lsn::Lsn};
|
||||
|
||||
/// Block number within a relation or SLRU. This matches PostgreSQL's BlockNumber type.
|
||||
pub type BlockNumber = u32;
|
||||
|
||||
pub struct DatadirTimeline<R>
|
||||
where
|
||||
R: Repository,
|
||||
{
|
||||
/// The underlying key-value store. Callers should not read or modify the
|
||||
/// data in the underlying store directly. However, it is exposed to have
|
||||
/// access to information like last-LSN, ancestor, and operations like
|
||||
/// compaction.
|
||||
pub tline: Arc<R::Timeline>,
|
||||
|
||||
/// When did we last calculate the partitioning?
|
||||
partitioning: Mutex<(KeyPartitioning, Lsn)>,
|
||||
|
||||
/// Configuration: how often should the partitioning be recalculated.
|
||||
repartition_threshold: u64,
|
||||
|
||||
/// Current logical size of the "datadir", at the last LSN.
|
||||
current_logical_size: AtomicIsize,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum LsnForTimestamp {
|
||||
Present(Lsn),
|
||||
@@ -54,33 +34,29 @@ pub enum LsnForTimestamp {
|
||||
NoData(Lsn),
|
||||
}
|
||||
|
||||
impl<R: Repository> DatadirTimeline<R> {
|
||||
pub fn new(tline: Arc<R::Timeline>, repartition_threshold: u64) -> Self {
|
||||
DatadirTimeline {
|
||||
tline,
|
||||
partitioning: Mutex::new((KeyPartitioning::new(), Lsn(0))),
|
||||
current_logical_size: AtomicIsize::new(0),
|
||||
repartition_threshold,
|
||||
}
|
||||
}
|
||||
|
||||
/// (Re-)calculate the logical size of the database at the latest LSN.
|
||||
///
|
||||
/// This can be a slow operation.
|
||||
pub fn init_logical_size(&self) -> Result<()> {
|
||||
let last_lsn = self.tline.get_last_record_lsn();
|
||||
self.current_logical_size.store(
|
||||
self.get_current_logical_size_non_incremental(last_lsn)? as isize,
|
||||
Ordering::SeqCst,
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
///
|
||||
/// This trait provides all the functionality to store PostgreSQL relations, SLRUs,
|
||||
/// and other special kinds of files, in a versioned key-value store. The
|
||||
/// Timeline trait provides the key-value store.
|
||||
///
|
||||
/// This is a trait, so that we can easily include all these functions in a Timeline
|
||||
/// implementation. You're not expected to have different implementations of this trait,
|
||||
/// rather, this provides an interface and implementation, over Timeline.
|
||||
///
|
||||
/// If you wanted to store other kinds of data in the Neon repository, e.g.
|
||||
/// flat files or MySQL, you would create a new trait like this, with all the
|
||||
/// functions that make sense for the kind of data you're storing. For flat files,
|
||||
/// for example, you might have a function like "fn read(path, offset, size)".
|
||||
/// We might also have that situation in the future, to support multiple PostgreSQL
|
||||
/// versions, if there are big changes in how the data is organized in the data
|
||||
/// directory, or if new special files are introduced.
|
||||
///
|
||||
pub trait DatadirTimeline: Timeline {
|
||||
/// Start ingesting a WAL record, or other atomic modification of
|
||||
/// the timeline.
|
||||
///
|
||||
/// This provides a transaction-like interface to perform a bunch
|
||||
/// of modifications atomically, all stamped with one LSN.
|
||||
/// of modifications atomically.
|
||||
///
|
||||
/// To ingest a WAL record, call begin_modification(lsn) to get a
|
||||
/// DatadirModification object. Use the functions in the object to
|
||||
@@ -88,18 +64,27 @@ impl<R: Repository> DatadirTimeline<R> {
|
||||
/// that the WAL record affects. When you're done, call commit() to
|
||||
/// commit the changes.
|
||||
///
|
||||
/// Lsn stored in modification is advanced by `ingest_record` and
|
||||
/// is used by `commit()` to update `last_record_lsn`.
|
||||
///
|
||||
/// Calling commit() will flush all the changes and reset the state,
|
||||
/// so the `DatadirModification` struct can be reused to perform the next modification.
|
||||
///
|
||||
/// Note that any pending modifications you make through the
|
||||
/// modification object won't be visible to calls to the 'get' and list
|
||||
/// functions of the timeline until you finish! And if you update the
|
||||
/// same page twice, the last update wins.
|
||||
///
|
||||
pub fn begin_modification(&self, lsn: Lsn) -> DatadirModification<R> {
|
||||
fn begin_modification(&self, lsn: Lsn) -> DatadirModification<Self>
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
DatadirModification {
|
||||
tline: self,
|
||||
lsn,
|
||||
pending_updates: HashMap::new(),
|
||||
pending_deletions: Vec::new(),
|
||||
pending_nblocks: 0,
|
||||
lsn,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -108,7 +93,7 @@ impl<R: Repository> DatadirTimeline<R> {
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
/// Look up given page version.
|
||||
pub fn get_rel_page_at_lsn(&self, tag: RelTag, blknum: BlockNumber, lsn: Lsn) -> Result<Bytes> {
|
||||
fn get_rel_page_at_lsn(&self, tag: RelTag, blknum: BlockNumber, lsn: Lsn) -> Result<Bytes> {
|
||||
ensure!(tag.relnode != 0, "invalid relnode");
|
||||
|
||||
let nblocks = self.get_rel_size(tag, lsn)?;
|
||||
@@ -121,11 +106,11 @@ impl<R: Repository> DatadirTimeline<R> {
|
||||
}
|
||||
|
||||
let key = rel_block_to_key(tag, blknum);
|
||||
self.tline.get(key, lsn)
|
||||
self.get(key, lsn)
|
||||
}
|
||||
|
||||
// Get size of a database in blocks
|
||||
pub fn get_db_size(&self, spcnode: Oid, dbnode: Oid, lsn: Lsn) -> Result<usize> {
|
||||
fn get_db_size(&self, spcnode: Oid, dbnode: Oid, lsn: Lsn) -> Result<usize> {
|
||||
let mut total_blocks = 0;
|
||||
|
||||
let rels = self.list_rels(spcnode, dbnode, lsn)?;
|
||||
@@ -138,9 +123,13 @@ impl<R: Repository> DatadirTimeline<R> {
|
||||
}
|
||||
|
||||
/// Get size of a relation file
|
||||
pub fn get_rel_size(&self, tag: RelTag, lsn: Lsn) -> Result<BlockNumber> {
|
||||
fn get_rel_size(&self, tag: RelTag, lsn: Lsn) -> Result<BlockNumber> {
|
||||
ensure!(tag.relnode != 0, "invalid relnode");
|
||||
|
||||
if let Some(nblocks) = self.get_cached_rel_size(&tag, lsn) {
|
||||
return Ok(nblocks);
|
||||
}
|
||||
|
||||
if (tag.forknum == pg_constants::FSM_FORKNUM
|
||||
|| tag.forknum == pg_constants::VISIBILITYMAP_FORKNUM)
|
||||
&& !self.get_rel_exists(tag, lsn)?
|
||||
@@ -153,17 +142,25 @@ impl<R: Repository> DatadirTimeline<R> {
|
||||
}
|
||||
|
||||
let key = rel_size_to_key(tag);
|
||||
let mut buf = self.tline.get(key, lsn)?;
|
||||
Ok(buf.get_u32_le())
|
||||
let mut buf = self.get(key, lsn)?;
|
||||
let nblocks = buf.get_u32_le();
|
||||
|
||||
// Update relation size cache
|
||||
self.update_cached_rel_size(tag, lsn, nblocks);
|
||||
Ok(nblocks)
|
||||
}
|
||||
|
||||
/// Does relation exist?
|
||||
pub fn get_rel_exists(&self, tag: RelTag, lsn: Lsn) -> Result<bool> {
|
||||
fn get_rel_exists(&self, tag: RelTag, lsn: Lsn) -> Result<bool> {
|
||||
ensure!(tag.relnode != 0, "invalid relnode");
|
||||
|
||||
// first try to lookup relation in cache
|
||||
if let Some(_nblocks) = self.get_cached_rel_size(&tag, lsn) {
|
||||
return Ok(true);
|
||||
}
|
||||
// fetch directory listing
|
||||
let key = rel_dir_to_key(tag.spcnode, tag.dbnode);
|
||||
let buf = self.tline.get(key, lsn)?;
|
||||
let buf = self.get(key, lsn)?;
|
||||
let dir = RelDirectory::des(&buf)?;
|
||||
|
||||
let exists = dir.rels.get(&(tag.relnode, tag.forknum)).is_some();
|
||||
@@ -172,10 +169,10 @@ impl<R: Repository> DatadirTimeline<R> {
|
||||
}
|
||||
|
||||
/// Get a list of all existing relations in given tablespace and database.
|
||||
pub fn list_rels(&self, spcnode: Oid, dbnode: Oid, lsn: Lsn) -> Result<HashSet<RelTag>> {
|
||||
fn list_rels(&self, spcnode: Oid, dbnode: Oid, lsn: Lsn) -> Result<HashSet<RelTag>> {
|
||||
// fetch directory listing
|
||||
let key = rel_dir_to_key(spcnode, dbnode);
|
||||
let buf = self.tline.get(key, lsn)?;
|
||||
let buf = self.get(key, lsn)?;
|
||||
let dir = RelDirectory::des(&buf)?;
|
||||
|
||||
let rels: HashSet<RelTag> =
|
||||
@@ -190,7 +187,7 @@ impl<R: Repository> DatadirTimeline<R> {
|
||||
}
|
||||
|
||||
/// Look up given SLRU page version.
|
||||
pub fn get_slru_page_at_lsn(
|
||||
fn get_slru_page_at_lsn(
|
||||
&self,
|
||||
kind: SlruKind,
|
||||
segno: u32,
|
||||
@@ -198,26 +195,21 @@ impl<R: Repository> DatadirTimeline<R> {
|
||||
lsn: Lsn,
|
||||
) -> Result<Bytes> {
|
||||
let key = slru_block_to_key(kind, segno, blknum);
|
||||
self.tline.get(key, lsn)
|
||||
self.get(key, lsn)
|
||||
}
|
||||
|
||||
/// Get size of an SLRU segment
|
||||
pub fn get_slru_segment_size(
|
||||
&self,
|
||||
kind: SlruKind,
|
||||
segno: u32,
|
||||
lsn: Lsn,
|
||||
) -> Result<BlockNumber> {
|
||||
fn get_slru_segment_size(&self, kind: SlruKind, segno: u32, lsn: Lsn) -> Result<BlockNumber> {
|
||||
let key = slru_segment_size_to_key(kind, segno);
|
||||
let mut buf = self.tline.get(key, lsn)?;
|
||||
let mut buf = self.get(key, lsn)?;
|
||||
Ok(buf.get_u32_le())
|
||||
}
|
||||
|
||||
/// Get size of an SLRU segment
|
||||
pub fn get_slru_segment_exists(&self, kind: SlruKind, segno: u32, lsn: Lsn) -> Result<bool> {
|
||||
fn get_slru_segment_exists(&self, kind: SlruKind, segno: u32, lsn: Lsn) -> Result<bool> {
|
||||
// fetch directory listing
|
||||
let key = slru_dir_to_key(kind);
|
||||
let buf = self.tline.get(key, lsn)?;
|
||||
let buf = self.get(key, lsn)?;
|
||||
let dir = SlruSegmentDirectory::des(&buf)?;
|
||||
|
||||
let exists = dir.segments.get(&segno).is_some();
|
||||
@@ -231,10 +223,10 @@ impl<R: Repository> DatadirTimeline<R> {
|
||||
/// so it's not well defined which LSN you get if there were multiple commits
|
||||
/// "in flight" at that point in time.
|
||||
///
|
||||
pub fn find_lsn_for_timestamp(&self, search_timestamp: TimestampTz) -> Result<LsnForTimestamp> {
|
||||
let gc_cutoff_lsn_guard = self.tline.get_latest_gc_cutoff_lsn();
|
||||
fn find_lsn_for_timestamp(&self, search_timestamp: TimestampTz) -> Result<LsnForTimestamp> {
|
||||
let gc_cutoff_lsn_guard = self.get_latest_gc_cutoff_lsn();
|
||||
let min_lsn = *gc_cutoff_lsn_guard;
|
||||
let max_lsn = self.tline.get_last_record_lsn();
|
||||
let max_lsn = self.get_last_record_lsn();
|
||||
|
||||
// LSNs are always 8-byte aligned. low/mid/high represent the
|
||||
// LSN divided by 8.
|
||||
@@ -307,9 +299,9 @@ impl<R: Repository> DatadirTimeline<R> {
|
||||
let clog_page =
|
||||
self.get_slru_page_at_lsn(SlruKind::Clog, segno, blknum, probe_lsn)?;
|
||||
|
||||
if clog_page.len() == pg_constants::BLCKSZ as usize + 8 {
|
||||
if clog_page.len() == BLCKSZ as usize + 8 {
|
||||
let mut timestamp_bytes = [0u8; 8];
|
||||
timestamp_bytes.copy_from_slice(&clog_page[pg_constants::BLCKSZ as usize..]);
|
||||
timestamp_bytes.copy_from_slice(&clog_page[BLCKSZ as usize..]);
|
||||
let timestamp = TimestampTz::from_be_bytes(timestamp_bytes);
|
||||
|
||||
if timestamp >= search_timestamp {
|
||||
@@ -325,88 +317,51 @@ impl<R: Repository> DatadirTimeline<R> {
|
||||
}
|
||||
|
||||
/// Get a list of SLRU segments
|
||||
pub fn list_slru_segments(&self, kind: SlruKind, lsn: Lsn) -> Result<HashSet<u32>> {
|
||||
fn list_slru_segments(&self, kind: SlruKind, lsn: Lsn) -> Result<HashSet<u32>> {
|
||||
// fetch directory entry
|
||||
let key = slru_dir_to_key(kind);
|
||||
|
||||
let buf = self.tline.get(key, lsn)?;
|
||||
let buf = self.get(key, lsn)?;
|
||||
let dir = SlruSegmentDirectory::des(&buf)?;
|
||||
|
||||
Ok(dir.segments)
|
||||
}
|
||||
|
||||
pub fn get_relmap_file(&self, spcnode: Oid, dbnode: Oid, lsn: Lsn) -> Result<Bytes> {
|
||||
fn get_relmap_file(&self, spcnode: Oid, dbnode: Oid, lsn: Lsn) -> Result<Bytes> {
|
||||
let key = relmap_file_key(spcnode, dbnode);
|
||||
|
||||
let buf = self.tline.get(key, lsn)?;
|
||||
let buf = self.get(key, lsn)?;
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
pub fn list_dbdirs(&self, lsn: Lsn) -> Result<HashMap<(Oid, Oid), bool>> {
|
||||
fn list_dbdirs(&self, lsn: Lsn) -> Result<HashMap<(Oid, Oid), bool>> {
|
||||
// fetch directory entry
|
||||
let buf = self.tline.get(DBDIR_KEY, lsn)?;
|
||||
let buf = self.get(DBDIR_KEY, lsn)?;
|
||||
let dir = DbDirectory::des(&buf)?;
|
||||
|
||||
Ok(dir.dbdirs)
|
||||
}
|
||||
|
||||
pub fn get_twophase_file(&self, xid: TransactionId, lsn: Lsn) -> Result<Bytes> {
|
||||
fn get_twophase_file(&self, xid: TransactionId, lsn: Lsn) -> Result<Bytes> {
|
||||
let key = twophase_file_key(xid);
|
||||
let buf = self.tline.get(key, lsn)?;
|
||||
let buf = self.get(key, lsn)?;
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
pub fn list_twophase_files(&self, lsn: Lsn) -> Result<HashSet<TransactionId>> {
|
||||
fn list_twophase_files(&self, lsn: Lsn) -> Result<HashSet<TransactionId>> {
|
||||
// fetch directory entry
|
||||
let buf = self.tline.get(TWOPHASEDIR_KEY, lsn)?;
|
||||
let buf = self.get(TWOPHASEDIR_KEY, lsn)?;
|
||||
let dir = TwoPhaseDirectory::des(&buf)?;
|
||||
|
||||
Ok(dir.xids)
|
||||
}
|
||||
|
||||
pub fn get_control_file(&self, lsn: Lsn) -> Result<Bytes> {
|
||||
self.tline.get(CONTROLFILE_KEY, lsn)
|
||||
fn get_control_file(&self, lsn: Lsn) -> Result<Bytes> {
|
||||
self.get(CONTROLFILE_KEY, lsn)
|
||||
}
|
||||
|
||||
pub fn get_checkpoint(&self, lsn: Lsn) -> Result<Bytes> {
|
||||
self.tline.get(CHECKPOINT_KEY, lsn)
|
||||
}
|
||||
|
||||
/// Get the LSN of the last ingested WAL record.
|
||||
///
|
||||
/// This is just a convenience wrapper that calls through to the underlying
|
||||
/// repository.
|
||||
pub fn get_last_record_lsn(&self) -> Lsn {
|
||||
self.tline.get_last_record_lsn()
|
||||
}
|
||||
|
||||
/// Check that it is valid to request operations with that lsn.
|
||||
///
|
||||
/// This is just a convenience wrapper that calls through to the underlying
|
||||
/// repository.
|
||||
pub fn check_lsn_is_in_scope(
|
||||
&self,
|
||||
lsn: Lsn,
|
||||
latest_gc_cutoff_lsn: &RwLockReadGuard<Lsn>,
|
||||
) -> Result<()> {
|
||||
self.tline.check_lsn_is_in_scope(lsn, latest_gc_cutoff_lsn)
|
||||
}
|
||||
|
||||
/// Retrieve current logical size of the timeline
|
||||
///
|
||||
/// NOTE: counted incrementally, includes ancestors,
|
||||
pub fn get_current_logical_size(&self) -> usize {
|
||||
let current_logical_size = self.current_logical_size.load(Ordering::Acquire);
|
||||
match usize::try_from(current_logical_size) {
|
||||
Ok(sz) => sz,
|
||||
Err(_) => {
|
||||
error!(
|
||||
"current_logical_size is out of range: {}",
|
||||
current_logical_size
|
||||
);
|
||||
0
|
||||
}
|
||||
}
|
||||
fn get_checkpoint(&self, lsn: Lsn) -> Result<Bytes> {
|
||||
self.get(CHECKPOINT_KEY, lsn)
|
||||
}
|
||||
|
||||
/// Does the same as get_current_logical_size but counted on demand.
|
||||
@@ -414,22 +369,22 @@ impl<R: Repository> DatadirTimeline<R> {
|
||||
///
|
||||
/// Only relation blocks are counted currently. That excludes metadata,
|
||||
/// SLRUs, twophase files etc.
|
||||
pub fn get_current_logical_size_non_incremental(&self, lsn: Lsn) -> Result<usize> {
|
||||
fn get_current_logical_size_non_incremental(&self, lsn: Lsn) -> Result<usize> {
|
||||
// Fetch list of database dirs and iterate them
|
||||
let buf = self.tline.get(DBDIR_KEY, lsn)?;
|
||||
let buf = self.get(DBDIR_KEY, lsn)?;
|
||||
let dbdir = DbDirectory::des(&buf)?;
|
||||
|
||||
let mut total_size: usize = 0;
|
||||
for (spcnode, dbnode) in dbdir.dbdirs.keys() {
|
||||
for rel in self.list_rels(*spcnode, *dbnode, lsn)? {
|
||||
let relsize_key = rel_size_to_key(rel);
|
||||
let mut buf = self.tline.get(relsize_key, lsn)?;
|
||||
let mut buf = self.get(relsize_key, lsn)?;
|
||||
let relsize = buf.get_u32_le();
|
||||
|
||||
total_size += relsize as usize;
|
||||
}
|
||||
}
|
||||
Ok(total_size * pg_constants::BLCKSZ as usize)
|
||||
Ok(total_size * BLCKSZ as usize)
|
||||
}
|
||||
|
||||
///
|
||||
@@ -444,7 +399,7 @@ impl<R: Repository> DatadirTimeline<R> {
|
||||
result.add_key(DBDIR_KEY);
|
||||
|
||||
// Fetch list of database dirs and iterate them
|
||||
let buf = self.tline.get(DBDIR_KEY, lsn)?;
|
||||
let buf = self.get(DBDIR_KEY, lsn)?;
|
||||
let dbdir = DbDirectory::des(&buf)?;
|
||||
|
||||
let mut dbs: Vec<(Oid, Oid)> = dbdir.dbdirs.keys().cloned().collect();
|
||||
@@ -461,7 +416,7 @@ impl<R: Repository> DatadirTimeline<R> {
|
||||
rels.sort_unstable();
|
||||
for rel in rels {
|
||||
let relsize_key = rel_size_to_key(rel);
|
||||
let mut buf = self.tline.get(relsize_key, lsn)?;
|
||||
let mut buf = self.get(relsize_key, lsn)?;
|
||||
let relsize = buf.get_u32_le();
|
||||
|
||||
result.add_range(rel_block_to_key(rel, 0)..rel_block_to_key(rel, relsize));
|
||||
@@ -477,13 +432,13 @@ impl<R: Repository> DatadirTimeline<R> {
|
||||
] {
|
||||
let slrudir_key = slru_dir_to_key(kind);
|
||||
result.add_key(slrudir_key);
|
||||
let buf = self.tline.get(slrudir_key, lsn)?;
|
||||
let buf = self.get(slrudir_key, lsn)?;
|
||||
let dir = SlruSegmentDirectory::des(&buf)?;
|
||||
let mut segments: Vec<u32> = dir.segments.iter().cloned().collect();
|
||||
segments.sort_unstable();
|
||||
for segno in segments {
|
||||
let segsize_key = slru_segment_size_to_key(kind, segno);
|
||||
let mut buf = self.tline.get(segsize_key, lsn)?;
|
||||
let mut buf = self.get(segsize_key, lsn)?;
|
||||
let segsize = buf.get_u32_le();
|
||||
|
||||
result.add_range(
|
||||
@@ -495,7 +450,7 @@ impl<R: Repository> DatadirTimeline<R> {
|
||||
|
||||
// Then pg_twophase
|
||||
result.add_key(TWOPHASEDIR_KEY);
|
||||
let buf = self.tline.get(TWOPHASEDIR_KEY, lsn)?;
|
||||
let buf = self.get(TWOPHASEDIR_KEY, lsn)?;
|
||||
let twophase_dir = TwoPhaseDirectory::des(&buf)?;
|
||||
let mut xids: Vec<TransactionId> = twophase_dir.xids.iter().cloned().collect();
|
||||
xids.sort_unstable();
|
||||
@@ -509,31 +464,31 @@ impl<R: Repository> DatadirTimeline<R> {
|
||||
Ok(result.to_keyspace())
|
||||
}
|
||||
|
||||
pub fn repartition(&self, lsn: Lsn, partition_size: u64) -> Result<(KeyPartitioning, Lsn)> {
|
||||
let mut partitioning_guard = self.partitioning.lock().unwrap();
|
||||
if partitioning_guard.1 == Lsn(0)
|
||||
|| lsn.0 - partitioning_guard.1 .0 > self.repartition_threshold
|
||||
{
|
||||
let keyspace = self.collect_keyspace(lsn)?;
|
||||
let partitioning = keyspace.partition(partition_size);
|
||||
*partitioning_guard = (partitioning, lsn);
|
||||
return Ok((partitioning_guard.0.clone(), lsn));
|
||||
}
|
||||
Ok((partitioning_guard.0.clone(), partitioning_guard.1))
|
||||
}
|
||||
/// Get cached size of relation if it not updated after specified LSN
|
||||
fn get_cached_rel_size(&self, tag: &RelTag, lsn: Lsn) -> Option<BlockNumber>;
|
||||
|
||||
/// Update cached relation size if there is no more recent update
|
||||
fn update_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber);
|
||||
|
||||
/// Store cached relation size
|
||||
fn set_cached_rel_size(&self, tag: RelTag, lsn: Lsn, nblocks: BlockNumber);
|
||||
|
||||
/// Remove cached relation size
|
||||
fn remove_cached_rel_size(&self, tag: &RelTag);
|
||||
}
|
||||
|
||||
/// DatadirModification represents an operation to ingest an atomic set of
|
||||
/// updates to the repository. It is created by the 'begin_record'
|
||||
/// function. It is called for each WAL record, so that all the modifications
|
||||
/// by a one WAL record appear atomic.
|
||||
pub struct DatadirModification<'a, R: Repository> {
|
||||
pub struct DatadirModification<'a, T: DatadirTimeline> {
|
||||
/// The timeline this modification applies to. You can access this to
|
||||
/// read the state, but note that any pending updates are *not* reflected
|
||||
/// in the state in 'tline' yet.
|
||||
pub tline: &'a DatadirTimeline<R>,
|
||||
pub tline: &'a T,
|
||||
|
||||
lsn: Lsn,
|
||||
/// Lsn assigned by begin_modification
|
||||
pub lsn: Lsn,
|
||||
|
||||
// The modifications are not applied directly to the underlying key-value store.
|
||||
// The put-functions add the modifications here, and they are flushed to the
|
||||
@@ -543,7 +498,7 @@ pub struct DatadirModification<'a, R: Repository> {
|
||||
pending_nblocks: isize,
|
||||
}
|
||||
|
||||
impl<'a, R: Repository> DatadirModification<'a, R> {
|
||||
impl<'a, T: DatadirTimeline> DatadirModification<'a, T> {
|
||||
/// Initialize a completely new repository.
|
||||
///
|
||||
/// This inserts the directory metadata entries that are assumed to
|
||||
@@ -744,26 +699,36 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
|
||||
|
||||
self.pending_nblocks += nblocks as isize;
|
||||
|
||||
// Update relation size cache
|
||||
self.tline.set_cached_rel_size(rel, self.lsn, nblocks);
|
||||
|
||||
// Even if nblocks > 0, we don't insert any actual blocks here. That's up to the
|
||||
// caller.
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Truncate relation
|
||||
pub fn put_rel_truncation(&mut self, rel: RelTag, nblocks: BlockNumber) -> Result<()> {
|
||||
ensure!(rel.relnode != 0, "invalid relnode");
|
||||
let size_key = rel_size_to_key(rel);
|
||||
let last_lsn = self.tline.get_last_record_lsn();
|
||||
if self.tline.get_rel_exists(rel, last_lsn)? {
|
||||
let size_key = rel_size_to_key(rel);
|
||||
// Fetch the old size first
|
||||
let old_size = self.get(size_key)?.get_u32_le();
|
||||
|
||||
// Fetch the old size first
|
||||
let old_size = self.get(size_key)?.get_u32_le();
|
||||
// Update the entry with the new size.
|
||||
let buf = nblocks.to_le_bytes();
|
||||
self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));
|
||||
|
||||
// Update the entry with the new size.
|
||||
let buf = nblocks.to_le_bytes();
|
||||
self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));
|
||||
// Update relation size cache
|
||||
self.tline.set_cached_rel_size(rel, self.lsn, nblocks);
|
||||
|
||||
// Update logical database size.
|
||||
self.pending_nblocks -= old_size as isize - nblocks as isize;
|
||||
// Update relation size cache
|
||||
self.tline.set_cached_rel_size(rel, self.lsn, nblocks);
|
||||
|
||||
// Update logical database size.
|
||||
self.pending_nblocks -= old_size as isize - nblocks as isize;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -781,6 +746,9 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
|
||||
let buf = nblocks.to_le_bytes();
|
||||
self.put(size_key, Value::Image(Bytes::from(buf.to_vec())));
|
||||
|
||||
// Update relation size cache
|
||||
self.tline.set_cached_rel_size(rel, self.lsn, nblocks);
|
||||
|
||||
self.pending_nblocks += nblocks as isize - old_size as isize;
|
||||
}
|
||||
Ok(())
|
||||
@@ -806,6 +774,9 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
|
||||
let old_size = self.get(size_key)?.get_u32_le();
|
||||
self.pending_nblocks -= old_size as isize;
|
||||
|
||||
// Remove enty from relation size cache
|
||||
self.tline.remove_cached_rel_size(&rel);
|
||||
|
||||
// Delete size entry, as well as all blocks
|
||||
self.delete(rel_key_range(rel));
|
||||
|
||||
@@ -928,7 +899,7 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let writer = self.tline.tline.writer();
|
||||
let writer = self.tline.writer();
|
||||
|
||||
// Flush relation and SLRU data blocks, keep metadata.
|
||||
let mut result: Result<()> = Ok(());
|
||||
@@ -943,10 +914,7 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
|
||||
result?;
|
||||
|
||||
if pending_nblocks != 0 {
|
||||
self.tline.current_logical_size.fetch_add(
|
||||
pending_nblocks * pg_constants::BLCKSZ as isize,
|
||||
Ordering::SeqCst,
|
||||
);
|
||||
writer.update_current_logical_size(pending_nblocks * BLCKSZ as isize);
|
||||
self.pending_nblocks = 0;
|
||||
}
|
||||
|
||||
@@ -956,26 +924,25 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
|
||||
///
|
||||
/// Finish this atomic update, writing all the updated keys to the
|
||||
/// underlying timeline.
|
||||
/// All the modifications in this atomic update are stamped by the specified LSN.
|
||||
///
|
||||
pub fn commit(self) -> Result<()> {
|
||||
let writer = self.tline.tline.writer();
|
||||
|
||||
pub fn commit(&mut self) -> Result<()> {
|
||||
let writer = self.tline.writer();
|
||||
let lsn = self.lsn;
|
||||
let pending_nblocks = self.pending_nblocks;
|
||||
self.pending_nblocks = 0;
|
||||
|
||||
for (key, value) in self.pending_updates {
|
||||
writer.put(key, self.lsn, &value)?;
|
||||
for (key, value) in self.pending_updates.drain() {
|
||||
writer.put(key, lsn, &value)?;
|
||||
}
|
||||
for key_range in self.pending_deletions {
|
||||
writer.delete(key_range.clone(), self.lsn)?;
|
||||
for key_range in self.pending_deletions.drain(..) {
|
||||
writer.delete(key_range, lsn)?;
|
||||
}
|
||||
|
||||
writer.finish_write(self.lsn);
|
||||
writer.finish_write(lsn);
|
||||
|
||||
if pending_nblocks != 0 {
|
||||
self.tline.current_logical_size.fetch_add(
|
||||
pending_nblocks * pg_constants::BLCKSZ as isize,
|
||||
Ordering::SeqCst,
|
||||
);
|
||||
writer.update_current_logical_size(pending_nblocks * BLCKSZ as isize);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -1001,8 +968,8 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
|
||||
bail!("unexpected pending WAL record");
|
||||
}
|
||||
} else {
|
||||
let last_lsn = self.tline.get_last_record_lsn();
|
||||
self.tline.tline.get(key, last_lsn)
|
||||
let lsn = Lsn::max(self.tline.get_last_record_lsn(), self.lsn);
|
||||
self.tline.get(key, lsn)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1049,7 +1016,7 @@ struct SlruSegmentDirectory {
|
||||
segments: HashSet<u32>,
|
||||
}
|
||||
|
||||
static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; pg_constants::BLCKSZ as usize]);
|
||||
static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; BLCKSZ as usize]);
|
||||
|
||||
// Layout of the Key address space
|
||||
//
|
||||
@@ -1404,13 +1371,12 @@ fn is_slru_block_key(key: Key) -> bool {
|
||||
pub fn create_test_timeline<R: Repository>(
|
||||
repo: R,
|
||||
timeline_id: utils::zid::ZTimelineId,
|
||||
) -> Result<Arc<crate::DatadirTimeline<R>>> {
|
||||
) -> Result<std::sync::Arc<R::Timeline>> {
|
||||
let tline = repo.create_empty_timeline(timeline_id, Lsn(8))?;
|
||||
let tline = DatadirTimeline::new(tline, 256 * 1024);
|
||||
let mut m = tline.begin_modification(Lsn(8));
|
||||
m.init_empty()?;
|
||||
m.commit()?;
|
||||
Ok(Arc::new(tline))
|
||||
Ok(tline)
|
||||
}
|
||||
|
||||
#[allow(clippy::bool_assert_comparison)]
|
||||
@@ -1483,7 +1449,7 @@ mod tests {
|
||||
.contains(&TESTREL_A));
|
||||
|
||||
// Run checkpoint and garbage collection and check that it's still not visible
|
||||
newtline.tline.checkpoint(CheckpointConfig::Forced)?;
|
||||
newtline.checkpoint(CheckpointConfig::Forced)?;
|
||||
repo.gc_iteration(Some(NEW_TIMELINE_ID), 0, true)?;
|
||||
|
||||
assert!(!newtline
|
||||
|
||||
@@ -2,8 +2,9 @@ use serde::{Deserialize, Serialize};
|
||||
use std::cmp::Ordering;
|
||||
use std::fmt;
|
||||
|
||||
use postgres_ffi::relfile_utils::forknumber_to_name;
|
||||
use postgres_ffi::{pg_constants, Oid};
|
||||
use postgres_ffi::v14::pg_constants;
|
||||
use postgres_ffi::v14::relfile_utils::forknumber_to_name;
|
||||
use postgres_ffi::Oid;
|
||||
|
||||
///
|
||||
/// Relation data file segment id throughout the Postgres cluster.
|
||||
|
||||
@@ -185,7 +185,7 @@ impl Value {
|
||||
/// A repository corresponds to one .neon directory. One repository holds multiple
|
||||
/// timelines, forked off from the same initial call to 'initdb'.
|
||||
pub trait Repository: Send + Sync {
|
||||
type Timeline: Timeline;
|
||||
type Timeline: crate::DatadirTimeline;
|
||||
|
||||
/// Updates timeline based on the `TimelineSyncStatusUpdate`, received from the remote storage synchronization.
|
||||
/// See [`crate::remote_storage`] for more details about the synchronization.
|
||||
@@ -277,15 +277,6 @@ pub enum LocalTimelineState {
|
||||
Unloaded,
|
||||
}
|
||||
|
||||
impl<'a, T> From<&'a RepositoryTimeline<T>> for LocalTimelineState {
|
||||
fn from(local_timeline_entry: &'a RepositoryTimeline<T>) -> Self {
|
||||
match local_timeline_entry {
|
||||
RepositoryTimeline::Loaded(_) => LocalTimelineState::Loaded,
|
||||
RepositoryTimeline::Unloaded { .. } => LocalTimelineState::Unloaded,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Result of performing GC
|
||||
///
|
||||
@@ -382,6 +373,11 @@ pub trait Timeline: Send + Sync {
|
||||
lsn: Lsn,
|
||||
latest_gc_cutoff_lsn: &RwLockReadGuard<Lsn>,
|
||||
) -> Result<()>;
|
||||
|
||||
/// Get the physical size of the timeline at the latest LSN
|
||||
fn get_physical_size(&self) -> u64;
|
||||
/// Get the physical size of the timeline at the latest LSN non incrementally
|
||||
fn get_physical_size_non_incremental(&self) -> Result<u64>;
|
||||
}
|
||||
|
||||
/// Various functions to mutate the timeline.
|
||||
@@ -405,16 +401,17 @@ pub trait TimelineWriter<'a> {
|
||||
/// the 'lsn' or anything older. The previous last record LSN is stored alongside
|
||||
/// the latest and can be read.
|
||||
fn finish_write(&self, lsn: Lsn);
|
||||
|
||||
fn update_current_logical_size(&self, delta: isize);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod repo_harness {
|
||||
use bytes::BytesMut;
|
||||
use lazy_static::lazy_static;
|
||||
use once_cell::sync::Lazy;
|
||||
use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
|
||||
use std::{fs, path::PathBuf};
|
||||
|
||||
use crate::RepositoryImpl;
|
||||
use crate::{
|
||||
config::PageServerConf,
|
||||
layered_repository::LayeredRepository,
|
||||
@@ -441,14 +438,13 @@ pub mod repo_harness {
|
||||
buf.freeze()
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref LOCK: RwLock<()> = RwLock::new(());
|
||||
}
|
||||
static LOCK: Lazy<RwLock<()>> = Lazy::new(|| RwLock::new(()));
|
||||
|
||||
impl From<TenantConf> for TenantConfOpt {
|
||||
fn from(tenant_conf: TenantConf) -> Self {
|
||||
Self {
|
||||
checkpoint_distance: Some(tenant_conf.checkpoint_distance),
|
||||
checkpoint_timeout: Some(tenant_conf.checkpoint_timeout),
|
||||
compaction_target_size: Some(tenant_conf.compaction_target_size),
|
||||
compaction_period: Some(tenant_conf.compaction_period),
|
||||
compaction_threshold: Some(tenant_conf.compaction_threshold),
|
||||
@@ -511,11 +507,11 @@ pub mod repo_harness {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn load(&self) -> RepositoryImpl {
|
||||
pub fn load(&self) -> LayeredRepository {
|
||||
self.try_load().expect("failed to load test repo")
|
||||
}
|
||||
|
||||
pub fn try_load(&self) -> Result<RepositoryImpl> {
|
||||
pub fn try_load(&self) -> Result<LayeredRepository> {
|
||||
let walredo_mgr = Arc::new(TestRedoManager);
|
||||
|
||||
let repo = LayeredRepository::new(
|
||||
@@ -591,11 +587,10 @@ mod tests {
|
||||
//use std::sync::Arc;
|
||||
use bytes::BytesMut;
|
||||
use hex_literal::hex;
|
||||
use lazy_static::lazy_static;
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
lazy_static! {
|
||||
static ref TEST_KEY: Key = Key::from_slice(&hex!("112222222233333333444444445500000001"));
|
||||
}
|
||||
static TEST_KEY: Lazy<Key> =
|
||||
Lazy::new(|| Key::from_slice(&hex!("112222222233333333444444445500000001")));
|
||||
|
||||
#[test]
|
||||
fn test_basic() -> Result<()> {
|
||||
|
||||
@@ -155,8 +155,7 @@ use std::{
|
||||
|
||||
use anyhow::{anyhow, bail, Context};
|
||||
use futures::stream::{FuturesUnordered, StreamExt};
|
||||
use lazy_static::lazy_static;
|
||||
use once_cell::sync::OnceCell;
|
||||
use once_cell::sync::{Lazy, OnceCell};
|
||||
use remote_storage::{GenericRemoteStorage, RemoteStorage};
|
||||
use tokio::{
|
||||
fs,
|
||||
@@ -173,10 +172,10 @@ use self::{
|
||||
};
|
||||
use crate::{
|
||||
config::PageServerConf,
|
||||
exponential_backoff,
|
||||
layered_repository::{
|
||||
ephemeral_file::is_ephemeral_file,
|
||||
metadata::{metadata_path, TimelineMetadata, METADATA_FILE_NAME},
|
||||
LayeredRepository,
|
||||
},
|
||||
storage_sync::{self, index::RemoteIndex},
|
||||
tenant_mgr::attach_downloaded_tenants,
|
||||
@@ -185,8 +184,8 @@ use crate::{
|
||||
};
|
||||
|
||||
use metrics::{
|
||||
register_histogram_vec, register_int_counter, register_int_counter_vec, register_int_gauge,
|
||||
HistogramVec, IntCounter, IntCounterVec, IntGauge,
|
||||
register_histogram_vec, register_int_counter_vec, register_int_gauge, HistogramVec,
|
||||
IntCounterVec, IntGauge,
|
||||
};
|
||||
use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};
|
||||
|
||||
@@ -194,32 +193,33 @@ use self::download::download_index_parts;
|
||||
pub use self::download::gather_tenant_timelines_index_parts;
|
||||
pub use self::download::TEMP_DOWNLOAD_EXTENSION;
|
||||
|
||||
lazy_static! {
|
||||
static ref REMAINING_SYNC_ITEMS: IntGauge = register_int_gauge!(
|
||||
static REMAINING_SYNC_ITEMS: Lazy<IntGauge> = Lazy::new(|| {
|
||||
register_int_gauge!(
|
||||
"pageserver_remote_storage_remaining_sync_items",
|
||||
"Number of storage sync items left in the queue"
|
||||
)
|
||||
.expect("failed to register pageserver remote storage remaining sync items int gauge");
|
||||
static ref FATAL_TASK_FAILURES: IntCounter = register_int_counter!(
|
||||
"pageserver_remote_storage_fatal_task_failures_total",
|
||||
"Number of critically failed tasks"
|
||||
)
|
||||
.expect("failed to register pageserver remote storage remaining sync items int gauge");
|
||||
static ref IMAGE_SYNC_TIME: HistogramVec = register_histogram_vec!(
|
||||
.expect("failed to register pageserver remote storage remaining sync items int gauge")
|
||||
});
|
||||
|
||||
static IMAGE_SYNC_TIME: Lazy<HistogramVec> = Lazy::new(|| {
|
||||
register_histogram_vec!(
|
||||
"pageserver_remote_storage_image_sync_seconds",
|
||||
"Time took to synchronize (download or upload) a whole pageserver image. \
|
||||
Grouped by tenant and timeline ids, `operation_kind` (upload|download) and `status` (success|failure)",
|
||||
&["tenant_id", "timeline_id", "operation_kind", "status"],
|
||||
vec![0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 3.0, 10.0, 20.0]
|
||||
)
|
||||
.expect("failed to register pageserver image sync time histogram vec");
|
||||
static ref REMOTE_INDEX_UPLOAD: IntCounterVec = register_int_counter_vec!(
|
||||
.expect("failed to register pageserver image sync time histogram vec")
|
||||
});
|
||||
|
||||
static REMOTE_INDEX_UPLOAD: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"pageserver_remote_storage_remote_index_uploads_total",
|
||||
"Number of remote index uploads",
|
||||
&["tenant_id", "timeline_id"],
|
||||
)
|
||||
.expect("failed to register pageserver remote index upload vec");
|
||||
}
|
||||
.expect("failed to register pageserver remote index upload vec")
|
||||
});
|
||||
|
||||
static SYNC_QUEUE: OnceCell<SyncQueue> = OnceCell::new();
|
||||
|
||||
@@ -970,14 +970,19 @@ fn storage_sync_loop<P, S>(
|
||||
}
|
||||
}
|
||||
|
||||
// needed to check whether the download happened
|
||||
// more informative than just a bool
|
||||
#[derive(Debug)]
|
||||
enum DownloadMarker {
|
||||
enum DownloadStatus {
|
||||
Downloaded,
|
||||
Nothing,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum UploadStatus {
|
||||
Uploaded,
|
||||
Failed(anyhow::Error),
|
||||
Nothing,
|
||||
}
|
||||
|
||||
async fn process_batches<P, S>(
|
||||
conf: &'static PageServerConf,
|
||||
max_sync_errors: NonZeroU32,
|
||||
@@ -1017,7 +1022,7 @@ where
|
||||
"Finished storage sync task for sync id {sync_id} download marker {:?}",
|
||||
download_marker
|
||||
);
|
||||
if matches!(download_marker, DownloadMarker::Downloaded) {
|
||||
if matches!(download_marker, DownloadStatus::Downloaded) {
|
||||
downloaded_timelines.insert(sync_id.tenant_id);
|
||||
}
|
||||
}
|
||||
@@ -1031,7 +1036,7 @@ async fn process_sync_task_batch<P, S>(
|
||||
max_sync_errors: NonZeroU32,
|
||||
sync_id: ZTenantTimelineId,
|
||||
batch: SyncTaskBatch,
|
||||
) -> DownloadMarker
|
||||
) -> DownloadStatus
|
||||
where
|
||||
P: Debug + Send + Sync + 'static,
|
||||
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
|
||||
@@ -1048,66 +1053,71 @@ where
|
||||
// When operating in a system without tasks failing over the error threshold,
|
||||
// current batching and task processing systems aim to update the layer set and metadata files (remote and local),
|
||||
// without "losing" such layer files.
|
||||
let (upload_result, status_update) = tokio::join!(
|
||||
let (upload_status, download_status) = tokio::join!(
|
||||
async {
|
||||
if let Some(upload_data) = upload_data {
|
||||
match validate_task_retries(upload_data, max_sync_errors)
|
||||
let upload_retries = upload_data.retries;
|
||||
match validate_task_retries(upload_retries, max_sync_errors)
|
||||
.instrument(info_span!("retries_validation"))
|
||||
.await
|
||||
{
|
||||
ControlFlow::Continue(new_upload_data) => {
|
||||
ControlFlow::Continue(()) => {
|
||||
upload_timeline_data(
|
||||
conf,
|
||||
(storage.as_ref(), &index, sync_queue),
|
||||
current_remote_timeline.as_ref(),
|
||||
sync_id,
|
||||
new_upload_data,
|
||||
upload_data,
|
||||
sync_start,
|
||||
"upload",
|
||||
)
|
||||
.await;
|
||||
return Some(());
|
||||
}
|
||||
ControlFlow::Break(failed_upload_data) => {
|
||||
if let Err(e) = update_remote_data(
|
||||
conf,
|
||||
storage.as_ref(),
|
||||
&index,
|
||||
sync_id,
|
||||
RemoteDataUpdate::Upload {
|
||||
uploaded_data: failed_upload_data.data,
|
||||
upload_failed: true,
|
||||
},
|
||||
)
|
||||
.await
|
||||
{
|
||||
error!("Failed to update remote timeline {sync_id}: {e:?}");
|
||||
}
|
||||
}
|
||||
ControlFlow::Break(()) => match update_remote_data(
|
||||
conf,
|
||||
storage.as_ref(),
|
||||
&index,
|
||||
sync_id,
|
||||
RemoteDataUpdate::Upload {
|
||||
uploaded_data: upload_data.data,
|
||||
upload_failed: true,
|
||||
},
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => UploadStatus::Failed(anyhow::anyhow!(
|
||||
"Aborted after retries validation, current retries: {upload_retries}, max retries allowed: {max_sync_errors}"
|
||||
)),
|
||||
Err(e) => {
|
||||
error!("Failed to update remote timeline {sync_id}: {e:?}");
|
||||
UploadStatus::Failed(e)
|
||||
}
|
||||
},
|
||||
}
|
||||
} else {
|
||||
UploadStatus::Nothing
|
||||
}
|
||||
None
|
||||
}
|
||||
.instrument(info_span!("upload_timeline_data")),
|
||||
async {
|
||||
if let Some(download_data) = download_data {
|
||||
match validate_task_retries(download_data, max_sync_errors)
|
||||
match validate_task_retries(download_data.retries, max_sync_errors)
|
||||
.instrument(info_span!("retries_validation"))
|
||||
.await
|
||||
{
|
||||
ControlFlow::Continue(new_download_data) => {
|
||||
ControlFlow::Continue(()) => {
|
||||
return download_timeline_data(
|
||||
conf,
|
||||
(storage.as_ref(), &index, sync_queue),
|
||||
current_remote_timeline.as_ref(),
|
||||
sync_id,
|
||||
new_download_data,
|
||||
download_data,
|
||||
sync_start,
|
||||
"download",
|
||||
)
|
||||
.await;
|
||||
}
|
||||
ControlFlow::Break(_) => {
|
||||
ControlFlow::Break(()) => {
|
||||
index
|
||||
.write()
|
||||
.await
|
||||
@@ -1116,50 +1126,53 @@ where
|
||||
}
|
||||
}
|
||||
}
|
||||
DownloadMarker::Nothing
|
||||
DownloadStatus::Nothing
|
||||
}
|
||||
.instrument(info_span!("download_timeline_data")),
|
||||
);
|
||||
|
||||
if let Some(delete_data) = batch.delete {
|
||||
if upload_result.is_some() {
|
||||
match validate_task_retries(delete_data, max_sync_errors)
|
||||
.instrument(info_span!("retries_validation"))
|
||||
.await
|
||||
{
|
||||
ControlFlow::Continue(new_delete_data) => {
|
||||
delete_timeline_data(
|
||||
conf,
|
||||
(storage.as_ref(), &index, sync_queue),
|
||||
sync_id,
|
||||
new_delete_data,
|
||||
sync_start,
|
||||
"delete",
|
||||
)
|
||||
.instrument(info_span!("delete_timeline_data"))
|
||||
.await;
|
||||
}
|
||||
ControlFlow::Break(failed_delete_data) => {
|
||||
if let Err(e) = update_remote_data(
|
||||
conf,
|
||||
storage.as_ref(),
|
||||
&index,
|
||||
sync_id,
|
||||
RemoteDataUpdate::Delete(&failed_delete_data.data.deleted_layers),
|
||||
)
|
||||
match upload_status {
|
||||
UploadStatus::Uploaded | UploadStatus::Nothing => {
|
||||
match validate_task_retries(delete_data.retries, max_sync_errors)
|
||||
.instrument(info_span!("retries_validation"))
|
||||
.await
|
||||
{
|
||||
error!("Failed to update remote timeline {sync_id}: {e:?}");
|
||||
{
|
||||
ControlFlow::Continue(()) => {
|
||||
delete_timeline_data(
|
||||
conf,
|
||||
(storage.as_ref(), &index, sync_queue),
|
||||
sync_id,
|
||||
delete_data,
|
||||
sync_start,
|
||||
"delete",
|
||||
)
|
||||
.instrument(info_span!("delete_timeline_data"))
|
||||
.await;
|
||||
}
|
||||
ControlFlow::Break(()) => {
|
||||
if let Err(e) = update_remote_data(
|
||||
conf,
|
||||
storage.as_ref(),
|
||||
&index,
|
||||
sync_id,
|
||||
RemoteDataUpdate::Delete(&delete_data.data.deleted_layers),
|
||||
)
|
||||
.await
|
||||
{
|
||||
error!("Failed to update remote timeline {sync_id}: {e:?}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
sync_queue.push(sync_id, SyncTask::Delete(delete_data));
|
||||
warn!("Skipping delete task due to failed upload tasks, reenqueuing");
|
||||
UploadStatus::Failed(e) => {
|
||||
warn!("Skipping delete task due to failed upload tasks, reenqueuing. Upload data: {:?}, delete data: {delete_data:?}. Upload failure: {e:#}", batch.upload);
|
||||
sync_queue.push(sync_id, SyncTask::Delete(delete_data));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
status_update
|
||||
download_status
|
||||
}
|
||||
|
||||
async fn download_timeline_data<P, S>(
|
||||
@@ -1170,7 +1183,7 @@ async fn download_timeline_data<P, S>(
|
||||
new_download_data: SyncData<LayersDownload>,
|
||||
sync_start: Instant,
|
||||
task_name: &str,
|
||||
) -> DownloadMarker
|
||||
) -> DownloadStatus
|
||||
where
|
||||
P: Debug + Send + Sync + 'static,
|
||||
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
|
||||
@@ -1199,7 +1212,7 @@ where
|
||||
Ok(()) => match index.write().await.set_awaits_download(&sync_id, false) {
|
||||
Ok(()) => {
|
||||
register_sync_status(sync_id, sync_start, task_name, Some(true));
|
||||
return DownloadMarker::Downloaded;
|
||||
return DownloadStatus::Downloaded;
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Timeline {sync_id} was expected to be in the remote index after a successful download, but it's absent: {e:?}");
|
||||
@@ -1215,7 +1228,7 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
DownloadMarker::Nothing
|
||||
DownloadStatus::Nothing
|
||||
}
|
||||
|
||||
async fn update_local_metadata(
|
||||
@@ -1257,7 +1270,13 @@ async fn update_local_metadata(
|
||||
timeline_id,
|
||||
} = sync_id;
|
||||
tokio::task::spawn_blocking(move || {
|
||||
LayeredRepository::save_metadata(conf, timeline_id, tenant_id, &cloned_metadata, true)
|
||||
crate::layered_repository::save_metadata(
|
||||
conf,
|
||||
timeline_id,
|
||||
tenant_id,
|
||||
&cloned_metadata,
|
||||
true,
|
||||
)
|
||||
})
|
||||
.await
|
||||
.with_context(|| {
|
||||
@@ -1332,7 +1351,8 @@ async fn upload_timeline_data<P, S>(
|
||||
new_upload_data: SyncData<LayersUpload>,
|
||||
sync_start: Instant,
|
||||
task_name: &str,
|
||||
) where
|
||||
) -> UploadStatus
|
||||
where
|
||||
P: Debug + Send + Sync + 'static,
|
||||
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
|
||||
{
|
||||
@@ -1345,9 +1365,9 @@ async fn upload_timeline_data<P, S>(
|
||||
)
|
||||
.await
|
||||
{
|
||||
UploadedTimeline::FailedAndRescheduled => {
|
||||
UploadedTimeline::FailedAndRescheduled(e) => {
|
||||
register_sync_status(sync_id, sync_start, task_name, Some(false));
|
||||
return;
|
||||
return UploadStatus::Failed(e);
|
||||
}
|
||||
UploadedTimeline::Successful(upload_data) => upload_data,
|
||||
};
|
||||
@@ -1366,12 +1386,14 @@ async fn upload_timeline_data<P, S>(
|
||||
{
|
||||
Ok(()) => {
|
||||
register_sync_status(sync_id, sync_start, task_name, Some(true));
|
||||
UploadStatus::Uploaded
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to update remote timeline {sync_id}: {e:?}");
|
||||
uploaded_data.retries += 1;
|
||||
sync_queue.push(sync_id, SyncTask::Upload(uploaded_data));
|
||||
register_sync_status(sync_id, sync_start, task_name, Some(false));
|
||||
UploadStatus::Failed(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1474,25 +1496,17 @@ where
|
||||
.context("Failed to upload new index part")
|
||||
}
|
||||
|
||||
async fn validate_task_retries<T>(
|
||||
sync_data: SyncData<T>,
|
||||
async fn validate_task_retries(
|
||||
current_attempt: u32,
|
||||
max_sync_errors: NonZeroU32,
|
||||
) -> ControlFlow<SyncData<T>, SyncData<T>> {
|
||||
let current_attempt = sync_data.retries;
|
||||
) -> ControlFlow<(), ()> {
|
||||
let max_sync_errors = max_sync_errors.get();
|
||||
if current_attempt >= max_sync_errors {
|
||||
error!(
|
||||
"Aborting task that failed {current_attempt} times, exceeding retries threshold of {max_sync_errors}",
|
||||
);
|
||||
return ControlFlow::Break(sync_data);
|
||||
return ControlFlow::Break(());
|
||||
}
|
||||
|
||||
if current_attempt > 0 {
|
||||
let seconds_to_wait = 2.0_f64.powf(current_attempt as f64 - 1.0).min(30.0);
|
||||
info!("Waiting {seconds_to_wait} seconds before starting the task");
|
||||
tokio::time::sleep(Duration::from_secs_f64(seconds_to_wait)).await;
|
||||
}
|
||||
ControlFlow::Continue(sync_data)
|
||||
exponential_backoff(current_attempt, 1.0, 30.0).await;
|
||||
ControlFlow::Continue(())
|
||||
}
|
||||
|
||||
fn schedule_first_sync_tasks(
|
||||
|
||||
@@ -95,6 +95,8 @@ where
|
||||
debug!("Reenqueuing failed delete task for timeline {sync_id}");
|
||||
delete_data.retries += 1;
|
||||
sync_queue.push(sync_id, SyncTask::Delete(delete_data));
|
||||
} else {
|
||||
info!("Successfully deleted all layers");
|
||||
}
|
||||
errored
|
||||
}
|
||||
|
||||
@@ -130,6 +130,7 @@ where
|
||||
tenant_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
let timelines = storage
|
||||
.list_prefixes(Some(tenant_storage_path))
|
||||
.await
|
||||
@@ -140,6 +141,13 @@ where
|
||||
)
|
||||
})?;
|
||||
|
||||
if timelines.is_empty() {
|
||||
anyhow::bail!(
|
||||
"no timelines found on the remote storage for tenant {}",
|
||||
tenant_id
|
||||
)
|
||||
}
|
||||
|
||||
let mut sync_ids = HashSet::new();
|
||||
|
||||
for timeline_remote_storage_key in timelines {
|
||||
|
||||
@@ -4,7 +4,7 @@ use std::{fmt::Debug, path::PathBuf};
|
||||
|
||||
use anyhow::Context;
|
||||
use futures::stream::{FuturesUnordered, StreamExt};
|
||||
use lazy_static::lazy_static;
|
||||
use once_cell::sync::Lazy;
|
||||
use remote_storage::RemoteStorage;
|
||||
use tokio::fs;
|
||||
use tracing::{debug, error, info, warn};
|
||||
@@ -20,14 +20,14 @@ use crate::{
|
||||
};
|
||||
use metrics::{register_int_counter_vec, IntCounterVec};
|
||||
|
||||
lazy_static! {
|
||||
static ref NO_LAYERS_UPLOAD: IntCounterVec = register_int_counter_vec!(
|
||||
static NO_LAYERS_UPLOAD: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"pageserver_remote_storage_no_layers_uploads_total",
|
||||
"Number of skipped uploads due to no layers",
|
||||
&["tenant_id", "timeline_id"],
|
||||
)
|
||||
.expect("failed to register pageserver no layers upload vec");
|
||||
}
|
||||
.expect("failed to register pageserver no layers upload vec")
|
||||
});
|
||||
|
||||
/// Serializes and uploads the given index part data to the remote storage.
|
||||
pub(super) async fn upload_index_part<P, S>(
|
||||
@@ -75,7 +75,7 @@ where
|
||||
#[derive(Debug)]
|
||||
pub(super) enum UploadedTimeline {
|
||||
/// Upload failed due to some error, the upload task is rescheduled for another retry.
|
||||
FailedAndRescheduled,
|
||||
FailedAndRescheduled(anyhow::Error),
|
||||
/// No issues happened during the upload, all task files were put into the remote storage.
|
||||
Successful(SyncData<LayersUpload>),
|
||||
}
|
||||
@@ -179,7 +179,7 @@ where
|
||||
})
|
||||
.collect::<FuturesUnordered<_>>();
|
||||
|
||||
let mut errors_happened = false;
|
||||
let mut errors = Vec::new();
|
||||
while let Some(upload_result) = upload_tasks.next().await {
|
||||
match upload_result {
|
||||
Ok(uploaded_path) => {
|
||||
@@ -188,13 +188,13 @@ where
|
||||
}
|
||||
Err(e) => match e {
|
||||
UploadError::Other(e) => {
|
||||
errors_happened = true;
|
||||
error!("Failed to upload a layer for timeline {sync_id}: {e:?}");
|
||||
errors.push(format!("{e:#}"));
|
||||
}
|
||||
UploadError::MissingLocalFile(source_path, e) => {
|
||||
if source_path.exists() {
|
||||
errors_happened = true;
|
||||
error!("Failed to upload a layer for timeline {sync_id}: {e:?}");
|
||||
errors.push(format!("{e:#}"));
|
||||
} else {
|
||||
// We have run the upload sync task, but the file we wanted to upload is gone.
|
||||
// This is "fine" due the asynchronous nature of the sync loop: it only reacts to events and might need to
|
||||
@@ -217,14 +217,17 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
if errors_happened {
|
||||
if errors.is_empty() {
|
||||
info!("Successfully uploaded all layers");
|
||||
UploadedTimeline::Successful(upload_data)
|
||||
} else {
|
||||
debug!("Reenqueuing failed upload task for timeline {sync_id}");
|
||||
upload_data.retries += 1;
|
||||
sync_queue.push(sync_id, SyncTask::Upload(upload_data));
|
||||
UploadedTimeline::FailedAndRescheduled
|
||||
} else {
|
||||
info!("Successfully uploaded all layers");
|
||||
UploadedTimeline::Successful(upload_data)
|
||||
UploadedTimeline::FailedAndRescheduled(anyhow::anyhow!(
|
||||
"Errors appeared during layer uploads: {:?}",
|
||||
errors
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user